From ab3e264b1c5adce08f2ef8190e41f2f2258c03a2 Mon Sep 17 00:00:00 2001 From: Alexander Kiel Date: Tue, 14 Dec 2021 19:26:11 +0100 Subject: [PATCH] Allow Setting the Database Sync Timeout All reading FHIR interactions have to acquire the last database state known at the time the request arrived in order to ensure consistency. That database state might not be ready immediately because indexing might be still undergoing. In such a situation, the request has to wait for the database state becoming available. If the database state won't be available before the timeout expires, a 503 Service Unavailable response will be returned. Please increase this timeout if you experience such 503 responses, and you are not able to improve indexing performance or lower transaction load. --- docs/deployment/environment-variables.md | 56 ++++++++++++------- modules/rest-api/src/blaze/rest_api.clj | 8 ++- .../rest-api/src/blaze/rest_api/routes.clj | 43 +++++++------- modules/rest-api/src/blaze/rest_api/spec.clj | 5 ++ modules/rest-api/test/blaze/rest_api_test.clj | 9 ++- .../src/blaze/middleware/fhir/db.clj | 19 ++++--- .../src/blaze/middleware/fhir/db_spec.clj | 2 +- .../test/blaze/middleware/fhir/db_test.clj | 10 ++++ src/blaze/system.clj | 1 + 9 files changed, 96 insertions(+), 57 deletions(-) diff --git a/docs/deployment/environment-variables.md b/docs/deployment/environment-variables.md index c2641c1ad..d12020ebc 100644 --- a/docs/deployment/environment-variables.md +++ b/docs/deployment/environment-variables.md @@ -63,29 +63,43 @@ More information about distributed deployment are available [here](distributed.m ### Other Environment Variables -| Name | Default | Since | Depr ¹ | Description | -|:----------------------------------------|:---------------------------|:-------|--------|:-----------------------------------------------------------------------| -| PROXY_HOST | — | v0.6 | — | REMOVED: use -Dhttp.proxyHost | -| PROXY_PORT | — | v0.6 | — | REMOVED: use -Dhttp.proxyPort | -| PROXY_USER | — | v0.6.1 | — | REMOVED: try [SOCKS Options][1] | -| PROXY_PASSWORD | — | v0.6.1 | — | REMOVED: try [SOCKS Options][1] | -| CONNECTION_TIMEOUT | 5 s | v0.6.3 | — | connection timeout for outbound HTTP requests | -| REQUEST_TIMEOUT | 30 s | v0.6.3 | — | REMOVED | -| TERM_SERVICE_URI | [http://tx.fhir.org/r4][6] | v0.6 | v0.11 | Base URI of the terminology service | -| BASE_URL | http://localhost:8080 | — | — | The URL under which Blaze is accessible by clients. ² | -| CONTEXT_PATH | /fhir | v0.11 | — | Context path under which the FHIR RESTful API will be accessible. | -| SERVER_PORT | 8080 | — | — | The port of the main HTTP server | -| METRICS_SERVER_PORT | 8081 | v0.6 | — | The port of the Prometheus metrics server | -| LOG_LEVEL | info | v0.6 | — | one of trace, debug, info, warn or error | -| JAVA_TOOL_OPTIONS | — | — | — | JVM options \(Docker only\) | -| FHIR_OPERATION_EVALUATE_MEASURE_THREADS | 4 | v0.8 | — | The maximum number of parallel $evaluate-measure executions. ³ | -| OPENID_PROVIDER_URL | — | v0.11 | — | [OpenID Connect][4] provider URL to enable [authentication][5] | -| ENFORCE_REFERENTIAL_INTEGRITY | true | v0.14 | — | Enforce referential integrity on resource create, update and delete. ⁴ | +| Name | Default | Since | Depr ¹ | Description | +|:----------------------------------------|:---------------------------|:-------|--------|:-----------------------------------------------------------------------------------------------| +| PROXY_HOST | — | v0.6 | — | REMOVED: use -Dhttp.proxyHost | +| PROXY_PORT | — | v0.6 | — | REMOVED: use -Dhttp.proxyPort | +| PROXY_USER | — | v0.6.1 | — | REMOVED: try [SOCKS Options][1] | +| PROXY_PASSWORD | — | v0.6.1 | — | REMOVED: try [SOCKS Options][1] | +| CONNECTION_TIMEOUT | 5 s | v0.6.3 | — | connection timeout for outbound HTTP requests | +| REQUEST_TIMEOUT | 30 s | v0.6.3 | — | REMOVED | +| TERM_SERVICE_URI | [http://tx.fhir.org/r4][6] | v0.6 | v0.11 | Base URI of the terminology service | +| BASE_URL | http://localhost:8080 | — | — | The URL under which Blaze is accessible by clients. | +| CONTEXT_PATH | /fhir | v0.11 | — | Context path under which the FHIR RESTful API will be accessible. | +| SERVER_PORT | 8080 | — | — | The port of the main HTTP server | +| METRICS_SERVER_PORT | 8081 | v0.6 | — | The port of the Prometheus metrics server | +| LOG_LEVEL | info | v0.6 | — | one of trace, debug, info, warn or error | +| JAVA_TOOL_OPTIONS | — | — | — | JVM options \(Docker only\) | +| FHIR_OPERATION_EVALUATE_MEASURE_THREADS | 4 | v0.8 | — | The maximum number of parallel $evaluate-measure executions. | +| OPENID_PROVIDER_URL | — | v0.11 | — | [OpenID Connect][4] provider URL to enable [authentication][5] | +| ENFORCE_REFERENTIAL_INTEGRITY | true | v0.14 | — | Enforce referential integrity on resource create, update and delete. | +| DB_SYNC_TIMEOUT | 10000 | v0.15 | — | Timeout in milliseconds for all reading FHIR interactions acquiring the newest database state. | ¹ Deprecated -² The [FHIR RESTful API](https://www.hl7.org/fhir/http.html) will be accessible under `BASE_URL/CONTEXT_PATH`. Possible X-Forwarded-Host, X-Forwarded-Proto and Forwarded request headers will override this URL. -³ Not the same as the number of threads used for measure evaluation which equal to the number of available processors. -⁴ It's enabled by default but can be disabled on proxy/middleware/secondary systems were a primary system ensures referential integrity. + +#### BASE_URL + +The [FHIR RESTful API](https://www.hl7.org/fhir/http.html) will be accessible under `BASE_URL/CONTEXT_PATH`. Possible X-Forwarded-Host, X-Forwarded-Proto and Forwarded request headers will override this URL. + +#### FHIR_OPERATION_EVALUATE_MEASURE_THREADS + +Not the same as the number of threads used for measure evaluation which equal to the number of available processors. + +#### ENFORCE_REFERENTIAL_INTEGRITY + +It's enabled by default but can be disabled on proxy/middleware/secondary systems were a primary system ensures referential integrity. + +#### DB_SYNC_TIMEOUT + +All reading FHIR interactions have to acquire the last database state known at the time the request arrived in order to ensure [consistency](../consistency.md). That database state might not be ready immediately because indexing might be still undergoing. In such a situation, the request has to wait for the database state becoming available. If the database state won't be available before the timeout expires, a 503 Service Unavailable response will be returned. Please increase this timeout if you experience such 503 responses, and you are not able to improve indexing performance or lower transaction load. ### Common JAVA_TOOL_OPTIONS diff --git a/modules/rest-api/src/blaze/rest_api.clj b/modules/rest-api/src/blaze/rest_api.clj index 255bb6dee..def0ab9d2 100644 --- a/modules/rest-api/src/blaze/rest_api.clj +++ b/modules/rest-api/src/blaze/rest_api.clj @@ -115,7 +115,8 @@ :blaze/version :blaze.rest-api/structure-definitions :blaze.db/node - :blaze.db/search-param-registry] + :blaze.db/search-param-registry + :blaze.rest-api/db-sync-timeout] :opt-un [:blaze/context-path ::auth-backends @@ -128,8 +129,9 @@ (defmethod ig/init-key :blaze/rest-api - [_ {:keys [base-url context-path] :as context}] - (log/info "Init FHIR RESTful API with base URL:" (str base-url context-path)) + [_ {:keys [base-url context-path db-sync-timeout] :as context}] + (log/info "Init FHIR RESTful API with base URL:" (str base-url context-path) + "and a database sync timeout of" db-sync-timeout "ms") (handler context)) diff --git a/modules/rest-api/src/blaze/rest_api/routes.clj b/modules/rest-api/src/blaze/rest_api/routes.clj index dd0a2e4c9..262a81842 100644 --- a/modules/rest-api/src/blaze/rest_api/routes.clj +++ b/modules/rest-api/src/blaze/rest_api/routes.clj @@ -51,7 +51,7 @@ Route data contains the resource type under :fhir.resource/type." {:arglists '([context resource-patterns structure-definition])} - [{:keys [node] parse-executor :blaze.rest-api.json-parse/executor} + [{:keys [node db-sync-timeout] parse-executor :blaze.rest-api.json-parse/executor} resource-patterns {:keys [name] :as structure-definition}] (when-let @@ -62,7 +62,7 @@ ["" (cond-> {:name (keyword name "type")} (contains? interactions :search-type) - (assoc :get {:middleware [[wrap-db node]] + (assoc :get {:middleware [[wrap-db node db-sync-timeout]] :handler (-> interactions :search-type :blaze.rest-api.interaction/handler)}) (contains? interactions :create) @@ -72,23 +72,23 @@ ["/_history" (cond-> {:conflicting true} (contains? interactions :history-type) - (assoc :get {:middleware [[wrap-db node]] + (assoc :get {:middleware [[wrap-db node db-sync-timeout]] :handler (-> interactions :history-type :blaze.rest-api.interaction/handler)}))] ["/_search" (cond-> {:name (keyword name "search") :conflicting true} (contains? interactions :search-type) - (assoc :post {:middleware [[wrap-db node]] + (assoc :post {:middleware [[wrap-db node db-sync-timeout]] :handler (-> interactions :search-type :blaze.rest-api.interaction/handler)}))] ["/__page" (cond-> {:name (keyword name "page") :conflicting true} (contains? interactions :search-type) (assoc - :get {:middleware [[wrap-db node]] + :get {:middleware [[wrap-db node db-sync-timeout]] :handler (-> interactions :search-type :blaze.rest-api.interaction/handler)} - :post {:middleware [[wrap-db node]] + :post {:middleware [[wrap-db node db-sync-timeout]] :handler (-> interactions :search-type :blaze.rest-api.interaction/handler)}))] ["/{id}" @@ -97,7 +97,7 @@ {:name (keyword name "instance") :conflicting true} (contains? interactions :read) - (assoc :get {:middleware [[wrap-db node]] + (assoc :get {:middleware [[wrap-db node db-sync-timeout]] :handler (-> interactions :read :blaze.rest-api.interaction/handler)}) (contains? interactions :update) @@ -113,41 +113,41 @@ {:name (keyword name "history-instance") :conflicting true} (contains? interactions :history-instance) - (assoc :get {:middleware [[wrap-db node]] + (assoc :get {:middleware [[wrap-db node db-sync-timeout]] :handler (-> interactions :history-instance :blaze.rest-api.interaction/handler)}))] ["/{vid}" (cond-> {:name (keyword name "versioned-instance")} (contains? interactions :vread) - (assoc :get {:middleware [[wrap-db node]] + (assoc :get {:middleware [[wrap-db node db-sync-timeout]] :handler (-> interactions :vread :blaze.rest-api.interaction/handler)}))]]]])) (defn compartment-route {:arglists '([context compartment])} - [{:keys [node]} {:blaze.rest-api.compartment/keys [code search-handler]}] + [{:keys [node db-sync-timeout]} {:blaze.rest-api.compartment/keys [code search-handler]}] [(format "/%s/{id}/{type}" code) {:name (keyword code "compartment") :fhir.compartment/code code :conflicting true - :get {:middleware [[wrap-db node]] + :get {:middleware [[wrap-db node db-sync-timeout]] :handler search-handler}}]) (defn- operation-system-handler-route - [{:keys [node] parse-executor :blaze.rest-api.json-parse/executor} + [{:keys [node db-sync-timeout] parse-executor :blaze.rest-api.json-parse/executor} {:blaze.rest-api.operation/keys [code system-handler]}] (when system-handler [[(str "/$" code) - {:middleware [[wrap-db node]] + {:middleware [[wrap-db node db-sync-timeout]] :get system-handler :post {:middleware [[wrap-resource parse-executor]] :handler system-handler}}]])) (defn operation-type-handler-route - [{:keys [node] parse-executor :blaze.rest-api.json-parse/executor} + [{:keys [node db-sync-timeout] parse-executor :blaze.rest-api.json-parse/executor} {:blaze.rest-api.operation/keys [code resource-types type-handler]}] (when type-handler @@ -155,7 +155,7 @@ (fn [resource-type] [(str "/" resource-type "/$" code) {:conflicting true - :middleware [[wrap-db node]] + :middleware [[wrap-db node db-sync-timeout]] :get type-handler :post {:middleware [[wrap-resource parse-executor]] :handler type-handler}}]) @@ -163,14 +163,14 @@ (defn operation-instance-handler-route - [{:keys [node] parse-executor :blaze.rest-api.json-parse/executor} + [{:keys [node db-sync-timeout] parse-executor :blaze.rest-api.json-parse/executor} {:blaze.rest-api.operation/keys [code resource-types instance-handler]}] (when instance-handler (map (fn [resource-type] [(str "/" resource-type "/{id}/$" code) - {:middleware [[wrap-db node]] + {:middleware [[wrap-db node db-sync-timeout]] :get instance-handler :post {:middleware [[wrap-resource parse-executor]] :handler instance-handler}}]) @@ -185,6 +185,7 @@ structure-definitions node auth-backends + db-sync-timeout search-system-handler transaction-handler history-system-handler @@ -205,7 +206,7 @@ ["" (cond-> {} (some? search-system-handler) - (assoc :get {:middleware [[wrap-db node]] + (assoc :get {:middleware [[wrap-db node db-sync-timeout]] :handler search-system-handler}) (some? transaction-handler) (assoc :post {:middleware @@ -217,15 +218,15 @@ ["/_history" (cond-> {} (some? history-system-handler) - (assoc :get {:middleware [[wrap-db node]] + (assoc :get {:middleware [[wrap-db node db-sync-timeout]] :handler history-system-handler}))] ["/__page" (cond-> {} (some? search-system-handler) (assoc - :get {:middleware [[wrap-db node]] + :get {:middleware [[wrap-db node db-sync-timeout]] :handler search-system-handler} - :post {:middleware [[wrap-db node]] + :post {:middleware [[wrap-db node db-sync-timeout]] :handler search-system-handler}))]] (into (mapcat (partial operation-system-handler-route context)) diff --git a/modules/rest-api/src/blaze/rest_api/spec.clj b/modules/rest-api/src/blaze/rest_api/spec.clj index 68cc7270c..3e8ed53bf 100644 --- a/modules/rest-api/src/blaze/rest_api/spec.clj +++ b/modules/rest-api/src/blaze/rest_api/spec.clj @@ -140,3 +140,8 @@ (s/def :blaze.rest-api/structure-definitions (s/coll-of :fhir.un/StructureDefinition)) + + +;; in milliseconds +(s/def :blaze.rest-api/db-sync-timeout + pos-int?) diff --git a/modules/rest-api/test/blaze/rest_api_test.clj b/modules/rest-api/test/blaze/rest_api_test.clj index 8fc7c066f..056121720 100644 --- a/modules/rest-api/test/blaze/rest_api_test.clj +++ b/modules/rest-api/test/blaze/rest_api_test.clj @@ -267,7 +267,8 @@ [:explain ::s/problems 2 :pred] := `(fn ~'[%] (contains? ~'% :version)) [:explain ::s/problems 3 :pred] := `(fn ~'[%] (contains? ~'% :structure-definitions)) [:explain ::s/problems 4 :pred] := `(fn ~'[%] (contains? ~'% :node)) - [:explain ::s/problems 5 :pred] := `(fn ~'[%] (contains? ~'% :search-param-registry)))) + [:explain ::s/problems 5 :pred] := `(fn ~'[%] (contains? ~'% :search-param-registry)) + [:explain ::s/problems 6 :pred] := `(fn ~'[%] (contains? ~'% :db-sync-timeout)))) (testing "invalid enforce-referential-integrity" (given-thrown (ig/init {:blaze/rest-api {:enforce-referential-integrity ::invalid}}) @@ -279,8 +280,9 @@ [:explain ::s/problems 3 :pred] := `(fn ~'[%] (contains? ~'% :structure-definitions)) [:explain ::s/problems 4 :pred] := `(fn ~'[%] (contains? ~'% :node)) [:explain ::s/problems 5 :pred] := `(fn ~'[%] (contains? ~'% :search-param-registry)) - [:explain ::s/problems 6 :pred] := `boolean? - [:explain ::s/problems 6 :val] := ::invalid))) + [:explain ::s/problems 6 :pred] := `(fn ~'[%] (contains? ~'% :db-sync-timeout)) + [:explain ::s/problems 7 :pred] := `boolean? + [:explain ::s/problems 7 :val] := ::invalid))) (def system @@ -291,6 +293,7 @@ :structure-definitions [] :node (ig/ref :blaze.db/node) :search-param-registry (ig/ref :blaze.db/search-param-registry) + :db-sync-timeout 10000 :blaze.rest-api.json-parse/executor (ig/ref :blaze.rest-api.json-parse/executor)} :blaze.db/search-param-registry {} :blaze.rest-api.json-parse/executor {})) diff --git a/modules/rest-util/src/blaze/middleware/fhir/db.clj b/modules/rest-util/src/blaze/middleware/fhir/db.clj index e26fec4d7..1992f063b 100644 --- a/modules/rest-util/src/blaze/middleware/fhir/db.clj +++ b/modules/rest-util/src/blaze/middleware/fhir/db.clj @@ -15,18 +15,21 @@ (and vid (re-matches #"\d+" vid) (Long/parseLong vid))) -(defn- db [node {:keys [query-params] :as request}] +(defn- db [node timeout {:keys [query-params] :as request}] (if-let [t (vid request)] (do-sync [db (d/sync node t)] (d/as-of db t)) (if-let [t (fhir-util/t query-params)] (d/sync node t) - (ac/or-timeout! (d/sync node) 2 TimeUnit/SECONDS)))) + (ac/or-timeout! (d/sync node) timeout TimeUnit/MILLISECONDS)))) -(defn wrap-db [handler node] - (fn [request] - (if (:blaze/db request) - (handler request) - (-> (db node request) - (ac/then-compose #(handler (assoc request :blaze/db %))))))) +(defn wrap-db + ([handler node] + (wrap-db handler node 10000)) + ([handler node timeout] + (fn [request] + (if (:blaze/db request) + (handler request) + (-> (db node timeout request) + (ac/then-compose #(handler (assoc request :blaze/db %)))))))) diff --git a/modules/rest-util/src/blaze/middleware/fhir/db_spec.clj b/modules/rest-util/src/blaze/middleware/fhir/db_spec.clj index 85484102b..3c3895c90 100644 --- a/modules/rest-util/src/blaze/middleware/fhir/db_spec.clj +++ b/modules/rest-util/src/blaze/middleware/fhir/db_spec.clj @@ -5,4 +5,4 @@ (s/fdef db/wrap-db - :args (s/cat :handler ifn? :node :blaze.db/node)) + :args (s/cat :handler ifn? :node :blaze.db/node :timeout (s/? pos-int?))) diff --git a/modules/rest-util/test/blaze/middleware/fhir/db_test.clj b/modules/rest-util/test/blaze/middleware/fhir/db_test.clj index 5f990159d..6503f498c 100644 --- a/modules/rest-util/test/blaze/middleware/fhir/db_test.clj +++ b/modules/rest-util/test/blaze/middleware/fhir/db_test.clj @@ -85,5 +85,15 @@ (assert (= ::node node)) (ac/supply-async (constantly ::db) (ac/delayed-executor 3 TimeUnit/SECONDS)))] + (given-failed-future ((db/wrap-db handler ::node 2000) {}) + ::anom/category := ::anom/busy))) + + (testing "default timeout are 10 seconds" + (with-redefs + [d/sync + (fn [node] + (assert (= ::node node)) + (ac/supply-async (constantly ::db) (ac/delayed-executor 11 TimeUnit/SECONDS)))] + (given-failed-future ((db/wrap-db handler ::node) {}) ::anom/category := ::anom/busy))))) diff --git a/src/blaze/system.clj b/src/blaze/system.clj index 3a4c6f6b0..48e48ed6e 100644 --- a/src/blaze/system.clj +++ b/src/blaze/system.clj @@ -109,6 +109,7 @@ :search-param-registry (ig/ref :blaze.db/search-param-registry) :auth-backends (ig/refset :blaze.auth/backend) :context-path (->Cfg "CONTEXT_PATH" string? "/fhir") + :db-sync-timeout (->Cfg "DB_SYNC_TIMEOUT" pos-int? 10000) :blaze.rest-api.json-parse/executor (ig/ref :blaze.rest-api.json-parse/executor)} :blaze.rest-api/requests-total {}