Skip to content

Commit f29c010

Browse files
committed
refactor 'submit_job_handler' to allow submit from pre-created job rather than parsing request from scratch
1 parent d5a086b commit f29c010

File tree

10 files changed

+160
-68
lines changed

10 files changed

+160
-68
lines changed

tests/functional/test_celery.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,12 @@ def test_celery_registry_resolution():
5050
settings = get_settings_from_testapp(webapp)
5151
wps_url = get_wps_url(settings)
5252
job_store = get_db(settings).get_store("jobs")
53-
job1 = job_store.save_job(task_id="tmp", process="jsonarray2netcdf",
54-
inputs={"input": {"href": "http://random-dont-care.com/fake.json"}})
55-
job2 = job_store.save_job(task_id="tmp", process="jsonarray2netcdf",
56-
inputs={"input": {"href": "http://random-dont-care.com/fake.json"}})
53+
job1 = job_store.save_job(
54+
task_id="tmp", process="jsonarray2netcdf", inputs={"input": {"href": "http://random-dont-care.com/fake.json"}}
55+
)
56+
job2 = job_store.save_job(
57+
task_id="tmp", process="jsonarray2netcdf", inputs={"input": {"href": "http://random-dont-care.com/fake.json"}}
58+
)
5759

5860
with contextlib.ExitStack() as stack:
5961
celery_mongo_broker = f"""mongodb://{settings["mongodb.host"]}:{settings["mongodb.port"]}/celery-test"""

tests/functional/test_cli.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -770,8 +770,9 @@ def test_jobs_search_multi_status(self):
770770
class TestWeaverCLI(TestWeaverClientBase):
771771
def setUp(self):
772772
super(TestWeaverCLI, self).setUp()
773-
job = self.job_store.save_job(task_id="12345678-1111-2222-3333-111122223333", process="fake-process",
774-
access=Visibility.PUBLIC)
773+
job = self.job_store.save_job(
774+
task_id="12345678-1111-2222-3333-111122223333", process="fake-process", access=Visibility.PUBLIC
775+
)
775776
job.status = Status.SUCCEEDED
776777
self.test_job = self.job_store.update_job(job)
777778

tests/functional/test_wps_package.py

+26-26
Large diffs are not rendered by default.

tests/wps_restapi/test_jobs.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,10 @@ def make_job(self,
179179
): # type: (...) -> Job
180180
if isinstance(created, str):
181181
created = date_parser.parse(created)
182-
job = self.job_store.save_job(task_id=task_id, process=process, service=service, is_workflow=False,
183-
execute_async=True, user_id=user_id, access=access, created=created)
182+
job = self.job_store.save_job(
183+
task_id=task_id, process=process, service=service, is_workflow=False, execute_async=True, user_id=user_id,
184+
access=access, created=created
185+
)
184186
job.status = status
185187
if status != Status.ACCEPTED:
186188
job.started = job.created + datetime.timedelta(seconds=offset if offset is not None else 0)

weaver/datatype.py

+38
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,25 @@ def wps_id(self, wps_id):
859859
raise TypeError(f"Type 'str' or 'UUID' is required for '{self.__name__}.wps_id'")
860860
self["wps_id"] = wps_id
861861

862+
@property
863+
def wps_url(self):
864+
# type: () -> Optional[str]
865+
"""
866+
Service URL reference for :term:`WPS` interface.
867+
868+
.. seealso::
869+
- :attr:`Process.processEndpointWPS1`
870+
- :attr:`Service.url`
871+
"""
872+
# Falls back to None when no "wps_url" entry has been stored on this object.
return self.get("wps_url", None)
873+
874+
@wps_url.setter
875+
def wps_url(self, service):
876+
# type: (Optional[str]) -> None
877+
# NOTE(review): the type comment above advertises Optional[str], but None fails the
# isinstance check below and raises TypeError; confirm whether None is meant to be accepted.
if not isinstance(service, str):
878+
raise TypeError(f"Type 'str' is required for '{self.__name__}.wps_url'")
879+
# Only reached for 'str' values: store the URL under the "wps_url" key.
self["wps_url"] = service
880+
862881
@property
863882
def service(self):
864883
# type: () -> Optional[str]
@@ -1071,6 +1090,23 @@ def execution_mode(self, mode):
10711090
raise ValueError(f"Invalid value for '{self.__name__}.execution_mode'. Must be one of {modes}")
10721091
self["execution_mode"] = mode
10731092

1093+
@property
1094+
def execution_wait(self):
1095+
# type: () -> Optional[int]
1096+
"""
1097+
Execution time (in seconds) to wait for a synchronous response.
1098+
"""
1099+
if not self.execute_sync:
1100+
return None
1101+
return self.get("execution_wait")
1102+
1103+
@execution_wait.setter
1104+
def execution_wait(self, wait):
1105+
# type: (Optional[int]) -> None
1106+
if wait is not None or not isinstance(wait, int):
1107+
raise ValueError(f"Invalid value for '{self.__name__}.execution_wait'. Must be None or an integer.")
1108+
self["execution_wait"] = wait
1109+
10741110
@property
10751111
def execution_response(self):
10761112
# type: () -> AnyExecuteResponse
@@ -1533,6 +1569,7 @@ def params(self):
15331569
"id": self.id,
15341570
"task_id": self.task_id,
15351571
"wps_id": self.wps_id,
1572+
"wps_url": self.wps_url,
15361573
"service": self.service,
15371574
"process": self.process,
15381575
"inputs": self.inputs,
@@ -1544,6 +1581,7 @@ def params(self):
15441581
"execution_response": self.execution_response,
15451582
"execution_return": self.execution_return,
15461583
"execution_mode": self.execution_mode,
1584+
"execution_wait": self.execution_wait,
15471585
"is_workflow": self.is_workflow,
15481586
"created": self.created,
15491587
"started": self.started,

weaver/processes/execution.py

+61-23
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
AnyProcessRef,
9292
AnyResponseType,
9393
AnyServiceRef,
94+
AnySettingsContainer,
9495
AnyViewResponse,
9596
AnyValueType,
9697
CeleryResult,
@@ -754,7 +755,7 @@ def submit_job(request, reference, tags=None, process_id=None):
754755

755756
def submit_job_handler(payload, # type: ProcessExecution
756757
settings, # type: SettingsType
757-
service_url, # type: str
758+
wps_url, # type: str
758759
provider=None, # type: Optional[AnyServiceRef]
759760
process=None, # type: AnyProcessRef
760761
is_workflow=False, # type: bool
@@ -767,9 +768,11 @@ def submit_job_handler(payload, # type: ProcessExecution
767768
context=None, # type: Optional[str]
768769
): # type: (...) -> AnyResponseType
769770
"""
770-
Submits the job to the Celery worker with provided parameters.
771+
Parses parameters that define the submitted :term:`Job`, and responds accordingly with the selected execution mode.
771772
772-
Assumes that parameters have been pre-fetched and validated, except for the :paramref:`payload`.
773+
Assumes that parameters have been pre-fetched and validated, except for the :paramref:`payload` containing the
774+
desired inputs and outputs from the :term:`Job`. The selected execution mode looks up the various combinations
775+
of headers and body parameters available across :term:`API` implementations and revisions.
773776
"""
774777
json_body = validate_job_schema(payload)
775778
db = get_db(settings)
@@ -820,58 +823,93 @@ def submit_job_handler(payload, # type: ProcessExecution
820823
store = db.get_store(StoreJobs) # type: StoreJobs
821824
job = store.save_job(task_id=job_status, process=process, service=provider_id,
822825
inputs=job_inputs, outputs=job_outputs, is_workflow=is_workflow, is_local=is_local,
823-
execute_async=is_execute_async, execute_response=exec_resp, execute_return=exec_return,
826+
execute_async=is_execute_async, execute_wait=wait,
827+
execute_response=exec_resp, execute_return=exec_return,
824828
custom_tags=tags, user_id=user, access=visibility, context=context, subscribers=subscribers,
825829
accept_type=accept_type, accept_language=language)
826830
job.save_log(logger=LOGGER, message=job_message, status=job_status, progress=0)
827-
831+
job.wps_url = wps_url
828832
job = store.update_job(job)
829-
location_url = job.status_url(settings)
833+
834+
return submit_job_dispatch_task(job, headers=req_headers, container=settings)
835+
836+
837+
def submit_job_dispatch_task(
838+
job, # type: Job
839+
*, # force named keyword arguments after
840+
container, # type: AnySettingsContainer
841+
headers=None, # type: AnyHeadersContainer
842+
): # type: (...) -> AnyResponseType
843+
"""
844+
Submits the :term:`Job` to the :mod:`celery` worker with provided parameters.
845+
846+
Assumes that parameters have been pre-fetched, validated, and can be resolved from the :term:`Job`.
847+
"""
848+
db = get_db(container)
849+
store = db.get_store(StoreJobs)
850+
851+
location_url = job.status_url(container)
830852
resp_headers = {"Location": location_url}
831-
resp_headers.update(applied)
853+
req_headers = copy.deepcopy(headers or {})
832854

833855
task_result = None # type: Optional[CeleryResult]
856+
job_pending_created = job.status == Status.CREATED
834857
if not job_pending_created:
835-
wps_url = clean_ows_url(service_url)
858+
wps_url = clean_ows_url(job.wps_url)
836859
task_result = execute_process.delay(job_id=job.id, wps_url=wps_url, headers=headers)
837860
LOGGER.debug("Celery pending task [%s] for job [%s].", task_result.id, job.id)
838-
if not job_pending_created and not is_execute_async:
839-
LOGGER.debug("Celery task requested as sync if it completes before (wait=%ss)", wait)
861+
862+
execute_sync = not job_pending_created and not job.execute_async
863+
if execute_sync:
864+
LOGGER.debug("Celery task requested as sync if it completes before (wait=%ss)", job.execution_wait)
840865
try:
841-
task_result.wait(timeout=wait)
866+
task_result.wait(timeout=job.execution_wait)
842867
except CeleryTaskTimeoutError:
843868
pass
844869
if task_result.ready():
845870
job = store.fetch_by_id(job.id)
846871
# when sync is successful, it must return the results direct instead of status info
847872
# see: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response
848873
if job.status == Status.SUCCEEDED:
874+
_, _, sync_applied = parse_prefer_header_execute_mode(req_headers, [ExecuteControlOption.SYNC])
875+
if sync_applied:
876+
resp_headers.update(sync_applied)
849877
return get_job_results_response(
850878
job,
851879
request_headers=req_headers,
852880
response_headers=resp_headers,
853-
container=settings,
881+
container=container,
854882
)
855883
# otherwise return the error status
856-
body = job.json(container=settings)
884+
body = job.json(container=container)
857885
body["location"] = location_url
858886
resp = get_job_submission_response(body, resp_headers, error=True)
859887
return resp
860888
else:
861-
LOGGER.debug("Celery task requested as sync took too long to complete (wait=%ss). Continue in async.", wait)
862-
# sync not respected, therefore must drop it
863-
# since both could be provided as alternative preferences, drop only async with limited subset
864-
prefer = get_header("Preference-Applied", headers, pop=True)
865-
_, _, async_applied = parse_prefer_header_execute_mode({"Prefer": prefer}, [ExecuteControlOption.ASYNC])
866-
if async_applied:
867-
resp_headers.update(async_applied)
889+
job.save_log(
890+
logger=LOGGER,
891+
level=logging.WARNING,
892+
message=(
893+
f"Job requested as synchronous execution took too long to complete (wait={job.execution_wait}s). "
894+
"Will resume with asynchronous execution."
895+
)
896+
)
897+
job = store.update_job(job)
898+
execute_sync = False
899+
900+
if not execute_sync:
901+
# either sync was not respected, therefore must drop it, or it was not requested at all
902+
# since both could be provided as alternative preferences, drop only sync with limited subset
903+
_, _, async_applied = parse_prefer_header_execute_mode(req_headers, [ExecuteControlOption.ASYNC])
904+
if async_applied:
905+
resp_headers.update(async_applied)
868906

869907
LOGGER.debug("Celery task submitted to run async.")
870908
body = {
871909
"jobID": job.id,
872910
"processID": job.process,
873-
"providerID": provider_id, # dropped by validator if not applicable
874-
"status": map_status(job_status),
911+
"providerID": job.service, # dropped by validator if not applicable
912+
"status": map_status(job.status),
875913
"location": location_url, # for convenience/backward compatibility, but official is Location *header*
876914
}
877915
resp_headers = update_preference_applied_return_header(job, req_headers, resp_headers)
@@ -893,7 +931,7 @@ def update_job_parameters(job, request):
893931
def validate_job_json(request):
894932
# type: (Request) -> JSON
895933
"""
896-
Validates that the request contains valid :term:`JSON` conctens, but not ncessary valid against expected schema.
934+
Validates that the request contains valid :term:`JSON` contents, but not necessarily valid against expected schema.
897935
898936
.. seealso::
899937
:func:`validate_job_schema`

weaver/store/base.py

+1
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ def save_job(self,
175175
is_workflow=False, # type: bool
176176
is_local=False, # type: bool
177177
execute_async=True, # type: bool
178+
execute_wait=None, # type: Optional[int]
178179
execute_response=None, # type: Optional[AnyExecuteResponse]
179180
execute_return=None, # type: Optional[AnyExecuteReturnPreference]
180181
custom_tags=None, # type: Optional[List[str]]

weaver/store/mongodb.py

+13-8
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import copy
55
import logging
66
import uuid
7-
from typing import TYPE_CHECKING
7+
from typing import TYPE_CHECKING, cast
88

99
import pymongo
1010
from pymongo.collation import Collation
@@ -791,6 +791,7 @@ def save_job(self,
791791
is_workflow=False, # type: bool
792792
is_local=False, # type: bool
793793
execute_async=True, # type: bool
794+
execute_wait=None, # type: Optional[int]
794795
execute_response=None, # type: Optional[AnyExecuteResponse]
795796
execute_return=None, # type: Optional[AnyExecuteReturnPreference]
796797
custom_tags=None, # type: Optional[List[str]]
@@ -812,10 +813,11 @@ def save_job(self,
812813
tags.append(ProcessType.WORKFLOW)
813814
else:
814815
tags.append(ProcessType.APPLICATION)
815-
if execute_async:
816-
tags.append(ExecuteMode.ASYNC)
816+
if execute_async in [None, False] and execute_wait:
817+
execute_mode = ExecuteMode.SYNC
817818
else:
818-
tags.append(ExecuteMode.SYNC)
819+
execute_mode = ExecuteMode.ASYNC
820+
tags.append(execute_mode)
819821
if not access:
820822
access = Visibility.PRIVATE
821823

@@ -829,7 +831,8 @@ def save_job(self,
829831
"inputs": inputs,
830832
"outputs": outputs,
831833
"status": map_status(Status.ACCEPTED),
832-
"execute_async": execute_async,
834+
"execution_mode": execute_mode,
835+
"execution_wait": execute_wait,
833836
"execution_response": execute_response,
834837
"execution_return": execute_return,
835838
"is_workflow": is_workflow,
@@ -1047,6 +1050,7 @@ def _find_jobs_grouped(self, pipeline, group_categories):
10471050
items = found[0]["items"]
10481051
# convert to Job object where applicable, since pipeline result contains (category, jobs, count)
10491052
items = [{k: (v if k != "jobs" else [Job(j) for j in v]) for k, v in i.items()} for i in items]
1053+
items = cast("JobGroupCategory", items)
10501054
if has_provider:
10511055
for group_result in items:
10521056
group_service = group_result["category"].pop("service", None)
@@ -1147,13 +1151,14 @@ def _apply_status_filter(status):
11471151
statuses = set()
11481152
for _status in status:
11491153
if _status in StatusCategory:
1150-
category_status = JOB_STATUS_CATEGORIES[StatusCategory[_status]]
1151-
statuses = statuses.union(category_status)
1154+
status_cat = StatusCategory.get(_status)
1155+
category_statuses = JOB_STATUS_CATEGORIES[status_cat]
1156+
statuses = statuses.union(category_statuses)
11521157
else:
11531158
statuses.add(_status)
11541159
search_filters["status"] = {"$in": list(statuses)} # type: ignore
11551160
elif status:
1156-
search_filters["status"] = status[0]
1161+
search_filters["status"] = str(status[0])
11571162
return search_filters
11581163

11591164
@staticmethod

weaver/wps_restapi/jobs/jobs.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,13 @@
2626
)
2727
from weaver.processes.constants import JobInputsOutputsSchema
2828
from weaver.processes.convert import convert_input_values_schema, convert_output_params_schema
29-
from weaver.processes.execution import submit_job, submit_job_dispatch_wps, submit_job_handler, update_job_parameters
29+
from weaver.processes.execution import (
30+
submit_job,
31+
submit_job_dispatch_task,
32+
submit_job_dispatch_wps,
33+
submit_job_handler,
34+
update_job_parameters
35+
)
3036
from weaver.processes.utils import get_process
3137
from weaver.processes.wps_package import mask_process_inputs
3238
from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory
@@ -285,7 +291,7 @@ def trigger_job_execution(request):
285291
raise_job_bad_status_locked(job, request)
286292
# FIXME: reuse job, adjust function or map parameters from attributes
287293
# FIXME: alt 202 code for accepted on async when triggered this way
288-
return submit_job_handler(request, job)
294+
return submit_job_dispatch_task(job, container=request)
289295

290296

291297
@sd.provider_job_service.get(

weaver/wps_restapi/jobs/utils.py

-1
Original file line numberDiff line numberDiff line change
@@ -1101,7 +1101,6 @@ def get_job_submission_response(body, headers, error=False):
11011101
"Execution should begin when resources are available."
11021102
)
11031103
body = sd.CreatedJobStatusSchema().deserialize(body)
1104-
headers.setdefault("Location", body["location"])
11051104
return HTTPCreated(json=body, headerlist=headers)
11061105

11071106

0 commit comments

Comments
 (0)