Skip to content

Commit 7ed29c4

Browse files
committed
fix CWL namespace variable schema URI validation
1 parent b96b956 commit 7ed29c4

File tree

5 files changed

+118
-34
lines changed

5 files changed

+118
-34
lines changed

tests/wps_restapi/test_processes.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from weaver.formats import AcceptLanguage, ContentType, get_cwl_file_format
4242
from weaver.processes.builtin import register_builtin_processes
4343
from weaver.processes.constants import (
44+
CWL_NAMESPACE_WEAVER,
4445
CWL_REQUIREMENT_APP_DOCKER,
4546
CWL_REQUIREMENT_APP_OGC_API,
4647
CWL_REQUIREMENT_APP_WPS1,
@@ -1545,7 +1546,7 @@ def validate_ogcapi_process_description(self, process_description, process_id, r
15451546
ref = self.get_application_package(remote_process)
15461547
pkg = self.get_application_package(process_id)
15471548
assert pkg["hints"] == {
1548-
"OGCAPIRequirement": {
1549+
f"{CWL_NAMESPACE_WEAVER}:{CWL_REQUIREMENT_APP_OGC_API}": {
15491550
"process": ref_url
15501551
}
15511552
}

tests/wps_restapi/test_swagger_definitions.py

+31-2
Original file line numberDiff line numberDiff line change
@@ -64,23 +64,52 @@ def test_process_id_with_version_tag_get_valid():
6464
{IANA_NAMESPACE: IANA_NAMESPACE_URL, CWL_NAMESPACE: CWL_NAMESPACE_URL},
6565
{CWL_NAMESPACE: CWL_NAMESPACE_URL, CWL_NAMESPACE_WEAVER: CWL_NAMESPACE_WEAVER_URL},
6666
{CWL_NAMESPACE: CWL_NAMESPACE_URL, "random": "https://random.com"},
67+
{
68+
CWL_NAMESPACE: CWL_NAMESPACE_URL,
69+
"random": "https://random.com",
70+
"another": "https://another.com#", # ensure no '/' does not cause an error (fails with URL regex, needs '/#')
71+
"slashed": "https://another.com/#",
72+
"object": "https://another.com/schema#object",
73+
},
6774
])
68-
def test_cwl_namespaces(test_value):
75+
def test_cwl_namespaces_valid(test_value):
6976
result = sd.CWLNamespaces().deserialize(test_value)
7077
assert result == test_value
7178

7279

7380
@pytest.mark.parametrize("test_value", [
7481
{CWL_NAMESPACE: "bad"},
7582
{CWL_NAMESPACE: EDAM_NAMESPACE_URL},
83+
{CWL_NAMESPACE_WEAVER: "https://random.com"}, # disallow conflict with well-known namespaces, even if URI is valid
7684
{"random": "bad"},
7785
{"random": 12345},
86+
{"bad": "bad", "good": "https://random.com"}, # disallow partial mapping even if other URIs are valid
7887
])
79-
def test_cwl_namespaces_invalid_url(test_value):
88+
def test_cwl_namespaces_invalid_uri(test_value):
8089
with pytest.raises(colander.Invalid):
8190
sd.CWLNamespaces().deserialize(test_value)
8291

8392

93+
@pytest.mark.parametrize(["test_value", "expect_result"], [
94+
# literal property name under class definition
95+
({"var": "bad"}, colander.Invalid),
96+
({"var": "https://random.com"}, {"var": "https://random.com"}), # valid
97+
# name under 'variable' keyword passed as argument
98+
({"{namespace}": "bad"}, colander.Invalid),
99+
({"{namespace}": "https://random.com"}, {"{namespace}": "https://random.com"}), # valid
100+
])
101+
def test_cwl_namespaces_var_not_conflict_namespace_name(test_value, expect_result):
102+
assert any(getattr(field, "variable", None) is not None for field in sd.CWLNamespaces().children), (
103+
"Requirement not met for test. Field 'variable' expected to be defined in schema definition."
104+
)
105+
if expect_result is colander.Invalid:
106+
with pytest.raises(colander.Invalid):
107+
sd.CWLNamespaces().deserialize(test_value)
108+
else:
109+
result = sd.CWLNamespaces().deserialize(test_value)
110+
assert result == expect_result
111+
112+
84113
@pytest.mark.parametrize(
85114
["test_data", "expect_result"],
86115
[

weaver/datatype.py

+20-4
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,14 @@
3636
from weaver.exceptions import ProcessInstanceError, ServiceParsingError
3737
from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteResponse, ExecuteTransmissionMode
3838
from weaver.formats import AcceptLanguage, ContentType, repr_json
39-
from weaver.processes.constants import CWL_REQUIREMENT_APP_DOCKER, CWL_REQUIREMENT_APP_OGC_API, ProcessSchema
39+
from weaver.processes.constants import (
40+
CWL_NAMESPACE_WEAVER,
41+
CWL_REQUIREMENT_APP_DOCKER,
42+
CWL_REQUIREMENT_APP_DOCKER_GPU,
43+
CWL_REQUIREMENT_APP_OGC_API,
44+
CWL_REQUIREMENT_APP_WPS1,
45+
ProcessSchema
46+
)
4047
from weaver.processes.convert import get_field, json2oas_io, normalize_ordered_io, null, ows2json, wps2json_io
4148
from weaver.processes.types import ProcessType
4249
from weaver.quotation.status import QuoteStatus
@@ -2184,11 +2191,20 @@ def deployment_profile(self):
21842191

21852192
if cls == ProcessType.WORKFLOW:
21862193
profile = f"{base}workflow"
2187-
elif ProcessType.is_wps(typ):
2194+
elif ProcessType.is_wps(typ) or req in [
2195+
CWL_REQUIREMENT_APP_WPS1,
2196+
f"{CWL_NAMESPACE_WEAVER}:{CWL_REQUIREMENT_APP_WPS1}",
2197+
]:
21882198
profile = f"{base}wpsApplication"
2189-
elif typ == ProcessType.OGC_API or req == CWL_REQUIREMENT_APP_OGC_API:
2199+
elif typ == ProcessType.OGC_API or req in [
2200+
CWL_REQUIREMENT_APP_OGC_API,
2201+
f"{CWL_NAMESPACE_WEAVER}:{CWL_REQUIREMENT_APP_OGC_API}",
2202+
]:
21902203
profile = f"{base}ogcapiApplication"
2191-
elif typ == ProcessType.APPLICATION or req == CWL_REQUIREMENT_APP_DOCKER:
2204+
elif typ == ProcessType.APPLICATION or req in [
2205+
CWL_REQUIREMENT_APP_DOCKER,
2206+
CWL_REQUIREMENT_APP_DOCKER_GPU,
2207+
]:
21922208
profile = f"{base}dockerizedApplication"
21932209
else:
21942210
profile = base + typ

weaver/wps_restapi/colander_extras.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
of generated OpenAPI model definitions. If not explicitly provided, the
5252
value of ``title`` **WILL** default to the name of the schema node class.
5353
"""
54+
import copy
5455
import inspect
5556
import re
5657
import uuid
@@ -134,7 +135,7 @@
134135
URL = colander.Regex(URL_REGEX, msg=colander._("Must be a URL"), flags=re.IGNORECASE)
135136
FILE_URL_REGEX = colander.URI_REGEX.replace(r"://", r"://(?!//)")
136137
FILE_URI = colander.Regex(FILE_URL_REGEX, msg=colander._("Must be a file:// URI scheme"), flags=re.IGNORECASE)
137-
URI_REGEX = rf"{colander.URL_REGEX[:-1]}(?:#?|[#?]\S+)$"
138+
URI_REGEX = rf"{URL_REGEX[:-1]}(?:#?|[#?]\S+)$"
138139
URI = colander.Regex(URI_REGEX, msg=colander._("Must be a URI"), flags=re.IGNORECASE)
139140
STRING_FORMATTERS.update({
140141
"uri": {"converter": BaseStringTypeConverter, "validator": URI},
@@ -1025,6 +1026,9 @@ def _check_deserialize(node, cstruct):
10251026
def _deserialize_remap(node, cstruct, var_map, var_name, has_const_child):
10261027
invalid_var = colander.Invalid(node, value=var_map)
10271028
try:
1029+
# use a copy to avoid modifying the structure passed in by the caller, since we pop variable-mapped keys
1030+
cstruct = copy.deepcopy(cstruct)
1031+
10281032
# Substitute real keys with matched variables to run full deserialize so
10291033
# that mapping can find nodes name against attribute names, then re-apply originals.
10301034
# We must do this as non-variable sub-schemas could be present, and we must also
@@ -1072,7 +1076,9 @@ def _deserialize_remap(node, cstruct, var_map, var_name, has_const_child):
10721076
if mapped is None and node.missing is colander.required:
10731077
raise colander.Invalid(node, value=cstruct)
10741078
for var_mapped in mapped:
1075-
result[var_mapped["name"]] = var_mapped["cstruct"]
1079+
# variable schema validation failed, but it is not marked as 'required'
1080+
if var_mapped["cstruct"] not in [colander.drop, colander.null]:
1081+
result[var_mapped["name"]] = var_mapped["cstruct"]
10761082
except colander.Invalid as invalid:
10771083
if invalid.msg:
10781084
invalid_var.msg = invalid.msg
@@ -1100,7 +1106,7 @@ def _deserialize_impl(self, cstruct):
11001106
var_map_invalid = {} # type: Dict[str, colander.Invalid]
11011107
for var_child in var_children:
11021108
var = getattr(var_child, self._variable, None)
1103-
var_map[var] = []
1109+
var_map.setdefault(var, [])
11041110
var_msg = f"Requirement not met under variable: {var}."
11051111
var_map_invalid[var] = colander.Invalid(node=self, msg=var_msg, value=cstruct)
11061112
# attempt to find any sub-node matching the sub-schema under variable
@@ -1135,10 +1141,10 @@ def _deserialize_impl(self, cstruct):
11351141
# use position as tested child field name for later reference by invalid schema
11361142
var_map_invalid[var].add(invalid, pos=child_key)
11371143

1138-
var_val = var_map.get(var, colander.null)
1139-
if var_val is colander.null:
1140-
# allow unmatched variable item under mapping if it is not required
1141-
if var_child.missing is colander.drop:
1144+
var_mapped = var_map.get(var, [])
1145+
if not var_mapped:
1146+
# allow unmatched/unprovided variable item under mapping if it is not required
1147+
if var_child.missing in [colander.drop, colander.null]:
11421148
continue
11431149
# if required, don't waste more time doing lookup
11441150
# fail immediately since this variable schema is missing

weaver/wps_restapi/swagger_definitions.py

+52-20
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@
8989
from weaver.visibility import Visibility
9090
from weaver.wps_restapi.colander_extras import (
9191
NO_DOUBLE_SLASH_PATTERN,
92-
URI,
9392
AllOfKeywordSchema,
9493
AnyOfKeywordSchema,
9594
BoundedRange,
@@ -387,11 +386,31 @@ class Tag(ExtendedSchemaNode):
387386

388387

389388
class URL(ExtendedSchemaNode):
389+
"""
390+
String format that will be automatically mapped to a URL-pattern validator.
391+
392+
.. seealso::
393+
- :data:`weaver.wps_restapi.colander_extras.URL`
394+
- :class:`weaver.wps_restapi.colander_extras.ExtendedSchemaBase`
395+
"""
390396
schema_type = String
391397
description = "URL reference."
392398
format = "url"
393399

394400

401+
class URI(ExtendedSchemaNode):
402+
"""
403+
String format that will be automatically mapped to a URI-pattern validator.
404+
405+
.. seealso::
406+
- :data:`weaver.wps_restapi.colander_extras.URI`
407+
- :class:`weaver.wps_restapi.colander_extras.ExtendedSchemaBase`
408+
"""
409+
schema_type = String
410+
description = "URI reference."
411+
format = "uri"
412+
413+
395414
class Email(ExtendedSchemaNode):
396415
schema_type = String
397416
description = "Email recipient."
@@ -4990,50 +5009,63 @@ class CWLBase(ExtendedMappingSchema):
49905009
cwlVersion = CWLVersion()
49915010

49925011

4993-
class CWLNamespaces(ExtendedMappingSchema):
5012+
class CWLNamespaces(StrictMappingSchema):
5013+
"""
5014+
Mapping of :term:`CWL` namespace definitions for shorthand notation.
5015+
5016+
.. note::
5017+
Use a combination of `strict` mapping and ``variable`` (see ``var`` field) such that any additional namespace
5018+
other than the ones explicitly listed are allowed, but if provided, they must at least pass URI validation.
5019+
If no additional namespace is provided, including none at all, the mapping definition remains valid because
5020+
of ``missing=drop`` under ``var``. If a URI is invalid for additional namespaces, the failing validation causes
5021+
the property to be unmapped to the variable, which leads to an ``"unknown"`` property raised by the `strict`
5022+
mapping. For explicit URI definitions, the specific URI combinations provided must be matched exactly to
5023+
succeed. This ensures that no invalid mapping gets applied for commonly-known URI namespaces.
5024+
"""
49945025
name = "$namespaces"
49955026
title = "CWL Namespaces Mapping"
4996-
cwl = URL(
5027+
description = "Mapping of CWL namespace definitions for shorthand notation."
5028+
var = URI(variable="{namespace}", missing=drop)
5029+
cwl = URI(
49975030
missing=drop,
49985031
name=CWL_NAMESPACE,
4999-
validator=OneOf([CWL_NAMESPACE_URL]),
5032+
validator=OneOf([CWL_NAMESPACE_URL, CWL_NAMESPACE_URL.rstrip("#")]),
50005033
)
5001-
cwltool = URL(
5034+
cwltool = URI(
50025035
missing=drop,
50035036
name=CWL_NAMESPACE_CWLTOOL,
5004-
validator=OneOf([CWL_NAMESPACE_CWLTOOL_URL]),
5037+
validator=OneOf([CWL_NAMESPACE_CWLTOOL_URL, CWL_NAMESPACE_CWLTOOL_URL.rstrip("#")]),
50055038
)
5006-
edam = URL(
5039+
edam = URI(
50075040
missing=drop,
50085041
name=EDAM_NAMESPACE,
5009-
validator=OneOf([EDAM_NAMESPACE_URL]),
5042+
validator=OneOf([EDAM_NAMESPACE_URL, EDAM_NAMESPACE_URL.rstrip("#")]),
50105043
)
5011-
iana = URL(
5044+
iana = URI(
50125045
missing=drop,
50135046
name=IANA_NAMESPACE,
5014-
validator=OneOf([IANA_NAMESPACE_URL]),
5047+
validator=OneOf([IANA_NAMESPACE_URL, IANA_NAMESPACE_URL.rstrip("#")]),
50155048
)
5016-
ogc = URL(
5049+
ogc = URI(
50175050
missing=drop,
50185051
name=OGC_NAMESPACE,
5019-
validator=OneOf([OGC_NAMESPACE_URL]),
5052+
validator=OneOf([OGC_NAMESPACE_URL, OGC_NAMESPACE_URL.rstrip("#")]),
50205053
)
5021-
opengis = URL(
5054+
opengis = URI(
50225055
missing=drop,
50235056
name=OPENGIS_NAMESPACE,
5024-
validator=OneOf([OPENGIS_NAMESPACE_URL]),
5057+
validator=OneOf([OPENGIS_NAMESPACE_URL, OPENGIS_NAMESPACE_URL.rstrip("#")]),
50255058
)
5026-
s = URL(
5059+
s = URI(
50275060
missing=drop,
50285061
name=CWL_NAMESPACE_SCHEMA,
5029-
validator=OneOf([CWL_NAMESPACE_SCHEMA_URL]),
5062+
validator=OneOf([CWL_NAMESPACE_SCHEMA_URL, CWL_NAMESPACE_SCHEMA_URL.rstrip("#")]),
50305063
)
5031-
weaver = URL(
5064+
weaver = URI(
50325065
missing=drop,
50335066
name=CWL_NAMESPACE_WEAVER,
5034-
validator=OneOf([CWL_NAMESPACE_WEAVER_URL]),
5067+
validator=OneOf([CWL_NAMESPACE_WEAVER_URL, CWL_NAMESPACE_WEAVER_URL.rstrip("#")]),
50355068
)
5036-
var = URL(variable="{namespace}", missing=drop)
50375069

50385070

50395071
class CWLSchemas(ExtendedSequenceSchema):
@@ -6096,7 +6128,7 @@ class ErrorJsonResponseBodySchema(ExtendedMappingSchema):
60966128
cause = ErrorCause(missing=drop)
60976129
value = ErrorCause(missing=drop)
60986130
error = ErrorDetail(missing=drop)
6099-
instance = ExtendedSchemaNode(String(), validator=URI, missing=drop)
6131+
instance = URI(missing=drop)
61006132
exception = OWSExceptionResponse(missing=drop)
61016133

61026134

0 commit comments

Comments
 (0)