Skip to content

Commit e909288

Browse files
authored
Merge pull request #173 from opossum-tool/feat-opossum-file-parsing
feat: introduce opossum files to CLI
2 parents bc5054a + ec9650d commit e909288

17 files changed

+776
-153
lines changed

src/opossum_lib/cli.py

+32-10
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import click
1313

1414
from opossum_lib.opossum.file_generation import write_opossum_information_to_file
15+
from opossum_lib.opossum.opossum_file import OpossumInformation
16+
from opossum_lib.opossum.read_opossum_file import read_opossum_file
1517
from opossum_lib.spdx.convert_to_opossum import convert_spdx_to_opossum_information
1618

1719

@@ -27,6 +29,12 @@ def opossum_file() -> None:
2729
multiple=True,
2830
type=click.Path(exists=True),
2931
)
32+
@click.option(
33+
"--opossum",
34+
help="opossum files used as input.",
35+
multiple=True,
36+
type=click.Path(exists=True),
37+
)
3038
@click.option(
3139
"--outfile",
3240
"-o",
@@ -35,23 +43,16 @@ def opossum_file() -> None:
3543
help="The file path to write the generated opossum document to. "
3644
'If appropriate, the extension ".opossum" will be appended.',
3745
)
38-
def generate(spdx: list[str], outfile: str) -> None:
46+
def generate(spdx: list[str], opossum: list[str], outfile: str) -> None:
3947
"""
4048
Generate an Opossum file from various other file formats.
4149
4250
\b
4351
Currently supported input formats:
4452
- SPDX
4553
"""
46-
if len(spdx) == 0:
47-
logging.warning("No input provided. Exiting.")
48-
sys.exit(1)
49-
if len(spdx) > 1:
50-
logging.error("Merging of multiple SPDX files not yet supported!")
51-
sys.exit(1)
52-
53-
the_spdx_file = spdx[0]
54-
opossum_information = convert_spdx_to_opossum_information(the_spdx_file)
54+
validate_input_exit_on_error(spdx, opossum)
55+
opossum_information = convert_after_valid_input(spdx, opossum)
5556

5657
if not outfile.endswith(".opossum"):
5758
outfile += ".opossum"
@@ -62,5 +63,26 @@ def generate(spdx: list[str], outfile: str) -> None:
6263
write_opossum_information_to_file(opossum_information, Path(outfile))
6364

6465

66+
def validate_input_exit_on_error(spdx: list[str], opossum: list[str]) -> None:
    """Abort the CLI unless exactly one input file was supplied.

    SPDX and opossum inputs are counted together. Zero inputs is logged as
    a warning, more than one as an error; both cases end the process with
    exit code 1. Returns normally only for exactly one input.
    """
    input_count = len(spdx) + len(opossum)
    if input_count == 1:
        return

    if input_count == 0:
        logging.warning("No input provided. Exiting.")
    else:
        logging.error("Merging of multiple files not yet supported!")
    sys.exit(1)
74+
75+
76+
def convert_after_valid_input(
    spdx: list[str], opossum_files: list[str]
) -> OpossumInformation:
    """Load the single input file as opossum information.

    Expects the inputs to have been validated beforehand. When ``spdx``
    holds exactly one path, that file is converted from SPDX; otherwise the
    first entry of ``opossum_files`` is read as an opossum file.
    """
    if len(spdx) != 1:
        return read_opossum_file(opossum_files[0])
    return convert_spdx_to_opossum_information(spdx[0])
85+
86+
6587
if __name__ == "__main__":
6688
opossum_file()

src/opossum_lib/opossum/constants.py

+2
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,5 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44
COMPRESSION_LEVEL = 5
5+
INPUT_JSON_NAME = "input.json"
6+
OUTPUT_JSON_NAME = "output.json"
+9-45
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,14 @@
11
# SPDX-FileCopyrightText: TNG Technology Consulting GmbH <https://www.tngtech.com>
22
#
33
# SPDX-License-Identifier: Apache-2.0
4-
import json
5-
from dataclasses import fields
64
from pathlib import Path
75
from zipfile import ZIP_DEFLATED, ZipFile
86

9-
from opossum_lib.opossum.constants import COMPRESSION_LEVEL
7+
from pydantic import TypeAdapter
8+
9+
from opossum_lib.opossum.constants import COMPRESSION_LEVEL, INPUT_JSON_NAME
1010
from opossum_lib.opossum.opossum_file import (
11-
ExternalAttributionSource,
12-
Metadata,
1311
OpossumInformation,
14-
OpossumPackage,
15-
Resource,
16-
SourceInfo,
1712
)
1813

1914

@@ -23,40 +18,9 @@ def write_opossum_information_to_file(
2318
with ZipFile(
2419
file_path, "w", compression=ZIP_DEFLATED, compresslevel=COMPRESSION_LEVEL
2520
) as z:
26-
z.writestr("input.json", json.dumps(to_dict(opossum_information), indent=4))
27-
28-
29-
def to_dict(
30-
element: Resource
31-
| Metadata
32-
| OpossumPackage
33-
| OpossumInformation
34-
| SourceInfo
35-
| ExternalAttributionSource
36-
| str
37-
| int
38-
| bool
39-
| dict[str, OpossumPackage]
40-
| dict[str, list[str]]
41-
| list[str]
42-
| None,
43-
) -> dict | str | list[str] | bool | int | None:
44-
if isinstance(element, Resource):
45-
return element.to_dict()
46-
if isinstance(
47-
element,
48-
Metadata
49-
| OpossumPackage
50-
| OpossumInformation
51-
| SourceInfo
52-
| ExternalAttributionSource,
53-
):
54-
result = []
55-
for f in fields(element):
56-
value = to_dict(getattr(element, f.name))
57-
result.append((f.name, value))
58-
return {k: v for (k, v) in result if v is not None}
59-
elif isinstance(element, dict):
60-
return {k: to_dict(v) for k, v in element.items()}
61-
else:
62-
return element
21+
z.writestr(
22+
INPUT_JSON_NAME,
23+
TypeAdapter(OpossumInformation).dump_json(
24+
opossum_information, indent=4, exclude_none=True
25+
),
26+
)

src/opossum_lib/opossum/merger.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
Resource,
1010
ResourcePath,
1111
ResourceType,
12+
convert_resource_in_file_to_resource,
1213
)
1314

1415

@@ -23,10 +24,10 @@ def merge_opossum_information(
2324
metadata=expanded_opossum_information[0].metadata,
2425
resources=_merge_resources(
2526
[
26-
opossum_information.resources
27+
convert_resource_in_file_to_resource(opossum_information.resources)
2728
for opossum_information in expanded_opossum_information
2829
]
29-
),
30+
).convert_to_file_resource(),
3031
externalAttributions=_merge_dicts_without_duplicates(
3132
[
3233
opossum_information.externalAttributions

src/opossum_lib/opossum/opossum_file.py

+57-8
Original file line numberDiff line numberDiff line change
@@ -6,30 +6,52 @@
66
from copy import deepcopy
77
from dataclasses import field
88
from enum import Enum, auto
9-
from typing import Literal
9+
from typing import Literal, cast
1010

11+
from pydantic import BaseModel, ConfigDict, model_serializer
1112
from pydantic.dataclasses import dataclass
1213

13-
OpossumPackageIdentifier = str
14-
ResourcePath = str
14+
type OpossumPackageIdentifier = str
15+
type ResourcePath = str
16+
type ResourceInFile = dict[str, ResourceInFile] | int
1517

1618

1719
@dataclass(frozen=True)
class OpossumInformation:
    """Immutable model of the ``input.json`` stored inside an opossum file."""

    # Required sections.
    metadata: Metadata
    # Resource tree in file format: nested dicts, with integer leaves.
    resources: ResourceInFile
    externalAttributions: dict[OpossumPackageIdentifier, OpossumPackage]
    resourcesToAttributions: dict[ResourcePath, list[OpossumPackageIdentifier]]

    # Optional sections that default to empty containers.
    attributionBreakpoints: list[str] = field(default_factory=list)
    externalAttributionSources: dict[str, ExternalAttributionSource] = field(
        default_factory=dict
    )

    # Optional sections that stay None unless provided.
    frequentLicenses: list[FrequentLicense] | None = None
    filesWithChildren: list[str] | None = None
    baseUrlsForSources: BaseUrlsForSources | None = None
32+
33+
34+
class BaseUrlsForSources(BaseModel):
    """Immutable free-form model: declares no fields, accepts extra keys."""

    model_config = ConfigDict(extra="allow", frozen=True)

    @model_serializer
    def serialize(self) -> dict:
        # Workaround: emit every key/value pair explicitly so that keys whose
        # value is None are not dropped from the output. None is a valid
        # value here and has to be part of the serialization.
        return dict(iter(self))
42+
43+
44+
class FrequentLicense(BaseModel):
    """License entry made up of a full name, a short name and a default text."""

    fullName: str
    shortName: str
    defaultText: str
2748

2849

2950
@dataclass(frozen=True)
class SourceInfo:
    """Immutable source description: a name plus optional details."""

    name: str
    # Integer or float; defaults to 0 and may also be None.
    documentConfidence: int | float | None = 0
    additionalName: str | None = None
3355

3456

3557
@dataclass(frozen=True)
@@ -51,11 +73,13 @@ class OpossumPackage:
5173
preSelected: bool | None = None
5274
followUp: Literal["FOLLOW_UP"] | None = None
5375
originId: str | None = None
76+
originIds: list[str] | None = None
5477
criticality: Literal["high"] | Literal["medium"] | None = None
78+
wasPreferred: bool | None = None
5579

5680

57-
@dataclass(frozen=True)
58-
class Metadata:
81+
class Metadata(BaseModel):
82+
model_config = ConfigDict(extra="allow", frozen=True)
5983
projectId: str
6084
fileCreationDate: str
6185
projectTitle: str
@@ -123,7 +147,7 @@ def drop_element(
123147

124148
return resource
125149

126-
def to_dict(self) -> int | dict:
150+
def to_dict(self) -> ResourceInFile:
127151
if not self.has_children():
128152
if self.type == ResourceType.FOLDER:
129153
return {}
@@ -154,8 +178,33 @@ def get_paths_of_all_leaf_nodes_with_types(
154178
def has_children(self) -> bool:
    """Return True when this resource holds at least one child."""
    return bool(self.children)
156180

181+
def convert_to_file_resource(self) -> ResourceInFile:
    """Return this resource in file format; delegates to ``to_dict``."""
    file_resource = self.to_dict()
    return file_resource
183+
157184

158185
@dataclass(frozen=True)
class ExternalAttributionSource:
    """Immutable value type of ``OpossumInformation.externalAttributionSources``."""

    name: str
    priority: int
    # Optional flag; None when not provided.
    isRelevantForPreferred: bool | None = None
190+
191+
192+
def _build_resource_tree(resource: ResourceInFile) -> Resource:
    """Recursively turn a file-format resource into a ``Resource`` tree.

    An integer becomes a FILE node. Anything else is treated as a mapping
    and becomes a FOLDER node whose entries are converted the same way.
    """
    if isinstance(resource, int):
        return Resource(type=ResourceType.FILE)

    folder = Resource(type=ResourceType.FOLDER)
    for child_name, child in resource.items():
        folder.children[child_name] = _build_resource_tree(child)
    return folder
200+
201+
202+
def convert_resource_in_file_to_resource(resource: ResourceInFile) -> Resource:
    """Build the TOP_LEVEL ``Resource`` tree for a file-format resource.

    Each entry of a dict input becomes a child of the root node. A non-dict
    input yields the root node without any children added.
    """
    top_level = Resource(ResourceType.TOP_LEVEL)
    if not isinstance(resource, dict):
        return top_level

    # The cast only narrows the type for static checkers.
    entries = cast(dict[str, ResourceInFile], resource)
    for entry_name, entry in entries.items():
        top_level.children[entry_name] = _build_resource_tree(entry)
    return top_level
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# SPDX-FileCopyrightText: TNG Technology Consulting GmbH <https://www.tngtech.com>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
import json
5+
import logging
6+
import sys
7+
from zipfile import ZipFile
8+
9+
from pydantic import TypeAdapter
10+
11+
from opossum_lib.opossum.constants import INPUT_JSON_NAME, OUTPUT_JSON_NAME
12+
from opossum_lib.opossum.opossum_file import (
13+
OpossumInformation,
14+
)
15+
16+
17+
def read_opossum_file(filename: str) -> OpossumInformation:
    """Read and validate the input JSON of an opossum file.

    Args:
        filename: Path of the opossum (zip) file to read.

    Returns:
        The content of the archive's input JSON as ``OpossumInformation``.

    The process exits with status 1 when the archive content check fails or
    when any error occurs while opening, parsing or validating the file.
    """
    logging.info(f"Converting opossum to opossum {filename}")

    try:
        with ZipFile(filename, "r") as input_zip_file:
            validate_zip_file_contents(input_zip_file)
            with input_zip_file.open(INPUT_JSON_NAME) as input_json_file:
                input_json = json.load(input_json_file)
            return TypeAdapter(OpossumInformation).validate_python(input_json)
    except Exception as e:
        # Outermost boundary for reading: report the failure through logging,
        # as the archive content check does, instead of printing to stdout.
        logging.error(f"Error reading file {filename}: {e}")
        sys.exit(1)
32+
33+
34+
def validate_zip_file_contents(input_zip_file: ZipFile) -> None:
    """Exit unless the archive holds an input JSON and no output JSON.

    A missing input JSON or a present output JSON is logged as an error and
    ends the process with exit code 1.
    """
    archive_members = input_zip_file.namelist()

    input_json_missing = INPUT_JSON_NAME not in archive_members
    if input_json_missing:
        logging.error(
            f"Opossum file {input_zip_file.filename} is corrupt"
            f" and does not contain '{INPUT_JSON_NAME}'"
        )
        sys.exit(1)

    output_json_present = OUTPUT_JSON_NAME in archive_members
    if output_json_present:
        logging.error(
            f"Opossum file {input_zip_file.filename} also contains"
            f" '{OUTPUT_JSON_NAME}' which cannot be processed"
        )
        sys.exit(1)

src/opossum_lib/spdx/convert_to_opossum.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949

5050

5151
def convert_spdx_to_opossum_information(filename: str) -> OpossumInformation:
52+
logging.info(f"Converting {filename} to opossum information.")
5253
try:
5354
document: SpdxDocument = parse_file(filename)
5455

@@ -116,7 +117,7 @@ def convert_tree_to_opossum_information(tree: DiGraph) -> OpossumInformation:
116117

117118
opossum_information = OpossumInformation(
118119
metadata=metadata,
119-
resources=resources,
120+
resources=resources.convert_to_file_resource(),
120121
externalAttributions=external_attributions,
121122
resourcesToAttributions=resources_to_attributions,
122123
attributionBreakpoints=attribution_breakpoints,
@@ -170,5 +171,9 @@ def create_attribution_and_link_with_resource(
170171
def create_metadata(tree: DiGraph) -> Metadata:
    """Create opossum metadata from the SPDX document node of ``tree``.

    The project title and creation date come from the element stored on the
    "SPDXRef-DOCUMENT" node; the project id is a freshly generated UUID4.
    """
    document = tree.nodes["SPDXRef-DOCUMENT"]["element"]
    title = document.name
    creation_time = document.created
    return Metadata(
        projectId=str(uuid.uuid4()),
        fileCreationDate=creation_time.isoformat(),
        projectTitle=title,
    )

0 commit comments

Comments
 (0)