Skip to content

Commit 10b16d6

Browse files
authored
Merge pull request #192 from opossum-tool/refactor-consistent-model-pipeline
refactor: introduce new internal representation for Opossum files
2 parents 4812d5b + ddf4130 commit 10b16d6

File tree

6 files changed

+357
-292
lines changed

6 files changed

+357
-292
lines changed

src/opossum_lib/opossum_model.py

+310
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
# SPDX-FileCopyrightText: TNG Technology Consulting GmbH <https://www.tngtech.com>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from __future__ import annotations
6+
7+
import uuid
8+
from collections import defaultdict
9+
from collections.abc import Iterable
10+
from copy import deepcopy
11+
from dataclasses import field
12+
from enum import Enum, auto
13+
from pathlib import PurePath
14+
from typing import Literal
15+
16+
from pydantic import BaseModel, ConfigDict
17+
18+
import opossum_lib.opossum.opossum_file as opossum_file
19+
from opossum_lib.opossum.opossum_file_content import OpossumFileContent
20+
from opossum_lib.opossum.output_model import OpossumOutputFile
21+
22+
type OpossumPackageIdentifier = str
23+
type ResourcePath = str
24+
25+
26+
def _convert_path_to_str(path: PurePath) -> str:
    """Render *path* as a string that uses ``/`` as its only separator.

    Every backslash in the string form of the path is replaced by a forward
    slash, so Windows-style paths come out in the same shape as POSIX ones.
    """
    segments = str(path).split("\\")
    return "/".join(segments)
28+
29+
30+
def default_attribution_id_mapper() -> dict[OpossumPackage, str]:
    """Create an empty mapping that invents an id for every unknown key.

    The returned ``defaultdict`` generates a random UUID4 string the first
    time a key is looked up and returns that same string on later lookups.
    """

    def _fresh_id() -> str:
        return str(uuid.uuid4())

    return defaultdict(_fresh_id)
32+
33+
34+
class Opossum(BaseModel):
    """Top-level internal model: scan results plus optional review results."""

    model_config = ConfigDict(frozen=True, extra="forbid")
    scan_results: ScanResults
    review_results: OpossumOutputFile | None = None

    def to_opossum_file_format(self) -> OpossumFileContent:
        """Build the file-format content from this model.

        The scan results are converted to their file representation; the
        review results are passed through unchanged as the output file.
        """
        input_file = self.scan_results.to_opossum_file_format()
        return OpossumFileContent(
            input_file=input_file,
            output_file=self.review_results,
        )
44+
45+
46+
class ScanResults(BaseModel):
    """Internal representation of the scan (input) part of an Opossum file.

    Attributions are stored directly on the resources of the tree; the flat
    ``external_attributions`` / ``resources_to_attributions`` tables of the
    file format are derived from the tree during conversion.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")
    metadata: Metadata
    resources: list[Resource]
    attribution_breakpoints: list[str] = []
    external_attribution_sources: dict[str, ExternalAttributionSource] = {}
    frequent_licenses: list[FrequentLicense] | None = None
    files_with_children: list[str] | None = None
    base_urls_for_sources: BaseUrlsForSources | None = None
    # Identifier assigned to each attribution. The default mapping creates a
    # fresh UUID on first lookup of an attribution.
    # NOTE(review): `field` here is `dataclasses.field`; `pydantic.Field` is
    # the conventional way to declare a default factory on a BaseModel --
    # confirm this is intended.
    attribution_to_id: dict[OpossumPackage, str] = field(
        default_factory=default_attribution_id_mapper
    )

    def to_opossum_file_format(self) -> opossum_file.OpossumInformation:
        """Convert the scan results into the Opossum file representation.

        Returns:
            An ``OpossumInformation`` whose attribution tables are derived
            from the resource tree and whose remaining sections are converted
            (or deep-copied) from the corresponding fields of this model.
        """
        external_attributions, resources_to_attributions = (
            self.create_attribution_mapping(self.resources)
        )

        # An empty or missing list of frequent licenses is emitted as None.
        frequent_licenses = None
        if self.frequent_licenses:
            frequent_licenses = [
                frequent_license.to_opossum_file_format()
                for frequent_license in self.frequent_licenses
            ]

        base_urls_for_sources = (
            self.base_urls_for_sources.to_opossum_file_format()
            if self.base_urls_for_sources is not None
            else None
        )

        external_attribution_sources = {
            key: source.to_opossum_file_format()
            for key, source in self.external_attribution_sources.items()
        }

        return opossum_file.OpossumInformation(
            metadata=self.metadata.to_opossum_file_format(),
            resources={
                # Use the shared helper so that top-level keys are normalized
                # the same way as every other path written by this model.
                _convert_path_to_str(resource.path): resource.to_opossum_file_format()
                for resource in self.resources
            },
            external_attributions=external_attributions,
            resources_to_attributions=resources_to_attributions,
            attribution_breakpoints=deepcopy(self.attribution_breakpoints),
            external_attribution_sources=external_attribution_sources,
            frequent_licenses=frequent_licenses,
            files_with_children=deepcopy(self.files_with_children),
            base_urls_for_sources=base_urls_for_sources,
        )

    def create_attribution_mapping(
        self,
        root_nodes: list[Resource],
    ) -> tuple[
        dict[opossum_file.OpossumPackageIdentifier, opossum_file.OpossumPackage],
        dict[opossum_file.ResourcePath, list[opossum_file.OpossumPackageIdentifier]],
    ]:
        """Flatten the attributions of the given trees into lookup tables.

        Args:
            root_nodes: Roots of the resource trees to walk (depth first,
                parents before their children).

        Returns:
            A pair ``(external_attributions, resources_to_attributions)``:
            the first maps attribution ids to converted attributions, the
            second maps each resource path that has at least one attribution
            to the ids of its attributions.
        """
        external_attributions: dict[
            opossum_file.OpossumPackageIdentifier, opossum_file.OpossumPackage
        ] = {}
        resources_to_attributions: dict[
            opossum_file.ResourcePath, list[opossum_file.OpossumPackageIdentifier]
        ] = {}

        def process_node(node: Resource) -> None:
            path = _convert_path_to_str(node.path)
            if not path.startswith("/"):
                # the / is required by OpossumUI
                path = "/" + path

            node_attributions_by_id = {
                self.get_attribution_key(attribution): (
                    attribution.to_opossum_file_format()
                )
                for attribution in node.attributions
            }
            external_attributions.update(node_attributions_by_id)

            # Resources without attributions get no entry at all.
            if node_attributions_by_id:
                resources_to_attributions[path] = list(node_attributions_by_id)

            for child in node.children.values():
                process_node(child)

        for root in root_nodes:
            process_node(root)

        return external_attributions, resources_to_attributions

    def get_attribution_key(
        self, attribution: OpossumPackage
    ) -> OpossumPackageIdentifier:
        """Return the id of *attribution*, assigning a new one if necessary.

        The default mapping is a ``defaultdict`` that creates ids on lookup.
        A mapping that ended up as a plain ``dict`` (for example one supplied
        by the caller) has no such fallback, so a missing entry is filled
        with a fresh UUID instead of raising ``KeyError``.
        """
        try:
            return self.attribution_to_id[attribution]
        except KeyError:
            attribution_id = str(uuid.uuid4())
            self.attribution_to_id[attribution] = attribution_id
            return attribution_id
136+
137+
138+
class ResourceType(Enum):
    """Kind of node a resource represents in the resource tree."""

    # A leaf entry.
    FILE = auto()
    # An entry that may contain further resources.
    FOLDER = auto()
141+
142+
143+
class Resource(BaseModel):
    """Mutable node of the resource tree.

    A node has a path, an optional type, the attributions attached to it and
    its child nodes keyed by the next path component.
    """

    model_config = ConfigDict(frozen=False, extra="forbid")
    path: PurePath
    type: ResourceType | None = None
    attributions: list[OpossumPackage] = []
    children: dict[str, Resource] = {}

    def to_opossum_file_format(self) -> opossum_file.ResourceInFile:
        """Serialize this subtree into the nested file representation.

        A node without children that is not typed as a folder becomes ``1``;
        every other node becomes a dict from the child's path relative to
        this node to the child's own serialization.
        """
        is_leaf = not self.children and self.type != ResourceType.FOLDER
        if is_leaf:
            return 1

        serialized: dict[str, opossum_file.ResourceInFile] = {}
        for child in self.children.values():
            relative_name = _convert_path_to_str(child.path.relative_to(self.path))
            serialized[relative_name] = child.to_opossum_file_format()
        return serialized

    def add_resource(self, resource: Resource) -> None:
        """Insert *resource* below this node, merging with existing nodes.

        Raises:
            RuntimeError: If the path of *resource* does not lie at or below
                the path of this node.
        """
        if not resource.path.is_relative_to(self.path):
            raise RuntimeError(
                f"The path {resource.path} is not a child of this node at {self.path}."
            )

        parts_below_self = resource.path.relative_to(self.path).parts
        if not parts_below_self:
            # Same path as this node: merge in place.
            self._update(resource)
            return
        self._add_resource(resource, parts_below_self)

    def _add_resource(
        self, resource: Resource, remaining_path_parts: Iterable[str]
    ) -> None:
        """Descend along *remaining_path_parts* and merge *resource* there.

        Intermediate nodes that do not exist yet are created on the way down.
        """
        if not remaining_path_parts:
            self._update(resource)
            return

        head, *tail = remaining_path_parts
        child = self.children.get(head)
        if child is None:
            child = Resource(path=self.path / head)
            self.children[head] = child
        child._add_resource(resource, tail)

    def _update(self, other: Resource) -> None:
        """Merge *other*, which must have the same path, into this node.

        Raises:
            RuntimeError: If the paths differ, or if both nodes carry a type
                and the types are not equal.
        """
        if self.path != other.path:
            path_message = (
                "Trying to merge nodes with different paths: "
                + f"{self.path} vs. {other.path}"
            )
            raise RuntimeError(path_message)

        if self.type and other.type and self.type != other.type:
            type_message = (
                "Trying to merge incompatible node types. "
                + f"Current node is {self.type}. Other is {other.type}"
            )
            raise RuntimeError(type_message)

        self.type = self.type or other.type
        self.attributions.extend(other.attributions)

        for key, incoming_child in other.children.items():
            existing_child = self.children.get(key)
            if existing_child is None:
                # No counterpart yet: adopt the other node's child as is.
                self.children[key] = incoming_child
            else:
                existing_child._update(incoming_child)
201+
202+
203+
class BaseUrlsForSources(BaseModel):
    """Free-form model without declared fields; extra keys are accepted."""

    model_config = ConfigDict(frozen=True, extra="allow")

    def to_opossum_file_format(self) -> opossum_file.BaseUrlsForSources:
        """Re-create this model as its file-format counterpart."""
        dumped = self.model_dump()
        return opossum_file.BaseUrlsForSources(**dumped)
208+
209+
210+
class FrequentLicense(BaseModel):
    """A license entry consisting of full name, short name and default text."""

    model_config = ConfigDict(frozen=True, extra="forbid")
    full_name: str
    short_name: str
    default_text: str

    def to_opossum_file_format(self) -> opossum_file.FrequentLicense:
        """Copy all three fields into the file-format license object."""
        converted = opossum_file.FrequentLicense(
            full_name=self.full_name,
            short_name=self.short_name,
            default_text=self.default_text,
        )
        return converted
222+
223+
224+
class SourceInfo(BaseModel):
    """Source of an attribution: a name plus optional confidence and alias."""

    model_config = ConfigDict(frozen=True, extra="forbid")
    name: str
    # Defaults to 0 when not given; None is also accepted.
    document_confidence: int | float | None = 0
    additional_name: str | None = None

    def to_opossum_file_format(self) -> opossum_file.SourceInfo:
        """Copy all fields into the file-format source info object."""
        converted = opossum_file.SourceInfo(
            name=self.name,
            document_confidence=self.document_confidence,
            additional_name=self.additional_name,
        )
        return converted
236+
237+
238+
class OpossumPackage(BaseModel):
    """Immutable attribution attached to a resource.

    Only ``source`` is required; every other field defaults to ``None``.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")
    source: SourceInfo
    attribution_confidence: int | None = None
    comment: str | None = None
    package_name: str | None = None
    package_version: str | None = None
    package_namespace: str | None = None
    package_type: str | None = None
    package_purl_appendix: str | None = None
    copyright: str | None = None
    license_name: str | None = None
    license_text: str | None = None
    url: str | None = None
    first_party: bool | None = None
    exclude_from_notice: bool | None = None
    pre_selected: bool | None = None
    follow_up: Literal["FOLLOW_UP"] | None = None
    origin_id: str | None = None
    origin_ids: list[str] | None = None
    criticality: Literal["high"] | Literal["medium"] | None = None
    was_preferred: bool | None = None

    def to_opossum_file_format(self) -> opossum_file.OpossumPackage:
        """Convert this attribution into its file-format counterpart.

        All fields are copied one to one, except that the source is converted
        through its own ``to_opossum_file_format`` and ``package_purl_appendix``
        is passed under the name ``package_p_u_r_l_appendix``.
        """
        converted_source = self.source.to_opossum_file_format()
        return opossum_file.OpossumPackage(
            source=converted_source,
            attribution_confidence=self.attribution_confidence,
            comment=self.comment,
            package_name=self.package_name,
            package_version=self.package_version,
            package_namespace=self.package_namespace,
            package_type=self.package_type,
            # The target model spells this field differently.
            package_p_u_r_l_appendix=self.package_purl_appendix,
            copyright=self.copyright,
            license_name=self.license_name,
            license_text=self.license_text,
            url=self.url,
            first_party=self.first_party,
            exclude_from_notice=self.exclude_from_notice,
            pre_selected=self.pre_selected,
            follow_up=self.follow_up,
            origin_id=self.origin_id,
            origin_ids=self.origin_ids,
            criticality=self.criticality,
            was_preferred=self.was_preferred,
        )
284+
285+
286+
class Metadata(BaseModel):
    """Project metadata; keys beyond the declared fields are accepted."""

    model_config = ConfigDict(frozen=True, extra="allow")
    project_id: str
    file_creation_date: str
    project_title: str
    project_version: str | None = None
    expected_release_date: str | None = None
    build_date: str | None = None

    def to_opossum_file_format(self) -> opossum_file.Metadata:
        """Re-create this model as the file-format metadata object."""
        dumped = self.model_dump()
        return opossum_file.Metadata(**dumped)
297+
298+
299+
class ExternalAttributionSource(BaseModel):
    """Named attribution source with a priority and an optional flag."""

    model_config = ConfigDict(frozen=True, extra="forbid")
    name: str
    priority: int
    is_relevant_for_preferred: bool | None = None

    def to_opossum_file_format(self) -> opossum_file.ExternalAttributionSource:
        """Copy all fields into the file-format attribution source object."""
        converted = opossum_file.ExternalAttributionSource(
            name=self.name,
            priority=self.priority,
            is_relevant_for_preferred=self.is_relevant_for_preferred,
        )
        return converted

src/opossum_lib/scancode/convert_scancode_to_opossum.py

+6-19
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,10 @@
88
import sys
99
import uuid
1010

11-
from opossum_lib.opossum.opossum_file import (
12-
Metadata,
13-
OpossumInformation,
14-
)
11+
import opossum_lib.opossum_model as opossum_model
1512
from opossum_lib.opossum.opossum_file_content import OpossumFileContent
1613
from opossum_lib.scancode.model import Header, ScanCodeData
1714
from opossum_lib.scancode.resource_tree import (
18-
convert_to_opossum_resources,
19-
create_attribution_mapping,
2015
scancode_to_file_tree,
2116
)
2217

@@ -26,29 +21,21 @@ def convert_scancode_to_opossum(filename: str) -> OpossumFileContent:
2621

2722
scancode_data = load_scancode_json(filename)
2823

29-
filetree = scancode_to_file_tree(scancode_data)
30-
resources = convert_to_opossum_resources(filetree)
31-
external_attributions, resources_to_attributions = create_attribution_mapping(
32-
filetree
33-
)
24+
resources = [scancode_to_file_tree(scancode_data)]
3425

3526
scancode_header = extract_scancode_header(scancode_data, filename)
36-
metadata = Metadata(
27+
metadata = opossum_model.Metadata(
3728
project_id=str(uuid.uuid4()),
3829
file_creation_date=scancode_header.end_timestamp,
3930
project_title="ScanCode file",
4031
)
4132

42-
return OpossumFileContent(
43-
OpossumInformation(
33+
return opossum_model.Opossum(
34+
scan_results=opossum_model.ScanResults(
4435
metadata=metadata,
4536
resources=resources,
46-
external_attributions=external_attributions,
47-
resources_to_attributions=resources_to_attributions,
48-
attribution_breakpoints=[],
49-
external_attribution_sources={},
5037
)
51-
)
38+
).to_opossum_file_format()
5239

5340

5441
def load_scancode_json(filename: str) -> ScanCodeData:

src/opossum_lib/scancode/helpers.py

-19
This file was deleted.

0 commit comments

Comments
 (0)