Skip to content

Commit 057727c

Browse files
committed
refactor: simplify scancode conversion using new Resource functions
1 parent efa56f9 commit 057727c

File tree

5 files changed

+21
-156
lines changed

5 files changed

+21
-156
lines changed

src/opossum_lib/scancode/convert_scancode_to_opossum.py

+1 -4
Original file line number | Diff line number | Diff line change
@@ -21,10 +21,7 @@ def convert_scancode_to_opossum(filename: str) -> OpossumFileContent:
2121

2222
scancode_data = load_scancode_json(filename)
2323

24-
filetree = scancode_to_file_tree(scancode_data)
25-
resources = filetree.to_opossum_resources()
26-
with open("debug.json", "w") as out:
27-
out.write(resources[0].model_dump_json(indent=4, by_alias=True))
24+
resources = [scancode_to_file_tree(scancode_data)]
2825

2926
scancode_header = extract_scancode_header(scancode_data, filename)
3027
metadata = opossum_model.Metadata(

src/opossum_lib/scancode/helpers.py

-19
This file was deleted.

src/opossum_lib/scancode/resource_tree.py

+10 -44
Original file line number | Diff line number | Diff line change
@@ -5,59 +5,25 @@
55

66
from __future__ import annotations
77

8-
from pydantic import BaseModel
8+
from pathlib import Path
99

1010
import opossum_lib.opossum_model as opossum_model
1111
from opossum_lib.scancode.constants import SCANCODE_SOURCE_NAME
12-
from opossum_lib.scancode.helpers import check_schema, path_segments
1312
from opossum_lib.scancode.model import File, FileType, ScanCodeData
1413

1514

16-
class ScanCodeFileTree(BaseModel):
17-
file: File
18-
children: dict[str, ScanCodeFileTree] = {}
19-
20-
def get_path(self, path: list[str]) -> ScanCodeFileTree:
21-
if len(path) == 0:
22-
return self
23-
next_segment, *rest = path
24-
if next_segment not in self.children:
25-
self.children[next_segment] = ScanCodeFileTree.model_construct(None) # type: ignore
26-
return self.children[next_segment].get_path(rest)
27-
28-
def revalidate(self) -> None:
29-
check_schema(self)
30-
for child in self.children.values():
31-
child.revalidate()
32-
33-
def to_opossum_resources(
34-
self,
35-
) -> list[opossum_model.Resource]:
36-
def process_node(
37-
node: ScanCodeFileTree,
38-
) -> opossum_model.Resource:
39-
return opossum_model.Resource(
40-
path=node.file.path,
41-
attributions=get_attribution_info(node.file),
42-
type=convert_resource_type(node.file.type),
43-
children={
44-
key: process_node(child) for (key, child) in node.children.items()
45-
},
46-
)
47-
48-
return [process_node(self)]
49-
50-
51-
def scancode_to_file_tree(scancode_data: ScanCodeData) -> ScanCodeFileTree:
52-
temp_root = ScanCodeFileTree.model_construct(file=None) # type: ignore
15+
def scancode_to_file_tree(scancode_data: ScanCodeData) -> opossum_model.Resource:
16+
temp_root = opossum_model.Resource(path=Path(""))
5317
for file in scancode_data.files:
54-
segments = path_segments(file.path)
55-
temp_root.get_path(segments).file = file
18+
resource = opossum_model.Resource(
19+
path=Path(file.path),
20+
attributions=get_attribution_info(file),
21+
type=convert_resource_type(file.type),
22+
)
23+
temp_root.add_resource(resource)
5624

5725
assert len(temp_root.children) == 1
58-
root = list(temp_root.children.values())[0]
59-
check_schema(root)
60-
return root
26+
return list(temp_root.children.values())[0]
6127

6228

6329
def get_attribution_info(file: File) -> list[opossum_model.OpossumPackage]:

tests/test_scancode/model_helpers.py

+10
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,16 @@
1414
)
1515

1616

17+
def _create_reference_scancode_files() -> list[File]:
18+
return [
19+
_create_file("A", FileType.DIRECTORY),
20+
_create_file("A/B", FileType.DIRECTORY),
21+
_create_file("A/file1", FileType.FILE),
22+
_create_file("A/file2.txt", FileType.FILE),
23+
_create_file("A/B/file3", FileType.FILE),
24+
]
25+
26+
1727
def _create_file(
1828
path: str,
1929
type: FileType,

tests/test_scancode/test_resource_tree.py

-89
Original file line number | Diff line number | Diff line change
@@ -4,83 +4,20 @@
44

55
from copy import deepcopy
66

7-
import pytest
8-
from pydantic import ValidationError
9-
107
from opossum_lib.opossum_model import OpossumPackage, SourceInfo
118
from opossum_lib.scancode.constants import SCANCODE_SOURCE_NAME
129
from opossum_lib.scancode.model import (
1310
Copyright,
14-
File,
1511
FileBasedLicenseDetection,
1612
FileType,
1713
Match,
18-
ScanCodeData,
1914
)
2015
from opossum_lib.scancode.resource_tree import (
21-
ScanCodeFileTree,
2216
get_attribution_info,
23-
scancode_to_file_tree,
2417
)
2518
from tests.test_scancode.model_helpers import _create_file
2619

2720

28-
class TestRevalidate:
29-
def test_successfully_revalidate_valid_file_tree(self) -> None:
30-
dummy_file = _create_file("A", FileType.FILE)
31-
valid_structure = ScanCodeFileTree(
32-
file=dummy_file,
33-
children={
34-
"A": ScanCodeFileTree(file=dummy_file),
35-
"B": ScanCodeFileTree(
36-
file=dummy_file, children={"C": ScanCodeFileTree(file=dummy_file)}
37-
),
38-
},
39-
)
40-
valid_structure.revalidate()
41-
42-
def test_fail_to_revalidate_file_tree_invalid_at_toplevel(self) -> None:
43-
dummy_file = _create_file("A", FileType.FILE)
44-
invalid_structure = ScanCodeFileTree.model_construct(
45-
children={
46-
"A": ScanCodeFileTree(file=dummy_file),
47-
"B": ScanCodeFileTree(
48-
file=dummy_file, children={"C": ScanCodeFileTree(file=dummy_file)}
49-
),
50-
},
51-
file=None, # type: ignore
52-
)
53-
with pytest.raises(ValidationError):
54-
invalid_structure.revalidate()
55-
56-
def test_fail_to_revalidate_file_tree_invalid_only_at_lower_level(self) -> None:
57-
dummy_file = _create_file("A", FileType.FILE)
58-
invalid_structure = ScanCodeFileTree(
59-
file=dummy_file,
60-
children={
61-
"A": ScanCodeFileTree(file=dummy_file),
62-
"B": ScanCodeFileTree(
63-
file=dummy_file,
64-
children={"C": ScanCodeFileTree.model_construct(None)}, # type: ignore
65-
),
66-
},
67-
)
68-
with pytest.raises(ValidationError):
69-
invalid_structure.revalidate()
70-
71-
72-
def test_scancode_to_resource_tree_produces_expected_result() -> None:
73-
files = _create_reference_scancode_files()
74-
scancode_data = ScanCodeData(
75-
headers=[], packages=[], dependencies=[], license_detections=[], files=files
76-
)
77-
78-
tree = scancode_to_file_tree(scancode_data)
79-
reference = _create_reference_node_structure()
80-
81-
assert tree == reference
82-
83-
8421
def test_get_attribution_info_directory() -> None:
8522
folder = _create_file("A", FileType.DIRECTORY)
8623
assert get_attribution_info(folder) == []
@@ -159,29 +96,3 @@ def test_get_attribution_info_file_multiple() -> None:
15996
attribution_confidence=50,
16097
)
16198
assert set(attributions) == {expected1, expected2}
162-
163-
164-
def _create_reference_scancode_files() -> list[File]:
165-
return [
166-
_create_file("A", FileType.DIRECTORY),
167-
_create_file("A/B", FileType.DIRECTORY),
168-
_create_file("A/file1", FileType.FILE),
169-
_create_file("A/file2.txt", FileType.FILE),
170-
_create_file("A/B/file3", FileType.FILE),
171-
]
172-
173-
174-
def _create_reference_node_structure() -> ScanCodeFileTree:
175-
folder, subfolder, file1, file2, file3 = _create_reference_scancode_files()
176-
inner = ScanCodeFileTree(
177-
file=subfolder, children={"file3": ScanCodeFileTree(file=file3)}
178-
)
179-
reference = ScanCodeFileTree(
180-
file=folder,
181-
children={
182-
"B": inner,
183-
"file1": ScanCodeFileTree(file=file1),
184-
"file2.txt": ScanCodeFileTree(file=file2),
185-
},
186-
)
187-
return reference

0 commit comments

Comments
 (0)