opossum-tool · Hellgartner · Jan 14, 2025 · Jan 13, 2025 · Jan 13, 2025 · Jan 14, 2025
diff --git a/src/opossum_lib/cli.py b/src/opossum_lib/cli.py
@@ -12,6 +12,8 @@
 import click
 
 from opossum_lib.opossum.file_generation import write_opossum_information_to_file
+from opossum_lib.opossum.opossum_file import OpossumInformation
+from opossum_lib.opossum.read_opossum_file import read_opossum_file
 from opossum_lib.spdx.convert_to_opossum import convert_spdx_to_opossum_information
 
 
@@ -27,6 +29,12 @@ def opossum_file() -> None:
     multiple=True,
     type=click.Path(exists=True),
 )
+@click.option(
+    "--opossum",
+    help="opossum files used as input.",
+    multiple=True,
+    type=click.Path(exists=True),
+)
 @click.option(
     "--outfile",
     "-o",
@@ -35,23 +43,16 @@ def opossum_file() -> None:
     help="The file path to write the generated opossum document to. "
     'If appropriate, the extension ".opossum" will be appended.',
 )
-def generate(spdx: list[str], outfile: str) -> None:
+def generate(spdx: list[str], opossum: list[str], outfile: str) -> None:
     """
     Generate an Opossum file from various other file formats.
 
     \b
     Currently supported input formats:
       - SPDX
     """
-    if len(spdx) == 0:
-        logging.warning("No input provided. Exiting.")
-        sys.exit(1)
-    if len(spdx) > 1:
-        logging.error("Merging of multiple SPDX files not yet supported!")
-        sys.exit(1)
-
-    the_spdx_file = spdx[0]
-    opossum_information = convert_spdx_to_opossum_information(the_spdx_file)
+    validate_input_exit_on_error(spdx, opossum)
+    opossum_information = convert_after_valid_input(spdx, opossum)
 
     if not outfile.endswith(".opossum"):
         outfile += ".opossum"
@@ -62,5 +63,26 @@ def generate(spdx: list[str], outfile: str) -> None:
     write_opossum_information_to_file(opossum_information, Path(outfile))
 
 
+def validate_input_exit_on_error(spdx: list[str], opossum: list[str]) -> None:
+    total_number_of_files = len(spdx) + len(opossum)
+    if total_number_of_files == 0:
+        logging.warning("No input provided. Exiting.")
+        sys.exit(1)
+    if total_number_of_files > 1:
+        logging.error("Merging of multiple files not yet supported!")
+        sys.exit(1)
+
+
+def convert_after_valid_input(
+    spdx: list[str], opossum_files: list[str]
+) -> OpossumInformation:
+    if len(spdx) == 1:
+        the_spdx_file = spdx[0]
+        return convert_spdx_to_opossum_information(the_spdx_file)
+    else:
+        opossum_input_file = opossum_files[0]
+        return read_opossum_file(opossum_input_file)
+
+
 if __name__ == "__main__":
     opossum_file()
diff --git a/src/opossum_lib/opossum/constants.py b/src/opossum_lib/opossum/constants.py
@@ -2,3 +2,5 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 COMPRESSION_LEVEL = 5
+INPUT_JSON_NAME = "input.json"
+OUTPUT_JSON_NAME = "output.json"
diff --git a/src/opossum_lib/opossum/file_generation.py b/src/opossum_lib/opossum/file_generation.py
@@ -1,19 +1,14 @@
 # SPDX-FileCopyrightText: TNG Technology Consulting GmbH <https://www.tngtech.com>
 #
 # SPDX-License-Identifier: Apache-2.0
-import json
-from dataclasses import fields
 from pathlib import Path
 from zipfile import ZIP_DEFLATED, ZipFile
 
-from opossum_lib.opossum.constants import COMPRESSION_LEVEL
+from pydantic import TypeAdapter
+
+from opossum_lib.opossum.constants import COMPRESSION_LEVEL, INPUT_JSON_NAME
 from opossum_lib.opossum.opossum_file import (
-    ExternalAttributionSource,
-    Metadata,
     OpossumInformation,
-    OpossumPackage,
-    Resource,
-    SourceInfo,
 )
 
 
@@ -23,40 +18,9 @@ def write_opossum_information_to_file(
     with ZipFile(
         file_path, "w", compression=ZIP_DEFLATED, compresslevel=COMPRESSION_LEVEL
     ) as z:
-        z.writestr("input.json", json.dumps(to_dict(opossum_information), indent=4))
-
-
-def to_dict(
-    element: Resource
-    | Metadata
-    | OpossumPackage
-    | OpossumInformation
-    | SourceInfo
-    | ExternalAttributionSource
-    | str
-    | int
-    | bool
-    | dict[str, OpossumPackage]
-    | dict[str, list[str]]
-    | list[str]
-    | None,
-) -> dict | str | list[str] | bool | int | None:
-    if isinstance(element, Resource):
-        return element.to_dict()
-    if isinstance(
-        element,
-        Metadata
-        | OpossumPackage
-        | OpossumInformation
-        | SourceInfo
-        | ExternalAttributionSource,
-    ):
-        result = []
-        for f in fields(element):
-            value = to_dict(getattr(element, f.name))
-            result.append((f.name, value))
-        return {k: v for (k, v) in result if v is not None}
-    elif isinstance(element, dict):
-        return {k: to_dict(v) for k, v in element.items()}
-    else:
-        return element
+        z.writestr(
+            INPUT_JSON_NAME,
+            TypeAdapter(OpossumInformation).dump_json(
+                opossum_information, indent=4, exclude_none=True
+            ),
+        )
diff --git a/src/opossum_lib/opossum/merger.py b/src/opossum_lib/opossum/merger.py
@@ -9,6 +9,7 @@
     Resource,
     ResourcePath,
     ResourceType,
+    convert_resource_in_file_to_resource,
 )
 
 
@@ -23,10 +24,10 @@ def merge_opossum_information(
         metadata=expanded_opossum_information[0].metadata,
         resources=_merge_resources(
             [
-                opossum_information.resources
+                convert_resource_in_file_to_resource(opossum_information.resources)
                 for opossum_information in expanded_opossum_information
             ]
-        ),
+        ).convert_to_file_resource(),
         externalAttributions=_merge_dicts_without_duplicates(
             [
                 opossum_information.externalAttributions

diff --git a/src/opossum_lib/opossum/opossum_file.py b/src/opossum_lib/opossum/opossum_file.py
@@ -6,30 +6,52 @@
 from copy import deepcopy
 from dataclasses import field
 from enum import Enum, auto
-from typing import Literal
+from typing import Literal, cast
 
+from pydantic import BaseModel, ConfigDict, model_serializer
 from pydantic.dataclasses import dataclass
 
-OpossumPackageIdentifier = str
-ResourcePath = str
+type OpossumPackageIdentifier = str
+type ResourcePath = str
+type ResourceInFile = dict[str, ResourceInFile] | int
 
 
 @dataclass(frozen=True)
 class OpossumInformation:
     metadata: Metadata
-    resources: Resource
+    resources: ResourceInFile
     externalAttributions: dict[OpossumPackageIdentifier, OpossumPackage]
     resourcesToAttributions: dict[ResourcePath, list[OpossumPackageIdentifier]]
     attributionBreakpoints: list[str] = field(default_factory=list)
     externalAttributionSources: dict[str, ExternalAttributionSource] = field(
         default_factory=dict
     )
+    frequentLicenses: list[FrequentLicense] | None = None
+    filesWithChildren: list[str] | None = None
+    baseUrlsForSources: BaseUrlsForSources | None = None
+
+
+class BaseUrlsForSources(BaseModel):
+    @model_serializer
+    def serialize(self) -> dict:
+        # hack to override not serializing keys with corresponding value none:
+        # In this case this is valid and should be part of the serialization
+        return {k: v for k, v in self}
+
+    model_config = ConfigDict(extra="allow", frozen=True)
+
+
+class FrequentLicense(BaseModel):
+    fullName: str
+    shortName: str
+    defaultText: str
 
 
 @dataclass(frozen=True)
 class SourceInfo:
     name: str
-    documentConfidence: int | None = 0
+    documentConfidence: int | float | None = 0
+    additionalName: str | None = None
 
 
 @dataclass(frozen=True)
@@ -51,11 +73,13 @@ class OpossumPackage:
     preSelected: bool | None = None
     followUp: Literal["FOLLOW_UP"] | None = None
     originId: str | None = None
+    originIds: list[str] | None = None
     criticality: Literal["high"] | Literal["medium"] | None = None
+    wasPreferred: bool | None = None
 
 
-@dataclass(frozen=True)
-class Metadata:
+class Metadata(BaseModel):
+    model_config = ConfigDict(extra="allow", frozen=True)
     projectId: str
     fileCreationDate: str
     projectTitle: str
@@ -123,7 +147,7 @@ def drop_element(
 
             return resource
 
-    def to_dict(self) -> int | dict:
+    def to_dict(self) -> ResourceInFile:
         if not self.has_children():
             if self.type == ResourceType.FOLDER:
                 return {}
@@ -154,8 +178,33 @@ def get_paths_of_all_leaf_nodes_with_types(
     def has_children(self) -> bool:
         return len(self.children) > 0
 
+    def convert_to_file_resource(self) -> ResourceInFile:
+        return self.to_dict()
+
 
 @dataclass(frozen=True)
 class ExternalAttributionSource:
     name: str
     priority: int
+    isRelevantForPreferred: bool | None = None
+
+
+def _build_resource_tree(resource: ResourceInFile) -> Resource:
+    if isinstance(resource, int):
+        return Resource(type=ResourceType.FILE)
+    else:
+        result = Resource(type=ResourceType.FOLDER)
+        for name, child_resource in resource.items():
+            result.children[name] = _build_resource_tree(child_resource)
+        return result
+
+
+def convert_resource_in_file_to_resource(resource: ResourceInFile) -> Resource:
+    root_node = Resource(ResourceType.TOP_LEVEL)
+
+    if isinstance(resource, dict):
+        dict_resource = cast(dict[str, ResourceInFile], resource)
+        for name, child_resource in dict_resource.items():
+            root_node.children[name] = _build_resource_tree(child_resource)
+
+    return root_node
diff --git a/src/opossum_lib/opossum/read_opossum_file.py b/src/opossum_lib/opossum/read_opossum_file.py
@@ -0,0 +1,46 @@
+# SPDX-FileCopyrightText: TNG Technology Consulting GmbH <https://www.tngtech.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+import json
+import logging
+import sys
+from zipfile import ZipFile
+
+from pydantic import TypeAdapter
+
+from opossum_lib.opossum.constants import INPUT_JSON_NAME, OUTPUT_JSON_NAME
+from opossum_lib.opossum.opossum_file import (
+    OpossumInformation,
+)
+
+
+def read_opossum_file(filename: str) -> OpossumInformation:
+    logging.info(f"Converting opossum to opossum {filename}")
+
+    try:
+        with (
+            ZipFile(filename, "r") as input_zip_file,
+        ):
+            validate_zip_file_contents(input_zip_file)
+            with input_zip_file.open(INPUT_JSON_NAME) as input_json_file:
+                input_json = json.load(input_json_file)
+                return TypeAdapter(OpossumInformation).validate_python(input_json)
+    except Exception as e:
+        # handle the exception
+        print(f"Error reading file {filename}: {e}")
+        sys.exit(1)
+
+
+def validate_zip_file_contents(input_zip_file: ZipFile) -> None:
+    if INPUT_JSON_NAME not in input_zip_file.namelist():
+        logging.error(
+            f"Opossum file {input_zip_file.filename} is corrupt"
+            f" and does not contain '{INPUT_JSON_NAME}'"
+        )
+        sys.exit(1)
+    if OUTPUT_JSON_NAME in input_zip_file.namelist():
+        logging.error(
+            f"Opossum file {input_zip_file.filename} also contains"
+            f" '{OUTPUT_JSON_NAME}' which cannot be processed"
+        )
+        sys.exit(1)
diff --git a/src/opossum_lib/spdx/convert_to_opossum.py b/src/opossum_lib/spdx/convert_to_opossum.py
@@ -49,6 +49,7 @@
 
 
 def convert_spdx_to_opossum_information(filename: str) -> OpossumInformation:
+    logging.info(f"Converting {filename} to opossum information.")
     try:
         document: SpdxDocument = parse_file(filename)
 
@@ -116,7 +117,7 @@ def convert_tree_to_opossum_information(tree: DiGraph) -> OpossumInformation:
 
     opossum_information = OpossumInformation(
         metadata=metadata,
-        resources=resources,
+        resources=resources.convert_to_file_resource(),
         externalAttributions=external_attributions,
         resourcesToAttributions=resources_to_attributions,
         attributionBreakpoints=attribution_breakpoints,
@@ -170,5 +171,9 @@ def create_attribution_and_link_with_resource(
 def create_metadata(tree: DiGraph) -> Metadata:
     doc_name = tree.nodes["SPDXRef-DOCUMENT"]["element"].name
     created = tree.nodes["SPDXRef-DOCUMENT"]["element"].created
-    metadata = Metadata(str(uuid.uuid4()), created.isoformat(), doc_name)
+    metadata = Metadata(
+        projectId=str(uuid.uuid4()),
+        fileCreationDate=created.isoformat(),
+        projectTitle=doc_name,
+    )
     return metadata