Merge branch 'main' into else

stefan6419846 · web-flow · commit 1f1f831bbb33 · 2025-03-13T14:14:29.000+01:00
diff --git a/make_release.py b/make_release.py
@@ -255,8 +255,8 @@ def get_formatted_changes(git_tag: str) -> Tuple[str, str]:
     if grouped:
         output += "\n### Other\n"
         output_with_user += "\n### Other\n"
-        for prefix in grouped:
-            for commit in grouped[prefix]:
+        for prefix, commits in grouped.items():
+            for commit in commits:
                 output += f"- {prefix}: {commit['msg']}\n"
                 output_with_user += (
                     f"- {prefix}: {commit['msg']} by @{commit['author']}\n"
diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py
@@ -73,7 +73,7 @@ def build_char_map_from_dict(
 unknown_char_map: Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any]] = (
     "Unknown",
     9999,
-    dict(zip(range(256), ["�"] * 256)),
+    {key: "�" for key in range(256)},
     {},
 )
 
diff --git a/pypdf/_encryption.py b/pypdf/_encryption.py
@@ -253,8 +253,7 @@ def compute_O_value_key(owner_password: bytes, rev: int, key_size: int) -> bytes
             for _ in range(50):
                 o_hash_digest = hashlib.md5(o_hash_digest).digest()
 
-        rc4_key = o_hash_digest[: key_size // 8]
-        return rc4_key
+        return o_hash_digest[: key_size // 8]
 
     @staticmethod
     def compute_O_value(rc4_key: bytes, user_password: bytes, rev: int) -> bytes:
@@ -303,8 +302,7 @@ def compute_U_value(key: bytes, rev: int, id1_entry: bytes) -> bytes:
 
         """
         if rev <= 2:
-            value = rc4_encrypt(key, _PADDING)
-            return value
+            return rc4_encrypt(key, _PADDING)
 
         """
         Algorithm 5: Computing the encryption dictionary’s U (user password) value.
@@ -542,8 +540,7 @@ def verify_owner_password(
             return b""
         iv = bytes(0 for _ in range(16))
         tmp_key = AlgV5.calculate_hash(R, password, o_value[40:48], u_value[:48])
-        key = aes_cbc_decrypt(tmp_key, iv, oe_value)
-        return key
+        return aes_cbc_decrypt(tmp_key, iv, oe_value)
 
     @staticmethod
     def verify_user_password(
@@ -761,8 +758,7 @@ def compute_Perms_value(key: bytes, p: int, metadata_encrypted: bool) -> bytes:
         b8 = b"T" if metadata_encrypted else b"F"
         rr = secrets.token_bytes(4)
         data = struct.pack("<I", p) + b"\xff\xff\xff\xff" + b8 + b"adb" + rr
-        perms = aes_ecb_encrypt(key, data)
-        return perms
+        return aes_ecb_encrypt(key, data)
 
 
 class PasswordType(IntEnum):
diff --git a/pypdf/_page.py b/pypdf/_page.py
@@ -622,9 +622,9 @@ def _get_ids_image(
             if not isinstance(x_object[o], StreamObject):
                 continue
             if x_object[o][IA.SUBTYPE] == "/Image":
-                lst.append(o if len(ancest) == 0 else ancest + [o])
+                lst.append(o if len(ancest) == 0 else [*ancest, o])
             else:  # is a form with possible images inside
-                lst.extend(self._get_ids_image(x_object[o], ancest + [o], call_stack))
+                lst.extend(self._get_ids_image(x_object[o], [*ancest, o], call_stack))
         assert self.inline_images is not None
         lst.extend(list(self.inline_images.keys()))
         return lst
diff --git a/pypdf/_reader.py b/pypdf/_reader.py
@@ -229,7 +229,16 @@ def root_object(self) -> DictionaryObject:
                     self._validated_root = o
                     logger_warning(f"Root found at {o.indirect_reference!r}", __name__)
                     break
-            if self._validated_root is None:
+        if self._validated_root is None:
+            if not is_null_or_none(root) and "/Pages" in cast(DictionaryObject, cast(PdfObject, root).get_object()):
+                logger_warning(
+                    f"Possible root found at {cast(PdfObject, root).indirect_reference!r}, but missing /Catalog key",
+                    __name__
+                )
+                self._validated_root = cast(
+                    DictionaryObject, cast(PdfObject, root).get_object()
+                )
+            else:
                 raise PdfReadError("Cannot find Root object in pdf")
         return self._validated_root
 
@@ -245,13 +254,12 @@ def _info(self) -> Optional[DictionaryObject]:
         info = self.trailer.get(TK.INFO, None)
         if is_null_or_none(info):
             return None
-        else:
-            info = info.get_object()
-            if not isinstance(info, DictionaryObject):
-                raise PdfReadError(
-                    "Trailer not found or does not point to document information directory"
-                )
-            return info
+        info = info.get_object()
+        if not isinstance(info, DictionaryObject):
+            raise PdfReadError(
+                "Trailer not found or does not point to document information directory"
+            )
+        return info
 
     @property
     def _ID(self) -> Optional[ArrayObject]:
@@ -316,8 +324,7 @@ def _get_page_number_by_indirect(
         else:
             idnum = indirect_reference.idnum
         assert self._page_id2num is not None, "hint for mypy"
-        ret = self._page_id2num.get(idnum, None)
-        return ret
+        return self._page_id2num.get(idnum, None)
 
     def _get_object_from_stream(
         self, indirect_reference: IndirectObject
@@ -913,10 +920,8 @@ def _read_xref(self, stream: StreamType) -> Optional[int]:
                 )
             stream.seek(p, 0)
         if "/Prev" in new_trailer:
-            startxref = new_trailer["/Prev"]
-            return startxref
-        else:
-            return None
+            return new_trailer["/Prev"]
+        return None
 
     def _read_xref_other_error(
         self, stream: StreamType, startxref: int
@@ -988,8 +993,7 @@ def get_entry(i: int) -> Union[int, Tuple[int, ...]]:
             # W array indicates...the default value shall be used
             if i == 0:
                 return 1  # First value defaults to 1
-            else:
-                return 0
+            return 0
 
         def used_before(num: int, generation: Union[int, Tuple[int, ...]]) -> bool:
             # We move backwards through the xrefs, don't replace any.
diff --git a/pypdf/_writer.py b/pypdf/_writer.py
@@ -2074,11 +2074,13 @@ def remove_objects_from_page(
         jump_operators = []
         if to_delete & ObjectDeletionFlag.DRAWING_IMAGES:
             jump_operators = (
-                [b"w", b"J", b"j", b"M", b"d", b"i"]
-                + [b"W", b"W*"]
-                + [b"b", b"b*", b"B", b"B*", b"S", b"s", b"f", b"f*", b"F", b"n"]
-                + [b"m", b"l", b"c", b"v", b"y", b"h", b"re"]
-                + [b"sh"]
+                [
+                    b"w", b"J", b"j", b"M", b"d", b"i",
+                    b"W", b"W*",
+                    b"b", b"b*", b"B", b"B*", b"S", b"s", b"f", b"f*", b"F", b"n",
+                    b"m", b"l", b"c", b"v", b"y", b"h", b"re",
+                    b"sh"
+                ]
             )
         if to_delete & ObjectDeletionFlag.TEXT:
             jump_operators = [b"Tj", b"TJ", b"'", b'"']
@@ -2698,11 +2700,11 @@ def merge(
                 # numbers in the exclude list identifies that the exclusion is
                 # only applicable to 1st level of cloning
                 srcpages[pg.indirect_reference.idnum] = self.add_page(
-                    pg, list(excluded_fields) + [1, "/B", 1, "/Annots"]  # type: ignore
+                    pg, [*list(excluded_fields), 1, "/B", 1, "/Annots"]  # type: ignore
                 )
             else:
                 srcpages[pg.indirect_reference.idnum] = self.insert_page(
-                    pg, position, list(excluded_fields) + [1, "/B", 1, "/Annots"]  # type: ignore
+                    pg, position, [*list(excluded_fields), 1, "/B", 1, "/Annots"]  # type: ignore
                 )
                 position += 1
             srcpages[pg.indirect_reference.idnum].original_page = pg
diff --git a/pypdf/filters.py b/pypdf/filters.py
@@ -298,7 +298,7 @@ def decode(
             char = data[index : index + 1]
             if char == b">":
                 break
-            elif char.isspace():
+            if char.isspace():
                 index += 1
                 continue
             hex_pair += char
diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py
@@ -569,7 +569,7 @@ def write_to_stream(
     @staticmethod
     def read_from_stream(stream: StreamType) -> Union["NumberObject", "FloatObject"]:
         num = read_until_regex(stream, NumberObject.NumberPattern)
-        if num.find(b".") != -1:
+        if b"." in num:
             return FloatObject(num)
         return NumberObject(num)
 
@@ -624,7 +624,7 @@ def write_to_stream(
         stream.write(b">")
 
     def __str__(self) -> str:
-        charset_to_try = ["utf-16"] + list(NameObject.CHARSETS)
+        charset_to_try = ["utf-16", *list(NameObject.CHARSETS)]
         for enc in charset_to_try:
             try:
                 return self.decode(enc)
diff --git a/pypdf/pagerange.py b/pypdf/pagerange.py
@@ -173,7 +173,7 @@ def parse_filename_page_ranges(
     pairs: List[Tuple[str, PageRange]] = []
     pdf_filename: Union[str, None] = None
     did_page_range = False
-    for arg in args + [None]:
+    for arg in [*args, None]:
         if PageRange.valid(arg):
             if not pdf_filename:
                 raise ValueError(
diff --git a/pyproject.toml b/pyproject.toml
@@ -129,7 +129,6 @@ select = ["ALL"]
 ignore = [
     "A001",    # Variable is shadowing a Python builtin
     "A002",    # Function argument is shadowing a Python builtin
-    "A005",    # Module shadows a Python standard-library module
     "ANN401",  # Dynamically typed expressions (typing.Any) are disallowed
     "ARG001",  # Unused function argument
     "ARG002",  # Unused method argument
@@ -170,7 +169,6 @@ ignore = [
     "N817",    # CamelCase `PagesAttributes` imported as acronym `PA`
     "PERF203", # `try`-`except` within a loop incurs performance overhead
     "PGH003",  # Use specific rule codes when ignoring type issues
-    "PLC0206", # Extracting value from dictionary without calling `.items()`
     "PLW0603", # Using the global statement to update `CUSTOM_RTL_SPECIAL_CHARS` is discouraged
     "PLW1510", # `subprocess.run` without explicit `check` argument
     "PLW2901", # `with` statement variable `img` overwritten by assignment target
@@ -189,7 +187,6 @@ ignore = [
     "RET508",  # Unnecessary `else` after `break` statement
     "RUF001",  # Detect confusable Unicode-to-Unicode units. Introduces bugs
     "RUF002",  # Detect confusable Unicode-to-Unicode units. Introduces bugs
-    "RUF005",  # Detect confusable Unicode-to-Unicode units. Introduces bugs
     "S101",    # Use of `assert` detected
     "S110",    # `try`-`except`-`pass` detected, consider logging the exception
     "SIM105",  # contextlib.suppress
@@ -219,6 +216,8 @@ max-complexity = 54  # Recommended: 10
 "_cryptography.py" = ["S304", "S305"]  # Use of insecure cipher / modes, aka RC4 and AES-ECB
 "_encryption.py" = ["S324"]
 "_writer.py" = ["S324"]
+"pypdf/_codecs/symbol.py" = ["A005"]  # Module shadows a Python standard-library module
+"types.py" = ["A005"]  # Module shadows a Python standard-library module
 "docs/conf.py" = ["INP001", "PTH100"]
 "json_consistency.py" = ["T201"]
 "make_release.py" = ["S603", "S607", "T201"]
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -5,29 +5,16 @@
 
 import pytest
 
-from pypdf import PdfReader
-
 TESTS_ROOT = Path(__file__).parent.resolve()
 PROJECT_ROOT = TESTS_ROOT.parent
 RESOURCE_ROOT = PROJECT_ROOT / "resources"
 
 
 @pytest.fixture(scope="session")
 def pdf_file_path(tmp_path_factory):
-    fn = tmp_path_factory.mktemp("pypdf-data") / f"{uuid.uuid4()}.pdf"
-    return fn
+    return tmp_path_factory.mktemp("pypdf-data") / f"{uuid.uuid4()}.pdf"
 
 
 @pytest.fixture(scope="session")
 def txt_file_path(tmp_path_factory):
-    fn = tmp_path_factory.mktemp("pypdf-data") / f"{uuid.uuid4()}.txt"
-    return fn
-
-
-@pytest.fixture(scope="session")
-def pdf_reader_page():
-    """Gives a page that was retrieved from a PDF via PdfReader."""
-    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
-    reader = PdfReader(pdf_path)
-    page = reader.pages[0]
-    return page
+    return tmp_path_factory.mktemp("pypdf-data") / f"{uuid.uuid4()}.txt"
diff --git a/tests/test_page_labels.py b/tests/test_page_labels.py
@@ -135,7 +135,8 @@ def test_index2label_kids():
         "XV",
         "XVI",
         "XVII",
-    ] + list(map(str, range(1, 284)))
+        *list(map(str, range(1, 284)))
+    ]
     for x in ["20", "44", "58", "82", "94", "116", "154", "166", "192", "224", "250"]:
         # Some page labels are unused. Removing them is still easier than copying the
         # whole list itself here.
diff --git a/tests/test_reader.py b/tests/test_reader.py
@@ -1760,7 +1760,7 @@ def test_repair_root(caplog):
     caplog.clear()
     reader = PdfReader(
         BytesIO(
-            b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog", b"/Catalo ")
+            b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog/Pages 3 0 R", b"/Catalo ")
         )
     )
     with pytest.raises(PdfReadError):
@@ -1775,9 +1775,9 @@ def test_repair_root(caplog):
 
     # Invalid /Root Entry + error in get_object
     caplog.clear()
-    b = b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog", b"/Catalo ")
-    b = b[:5124] + b"A" + b[5125:]
-    reader = PdfReader(BytesIO(b))
+    data = b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog/Pages 3 0 R", b"/Catalo ")
+    data = data[:5124] + b"A" + data[5125:]
+    reader = PdfReader(BytesIO(data))
     with pytest.raises(PdfReadError):
         len(reader.pages)
     assert all(
@@ -1788,6 +1788,23 @@ def test_repair_root(caplog):
         )
     )
 
+    # Invalid /Root Entry without /Type, but /Pages.
+    caplog.clear()
+    reader = PdfReader(
+        BytesIO(
+            b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog", b"/Catalo ")
+        )
+    )
+    assert len(reader.pages) == 1
+    assert all(
+        msg in caplog.text
+        for msg in (
+            "Invalid Root object in trailer",
+            'Searching object with "/Catalog" key',
+            f"Possible root found at IndirectObject(2, 0, {id(reader)}), but missing /Catalog key"
+        )
+    )
+
 
 @pytest.mark.enable_socket
 def test_issue3151(caplog):

Original file line number	Diff line number	Diff line change
`@@ -73,7 +73,7 @@ def build_char_map_from_dict(`
`73`	`73`	`unknown_char_map: Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any]] = (`
`74`	`74`	`"Unknown",`
`75`	`75`	`9999,`
`76`		`- dict(zip(range(256), ["�"] * 256)),`
	`76`	`+ {key: "�" for key in range(256)},`
`77`	`77`	`{},`
`78`	`78`	`)`
`79`	`79`