Skip to content

Commit 1f1f831

Browse files
Merge branch 'main' into else
2 parents 196499d + 56220ab commit 1f1f831

13 files changed

+69
-63
lines changed

make_release.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,8 @@ def get_formatted_changes(git_tag: str) -> Tuple[str, str]:
255255
if grouped:
256256
output += "\n### Other\n"
257257
output_with_user += "\n### Other\n"
258-
for prefix in grouped:
259-
for commit in grouped[prefix]:
258+
for prefix, commits in grouped.items():
259+
for commit in commits:
260260
output += f"- {prefix}: {commit['msg']}\n"
261261
output_with_user += (
262262
f"- {prefix}: {commit['msg']} by @{commit['author']}\n"

pypdf/_cmap.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def build_char_map_from_dict(
7373
unknown_char_map: Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any]] = (
7474
"Unknown",
7575
9999,
76-
dict(zip(range(256), ["�"] * 256)),
76+
{key: "�" for key in range(256)},
7777
{},
7878
)
7979

pypdf/_encryption.py

+4-8
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,7 @@ def compute_O_value_key(owner_password: bytes, rev: int, key_size: int) -> bytes
253253
for _ in range(50):
254254
o_hash_digest = hashlib.md5(o_hash_digest).digest()
255255

256-
rc4_key = o_hash_digest[: key_size // 8]
257-
return rc4_key
256+
return o_hash_digest[: key_size // 8]
258257

259258
@staticmethod
260259
def compute_O_value(rc4_key: bytes, user_password: bytes, rev: int) -> bytes:
@@ -303,8 +302,7 @@ def compute_U_value(key: bytes, rev: int, id1_entry: bytes) -> bytes:
303302
304303
"""
305304
if rev <= 2:
306-
value = rc4_encrypt(key, _PADDING)
307-
return value
305+
return rc4_encrypt(key, _PADDING)
308306

309307
"""
310308
Algorithm 5: Computing the encryption dictionary’s U (user password) value.
@@ -542,8 +540,7 @@ def verify_owner_password(
542540
return b""
543541
iv = bytes(0 for _ in range(16))
544542
tmp_key = AlgV5.calculate_hash(R, password, o_value[40:48], u_value[:48])
545-
key = aes_cbc_decrypt(tmp_key, iv, oe_value)
546-
return key
543+
return aes_cbc_decrypt(tmp_key, iv, oe_value)
547544

548545
@staticmethod
549546
def verify_user_password(
@@ -761,8 +758,7 @@ def compute_Perms_value(key: bytes, p: int, metadata_encrypted: bool) -> bytes:
761758
b8 = b"T" if metadata_encrypted else b"F"
762759
rr = secrets.token_bytes(4)
763760
data = struct.pack("<I", p) + b"\xff\xff\xff\xff" + b8 + b"adb" + rr
764-
perms = aes_ecb_encrypt(key, data)
765-
return perms
761+
return aes_ecb_encrypt(key, data)
766762

767763

768764
class PasswordType(IntEnum):

pypdf/_page.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -622,9 +622,9 @@ def _get_ids_image(
622622
if not isinstance(x_object[o], StreamObject):
623623
continue
624624
if x_object[o][IA.SUBTYPE] == "/Image":
625-
lst.append(o if len(ancest) == 0 else ancest + [o])
625+
lst.append(o if len(ancest) == 0 else [*ancest, o])
626626
else: # is a form with possible images inside
627-
lst.extend(self._get_ids_image(x_object[o], ancest + [o], call_stack))
627+
lst.extend(self._get_ids_image(x_object[o], [*ancest, o], call_stack))
628628
assert self.inline_images is not None
629629
lst.extend(list(self.inline_images.keys()))
630630
return lst

pypdf/_reader.py

+20-16
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,16 @@ def root_object(self) -> DictionaryObject:
229229
self._validated_root = o
230230
logger_warning(f"Root found at {o.indirect_reference!r}", __name__)
231231
break
232-
if self._validated_root is None:
232+
if self._validated_root is None:
233+
if not is_null_or_none(root) and "/Pages" in cast(DictionaryObject, cast(PdfObject, root).get_object()):
234+
logger_warning(
235+
f"Possible root found at {cast(PdfObject, root).indirect_reference!r}, but missing /Catalog key",
236+
__name__
237+
)
238+
self._validated_root = cast(
239+
DictionaryObject, cast(PdfObject, root).get_object()
240+
)
241+
else:
233242
raise PdfReadError("Cannot find Root object in pdf")
234243
return self._validated_root
235244

@@ -245,13 +254,12 @@ def _info(self) -> Optional[DictionaryObject]:
245254
info = self.trailer.get(TK.INFO, None)
246255
if is_null_or_none(info):
247256
return None
248-
else:
249-
info = info.get_object()
250-
if not isinstance(info, DictionaryObject):
251-
raise PdfReadError(
252-
"Trailer not found or does not point to document information directory"
253-
)
254-
return info
257+
info = info.get_object()
258+
if not isinstance(info, DictionaryObject):
259+
raise PdfReadError(
260+
"Trailer not found or does not point to document information directory"
261+
)
262+
return info
255263

256264
@property
257265
def _ID(self) -> Optional[ArrayObject]:
@@ -316,8 +324,7 @@ def _get_page_number_by_indirect(
316324
else:
317325
idnum = indirect_reference.idnum
318326
assert self._page_id2num is not None, "hint for mypy"
319-
ret = self._page_id2num.get(idnum, None)
320-
return ret
327+
return self._page_id2num.get(idnum, None)
321328

322329
def _get_object_from_stream(
323330
self, indirect_reference: IndirectObject
@@ -913,10 +920,8 @@ def _read_xref(self, stream: StreamType) -> Optional[int]:
913920
)
914921
stream.seek(p, 0)
915922
if "/Prev" in new_trailer:
916-
startxref = new_trailer["/Prev"]
917-
return startxref
918-
else:
919-
return None
923+
return new_trailer["/Prev"]
924+
return None
920925

921926
def _read_xref_other_error(
922927
self, stream: StreamType, startxref: int
@@ -988,8 +993,7 @@ def get_entry(i: int) -> Union[int, Tuple[int, ...]]:
988993
# W array indicates...the default value shall be used
989994
if i == 0:
990995
return 1 # First value defaults to 1
991-
else:
992-
return 0
996+
return 0
993997

994998
def used_before(num: int, generation: Union[int, Tuple[int, ...]]) -> bool:
995999
# We move backwards through the xrefs, don't replace any.

pypdf/_writer.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -2074,11 +2074,13 @@ def remove_objects_from_page(
20742074
jump_operators = []
20752075
if to_delete & ObjectDeletionFlag.DRAWING_IMAGES:
20762076
jump_operators = (
2077-
[b"w", b"J", b"j", b"M", b"d", b"i"]
2078-
+ [b"W", b"W*"]
2079-
+ [b"b", b"b*", b"B", b"B*", b"S", b"s", b"f", b"f*", b"F", b"n"]
2080-
+ [b"m", b"l", b"c", b"v", b"y", b"h", b"re"]
2081-
+ [b"sh"]
2077+
[
2078+
b"w", b"J", b"j", b"M", b"d", b"i",
2079+
b"W", b"W*",
2080+
b"b", b"b*", b"B", b"B*", b"S", b"s", b"f", b"f*", b"F", b"n",
2081+
b"m", b"l", b"c", b"v", b"y", b"h", b"re",
2082+
b"sh"
2083+
]
20822084
)
20832085
if to_delete & ObjectDeletionFlag.TEXT:
20842086
jump_operators = [b"Tj", b"TJ", b"'", b'"']
@@ -2698,11 +2700,11 @@ def merge(
26982700
# numbers in the exclude list identifies that the exclusion is
26992701
# only applicable to 1st level of cloning
27002702
srcpages[pg.indirect_reference.idnum] = self.add_page(
2701-
pg, list(excluded_fields) + [1, "/B", 1, "/Annots"] # type: ignore
2703+
pg, [*list(excluded_fields), 1, "/B", 1, "/Annots"] # type: ignore
27022704
)
27032705
else:
27042706
srcpages[pg.indirect_reference.idnum] = self.insert_page(
2705-
pg, position, list(excluded_fields) + [1, "/B", 1, "/Annots"] # type: ignore
2707+
pg, position, [*list(excluded_fields), 1, "/B", 1, "/Annots"] # type: ignore
27062708
)
27072709
position += 1
27082710
srcpages[pg.indirect_reference.idnum].original_page = pg

pypdf/filters.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ def decode(
298298
char = data[index : index + 1]
299299
if char == b">":
300300
break
301-
elif char.isspace():
301+
if char.isspace():
302302
index += 1
303303
continue
304304
hex_pair += char

pypdf/generic/_base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,7 @@ def write_to_stream(
569569
@staticmethod
570570
def read_from_stream(stream: StreamType) -> Union["NumberObject", "FloatObject"]:
571571
num = read_until_regex(stream, NumberObject.NumberPattern)
572-
if num.find(b".") != -1:
572+
if b"." in num:
573573
return FloatObject(num)
574574
return NumberObject(num)
575575

@@ -624,7 +624,7 @@ def write_to_stream(
624624
stream.write(b">")
625625

626626
def __str__(self) -> str:
627-
charset_to_try = ["utf-16"] + list(NameObject.CHARSETS)
627+
charset_to_try = ["utf-16", *list(NameObject.CHARSETS)]
628628
for enc in charset_to_try:
629629
try:
630630
return self.decode(enc)

pypdf/pagerange.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def parse_filename_page_ranges(
173173
pairs: List[Tuple[str, PageRange]] = []
174174
pdf_filename: Union[str, None] = None
175175
did_page_range = False
176-
for arg in args + [None]:
176+
for arg in [*args, None]:
177177
if PageRange.valid(arg):
178178
if not pdf_filename:
179179
raise ValueError(

pyproject.toml

+2-3
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ select = ["ALL"]
129129
ignore = [
130130
"A001", # Variable is shadowing a Python builtin
131131
"A002", # Function argument is shadowing a Python builtin
132-
"A005", # Module shadows a Python standard-library module
133132
"ANN401", # Dynamically typed expressions (typing.Any) are disallowed
134133
"ARG001", # Unused function argument
135134
"ARG002", # Unused method argument
@@ -170,7 +169,6 @@ ignore = [
170169
"N817", # CamelCase `PagesAttributes` imported as acronym `PA`
171170
"PERF203", # `try`-`except` within a loop incurs performance overhead
172171
"PGH003", # Use specific rule codes when ignoring type issues
173-
"PLC0206", # Extracting value from dictionary without calling `.items()`
174172
"PLW0603", # Using the global statement to update `CUSTOM_RTL_SPECIAL_CHARS` is discouraged
175173
"PLW1510", # `subprocess.run` without explicit `check` argument
176174
"PLW2901", # `with` statement variable `img` overwritten by assignment target
@@ -189,7 +187,6 @@ ignore = [
189187
"RET508", # Unnecessary `else` after `break` statement
190188
"RUF001", # Detect confusable Unicode-to-Unicode units. Introduces bugs
191189
"RUF002", # Detect confusable Unicode-to-Unicode units. Introduces bugs
192-
"RUF005", # Detect confusable Unicode-to-Unicode units. Introduces bugs
193190
"S101", # Use of `assert` detected
194191
"S110", # `try`-`except`-`pass` detected, consider logging the exception
195192
"SIM105", # contextlib.suppress
@@ -219,6 +216,8 @@ max-complexity = 54 # Recommended: 10
219216
"_cryptography.py" = ["S304", "S305"] # Use of insecure cipher / modes, aka RC4 and AES-ECB
220217
"_encryption.py" = ["S324"]
221218
"_writer.py" = ["S324"]
219+
"pypdf/_codecs/symbol.py" = ["A005"] # Module shadows a Python standard-library module
220+
"types.py" = ["A005"] # Module shadows a Python standard-library module
222221
"docs/conf.py" = ["INP001", "PTH100"]
223222
"json_consistency.py" = ["T201"]
224223
"make_release.py" = ["S603", "S607", "T201"]

tests/conftest.py

+2-15
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,16 @@
55

66
import pytest
77

8-
from pypdf import PdfReader
9-
108
TESTS_ROOT = Path(__file__).parent.resolve()
119
PROJECT_ROOT = TESTS_ROOT.parent
1210
RESOURCE_ROOT = PROJECT_ROOT / "resources"
1311

1412

1513
@pytest.fixture(scope="session")
1614
def pdf_file_path(tmp_path_factory):
17-
fn = tmp_path_factory.mktemp("pypdf-data") / f"{uuid.uuid4()}.pdf"
18-
return fn
15+
return tmp_path_factory.mktemp("pypdf-data") / f"{uuid.uuid4()}.pdf"
1916

2017

2118
@pytest.fixture(scope="session")
2219
def txt_file_path(tmp_path_factory):
23-
fn = tmp_path_factory.mktemp("pypdf-data") / f"{uuid.uuid4()}.txt"
24-
return fn
25-
26-
27-
@pytest.fixture(scope="session")
28-
def pdf_reader_page():
29-
"""Gives a page that was retrieved from a PDF via PdfReader."""
30-
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
31-
reader = PdfReader(pdf_path)
32-
page = reader.pages[0]
33-
return page
20+
return tmp_path_factory.mktemp("pypdf-data") / f"{uuid.uuid4()}.txt"

tests/test_page_labels.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,8 @@ def test_index2label_kids():
135135
"XV",
136136
"XVI",
137137
"XVII",
138-
] + list(map(str, range(1, 284)))
138+
*list(map(str, range(1, 284)))
139+
]
139140
for x in ["20", "44", "58", "82", "94", "116", "154", "166", "192", "224", "250"]:
140141
# Some page labels are unused. Removing them is still easier than copying the
141142
# whole list itself here.

tests/test_reader.py

+21-4
Original file line numberDiff line numberDiff line change
@@ -1760,7 +1760,7 @@ def test_repair_root(caplog):
17601760
caplog.clear()
17611761
reader = PdfReader(
17621762
BytesIO(
1763-
b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog", b"/Catalo ")
1763+
b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog/Pages 3 0 R", b"/Catalo ")
17641764
)
17651765
)
17661766
with pytest.raises(PdfReadError):
@@ -1775,9 +1775,9 @@ def test_repair_root(caplog):
17751775

17761776
# Invalid /Root Entry + error in get_object
17771777
caplog.clear()
1778-
b = b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog", b"/Catalo ")
1779-
b = b[:5124] + b"A" + b[5125:]
1780-
reader = PdfReader(BytesIO(b))
1778+
data = b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog/Pages 3 0 R", b"/Catalo ")
1779+
data = data[:5124] + b"A" + data[5125:]
1780+
reader = PdfReader(BytesIO(data))
17811781
with pytest.raises(PdfReadError):
17821782
len(reader.pages)
17831783
assert all(
@@ -1788,6 +1788,23 @@ def test_repair_root(caplog):
17881788
)
17891789
)
17901790

1791+
# Invalid /Root Entry without /Type, but /Pages.
1792+
caplog.clear()
1793+
reader = PdfReader(
1794+
BytesIO(
1795+
b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog", b"/Catalo ")
1796+
)
1797+
)
1798+
assert len(reader.pages) == 1
1799+
assert all(
1800+
msg in caplog.text
1801+
for msg in (
1802+
"Invalid Root object in trailer",
1803+
'Searching object with "/Catalog" key',
1804+
f"Possible root found at IndirectObject(2, 0, {id(reader)}), but missing /Catalog key"
1805+
)
1806+
)
1807+
17911808

17921809
@pytest.mark.enable_socket
17931810
def test_issue3151(caplog):

0 commit comments

Comments
 (0)