Skip to content

Commit 48fb235

Browse files
ROB: Avoid negative seek values when reading partially broken files (#3157)
Closes #3151.
1 parent f65245f commit 48fb235

File tree

2 files changed

+13
-1
lines changed

2 files changed

+13
-1
lines changed

pypdf/_reader.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -1003,6 +1003,9 @@ def _get_xref_issues(stream: StreamType, startxref: int) -> int:
1003 1003               0 means no issue, other values represent specific issues.
1004 1004
1005 1005           """
     1006 +         if startxref == 0:
     1007 +             return 4
     1008 +
1006 1009           stream.seek(startxref - 1, 0) # -1 to check character before
1007 1010           line = stream.read(1)
1008 1011           if line == b"j":
@@ -1011,7 +1014,7 @@ def _get_xref_issues(stream: StreamType, startxref: int) -> int:
1011 1014               return 1
1012 1015           line = stream.read(4)
1013 1016           if line != b"xref":
1014      -             # not an xref so check if it is an XREF object
     1017 +             # not a xref so check if it is an XREF object
1015 1018               line = b""
1016 1019               while line in b"0123456789 \t":
1017 1020                   line = stream.read(1)

tests/test_reader.py

+9
Original file line number | Diff line number | Diff line change
@@ -1787,3 +1787,12 @@ def test_repair_root(caplog):
1787 1787               'Searching object with "/Catalog" key',
1788 1788           )
1789 1789       )
     1790 +
     1791 +
     1792 + @pytest.mark.enable_socket
     1793 + def test_issue3151(caplog):
     1794 +     """Tests for #3151"""
     1795 +     url = "https://github.com/user-attachments/files/18941494/bible.pdf"
     1796 +     name = "issue3151.pdf"
     1797 +     reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
     1798 +     assert len(reader.pages) == 742

0 commit comments

Comments
 (0)