Skip to content

Commit cd6d07b

Browse files
authored
Merge branch 'main' into Common2
2 parents 7c8c168 + f8edf3c commit cd6d07b

File tree

2 files changed

+14
-2
lines changed

2 files changed

+14
-2
lines changed

pypdf/_reader.py

+6
Original file line number | Diff line number | Diff line change
@@ -370,6 +370,11 @@ def get_object(
370370
self.stream.seek(start, 0)
371371
try:
372372
idnum, generation = self.read_object_header(self.stream)
373+
if (
374+
idnum != indirect_reference.idnum
375+
or generation != indirect_reference.generation
376+
):
377+
raise PdfReadError("not matching, we parse the file for it")
373378
except Exception:
374379
if hasattr(self.stream, "getbuffer"):
375380
buf = bytes(self.stream.getbuffer())
@@ -558,6 +563,7 @@ def read(self, stream: StreamType) -> None:
558563
try:
559564
pid, _pgen = self.read_object_header(stream)
560565
except ValueError:
566+
self._rebuild_xref_table(stream)
561567
break
562568
if pid == id - self.xref_index:
563569
# fixing index item per item is required for revised PDF.

tests/test_reader.py

+8-2
Original file line number | Diff line number | Diff line change
@@ -1291,8 +1291,6 @@ def test_reader(caplog):
12911291
caplog.clear()
12921292
# first call requires some reparations...
12931293
reader.pages[0].extract_text()
1294-
assert "repaired" in caplog.text
1295-
assert "found" in caplog.text
12961294
caplog.clear()
12971295
# ...and now no more required
12981296
reader.pages[0].extract_text()
@@ -1499,3 +1497,11 @@ def test_xyz_with_missing_param():
14991497
assert reader.outline[0]["/Top"] == 0
15001498
assert reader.outline[1]["/Left"] == 0
15011499
assert reader.outline[0]["/Top"] == 0
1500+
1501+
1502+
@pytest.mark.enable_socket()
1503+
def test_corrupted_xref():
1504+
url = "https://github.com/py-pdf/pypdf/files/14628314/iss2516.pdf"
1505+
name = "iss2516.pdf"
1506+
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
1507+
assert reader.root_object["/Type"] == "/Catalog"

0 commit comments

Comments
 (0)