File tree: 2 files changed, +14 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -1274,6 +1274,11 @@ def get_object(
1274
1274
self .stream .seek (start , 0 )
1275
1275
try :
1276
1276
idnum , generation = self .read_object_header (self .stream )
1277
+ if (
1278
+ idnum != indirect_reference .idnum
1279
+ or generation != indirect_reference .generation
1280
+ ):
1281
+ raise PdfReadError ("not matching, we parse the file for it" )
1277
1282
except Exception :
1278
1283
if hasattr (self .stream , "getbuffer" ):
1279
1284
buf = bytes (self .stream .getbuffer ())
@@ -1452,6 +1457,7 @@ def read(self, stream: StreamType) -> None:
1452
1457
try :
1453
1458
pid , _pgen = self .read_object_header (stream )
1454
1459
except ValueError :
1460
+ self ._rebuild_xref_table (stream )
1455
1461
break
1456
1462
if pid == id - self .xref_index :
1457
1463
# fixing index item per item is required for revised PDF.
Original file line number | Diff line number | Diff line change
@@ -1290,8 +1290,6 @@ def test_reader(caplog):
1290
1290
caplog .clear ()
1291
1291
# first call requires some repairs...
1292
1292
reader .pages [0 ].extract_text ()
1293
- assert "repaired" in caplog .text
1294
- assert "found" in caplog .text
1295
1293
caplog .clear ()
1296
1294
# ...and now no longer required
1297
1295
reader .pages [0 ].extract_text ()
@@ -1498,3 +1496,11 @@ def test_xyz_with_missing_param():
1498
1496
assert reader .outline [0 ]["/Top" ] == 0
1499
1497
assert reader .outline [1 ]["/Left" ] == 0
1500
1498
assert reader .outline [0 ]["/Top" ] == 0
1499
+
1500
+
1501
+ @pytest .mark .enable_socket ()
1502
+ def test_corrupted_xref ():
1503
+ url = "https://github.com/py-pdf/pypdf/files/14628314/iss2516.pdf"
1504
+ name = "iss2516.pdf"
1505
+ reader = PdfReader (BytesIO (get_data_from_url (url , name = name )))
1506
+ assert reader .root_object ["/Type" ] == "/Catalog"
You can’t perform that action at this time.
0 commit comments