File tree 2 files changed +14
-2
lines changed
2 files changed +14
-2
lines changed Original file line number Diff line number Diff line change @@ -370,6 +370,11 @@ def get_object(
370
370
self .stream .seek (start , 0 )
371
371
try :
372
372
idnum , generation = self .read_object_header (self .stream )
373
+ if (
374
+ idnum != indirect_reference .idnum
375
+ or generation != indirect_reference .generation
376
+ ):
377
+ raise PdfReadError ("not matching, we parse the file for it" )
373
378
except Exception :
374
379
if hasattr (self .stream , "getbuffer" ):
375
380
buf = bytes (self .stream .getbuffer ())
@@ -558,6 +563,7 @@ def read(self, stream: StreamType) -> None:
558
563
try :
559
564
pid , _pgen = self .read_object_header (stream )
560
565
except ValueError :
566
+ self ._rebuild_xref_table (stream )
561
567
break
562
568
if pid == id - self .xref_index :
563
569
# fixing the index item by item is required for revised PDFs.
Original file line number Diff line number Diff line change @@ -1291,8 +1291,6 @@ def test_reader(caplog):
1291
1291
caplog .clear ()
1292
1292
# first call requires some repairs...
1293
1293
reader .pages [0 ].extract_text ()
1294
- assert "repaired" in caplog .text
1295
- assert "found" in caplog .text
1296
1294
caplog .clear ()
1297
1295
# ...and now no more required
1298
1296
reader .pages [0 ].extract_text ()
@@ -1499,3 +1497,11 @@ def test_xyz_with_missing_param():
1499
1497
assert reader .outline [0 ]["/Top" ] == 0
1500
1498
assert reader .outline [1 ]["/Left" ] == 0
1501
1499
assert reader .outline [0 ]["/Top" ] == 0
1500
+
1501
+
1502
+ @pytest .mark .enable_socket ()
1503
+ def test_corrupted_xref ():
1504
+ url = "https://github.com/py-pdf/pypdf/files/14628314/iss2516.pdf"
1505
+ name = "iss2516.pdf"
1506
+ reader = PdfReader (BytesIO (get_data_from_url (url , name = name )))
1507
+ assert reader .root_object ["/Type" ] == "/Catalog"
You can’t perform that action at this time.
0 commit comments