Skip to content

Commit a86c319

Browse files
ROB: Consider root objects without catalog type as fallback (#3175)
Closes #3164.
1 parent d046d7e commit a86c319

File tree

2 files changed

+31
-5
lines changed

2 files changed

+31
-5
lines changed

pypdf/_reader.py

+10 -1
Original file line number | Diff line number | Diff line change
@@ -229,7 +229,16 @@ def root_object(self) -> DictionaryObject:
229229
self._validated_root = o
230230
logger_warning(f"Root found at {o.indirect_reference!r}", __name__)
231231
break
232-
if self._validated_root is None:
232+
if self._validated_root is None:
233+
if not is_null_or_none(root) and "/Pages" in cast(DictionaryObject, cast(PdfObject, root).get_object()):
234+
logger_warning(
235+
f"Possible root found at {cast(PdfObject, root).indirect_reference!r}, but missing /Catalog key",
236+
__name__
237+
)
238+
self._validated_root = cast(
239+
DictionaryObject, cast(PdfObject, root).get_object()
240+
)
241+
else:
233242
raise PdfReadError("Cannot find Root object in pdf")
234243
return self._validated_root
235244

tests/test_reader.py

+21 -4
Original file line number | Diff line number | Diff line change
@@ -1760,7 +1760,7 @@ def test_repair_root(caplog):
17601760
caplog.clear()
17611761
reader = PdfReader(
17621762
BytesIO(
1763-
b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog", b"/Catalo ")
1763+
b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog/Pages 3 0 R", b"/Catalo ")
17641764
)
17651765
)
17661766
with pytest.raises(PdfReadError):
@@ -1775,9 +1775,9 @@ def test_repair_root(caplog):
17751775

17761776
# Invalid /Root Entry + error in get_object
17771777
caplog.clear()
1778-
b = b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog", b"/Catalo ")
1779-
b = b[:5124] + b"A" + b[5125:]
1780-
reader = PdfReader(BytesIO(b))
1778+
data = b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog/Pages 3 0 R", b"/Catalo ")
1779+
data = data[:5124] + b"A" + data[5125:]
1780+
reader = PdfReader(BytesIO(data))
17811781
with pytest.raises(PdfReadError):
17821782
len(reader.pages)
17831783
assert all(
@@ -1788,6 +1788,23 @@ def test_repair_root(caplog):
17881788
)
17891789
)
17901790

1791+
# Invalid /Root Entry without /Type, but /Pages.
1792+
caplog.clear()
1793+
reader = PdfReader(
1794+
BytesIO(
1795+
b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog", b"/Catalo ")
1796+
)
1797+
)
1798+
assert len(reader.pages) == 1
1799+
assert all(
1800+
msg in caplog.text
1801+
for msg in (
1802+
"Invalid Root object in trailer",
1803+
'Searching object with "/Catalog" key',
1804+
f"Possible root found at IndirectObject(2, 0, {id(reader)}), but missing /Catalog key"
1805+
)
1806+
)
1807+
17911808

17921809
@pytest.mark.enable_socket
17931810
def test_issue3151(caplog):

0 commit comments

Comments
 (0)