Skip to content

Commit 15ea592

Browse files
committed
BUG: Truncate mediabox and cropbox values with > 4 points.
Closes #2991
1 parent 27edc06 commit 15ea592

File tree

2 files changed: +31 -5 lines changed

pypdf/_page.py

+17 -5
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,7 @@
106106
MERGE_CROP_BOX = "cropbox" # pypdf<=3.4.0 used 'trimbox'
107107

108108

109-
def _get_rectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleObject:
109+
def _get_rectangle(self: Any, name: str, defaults: Iterable[str], allow_truncate: bool) -> RectangleObject:
110110
retval: Union[None, RectangleObject, IndirectObject] = self.get(name)
111111
if isinstance(retval, RectangleObject):
112112
return retval
@@ -117,6 +117,13 @@ def _get_rectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleOb
117117
break
118118
if isinstance(retval, IndirectObject):
119119
retval = self.pdf.get_object(retval)
120+
if allow_truncate and (isinstance(retval, list) or isinstance(retval, tuple)):
121+
if len(retval) != 4:
122+
logger_warning(
123+
f"Expected {name} to be a rectangle with 4 points, but found: {retval}",
124+
__name__
125+
)
126+
retval = retval[:4]
120127
retval = RectangleObject(retval) # type: ignore
121128
_set_rectangle(self, name, retval)
122129
return retval
@@ -131,9 +138,14 @@ def _delete_rectangle(self: Any, name: str) -> None:
131138
del self[name]
132139

133140

134-
def _create_rectangle_accessor(name: str, fallback: Iterable[str]) -> property:
141+
def _create_rectangle_accessor(name: str, fallback: Iterable[str], allow_truncate: bool = False) -> property:
142+
"""
143+
Params:
144+
allow_truncate: True to permissively truncate the value at name down to the 4 points
145+
expected by RectangleObject if the value is a Tuple or List with a greater length.
146+
"""
135147
return property(
136-
lambda self: _get_rectangle(self, name, fallback),
148+
lambda self: _get_rectangle(self, name, fallback, allow_truncate=allow_truncate),
137149
lambda self, value: _set_rectangle(self, name, value),
138150
lambda self: _delete_rectangle(self, name),
139151
)
@@ -2452,12 +2464,12 @@ def _get_fonts(self) -> Tuple[Set[str], Set[str]]:
24522464
unembedded = fonts - embedded
24532465
return embedded, unembedded
24542466

2455-
mediabox = _create_rectangle_accessor(PG.MEDIABOX, ())
2467+
mediabox = _create_rectangle_accessor(PG.MEDIABOX, (), allow_truncate=True)
24562468
"""A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
24572469
default user space units, defining the boundaries of the physical medium on
24582470
which the page is intended to be displayed or printed."""
24592471

2460-
cropbox = _create_rectangle_accessor("/CropBox", (PG.MEDIABOX,))
2472+
cropbox = _create_rectangle_accessor("/CropBox", (PG.MEDIABOX,), allow_truncate=True)
24612473
"""
24622474
A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
24632475
default user space units, defining the visible region of default user

tests/test_page.py

+14
Original file line number | Diff line number | Diff line change
@@ -326,6 +326,20 @@ def test_page_properties():
326326
assert page.bleedbox == RectangleObject((0, 1, 100, 101))
327327

328328

329+
@pytest.mark.parametrize("key", [PG.MEDIABOX, PG.CROPBOX])
@pytest.mark.parametrize("values", [
    [0, 0, 612, 792, 0, 0, 612, 792],
    (0, 0, 612, 792, 0, 0, 612, 792),
    [0, 0, 612, 792, 0, 0, 612, 792, 0, 0],
    (0, 0, 612, 792, 0, 0, 612, 792, 0, 0),
])
def test_page_handles_long_media_and_crop_box_iss_2991(
    key: str, values: "List[float] | Tuple[float, ...]"
):
    """
    Over-long /MediaBox and /CropBox arrays are truncated to a rectangle.

    The box stored under ``key`` is overwritten with an array holding more
    than four numbers, and the page property that reads that same key must
    come back as the rectangle built from the first four values.

    Args:
        key: Page dictionary key of the box to overwrite.
        values: Over-long sequence of numbers stored under ``key``.
    """
    # The annotation above is quoted on purpose: an unquoted ``X | Y`` between
    # typing generics is evaluated at definition time and fails before
    # Python 3.10.

    # Read back through the property backed by ``key``. Asserting on
    # ``page.mediabox`` for the /CropBox case would only compare the
    # untouched media box of the file and never reach the truncation path.
    accessor_by_key = {PG.MEDIABOX: "mediabox", PG.CROPBOX: "cropbox"}

    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    page = reader.pages[0]
    page[NameObject(key)] = ArrayObject(values)

    assert getattr(page, accessor_by_key[key]) == RectangleObject((0, 0, 612, 792))
341+
342+
329343
def test_page_rotation():
330344
reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
331345
page = reader.pages[0]

0 commit comments

Comments
 (0)