Skip to content

Commit ae0d27b

Browse files
TST: Allow loading truncated images if required (#2586)
* TST: Allow loading truncated images if required
1 parent 956fd03 commit ae0d27b

File tree

3 files changed

+39
-15
lines changed

3 files changed

+39
-15
lines changed

tests/__init__.py

+22
Original file line number | Diff line number | Diff line change
@@ -143,3 +143,25 @@ def test_csv_consistency():
143143

144144
# Ensure the urls are unique
145145
assert len(pdfs) == len({pdf["url"] for pdf in pdfs})
146+
147+
148+
class PILContext:
    """Allow changing the PIL/Pillow configuration for some limited scope.

    Intended for use as a context manager::

        with PILContext():
            ...  # code that may need to load incomplete images

    On entry the current value of ``PIL.ImageFile.LOAD_TRUNCATED_IMAGES`` is
    remembered and the flag is set to ``True``. On exit the remembered value
    is written back, whether or not the body raised. Exceptions raised inside
    the ``with`` body are never suppressed.
    """

    def __init__(self) -> None:
        # Placeholder until ``__enter__`` records the real value of the flag.
        self._saved_load_truncated_images = False

    def __enter__(self) -> "PILContext":
        """Remember the current flag value, then allow incomplete images."""
        # Pillow is imported here rather than at module level, so importing
        # this module does not itself import PIL.
        from PIL import ImageFile

        self._saved_load_truncated_images = ImageFile.LOAD_TRUNCATED_IMAGES
        ImageFile.LOAD_TRUNCATED_IMAGES = True
        return self

    def __exit__(self, type_, value, traceback) -> bool:
        """Restore the flag value recorded by ``__enter__``.

        Always returns ``False`` so that an exception raised inside the
        ``with`` body keeps propagating to the caller.
        """
        from PIL import ImageFile

        ImageFile.LOAD_TRUNCATED_IMAGES = self._saved_load_truncated_images
        # A truthy return value would ask the interpreter to swallow the
        # in-flight exception; this helper must never hide test failures.
        return False

tests/test_filters.py

+9-8
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
)
2222
from pypdf.generic import ArrayObject, DictionaryObject, NameObject, NumberObject
2323

24-
from . import get_data_from_url
24+
from . import PILContext, get_data_from_url
2525
from .test_encryption import HAS_AES
2626
from .test_images import image_similarity
2727

@@ -371,13 +371,14 @@ def test_tiff_predictor():
371371
@pytest.mark.enable_socket()
def test_rgba():
    """Decode rgb with transparency"""
    # The whole decode-and-compare sequence runs inside PILContext, which
    # sets Pillow's LOAD_TRUNCATED_IMAGES flag for the duration of the block.
    # NOTE(review): presumably the embedded image stream in this PDF is
    # incomplete on some Pillow versions -- confirm against the upstream PR.
    with PILContext():
        reader = PdfReader(BytesIO(get_data_from_url(name="tika-972174.pdf")))
        data = reader.pages[0].images[0]
        assert ".jp2" in data.name
        # Compare the decoded image with the reference PNG fetched by name.
        similarity = image_similarity(
            data.image, BytesIO(get_data_from_url(name="tika-972174_p0-im0.png"))
        )
        assert similarity > 0.99
381382

382383

383384
@pytest.mark.enable_socket()

tests/test_workflows.py

+8-7
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
read_object,
2727
)
2828

29-
from . import get_data_from_url, normalize_warnings
29+
from . import PILContext, get_data_from_url, normalize_warnings
3030

3131
TESTS_ROOT = Path(__file__).parent.resolve()
3232
PROJECT_ROOT = TESTS_ROOT.parent
@@ -672,12 +672,13 @@ def test_image_extraction(url, name):
672672
if not root.exists():
673673
root.mkdir()
674674

675-
for page in reader.pages:
676-
for image in page.images:
677-
filename = root / image.name
678-
with open(filename, "wb") as img:
679-
img.write(image.data)
680-
images_extracted.append(filename)
675+
with PILContext():
676+
for page in reader.pages:
677+
for image in page.images:
678+
filename = root / image.name
679+
with open(filename, "wb") as img:
680+
img.write(image.data)
681+
images_extracted.append(filename)
681682

682683
# Cleanup
683684
do_cleanup = True # set this to False for manual inspection

0 commit comments

Comments
 (0)