Skip to content

Commit e411b76

Browse files
authored
MAINT: Return None instead of -1 when page is not attached (#2376)
If a page is not attached to a document, it does not have a page number, so we cannot return a "normal" number. Before this PR, we returned `-1`. Returning `None` instead of `-1` has two advantages: (1) its meaning is intuitively clear; (2) it is part of the type annotation, so mypy will complain if the result is not handled — if the callers (users) of pypdf are not careful, mypy might catch their error. For these reasons, we now return `None`. See #2010. Closes #2371.
1 parent ef5bacb commit e411b76

File tree

4 files changed

+25
-19
lines changed

4 files changed

+25
-19
lines changed

pypdf/_page.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1480,21 +1480,21 @@ def compress_content_streams(self, level: int = -1) -> None:
14801480
raise ValueError("Page must be part of a PdfWriter")
14811481

14821482
@property
1483-
def page_number(self) -> int:
1483+
def page_number(self) -> Optional[int]:
14841484
"""
14851485
Read-only property which return the page number with the pdf file.
14861486
14871487
Returns:
1488-
int : page number ; -1 if the page is not attached to a pdf
1488+
int : page number ; None if the page is not attached to a pdf
14891489
"""
14901490
if self.indirect_reference is None:
1491-
return -1
1491+
return None
14921492
else:
14931493
try:
14941494
lst = self.indirect_reference.pdf.pages
14951495
return lst.index(self)
14961496
except ValueError:
1497-
return -1
1497+
return None
14981498

14991499
def _debug_for_extract(self) -> str: # pragma: no cover
15001500
out = ""

pypdf/_reader.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -805,32 +805,32 @@ def threads(self) -> Optional[ArrayObject]:
805805

806806
def _get_page_number_by_indirect(
807807
self, indirect_reference: Union[None, int, NullObject, IndirectObject]
808-
) -> int:
808+
) -> Optional[int]:
809809
"""
810810
Generate _page_id2num.
811811
812812
Args:
813813
indirect_reference:
814814
815815
Returns:
816-
The page number.
816+
The page number or None
817817
"""
818818
if self._page_id2num is None:
819819
self._page_id2num = {
820820
x.indirect_reference.idnum: i for i, x in enumerate(self.pages) # type: ignore
821821
}
822822

823823
if indirect_reference is None or isinstance(indirect_reference, NullObject):
824-
return -1
824+
return None
825825
if isinstance(indirect_reference, int):
826826
idnum = indirect_reference
827827
else:
828828
idnum = indirect_reference.idnum
829829
assert self._page_id2num is not None, "hint for mypy"
830-
ret = self._page_id2num.get(idnum, -1)
830+
ret = self._page_id2num.get(idnum, None)
831831
return ret
832832

833-
def get_page_number(self, page: PageObject) -> int:
833+
def get_page_number(self, page: PageObject) -> Optional[int]:
834834
"""
835835
Retrieve page number of a given PageObject.
836836
@@ -839,19 +839,19 @@ def get_page_number(self, page: PageObject) -> int:
839839
an instance of :class:`PageObject<pypdf._page.PageObject>`
840840
841841
Returns:
842-
The page number or -1 if page is not found
842+
The page number or None if page is not found
843843
"""
844844
return self._get_page_number_by_indirect(page.indirect_reference)
845845

846-
def get_destination_page_number(self, destination: Destination) -> int:
846+
def get_destination_page_number(self, destination: Destination) -> Optional[int]:
847847
"""
848848
Retrieve page number of a given Destination object.
849849
850850
Args:
851851
destination: The destination to get page number.
852852
853853
Returns:
854-
The page number or -1 if page is not found
854+
The page number or None if page is not found
855855
"""
856856
return self._get_page_number_by_indirect(destination.page)
857857

tests/test_page.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1199,12 +1199,12 @@ def test_merge_transformed_page_into_blank():
11991199
True,
12001200
)
12011201
blank = PageObject.create_blank_page(width=100, height=100)
1202-
assert blank.page_number == -1
1202+
assert blank.page_number is None
12031203
inserted_blank = writer.add_page(blank)
1204-
assert blank.page_number == -1 # the inserted page is a clone
1204+
assert blank.page_number is None # the inserted page is a clone
12051205
assert inserted_blank.page_number == len(writer.pages) - 1
12061206
del writer._pages.get_object()["/Kids"][-1]
1207-
assert inserted_blank.page_number == -1
1207+
assert inserted_blank.page_number is None
12081208

12091209

12101210
def test_pages_printing():

tests/test_reader.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import time
44
from io import BytesIO
55
from pathlib import Path
6+
from typing import List, Union
67

78
import pytest
89

@@ -36,6 +37,9 @@
3637
SAMPLE_ROOT = PROJECT_ROOT / "sample-files"
3738

3839

40+
NestedList = Union[int, None, List["NestedList"]]
41+
42+
3943
@pytest.mark.parametrize(
4044
("src", "num_pages"),
4145
[("selenium-pypdf-issue-177.pdf", 1), ("pdflatex-outline.pdf", 4)],
@@ -695,12 +699,14 @@ def test_issue604(caplog, strict):
695699
]
696700
assert normalize_warnings(caplog.text) == msg
697701

698-
def get_dest_pages(x) -> int:
702+
def get_dest_pages(x) -> NestedList:
699703
if isinstance(x, list):
700-
r = [get_dest_pages(y) for y in x]
701-
return r
704+
return [get_dest_pages(y) for y in x]
702705
else:
703-
return pdf.get_destination_page_number(x) + 1
706+
destination_page_number = pdf.get_destination_page_number(x)
707+
if destination_page_number is None:
708+
return destination_page_number
709+
return destination_page_number + 1
704710

705711
out = []
706712

0 commit comments

Comments
 (0)