Skip to content

Commit 4bc7513

Browse files
authored
New optional reset_page_indices parameter for insert_toc_placeholder() - partially fix issue #1343 (#1366)
1 parent 293941b commit 4bc7513

14 files changed

+249
-120
lines changed

.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
# my files
21
.env
32
.DS_Store
43

54
# codecov.io
65
coverage.xml
76

7+
__pycache__
88
htmlcov
99
public
1010
tutorial/invoice.pdf

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
2626
* documentation on [verifying provenance of `fpdf2` releases](https://py-pdf.github.io/fpdf2/#verifying-provenance)
2727
* documentation on [`fpdf2` internals](https://py-pdf.github.io/fpdf2/Internals.html)
2828
* support for adding TrueType fonts that are missing the `.notdef` glyph - [issue #1161](https://github.com/py-pdf/fpdf2/issues/1161)
29+
* new optional `reset_page_indices` parameter for [`insert_toc_placeholder()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.insert_toc_placeholder)
2930
### Fixed
3031
* [`FPDF.write_html()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.write_html): fixed rendering of content following `<a>` tags - the emphasis style is now correctly reset after the `</a>` tag, so that hyperlink styling only applies to the content of the tag - [issue #1311](https://github.com/py-pdf/fpdf2/issues/1311)
3132

docs/Development.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,9 @@ Ask maintainers through comments if some errors in the pipeline seem obscure to
212212
[typos](https://github.com/crate-ci/typos) is a handy CLI tool to detect & auto-fix [typos](https://en.wikipedia.org/wiki/Typographical_error) in source files.
213213
Installation is relatively straightforward ([read the docs](https://github.com/crate-ci/typos?tab=readme-ov-file#install)).
214214

215-
This tool is invoked in our CI pipeline.
215+
This tool is invoked in the [pre-commit hooks](#pre-commit-hook) and in our CI pipeline.
216216
If it fails, you should either:
217+
217218
* auto-fix the errors detected by invoking `typos --write-changes`
218219
* add an exclusion rule to `.typos.toml`
219220

docs/PageLabels.md

+2
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ pdf.set_page_label(
7878

7979
If you need to get the current page label, for example, to display it in a header or footer, you can use the `get_page_label()` method.
8080

81+
!!! warning "In case of a [table of contents](DocumentOutlineAndTableOfContents.md) spanning **more than one page**, the page number returned by `get_page_label()` will be **incorrect**.<br>_cf._ [GitHub issue #1343](https://github.com/py-pdf/fpdf2/issues/1343)"
82+
8183
---
8284

8385
## Example Usage

fpdf/fpdf.py

+64-4
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ class ToCPlaceholder(NamedTuple):
167167
y: int
168168
page_orientation: str
169169
pages: int = 1
170+
reset_page_indices: bool = True
170171

171172

172173
# Disabling this check due to the "format" parameter below:
@@ -870,17 +871,29 @@ def alias_nb_pages(self, alias="{nb}"):
870871
"""
871872
self.str_alias_nb_pages = alias
872873

874+
@check_page
873875
def set_page_label(
874876
self,
875877
label_style: Union[str, PageLabelStyle] = None,
876878
label_prefix: str = None,
877879
label_start: int = None,
878880
):
879-
current_page_label = (
880-
None if self.page == 1 else self.pages[self.page - 1].get_page_label()
881-
)
881+
current_page_label = None
882+
if self.page in self.pages:
883+
current_page_label = self.pages[self.page].get_page_label()
884+
elif self.page > 1:
885+
current_page_label = self.pages[self.page - 1].get_page_label()
882886
new_page_label = None
883887
if label_style or label_prefix or label_start:
888+
if current_page_label:
889+
if label_style is None:
890+
label_style = current_page_label.get_style()
891+
if label_prefix is None:
892+
label_prefix = current_page_label.get_prefix()
893+
if label_start is None and not (
894+
self.toc_placeholder and self.toc_placeholder.reset_page_indices
895+
):
896+
label_start = current_page_label.get_start()
884897
label_style = (
885898
PageLabelStyle.coerce(label_style, case_sensitive=True)
886899
if label_style
@@ -4883,6 +4896,44 @@ def _insert_table_of_contents(self):
48834896
self.in_footer = True
48844897
self.footer()
48854898
self.in_footer = False
4899+
# We need to reorder the pages, because some new pages have been inserted in the ToC,
4900+
# but they have been inserted at the end of self.pages:
4901+
new_pages = [
4902+
self.pages.pop(len(self.pages)) for _ in range(self._toc_inserted_pages)
4903+
]
4904+
new_pages = list(reversed(new_pages))
4905+
indices_remap = {}
4906+
for page_index in range(
4907+
tocp.start_page + 1, self.pages_count + len(new_pages) + 1
4908+
):
4909+
if page_index in self.pages:
4910+
new_pages.append(self.pages.pop(page_index))
4911+
page = self.pages[page_index] = new_pages.pop(0)
4912+
# Fix page indices:
4913+
indices_remap[page.index()] = page_index
4914+
page.set_index(page_index)
4915+
# Fix page labels:
4916+
if tocp.reset_page_indices is False:
4917+
page.get_page_label().st = page_index
4918+
assert len(new_pages) == 0, f"#new_pages: {len(new_pages)}"
4919+
# Fix outline:
4920+
for section in self._outline:
4921+
new_index = indices_remap.get(section.page_number)
4922+
if new_index is not None:
4923+
section.dest = section.dest.replace(page=new_index)
4924+
section.page_number = new_index
4925+
if section.struct_elem:
4926+
# pylint: disable=protected-access
4927+
section.struct_elem._page_number = new_index
4928+
# Fix resource catalog:
4929+
new_resources_per_page = defaultdict(set)
4930+
for (
4931+
page_number,
4932+
resource_type,
4933+
), resource in self._resource_catalog.resources_per_page.items():
4934+
key = (indices_remap.get(page_number, page_number), resource_type)
4935+
new_resources_per_page[key] = resource
4936+
self._resource_catalog.resources_per_page = new_resources_per_page
48864937
self.page, self.y = prev_page, prev_y
48874938

48884939
def file_id(self): # pylint: disable=no-self-use
@@ -5186,6 +5237,7 @@ def insert_toc_placeholder(
51865237
render_toc_function: Callable,
51875238
pages: int = 1,
51885239
allow_extra_pages: bool = False,
5240+
reset_page_indices: bool = True,
51895241
):
51905242
"""
51915243
Configure Table Of Contents rendering at the end of the document generation,
@@ -5202,6 +5254,7 @@ def insert_toc_placeholder(
52025254
extra pages in the ToC, which may cause discrepancies with pre-rendered
52035255
page numbers. For consistent numbering, using page labels to create a
52045256
separate numbering style for the ToC is recommended.
5257+
reset_page_indices (bool): Whether to reset the page indices after the ToC. Defaults to True.
52055258
"""
52065259
if not callable(render_toc_function):
52075260
raise TypeError(
@@ -5213,7 +5266,12 @@ def insert_toc_placeholder(
52135266
f" on page {self.toc_placeholder.start_page}"
52145267
)
52155268
self.toc_placeholder = ToCPlaceholder(
5216-
render_toc_function, self.page, self.y, self.cur_orientation, pages
5269+
render_toc_function,
5270+
self.page,
5271+
self.y,
5272+
self.cur_orientation,
5273+
pages,
5274+
reset_page_indices,
52175275
)
52185276
self._toc_allow_page_insertion = allow_extra_pages
52195277
for _ in range(pages):
@@ -5267,6 +5325,8 @@ def start_section(self, name, level=0, strict=True):
52675325
Args:
52685326
name (str): section name
52695327
level (int): section level in the document outline. 0 means top-level.
5328+
strict (bool): whether to raise an exception if levels increase incorrectly,
5329+
for example with a level-3 section following a level-1 section.
52705330
"""
52715331
if level < 0:
52725332
raise ValueError('"level" mut be equal or greater than zero')

fpdf/outline.py

+21-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
in non-backward-compatible ways.
99
"""
1010

11-
from typing import List, NamedTuple, Optional, TYPE_CHECKING
11+
from dataclasses import dataclass
12+
from typing import List, Optional, TYPE_CHECKING
1213

1314
from .enums import Align, XPos, YPos
1415
from .fonts import TextStyle
@@ -19,12 +20,23 @@
1920
from .fpdf import FPDF
2021

2122

22-
class OutlineSection(NamedTuple):
23+
@dataclass
24+
class OutlineSection:
25+
# RAM usage optimization
26+
__slots__ = ("name", "level", "page_number", "dest", "struct_elem")
2327
name: str
2428
level: int
2529
page_number: int
2630
dest: Destination
27-
struct_elem: Optional[StructElem] = None
31+
struct_elem: Optional[StructElem]
32+
33+
# With __slots__ used, we need an __init__ method in order to define default values:
34+
def __init__(self, name, level, page_number, dest, struct_elem=None):
35+
self.name = name
36+
self.level = level
37+
self.page_number = page_number
38+
self.dest = dest
39+
self.struct_elem = struct_elem
2840

2941

3042
class OutlineItemDictionary(PDFObject):
@@ -58,6 +70,9 @@ def __init__(
5870
self.dest = dest
5971
self.struct_elem = struct_elem
6072

73+
def __str__(self):
74+
return f"OutlineItemDictionary(title={self.title}, dest={self.dest})"
75+
6176

6277
class OutlineDictionary(PDFObject):
6378
__slots__ = ("_id", "type", "first", "last", "count") # RAM usage optimization
@@ -69,6 +84,9 @@ def __init__(self, **kwargs):
6984
self.last = None
7085
self.count = 0
7186

87+
def __str__(self):
88+
return f"OutlineDictionary(count={self.count})"
89+
7290

7391
def build_outline_objs(sections):
7492
"""

fpdf/output.py

+23-21
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def __init__(
236236

237237

238238
class PDFPageLabel:
239-
__slots__ = ["_style", "_prefix", "st"]
239+
__slots__ = ("_style", "_prefix", "st") # RAM usage optimization
240240

241241
def __init__(
242242
self, label_style: PageLabelStyle, label_prefix: str, label_start: int
@@ -254,6 +254,9 @@ def p(self) -> PDFString:
254254
return PDFString(self._prefix) if self._prefix else None
255255

256256
def __repr__(self):
257+
return f"PDFPageLabel({self._style}, {self._prefix}, {self.st})"
258+
259+
def __str__(self):
257260
ret = self._prefix if self._prefix else ""
258261
if self._style:
259262
if self._style == PageLabelStyle.NUMBER:
@@ -266,7 +269,7 @@ def __repr__(self):
266269
ret += int_to_letters(self.st - 1)
267270
elif self._style == PageLabelStyle.LOWER_LETTER:
268271
ret += int_to_letters(self.st - 1).lower()
269-
return None if ret == "" else ret
272+
return ret
270273

271274
def serialize(self) -> dict:
272275
return build_obj_dict({key: getattr(self, key) for key in dir(self)})
@@ -319,6 +322,7 @@ def __init__(
319322
self.struct_parents = None
320323
self.resources = None # must always be set before calling .serialize()
321324
self.parent = None # must always be set before calling .serialize()
325+
# Useful properties that will not be serialized in the final PDF document:
322326
self._index = index
323327
self._width_pt, self._height_pt = None, None
324328
self._page_label: PDFPageLabel = None
@@ -327,6 +331,9 @@ def __init__(
327331
def index(self):
328332
return self._index
329333

334+
def set_index(self, i):
335+
self._index = i
336+
330337
def dimensions(self):
331338
"Return a pair (width, height) in the unit specified to FPDF constructor"
332339
return self._width_pt, self._height_pt
@@ -527,7 +534,7 @@ def bufferize(self):
527534
self.pdf_objs.append(xref)
528535

529536
# 2. Plumbing - Inject all PDF object references required:
530-
pages_root_obj.kids = PDFArray(self._reorder_page_objects(page_objs))
537+
pages_root_obj.kids = PDFArray(page_objs)
531538
self._finalize_catalog(
532539
catalog_obj,
533540
pages_root_obj=pages_root_obj,
@@ -627,10 +634,19 @@ def _add_pages_root(self):
627634
self._add_pdf_obj(pages_root_obj)
628635
return pages_root_obj
629636

637+
def _iter_pages_in_order(self):
638+
for page_index in range(1, self.fpdf.pages_count + 1):
639+
page_obj = self.fpdf.pages[page_index]
640+
# Defensive check:
641+
assert (
642+
page_obj.index() == page_index
643+
), f"{page_obj.index()=} != {page_index=}"
644+
yield page_obj
645+
630646
def _add_pages(self, _slice=slice(0, None)):
631647
fpdf = self.fpdf
632648
page_objs = []
633-
for page_obj in list(fpdf.pages.values())[_slice]:
649+
for page_obj in list(self._iter_pages_in_order())[_slice]:
634650
if fpdf.pdf_version > "1.3":
635651
page_obj.group = pdf_dict(
636652
{"/Type": "/Group", "/S": "/Transparency", "/CS": "/DeviceRGB"},
@@ -650,16 +666,6 @@ def _add_pages(self, _slice=slice(0, None)):
650666

651667
return page_objs
652668

653-
def _reorder_page_objects(self, page_objs: list):
654-
"Reorder page objects to move any Table of Contents pages generated at the end of the document to follow the ToC placeholder."
655-
if not self.fpdf._toc_inserted_pages:
656-
return page_objs
657-
reordered = page_objs.copy()
658-
for _ in range(self.fpdf._toc_inserted_pages):
659-
last_page = reordered.pop()
660-
reordered.insert(self.fpdf.toc_placeholder.start_page, last_page)
661-
return reordered
662-
663669
def _add_annotations_as_objects(self):
664670
sig_annotation_obj = None
665671
for page_obj in self.fpdf.pages.values():
@@ -1209,14 +1215,10 @@ def _finalize_catalog(
12091215
catalog_obj.names = pdf_dict(
12101216
{"/EmbeddedFiles": pdf_dict({"/Names": pdf_list(file_spec_names)})}
12111217
)
1212-
ordered_pages = list(fpdf.pages.items())
1213-
for _ in range(self.fpdf._toc_inserted_pages):
1214-
last_page = ordered_pages.pop()
1215-
ordered_pages.insert(self.fpdf.toc_placeholder.start_page, last_page)
12161218
page_labels = [
1217-
f"{seq} {pdf_dict(page[1].get_page_label().serialize())}"
1218-
for (seq, page) in enumerate(ordered_pages)
1219-
if page[1].get_page_label()
1219+
f"{i} {pdf_dict(page.get_page_label().serialize())}"
1220+
for i, page in enumerate(self._iter_pages_in_order())
1221+
if page.get_page_label()
12201222
]
12211223
if page_labels and not fpdf.pages[1].get_page_label():
12221224
# If page labels are used, an entry for sequence 0 is mandatory

fpdf/syntax.py

+11
Original file line numberDiff line numberDiff line change
@@ -377,3 +377,14 @@ def serialize(self, _security_handler=None, _obj_id=None):
377377
top = round(self.top, 2) if isinstance(self.top, float) else self.top
378378
assert self.page_ref
379379
return f"[{self.page_ref} /XYZ {left} {top} {self.zoom}]"
380+
381+
def replace(self, page=None, top=None, left=None, zoom=None):
382+
assert (
383+
not self.page_ref
384+
), "DestinationXYZ should not be copied after serialization"
385+
return DestinationXYZ(
386+
page=self.page_number if page is None else page,
387+
top=self.top if top is None else top,
388+
left=self.left if left is None else left,
389+
zoom=self.zoom if zoom is None else zoom,
390+
)

fpdf/util.py

+2
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ def convert_unit(
133133
def int2roman(n):
134134
"Convert an integer to Roman numeral"
135135
result = ""
136+
if n is None:
137+
return result
136138
for numeral, integer in ROMAN_NUMERAL_MAP:
137139
while n >= integer:
138140
result += numeral

0 commit comments

Comments
 (0)