Skip to content

Commit efe9aba

Browse files
authored
Unicode characters in headings are now properly displayed in the table of contents - fix #320 (#324)
1 parent ec34c0f commit efe9aba

22 files changed

+30
-27
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ and [PEP 440](https://www.python.org/dev/peps/pep-0440/).
1414

1515
### Fixed
1616
- `will_page_break()` & `accept_page_break` are no longer invoked during a call to `multi_cell(split_only=True)`
17+
- Unicode characters in headings are now properly displayed in the table of contents, _cf._ [#320](https://github.com/PyFPDF/fpdf2/issues/320)
1718

1819
## [2.4.6] - 2021-11-16
1920
### Added

docs/Presentations.md

100755100644
File mode changed.

docs/qpdf-logo.svg

+1-7
Loading

fpdf/fpdf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1585,7 +1585,7 @@ def set_font(self, family=None, style="", size=0):
15851585
15861586
Standard fonts use `Latin-1` encoding by default, but Windows
15871587
encoding `cp1252` (Western Europe) can be used with
1588-
[set_doc_option](set_doc_option.md) ("core_fonts_encoding", encoding).
1588+
`self.core_fonts_encoding = encoding`.
15891589
15901590
The font specified is retained from page to page.
15911591
The method can be called before the first page is created.

fpdf/syntax.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def create_dictionary_string(
9696
return "".join(
9797
[
9898
open_dict,
99-
field_join.join(key_value_join.join(map(str, f)) for f in dict_.items()),
99+
field_join.join(key_value_join.join((k, str(v))) for k, v in dict_.items()),
100100
close_dict,
101101
]
102102
)
@@ -198,9 +198,7 @@ def camel_case(property_name):
198198

199199
class PDFString(str):
200200
def serialize(self):
201-
# Filtering out characters that are not encodable as Latin1 for now,
202-
# as an outline /Title seemingly cannot "just" be encoded as UTF-16BE:
203-
return f'({self.encode("latin-1", "ignore").decode("latin-1")})'
201+
return f'({self.encode("UTF-16").decode("latin-1")})'
204202

205203

206204
class PDFArray(list):
84 Bytes
Binary file not shown.

test/html/html_features.pdf

24 Bytes
Binary file not shown.

test/html/html_heading_hebrew.pdf

27 Bytes
Binary file not shown.
762 Bytes
Binary file not shown.
156 Bytes
Binary file not shown.
Binary file not shown.

test/link_alt_text.pdf

40 Bytes
Binary file not shown.

test/outline/2_pages_outline.pdf

780 Bytes
Binary file not shown.

test/outline/custom_HTML2FPDF.pdf

54 Bytes
Binary file not shown.

test/outline/html_toc.pdf

96 Bytes
Binary file not shown.

test/outline/html_toc_2_pages.pdf

734 Bytes
Binary file not shown.
27 Bytes
Binary file not shown.

test/outline/russian_heading.pdf

7.91 KB
Binary file not shown.

test/outline/simple_outline.pdf

148 Bytes
Binary file not shown.

test/outline/test_outline.py

+10
Original file line numberDiff line numberDiff line change
@@ -157,3 +157,13 @@ def test_2_pages_outline(tmp_path):
157157
" sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
158158
)
159159
assert_pdf_equal(pdf, HERE / "2_pages_outline.pdf", tmp_path)
160+
161+
162+
def test_russian_heading(tmp_path): # issue-320
163+
pdf = FPDF()
164+
pdf.add_font("Roboto", style="B", fname="test/fonts/Roboto-Regular.ttf", uni=True)
165+
pdf.set_font("Roboto", style="B")
166+
pdf.add_page()
167+
pdf.start_section("Русский, English, 1 2 3...")
168+
pdf.write(8, "Русский текст в параграфе.")
169+
assert_pdf_equal(pdf, HERE / "russian_heading.pdf", tmp_path)

test/outline/test_outline_serializer.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def test_serialize_outline():
1818
)
1919
assert (
2020
serialize_outline(sections, first_object_id=6)
21-
== """\
21+
== f"""\
2222
6 0 obj
2323
<<
2424
/Count 2
@@ -35,15 +35,15 @@ def test_serialize_outline():
3535
/Last 8 0 R
3636
/Next 9 0 R
3737
/Parent 6 0 R
38-
/Title (Title 1)
38+
/Title ({'Title 1'.encode('UTF-16').decode('latin-1')})
3939
>>
4040
endobj
4141
8 0 obj
4242
<<
4343
/Count 0
4444
/Dest [5 0 R /XYZ 0 0 null]
4545
/Parent 7 0 R
46-
/Title (Subtitle 1.1)
46+
/Title ({'Subtitle 1.1'.encode('UTF-16').decode('latin-1')})
4747
>>
4848
endobj
4949
9 0 obj
@@ -54,7 +54,7 @@ def test_serialize_outline():
5454
/Last 11 0 R
5555
/Parent 6 0 R
5656
/Prev 7 0 R
57-
/Title (Title 2)
57+
/Title ({'Title 2'.encode('UTF-16').decode('latin-1')})
5858
>>
5959
endobj
6060
10 0 obj
@@ -63,7 +63,7 @@ def test_serialize_outline():
6363
/Dest [9 0 R /XYZ 0 0 null]
6464
/Next 11 0 R
6565
/Parent 9 0 R
66-
/Title (Subtitle 2.1)
66+
/Title ({'Subtitle 2.1'.encode('UTF-16').decode('latin-1')})
6767
>>
6868
endobj
6969
11 0 obj
@@ -72,7 +72,7 @@ def test_serialize_outline():
7272
/Dest [11 0 R /XYZ 0 0 null]
7373
/Parent 9 0 R
7474
/Prev 10 0 R
75-
/Title (Subtitle 2.2)
75+
/Title ({'Subtitle 2.2'.encode('UTF-16').decode('latin-1')})
7676
>>
7777
endobj"""
7878
)
@@ -88,7 +88,7 @@ def test_serialize_outline_with_headless_hierarchy(): # issues 239
8888
)
8989
assert (
9090
serialize_outline(sections, first_object_id=6)
91-
== """\
91+
== f"""\
9292
6 0 obj
9393
<<
9494
/Count 2
@@ -104,15 +104,15 @@ def test_serialize_outline_with_headless_hierarchy(): # issues 239
104104
/First 8 0 R
105105
/Last 8 0 R
106106
/Parent 6 0 R
107-
/Title (?-1)
107+
/Title ({'?-1'.encode('UTF-16').decode('latin-1')})
108108
>>
109109
endobj
110110
8 0 obj
111111
<<
112112
/Count 0
113113
/Dest [5 0 R /XYZ 0 0 null]
114114
/Parent 7 0 R
115-
/Title (?-1-1)
115+
/Title ({'?-1-1'.encode('UTF-16').decode('latin-1')})
116116
>>
117117
endobj
118118
9 0 obj
@@ -122,7 +122,7 @@ def test_serialize_outline_with_headless_hierarchy(): # issues 239
122122
/First 10 0 R
123123
/Last 10 0 R
124124
/Parent 6 0 R
125-
/Title (1)
125+
/Title ({'1'.encode('UTF-16').decode('latin-1')})
126126
>>
127127
endobj
128128
10 0 obj
@@ -132,15 +132,15 @@ def test_serialize_outline_with_headless_hierarchy(): # issues 239
132132
/First 11 0 R
133133
/Last 11 0 R
134134
/Parent 9 0 R
135-
/Title (1-1)
135+
/Title ({'1-1'.encode('UTF-16').decode('latin-1')})
136136
>>
137137
endobj
138138
11 0 obj
139139
<<
140140
/Count 0
141141
/Dest [5 0 R /XYZ 0 0 null]
142142
/Parent 10 0 R
143-
/Title (1-1-1)
143+
/Title ({'1-1-1'.encode('UTF-16').decode('latin-1')})
144144
>>
145145
endobj"""
146146
)

test/test_structure_tree.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def test_single_image_structure_tree():
8383
)
8484
assert (
8585
struct_builder.serialize(first_object_id=3)
86-
== """\
86+
== f"""\
8787
3 0 obj
8888
<<
8989
/K [4 0 R]
@@ -106,12 +106,12 @@ def test_single_image_structure_tree():
106106
endobj
107107
6 0 obj
108108
<<
109-
/Alt (Image description)
109+
/Alt ({'Image description'.encode('UTF-16').decode('latin-1')})
110110
/K [0]
111111
/P 4 0 R
112112
/Pg 1 0 R
113113
/S /Figure
114-
/T (Image title)
114+
/T ({'Image title'.encode('UTF-16').decode('latin-1')})
115115
/Type /StructElem
116116
>>
117117
endobj"""

0 commit comments

Comments
 (0)