def encrypt_string(self, string: str, obj_id: int) -> str:
    """
    Serialize `string` as a PDF string, encrypting it when encryption is enabled.

    Args:
        string: the text to serialize.
        obj_id: forwarded to `encrypt_bytes()` together with the encoded text.

    Returns:
        When `self.encryption_method` is `EncryptionMethod.NO_ENCRYPTION`,
        the result of `PDFString(string, encrypt=False).serialize()`.
        Otherwise the output of `encrypt_bytes()` rendered as hexadecimal
        digits between angle brackets.
    """
    if self.encryption_method == EncryptionMethod.NO_ENCRYPTION:
        return PDFString(string, encrypt=False).serialize()
    LOGGER.debug("Encrypting string: %s", string)
    try:
        # Encode once, and guard only the encoding attempt, so that an error
        # raised by the encryption itself is never mistaken for a charset issue.
        encoded = string.encode("latin-1")
    except UnicodeEncodeError:
        # Not representable in latin-1: fall back to UTF-16 big-endian,
        # prefixed with the matching byte order mark.
        encoded = BOM_UTF16_BE + string.encode("utf-16-be")
        encrypted = self.encrypt_bytes(encoded, obj_id)
        # hexlify() yields lowercase digits; kept as-is so that the produced
        # documents stay byte-identical to those generated previously.
        return f'<{hexlify(bytearray(encrypted)).decode("latin-1")}>'
    encrypted = self.encrypt_bytes(encoded, obj_id)
    return f"<{bytes(encrypted).hex().upper()}>"
def test_encryption_unicode(tmp_path):
    """Issue #933: encrypt a document whose subject, section title and cell text are in Thai."""
    thai_title = "ทดสอบภาษาไทย"
    thai_paragraph = "สวัสดี ทดสอบภาษาไทย กีกี้ กาก้า ก๋า อ้า อ้ำ ฤาษี ทุ่มทุน อุ้งอุ๋ง น้ำใจ ฯลฯ ญาญ่า ฐาน ฎีกา ฏฒัฯนณ ภัทร์ สิทธิ์"
    garuda_font_path = HERE.parent / "fonts" / "Garuda.ttf"
    reference_pdf_path = HERE / "encryption_unicode.pdf"

    pdf = FPDF()
    # Document metadata: one latin-1 compatible value, one that is not
    pdf.set_author("Thai")
    pdf.set_subject(thai_title)

    pdf.add_page()
    pdf.set_text_shaping()
    pdf.add_font("Garuda", fname=garuda_font_path)
    pdf.set_font("Garuda", size=12)
    pdf.start_section(thai_title)
    pdf.cell(txt=thai_paragraph)

    # Encryption is enabled last, right before comparing with the reference file
    pdf.set_encryption(owner_password="fpdf2")
    assert_pdf_equal(pdf, reference_pdf_path, tmp_path)