From 9d5396a6ae0b36a01bba4764fe1ab11e3c79d28b Mon Sep 17 00:00:00 2001 From: Anderson Herzogenrath da Costa Date: Fri, 28 Jul 2023 08:49:12 -0400 Subject: [PATCH 01/11] New AES256 encryption fix annotation encryption --- docs/Encryption.md | 7 +- fpdf/annotations.py | 6 +- fpdf/encryption.py | 333 ++++++++++++++++++++++---- fpdf/enums.py | 1 + test/encryption/encryption_aes256.pdf | Bin 0 -> 2321 bytes test/encryption/test_encryption.py | 33 ++- 6 files changed, 325 insertions(+), 55 deletions(-) create mode 100644 test/encryption/encryption_aes256.pdf diff --git a/docs/Encryption.md b/docs/Encryption.md index a2f94173a..61a3c770a 100644 --- a/docs/Encryption.md +++ b/docs/Encryption.md @@ -81,7 +81,7 @@ If no permission is specified it will default to `all()`. ## Encryption method ## -There are 3 available encryption methods: +There are 4 available encryption methods: * `NO_ENCRYPTION` Data is not encrypted, only add the access permission flags. @@ -90,7 +90,10 @@ There are 3 available encryption methods: Default PDF encryption algorithm. * `AES_128` - Encrypts the data with AES algorithm. Requires the `cryptography` package. + Encrypts the data with 128 bit key AES algorithm. Requires the `cryptography` package. + + * `AES_256` + Encrypts the data with 256 bit key AES algorithm. Requires the `cryptography` package. ```python from fpdf import FPDF diff --git a/fpdf/annotations.py b/fpdf/annotations.py index 687cae8b2..b67a1e17c 100644 --- a/fpdf/annotations.py +++ b/fpdf/annotations.py @@ -52,12 +52,12 @@ def __init__( self.f_t = Name(field_type) if field_type else None self.v = value self.f = sum(flags) - self.contents = PDFString(contents) if contents else None + self.contents = PDFString(contents, encrypt=True) if contents else None self.a = action self.dest = dest self.c = f"[{color[0]} {color[1]} {color[2]}]" if color else None - self.t = PDFString(title) if title else None - self.m = PDFDate(modification_time) if modification_time else None + self.t = PDFString(title, encrypt=True) if title else None + self.m = PDFDate(modification_time, encrypt=True) if modification_time else None self.quad_points = ( pdf_list(f"{quad_point:.2f}" for quad_point in quad_points) if quad_points diff --git a/fpdf/encryption.py b/fpdf/encryption.py index 6e6624c81..b423051a2 100644 --- a/fpdf/encryption.py +++ b/fpdf/encryption.py @@ -1,16 +1,20 @@ import hashlib import logging import math +import stringprep +import unicodedata from os import urandom +from typing import Callable, Iterable, Type, Union -from .enums import EncryptionMethod -from .syntax import Name, PDFObject, PDFString -from .syntax import create_dictionary_string as pdf_dict, build_obj_dict +from .enums import AccessPermission, EncryptionMethod +from .errors import FPDFException +from .syntax import Name, PDFObject, PDFString, build_obj_dict +from .syntax import create_dictionary_string as pdf_dict # try to use cryptography for AES encryption try: from cryptography.hazmat.primitives.ciphers import Cipher, modes - from cryptography.hazmat.primitives.ciphers.algorithms import AES128 + from cryptography.hazmat.primitives.ciphers.algorithms import AES128, AES256 from cryptography.hazmat.primitives.padding import PKCS7 import_error = None @@ -35,7 +39,7 @@ class ARC4: MOD = 256 - def KSA(self, key): + def KSA(self, key: bytes) -> list[int]: key_length = len(key) S = list(range(self.MOD)) j = 0 @@ -44,7 +48,7 @@ def KSA(self, key): S[i], S[j] = S[j], S[i] return S - def PRGA(self, S): + def PRGA(self, S: list[int]) -> Iterable[int]: i = 0 j = 0 while True: @@ -54,7 +58,7 @@ def PRGA(self, S): K = S[(S[i] + S[j]) % self.MOD] yield K - def encrypt(self, key, text): + def encrypt(self, key: bytes, text: Union[bytes, bytearray]) -> list: keystream = self.PRGA(self.KSA(key)) res = [] for c in text: @@ -65,13 +69,14 @@ def encrypt(self, key, text): class CryptFilter: """Represents one crypt filter, listed under CF inside the encryption dictionary""" - def __init__(self, mode, length): + def __init__(self, mode: str, length: int) -> None: super().__init__() self.type = Name("CryptFilter") self.c_f_m = Name(mode) self.length = int(length / 8) + # self.auth_event = Name("DocOpen") - def serialize(self): + def serialize(self) -> str: obj_dict = build_obj_dict({key: getattr(self, key) for key in dir(self)}) return pdf_dict(obj_dict) @@ -83,13 +88,17 @@ class EncryptionDictionary(PDFObject): The PDF trailer must reference this object (/Encrypt) """ - def __init__(self, security_handler): + def __init__(self, security_handler: "StandardSecurityHandler") -> None: super().__init__() self.filter = Name("Standard") self.length = security_handler.key_length self.r = security_handler.r self.o = f"<{security_handler.o.upper()}>" self.u = f"<{security_handler.u.upper()}>" + if security_handler.r == 6: + self.o_e = f"<{security_handler.oe.upper()}>" + self.u_e = f"<{security_handler.ue.upper()}>" + self.perms = f"<{security_handler.perms.upper()}>" self.v = security_handler.v self.p = int32(security_handler.access_permission) if not security_handler.encrypt_metadata: @@ -119,11 +128,11 @@ class StandardSecurityHandler: def __init__( self, fpdf, - owner_password, - user_password=None, - permission=None, - encryption_method=None, - encrypt_metadata=False, + owner_password: str, + user_password: Union[str, None] = None, + permission: AccessPermission = AccessPermission.all(), + encryption_method: EncryptionMethod = EncryptionMethod.RC4, + encrypt_metadata: bool = False, ): self.fpdf = fpdf self.access_permission = ( @@ -137,17 +146,26 @@ def __init__( self.cf = None self.key_length = 128 + if import_error and self.encryption_method in ( + EncryptionMethod.AES_128, + EncryptionMethod.AES_256, + ): + raise EnvironmentError( + "cryptography module not available" + " - Try: 'pip install cryptography' or use RC4 encryption method" + f" - Import error was: {import_error}" + ) if self.encryption_method == EncryptionMethod.AES_128: - if import_error: - raise EnvironmentError( - "cryptography module not available" - " - Try: 'pip install cryptography' or use RC4 encryption method" - f" - Import error was: {import_error}" - ) self.v = 4 self.r = 4 fpdf._set_min_pdf_version("1.6") self.cf = CryptFilter(mode="AESV2", length=self.key_length) + elif self.encryption_method == EncryptionMethod.AES_256: + self.v = 5 + self.r = 6 + fpdf._set_min_pdf_version("2.0") + self.key_length = 256 + self.cf = CryptFilter(mode="AESV3", length=self.key_length) elif self.encryption_method == EncryptionMethod.NO_ENCRYPTION: self.v = 4 self.r = 4 @@ -162,41 +180,54 @@ def __init__( self.encrypt_metadata = encrypt_metadata - def generate_passwords(self, file_id): - """Return the first hash of the PDF file id""" + def generate_passwords(self, file_id: str) -> None: + """File_id is the first hash of the PDF file id""" self.file_id = file_id self.info_id = file_id[1:33] - self.o = self.generate_owner_password() - self.k = self.generate_encryption_key() - self.u = self.generate_user_password() + LOGGER.debug("Current revision: %s (%s)", self.r, self.info_id) + if self.r == 6: + self.k = self.get_random_bytes(32) + self.generate_user_password_rev6() + self.generate_owner_password_rev6() + self.generate_perms_rev6() + else: + self.o = self.generate_owner_password() + self.k = self.generate_encryption_key() + self.u = self.generate_user_password() - def get_encryption_obj(self): + def get_encryption_obj(self) -> EncryptionDictionary: """Return an encryption dictionary""" return EncryptionDictionary(self) - def encrypt(self, text, obj_id): + def encrypt( + self, text: Union[str, bytearray, bytes], obj_id: int + ) -> Union[str, bytes]: """Method invoked by PDFObject and PDFContentStream to encrypt strings and streams""" + LOGGER.debug("Encrypting %s", text) return ( self.encrypt_stream(text, obj_id) if isinstance(text, (bytearray, bytes)) else self.encrypt_string(text, obj_id) ) - def encrypt_string(self, string, obj_id): + def encrypt_string(self, string: str, obj_id: int) -> str: if self.encryption_method == EncryptionMethod.NO_ENCRYPTION: - return PDFString(string).serialize() + return PDFString(string, encrypt=False).serialize() LOGGER.debug("Encrypting string: %s", string) return f"<{bytes(self.encrypt_bytes(string.encode('latin-1'), obj_id)).hex().upper()}>" - def encrypt_stream(self, stream, obj_id): + def encrypt_stream(self, stream: bytes, obj_id: int) -> bytes: if self.encryption_method == EncryptionMethod.NO_ENCRYPTION: return stream return bytes(self.encrypt_bytes(stream, obj_id)) - def is_aes_algorithm(self): - return self.encryption_method == EncryptionMethod.AES_128 + def is_aes_algorithm(self) -> bool: + return ( + self.encryption_method == EncryptionMethod.AES_128 + or self.encryption_method == EncryptionMethod.AES_256 + ) - def encrypt_bytes(self, data, obj_id): + def encrypt_bytes(self, data: bytes, obj_id: int): """ PDF32000 reference - Algorithm 1: Encryption of data using the RC4 or AES algorithms Append object ID and generation ID to the key and encrypt the data @@ -218,22 +249,119 @@ def encrypt_bytes(self, data, obj_id): return self.encrypt_AES_cryptography(key, data) return ARC4().encrypt(key, data) - def encrypt_AES_cryptography(self, key, data): - iv = self.get_initialization_vector(16) - padder = PKCS7(self.key_length).padder() + def encrypt_AES_cryptography(self, key: bytes, data: bytes) -> bytes: + """Encrypts an array of bytes using AES algorithms (AES 128 or AES 256)""" + iv = bytearray(self.get_random_bytes(16)) + padder = PKCS7(128).padder() padded_data = padder.update(data) padded_data += padder.finalize() - cipher = Cipher(AES128(key), modes.CBC(iv)) + cipher = ( + Cipher(AES128(key), modes.CBC(iv)) + if self.encryption_method == EncryptionMethod.AES_128 + else Cipher(AES256(self.k), modes.CBC(iv)) + ) encryptor = cipher.encryptor() data = encryptor.update(padded_data) + encryptor.finalize() iv.extend(data) return iv @classmethod - def get_initialization_vector(cls, size): - return bytearray(urandom(size)) + def get_random_bytes(cls: Type["StandardSecurityHandler"], size: int) -> bytes: + """ + https://docs.python.org/3/library/os.html#os.urandom + os.urandom will use OS-specific sources to generate random bytes + suitable for cryptographic use + """ + return urandom(size) + + @classmethod + def prepare_string(cls: Type["StandardSecurityHandler"], string: str) -> bytes: + """ + PDF2.0 - ISO 32000-2:2020 + All passwords for revision 6 shall be based on Unicode. Preprocessing of a user-provided password + consists first of normalizing its representation by applying the "SASLPrep" profile (Internet RFC 4013) + of the "stringprep" algorithm (Internet RFC 3454) to the supplied password using the Normalize and BiDi + options. Next, the password string shall be converted to UTF-8 encoding, and then truncated to the + first 127 bytes if the string is longer than 127 bytes + + Python offers a stringprep module with the tables mapped in methods + """ + + LOGGER.debug("preparing password %s", string) + + # Mapping + def char_map(char: str) -> str: + if not char: + return "" + # Commonly mapped to nothing + if stringprep.in_table_b1(char): + return "" + # Map non-ascii space characters to space + if stringprep.in_table_c12(char): + return "\u0020" + return char + + if len(string) < 1: + return bytes() + + prepared_string = "".join(char_map(c) for c in string) + + # Normalization - applies Unicode normalization form KC + prepared_string = unicodedata.ucd_3_2_0.normalize("NFKC", prepared_string) + + # Prohibited output - RCF4013 2.3 + def is_prohibited(char: str) -> bool: + return ( + stringprep.in_table_c12(char) # Non-ASCII space characters + or stringprep.in_table_c21_c22(char) # Control characters + or stringprep.in_table_c3(char) # Private use + or stringprep.in_table_c4(char) # Non-character code points + or stringprep.in_table_c5(char) # Surrogate codes + or stringprep.in_table_c6(char) # Inappropriate for plain text + or stringprep.in_table_c7( + char + ) # Inappropriate for canonical representation + or stringprep.in_table_c8( + char + ) # Change display properties or are deprecated + or stringprep.in_table_c9(char) # Tagging characters + ) - def padded_password(self, password): + for char in prepared_string: + if is_prohibited(char): + raise FPDFException( + f"The password {string} contains prohibited characters" + ) + + # Bidirectional characters + def has_character(string: str, fun: Callable) -> bool: + for char in string: + if fun(char): + return True + return False + + if has_character(prepared_string, stringprep.in_table_d1): + # If a string contains any RandALCat character, the string MUST NOT contain any LCat character. + if has_character(prepared_string, stringprep.in_table_d2): + raise FPDFException( + f"The password {string} contains invalid bidirectional characters." + ) + # If a string contains any RandALCat character, a RandALCat character MUST be the first character + # of the string, and a RandALCat character MUST be the last character of the string. + if not ( + stringprep.in_table_d1(prepared_string[0]) + and stringprep.in_table_d1(prepared_string[-1]) + ): + raise FPDFException( + f"The password {string} contains invalid bidirectional characters." + ) + + if len(prepared_string) > 127: + prepared_string = prepared_string[:127] + + return prepared_string.encode("UTF-8") + + def padded_password(self, password: str) -> bytearray: """ PDF32000 reference - Algorithm 2: Computing an encryption key Step (a) - Add the default padding at the end of provided password to make it 32 bit long @@ -244,7 +372,7 @@ def padded_password(self, password): p.extend(self.DEFAULT_PADDING[: (32 - len(p))]) return p - def generate_owner_password(self): + def generate_owner_password(self) -> str: """ PDF32000 reference - Algorithm 3: Computing the encryption dictionary's O (owner password) value The security handler is only using revision 3 or 4, so the legacy r2 version is not implemented here @@ -258,10 +386,10 @@ def generate_owner_password(self): new_key = [] for k in rc4key: new_key.append(k ^ i) - result = ARC4().encrypt(new_key, result) + result = ARC4().encrypt(bytes(new_key), result) return bytes(result).hex() - def generate_user_password(self): + def generate_user_password(self) -> str: """ PDF32000 reference - Algorithm 5: Computing the encryption dictionary's U (user password) value The security handler is only using revision 3 or 4, so the legacy r2 version is not implemented here @@ -269,19 +397,126 @@ def generate_user_password(self): m = hashlib.new("md5", usedforsecurity=False) m.update(bytearray(self.DEFAULT_PADDING)) m.update(bytes.fromhex(self.info_id)) - result = m.digest() + result = bytearray(m.digest()) key = self.k for i in range(20): new_key = [] for k in key: new_key.append(k ^ i) - result = ARC4().encrypt(new_key, result) + result = ARC4().encrypt(bytes(new_key), result) result.extend( (result[x] ^ self.DEFAULT_PADDING[x]) for x in range(16) ) # add 16 bytes of random padding return bytes(result).hex() - def generate_encryption_key(self): + def compute_hash( + self, input_password: bytes, salt: bytes, user_key: bytes = bytearray() + ) -> bytes: + """ + Algorithm 2B - section 7.6.4.3.4 of the ISO 32000-2:2020 + Applied on Security handlers revision 6 + """ + k = hashlib.sha256(input_password + salt + user_key).digest() + round = 0 + while True: + round += 1 + k1 = input_password + k + user_key + # Step (a + b) + cipher = Cipher(AES128(k[:16]), modes.CBC(k[16:32])) + encryptor = cipher.encryptor() + e = encryptor.update(k1 * 64) + encryptor.finalize() + # Step (c) + # remainder = int.from_bytes(e[:16], byteorder="big") % 3 + remainder = sum(e[:16]) % 3 + # Step (d) + if remainder == 0: + k = hashlib.sha256(e).digest() + elif remainder == 1: + k = hashlib.sha384(e).digest() + else: + k = hashlib.sha512(e).digest() + # Step (e) + if round >= 64 and e[-1] <= round - 32: + break + + return k[:32] + + def generate_user_password_rev6(self) -> None: + """ + Generating the U (user password) and UE (user encryption) + for security handlers of revision 6 + Algorithm 8 - Section 7.6.4.4.7 of the ISO 32000-2:2020 + """ + user_password = self.prepare_string(self.user_password) + if not user_password: + user_password = bytearray() + user_validation_salt = self.get_random_bytes(8) + user_key_salt = self.get_random_bytes(8) + u = ( + self.compute_hash(input_password=user_password, salt=user_validation_salt) + + user_validation_salt + + user_key_salt + ) + self.u = u.hex() + + key = self.compute_hash(input_password=user_password, salt=user_key_salt) + cipher = Cipher(AES256(key), modes.CBC(b"\x00" * 16)) + encryptor = cipher.encryptor() + ue = encryptor.update(self.k) + encryptor.finalize() + self.ue = ue.hex() + + def generate_owner_password_rev6(self) -> None: + """ + Generating the O (owner password) and OE (owner encryption) + for security handlers of revision 6 + Algorithm 9 - Section 7.6.4.4.8 of the ISO 32000-2:2020 + """ + owner_password = self.prepare_string(self.owner_password) + if not owner_password: + owner_password = bytearray() + owner_validation_salt = self.get_random_bytes(8) + owner_key_salt = self.get_random_bytes(8) + o = ( + self.compute_hash( + input_password=owner_password, + salt=owner_validation_salt, + user_key=bytes.fromhex(self.u), + ) + + owner_validation_salt + + owner_key_salt + ) + self.o = o.hex() + + key = self.compute_hash( + input_password=owner_password, + salt=owner_key_salt, + user_key=bytes.fromhex(self.u), + ) + + cipher = Cipher(AES256(key), modes.CBC(b"\x00" * 16)) + encryptor = cipher.encryptor() + oe = encryptor.update(self.k) + encryptor.finalize() + self.oe = oe.hex() + + def generate_perms_rev6(self) -> None: + """ + 7.6.4.4.9 Algorithm 10: Computing the encryption dictionary’s Perms (permissions) value + (Security handlers of revision 6) of the ISO 32000-2:2020 + """ + perms64b = 0xFFFFFFFF00000000 | self.access_permission + encrypt_metadata = b"T" if self.encrypt_metadata else b"F" + perms_input = ( + perms64b.to_bytes(8, byteorder="little", signed=False) + + encrypt_metadata + + b"adb" + + self.get_random_bytes(4) + ) + cipher = Cipher(AES256(self.k), modes.ECB()) + encryptor = cipher.encryptor() + perms = encryptor.update(perms_input) + encryptor.finalize() + self.perms = perms.hex() + + def generate_encryption_key(self) -> bytes: """ PDF32000 reference Algorithm 2: Computing an encryption key @@ -303,13 +538,13 @@ def generate_encryption_key(self): return result -def md5(data): +def md5(data: Union[bytes, bytearray]) -> bytes: h = hashlib.new("md5", usedforsecurity=False) h.update(data) return h.digest() -def int32(n): +def int32(n: int) -> int: """convert long to signed 32 bit integer""" n = n & 0xFFFFFFFF return (n ^ 0x80000000) - 0x80000000 diff --git a/fpdf/enums.py b/fpdf/enums.py index 32b73a9de..68b184a5b 100644 --- a/fpdf/enums.py +++ b/fpdf/enums.py @@ -845,3 +845,4 @@ class EncryptionMethod(Enum): NO_ENCRYPTION = 0 RC4 = 1 AES_128 = 2 + AES_256 = 3 diff --git a/test/encryption/encryption_aes256.pdf b/test/encryption/encryption_aes256.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f90f862095cdb59ee265861dc45b30f83c613ab7 GIT binary patch literal 2321 zcmai0zi%8x6o!au3j_rfLYl@vka9b}e<;>TvokxfMJ$`UBqlO0Hg{|1!g<%)+q3LQ zK&mupkZ2Jk2qZ*LcXY<>>TX`xUOgy_L!T%PgL(-ST3kLhC#h*sgC5zp2}yMF9{4+)J#?Uq zX)%SSMp)AwU#ch6!1(~+{Cue=%BS80&Lnk)8PBaz0g`3H;$R@3Dc2LA+wEB}`IM$J zRXN^8K)`h2^fT_F_*|70qus2^i{1f>9awz6H|XV^GYM!0Bmgs@ys4K?J}SpmFCQT} z%YLJi8S0xOkEjoWVGVr8VVKpq&wFJ*kK(g=v0YzAgz9G3sIKyCCwl6!v2^?0C9%_e zu>RWDFI~GoeD=F{ZlAuXPQLfW-p?=o_sefTeSGb?{rAnC-+#UJ$rU~*7buQE?&L+#>@3TC*OMY;Q9x9pReCKvH#@@gKyo5)1Upd{M8?K ze$;Q?`1|_ZkN#X(d8Vn4!%z+@Ma<-pWTQNQKvBGu5BKtV(95DYEg+vkv5n&OL1By0 z;K(q;af3J1?G*OL!wgUjh!25koC80Bpyu4p1pKP(b#u^D3_Ss;%J<>?j_b>1HD6*-OIQRbB}@`&aDb{IGVxe>nJVrQkb(o6npWX~$@va(qOoKK z3+h!u389HII;5OfV->-{C)(PCJFKBTm2k>y%RLC%5lV?q;XtU9D@{4g?FWRV7&ER2 z<=7`lU|tGKQ%()X+9oQvRA{H2OO&(L223z9G{r#@oEpm7GvMhN@LXsZ>DK+}iwi3# zwrTeQdnjQ>kEnIk!EQZOdmF|?9F1uuud_aMN3@jLswMDC|-qR zgQuy+P#lgKBNHnnW&l#47Er=oh9qbyNJyB`3g!h1TylU^NzQ|x*E=D!)f6qt0AMB3 zTI#f-A#i1Z5eMC~AqHC~6c5x`49w6Vh|<=lGUcgAsRUhsn>BO_2!=komRCE_wkhL` zazIx)Wwe%>C<&dH2^^fmGQgG&f+V~34ybtoYx7YZf1F2)oSBd&78_$F^Vq|7OHvUM zsX|~_F=~i5(E5y!oC+HPbo9hCPekgSoR>X@AkIVjhAW*GU{{uV`SIz(c literal 0 HcmV?d00001 diff --git a/test/encryption/test_encryption.py b/test/encryption/test_encryption.py index 81eeeab95..0c5942515 100644 --- a/test/encryption/test_encryption.py +++ b/test/encryption/test_encryption.py @@ -132,7 +132,7 @@ def fixed_iv(size): encryption_method=EncryptionMethod.AES_128, permissions=AccessPermission.none(), ) - pdf._security_handler.get_initialization_vector = fixed_iv + pdf._security_handler.get_random_bytes = fixed_iv assert_pdf_equal(pdf, HERE / "encryption_aes128.pdf", tmp_path) @@ -201,3 +201,34 @@ def test_encrypt_outline(tmp_path): # issue 732 pdf.start_section("Subtitle", level=1) pdf.set_encryption(owner_password="fpdf2") assert_pdf_equal(pdf, HERE / "encrypt_outline.pdf", tmp_path) + + +def test_encryption_aes256(tmp_path): + pdf = FPDF() + + def custom_file_id(): + return pdf._default_file_id(bytearray([0xFF])) + + pdf.file_id = custom_file_id + + def fixed_iv(size): + return bytearray(size) + + pdf.set_author("author") + pdf.set_subject("string to be encrypted") + pdf.add_page() + pdf.set_font("helvetica", size=12) + pdf.cell(txt="hello world") + pdf.text(50, 50, "Some text") + pdf.ink_annotation( + [(40, 50), (70, 25), (100, 50), (70, 75), (40, 50)], + title="Lucas", + contents="Some encrypted annotation", + ) + pdf.set_encryption( + owner_password="fpdf2", + encryption_method=EncryptionMethod.AES_256, + permissions=AccessPermission.none(), + ) + pdf._security_handler.get_random_bytes = fixed_iv + assert_pdf_equal(pdf, HERE / "encryption_aes256.pdf", tmp_path) From 53a3a0ea03c2eff532b8b720fd2c01c819c6b31f Mon Sep 17 00:00:00 2001 From: Anderson Herzogenrath da Costa Date: Fri, 28 Jul 2023 09:06:29 -0400 Subject: [PATCH 02/11] change typing --- fpdf/encryption.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fpdf/encryption.py b/fpdf/encryption.py index b423051a2..b382d1674 100644 --- a/fpdf/encryption.py +++ b/fpdf/encryption.py @@ -39,7 +39,7 @@ class ARC4: MOD = 256 - def KSA(self, key: bytes) -> list[int]: + def KSA(self, key: bytes) -> list: key_length = len(key) S = list(range(self.MOD)) j = 0 @@ -48,7 +48,7 @@ def KSA(self, key: bytes) -> list[int]: S[i], S[j] = S[j], S[i] return S - def PRGA(self, S: list[int]) -> Iterable[int]: + def PRGA(self, S: list) -> Iterable[int]: i = 0 j = 0 while True: From e99165a7b75e95bd9e8d3f0401a12763df62d556 Mon Sep 17 00:00:00 2001 From: Anderson Herzogenrath da Costa Date: Fri, 28 Jul 2023 09:54:29 -0400 Subject: [PATCH 03/11] please pylint --- fpdf/encryption.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fpdf/encryption.py b/fpdf/encryption.py index b382d1674..0ebb09356 100644 --- a/fpdf/encryption.py +++ b/fpdf/encryption.py @@ -222,9 +222,9 @@ def encrypt_stream(self, stream: bytes, obj_id: int) -> bytes: return bytes(self.encrypt_bytes(stream, obj_id)) def is_aes_algorithm(self) -> bool: - return ( - self.encryption_method == EncryptionMethod.AES_128 - or self.encryption_method == EncryptionMethod.AES_256 + return self.encryption_method in ( + EncryptionMethod.AES_128, + EncryptionMethod.AES_256, ) def encrypt_bytes(self, data: bytes, obj_id: int): @@ -335,10 +335,7 @@ def is_prohibited(char: str) -> bool: # Bidirectional characters def has_character(string: str, fun: Callable) -> bool: - for char in string: - if fun(char): - return True - return False + return any(fun(char) for char in string) if has_character(prepared_string, stringprep.in_table_d1): # If a string contains any RandALCat character, the string MUST NOT contain any LCat character. @@ -409,17 +406,21 @@ def generate_user_password(self) -> str: ) # add 16 bytes of random padding return bytes(result).hex() + @classmethod def compute_hash( - self, input_password: bytes, salt: bytes, user_key: bytes = bytearray() + cls: Type["StandardSecurityHandler"], + input_password: bytes, + salt: bytes, + user_key: bytes = bytearray(), ) -> bytes: """ Algorithm 2B - section 7.6.4.3.4 of the ISO 32000-2:2020 Applied on Security handlers revision 6 """ k = hashlib.sha256(input_password + salt + user_key).digest() - round = 0 + round_number = 0 while True: - round += 1 + round_number += 1 k1 = input_password + k + user_key # Step (a + b) cipher = Cipher(AES128(k[:16]), modes.CBC(k[16:32])) @@ -436,7 +437,7 @@ def compute_hash( else: k = hashlib.sha512(e).digest() # Step (e) - if round >= 64 and e[-1] <= round - 32: + if round_number >= 64 and e[-1] <= round - 32: break return k[:32] From a6538880c8aa7b759942889b0f78baec9a81813a Mon Sep 17 00:00:00 2001 From: Anderson Herzogenrath da Costa Date: Fri, 28 Jul 2023 09:56:32 -0400 Subject: [PATCH 04/11] please pylint --- fpdf/encryption.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fpdf/encryption.py b/fpdf/encryption.py index 0ebb09356..746179089 100644 --- a/fpdf/encryption.py +++ b/fpdf/encryption.py @@ -437,7 +437,7 @@ def compute_hash( else: k = hashlib.sha512(e).digest() # Step (e) - if round_number >= 64 and e[-1] <= round - 32: + if round_number >= 64 and e[-1] <= round_number - 32: break return k[:32] From bef6da20ca0ae45fc67a6fa1cb4c6a965a992358 Mon Sep 17 00:00:00 2001 From: Anderson Herzogenrath da Costa Date: Fri, 28 Jul 2023 10:42:22 -0400 Subject: [PATCH 05/11] bypass error B305 on bandit --- .banditrc.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.banditrc.yml b/.banditrc.yml index b9f6df50d..d72c34a0e 100644 --- a/.banditrc.yml +++ b/.banditrc.yml @@ -2,3 +2,6 @@ skips: # Use of assert detected. The enclosed code will be removed when compiling to optimised byte code. # => OK, we don't care though - B101 + # [B305:blacklist] Use of insecure cipher mode cryptography.hazmat.primitives.ciphers.modes.ECB. + # Need to bypass this check because the PDF specification demands the use of ECB mode on one of the encryption algorithms + - B305 From 677b06bb91e28ac0e095a4e1c1d4d7d894b6ddc4 Mon Sep 17 00:00:00 2001 From: Anderson Herzogenrath da Costa Date: Fri, 28 Jul 2023 10:46:13 -0400 Subject: [PATCH 06/11] Update .banditrc.yml --- .banditrc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.banditrc.yml b/.banditrc.yml index d72c34a0e..411a418a1 100644 --- a/.banditrc.yml +++ b/.banditrc.yml @@ -2,6 +2,6 @@ skips: # Use of assert detected. The enclosed code will be removed when compiling to optimised byte code. # => OK, we don't care though - B101 - # [B305:blacklist] Use of insecure cipher mode cryptography.hazmat.primitives.ciphers.modes.ECB. + # B305:blacklist - Use of insecure cipher mode cryptography.hazmat.primitives.ciphers.modes.ECB. # Need to bypass this check because the PDF specification demands the use of ECB mode on one of the encryption algorithms - B305 From e6cd2597682da93b5684694c4356c61a12929830 Mon Sep 17 00:00:00 2001 From: Anderson Herzogenrath da Costa Date: Fri, 28 Jul 2023 11:24:35 -0400 Subject: [PATCH 07/11] include nosemgrep on ECB encryption --- fpdf/encryption.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fpdf/encryption.py b/fpdf/encryption.py index 746179089..d7de6cc53 100644 --- a/fpdf/encryption.py +++ b/fpdf/encryption.py @@ -287,8 +287,6 @@ def prepare_string(cls: Type["StandardSecurityHandler"], string: str) -> bytes: Python offers a stringprep module with the tables mapped in methods """ - LOGGER.debug("preparing password %s", string) - # Mapping def char_map(char: str) -> str: if not char: @@ -512,6 +510,7 @@ def generate_perms_rev6(self) -> None: + b"adb" + self.get_random_bytes(4) ) + # nosemgrep: python.cryptography.security.insecure-cipher-mode-ecb.insecure-cipher-mode-ecb cipher = Cipher(AES256(self.k), modes.ECB()) encryptor = cipher.encryptor() perms = encryptor.update(perms_input) + encryptor.finalize() From eb40d62d8a9ecdeecfd521b3ea17c26578e8cef8 Mon Sep 17 00:00:00 2001 From: Anderson Herzogenrath da Costa Date: Fri, 28 Jul 2023 11:28:05 -0400 Subject: [PATCH 08/11] Update encryption.py --- fpdf/encryption.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fpdf/encryption.py b/fpdf/encryption.py index d7de6cc53..a243ef604 100644 --- a/fpdf/encryption.py +++ b/fpdf/encryption.py @@ -510,6 +510,7 @@ def generate_perms_rev6(self) -> None: + b"adb" + self.get_random_bytes(4) ) + # nosemgrep: python.cryptography.security.insecure-cipher-mode-ecb.insecure-cipher-mode-ecb cipher = Cipher(AES256(self.k), modes.ECB()) encryptor = cipher.encryptor() From f310cfc078d8a7217ebca64e14b75a4898616ab4 Mon Sep 17 00:00:00 2001 From: Anderson Herzogenrath da Costa Date: Fri, 28 Jul 2023 11:35:44 -0400 Subject: [PATCH 09/11] Update encryption.py --- fpdf/encryption.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fpdf/encryption.py b/fpdf/encryption.py index a243ef604..d7de6cc53 100644 --- a/fpdf/encryption.py +++ b/fpdf/encryption.py @@ -510,7 +510,6 @@ def generate_perms_rev6(self) -> None: + b"adb" + self.get_random_bytes(4) ) - # nosemgrep: python.cryptography.security.insecure-cipher-mode-ecb.insecure-cipher-mode-ecb cipher = Cipher(AES256(self.k), modes.ECB()) encryptor = cipher.encryptor() From 73c6db524428cb8414890539a96d54691848f487 Mon Sep 17 00:00:00 2001 From: Anderson Herzogenrath da Costa Date: Tue, 1 Aug 2023 23:16:17 -0400 Subject: [PATCH 10/11] Implement revision suggestions --- fpdf/encryption.py | 30 +++++++++++++-------------- test/encryption/test_encryption.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 16 deletions(-) diff --git a/fpdf/encryption.py b/fpdf/encryption.py index d7de6cc53..5f3bf77d9 100644 --- a/fpdf/encryption.py +++ b/fpdf/encryption.py @@ -74,7 +74,6 @@ def __init__(self, mode: str, length: int) -> None: self.type = Name("CryptFilter") self.c_f_m = Name(mode) self.length = int(length / 8) - # self.auth_event = Name("DocOpen") def serialize(self) -> str: obj_dict = build_obj_dict({key: getattr(self, key) for key in dir(self)}) @@ -92,14 +91,14 @@ def __init__(self, security_handler: "StandardSecurityHandler") -> None: super().__init__() self.filter = Name("Standard") self.length = security_handler.key_length - self.r = security_handler.r + self.r = security_handler.revision self.o = f"<{security_handler.o.upper()}>" self.u = f"<{security_handler.u.upper()}>" - if security_handler.r == 6: + if security_handler.revision == 6: self.o_e = f"<{security_handler.oe.upper()}>" self.u_e = f"<{security_handler.ue.upper()}>" self.perms = f"<{security_handler.perms.upper()}>" - self.v = security_handler.v + self.v = security_handler.version self.p = int32(security_handler.access_permission) if not security_handler.encrypt_metadata: self.encrypt_metadata = "false" @@ -156,24 +155,24 @@ def __init__( f" - Import error was: {import_error}" ) if self.encryption_method == EncryptionMethod.AES_128: - self.v = 4 - self.r = 4 + self.version = 4 + self.revision = 4 fpdf._set_min_pdf_version("1.6") self.cf = CryptFilter(mode="AESV2", length=self.key_length) elif self.encryption_method == EncryptionMethod.AES_256: - self.v = 5 - self.r = 6 + self.version = 5 + self.revision = 6 fpdf._set_min_pdf_version("2.0") self.key_length = 256 self.cf = CryptFilter(mode="AESV3", length=self.key_length) elif self.encryption_method == EncryptionMethod.NO_ENCRYPTION: - self.v = 4 - self.r = 4 + self.version = 4 + self.revision = 4 fpdf._set_min_pdf_version("1.6") self.cf = CryptFilter(mode="V2", length=self.key_length) else: - self.v = 2 - self.r = 3 + self.version = 2 + self.revision = 3 fpdf._set_min_pdf_version("1.5") # not including crypt filter because it's only required on V=4 # if needed, it would be CryptFilter(mode=V2) @@ -184,8 +183,7 @@ def generate_passwords(self, file_id: str) -> None: """File_id is the first hash of the PDF file id""" self.file_id = file_id self.info_id = file_id[1:33] - LOGGER.debug("Current revision: %s (%s)", self.r, self.info_id) - if self.r == 6: + if self.revision == 6: self.k = self.get_random_bytes(32) self.generate_user_password_rev6() self.generate_owner_password_rev6() @@ -472,7 +470,7 @@ def generate_owner_password_rev6(self) -> None: """ owner_password = self.prepare_string(self.owner_password) if not owner_password: - owner_password = bytearray() + raise FPDFException(f"Invalid owner password {self.owner_password}") owner_validation_salt = self.get_random_bytes(8) owner_key_salt = self.get_random_bytes(8) o = ( @@ -530,7 +528,7 @@ def generate_encryption_key(self) -> bytes: ) ) m.update(bytes.fromhex(self.info_id)) - if self.encrypt_metadata is False and self.v == 4: + if self.encrypt_metadata is False and self.version == 4: m.update(bytes([0xFF, 0xFF, 0xFF, 0xFF])) result = m.digest()[: (math.ceil(self.key_length / 8))] for _ in range(50): diff --git a/test/encryption/test_encryption.py b/test/encryption/test_encryption.py index 0c5942515..098784d2d 100644 --- a/test/encryption/test_encryption.py +++ b/test/encryption/test_encryption.py @@ -1,11 +1,14 @@ # pylint: disable=protected-access +from os import devnull from pathlib import Path import sys import pytest from fpdf import FPDF +from fpdf.encryption import StandardSecurityHandler as sh from fpdf.enums import AccessPermission, EncryptionMethod +from fpdf.errors import FPDFException from test.conftest import assert_pdf_equal HERE = Path(__file__).resolve().parent @@ -232,3 +235,33 @@ def fixed_iv(size): ) pdf._security_handler.get_random_bytes = fixed_iv assert_pdf_equal(pdf, HERE / "encryption_aes256.pdf", tmp_path) + + +def test_blank_owner_password(tmp_path): + pdf = FPDF() + pdf.set_encryption( + owner_password="", + encryption_method=EncryptionMethod.AES_256, + permissions=AccessPermission.none(), + ) + with pytest.raises(FPDFException) as e: + pdf.output(devnull) + assert str(e.value) == "Invalid owner password " + + +def test_password_prep(): + # The PDF standard requires the passwords to be prepared using the stringprep algorithm + # using the SASLprep as per RFC 4013 + # https://datatracker.ietf.org/doc/html/rfc4013 + # Those assertions are bases on the examples section of the RFC + # + assert sh.prepare_string("I\xadX") == b"IX" # SOFT HYPHEN mapped to nothing + assert sh.prepare_string("user") == b"user" # no transformation + assert sh.prepare_string("USER") == b"USER" # case preserved + assert sh.prepare_string("\xaa") == b"a" # output is NFKC, input in ISO 8859-1 + assert sh.prepare_string("\u2168") == b"IX" # output is NFKC, will match #1 + with pytest.raises(FPDFException) as e: + sh.prepare_string("\x07") # Error - prohibited character + assert str(e.value) == "The password  contains prohibited characters" + with pytest.raises(FPDFException) as e: + sh.prepare_string("\u0627\x31") # Error - bidirectional check From 525a7d538d4fc4a1006bb3e9a6b747c6b7c577b3 Mon Sep 17 00:00:00 2001 From: Anderson Herzogenrath da Costa Date: Wed, 2 Aug 2023 05:31:57 -0400 Subject: [PATCH 11/11] add tests --- .../encryption_aes256_user_password.pdf | Bin 0 -> 1914 bytes test/encryption/test_encryption.py | 38 +++++++++++++++--- 2 files changed, 33 insertions(+), 5 deletions(-) create mode 100644 test/encryption/encryption_aes256_user_password.pdf diff --git a/test/encryption/encryption_aes256_user_password.pdf b/test/encryption/encryption_aes256_user_password.pdf new file mode 100644 index 0000000000000000000000000000000000000000..a12487e97e81ff9890fd32bec4b74580ec5b78b3 GIT binary patch literal 1914 zcma)7ON$&;6rPnx;W|6vf)iY5rtYKei!_7xnZ!w$(4EA^5U|pHXQo3>)lgL(W`ep> zbQ3q?&XpVe1%lv4!RLPnf(ti-u3QKTp6Z^-G>I_YMLo`~`<-*X@2h(+-t_Sj-M|?^ z7}fXgXM;hOyLw)Ah-CTJbkd?722;bkS$-o;rlqZq&<=(O*L;I2qzba3G|b*QK1?XT zS?;Aa+uY1jHR<8#3hee_s?4~X*3}6n%Hy=VimLf+1_1ri>*aAh?@+#7C0Ebp2h}p% zqQ1Q0X%bg<<*eRYNz<2jR(;i_s%ufPTyv$V=Z9!8IH_>?nvYO^t0}AYuxwH_K1R8N zh))lu<21Ztp#i7|W`Kqz$}qL{ycwq!iPcj2qF3tZQ=Zllr?W0K(BrJ^l27A$lCu1I zs`k2lq(#4|?V3~`WNS~2`TpB4|MK?-#wf=@Yd0%zm*R^{PXd9kKX+K-OC^0Ti>q# z_3QV&7AL7rRN*US+OkcthDP(NY4#v>({Y*Qp@MR!)gH>B7|AIfoCT#F$k5x;gHY!KVym!V8guHUrG!xH zw2Gl1MjJ_R^i~;T3x}2WT5>17GKDV~cZ5>n10)0v@24@aa!&1-uz)cuB%uZN)<)(9 zH#8Jf7g!l9qYGR)<(!qy7!xtUMALwyAUJ5sHdn|~E9AM@D>CXP%Lf;4pxlJf9rnbb zjGjvCn&ZQ6Y35BB3v=|L8>uTN@F$|(a@K;iPyIgX%4$+JlP6G=i!9$p1FeGhjB+0g zGc;Q7f-z2DDR80sUYs_Oy1;x?IylNCXOZE+gruBse;!#-+IBz#77TXAF}`pancegvW+O;<%(XV#O`U zFE|sdu&OuE5J88d?f|U3;Iw~f&R%Y#frTW_LOv=(AuPk*b0ajD7Mn<{Cb1BjS`J-$ zEQI1JIxN9bR8i@9{2#sFhTdZ&h6Iiat^|*cYwMW;HhGXF`hvPBEQPRF#78f%cF?*2 z3mcA^lLmUZgZPSRj+(TaVWhLO0+8UGakPuhVj3-`QFRv6um$vEB}VkK-!VXX`a6!{ z)Oczw|BtmoJ(GjL-*UFLYs%>?HH$NCG<}$$jeJ