Skip to content

Commit c8ba914

Browse files
committed
NEW: add reattach_fields function
Parse page/document annotations for orphan fields and reattach them to AcroForm/Fields. Closes #2453.
1 parent 178014e commit c8ba914

File tree

2 files changed

+67
-0
lines changed

2 files changed

+67
-0
lines changed

pypdf/_writer.py

+46
Original file line numberDiff line numberDiff line change
@@ -932,6 +932,52 @@ def update_page_form_field_values(
932932
value if value in k[AA.AP]["/N"] else "/Off"
933933
)
934934

935+
def reattach_fields(
    self, page: Optional[PageObject] = None
) -> List[DictionaryObject]:
    """
    Parse annotations within the page looking for orphan fields and
    reattach them to the AcroForm ``/Fields`` array.

    The AcroForm dictionary of the document root and its ``/Fields`` array
    are created (empty) when they are missing. This happens before the
    page's ``/Annots`` entry is checked, so they are created even for a
    page without annotations.

    Args:
        page: page to analyze.
            If none is provided, all pages will be analyzed.

    Returns:
        list of reattached fields
    """
    lst: List[DictionaryObject] = []
    if page is None:
        # No page given: process every page and merge the results.
        for p in self.pages:
            lst += self.reattach_fields(p)
        return lst

    # Fetch the AcroForm dictionary, creating it when absent.
    try:
        af = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
    except KeyError:
        af = DictionaryObject()
        self._root_object[NameObject(CatalogDictionary.ACRO_FORM)] = af
    # Fetch the Fields array, creating it when absent.
    try:
        fields = cast(ArrayObject, af[InteractiveFormDictEntries.Fields])
    except KeyError:
        fields = ArrayObject()
        af[NameObject(InteractiveFormDictEntries.Fields)] = fields

    if "/Annots" not in page:
        return lst
    annots = cast(ArrayObject, page["/Annots"])
    for idx, entry in enumerate(annots):
        # Remember how the annotation is stored in the array before
        # resolving it to the underlying dictionary.
        indirect = isinstance(entry, IndirectObject)
        annotation = cast(DictionaryObject, entry.get_object())
        # Only widget annotations carrying their own field type are handled.
        if annotation.get("/Subtype", "") == "/Widget" and "/FT" in annotation:
            if annotation.indirect_reference in fields:
                # Already listed in /Fields: nothing to reattach.
                continue
            if not indirect:
                # Direct object: register it with the writer and store the
                # value returned by _add_object in the array instead.
                # NOTE(review): presumably _add_object also sets
                # annotation.indirect_reference, which is used just below
                # - confirm.
                annots[idx] = self._add_object(annotation)
            fields.append(annotation.indirect_reference)
            lst.append(annotation)
    return lst
980+
935981
def clone_reader_document_root(self, reader: PdfReader) -> None:
936982
"""
937983
Copy the reader document root to the writer and all sub elements,

tests/test_writer.py

+21
Original file line numberDiff line numberDiff line change
@@ -1978,3 +1978,24 @@ def create_number_pdf(n) -> BytesIO:
19781978
for n, page in enumerate(reader.pages):
19791979
text = page.extract_text()
19801980
assert text == str(n)
1981+
1982+
1983+
@pytest.mark.enable_socket()
def test_reattach_fields():
    """
    Check PdfWriter.reattach_fields on the sample form from #2453.

    Two writers are exercised: one filled page by page with add_page,
    and one built with clone_from.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/14241368/ExampleForm.pdf",
        name="iss2453.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    # Writer populated page by page.
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    first_pass = writer.reattach_fields()
    assert len(first_pass) == 15
    second_pass = writer.reattach_fields()
    assert len(second_pass) == 0  # nothing to append anymore
    attached = writer._root_object["/AcroForm"]["/Fields"]
    assert len(attached) == 15

    # Writer cloned from the reader.
    writer = PdfWriter(clone_from=reader)
    reattached = writer.reattach_fields()
    assert len(reattached) == 7
    writer.reattach_fields()
    attached = writer._root_object["/AcroForm"]["/Fields"]
    assert len(attached) == 15

0 commit comments

Comments
 (0)