57
57
aa to zz for the next 26, and so on)
58
58
"""
59
59
60
- from typing import Iterator , Optional , Tuple , cast
60
+ from typing import Iterator , List , Optional , Tuple , cast
61
61
62
62
from ._protocols import PdfCommonDocProtocol
63
63
from ._utils import logger_warning
64
- from .generic import ArrayObject , DictionaryObject , NumberObject
64
+ from .generic import ArrayObject , DictionaryObject , NullObject , NumberObject
65
65
66
66
67
67
def number2uppercase_roman_numeral (num : int ) -> str :
@@ -116,6 +116,42 @@ def number2lowercase_letter(number: int) -> str:
116
116
return number2uppercase_letter (number ).lower ()
117
117
118
118
119
def get_label_from_nums(dictionary_object: DictionaryObject, index: int) -> str:
    """
    Build the label for ``index`` from the ``/Nums`` array of a number tree node.

    [Nums] shall be an array of the form
    [ key 1 value 1 key 2 value 2 ... key n value n ]
    where each key_i is an integer and the corresponding
    value_i shall be the object associated with that key.
    The keys shall be sorted in numerical order,
    analogously to the arrangement of keys in a name tree
    as described in 7.9.6, "Name Trees."

    Args:
        dictionary_object: The node whose ``/Nums`` entry is evaluated.
        index: The page index to look up; the fallback label is ``index + 1``.

    Returns:
        The optional ``/P`` prefix followed by the number rendered in the
        ``/S`` style of the matching range, or ``str(index + 1)`` if no
        label dictionary could be determined.

    Raises:
        KeyError: If ``/Nums`` is missing from ``dictionary_object`` or if
            ``/S`` names a style other than ``/D``, ``/R``, ``/r``, ``/A``
            or ``/a``.
    """
    nums = cast(ArrayObject, dictionary_object["/Nums"])
    start_index = 0
    value = None
    # Only complete key/value pairs are visited, so a dangling trailing key
    # (odd-length array) is ignored instead of raising an IndexError.
    for position in range(0, len(nums) - 1, 2):
        key = nums[position]
        # The first range is always taken; every following range only applies
        # once its starting key has been reached.
        if position > 0 and key > index:
            break
        start_index = key
        value = nums[position + 1].get_object()

    # if /Nums array is not following the specification or if /Nums is empty
    if not isinstance(value, dict):
        return str(index + 1)  # Fallback

    # Maps the numbering style (/S) to the function rendering the number.
    # A missing style means that the label consists of the prefix only.
    numbering_styles = {
        None: lambda n: "",
        "/D": lambda n: str(n),
        "/R": number2uppercase_roman_numeral,
        "/r": number2lowercase_roman_numeral,
        "/A": number2uppercase_letter,
        "/a": number2lowercase_letter,
    }
    start = value.get("/St", 1)
    prefix = value.get("/P", "")
    return prefix + numbering_styles[value.get("/S")](index - start_index + start)
153
+
154
+
119
155
def index2label (reader : PdfCommonDocProtocol , index : int ) -> str :
120
156
"""
121
157
See 7.9.7 "Number Trees".
@@ -132,49 +168,37 @@ def index2label(reader: PdfCommonDocProtocol, index: int) -> str:
132
168
return str (index + 1 ) # Fallback
133
169
number_tree = cast (DictionaryObject , root ["/PageLabels" ].get_object ())
134
170
if "/Nums" in number_tree :
135
- # [Nums] shall be an array of the form
136
- # [ key 1 value 1 key 2 value 2 ... key n value n ]
137
- # where each key_i is an integer and the corresponding
138
- # value_i shall be the object associated with that key.
139
- # The keys shall be sorted in numerical order,
140
- # analogously to the arrangement of keys in a name tree
141
- # as described in 7.9.6, "Name Trees."
142
- nums = cast (ArrayObject , number_tree ["/Nums" ])
143
- i = 0
144
- value = None
145
- start_index = 0
146
- while i < len (nums ):
147
- start_index = nums [i ]
148
- value = nums [i + 1 ].get_object ()
149
- if i + 2 == len (nums ):
171
+ return get_label_from_nums (number_tree , index )
172
+ if "/Kids" in number_tree and not isinstance (number_tree ["/Kids" ], NullObject ):
173
+ # number_tree = {'/Kids': [IndirectObject(7333, 0, 140132998195856), ...]}
174
+ # Limit maximum depth.
175
+ level = 0
176
+ while level < 100 :
177
+ kids = cast (List [DictionaryObject ], number_tree ["/Kids" ])
178
+ for kid in kids :
179
+ # kid = {'/Limits': [0, 63], '/Nums': [0, {'/P': 'C1'}, ...]}
180
+ limits = cast (List [int ], kid ["/Limits" ])
181
+ if limits [0 ] <= index <= limits [1 ]:
182
+ if kid .get ("/Kids" , None ) is not None :
183
+ # Recursive definition.
184
+ level += 1
185
+ if level == 100 : # pragma: no cover
186
+ raise NotImplementedError ("Too deep nesting is not supported." )
187
+ number_tree = kid
188
+ # Exit the inner `for` loop and continue at the next level with the
189
+ # next iteration of the `while` loop.
190
+ break
191
+ return get_label_from_nums (kid , index )
192
+ else :
193
+ # When there are no kids, make sure to exit the `while` loop directly
194
+ # and continue with the fallback.
150
195
break
151
- if nums [i + 2 ] > index :
152
- break
153
- i += 2
154
- m = {
155
- None : lambda n : "" ,
156
- "/D" : lambda n : str (n ),
157
- "/R" : number2uppercase_roman_numeral ,
158
- "/r" : number2lowercase_roman_numeral ,
159
- "/A" : number2uppercase_letter ,
160
- "/a" : number2lowercase_letter ,
161
- }
162
- # if /Nums array is not following the specification or if /Nums is empty
163
- if not isinstance (value , dict ):
164
- return str (index + 1 ) # Fallback
165
- start = value .get ("/St" , 1 )
166
- prefix = value .get ("/P" , "" )
167
- return prefix + m [value .get ("/S" )](index - start_index + start )
168
- if "/Kids" in number_tree or "/Limits" in number_tree :
169
- logger_warning (
170
- (
171
- "/Kids or /Limits found in PageLabels. "
172
- "This is not yet supported."
173
- ),
174
- __name__ ,
175
- )
176
- # TODO: Implement /Kids and /Limits for number tree
177
- return str (index + 1 ) # Fallback if /Nums is not in the number_tree
196
+
197
+ logger_warning (
198
+ f"Could not reliably determine page label for { index } ." ,
199
+ __name__
200
+ )
201
+ return str (index + 1 ) # Fallback if neither /Nums nor /Kids is in the number_tree
178
202
179
203
180
204
def nums_insert (
0 commit comments