1
1
"""
2
2
For a given search query, return the results. If the number of results exceeds
3
- the chunkSize, return multiple chunks. For now, we only allow very limited set
3
+ the chunkSize, return multiple chunks. For now, we only allow a very limited set
4
4
of search queries based on a single id (group, exhibit, location, approvalGrp).
5
5
6
- A chunk consists of, say, 1000 objects and their corresponding multimedia and
7
- persons items. However, if the person or multimedia items reference other
8
- persons and multimedia, we don't include them, i.e. we're including only
6
+ A chunk consists of, say, 1000 objects and their corresponding multimedia and
7
+ persons items. However, if the person or multimedia items reference other
8
+ persons and multimedia, we don't include them, i.e. we're including only
9
9
immediate relatives, no distant cousins. To be explicit, we call this a multi-
10
- part response in contrast with a single-part chunk that only contains items of
10
+ part response in contrast with a single-part chunk that only contains items of
11
11
type object.
12
12
13
13
USAGE
14
14
from Mp.Api.Chunky import Chunky
15
15
c = Chunky(chunkSize=1000, baseURL=baseURL, pw=pw, user=user)
16
16
for chunkM in c.getByType(ID=ID, Type="group"):
17
- do_something_with (chunkM)
17
+ do_something_with (chunkM)
18
18
19
19
for chunkM in c.search(query=query, offset=0):
20
- do_something_with (chunkM)
20
+ do_something_with (chunkM)
21
21
22
22
23
23
TOWARDS AN ALGORITHM
64
64
65
65
# type aliases
66
66
ET = etree ._Element
67
- ETNone = etree ._Element | None
67
+ ETNone = etree ._Element | None
68
68
since = str | None
69
69
70
70
# typed variables
@@ -124,28 +124,28 @@ def getByType(
124
124
m = self ._savedQuery (Type = target , ID = ID , offset = offset )
125
125
else :
126
126
m = self ._getObjects (Type = Type , ID = ID , offset = offset , since = since )
127
- chunkData += m
127
+ chunkData += m
128
128
# only look for related data if there is something in current chunk
129
129
if m :
130
- partET = m .toET ()
131
130
# all related Multimedia and Persons items, no chunking
132
131
for targetType in ["Multimedia" , "Person" ]:
133
- relatedET = self ._relatedItems (
134
- part = partET ,
132
+ relatedM = self ._relatedItems (
133
+ part = m . toET () ,
135
134
target = targetType ,
136
135
since = since ,
137
136
onlyPublished = onlyPublished ,
138
137
)
139
- if relatedET is not None :
140
- chunkData . add ( doc = relatedET )
138
+ if relatedM :
139
+ chunkData += relatedM
141
140
142
141
offset += self .chunkSize # wrong for last chunk
143
- actualSize = chunkData .actualSize (module = "Object" )
144
- if actualSize < self .chunkSize :
142
+ if chunkData .actualSize (module = "Object" ) < self .chunkSize :
145
143
lastChunk = True
146
144
yield chunkData
147
145
148
- def search (self , query : Search , since : since = None , offset : int = 0 ) -> Iterator [Module ]:
146
+ def search (
147
+ self , query : Search , since : since = None , offset : int = 0
148
+ ) -> Iterator [Module ]:
149
149
"""
150
150
We could attempt a general chunky search. Just hand over a search query
151
151
(presumably one which finds object items). We split the results into
@@ -157,22 +157,20 @@ def search(self, query: Search, since: since = None, offset: int = 0) -> Iterato
157
157
while not lastChunk :
158
158
chunkData = Module () # make a new zml module document
159
159
query .offset (value = offset ) # todo in search
160
- r = self .api .search (xml = query .toString ())
161
- partET = etree .fromstring (r .content , ETparser )
162
- chunkData .add (doc = partET )
160
+ m = self .api .search2 (query = query )
161
+ chunkData += m
163
162
# all related Multimedia and Persons items, no chunking
164
163
for targetType in ["Multimedia" , "Person" ]:
165
- relatedET = self ._relatedItems (
164
+ relatedM = self ._relatedItems (
166
165
part = partET , target = targetType , since = since
167
166
)
168
- if relatedET is not None :
169
- chunkData . add ( doc = relatedET )
167
+ if relatedM :
168
+ chunkData += relatedM
170
169
171
170
offset = offset + self .chunkSize
172
- actualNo = chunkData .actualSize (module = "Object" )
173
171
# print(f"*** actual VS chunkSize: {actualNo} VS {self.chunkSize}")
174
172
175
- if actualNo < self .chunkSize :
173
+ if chunkData . actualSize ( module = "Object" ) < self .chunkSize :
176
174
lastChunk = True
177
175
yield chunkData
178
176
@@ -228,7 +226,7 @@ def _getObjects(
228
226
229
227
def _relatedItems (
230
228
self , * , part : ET , target : str , since : since = None , onlyPublished : bool = False
231
- ) -> ET | None :
229
+ ) -> Module :
232
230
"""
233
231
For a zml document, return all related items of the target type.
234
232
@@ -237,9 +235,9 @@ def _relatedItems(
237
235
* target: target module type (either "Person" or "Multimedia")
238
236
* since: TODO. Date to filter for updates
239
237
240
- RETURNS
241
- * etree document with related items of the target type
242
- * this is old way which returns ET
238
+ NEW
239
+ * returns Module, not ET | None
240
+ * avoid optional (mixed) return value
243
241
"""
244
242
245
243
IDs : Any = part .xpath (
@@ -249,7 +247,7 @@ def _relatedItems(
249
247
250
248
if len (IDs ) == 0 :
251
249
print (f"***WARN: No related { target } IDs found!" ) # this is not an ERROR
252
- return None
250
+ return Module ()
253
251
254
252
# use limit=0 for a deterministic search as RIA's response provides the
255
253
# number of search results limit -1 not documented at
@@ -287,14 +285,15 @@ def _relatedItems(
287
285
s .toFile (path = "debug.search.xml" )
288
286
# s.print()
289
287
s .validate (mode = "search" )
290
- r = self .api .search (xml = s .toString ())
291
- # DEBUG
292
- # with open("DEBUGresponse.xml", "wb") as binary_file:
293
- # Write bytes to file
294
- # binary_file.write(r.content)
295
- return etree .fromstring (r .content , ETparser )
288
+ return self .api .search2 (query = s )
296
289
297
290
def _savedQuery (self , * , Type : str = "Object" , ID : int , offset : int = 0 ) -> Module :
291
+ """
292
+ returns the result of a saved query (limited to chunkSize)
293
+
294
+ Is this correct? Yes, we're calling this from getByType with various offsets.
295
+ Each call returns the object part of a chunk.
296
+ """
298
297
return self .api .runSavedQuery2 (
299
298
Type = Type , ID = ID , offset = offset , limit = self .chunkSize
300
299
)
0 commit comments