Skip to content

Commit d193694

Browse files
committed
chunky: more refactoring
1 parent 366449a commit d193694

File tree

3 files changed

+38
-42
lines changed

3 files changed

+38
-42
lines changed

src/mpapi/chunky.py

+35-36
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,23 @@
11
"""
22
For a given search query, return the results. If the number of results exceeds
3-
the chunkSize, return multiple chunks. For now, we only allow very limited set
3+
the chunkSize, return multiple chunks. For now, we only allow very limited set
44
of search queries based on a single id (group, exhibit, location, approvalGrp).
55
6-
A chunk consists of, say, 1000 objects and their corresponding multimedia and
7-
persons items. However, if the person or multimedia items reference other
8-
persons and multimedia, we don't include them, i.e. we're including only
6+
A chunk consists of, say, 1000 objects and their corresponding multimedia and
7+
persons items. However, if the person or multimedia items reference other
8+
persons and multimedia, we don't include them, i.e. we're including only
99
immediate relatives, no distant cousins. To be explicit, we call this a multi-
10-
part response in contrast with a single-part chunk that only contains items of
10+
part response in contrast with a single-part chunk that only contains items of
1111
type object.
1212
1313
USAGE
1414
from Mp.Api.Chunky import Chunky
1515
c = Chunky(chunkSize=1000, baseURL=baseURL, pw=pw, user=user)
1616
for chunkM in c.getByType(ID=ID, Type="group"):
17-
do_something_with (chunkM)
17+
do_something_with (chunkM)
1818
1919
for chunkM in c.search(query=query, offset=0):
20-
do_something_with (chunkM)
20+
do_something_with (chunkM)
2121
2222
2323
TOWARDS AN ALGORITHM
@@ -64,7 +64,7 @@
6464

6565
# type aliases
6666
ET = etree._Element
67-
ETNone = etree._Element| None
67+
ETNone = etree._Element | None
6868
since = str | None
6969

7070
# typed variables
@@ -124,28 +124,28 @@ def getByType(
124124
m = self._savedQuery(Type=target, ID=ID, offset=offset)
125125
else:
126126
m = self._getObjects(Type=Type, ID=ID, offset=offset, since=since)
127-
chunkData += m
127+
chunkData += m
128128
# only look for related data if there is something in current chunk
129129
if m:
130-
partET = m.toET()
131130
# all related Multimedia and Persons items, no chunking
132131
for targetType in ["Multimedia", "Person"]:
133-
relatedET = self._relatedItems(
134-
part=partET,
132+
relatedM = self._relatedItems(
133+
part=m.toET(),
135134
target=targetType,
136135
since=since,
137136
onlyPublished=onlyPublished,
138137
)
139-
if relatedET is not None:
140-
chunkData.add(doc=relatedET)
138+
if relatedM:
139+
chunkData += relatedM
141140

142141
offset += self.chunkSize # wrong for last chunk
143-
actualSize = chunkData.actualSize(module="Object")
144-
if actualSize < self.chunkSize:
142+
if chunkData.actualSize(module="Object") < self.chunkSize:
145143
lastChunk = True
146144
yield chunkData
147145

148-
def search(self, query: Search, since: since = None, offset: int = 0) -> Iterator[Module]:
146+
def search(
147+
self, query: Search, since: since = None, offset: int = 0
148+
) -> Iterator[Module]:
149149
"""
150150
We could attempt a general chunky search. Just hand over a search query
151151
(presumably one which finds object items). We split the results into
@@ -157,22 +157,20 @@ def search(self, query: Search, since: since = None, offset: int = 0) -> Iterato
157157
while not lastChunk:
158158
chunkData = Module() # make a new zml module document
159159
query.offset(value=offset) # todo in search
160-
r = self.api.search(xml=query.toString())
161-
partET = etree.fromstring(r.content, ETparser)
162-
chunkData.add(doc=partET)
160+
m = self.api.search2(query=query)
161+
chunkData += m
163162
# all related Multimedia and Persons items, no chunking
164163
for targetType in ["Multimedia", "Person"]:
165-
relatedET = self._relatedItems(
164+
relatedM = self._relatedItems(
166165
part=partET, target=targetType, since=since
167166
)
168-
if relatedET is not None:
169-
chunkData.add(doc=relatedET)
167+
if relatedM:
168+
chunkData += relatedM
170169

171170
offset = offset + self.chunkSize
172-
actualNo = chunkData.actualSize(module="Object")
173171
# print(f"*** actual VS chunkSize: {actualNo} VS {self.chunkSize}")
174172

175-
if actualNo < self.chunkSize:
173+
if chunkData.actualSize(module="Object") < self.chunkSize:
176174
lastChunk = True
177175
yield chunkData
178176

@@ -228,7 +226,7 @@ def _getObjects(
228226

229227
def _relatedItems(
230228
self, *, part: ET, target: str, since: since = None, onlyPublished: bool = False
231-
) -> ET | None:
229+
) -> Module:
232230
"""
233231
For a zml document, return all related items of the target type.
234232
@@ -237,9 +235,9 @@ def _relatedItems(
237235
* target: target module type (either "Person" or "Multimedia")
238236
* since: TODO. Date to filter for updates
239237
240-
RETURNS
241-
* etree document with related items of the target type
242-
* this is old way which returns ET
238+
NEW
239+
* returns Module, not ET | None
240+
* avoid optional (mixed) return value
243241
"""
244242

245243
IDs: Any = part.xpath(
@@ -249,7 +247,7 @@ def _relatedItems(
249247

250248
if len(IDs) == 0:
251249
print(f"***WARN: No related {target} IDs found!") # this is not an ERROR
252-
return None
250+
return Module()
253251

254252
# use limit=0 for a deterministic search as RIA's response provides the
255253
# number of search results limit -1 not documented at
@@ -287,14 +285,15 @@ def _relatedItems(
287285
s.toFile(path="debug.search.xml")
288286
# s.print()
289287
s.validate(mode="search")
290-
r = self.api.search(xml=s.toString())
291-
# DEBUG
292-
# with open("DEBUGresponse.xml", "wb") as binary_file:
293-
# Write bytes to file
294-
# binary_file.write(r.content)
295-
return etree.fromstring(r.content, ETparser)
288+
return self.api.search2(query=s)
296289

297290
def _savedQuery(self, *, Type: str = "Object", ID: int, offset: int = 0) -> Module:
291+
"""
292+
returns the result of a saved query (limited to chunkSize)
293+
294+
Is this correct? Yes, we're calling this from getByType with various offsets.
295+
Each call returns the object part of a chunk.
296+
"""
298297
return self.api.runSavedQuery2(
299298
Type=Type, ID=ID, offset=offset, limit=self.chunkSize
300299
)

src/mpapi/mink.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838

3939
def arg():
4040
"""
41-
since fvh is blocked by Windows group policy
41+
since fvh is blocked by Windows group policy
4242
"""
4343
import argparse
4444
from mpapi.constants import get_credentials

test/test_chunky.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from mpapi.constants import NSMAP, get_credentials
22
from mpapi.chunky import Chunky
3+
from mpapi.module import Module
34
from lxml import etree # type: ignore
45

56
# types
@@ -42,20 +43,16 @@ def test_relatedItems():
4243
relMul = c._relatedItems(part=partET, target="Multimedia")
4344
resL = relMul.xpath(
4445
"//m:module[@name = 'Multimedia']/m:moduleItem[@id = '468698']",
45-
namespaces=NSMAP,
4646
)
4747
assert len(resL) == 1
4848
resL = relMul.xpath(
4949
"//m:module[@name = 'Multimedia']/m:moduleItem[@id = '517501']",
50-
namespaces=NSMAP,
5150
)
5251
assert len(resL) == 1
5352

5453
relPer = c._relatedItems(part=partET, target="Person")
5554
# toFile(relPer, "sdata/relPer.xml")
56-
resL = relPer.xpath(
57-
"count(//m:module[@name = 'Person']/m:moduleItem)", namespaces=NSMAP
58-
)
55+
resL = relPer.xpath("count(//m:module[@name = 'Person']/m:moduleItem)")
5956
assert int(resL) == 1
6057

6158

0 commit comments

Comments
 (0)