29
29
from typing import DefaultDict
30
30
from typing import Dict
31
31
from typing import FrozenSet
32
- from typing import Hashable
33
32
from typing import Iterable
34
33
from typing import Iterator
35
34
from typing import List
@@ -1280,33 +1279,37 @@ def process(self, wkv):
1280
1279
# pylint: disable=unidiomatic-typecheck
1281
1280
# Optimization for the global window case.
1282
1281
if self .is_default_windowing :
1283
- wkey = key # type: Hashable
1282
+ self . add_key_value ( key , value , None )
1284
1283
else :
1285
- wkey = tuple (wkv .windows ), key
1286
- entry = self .table .get (wkey , None )
1287
- if entry is None :
1288
- if self .key_count >= self .max_keys :
1289
- target = self .key_count * 9 // 10
1290
- old_wkeys = []
1291
- # TODO(robertwb): Use an LRU cache?
1292
- for old_wkey , old_wvalue in self .table .items ():
1293
- old_wkeys .append (old_wkey ) # Can't mutate while iterating.
1294
- self .output_key (old_wkey , old_wvalue [0 ], old_wvalue [1 ])
1295
- self .key_count -= 1
1296
- if self .key_count <= target :
1297
- break
1298
- for old_wkey in reversed (old_wkeys ):
1299
- del self .table [old_wkey ]
1300
- self .key_count += 1
1301
- # We save the accumulator as a one element list so we can efficiently
1302
- # mutate when new values are added without searching the cache again.
1303
- entry = self .table [wkey ] = [self .combine_fn .create_accumulator (), None ]
1304
- if not self .is_default_windowing :
1305
- # Conditional as the timestamp attribute is lazily initialized.
1306
- entry [1 ] = wkv .timestamp
1307
- entry [0 ] = self .combine_fn_add_input (entry [0 ], value )
1308
- if not self .is_default_windowing and self .timestamp_combiner :
1309
- entry [1 ] = self .timestamp_combiner .combine (entry [1 ], wkv .timestamp )
1284
+ for window in wkv .windows :
1285
+ self .add_key_value ((window , key ),
1286
+ value ,
1287
+ wkv .timestamp if self .timestamp_combiner else None )
1288
+
1289
+ def add_key_value (self , wkey , value , timestamp ):
1290
+ entry = self .table .get (wkey , None )
1291
+ if entry is None :
1292
+ if self .key_count >= self .max_keys :
1293
+ target = self .key_count * 9 // 10
1294
+ old_wkeys = []
1295
+ # TODO(robertwb): Use an LRU cache?
1296
+ for old_wkey , old_wvalue in self .table .items ():
1297
+ old_wkeys .append (old_wkey ) # Can't mutate while iterating.
1298
+ self .output_key (old_wkey , old_wvalue [0 ], old_wvalue [1 ])
1299
+ self .key_count -= 1
1300
+ if self .key_count <= target :
1301
+ break
1302
+ for old_wkey in reversed (old_wkeys ):
1303
+ del self .table [old_wkey ]
1304
+ self .key_count += 1
1305
+ # We save the accumulator as a one element list so we can efficiently
1306
+ # mutate when new values are added without searching the cache again.
1307
+ entry = self .table [wkey ] = [
1308
+ self .combine_fn .create_accumulator (), timestamp
1309
+ ]
1310
+ entry [0 ] = self .combine_fn_add_input (entry [0 ], value )
1311
+ if not self .is_default_windowing and self .timestamp_combiner :
1312
+ entry [1 ] = self .timestamp_combiner .combine (entry [1 ], timestamp )
1310
1313
1311
1314
def finish (self ):
1312
1315
# type: () -> None
@@ -1331,10 +1334,10 @@ def output_key(self, wkey, accumulator, timestamp):
1331
1334
if self .is_default_windowing :
1332
1335
self .output (_globally_windowed_value .with_value ((wkey , value )))
1333
1336
else :
1334
- windows , key = wkey
1337
+ window , key = wkey
1335
1338
if self .timestamp_combiner is None :
1336
- timestamp = windows [ 0 ] .max_timestamp ()
1337
- self .output (WindowedValue ((key , value ), timestamp , windows ))
1339
+ timestamp = window .max_timestamp ()
1340
+ self .output (WindowedValue ((key , value ), timestamp , ( window , ) ))
1338
1341
1339
1342
1340
1343
class FlattenOperation (Operation ):
0 commit comments