@@ -10,6 +10,7 @@ use icu_provider::prelude::*;
10
10
use std:: collections:: HashMap ;
11
11
use std:: collections:: HashSet ;
12
12
use std:: fmt;
13
+ use std:: sync:: Arc ;
13
14
use std:: time:: Duration ;
14
15
use std:: time:: Instant ;
15
16
use writeable:: Writeable ;
@@ -53,8 +54,7 @@ impl ExportDriver {
53
54
include_full,
54
55
fallbacker,
55
56
deduplication_strategy,
56
- additional_collations,
57
- segmenter_models,
57
+ attributes_filters,
58
58
} = self ;
59
59
60
60
let markers = markers. unwrap_or_else ( || provider. supported_markers ( ) ) ;
@@ -177,9 +177,8 @@ impl ExportDriver {
177
177
provider,
178
178
marker,
179
179
& requested_families,
180
+ & attributes_filters,
180
181
include_full,
181
- & additional_collations,
182
- & segmenter_models,
183
182
& fallbacker,
184
183
) ?;
185
184
@@ -262,13 +261,16 @@ impl ExportDriver {
262
261
263
262
/// Selects the maximal set of locales to export based on a [`DataMarkerInfo`] and this datagen
264
263
/// provider's options bag. The locales may optionally be deduplicated later for fallback.
264
+ #[ allow( clippy:: type_complexity) ] // sigh
265
265
fn select_locales_for_marker < ' a > (
266
266
provider : & ' a dyn ExportableProvider ,
267
267
marker : DataMarkerInfo ,
268
268
requested_families : & HashMap < DataLocale , DataLocaleFamilyAnnotations > ,
269
+ attributes_filters : & HashMap <
270
+ String ,
271
+ Arc < Box < dyn Fn ( & DataMarkerAttributes ) -> bool + Send + Sync + ' static > > ,
272
+ > ,
269
273
include_full : bool ,
270
- additional_collations : & HashSet < String > ,
271
- segmenter_models : & [ String ] ,
272
274
fallbacker : & LocaleFallbacker ,
273
275
) -> Result < HashSet < DataIdentifierCow < ' a > > , DataError > {
274
276
// Map from all supported DataLocales to their corresponding supported DataIdentifiers.
@@ -283,41 +285,13 @@ fn select_locales_for_marker<'a>(
283
285
. insert ( id) ;
284
286
}
285
287
286
- if marker. path . as_str ( ) . starts_with ( "segmenter/dictionary/" ) {
287
- supported_map. retain ( |_, ids| {
288
- ids. retain ( |id| {
289
- segmenter_models
290
- . iter ( )
291
- . any ( |m| * * m == * * id. marker_attributes )
292
- } ) ;
293
- !ids. is_empty ( )
294
- } ) ;
295
- // Don't perform additional locale filtering
296
- return Ok ( supported_map. into_values ( ) . flatten ( ) . collect ( ) ) ;
297
- } else if marker. path . as_str ( ) . starts_with ( "segmenter/lstm/" ) {
298
- supported_map. retain ( |_, locales| {
299
- locales. retain ( |id| {
300
- segmenter_models
301
- . iter ( )
302
- . any ( |m| * * m == * * id. marker_attributes )
303
- } ) ;
304
- !locales. is_empty ( )
305
- } ) ;
306
- // Don't perform additional locale filtering
307
- return Ok ( supported_map. into_values ( ) . flatten ( ) . collect ( ) ) ;
308
- } else if marker. path . as_str ( ) . starts_with ( "collator/" ) {
309
- supported_map. retain ( |_, ids| {
310
- ids. retain ( |id| {
311
- id. marker_attributes . is_empty ( )
312
- || additional_collations. contains ( id. marker_attributes . as_str ( ) )
313
- || if id. marker_attributes . as_str ( ) . starts_with ( "search" ) {
314
- additional_collations. contains ( "search*" )
315
- } else {
316
- ![ "big5han" , "gb2312" ] . contains ( & id. marker_attributes . as_str ( ) )
317
- }
288
+ if !marker. attributes_domain . is_empty ( ) {
289
+ if let Some ( filter) = attributes_filters. get ( marker. attributes_domain ) {
290
+ supported_map. retain ( |_, ids| {
291
+ ids. retain ( |id| filter ( & id. marker_attributes ) ) ;
292
+ !ids. is_empty ( )
318
293
} ) ;
319
- !ids. is_empty ( )
320
- } ) ;
294
+ }
321
295
}
322
296
323
297
if include_full && requested_families. is_empty ( ) {
@@ -510,6 +484,7 @@ impl fmt::Display for DisplayDuration {
510
484
511
485
#[ test]
512
486
fn test_collation_filtering ( ) {
487
+ use crate :: DataLocaleFamily ;
513
488
use icu:: locale:: locale;
514
489
use std:: collections:: BTreeSet ;
515
490
@@ -619,16 +594,19 @@ fn test_collation_filtering() {
619
594
} ,
620
595
] ;
621
596
for cas in cases {
597
+ let driver = ExportDriver :: new (
598
+ [ DataLocaleFamily :: single ( cas. language . clone ( ) ) ] ,
599
+ DeduplicationStrategy :: None . into ( ) ,
600
+ LocaleFallbacker :: new_without_data ( ) ,
601
+ )
602
+ . with_additional_collations ( cas. include_collations . iter ( ) . copied ( ) . map ( String :: from) ) ;
622
603
let resolved_locales = select_locales_for_marker (
623
604
& Provider ,
624
605
icu:: collator:: provider:: CollationDataV1Marker :: INFO ,
625
- & [ ( cas. language . clone ( ) , DataLocaleFamilyAnnotations :: single ( ) ) ]
626
- . into_iter ( )
627
- . collect ( ) ,
606
+ & driver. requested_families ,
607
+ & driver. attributes_filters ,
628
608
false ,
629
- & HashSet :: from_iter ( cas. include_collations . iter ( ) . copied ( ) . map ( String :: from) ) ,
630
- & [ ] ,
631
- & LocaleFallbacker :: new_without_data ( ) ,
609
+ & driver. fallbacker ,
632
610
)
633
611
. unwrap ( )
634
612
. into_iter ( )
0 commit comments