@@ -395,46 +395,6 @@ def emit_conversions_module(f, to_upper, to_lower, to_title):
395
395
is_pub = False , t_type = t_type , pfun = pfun )
396
396
f .write ("}\n \n " )
397
397
398
- def emit_grapheme_module (f , grapheme_table , grapheme_cats ):
399
- f .write ("""pub mod grapheme {
400
- use core::slice::SliceExt;
401
- pub use self::GraphemeCat::*;
402
- use core::result::Result::{Ok, Err};
403
-
404
- #[allow(non_camel_case_types)]
405
- #[derive(Clone, Copy)]
406
- pub enum GraphemeCat {
407
- """ )
408
- for cat in grapheme_cats + ["Any" ]:
409
- f .write (" GC_" + cat + ",\n " )
410
- f .write (""" }
411
-
412
- fn bsearch_range_value_table(c: char, r: &'static [(char, char, GraphemeCat)]) -> GraphemeCat {
413
- use core::cmp::Ordering::{Equal, Less, Greater};
414
- match r.binary_search_by(|&(lo, hi, _)| {
415
- if lo <= c && c <= hi { Equal }
416
- else if hi < c { Less }
417
- else { Greater }
418
- }) {
419
- Ok(idx) => {
420
- let (_, _, cat) = r[idx];
421
- cat
422
- }
423
- Err(_) => GC_Any
424
- }
425
- }
426
-
427
- pub fn grapheme_category(c: char) -> GraphemeCat {
428
- bsearch_range_value_table(c, grapheme_cat_table)
429
- }
430
-
431
- """ )
432
-
433
- emit_table (f , "grapheme_cat_table" , grapheme_table , "&'static [(char, char, GraphemeCat)]" ,
434
- pfun = lambda x : "(%s,%s,GC_%s)" % (escape_char (x [0 ]), escape_char (x [1 ]), x [2 ]),
435
- is_pub = False )
436
- f .write ("}\n " )
437
-
438
398
def emit_charwidth_module (f , width_table ):
439
399
f .write ("pub mod charwidth {\n " )
440
400
f .write (" use core::option::Option;\n " )
@@ -497,79 +457,6 @@ def emit_norm_module(f, canon, compat, combine, norm_props):
497
457
canon_comp_keys = canon_comp .keys ()
498
458
canon_comp_keys .sort ()
499
459
500
- f .write ("pub mod normalization {\n " )
501
-
502
- def mkdata_fun (table ):
503
- def f (char ):
504
- data = "(%s,&[" % escape_char (char )
505
- first = True
506
- for d in table [char ]:
507
- if not first :
508
- data += ","
509
- first = False
510
- data += escape_char (d )
511
- data += "])"
512
- return data
513
- return f
514
-
515
- f .write (" // Canonical decompositions\n " )
516
- emit_table (f , "canonical_table" , canon_keys , "&'static [(char, &'static [char])]" ,
517
- pfun = mkdata_fun (canon ))
518
-
519
- f .write (" // Compatibility decompositions\n " )
520
- emit_table (f , "compatibility_table" , compat_keys , "&'static [(char, &'static [char])]" ,
521
- pfun = mkdata_fun (compat ))
522
-
523
- def comp_pfun (char ):
524
- data = "(%s,&[" % escape_char (char )
525
- canon_comp [char ].sort (lambda x , y : x [0 ] - y [0 ])
526
- first = True
527
- for pair in canon_comp [char ]:
528
- if not first :
529
- data += ","
530
- first = False
531
- data += "(%s,%s)" % (escape_char (pair [0 ]), escape_char (pair [1 ]))
532
- data += "])"
533
- return data
534
-
535
- f .write (" // Canonical compositions\n " )
536
- emit_table (f , "composition_table" , canon_comp_keys ,
537
- "&'static [(char, &'static [(char, char)])]" , pfun = comp_pfun )
538
-
539
- f .write ("""
540
- fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
541
- use core::cmp::Ordering::{Equal, Less, Greater};
542
- use core::slice::SliceExt;
543
- use core::result::Result::{Ok, Err};
544
- match r.binary_search_by(|&(lo, hi, _)| {
545
- if lo <= c && c <= hi { Equal }
546
- else if hi < c { Less }
547
- else { Greater }
548
- }) {
549
- Ok(idx) => {
550
- let (_, _, result) = r[idx];
551
- result
552
- }
553
- Err(_) => 0
554
- }
555
- }\n
556
- """ )
557
-
558
- emit_table (f , "combining_class_table" , combine , "&'static [(char, char, u8)]" , is_pub = False ,
559
- pfun = lambda x : "(%s,%s,%s)" % (escape_char (x [0 ]), escape_char (x [1 ]), x [2 ]))
560
-
561
- f .write (""" #[deprecated(reason = "use the crates.io `unicode-normalization` lib instead",
562
- since = "1.0.0")]
563
- #[unstable(feature = "unicode",
564
- reason = "this functionality will be moved to crates.io")]
565
- pub fn canonical_combining_class(c: char) -> u8 {
566
- bsearch_range_value_table(c, combining_class_table)
567
- }
568
-
569
- }
570
-
571
- """ )
572
-
573
460
def remove_from_wtable (wtable , val ):
574
461
wtable_out = []
575
462
while wtable :
@@ -649,53 +536,3 @@ def optimize_width_table(wtable):
649
536
# normalizations and conversions module
650
537
emit_norm_module (rf , canon_decomp , compat_decomp , combines , norm_props )
651
538
emit_conversions_module (rf , to_upper , to_lower , to_title )
652
-
653
- ### character width module
654
- width_table = []
655
- for zwcat in ["Me" , "Mn" , "Cf" ]:
656
- width_table .extend (map (lambda (lo , hi ): (lo , hi , 0 , 0 ), gencats [zwcat ]))
657
- width_table .append ((4448 , 4607 , 0 , 0 ))
658
-
659
- # get widths, except those that are explicitly marked zero-width above
660
- ea_widths = load_east_asian_width (["W" , "F" , "A" ], ["Me" , "Mn" , "Cf" ])
661
- # these are doublewidth
662
- for dwcat in ["W" , "F" ]:
663
- width_table .extend (map (lambda (lo , hi ): (lo , hi , 2 , 2 ), ea_widths [dwcat ]))
664
- width_table .extend (map (lambda (lo , hi ): (lo , hi , 1 , 2 ), ea_widths ["A" ]))
665
-
666
- width_table .sort (key = lambda w : w [0 ])
667
-
668
- # soft hyphen is not zero width in preformatted text; it's used to indicate
669
- # a hyphen inserted to facilitate a linebreak.
670
- width_table = remove_from_wtable (width_table , 173 )
671
-
672
- # optimize the width table by collapsing adjacent entities when possible
673
- width_table = optimize_width_table (width_table )
674
- emit_charwidth_module (rf , width_table )
675
-
676
- ### grapheme cluster module
677
- # from http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Break_Property_Values
678
- grapheme_cats = load_properties ("auxiliary/GraphemeBreakProperty.txt" , [])
679
-
680
- # Control
681
- # Note 1:
682
- # This category also includes Cs (surrogate codepoints), but Rust's `char`s are
683
- # Unicode Scalar Values only, and surrogates are thus invalid `char`s.
684
- # Thus, we have to remove Cs from the Control category
685
- # Note 2:
686
- # 0x0a and 0x0d (LF and CR) are not in the Control category for Graphemes.
687
- # However, the Graphemes iterator treats these as a special case, so they
688
- # should be included in grapheme_cats["Control"] for our implementation.
689
- grapheme_cats ["Control" ] = group_cat (list (
690
- (set (ungroup_cat (grapheme_cats ["Control" ]))
691
- | set (ungroup_cat (grapheme_cats ["CR" ]))
692
- | set (ungroup_cat (grapheme_cats ["LF" ])))
693
- - set (ungroup_cat ([surrogate_codepoints ]))))
694
- del (grapheme_cats ["CR" ])
695
- del (grapheme_cats ["LF" ])
696
-
697
- grapheme_table = []
698
- for cat in grapheme_cats :
699
- grapheme_table .extend ([(x , y , cat ) for (x , y ) in grapheme_cats [cat ]])
700
- grapheme_table .sort (key = lambda w : w [0 ])
701
- emit_grapheme_module (rf , grapheme_table , grapheme_cats .keys ())
0 commit comments