@@ -2564,22 +2564,13 @@ fn num_decimal_digits(num: usize) -> usize {
2564
2564
2565
2565
// We replace some characters so the CLI output is always consistent and underlines aligned.
2566
2566
// Keep the following list in sync with `rustc_span::char_width`.
2567
+ // ATTENTION: keep lexicographically sorted so that the binary search will work.
2567
2568
const OUTPUT_REPLACEMENTS : & [ ( char , & str ) ] = & [
2568
- ( '\t' , " " ) , // We do our own tab replacement
2569
- ( '\u{200D}' , "" ) , // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
2570
- ( '\u{202A}' , "�" ) , // The following unicode text flow control characters are inconsistently
2571
- ( '\u{202B}' , "�" ) , // supported across CLIs and can cause confusion due to the bytes on disk
2572
- ( '\u{202D}' , "�" ) , // not corresponding to the visible source code, so we replace them always.
2573
- ( '\u{202E}' , "�" ) ,
2574
- ( '\u{2066}' , "�" ) ,
2575
- ( '\u{2067}' , "�" ) ,
2576
- ( '\u{2068}' , "�" ) ,
2577
- ( '\u{202C}' , "�" ) ,
2578
- ( '\u{2069}' , "�" ) ,
2569
+ // tidy-alphabetical-start
2579
2570
// In terminals without Unicode support the following will be garbled, but in *all* terminals
2580
2571
// the underlying codepoint will be as well. We could gate this replacement behind a "unicode
2581
2572
// support" gate.
2582
- ( '\u{0000}' , "␀" ) ,
2573
+ ( '\0' , "␀" ) ,
2583
2574
( '\u{0001}' , "␁" ) ,
2584
2575
( '\u{0002}' , "␂" ) ,
2585
2576
( '\u{0003}' , "␃" ) ,
@@ -2588,11 +2579,12 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
2588
2579
( '\u{0006}' , "␆" ) ,
2589
2580
( '\u{0007}' , "␇" ) ,
2590
2581
( '\u{0008}' , "␈" ) ,
2591
- ( '\u{000B}' , "␋" ) ,
2592
- ( '\u{000C}' , "␌" ) ,
2593
- ( '\u{000D}' , "␍" ) ,
2594
- ( '\u{000E}' , "␎" ) ,
2595
- ( '\u{000F}' , "␏" ) ,
2582
+ ( '\u{0009}' , " " ) , // We do our own tab replacement
2583
+ ( '\u{000b}' , "␋" ) ,
2584
+ ( '\u{000c}' , "␌" ) ,
2585
+ ( '\u{000d}' , "␍" ) ,
2586
+ ( '\u{000e}' , "␎" ) ,
2587
+ ( '\u{000f}' , "␏" ) ,
2596
2588
( '\u{0010}' , "␐" ) ,
2597
2589
( '\u{0011}' , "␑" ) ,
2598
2590
( '\u{0012}' , "␒" ) ,
@@ -2603,21 +2595,37 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
2603
2595
( '\u{0017}' , "␗" ) ,
2604
2596
( '\u{0018}' , "␘" ) ,
2605
2597
( '\u{0019}' , "␙" ) ,
2606
- ( '\u{001A}' , "␚" ) ,
2607
- ( '\u{001B}' , "␛" ) ,
2608
- ( '\u{001C}' , "␜" ) ,
2609
- ( '\u{001D}' , "␝" ) ,
2610
- ( '\u{001E}' , "␞" ) ,
2611
- ( '\u{001F}' , "␟" ) ,
2612
- ( '\u{007F}' , "␡" ) ,
2598
+ ( '\u{001a}' , "␚" ) ,
2599
+ ( '\u{001b}' , "␛" ) ,
2600
+ ( '\u{001c}' , "␜" ) ,
2601
+ ( '\u{001d}' , "␝" ) ,
2602
+ ( '\u{001e}' , "␞" ) ,
2603
+ ( '\u{001f}' , "␟" ) ,
2604
+ ( '\u{007f}' , "␡" ) ,
2605
+ ( '\u{200d}' , "" ) , // Replace ZWJ for consistent terminal output of grapheme clusters.
2606
+ ( '\u{202a}' , "�" ) , // The following unicode text flow control characters are inconsistently
2607
+ ( '\u{202b}' , "�" ) , // supported across CLIs and can cause confusion due to the bytes on disk
2608
+ ( '\u{202c}' , "�" ) , // not corresponding to the visible source code, so we replace them always.
2609
+ ( '\u{202d}' , "�" ) ,
2610
+ ( '\u{202e}' , "�" ) ,
2611
+ ( '\u{2066}' , "�" ) ,
2612
+ ( '\u{2067}' , "�" ) ,
2613
+ ( '\u{2068}' , "�" ) ,
2614
+ ( '\u{2069}' , "�" ) ,
2615
+ // tidy-alphabetical-end
2613
2616
] ;
2614
2617
2615
- fn normalize_whitespace ( str : & str ) -> String {
2616
- let mut s = str. to_string ( ) ;
2617
- for ( c, replacement) in OUTPUT_REPLACEMENTS {
2618
- s = s. replace ( * c, replacement) ;
2619
- }
2620
- s
2618
+ fn normalize_whitespace ( s : & str ) -> String {
2619
+ // Scan the input string for a character in the ordered table above. If it's present, replace
2620
+ // it with its alternative string (it can be more than 1 char!). Otherwise, retain the input
2621
+ // char. The output is built in a single pass into a buffer pre-sized to the input's byte length.
2622
+ s. chars ( ) . fold ( String :: with_capacity ( s. len ( ) ) , |mut s, c| {
2623
+ match OUTPUT_REPLACEMENTS . binary_search_by_key ( & c, |( k, _) | * k) {
2624
+ Ok ( i) => s. push_str ( OUTPUT_REPLACEMENTS [ i] . 1 ) ,
2625
+ _ => s. push ( c) ,
2626
+ }
2627
+ s
2628
+ } )
2621
2629
}
2622
2630
2623
2631
fn draw_col_separator ( buffer : & mut StyledBuffer , line : usize , col : usize ) {
0 commit comments