Skip to content

Commit d26f9e4

Browse files
committed
Auto merge of #49698 - SimonSapin:unicode-for-everyone, r=alexcrichton
Merge the std_unicode crate into the core crate [The standard library facade](#27783) has historically contained a number of crates with different roles, but that number has decreased over time. `rand` and `libc` have moved to crates.io, and [`collections` was merged into `alloc`](#42648). Today we have `core` that applies everywhere, `std` that expects a full operating system, and `alloc` in-between that only requires a memory allocator (which can be provided by users)… and `std_unicode`, which doesn’t really have a reason to be separate anymore. It contains functionality based on Unicode data tables that can be large, but as long as the relevant functions are not called, the tables should be removed from binaries by linkers. This deprecates the unstable `std_unicode` crate and moves all of its contents into `core`, replacing them with `pub use` reexports. The crate can be removed later. This also removes the `CharExt` trait (replaced with inherent methods in libcore) and the `UnicodeStr` trait (merged into `StrExt`). These traits were both unstable and not intended to be used or named directly. A number of items are newly available in libcore and instantly stable there, but only if they were already stable in libstd. Fixes #49319.
2 parents e28ef22 + ef41788 commit d26f9e4

File tree

42 files changed

+1398
-1537
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+1398
-1537
lines changed

src/Cargo.lock

-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/ci/docker/wasm32-unknown/Dockerfile

-1
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,3 @@ ENV SCRIPT python2.7 /checkout/x.py test --target $TARGETS \
3434
src/test/mir-opt \
3535
src/test/codegen-units \
3636
src/libcore \
37-
src/libstd_unicode/ \

src/doc/unstable-book/src/language-features/lang-items.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ the source code.
243243
- `usize`: `libcore/num/mod.rs`
244244
- `f32`: `libstd/f32.rs`
245245
- `f64`: `libstd/f64.rs`
246-
- `char`: `libstd_unicode/char.rs`
246+
- `char`: `libcore/char.rs`
247247
- `slice`: `liballoc/slice.rs`
248248
- `str`: `liballoc/str.rs`
249249
- `const_ptr`: `libcore/ptr.rs`

src/liballoc/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ path = "lib.rs"
99

1010
[dependencies]
1111
core = { path = "../libcore" }
12-
std_unicode = { path = "../libstd_unicode" }
1312
compiler_builtins = { path = "../rustc/compiler_builtins_shim" }
1413

1514
[dev-dependencies]

src/liballoc/lib.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@
113113
#![feature(trusted_len)]
114114
#![feature(try_reserve)]
115115
#![feature(unboxed_closures)]
116-
#![feature(unicode)]
116+
#![feature(unicode_internals)]
117117
#![feature(unsize)]
118118
#![feature(allocator_internals)]
119119
#![feature(on_unimplemented)]
@@ -135,8 +135,6 @@ extern crate test;
135135
#[cfg(test)]
136136
extern crate rand;
137137

138-
extern crate std_unicode;
139-
140138
// Module with internal macros used by other modules (needs to be included before other modules).
141139
#[macro_use]
142140
mod macros;

src/liballoc/str.rs

+30-14
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,10 @@ use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
4545
use core::mem;
4646
use core::ptr;
4747
use core::iter::FusedIterator;
48-
use std_unicode::str::{UnicodeStr, Utf16Encoder};
4948

5049
use vec_deque::VecDeque;
5150
use borrow::{Borrow, ToOwned};
5251
use string::String;
53-
use std_unicode;
5452
use vec::Vec;
5553
use slice::{SliceConcatExt, SliceIndex};
5654
use boxed::Box;
@@ -75,7 +73,7 @@ pub use core::str::{from_utf8, from_utf8_mut, Chars, CharIndices, Bytes};
7573
#[stable(feature = "rust1", since = "1.0.0")]
7674
pub use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut, ParseBoolError};
7775
#[stable(feature = "rust1", since = "1.0.0")]
78-
pub use std_unicode::str::SplitWhitespace;
76+
pub use core::str::SplitWhitespace;
7977
#[stable(feature = "rust1", since = "1.0.0")]
8078
pub use core::str::pattern;
8179

@@ -147,7 +145,8 @@ impl<S: Borrow<str>> SliceConcatExt<str> for [S] {
147145
#[derive(Clone)]
148146
#[stable(feature = "encode_utf16", since = "1.8.0")]
149147
pub struct EncodeUtf16<'a> {
150-
encoder: Utf16Encoder<Chars<'a>>,
148+
chars: Chars<'a>,
149+
extra: u16,
151150
}
152151

153152
#[stable(feature = "collection_debug", since = "1.17.0")]
@@ -163,12 +162,29 @@ impl<'a> Iterator for EncodeUtf16<'a> {
163162

164163
#[inline]
165164
fn next(&mut self) -> Option<u16> {
166-
self.encoder.next()
165+
if self.extra != 0 {
166+
let tmp = self.extra;
167+
self.extra = 0;
168+
return Some(tmp);
169+
}
170+
171+
let mut buf = [0; 2];
172+
self.chars.next().map(|ch| {
173+
let n = ch.encode_utf16(&mut buf).len();
174+
if n == 2 {
175+
self.extra = buf[1];
176+
}
177+
buf[0]
178+
})
167179
}
168180

169181
#[inline]
170182
fn size_hint(&self) -> (usize, Option<usize>) {
171-
self.encoder.size_hint()
183+
let (low, high) = self.chars.size_hint();
184+
// every char gets either one u16 or two u16,
185+
// so this iterator is between 1 and 2 times as
186+
// long as the underlying iterator.
187+
(low, high.and_then(|n| n.checked_mul(2)))
172188
}
173189
}
174190

@@ -801,7 +817,7 @@ impl str {
801817
#[stable(feature = "split_whitespace", since = "1.1.0")]
802818
#[inline]
803819
pub fn split_whitespace(&self) -> SplitWhitespace {
804-
UnicodeStr::split_whitespace(self)
820+
StrExt::split_whitespace(self)
805821
}
806822

807823
/// An iterator over the lines of a string, as string slices.
@@ -871,7 +887,7 @@ impl str {
871887
/// ```
872888
#[stable(feature = "encode_utf16", since = "1.8.0")]
873889
pub fn encode_utf16(&self) -> EncodeUtf16 {
874-
EncodeUtf16 { encoder: Utf16Encoder::new(self[..].chars()) }
890+
EncodeUtf16 { chars: self[..].chars(), extra: 0 }
875891
}
876892

877893
/// Returns `true` if the given pattern matches a sub-slice of
@@ -1571,7 +1587,7 @@ impl str {
15711587
/// ```
15721588
#[stable(feature = "rust1", since = "1.0.0")]
15731589
pub fn trim(&self) -> &str {
1574-
UnicodeStr::trim(self)
1590+
StrExt::trim(self)
15751591
}
15761592

15771593
/// Returns a string slice with leading whitespace removed.
@@ -1607,7 +1623,7 @@ impl str {
16071623
/// ```
16081624
#[stable(feature = "rust1", since = "1.0.0")]
16091625
pub fn trim_left(&self) -> &str {
1610-
UnicodeStr::trim_left(self)
1626+
StrExt::trim_left(self)
16111627
}
16121628

16131629
/// Returns a string slice with trailing whitespace removed.
@@ -1643,7 +1659,7 @@ impl str {
16431659
/// ```
16441660
#[stable(feature = "rust1", since = "1.0.0")]
16451661
pub fn trim_right(&self) -> &str {
1646-
UnicodeStr::trim_right(self)
1662+
StrExt::trim_right(self)
16471663
}
16481664

16491665
/// Returns a string slice with all prefixes and suffixes that match a
@@ -1960,7 +1976,7 @@ impl str {
19601976
}
19611977

19621978
fn case_ignoreable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
1963-
use std_unicode::derived_property::{Cased, Case_Ignorable};
1979+
use core::unicode::derived_property::{Cased, Case_Ignorable};
19641980
match iter.skip_while(|&c| Case_Ignorable(c)).next() {
19651981
Some(c) => Cased(c),
19661982
None => false,
@@ -2142,7 +2158,7 @@ impl str {
21422158
#[stable(feature = "unicode_methods_on_intrinsics", since = "1.27.0")]
21432159
#[inline]
21442160
pub fn is_whitespace(&self) -> bool {
2145-
UnicodeStr::is_whitespace(self)
2161+
StrExt::is_whitespace(self)
21462162
}
21472163

21482164
/// Returns true if this `str` is entirely alphanumeric, and false otherwise.
@@ -2161,7 +2177,7 @@ impl str {
21612177
#[stable(feature = "unicode_methods_on_intrinsics", since = "1.27.0")]
21622178
#[inline]
21632179
pub fn is_alphanumeric(&self) -> bool {
2164-
UnicodeStr::is_alphanumeric(self)
2180+
StrExt::is_alphanumeric(self)
21652181
}
21662182

21672183
/// Checks if all characters in this string are within the ASCII range.

src/liballoc/string.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,15 @@
5656
5757
#![stable(feature = "rust1", since = "1.0.0")]
5858

59+
use core::char::{decode_utf16, REPLACEMENT_CHARACTER};
5960
use core::fmt;
6061
use core::hash;
6162
use core::iter::{FromIterator, FusedIterator};
6263
use core::ops::Bound::{Excluded, Included, Unbounded};
6364
use core::ops::{self, Add, AddAssign, Index, IndexMut, RangeBounds};
6465
use core::ptr;
6566
use core::str::pattern::Pattern;
66-
use std_unicode::lossy;
67-
use std_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};
67+
use core::str::lossy;
6868

6969
use borrow::{Cow, ToOwned};
7070
use str::{self, from_boxed_utf8_unchecked, FromStr, Utf8Error, Chars};

src/liballoc/tests/lib.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,11 @@
2424
#![feature(string_retain)]
2525
#![feature(try_reserve)]
2626
#![feature(unboxed_closures)]
27-
#![feature(unicode)]
2827
#![feature(exact_chunks)]
2928
#![feature(inclusive_range_fields)]
3029

3130
extern crate alloc_system;
32-
extern crate std_unicode;
31+
extern crate core;
3332
extern crate rand;
3433

3534
use std::hash::{Hash, Hasher};

src/liballoc/tests/str.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -1204,8 +1204,7 @@ fn test_rev_split_char_iterator_no_trailing() {
12041204

12051205
#[test]
12061206
fn test_utf16_code_units() {
1207-
use std_unicode::str::Utf16Encoder;
1208-
assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(),
1207+
assert_eq!("é\u{1F4A9}".encode_utf16().collect::<Vec<u16>>(),
12091208
[0xE9, 0xD83D, 0xDCA9])
12101209
}
12111210

src/liballoc/tests/string.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ fn test_from_utf16() {
132132
let s_as_utf16 = s.encode_utf16().collect::<Vec<u16>>();
133133
let u_as_string = String::from_utf16(&u).unwrap();
134134

135-
assert!(::std_unicode::char::decode_utf16(u.iter().cloned()).all(|r| r.is_ok()));
135+
assert!(::core::char::decode_utf16(u.iter().cloned()).all(|r| r.is_ok()));
136136
assert_eq!(s_as_utf16, u);
137137

138138
assert_eq!(u_as_string, s);

0 commit comments

Comments
 (0)