Skip to content

Commit 0b098f1

Browse files
Merge pull request #351 from Kijewski/pr-ascii_str
Replace `from_utf8_unsafe()` with `AsciiStr`
2 parents c3fac0c + 6ae689c commit 0b098f1

File tree

14 files changed

+471
-245
lines changed

14 files changed

+471
-245
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ members = [
66
"testing",
77
"testing-alloc",
88
"testing-no-std",
9-
"testing-renamed"
9+
"testing-renamed",
1010
]
1111
resolver = "2"

_typos.toml

+4
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,13 @@ extend-exclude = [
1717
]
1818

1919
[default.extend-words]
20+
# It's actually called that in the ASCII standard
21+
Enquiry = "Enquiry"
22+
2023
# French words
2124
exemple = "exemple"
2225
existant = "existant"
26+
2327
# used in tests
2428
Ba = "Ba"
2529
fo = "fo"

fuzzing/fuzz/src/ascii_str.rs

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../rinja/src/ascii_str.rs

fuzzing/fuzz/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#![deny(unreachable_pub)]
44

55
pub mod all;
6+
mod ascii_str;
67
pub mod filters;
78
pub mod html;
89
pub mod parser;

rinja/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ blocks = ["rinja_derive?/blocks"]
5757
code-in-doc = ["rinja_derive?/code-in-doc"]
5858
config = ["rinja_derive?/config"]
5959
derive = ["rinja_derive"]
60-
serde_json = ["rinja_derive?/serde_json", "dep:serde", "dep:serde_json"]
60+
serde_json = ["std", "rinja_derive?/serde_json", "dep:serde", "dep:serde_json"]
6161
std = [
6262
"alloc",
6363
"rinja_derive?/std",

rinja/src/ascii_str.rs

+144
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
// FIXME: Replace `AsciiChar` with `[core::ascii::Char]` once [#110998] is stable
2+
// [#110998]: https://github.com/rust-lang/rust/issues/110998
3+
4+
#![allow(unreachable_pub)]
5+
6+
use core::ops::{Deref, Index, IndexMut};
7+
8+
pub use _ascii_char::AsciiChar;
9+
10+
/// A string that only contains ASCII characters, same layout as [`str`].
11+
#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
12+
#[repr(transparent)]
13+
pub struct AsciiStr([AsciiChar]);
14+
15+
impl AsciiStr {
16+
#[inline]
17+
pub const fn new_sized<const N: usize>(src: &str) -> [AsciiChar; N] {
18+
if !src.is_ascii() || src.len() > N {
19+
panic!();
20+
}
21+
22+
let src = src.as_bytes();
23+
let mut result = [AsciiChar::NULL; N];
24+
let mut i = 0;
25+
while i < src.len() {
26+
result[i] = AsciiChar::new(src[i]);
27+
i += 1;
28+
}
29+
result
30+
}
31+
32+
#[inline]
33+
pub const fn from_slice(src: &[AsciiChar]) -> &Self {
34+
// SAFETY: `Self` is transparent over `[AsciiChar]`.
35+
unsafe { core::mem::transmute::<&[AsciiChar], &AsciiStr>(src) }
36+
}
37+
38+
#[inline]
39+
pub const fn as_str(&self) -> &str {
40+
// SAFETY: `Self` has the same layout as `str`,
41+
// and all ASCII characters are valid UTF-8 characters.
42+
unsafe { core::mem::transmute::<&AsciiStr, &str>(self) }
43+
}
44+
45+
#[inline]
46+
pub const fn len(&self) -> usize {
47+
self.0.len()
48+
}
49+
50+
#[inline]
51+
pub const fn is_empty(&self) -> bool {
52+
self.0.is_empty()
53+
}
54+
}
55+
56+
// Must not implement `DerefMut`. Not every `char` is an ASCII character.
57+
impl Deref for AsciiStr {
58+
type Target = str;
59+
60+
#[inline]
61+
fn deref(&self) -> &Self::Target {
62+
self.as_str()
63+
}
64+
}
65+
66+
impl<Idx> Index<Idx> for AsciiStr
67+
where
68+
[AsciiChar]: Index<Idx, Output = [AsciiChar]>,
69+
{
70+
type Output = [AsciiChar];
71+
72+
#[inline]
73+
fn index(&self, index: Idx) -> &Self::Output {
74+
&self.0[index]
75+
}
76+
}
77+
78+
impl<Idx> IndexMut<Idx> for AsciiStr
79+
where
80+
[AsciiChar]: IndexMut<Idx, Output = [AsciiChar]>,
81+
{
82+
#[inline]
83+
fn index_mut(&mut self, index: Idx) -> &mut Self::Output {
84+
&mut self.0[index]
85+
}
86+
}
87+
88+
impl Default for &'static AsciiStr {
89+
#[inline]
90+
fn default() -> Self {
91+
// SAFETY: `Self` has the same layout as `str`, and the empty string trivially contains only ASCII characters.
92+
unsafe { core::mem::transmute::<&str, &AsciiStr>("") }
93+
}
94+
}
95+
96+
impl AsciiChar {
97+
pub const NULL: AsciiChar = AsciiChar::new(0);
98+
99+
#[inline]
100+
pub const fn slice_as_bytes<const N: usize>(src: &[AsciiChar; N]) -> &[u8; N] {
101+
// SAFETY: `[AsciiChar]` has the same layout as `[u8]`.
102+
unsafe { core::mem::transmute::<&[AsciiChar; N], &[u8; N]>(src) }
103+
}
104+
105+
#[inline]
106+
pub const fn two_digits(d: u32) -> [Self; 2] {
107+
const ALPHABET: &[u8; 10] = b"0123456789";
108+
109+
if d >= ALPHABET.len().pow(2) as u32 {
110+
panic!();
111+
}
112+
[
113+
Self::new(ALPHABET[d as usize / ALPHABET.len()]),
114+
Self::new(ALPHABET[d as usize % ALPHABET.len()]),
115+
]
116+
}
117+
118+
#[inline]
119+
pub const fn two_hex_digits(d: u32) -> [Self; 2] {
120+
const ALPHABET: &[u8; 16] = b"0123456789abcdef";
121+
122+
if d >= ALPHABET.len().pow(2) as u32 {
123+
panic!();
124+
}
125+
[
126+
Self::new(ALPHABET[d as usize / ALPHABET.len()]),
127+
Self::new(ALPHABET[d as usize % ALPHABET.len()]),
128+
]
129+
}
130+
}
131+
132+
mod _ascii_char {
133+
/// A character that is known to be in ASCII range, same layout as [`u8`].
134+
#[derive(Debug, Clone, Copy, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
135+
#[repr(transparent)]
136+
pub struct AsciiChar(u8);
137+
138+
impl AsciiChar {
139+
#[inline]
140+
pub const fn new(c: u8) -> Self {
141+
if c.is_ascii() { Self(c) } else { panic!() }
142+
}
143+
}
144+
}

rinja/src/filters/humansize.rs

+26-25
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
use core::convert::Infallible;
22
use core::fmt;
33
use core::mem::MaybeUninit;
4-
use core::str::from_utf8_unchecked;
54

65
use super::FastWritable;
6+
use crate::ascii_str::{AsciiChar, AsciiStr};
77

88
/// Returns adequate string representation (in KB, ..) of number of bytes
99
///
@@ -58,26 +58,27 @@ impl FastWritable for FilesizeFormatFilter {
5858
}
5959

6060
/// Formats `buffer` to contain the decimal point, decimal places and unit
61-
fn format_frac(buffer: &mut MaybeUninit<[u8; 8]>, prefix: u8, scaled: u32) -> &str {
62-
// LLVM generates better byte code for register sized buffers, so we add some NULs
63-
let buffer = buffer.write(*b"..0 kB\0\0");
61+
fn format_frac(buffer: &mut MaybeUninit<[AsciiChar; 8]>, prefix: AsciiChar, scaled: u32) -> &str {
62+
// LLVM generates better machine code for register-sized buffers, so the literal is padded with NULs to 8 bytes
63+
let buffer = buffer.write(AsciiStr::new_sized("..0 kB"));
6464
buffer[4] = prefix;
6565

6666
let frac = scaled % 100;
6767
let buffer = if frac == 0 {
6868
&buffer[3..6]
69-
} else if frac % 10 == 0 {
70-
// the decimal separator '.' is already contained in buffer[1]
71-
buffer[2] = b'0' + (frac / 10) as u8;
72-
&buffer[1..6]
7369
} else {
74-
// the decimal separator '.' is already contained in buffer[0]
75-
buffer[1] = b'0' + (frac / 10) as u8;
76-
buffer[2] = b'0' + (frac % 10) as u8;
77-
&buffer[0..6]
70+
let digits = AsciiChar::two_digits(frac);
71+
if digits[1] == AsciiChar::new(b'0') {
72+
// the decimal separator '.' is already contained in buffer[1]
73+
buffer[2] = digits[0];
74+
&buffer[1..6]
75+
} else {
76+
// the decimal separator '.' is already contained in buffer[0]
77+
[buffer[1], buffer[2]] = digits;
78+
&buffer[0..6]
79+
}
7880
};
79-
// SAFETY: we know that the buffer contains only ASCII data
80-
unsafe { from_utf8_unchecked(buffer) }
81+
AsciiStr::from_slice(buffer).as_str()
8182
}
8283

8384
#[cold]
@@ -87,17 +88,17 @@ fn too_big<W: fmt::Write + ?Sized>(value: f32, dest: &mut W) -> crate::Result<()
8788
}
8889

8990
/// `((si_prefix, factor), limit)`, the factor is offset by 10**2 to account for 2 decimal places
90-
const SI_PREFIXES: &[((u8, f32), f32)] = &[
91-
((b'k', 1e-1), 1e6),
92-
((b'M', 1e-4), 1e9),
93-
((b'G', 1e-7), 1e12),
94-
((b'T', 1e-10), 1e15),
95-
((b'P', 1e-13), 1e18),
96-
((b'E', 1e-16), 1e21),
97-
((b'Z', 1e-19), 1e24),
98-
((b'Y', 1e-22), 1e27),
99-
((b'R', 1e-25), 1e30),
100-
((b'Q', 1e-28), 1e33),
91+
const SI_PREFIXES: &[((AsciiChar, f32), f32)] = &[
92+
((AsciiChar::new(b'k'), 1e-1), 1e6),
93+
((AsciiChar::new(b'M'), 1e-4), 1e9),
94+
((AsciiChar::new(b'G'), 1e-7), 1e12),
95+
((AsciiChar::new(b'T'), 1e-10), 1e15),
96+
((AsciiChar::new(b'P'), 1e-13), 1e18),
97+
((AsciiChar::new(b'E'), 1e-16), 1e21),
98+
((AsciiChar::new(b'Z'), 1e-19), 1e24),
99+
((AsciiChar::new(b'Y'), 1e-22), 1e27),
100+
((AsciiChar::new(b'R'), 1e-25), 1e30),
101+
((AsciiChar::new(b'Q'), 1e-28), 1e33),
101102
];
102103

103104
#[test]

0 commit comments

Comments
 (0)