Skip to content

Commit 8d6b595

Browse files
committed
Speed-up HTML escaping a bit using bit tests
```text
$ cargo bench --bench escape
Escaping                time:   [3.2732 µs 3.2803 µs 3.2882 µs]
                        change: [-7.0453% -6.7529% -6.4625%] (p = 0.00 < 0.05)
                        Performance has improved.
```
1 parent f90e561 commit 8d6b595

File tree

1 file changed

+69
-40
lines changed

1 file changed

+69
-40
lines changed

rinja/src/html.rs

+69-40
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,39 @@
1-
use std::fmt;
2-
use std::num::NonZeroU8;
1+
use std::{fmt, str};
32

43
#[allow(unused)]
5-
pub(crate) fn write_escaped_str(mut fmt: impl fmt::Write, string: &str) -> fmt::Result {
6-
let mut escaped_buf = *b"&#__;";
7-
let mut last = 0;
4+
pub(crate) fn write_escaped_str(mut fmt: impl fmt::Write, mut string: &str) -> fmt::Result {
5+
let mut escaped_buf = *b"&#__;\0\0\0";
6+
'outer: while !string.is_empty() {
7+
for (index, byte) in string.bytes().enumerate() {
8+
if !(MIN_CHAR..=MAX_CHAR).contains(&byte) || (BITS & (1 << (byte - MIN_CHAR))) == 0 {
9+
continue;
10+
}
811

9-
for (index, byte) in string.bytes().enumerate() {
10-
let escaped = match byte {
11-
MIN_CHAR..=MAX_CHAR => TABLE.lookup[(byte - MIN_CHAR) as usize],
12-
_ => None,
13-
};
14-
if let Some(escaped) = escaped {
15-
escaped_buf[2] = escaped[0].get();
16-
escaped_buf[3] = escaped[1].get();
17-
fmt.write_str(&string[last..index])?;
18-
fmt.write_str(unsafe { std::str::from_utf8_unchecked(escaped_buf.as_slice()) })?;
19-
last = index + 1;
12+
[escaped_buf[2], escaped_buf[3]] = TABLE.lookup[(byte - MIN_CHAR) as usize];
13+
14+
if index > 0 {
15+
// SAFETY: We know that the current index cannot exceed the string's length,
16+
// and that it is on a char boundary, because `string[i].is_ascii()`.
17+
fmt.write_str(unsafe { string.get_unchecked(..index) })?;
18+
}
19+
20+
// SAFETY: We know that `escaped_buf` contains ASCII data.
21+
fmt.write_str(unsafe { str::from_utf8_unchecked(&escaped_buf[..5]) })?;
22+
23+
if index + 1 == string.len() {
24+
break 'outer;
25+
}
26+
// SAFETY: We just checked that the new length is inside the string's bounds.
27+
string = unsafe { string.get_unchecked(index + 1..) };
28+
29+
continue 'outer;
2030
}
31+
if !string.is_empty() {
32+
fmt.write_str(string)?;
33+
}
34+
break;
2135
}
22-
fmt.write_str(&string[last..])
36+
Ok(())
2337
}
2438

2539
#[allow(unused)]
@@ -34,38 +48,53 @@ pub(crate) fn write_escaped_char(mut fmt: impl fmt::Write, c: char) -> fmt::Resu
3448
})
3549
}
3650

37-
const MIN_CHAR: u8 = b'"';
38-
const MAX_CHAR: u8 = b'>';
51+
const CHARS: &[u8] = b"\"&\'<>";
52+
53+
const MIN_MAX_CHAR: (u8, u8) = {
54+
let mut min = 0xff;
55+
let mut max = 0x00;
56+
let mut i = 0;
57+
while i < CHARS.len() {
58+
if CHARS[i] < min {
59+
min = CHARS[i];
60+
}
61+
if CHARS[i] > max {
62+
max = CHARS[i];
63+
}
64+
i += 1;
65+
}
66+
assert!(max - min < 32);
67+
(min, max)
68+
};
69+
const MIN_CHAR: u8 = MIN_MAX_CHAR.0;
70+
const MAX_CHAR: u8 = MIN_MAX_CHAR.1;
3971

4072
struct Table {
4173
_align: [usize; 0],
42-
lookup: [Option<[NonZeroU8; 2]>; (MAX_CHAR - MIN_CHAR + 1) as usize],
74+
lookup: [[u8; 2]; (MAX_CHAR - MIN_CHAR + 1) as usize],
4375
}
4476

45-
const TABLE: Table = {
46-
const fn n(c: u8) -> Option<[NonZeroU8; 2]> {
47-
assert!(MIN_CHAR <= c && c <= MAX_CHAR);
48-
49-
let n0 = match NonZeroU8::new(c / 10 + b'0') {
50-
Some(n) => n,
51-
None => panic!(),
52-
};
53-
let n1 = match NonZeroU8::new(c % 10 + b'0') {
54-
Some(n) => n,
55-
None => panic!(),
56-
};
57-
Some([n0, n1])
77+
const BITS: u32 = {
78+
let mut bits = 0u32;
79+
let mut i = 0;
80+
while i < CHARS.len() {
81+
bits |= 1 << (CHARS[i] - MIN_CHAR);
82+
i += 1;
5883
}
84+
bits
85+
};
5986

87+
const TABLE: Table = {
6088
let mut table = Table {
6189
_align: [],
62-
lookup: [None; (MAX_CHAR - MIN_CHAR + 1) as usize],
90+
lookup: [[0; 2]; (MAX_CHAR - MIN_CHAR + 1) as usize],
6391
};
64-
65-
table.lookup[(b'"' - MIN_CHAR) as usize] = n(b'"');
66-
table.lookup[(b'&' - MIN_CHAR) as usize] = n(b'&');
67-
table.lookup[(b'\'' - MIN_CHAR) as usize] = n(b'\'');
68-
table.lookup[(b'<' - MIN_CHAR) as usize] = n(b'<');
69-
table.lookup[(b'>' - MIN_CHAR) as usize] = n(b'>');
92+
let mut i = 0;
93+
while i < CHARS.len() {
94+
let h = CHARS[i] / 10 + b'0';
95+
let l = CHARS[i] % 10 + b'0';
96+
table.lookup[(CHARS[i] - MIN_CHAR) as usize] = [h, l];
97+
i += 1;
98+
}
7099
table
71100
};

0 commit comments

Comments
 (0)