Skip to content

Commit 62b834f

Browse files
Rollup merge of rust-lang#84751 - Soveu:is_char_boundary_opt, r=Amanieu
str::is_char_boundary - slight optimization

The current `str::is_char_boundary` implementation emits slightly more instructions, because it includes an additional branch for `index == s.len()`:

```rust
pub fn is_char_boundary(s: &str, index: usize) -> bool {
    if index == 0 || index == s.len() {
        return true;
    }
    match s.as_bytes().get(index) {
        None => false,
        Some(&b) => (b as i8) >= -0x40,
    }
}
```

Just changing the place of `index == s.len()` merges it with the `index < s.len()` check from `s.as_bytes().get(index)`:

```rust
pub fn is_char_boundary2(s: &str, index: usize) -> bool {
    if index == 0 {
        return true;
    }
    match s.as_bytes().get(index) {
        // For some reason, LLVM likes this comparison here more
        None => index == s.len(),
        // This is bit magic equivalent to: b < 128 || b >= 192
        Some(&b) => (b as i8) >= -0x40,
    }
}
```

This one has better codegen on every platform except powerpc.

<details><summary>x86 codegen</summary>
<p>

```nasm
example::is_char_boundary:
        mov     al, 1
        test    rdx, rdx
        je      .LBB0_5
        cmp     rsi, rdx
        je      .LBB0_5
        cmp     rsi, rdx
        jbe     .LBB0_3
        cmp     byte ptr [rdi + rdx], -65
        setg    al
.LBB0_5:
        ret
.LBB0_3:
        xor     eax, eax
        ret

example::is_char_boundary2:
        test    rdx, rdx
        je      .LBB1_1
        cmp     rsi, rdx
        jbe     .LBB1_4
        cmp     byte ptr [rdi + rdx], -65
        setg    al
        ret
.LBB1_1:     ; technically this branch is the same as LBB1_4
        mov     al, 1
        ret
.LBB1_4:
        sete    al
        ret
```

</p>
</details>

<details><summary>aarch64 codegen</summary>
<p>

```as
example::is_char_boundary:
        mov     x8, x0
        mov     w0, #1
        cbz     x2, .LBB0_4
        cmp     x1, x2
        b.eq    .LBB0_4
        b.ls    .LBB0_5
        ldrsb   w8, [x8, x2]
        cmn     w8, #65
        cset    w0, gt
.LBB0_4:
        ret
.LBB0_5:
        mov     w0, wzr
        ret

example::is_char_boundary2:
        cbz     x2, .LBB1_3
        cmp     x1, x2
        b.ls    .LBB1_4
        ldrsb   w8, [x0, x2]
        cmn     w8, #65
        cset    w0, gt
        ret
.LBB1_3:
        mov     w0, #1
        ret
.LBB1_4:
        cset    w0, eq
        ret
```

</p>
</details>

<details><summary>riscv64gc codegen</summary>
<p>

```as
example::is_char_boundary:
        seqz    a3, a2
        xor     a4, a1, a2
        seqz    a4, a4
        or      a4, a4, a3
        addi    a3, zero, 1
        bnez    a4, .LBB0_3
        bgeu    a2, a1, .LBB0_4
        add     a0, a0, a2
        lb      a0, 0(a0)
        addi    a1, zero, -65
        slt     a3, a1, a0
.LBB0_3:
        mv      a0, a3
        ret
.LBB0_4:
        mv      a0, zero
        ret

example::is_char_boundary2:
        beqz    a2, .LBB1_3
        bgeu    a2, a1, .LBB1_4
        add     a0, a0, a2
        lb      a0, 0(a0)
        addi    a1, zero, -65
        slt     a0, a1, a0
        ret
.LBB1_3:
        addi    a0, zero, 1
        ret
.LBB1_4:
        xor     a0, a1, a2
        seqz    a0, a0
        ret
```

</p>
</details>

[Link to godbolt](https://godbolt.org/z/K8avEz8Gr)

`@rustbot` label: A-codegen
2 parents 2a245f4 + 7bd9d9f commit 62b834f

File tree

1 file changed

+15
-3
lines changed

1 file changed

+15
-3
lines changed

library/core/src/str/mod.rs

+15-3
Original file line number | Diff line number | Diff line change
@@ -192,14 +192,26 @@ impl str {
192192
#[stable(feature = "is_char_boundary", since = "1.9.0")]
193193
#[inline]
194194
pub fn is_char_boundary(&self, index: usize) -> bool {
195-
// 0 and len are always ok.
195+
// 0 is always ok.
196196
// Test for 0 explicitly so that it can optimize out the check
197197
// easily and skip reading string data for that case.
198-
if index == 0 || index == self.len() {
198+
// Note that optimizing `self.get(..index)` relies on this.
199+
if index == 0 {
199200
return true;
200201
}
202+
201203
match self.as_bytes().get(index) {
202-
None => false,
204+
// For `None` we have two options:
205+
//
206+
// - index == self.len()
207+
// Empty strings are valid, so return true
208+
// - index > self.len()
209+
// In this case return false
210+
//
211+
// The check is placed exactly here, because it improves generated
212+
// code on higher opt-levels. See PR #84751 for more details.
213+
None => index == self.len(),
214+
203215
// This is bit magic equivalent to: b < 128 || b >= 192
204216
Some(&b) => (b as i8) >= -0x40,
205217
}

0 commit comments

Comments
 (0)