Skip to content

Commit 9fb91aa

Browse files
committed
Auto merge of #122059 - nyurik:with-as-const-str, r=cuviper
Optimize write with as_const_str for shorter code Following up on #121001 Apparently this code generates significant code block for each call to `write()` with non-simple formatting string - approx 100 lines of assembly code, possibly due to `dyn` (?). See generated assembly code [here](https://github.com/nyurik/rust-optimize-format-str/compare/before-changes..with-my-change#diff-6b404e954c692d8cdc8c452d819a216aa5dcf40522b5944639e9ad947279a477): <details><summary>Details</summary> <p> This is the inlining of `write!(buffer, "Iteration {value} was written")` ```asm core::fmt::Write::write_fmt: // /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 194 fn write_fmt(&mut self, args: Arguments<'_>) -> Result { push r15 push r14 push r13 push r12 push rbx mov rdx, rsi // /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 427 match (self.pieces, self.args) { mov rcx, qword ptr [rsi + 8] mov rax, qword ptr [rsi + 24] // /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 428 ([], []) => Some(""), cmp rcx, 1 je .LBB0_8 test rcx, rcx jne .LBB0_9 test rax, rax jne .LBB0_9 // /home/nyurik/dev/rust/rust/library/alloc/src/vec/mod.rs : 911 self.buf.reserve(self.len, additional); lea r12, [rdi + 16] lea rsi, [rip + .L__unnamed_2] xor ebx, ebx .LBB0_6: mov r14, qword ptr [r12] jmp .LBB0_7 .LBB0_8: // /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 429 ([s], []) => Some(s), test rax, rax je .LBB0_4 .LBB0_9: // /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 1108 if let Some(s) = args.as_str() { output.write_str(s) } else { write_internal(output, args) } lea rsi, [rip + .L__unnamed_1] pop rbx pop r12 pop r13 pop r14 pop r15 jmp qword ptr [rip + core::fmt::write_internal@GOTPCREL] .LBB0_4: mov rax, qword ptr [rdx] // /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 429 ([s], []) => Some(s), mov rsi, qword ptr [rax] mov rbx, qword ptr [rax + 8] // /home/nyurik/dev/rust/rust/library/alloc/src/raw_vec.rs : 248 if T::IS_ZST { usize::MAX } else { 
self.cap.0 } mov rax, qword ptr [rdi] // /home/nyurik/dev/rust/rust/library/alloc/src/vec/mod.rs : 911 self.buf.reserve(self.len, additional); mov r14, qword ptr [rdi + 16] // /home/nyurik/dev/rust/rust/library/core/src/num/mod.rs : 1281 uint_impl! { sub rax, r14 // /home/nyurik/dev/rust/rust/library/alloc/src/raw_vec.rs : 392 additional > self.capacity().wrapping_sub(len) cmp rax, rbx // /home/nyurik/dev/rust/rust/library/alloc/src/raw_vec.rs : 309 if self.needs_to_grow(len, additional) { jb .LBB0_5 .LBB0_7: mov rax, qword ptr [rdi + 8] // /home/nyurik/dev/rust/rust/library/core/src/ptr/mut_ptr.rs : 1046 unsafe { intrinsics::offset(self, count) } add rax, r14 mov r15, rdi // /home/nyurik/dev/rust/rust/library/core/src/intrinsics.rs : 2922 copy_nonoverlapping(src, dst, count) mov rdi, rax mov rdx, rbx call qword ptr [rip + memcpy@GOTPCREL] // /home/nyurik/dev/rust/rust/library/alloc/src/vec/mod.rs : 2040 self.len += count; add r14, rbx mov qword ptr [r15 + 16], r14 // /home/nyurik/dev/rust/rust/library/core/src/fmt/mod.rs : 216 } xor eax, eax pop rbx pop r12 pop r13 pop r14 pop r15 ret .LBB0_5: // /home/nyurik/dev/rust/rust/library/alloc/src/vec/mod.rs : 911 self.buf.reserve(self.len, additional); lea r12, [rdi + 16] mov r15, rdi mov r13, rsi // /home/nyurik/dev/rust/rust/library/alloc/src/raw_vec.rs : 310 do_reserve_and_handle(self, len, additional); mov rsi, r14 mov rdx, rbx call alloc::raw_vec::RawVec<T,A>::reserve::do_reserve_and_handle mov rsi, r13 mov rdi, r15 jmp .LBB0_6 ``` </p> </details> ```rust #[inline] pub fn write(output: &mut dyn Write, args: Arguments<'_>) -> Result { if let Some(s) = args.as_str() { output.write_str(s) } else { write_internal(output, args) } } ``` So, this brings back the older experiment - where I used `if core::intrinsics::is_val_statically_known(s.is_some()) { s } else { None }` helper function, and called it in multiple places that used `write`. 
This is less than ideal, because every caller of `write` must now repeat this check, but it produces significantly smaller assembly code for the formatting case and assembly identical to the current output for the "simple" (no formatting) case. See the [assembly comparison](https://github.com/nyurik/rust-optimize-format-str/compare/with-my-change..with-as-const-str#diff-6b404e954c692d8cdc8c452d819a216aa5dcf40522b5944639e9ad947279a477) between the current code and the code produced by this change (focus only on the `fmt/intel-lib.txt` and `str/intel-lib.txt` files). ```rust if let Some(s) = args.as_const_str() { self.write_str(s) } else { write(self, args) } ```
2 parents 79d2461 + 3d0d0ce commit 9fb91aa

File tree

1 file changed

+26
-10
lines changed

1 file changed

+26
-10
lines changed

library/core/src/fmt/mod.rs

+26-10
Original file line numberDiff line numberDiff line change
@@ -201,14 +201,22 @@ pub trait Write {
201201
impl<W: Write + ?Sized> SpecWriteFmt for &mut W {
202202
#[inline]
203203
default fn spec_write_fmt(mut self, args: Arguments<'_>) -> Result {
204-
write(&mut self, args)
204+
if let Some(s) = args.as_const_str() {
205+
self.write_str(s)
206+
} else {
207+
write(&mut self, args)
208+
}
205209
}
206210
}
207211

208212
impl<W: Write> SpecWriteFmt for &mut W {
209213
#[inline]
210214
fn spec_write_fmt(self, args: Arguments<'_>) -> Result {
211-
write(self, args)
215+
if let Some(s) = args.as_const_str() {
216+
self.write_str(s)
217+
} else {
218+
write(self, args)
219+
}
212220
}
213221
}
214222

@@ -430,6 +438,14 @@ impl<'a> Arguments<'a> {
430438
_ => None,
431439
}
432440
}
441+
442+
/// Same as [`Arguments::as_str`], but will only return `Some(s)` if it can be determined at compile time.
443+
#[must_use]
444+
#[inline]
445+
fn as_const_str(&self) -> Option<&'static str> {
446+
let s = self.as_str();
447+
if core::intrinsics::is_val_statically_known(s.is_some()) { s } else { None }
448+
}
433449
}
434450

435451
#[stable(feature = "rust1", since = "1.0.0")]
@@ -1119,14 +1135,8 @@ pub trait UpperExp {
11191135
/// ```
11201136
///
11211137
/// [`write!`]: crate::write!
1122-
#[inline]
11231138
#[stable(feature = "rust1", since = "1.0.0")]
11241139
pub fn write(output: &mut dyn Write, args: Arguments<'_>) -> Result {
1125-
if let Some(s) = args.as_str() { output.write_str(s) } else { write_internal(output, args) }
1126-
}
1127-
1128-
/// Actual implementation of the [`write()`], but without the simple string optimization.
1129-
fn write_internal(output: &mut dyn Write, args: Arguments<'_>) -> Result {
11301140
let mut formatter = Formatter::new(output);
11311141
let mut idx = 0;
11321142

@@ -1605,8 +1615,9 @@ impl<'a> Formatter<'a> {
16051615
/// assert_eq!(format!("{:0>8}", Foo(2)), "Foo 2");
16061616
/// ```
16071617
#[stable(feature = "rust1", since = "1.0.0")]
1618+
#[inline]
16081619
pub fn write_fmt(&mut self, fmt: Arguments<'_>) -> Result {
1609-
write(self.buf, fmt)
1620+
if let Some(s) = fmt.as_const_str() { self.buf.write_str(s) } else { write(self.buf, fmt) }
16101621
}
16111622

16121623
/// Flags for formatting
@@ -2295,8 +2306,13 @@ impl Write for Formatter<'_> {
22952306
self.buf.write_char(c)
22962307
}
22972308

2309+
#[inline]
22982310
fn write_fmt(&mut self, args: Arguments<'_>) -> Result {
2299-
write(self.buf, args)
2311+
if let Some(s) = args.as_const_str() {
2312+
self.buf.write_str(s)
2313+
} else {
2314+
write(self.buf, args)
2315+
}
23002316
}
23012317
}
23022318

0 commit comments

Comments
 (0)