uucore: format: Small optimizations in num_format for seq

drinkcat · drinkcat · commit e4bf65785110 · 2025-03-18T18:10:31.000+01:00
In most common use cases:
 - We can bypass a lot of `write_output` when width == 0.
 - Simplify format_float_decimal when the input is an integer.

Also document interesting cases in src/uu/seq/BENCHMARKING.md.
diff --git a/src/uu/seq/BENCHMARKING.md b/src/uu/seq/BENCHMARKING.md
@@ -19,7 +19,38 @@ Finally, you can compare the performance of the two versions of `seq`
 by running, for example,
 
 ```shell
-hyperfine "seq 1000000" "target/release/seq 1000000"
+hyperfine -L seq seq,target/release/seq "{seq} 1000000"
+```
+
+## Interesting test cases
+
+Performance characteristics may vary a lot depending on the parameters
+and on whether custom formatting is required. In particular, it appears
+that the GNU implementation is heavily optimized for positive integer
+outputs (which is probably the most common use case for `seq`).
+
+Specifying a format or fixed width will slow down the
+execution a lot (~15-20 times on GNU `seq`):
+```shell
+hyperfine -L seq seq,target/release/seq "{seq} -f%g 1000000"
+hyperfine -L seq seq,target/release/seq "{seq} -w 1000000"
+```
+
+Floating-point increments and negative bounds also degrade
+performance (~10-15 times on GNU `seq`):
+```shell
+hyperfine -L seq seq,target/release/seq "{seq} 0 0.000001 1"
+hyperfine -L seq seq,target/release/seq "{seq} -100 1 1000000"
+```
+
+It is also interesting to compare performance with a high-precision
+format. But in this case, the output itself should also be compared,
+as GNU `seq` may not provide the same precision (the `uutils` version of
+`seq` provides arbitrary precision, while GNU `seq` appears to be
+limited to `long double` on the given platform, i.e. 64/80/128-bit
+float):
+```shell
+hyperfine -L seq seq,target/release/seq "{seq} -f%.30f 0 0.000001 1"
 ```
 
 [0]: https://github.com/sharkdp/hyperfine
diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs
@@ -354,11 +354,16 @@ fn format_float_non_finite(e: &ExtendedBigDecimal, case: Case) -> String {
 
 fn format_float_decimal(bd: &BigDecimal, precision: usize, force_decimal: ForceDecimal) -> String {
     debug_assert!(!bd.is_negative());
-    if precision == 0 && force_decimal == ForceDecimal::Yes {
-        format!("{bd:.0}.")
-    } else {
-        format!("{bd:.precision$}")
+    if precision == 0 {
+        let (bi, scale) = bd.as_bigint_and_scale();
+        if scale == 0 && force_decimal != ForceDecimal::Yes {
+            // Optimization when printing integers.
+            return bi.to_str_radix(10);
+        } else if force_decimal == ForceDecimal::Yes {
+            return format!("{bd:.0}.");
+        }
     }
+    format!("{bd:.precision$}")
 }
 
 fn format_float_scientific(
@@ -614,6 +619,11 @@ fn write_output(
     width: usize,
     alignment: NumberAlignment,
 ) -> std::io::Result<()> {
+    if width == 0 {
+        writer.write_all(sign_indicator.as_bytes())?;
+        writer.write_all(s.as_bytes())?;
+        return Ok(());
+    }
     // Take length of `sign_indicator`, which could be 0 or 1, into consideration when padding
     // by storing remaining_width indicating the actual width needed.
     // Using min() because self.width could be 0, 0usize - 1usize should be avoided