v2.4 --scale --stdev

dkechag · dkechag · commit a39ce01c3f2e · 2023-09-29T19:40:22.000+01:00
diff --git a/Changes b/Changes
@@ -1,5 +1,8 @@
 Revision history for Benchmark-DKbench
 
+2.4     2023-09-29
+        - Added --scale and --stdev options.
+
 2.3     2023-09-25
         - Show Perl threads/multi config.
         - Minimum module version for passing benchmarks.
diff --git a/README.md b/README.md
@@ -99,11 +99,13 @@ options to control number of threads, iterations, which benchmarks to run etc:
     --multi,       -m     : Multi-threaded using all your CPU cores/threads.
     --max_threads <i>     : Override the cpu detection to specify max cpu threads.
     --iter <i>,    -i <i> : Number of suite iterations (with min/max/avg at the end).
+    --stdev               : Show relative standard deviation (for iter > 1).
     --include <regex>     : Run only benchmarks that match regex.
     --exclude <regex>     : Do not run benchmarks that match regex.
     --time,        -t     : Report time (sec) instead of score.
     --quick,       -q     : Quick benchmark run (implies -t).
     --no_mce              : Do not run under MCE::Loop (implies -j 1).
+    --scale <i>,   -s <i> : Scale the bench workload by x times.
     --skip_bio            : Skip BioPerl benchmarks.
     --skip_prove          : Skip Moose prove benchmark.
     --time_piece          : Run optional Time::Piece benchmark (see benchmark details).
@@ -121,18 +123,26 @@ multi vs single threaded scalability.
 The scores are calibrated such that a reference CPU (Intel Xeon Platinum 8481C -
 Sapphire Rapids) would achieve a score of 1000 in a single-core benchmark run using
 the default software configuration (Linux/Perl 5.36.0 built with multiplicity and
-threads, with reference CPAN module versions).
+threads, with reference CPAN module versions). Perl built without thread support and
+multi(plicity) will be a bit faster (usually in the order of ~3-4%), while older Perl
+versions will most likely be slower. Different CPAN module versions will also impact
+scores, using `setup_dkbench` is a way to ensure a reference environment for more
+meaningful hardware comparisons.
 
-The multi-thread scalability should approach 100% if each thread runs on a full core
-(i.e. no SMT), and the core can maintain the clock speed it had on the single-thread
-runs. Note that the overall scalability is an average of the benchmarks that drops
-non-scaling outliers (over 2\*stdev less than the mean).
+The multi-thread scalability calculated by the suite should approach 100% if each
+thread runs on a full core (i.e. no SMT), and the core can maintain the clock speed
+it had on the single-thread runs. Note that the overall scalability is an average
+of the benchmarks that drops non-scaling outliers (over 2\*stdev less than the mean).
+
+If you want to reduce the effects of thermal throttling, which will lower the speed
+of (mainly multi-threaded) benchmarks as the CPU temperature increases, the `sleep`
+option can help by adding cooldown time between each benchmark.
 
 The suite will report a Pass/Fail per benchmark. A failure may be caused if you have
 different CPAN module version installed - this is normal, and you will be warned.
 
-The suite uses [MCE::Loop](https://metacpan.org/pod/MCE%3A%3ALoop) to run on the desired number of parallel threads, although
-there is an option to disable it, which forces a single-thread run.
+[MCE::Loop](https://metacpan.org/pod/MCE%3A%3ALoop) is used to run on the desired number of parallel threads, with minimal
+overhead., There is an option to disable it, which forces a single-thread run.
 
 ## `setup_dkbench`
 
@@ -238,6 +248,9 @@ Prints out software/hardware configuration and returns then number of cores dete
 Runs the benchmark suite given the `%options` and prints results. Returns a hash
 with run stats.
 
+The options accepted are the same as the `dkbench` script (in their long form),
+except `help`, `setup` and `max_threads` which are command-line only.
+
 ## `calc_scalability`
 
     calc_scalability(\%options, \%stat_single, \%stat_multi);
@@ -263,7 +276,7 @@ actual workload.
 ## SCORES
 
 Some sample DKbench score results from various systems for comparison (all on
-reference setup with Perl 5.36.0):
+reference setup with Perl 5.36.0 thread-multi):
 
     CPU                                     Cores/HT   Single   Multi   Scalability
     Intel i7-4750HQ @ 2.0 (MacOS)                4/8     612     2332      46.9%
diff --git a/dkbench b/dkbench
@@ -21,11 +21,13 @@ See POD on the main module for info:
  --multi,       -m     : Multi-threaded using all your CPU cores/threads.
  --max_threads <i>     : Override the cpu detection to specify max cpu threads.
  --iter <i>,    -i <i> : Number of suite iterations (with min/max/avg at the end).
+ --stdev               : Show relative standard deviation (for iter > 1).
  --include <regex>     : Run only benchmarks that match regex.
  --exclude <regex>     : Do not run benchmarks that match regex.
  --time,        -t     : Report time (sec) instead of score.
  --quick,       -q     : Quick benchmark run (implies -t).
  --no_mce              : Do not run under MCE::Loop (implies -j 1).
+ --scale <i>,   -s <i> : Scale the bench workload by x times.
  --skip_bio            : Skip BioPerl benchmarks.
  --skip_prove          : Skip Moose prove benchmark.
  --time_piece          : Run optional Time::Piece benchmark (see benchmark details).
@@ -70,9 +72,10 @@ GetOptions (
     'max_threads=i',
     'include=s',
     'exclude=s',
-    'repeat|r=i',
+    'scale|s=i',
     'no_mce|n',
     'sleep=i',
+    'stdev',
     'ver=s',
     'setup',
     'datapath=s',
@@ -81,9 +84,9 @@ GetOptions (
 
 pod2usage({ -verbose => 1, -output => \*STDOUT, -noperldoc => 1}) if $opt{help};
 
-$opt{iter} ||= 1;
-$opt{repeat} ||= 1;
-$opt{time} = 1 if $opt{quick} || $opt{repeat} > 1;
+$opt{iter}  ||= 1;
+$opt{scale} ||= 1;
+$opt{time} = 1 if $opt{quick};
 
 Benchmark::DKbench::Setup::fetch_genbank($opt{datapath}) if $opt{setup};
 
diff --git a/lib/Benchmark/DKbench.pm b/lib/Benchmark/DKbench.pm
@@ -40,7 +40,7 @@ our @EXPORT    = qw(system_identity suite_run calc_scalability);
 our $datadir   = dist_dir("Benchmark-DKbench");
 my $mono_clock = $^O !~ /win/i || $Time::HiRes::VERSION >= 1.9764;
 
-our $VERSION = '2.3';
+our $VERSION = '2.4';
 
 =head1 NAME
 
@@ -143,11 +143,13 @@ options to control number of threads, iterations, which benchmarks to run etc:
  --multi,       -m     : Multi-threaded using all your CPU cores/threads.
  --max_threads <i>     : Override the cpu detection to specify max cpu threads.
  --iter <i>,    -i <i> : Number of suite iterations (with min/max/avg at the end).
+ --stdev               : Show relative standard deviation (for iter > 1).
  --include <regex>     : Run only benchmarks that match regex.
  --exclude <regex>     : Do not run benchmarks that match regex.
  --time,        -t     : Report time (sec) instead of score.
  --quick,       -q     : Quick benchmark run (implies -t).
  --no_mce              : Do not run under MCE::Loop (implies -j 1).
+ --scale <i>,   -s <i> : Scale the bench workload by x times.
  --skip_bio            : Skip BioPerl benchmarks.
  --skip_prove          : Skip Moose prove benchmark.
  --time_piece          : Run optional Time::Piece benchmark (see benchmark details).
@@ -165,18 +167,26 @@ multi vs single threaded scalability.
 The scores are calibrated such that a reference CPU (Intel Xeon Platinum 8481C -
 Sapphire Rapids) would achieve a score of 1000 in a single-core benchmark run using
 the default software configuration (Linux/Perl 5.36.0 built with multiplicity and
-threads, with reference CPAN module versions).
+threads, with reference CPAN module versions). Perl built without thread support and
+multi(plicity) will be a bit faster (usually in the order of ~3-4%), while older Perl
+versions will most likely be slower. Different CPAN module versions will also impact
+scores, using C<setup_dkbench> is a way to ensure a reference environment for more
+meaningful hardware comparisons.
 
-The multi-thread scalability should approach 100% if each thread runs on a full core
-(i.e. no SMT), and the core can maintain the clock speed it had on the single-thread
-runs. Note that the overall scalability is an average of the benchmarks that drops
-non-scaling outliers (over 2*stdev less than the mean).
+The multi-thread scalability calculated by the suite should approach 100% if each
+thread runs on a full core (i.e. no SMT), and the core can maintain the clock speed
+it had on the single-thread runs. Note that the overall scalability is an average
+of the benchmarks that drops non-scaling outliers (over 2*stdev less than the mean).
+
+If you want to reduce the effects of thermal throttling, which will lower the speed
+of (mainly multi-threaded) benchmarks as the CPU temperature increases, the C<sleep>
+option can help by adding cooldown time between each benchmark.
 
 The suite will report a Pass/Fail per benchmark. A failure may be caused if you have
 different CPAN module version installed - this is normal, and you will be warned.
 
-The suite uses L<MCE::Loop> to run on the desired number of parallel threads, although
-there is an option to disable it, which forces a single-thread run.
+L<MCE::Loop> is used to run on the desired number of parallel threads, with minimal
+overhead., There is an option to disable it, which forces a single-thread run.
 
 =head2 C<setup_dkbench>
 
@@ -306,6 +316,9 @@ Prints out software/hardware configuration and returns then number of cores dete
 Runs the benchmark suite given the C<%options> and prints results. Returns a hash
 with run stats.
 
+The options accepted are the same as the C<dkbench> script (in their long form),
+except C<help>, C<setup> and C<max_threads> which are command-line only.
+
 =head2 C<calc_scalability>
 
  calc_scalability(\%options, \%stat_single, \%stat_multi);
@@ -331,7 +344,7 @@ actual workload.
 =head2 SCORES
 
 Some sample DKbench score results from various systems for comparison (all on
-reference setup with Perl 5.36.0):
+reference setup with Perl 5.36.0 thread-multi):
 
  CPU                                     Cores/HT   Single   Multi   Scalability
  Intel i7-4750HQ @ 2.0 (MacOS)                4/8     612     2332      46.9%
@@ -423,7 +436,7 @@ sub suite_run {
     my $opt = shift;
     $datadir = $opt->{datapath} if $opt->{datapath};
     $opt->{threads} //= 1;
-    $opt->{repeat} //= 1;
+    $opt->{scale} //= 1;
     $opt->{f} = $opt->{time} ? '%.3f' : '%5.0f';
     my %stats = (threads => $opt->{threads});
 
@@ -529,15 +542,15 @@ sub mce_bench_run {
             MCE->gather([$time, $res]);
         }
     }
-    (1 .. $opt->{threads} * $opt->{repeat});
+    (1 .. $opt->{threads} * $opt->{scale});
 
     my ($res, $time) = ('Pass', 0);
     foreach (@stats) {
         $time += $_->[0];
         $res = $_->[1] if $_->[1] ne 'Pass';
     }
 
-    return $time/($opt->{threads}*$opt->{repeat} || 1), $res;
+    return $time/($opt->{threads}*$opt->{scale} || 1), $res;
 }
 
 sub bench_run {
@@ -1033,6 +1046,7 @@ sub total_stats {
     my $display = $opt->{time} ? 'times' : 'scores';
     my $title   = $opt->{time} ? 'Time (sec)' : 'Score';
     print "Aggregates:\n".pad_to("Benchmark",24).pad_to("Avg $title").pad_to("Min $title").pad_to("Max $title");
+    print pad_to("stdev %") if $opt->{stdev};
     print pad_to("Pass %") unless $opt->{time};
     print "\n";
     foreach my $bench (sort keys %$benchmarks) {
@@ -1053,7 +1067,13 @@ sub calc_stats {
     my $arr = shift;
     my $pad = shift;
     my ($min, $max, $avg) = min_max_avg($arr);
-    return $avg, join '', map {pad_to(sprintf($opt->{f}, $_), $pad)} ($avg,$min,$max);
+    my $str = join '', map {pad_to(sprintf($opt->{f}, $_), $pad)} ($avg,$min,$max);
+    if ($opt->{stdev} && $avg) {
+        my $stdev = avg_stdev($arr);
+        $stdev *= 100/$avg;
+        $str .= pad_to(sprintf("%0.2f%%", $stdev), $pad);
+    }
+    return $avg, $str;
 }
 
 sub min_max_avg {
diff --git a/t/simple.t b/t/simple.t
@@ -42,6 +42,7 @@ diag $std[0];
             skip_prove => 1,
             bio_codons => 1,
             iter       => 2,
+            stdev      => 1,
             no_mce     => 1,
             include    => 'Matrix'
         }
@@ -60,7 +61,7 @@ calc_scalability({}, \%stats1, \%stats2);
             time    => 1,
             quick   => 1,
             iter    => 2,
-            repeat  => 1,
+            scale   => 1,
             no_mce  => 1,
             include => 'DCT',
         }
@@ -76,7 +77,7 @@ my $datadir = dist_dir("Benchmark-DKbench");
             threads  => 1,
             time     => 1,
             iter     => 1,
-            repeat   => 1,
+            scale    => 1,
             no_mce   => 1,
             include  => 'prove',
         }