From a448f88b6986208c0da97940c86189f78a865068 Mon Sep 17 00:00:00 2001
From: Mark Rousskov
Date: Mon, 14 Dec 2020 13:50:59 -0500
Subject: [PATCH] Utilize PGO for rustc linux dist builds

This implements support for applying PGO to the rustc compilation step
(not standard library or any tooling, including rustdoc). Expanding PGO
to more tools is not terribly difficult but will involve more work and
greater CI time commitment.

For the same reason of avoiding greater time commitment, this currently
avoids implementing for platforms outside of x86_64-unknown-linux-gnu,
though in practice it should be quite simple to extend over time to more
platforms. The initial implementation is intentionally minimal here to
avoid investing too much work before we start seeing wins for a subset
of Rust users.

The choice of workloads to profile here is somewhat arbitrary, but the
general rationale was to aim for a small set that largely avoided time
regressions on perf.rust-lang.org's full suite of crates. The set chosen
is libcore, cargo (and its dependencies), and a few ad-hoc stress tests
from perf.rlo. The stress tests are arguably the most controversial, but
they benefit those cases (avoiding regressions) and do not really remove
wins from other benchmarks.

The primary next step after this PR lands is to implement support for
PGO in LLVM. It is unclear whether we can afford a full LLVM rebuild in
CI, though, so the approach taken there may need to be more staggered.
rustc-only PGO seems quite affordable on linux at least, giving us up to
20% wall time wins on some crates for 15 minutes of extra CI time (1
hour up from 45 minutes).

The PGO data is uploaded to allow others to reuse it if attempting to
reproduce the CI build or potentially, in the future, on other platforms
where an off-by-one strategy is used for dist builds at minimal
performance cost.
--- src/bootstrap/builder.rs | 1 + src/bootstrap/compile.rs | 37 +++++++++- src/bootstrap/config.rs | 9 +++ src/bootstrap/dist.rs | 69 +++++++++++++++++++ src/bootstrap/flags.rs | 7 ++ .../host-x86_64/dist-x86_64-linux/Dockerfile | 9 ++- src/ci/pgo.sh | 47 +++++++++++++ src/tools/build-manifest/src/main.rs | 1 + 8 files changed, 176 insertions(+), 4 deletions(-) create mode 100755 src/ci/pgo.sh diff --git a/src/bootstrap/builder.rs b/src/bootstrap/builder.rs index 9af79e20630df..05b1035d843f1 100644 --- a/src/bootstrap/builder.rs +++ b/src/bootstrap/builder.rs @@ -471,6 +471,7 @@ impl<'a> Builder<'a> { dist::RustDev, dist::Extended, dist::BuildManifest, + dist::ReproducibleArtifacts, ), Kind::Install => describe!( install::Docs, diff --git a/src/bootstrap/compile.rs b/src/bootstrap/compile.rs index fbebb26c74620..091bd2a1c5a16 100644 --- a/src/bootstrap/compile.rs +++ b/src/bootstrap/compile.rs @@ -501,6 +501,41 @@ impl Step for Rustc { let mut cargo = builder.cargo(compiler, Mode::Rustc, SourceType::InTree, target, "build"); rustc_cargo(builder, &mut cargo, target); + if builder.config.rust_profile_use.is_some() + && builder.config.rust_profile_generate.is_some() + { + panic!("Cannot use and generate PGO profiles at the same time"); + } + + let is_pgo_enabled = if let Some(path) = &builder.config.rust_profile_generate { + if compiler.stage == 1 { + cargo.rustflag(&format!("-Cprofile-generate={}", path)); + // Apparently necessary to avoid overflowing the counters during + // a Cargo build profile + cargo.rustflag("-Cllvm-args=-vp-counters-per-site=4"); + true + } else { + false + } + } else if let Some(path) = &builder.config.rust_profile_use { + if compiler.stage == 1 { + cargo.rustflag(&format!("-Cprofile-use={}", path)); + cargo.rustflag("-Cllvm-args=-pgo-warn-missing-function"); + true + } else { + false + } + } else { + false + }; + if is_pgo_enabled { + // Ensure paths to Rust sources are relative, not absolute. 
+ cargo.rustflag(&format!( + "-Cllvm-args=-static-func-strip-dirname-prefix={}", + builder.config.src.components().count() + )); + } + builder.info(&format!( "Building stage{} compiler artifacts ({} -> {})", compiler.stage, &compiler.host, target @@ -752,7 +787,7 @@ fn copy_codegen_backends_to_sysroot( // Here we're looking for the output dylib of the `CodegenBackend` step and // we're copying that into the `codegen-backends` folder. let dst = builder.sysroot_codegen_backends(target_compiler); - t!(fs::create_dir_all(&dst)); + t!(fs::create_dir_all(&dst), dst); if builder.config.dry_run { return; diff --git a/src/bootstrap/config.rs b/src/bootstrap/config.rs index fb2c6d1f92a80..58dc5f7af791d 100644 --- a/src/bootstrap/config.rs +++ b/src/bootstrap/config.rs @@ -133,6 +133,8 @@ pub struct Config { pub rust_thin_lto_import_instr_limit: Option, pub rust_remap_debuginfo: bool, pub rust_new_symbol_mangling: bool, + pub rust_profile_use: Option<String>, + pub rust_profile_generate: Option<String>, pub build: TargetSelection, pub hosts: Vec, @@ -494,6 +496,8 @@ struct Rust { llvm_libunwind: Option, control_flow_guard: Option, new_symbol_mangling: Option, + profile_generate: Option<String>, + profile_use: Option<String>, } /// TOML representation of how each build target is configured. 
@@ -871,6 +875,11 @@ impl Config { config.rust_codegen_units = rust.codegen_units.map(threads_from_config); config.rust_codegen_units_std = rust.codegen_units_std.map(threads_from_config); + config.rust_profile_use = flags.rust_profile_use.or(rust.profile_use); + config.rust_profile_generate = flags.rust_profile_generate.or(rust.profile_generate); + } else { + config.rust_profile_use = flags.rust_profile_use; + config.rust_profile_generate = flags.rust_profile_generate; } if let Some(t) = toml.target { diff --git a/src/bootstrap/dist.rs b/src/bootstrap/dist.rs index b1bb97cf83ba5..823f62fa4a391 100644 --- a/src/bootstrap/dist.rs +++ b/src/bootstrap/dist.rs @@ -2664,3 +2664,72 @@ impl Step for BuildManifest { distdir(builder).join(format!("{}-{}.tar.gz", name, self.target.triple)) } } + +/// Tarball containing artifacts necessary to reproduce the build of rustc. +/// +/// Currently this is the PGO profile data. +/// +/// Should not be considered stable by end users. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct ReproducibleArtifacts { + pub target: TargetSelection, +} + +impl Step for ReproducibleArtifacts { + type Output = Option<PathBuf>; + const DEFAULT: bool = true; + const ONLY_HOSTS: bool = true; + + fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> { + run.path("reproducible") + } + + fn make_run(run: RunConfig<'_>) { + run.builder.ensure(ReproducibleArtifacts { target: run.target }); + } + + fn run(self, builder: &Builder<'_>) -> Self::Output { + let name = pkgname(builder, "reproducible-artifacts"); + let tmp = tmpdir(builder); + + // Prepare the image. + let image = tmp.join("reproducible-artifacts-image"); + let _ = fs::remove_dir_all(&image); + + if let Some(path) = &builder.config.rust_profile_use { + builder.install(std::path::Path::new(path), &image, 0o644); + } else { + return None; + } + + // Prepare the overlay. 
+ let overlay = tmp.join("reproducible-artifacts-overlay"); + let _ = fs::remove_dir_all(&overlay); + builder.create_dir(&overlay); + builder.create(&overlay.join("version"), &builder.rust_version()); + for file in &["COPYRIGHT", "LICENSE-APACHE", "LICENSE-MIT", "README.md"] { + builder.install(&builder.src.join(file), &overlay, 0o644); + } + + // Create the final tarball. + let mut cmd = rust_installer(builder); + cmd.arg("generate") + .arg("--product-name=Rust") + .arg("--rel-manifest-dir=rustlib") + .arg("--success-message=reproducible-artifacts installed.") + .arg("--image-dir") + .arg(&image) + .arg("--work-dir") + .arg(&tmpdir(builder)) + .arg("--output-dir") + .arg(&distdir(builder)) + .arg("--non-installed-overlay") + .arg(&overlay) + .arg(format!("--package-name={}-{}", name, self.target.triple)) + .arg("--legacy-manifest-dirs=rustlib,cargo") + .arg("--component-name=reproducible-artifacts"); + + builder.run(&mut cmd); + Some(distdir(builder).join(format!("{}-{}.tar.gz", name, self.target.triple))) + } +} diff --git a/src/bootstrap/flags.rs b/src/bootstrap/flags.rs index 5a8096674c6da..d6a45f1c17076 100644 --- a/src/bootstrap/flags.rs +++ b/src/bootstrap/flags.rs @@ -68,6 +68,9 @@ pub struct Flags { pub deny_warnings: Option, pub llvm_skip_rebuild: Option, + + pub rust_profile_use: Option<String>, + pub rust_profile_generate: Option<String>, } pub enum Subcommand { @@ -219,6 +222,8 @@ To learn more about a subcommand, run `./x.py -h`", VALUE overrides the skip-rebuild option in config.toml.", "VALUE", ); + opts.optopt("", "rust-profile-generate", "generate PGO profile with rustc build", "PATH"); + opts.optopt("", "rust-profile-use", "use PGO profile for rustc build", "PATH"); // We can't use getopt to parse the options until we have completed specifying which // options are valid, but under the current implementation, some options are conditional on @@ -674,6 +679,8 @@ Arguments: color: matches .opt_get_default("color", Color::Auto) .expect("`color` should be `always`, `never`, or `auto`"), + 
rust_profile_use: matches.opt_str("rust-profile-use"), + rust_profile_generate: matches.opt_str("rust-profile-generate"), } } } diff --git a/src/ci/docker/host-x86_64/dist-x86_64-linux/Dockerfile b/src/ci/docker/host-x86_64/dist-x86_64-linux/Dockerfile index 14700aeea05af..d1b4bbf7fffef 100644 --- a/src/ci/docker/host-x86_64/dist-x86_64-linux/Dockerfile +++ b/src/ci/docker/host-x86_64/dist-x86_64-linux/Dockerfile @@ -85,6 +85,8 @@ ENV CC=clang CXX=clang++ COPY scripts/sccache.sh /scripts/ RUN sh /scripts/sccache.sh +ENV PGO_HOST=x86_64-unknown-linux-gnu + ENV HOSTS=x86_64-unknown-linux-gnu ENV RUST_CONFIGURE_ARGS \ @@ -98,9 +100,10 @@ ENV RUST_CONFIGURE_ARGS \ --set llvm.thin-lto=true \ --set llvm.ninja=false \ --set rust.jemalloc -ENV SCRIPT python2.7 ../x.py dist --host $HOSTS --target $HOSTS \ - --include-default-paths \ - src/tools/build-manifest +ENV SCRIPT ../src/ci/pgo.sh python2.7 ../x.py dist \ + --host $HOSTS --target $HOSTS \ + --include-default-paths \ + src/tools/build-manifest ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER=clang # This is the only builder which will create source tarballs diff --git a/src/ci/pgo.sh b/src/ci/pgo.sh new file mode 100755 index 0000000000000..13b8ca91f890f --- /dev/null +++ b/src/ci/pgo.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +set -euxo pipefail + +rm -rf /tmp/rustc-pgo /tmp/cargo + +python2.7 ../x.py build --target=$PGO_HOST --host=$PGO_HOST \ + --stage 2 library/std --rust-profile-generate=/tmp/rustc-pgo + +./build/$PGO_HOST/stage2/bin/rustc --edition=2018 \ + --crate-type=lib ../library/core/src/lib.rs + +# Download and build a single-file stress test benchmark on perf.rust-lang.org. 
+function pgo_perf_benchmark { + local PERF=e095f5021bf01cf3800f50b3a9f14a9683eb3e4e + local github_prefix=https://raw.githubusercontent.com/rust-lang/rustc-perf/$PERF + local name=$1 + curl -o /tmp/$name.rs $github_prefix/collector/benchmarks/$name/src/lib.rs + ./build/$PGO_HOST/stage2/bin/rustc --edition=2018 --crate-type=lib /tmp/$name.rs +} + +pgo_perf_benchmark externs +pgo_perf_benchmark ctfe-stress-4 + +cp -pri ../src/tools/cargo /tmp/cargo + +# Build cargo (with some flags) +function pgo_cargo { + RUSTC=./build/$PGO_HOST/stage2/bin/rustc \ + ./build/$PGO_HOST/stage0/bin/cargo "$@" \ + --manifest-path /tmp/cargo/Cargo.toml +} + +# Build a couple different variants of Cargo +CARGO_INCREMENTAL=1 pgo_cargo check +echo 'pub fn barbarbar() {}' >> /tmp/cargo/src/cargo/lib.rs +CARGO_INCREMENTAL=1 pgo_cargo check +touch /tmp/cargo/src/cargo/lib.rs +CARGO_INCREMENTAL=1 pgo_cargo check +pgo_cargo build --release + +# Merge the profile data we gathered +./build/$PGO_HOST/llvm/bin/llvm-profdata \ + merge -o /tmp/rustc-pgo.profdata /tmp/rustc-pgo + +# This produces the actual final set of artifacts. +"$@" --rust-profile-use=/tmp/rustc-pgo.profdata diff --git a/src/tools/build-manifest/src/main.rs b/src/tools/build-manifest/src/main.rs index 0462efaa9b001..73a4cbd07924f 100644 --- a/src/tools/build-manifest/src/main.rs +++ b/src/tools/build-manifest/src/main.rs @@ -299,6 +299,7 @@ impl Builder { let mut package = |name, targets| self.package(name, &mut manifest.pkg, targets); package("rustc", HOSTS); package("rustc-dev", HOSTS); + package("reproducible-artifacts", HOSTS); package("rustc-docs", HOSTS); package("cargo", HOSTS); package("rust-mingw", MINGW);