Skip to content

Commit 7b741fb

Browse files
authored
Rollup merge of rust-lang#67889 - Zoxc:parallel-cgus, r=michaelwoerister
Compile some CGUs in parallel at the start of codegen. This brings the compilation time for `syntex_syntax` from 11.542s to 10.453s with 6 threads in non-incremental debug mode. Just compiling `n` CGUs in parallel at the beginning of codegen seems sufficient to get rid of the staircase effect, at least for `syntex_syntax`. Based on rust-lang#67777. r? @michaelwoerister cc @alexcrichton @Mark-Simulacrum
2 parents cd47af1 + 69bacd0 commit 7b741fb

File tree

4 files changed

+89
-22
lines changed

4 files changed

+89
-22
lines changed

src/librustc_codegen_llvm/base.rs

+4-6
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
//! but one `llvm::Type` corresponds to many `Ty`s; for instance, `tup(int, int,
1414
//! int)` and `rec(x=int, y=int, z=int)` will have the same `llvm::Type`.
1515
16-
use super::{LlvmCodegenBackend, ModuleLlvm};
16+
use super::ModuleLlvm;
1717

1818
use crate::builder::Builder;
1919
use crate::common;
@@ -29,7 +29,6 @@ use rustc::middle::exported_symbols;
2929
use rustc::mir::mono::{Linkage, Visibility};
3030
use rustc::session::config::DebugInfo;
3131
use rustc::ty::TyCtxt;
32-
use rustc_codegen_ssa::back::write::submit_codegened_module_to_llvm;
3332
use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
3433
use rustc_codegen_ssa::mono_item::MonoItemExt;
3534
use rustc_codegen_ssa::traits::*;
@@ -100,8 +99,7 @@ pub fn iter_globals(llmod: &'ll llvm::Module) -> ValueIter<'ll> {
10099
pub fn compile_codegen_unit(
101100
tcx: TyCtxt<'tcx>,
102101
cgu_name: Symbol,
103-
tx_to_llvm_workers: &std::sync::mpsc::Sender<Box<dyn std::any::Any + Send>>,
104-
) {
102+
) -> (ModuleCodegen<ModuleLlvm>, u64) {
105103
let prof_timer = tcx.prof.generic_activity("codegen_module");
106104
let start_time = Instant::now();
107105

@@ -115,8 +113,6 @@ pub fn compile_codegen_unit(
115113
// the time we needed for codegenning it.
116114
let cost = time_to_codegen.as_secs() * 1_000_000_000 + time_to_codegen.subsec_nanos() as u64;
117115

118-
submit_codegened_module_to_llvm(&LlvmCodegenBackend(()), tx_to_llvm_workers, module, cost);
119-
120116
fn module_codegen(tcx: TyCtxt<'_>, cgu_name: Symbol) -> ModuleCodegen<ModuleLlvm> {
121117
let cgu = tcx.codegen_unit(cgu_name);
122118
// Instantiate monomorphizations without filling out definitions yet...
@@ -164,6 +160,8 @@ pub fn compile_codegen_unit(
164160
kind: ModuleKind::Regular,
165161
}
166162
}
163+
164+
(module, cost)
167165
}
168166

169167
pub fn set_link_section(llval: &Value, attrs: &CodegenFnAttrs) {

src/librustc_codegen_llvm/lib.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#![feature(link_args)]
2020
#![feature(static_nobundle)]
2121
#![feature(trusted_len)]
22+
#![recursion_limit = "256"]
2223

2324
use back::write::{create_informational_target_machine, create_target_machine};
2425
use rustc_span::symbol::Symbol;
@@ -108,9 +109,8 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
108109
&self,
109110
tcx: TyCtxt<'_>,
110111
cgu_name: Symbol,
111-
tx: &std::sync::mpsc::Sender<Box<dyn Any + Send>>,
112-
) {
113-
base::compile_codegen_unit(tcx, cgu_name, tx);
112+
) -> (ModuleCodegen<ModuleLlvm>, u64) {
113+
base::compile_codegen_unit(tcx, cgu_name)
114114
}
115115
fn target_machine_factory(
116116
&self,

src/librustc_codegen_ssa/base.rs

+77-9
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
//! int)` and `rec(x=int, y=int, z=int)` will have the same `llvm::Type`.
1515
1616
use crate::back::write::{
17-
start_async_codegen, submit_post_lto_module_to_llvm, submit_pre_lto_module_to_llvm,
18-
OngoingCodegen,
17+
start_async_codegen, submit_codegened_module_to_llvm, submit_post_lto_module_to_llvm,
18+
submit_pre_lto_module_to_llvm, OngoingCodegen,
1919
};
2020
use crate::common::{IntPredicate, RealPredicate, TypeKind};
2121
use crate::meth;
@@ -40,6 +40,7 @@ use rustc::ty::{self, Instance, Ty, TyCtxt};
4040
use rustc_codegen_utils::{check_for_rustc_errors_attr, symbol_names_test};
4141
use rustc_data_structures::fx::FxHashMap;
4242
use rustc_data_structures::profiling::print_time_passes_entry;
43+
use rustc_data_structures::sync::{par_iter, Lock, ParallelIterator};
4344
use rustc_hir as hir;
4445
use rustc_hir::def_id::{DefId, LOCAL_CRATE};
4546
use rustc_index::vec::Idx;
@@ -606,20 +607,83 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
606607
codegen_units
607608
};
608609

609-
let mut total_codegen_time = Duration::new(0, 0);
610+
let total_codegen_time = Lock::new(Duration::new(0, 0));
610611

611-
for cgu in codegen_units.into_iter() {
612+
// The non-parallel compiler can only translate codegen units to LLVM IR
613+
// on a single thread, leading to a staircase effect where the N LLVM
614+
// threads have to wait on the single codegen thread to generate work
615+
// for them. The parallel compiler does not have this restriction, so
616+
// we can pre-load the LLVM queue in parallel before handing off
617+
// coordination to the `OngoingCodegen` scheduler.
618+
//
619+
// This is likely a temporary measure. Once we don't have to support the
620+
// non-parallel compiler anymore, we can compile CGUs end-to-end in
621+
// parallel and get rid of the complicated scheduling logic.
622+
let pre_compile_cgus = |cgu_reuse: &[CguReuse]| {
623+
if cfg!(parallel_compiler) {
624+
tcx.sess.time("compile_first_CGU_batch", || {
625+
// Try to find one CGU to compile per thread.
626+
let cgus: Vec<_> = cgu_reuse
627+
.iter()
628+
.enumerate()
629+
.filter(|&(_, reuse)| reuse == &CguReuse::No)
630+
.take(tcx.sess.threads())
631+
.collect();
632+
633+
// Compile the found CGUs in parallel.
634+
par_iter(cgus)
635+
.map(|(i, _)| {
636+
let start_time = Instant::now();
637+
let module = backend.compile_codegen_unit(tcx, codegen_units[i].name());
638+
let mut time = total_codegen_time.lock();
639+
*time += start_time.elapsed();
640+
(i, module)
641+
})
642+
.collect()
643+
})
644+
} else {
645+
FxHashMap::default()
646+
}
647+
};
648+
649+
let mut cgu_reuse = Vec::new();
650+
let mut pre_compiled_cgus: Option<FxHashMap<usize, _>> = None;
651+
652+
for (i, cgu) in codegen_units.iter().enumerate() {
612653
ongoing_codegen.wait_for_signal_to_codegen_item();
613654
ongoing_codegen.check_for_errors(tcx.sess);
614655

615-
let cgu_reuse = determine_cgu_reuse(tcx, &cgu);
656+
// Do some setup work in the first iteration
657+
if pre_compiled_cgus.is_none() {
658+
// Calculate the CGU reuse
659+
cgu_reuse = tcx.sess.time("find_cgu_reuse", || {
660+
codegen_units.iter().map(|cgu| determine_cgu_reuse(tcx, &cgu)).collect()
661+
});
662+
// Pre-compile some CGUs
663+
pre_compiled_cgus = Some(pre_compile_cgus(&cgu_reuse));
664+
}
665+
666+
let cgu_reuse = cgu_reuse[i];
616667
tcx.sess.cgu_reuse_tracker.set_actual_reuse(&cgu.name().as_str(), cgu_reuse);
617668

618669
match cgu_reuse {
619670
CguReuse::No => {
620-
let start_time = Instant::now();
621-
backend.compile_codegen_unit(tcx, cgu.name(), &ongoing_codegen.coordinator_send);
622-
total_codegen_time += start_time.elapsed();
671+
let (module, cost) =
672+
if let Some(cgu) = pre_compiled_cgus.as_mut().unwrap().remove(&i) {
673+
cgu
674+
} else {
675+
let start_time = Instant::now();
676+
let module = backend.compile_codegen_unit(tcx, cgu.name());
677+
let mut time = total_codegen_time.lock();
678+
*time += start_time.elapsed();
679+
module
680+
};
681+
submit_codegened_module_to_llvm(
682+
&backend,
683+
&ongoing_codegen.coordinator_send,
684+
module,
685+
cost,
686+
);
623687
false
624688
}
625689
CguReuse::PreLto => {
@@ -652,7 +716,11 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
652716

653717
// Since the main thread is sometimes blocked during codegen, we keep track
654718
// -Ztime-passes output manually.
655-
print_time_passes_entry(tcx.sess.time_passes(), "codegen_to_LLVM_IR", total_codegen_time);
719+
print_time_passes_entry(
720+
tcx.sess.time_passes(),
721+
"codegen_to_LLVM_IR",
722+
total_codegen_time.into_inner(),
723+
);
656724

657725
::rustc_incremental::assert_module_sources::assert_module_sources(tcx);
658726

src/librustc_codegen_ssa/traits/backend.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use super::write::WriteBackendMethods;
22
use super::CodegenObject;
3+
use crate::ModuleCodegen;
34

45
use rustc::middle::cstore::EncodedMetadata;
56
use rustc::session::{config, Session};
@@ -10,7 +11,6 @@ use rustc_codegen_utils::codegen_backend::CodegenBackend;
1011
use rustc_span::symbol::Symbol;
1112
use syntax::expand::allocator::AllocatorKind;
1213

13-
use std::sync::mpsc;
1414
use std::sync::Arc;
1515

1616
pub trait BackendTypes {
@@ -34,7 +34,7 @@ impl<'tcx, T> Backend<'tcx> for T where
3434
{
3535
}
3636

37-
pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Send {
37+
pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Send + Sync {
3838
fn new_metadata(&self, sess: TyCtxt<'_>, mod_name: &str) -> Self::Module;
3939
fn write_compressed_metadata<'tcx>(
4040
&self,
@@ -48,12 +48,13 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
4848
mods: &mut Self::Module,
4949
kind: AllocatorKind,
5050
);
51+
/// This generates the codegen unit and returns it along with
52+
/// a `u64` giving an estimate of the unit's processing cost.
5153
fn compile_codegen_unit(
5254
&self,
5355
tcx: TyCtxt<'_>,
5456
cgu_name: Symbol,
55-
tx_to_llvm_workers: &mpsc::Sender<Box<dyn std::any::Any + Send>>,
56-
);
57+
) -> (ModuleCodegen<Self::Module>, u64);
5758
// If find_features is true this won't access `sess.crate_types` by assuming
5859
// that `is_pie_binary` is false. When we discover LLVM target features
5960
// `sess.crate_types` is uninitialized so we cannot access it.

0 commit comments

Comments
 (0)