Skip to content

Commit 22e2f23

Browse files
committed
Auto merge of #6880 - alexcrichton:cache, r=Eh2406
Parse less JSON on null builds This commit fixes a performance pathology in Cargo today. Whenever Cargo generates a lock file (which happens on all invocations of `cargo build` for example) Cargo will parse the crates.io index to learn about dependencies. Currently, however, when it parses a crate it parses the JSON blob for every single version of the crate. With a lock file, however, or with incremental builds, only one of these lines of JSON is relevant. Measured today, Cargo building Cargo parses 3700 JSON dependencies in the registry. This commit implements an optimization that brings down the number of parsed JSON lines in the registry to precisely the right number necessary to build a project. For example, Cargo has 150 crates in its lock file, so now it only parses 150 JSON lines (a 20x reduction from 3700). This in turn can greatly improve Cargo's null build time. Cargo building Cargo dropped from 120ms to 60ms on a Linux machine and 400ms to 200ms on a Mac. The commit internally has a lot more details about how this is done, but the general idea is to have a cache which is optimized for Cargo to read and which is maintained automatically by Cargo. Closes #6866
2 parents 254840c + e33881b commit 22e2f23

20 files changed

+751
-276
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ lazycell = "1.2.0"
4444
libc = "0.2"
4545
log = "0.4.6"
4646
libgit2-sys = "0.7.9"
47+
memchr = "2.1.3"
4748
num_cpus = "1.0"
4849
opener = "0.4"
4950
rustfix = "0.4.4"

src/cargo/core/compiler/layout.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,9 @@ impl Layout {
160160
pub fn prepare(&mut self) -> io::Result<()> {
161161
if fs::metadata(&self.root).is_err() {
162162
fs::create_dir_all(&self.root)?;
163+
self.exclude_from_backups(&self.root);
163164
}
164165

165-
self.exclude_from_backups(&self.root);
166-
167166
mkdir(&self.deps)?;
168167
mkdir(&self.native)?;
169168
mkdir(&self.incremental)?;

src/cargo/core/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ pub use self::shell::{Shell, Verbosity};
1414
pub use self::source::{GitReference, Source, SourceId, SourceMap};
1515
pub use self::summary::{FeatureMap, FeatureValue, Summary};
1616
pub use self::workspace::{Members, Workspace, WorkspaceConfig, WorkspaceRootConfig};
17+
pub use self::interning::InternedString;
1718

1819
pub mod compiler;
1920
pub mod dependency;

src/cargo/core/package.rs

+5
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use crate::ops;
2525
use crate::util::errors::{CargoResult, CargoResultExt, HttpNot200};
2626
use crate::util::network::Retry;
2727
use crate::util::{self, internal, lev_distance, Config, Progress, ProgressStyle};
28+
use crate::util::config::PackageCacheLock;
2829

2930
/// Information about a package that is available somewhere in the file system.
3031
///
@@ -339,6 +340,9 @@ pub struct Downloads<'a, 'cfg: 'a> {
339340
/// trigger a timeout; reset `next_speed_check` and set this back to the
340341
/// configured threshold.
341342
next_speed_check_bytes_threshold: Cell<u64>,
343+
/// Global filesystem lock to ensure only one Cargo is downloading at a
344+
/// time.
345+
_lock: PackageCacheLock<'cfg>,
342346
}
343347

344348
struct Download<'cfg> {
@@ -437,6 +441,7 @@ impl<'cfg> PackageSet<'cfg> {
437441
timeout,
438442
next_speed_check: Cell::new(Instant::now()),
439443
next_speed_check_bytes_threshold: Cell::new(0),
444+
_lock: self.config.acquire_package_cache_lock()?,
440445
})
441446
}
442447

src/cargo/core/source/source_id.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ impl SourceId {
337337
Kind::Registry => {}
338338
_ => return false,
339339
}
340-
self.inner.url.to_string() == CRATES_IO_INDEX
340+
self.inner.url.as_str() == CRATES_IO_INDEX
341341
}
342342

343343
/// Hashes `self`.

src/cargo/ops/cargo_generate_lockfile.rs

+5
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ pub fn update_lockfile(ws: &Workspace<'_>, opts: &UpdateOptions<'_>) -> CargoRes
4040
failure::bail!("you can't update in the offline mode");
4141
}
4242

43+
// Updates often require a lot of modifications to the registry, so ensure
44+
// that we're synchronized against other Cargos.
45+
let _lock = ws.config().acquire_package_cache_lock()?;
46+
4347
let previous_resolve = match ops::load_pkg_lockfile(ws)? {
4448
Some(resolve) => resolve,
4549
None => return generate_lockfile(ws),
@@ -73,6 +77,7 @@ pub fn update_lockfile(ws: &Workspace<'_>, opts: &UpdateOptions<'_>) -> CargoRes
7377
});
7478
}
7579
}
80+
7681
registry.add_sources(sources)?;
7782
}
7883

src/cargo/ops/cargo_install.rs

+5
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,11 @@ fn check_yanked_install(ws: &Workspace<'_>) -> CargoResult<()> {
488488
// wouldn't be available for `compile_ws`.
489489
let (pkg_set, resolve) = ops::resolve_ws_with_method(ws, Method::Everything, &specs)?;
490490
let mut sources = pkg_set.sources_mut();
491+
492+
// Checking the yanked status involves taking a look at the registry and
493+
// maybe updating files, so be sure to lock it here.
494+
let _lock = ws.config().acquire_package_cache_lock()?;
495+
491496
for pkg_id in resolve.iter() {
492497
if let Some(source) = sources.get_mut(pkg_id.source_id()) {
493498
if source.is_yanked(pkg_id)? {

src/cargo/ops/cargo_package.rs

+4
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,10 @@ fn compare_resolve(
548548
}
549549

550550
fn check_yanked(config: &Config, pkg_set: &PackageSet<'_>, resolve: &Resolve) -> CargoResult<()> {
551+
// Checking the yanked status involves taking a look at the registry and
552+
// maybe updating files, so be sure to lock it here.
553+
let _lock = config.acquire_package_cache_lock()?;
554+
551555
let mut sources = pkg_set.sources_mut();
552556
for pkg_id in resolve.iter() {
553557
if let Some(source) = sources.get_mut(pkg_id.source_id()) {

src/cargo/ops/common_for_install_and_uninstall.rs

+5
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,11 @@ pub fn select_pkg<'a, T>(
564564
where
565565
T: Source + 'a,
566566
{
567+
// This operation may involve updating some sources or making a few queries
568+
// which may involve frobbing caches; as a result, make sure we synchronize
569+
// with other global Cargos.
570+
let _lock = config.acquire_package_cache_lock()?;
571+
567572
if needs_update {
568573
source.update()?;
569574
}

src/cargo/ops/registry.rs

+1
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,7 @@ fn registry(
353353
let token = token.or(token_config);
354354
let sid = get_source_id(config, index_config.or(index), registry)?;
355355
let api_host = {
356+
let _lock = config.acquire_package_cache_lock()?;
356357
let mut src = RegistrySource::remote(sid, &HashSet::new(), config);
357358
// Only update the index if the config is not available or `force` is set.
358359
let cfg = src.config();

src/cargo/ops/resolve.rs

+4
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,10 @@ pub fn resolve_with_previous<'cfg>(
146146
specs: &[PackageIdSpec],
147147
register_patches: bool,
148148
) -> CargoResult<Resolve> {
149+
// We only want one Cargo at a time resolving a crate graph since this can
150+
// involve a lot of frobbing of the global caches.
151+
let _lock = ws.config().acquire_package_cache_lock()?;
152+
149153
// Here we place an artificial limitation that all non-registry sources
150154
// cannot be locked at more than one revision. This means that if a Git
151155
// repository provides more than one package, they must all be updated in

src/cargo/sources/git/source.rs

+8-15
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,9 @@ impl<'cfg> Source for GitSource<'cfg> {
154154
}
155155

156156
fn update(&mut self) -> CargoResult<()> {
157-
let lock =
158-
self.config
159-
.git_path()
160-
.open_rw(".cargo-lock-git", self.config, "the git checkouts")?;
161-
162-
let db_path = lock.parent().join("db").join(&self.ident);
157+
let git_path = self.config.git_path();
158+
let git_path = self.config.assert_package_cache_locked(&git_path);
159+
let db_path = git_path.join("db").join(&self.ident);
163160

164161
if self.config.cli_unstable().offline && !db_path.exists() {
165162
failure::bail!(
@@ -189,21 +186,17 @@ impl<'cfg> Source for GitSource<'cfg> {
189186
(self.remote.db_at(&db_path)?, actual_rev.unwrap())
190187
};
191188

192-
// Don’t use the full hash, in order to contribute less to reaching the path length limit
193-
// on Windows. See <https://github.com/servo/servo/pull/14397>.
189+
// Don’t use the full hash, in order to contribute less to reaching the
190+
// path length limit on Windows. See
191+
// <https://github.com/servo/servo/pull/14397>.
194192
let short_id = db.to_short_id(&actual_rev).unwrap();
195193

196-
let checkout_path = lock
197-
.parent()
194+
let checkout_path = git_path
198195
.join("checkouts")
199196
.join(&self.ident)
200197
.join(short_id.as_str());
201198

202-
// Copy the database to the checkout location. After this we could drop
203-
// the lock on the database as we no longer needed it, but we leave it
204-
// in scope so the destructors here won't tamper with too much.
205-
// Checkout is immutable, so we don't need to protect it with a lock once
206-
// it is created.
199+
// Copy the database to the checkout location.
207200
db.copy_to(actual_rev.clone(), &checkout_path, self.config)?;
208201

209202
let source_id = self.source_id.with_precise(Some(actual_rev.to_string()));

0 commit comments

Comments
 (0)