Skip to content

Commit a200640

Browse files
committed
Improve performance of git status check in cargo package.
1 parent 7664fd6 commit a200640

File tree

3 files changed

+114
-48
lines changed

3 files changed

+114
-48
lines changed

src/cargo/ops/cargo_package.rs

+59-46
Original file line numberDiff line numberDiff line change
@@ -394,76 +394,89 @@ fn check_repo_state(
394394
src_files: &[PathBuf],
395395
repo: &git2::Repository,
396396
) -> CargoResult<Option<String>> {
397-
let workdir = repo.workdir().unwrap();
398-
399-
let mut sub_repos = Vec::new();
400-
open_submodules(repo, &mut sub_repos)?;
401-
// Sort so that longest paths are first, to check nested submodules first.
402-
sub_repos.sort_unstable_by(|a, b| b.0.as_os_str().len().cmp(&a.0.as_os_str().len()));
403-
let submodule_dirty = |path: &Path| -> bool {
404-
sub_repos
405-
.iter()
406-
.filter(|(sub_path, _sub_repo)| path.starts_with(sub_path))
407-
.any(|(sub_path, sub_repo)| {
408-
let relative = path.strip_prefix(sub_path).unwrap();
409-
sub_repo
410-
.status_file(relative)
411-
.map(|status| status != git2::Status::CURRENT)
412-
.unwrap_or(false)
413-
})
414-
};
415-
416-
let dirty = src_files
397+
// This is a collection of any dirty or untracked files. This covers:
398+
// - new/modified/deleted/renamed/type change (index or worktree)
399+
// - untracked files (which are "new" worktree files)
400+
// - ignored (in case the user has an `include` directive that
401+
// conflicts with .gitignore).
402+
let mut dirty_files = Vec::new();
403+
collect_statuses(repo, &mut dirty_files)?;
404+
// Include each submodule so that the error message can provide
405+
// specifically *which* files in a submodule are modified.
406+
status_submodules(repo, &mut dirty_files)?;
407+
408+
// Find the intersection of dirty in git, and the src_files that would
409+
// be packaged. This is a lazy n^2 check, but seems fine with
410+
// thousands of files.
411+
let dirty_src_files: Vec<String> = src_files
417412
.iter()
418-
.filter(|file| {
419-
let relative = file.strip_prefix(workdir).unwrap();
420-
if let Ok(status) = repo.status_file(relative) {
421-
if status == git2::Status::CURRENT {
422-
false
423-
} else if relative.file_name().and_then(|s| s.to_str()).unwrap_or("")
424-
== "Cargo.lock"
425-
{
426-
// It is OK to include this file even if it is ignored.
427-
status != git2::Status::IGNORED
428-
} else {
429-
true
430-
}
431-
} else {
432-
submodule_dirty(file)
433-
}
434-
})
413+
.filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
435414
.map(|path| {
436415
path.strip_prefix(p.root())
437416
.unwrap_or(path)
438417
.display()
439418
.to_string()
440419
})
441-
.collect::<Vec<_>>();
442-
if dirty.is_empty() {
420+
.collect();
421+
if dirty_src_files.is_empty() {
443422
let rev_obj = repo.revparse_single("HEAD")?;
444423
Ok(Some(rev_obj.id().to_string()))
445424
} else {
446425
anyhow::bail!(
447426
"{} files in the working directory contain changes that were \
448427
not yet committed into git:\n\n{}\n\n\
449428
to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag",
450-
dirty.len(),
451-
dirty.join("\n")
429+
dirty_src_files.len(),
430+
dirty_src_files.join("\n")
452431
)
453432
}
454433
}
455434

456-
/// Helper to recursively open all submodules.
457-
fn open_submodules(
435+
// Helper to collect dirty statuses for a single repo.
436+
fn collect_statuses(
437+
repo: &git2::Repository,
438+
dirty_files: &mut Vec<PathBuf>,
439+
) -> CargoResult<()> {
440+
let mut status_opts = git2::StatusOptions::new();
441+
// Exclude submodules, as they are being handled manually by recursing
442+
// into each one so that details about specific files can be
443+
// retrieved.
444+
status_opts
445+
.exclude_submodules(true)
446+
.include_ignored(true)
447+
.include_untracked(true);
448+
let repo_statuses = repo.statuses(Some(&mut status_opts)).with_context(|| {
449+
format!(
450+
"failed to retrieve git status from repo {}",
451+
repo.path().display()
452+
)
453+
})?;
454+
let workdir = repo.workdir().unwrap();
455+
let this_dirty = repo_statuses.iter().filter_map(|entry| {
456+
let path = entry.path().expect("valid utf-8 path");
457+
if path.ends_with("Cargo.lock") && entry.status() == git2::Status::IGNORED {
458+
// It is OK to include Cargo.lock even if it is ignored.
459+
return None;
460+
}
461+
// Use an absolute path, so that comparing paths is easier
462+
// (particularly with submodules).
463+
Some(workdir.join(path))
464+
});
465+
dirty_files.extend(this_dirty);
466+
Ok(())
467+
}
468+
469+
// Helper to collect dirty statuses while recursing into submodules.
470+
fn status_submodules(
458471
repo: &git2::Repository,
459-
sub_repos: &mut Vec<(PathBuf, git2::Repository)>,
472+
dirty_files: &mut Vec<PathBuf>,
460473
) -> CargoResult<()> {
461474
for submodule in repo.submodules()? {
462475
// Ignore submodules that don't open, they are probably not initialized.
463476
// If its files are required, then the verification step should fail.
464477
if let Ok(sub_repo) = submodule.open() {
465-
open_submodules(&sub_repo, sub_repos)?;
466-
sub_repos.push((sub_repo.workdir().unwrap().to_owned(), sub_repo));
478+
status_submodules(&sub_repo, dirty_files)?;
479+
collect_statuses(&sub_repo, dirty_files)?;
467480
}
468481
}
469482
Ok(())

tests/testsuite/git.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -2760,7 +2760,7 @@ to proceed despite [..]
27602760
git::commit(&repo);
27612761
git_project.cargo("package --no-verify").run();
27622762
// Modify within nested submodule.
2763-
git_project.change_file("src/bar/mod.rs", "//test");
2763+
git_project.change_file("src/bar/new_file.rs", "//test");
27642764
git_project
27652765
.cargo("package --no-verify")
27662766
.with_status(101)
@@ -2770,7 +2770,7 @@ to proceed despite [..]
27702770
See [..]
27712771
[ERROR] 1 files in the working directory contain changes that were not yet committed into git:
27722772
2773-
src/bar/mod.rs
2773+
src/bar/new_file.rs
27742774
27752775
to proceed despite [..]
27762776
",

tests/testsuite/package.rs

+53
Original file line numberDiff line numberDiff line change
@@ -816,6 +816,59 @@ to proceed despite this and include the uncommitted changes, pass the `--allow-d
816816
.run();
817817
}
818818

819+
#[cargo_test]
820+
fn dirty_ignored() {
821+
// Cargo warns about an ignored file that will be published.
822+
let (p, repo) = git::new_repo("foo", |p| {
823+
p.file(
824+
"Cargo.toml",
825+
r#"
826+
[package]
827+
name = "foo"
828+
version = "0.1.0"
829+
description = "foo"
830+
license = "foo"
831+
documentation = "foo"
832+
include = ["src", "build"]
833+
"#,
834+
)
835+
.file("src/lib.rs", "")
836+
.file(".gitignore", "build")
837+
});
838+
// Example of adding a file that is confusingly ignored by an overzealous
839+
// gitignore rule.
840+
p.change_file("src/build/mod.rs", "");
841+
p.cargo("package --list")
842+
.with_status(101)
843+
.with_stderr(
844+
"\
845+
error: 1 files in the working directory contain changes that were not yet committed into git:
846+
847+
src/build/mod.rs
848+
849+
to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag
850+
",
851+
)
852+
.run();
853+
// Add the ignored file and make sure it is included.
854+
let mut index = t!(repo.index());
855+
t!(index.add_path(Path::new("src/build/mod.rs")));
856+
t!(index.write());
857+
git::commit(&repo);
858+
p.cargo("package --list")
859+
.with_stderr("")
860+
.with_stdout(
861+
"\
862+
.cargo_vcs_info.json
863+
Cargo.toml
864+
Cargo.toml.orig
865+
src/build/mod.rs
866+
src/lib.rs
867+
",
868+
)
869+
.run();
870+
}
871+
819872
#[cargo_test]
820873
fn generated_manifest() {
821874
registry::alt_init();

0 commit comments

Comments
 (0)