Skip to content

Commit 449ead0

Browse files
committed
package: canonicalize tar headers for crate packages
Currently, when reading a file from disk, we include several pieces of data from the on-disk file, including the user and group names and IDs, the device major and minor, the mode, and the timestamp. This means that our archives differ between systems, sometimes in unhelpful ways.

In addition, most users probably did not intend to share information about their user and group settings, operating system and disk type, and umask. While these aren't huge privacy leaks, cargo doesn't use them when extracting archives, so there's no value in including them.

Since using consistent data means that our archives are reproducible and don't leak user data, both of which are desirable features, let's canonicalize the header to strip out identifying information.

Omit the timestamp for generated files and tell the tar crate to copy deterministic data. That will omit all of the data we don't care about and also canonicalize the mode properly.

Our tests don't check the specifics of certain fields because they differ between the generated files and the files that are archived from disk. They are still canonicalized correctly for each type, however.
1 parent e46ca84 commit 449ead0

File tree

2 files changed

+6
-48
lines changed

2 files changed

+6
-48
lines changed

src/cargo/ops/cargo_package.rs

+2-37
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,11 @@ use std::io::SeekFrom;
55
use std::path::{Path, PathBuf};
66
use std::rc::Rc;
77
use std::sync::Arc;
8-
use std::time::SystemTime;
98

109
use flate2::read::GzDecoder;
1110
use flate2::{Compression, GzBuilder};
1211
use log::debug;
13-
use tar::{Archive, Builder, EntryType, Header};
12+
use tar::{Archive, Builder, EntryType, Header, HeaderMode};
1413

1514
use crate::core::compiler::{BuildConfig, CompileMode, DefaultExecutor, Executor};
1615
use crate::core::{Feature, Shell, Verbosity, Workspace};
@@ -472,35 +471,6 @@ fn check_repo_state(
472471
}
473472
}
474473

475-
fn timestamp() -> u64 {
476-
if let Ok(var) = std::env::var("SOURCE_DATE_EPOCH") {
477-
if let Ok(stamp) = var.parse() {
478-
return stamp;
479-
}
480-
}
481-
SystemTime::now()
482-
.duration_since(SystemTime::UNIX_EPOCH)
483-
.unwrap()
484-
.as_secs()
485-
}
486-
487-
fn canonicalize_header(header: &mut Header) {
488-
// Let's not include information about the user or their system here.
489-
header.set_username("root").unwrap();
490-
header.set_groupname("root").unwrap();
491-
header.set_uid(0);
492-
header.set_gid(0);
493-
header.set_device_major(0).unwrap();
494-
header.set_device_minor(0).unwrap();
495-
496-
let mode = if header.mode().unwrap() & 0o100 != 0 {
497-
0o755
498-
} else {
499-
0o644
500-
};
501-
header.set_mode(mode);
502-
}
503-
504474
fn tar(
505475
ws: &Workspace<'_>,
506476
ar_files: Vec<ArchiveFile>,
@@ -520,7 +490,6 @@ fn tar(
520490

521491
let base_name = format!("{}-{}", pkg.name(), pkg.version());
522492
let base_path = Path::new(&base_name);
523-
let time = timestamp();
524493
for ar_file in ar_files {
525494
let ArchiveFile {
526495
rel_path,
@@ -540,9 +509,7 @@ fn tar(
540509
let metadata = file.metadata().chain_err(|| {
541510
format!("could not learn metadata for: `{}`", disk_path.display())
542511
})?;
543-
header.set_metadata(&metadata);
544-
header.set_mtime(time);
545-
canonicalize_header(&mut header);
512+
header.set_metadata_in_mode(&metadata, HeaderMode::Deterministic);
546513
header.set_cksum();
547514
ar.append_data(&mut header, &ar_path, &mut file)
548515
.chain_err(|| {
@@ -557,9 +524,7 @@ fn tar(
557524
};
558525
header.set_entry_type(EntryType::file());
559526
header.set_mode(0o644);
560-
header.set_mtime(time);
561527
header.set_size(contents.len() as u64);
562-
canonicalize_header(&mut header);
563528
header.set_cksum();
564529
ar.append_data(&mut header, &ar_path, contents.as_bytes())
565530
.chain_err(|| format!("could not archive source file `{}`", rel_str))?;

tests/testsuite/package.rs

+4-11
Original file line numberDiff line numberDiff line change
@@ -1938,10 +1938,7 @@ fn reproducible_output() {
19381938
.file("src/main.rs", r#"fn main() { println!("hello"); }"#)
19391939
.build();
19401940

1941-
// Timestamp is arbitrary and is the same used by git format-patch.
1942-
p.cargo("package")
1943-
.env("SOURCE_DATE_EPOCH", "1000684800")
1944-
.run();
1941+
p.cargo("package").run();
19451942
assert!(p.root().join("target/package/foo-0.0.1.crate").is_file());
19461943

19471944
let f = File::open(&p.root().join("target/package/foo-0.0.1.crate")).unwrap();
@@ -1951,12 +1948,8 @@ fn reproducible_output() {
19511948
let ent = ent.unwrap();
19521949
let header = ent.header();
19531950
assert_eq!(header.mode().unwrap(), 0o644);
1954-
assert_eq!(header.uid().unwrap(), 0);
1955-
assert_eq!(header.gid().unwrap(), 0);
1956-
assert_eq!(header.mtime().unwrap(), 1000684800);
1957-
assert_eq!(header.username().unwrap().unwrap(), "root");
1958-
assert_eq!(header.groupname().unwrap().unwrap(), "root");
1959-
assert_eq!(header.device_major().unwrap().unwrap(), 0);
1960-
assert_eq!(header.device_minor().unwrap().unwrap(), 0);
1951+
assert_eq!(header.mtime().unwrap(), 0);
1952+
assert_eq!(header.username().unwrap().unwrap(), "");
1953+
assert_eq!(header.groupname().unwrap().unwrap(), "");
19611954
}
19621955
}

0 commit comments

Comments
 (0)