Skip to content

Commit 5342d40

Browse files
committed
Auto merge of #51050 - symphorien:fstatat, r=kennytm
std::fs::DirEntry.metadata(): use fstatat instead of lstat when possible When reading a directory with `read_dir`, querying metadata for a resulting `DirEntry` is done by building the whole path and then `lstat`ing it, which requires the kernel to resolve the whole path. Instead, one can use the file descriptor to the enumerated directory and use `fstatat`. This makes the resolving step unnecessary. This PR implements using `fstatat` on linux, android and emscripten. ## Compatibility across targets `fstatat` is POSIX. * Linux >= 2.6.19 according to https://linux.die.net/man/2/fstatat * android according to https://android.googlesource.com/platform/bionic/+/master/libc/libc.map.txt#392 * emscripten according to https://github.com/kripken/emscripten/blob/7f89560101843198787530731f40a65288f6f15f/system/include/libc/sys/stat.h#L76 The man page says "A similar system call exists on Solaris." but I haven't found it. ## Compatibility with old platforms This was introduced with glibc 2.4 according to the man page. The only information I could find about the minimal version of glibc rust must support is this discussion https://internals.rust-lang.org/t/bumping-glibc-requirements-for-the-rust-toolchain/5111/10 The conclusion, if I understand correctly, is that currently rust supports glibc >= 2.3.4 but the "real" requirement is CentOS 5 with glibc 2.5. This PR would make the minimal version 2.4, so this should be fine. ## Benefit I did the following silly benchmark: ```rust use std::io; use std::fs; use std::os::linux::fs::MetadataExt; use std::time::Instant; fn main() -> Result<(), io::Error> { let mut n = 0; let mut size = 0; let start = Instant::now(); for entry in fs::read_dir("/nix/store/.links")? 
{ let entry = entry?; let stat = entry.metadata()?; size += stat.st_size(); n+=1; } println!("{} files, size {}, time {:?}", n, size, Instant::now().duration_since(start)); Ok(()) } ``` On warm cache, with current rust nightly: ``` 1014099 files, size 76895290022, time Duration { secs: 2, nanos: 65832118 } ``` (between 2.1 and 2.9 seconds usually) With this PR: ``` 1014099 files, size 76895290022, time Duration { secs: 1, nanos: 581662953 } ``` (1.5 to 1.6 seconds usually). approximately 40% faster :) On cold cache there is not much to gain because path lookup (which we spare) would have been a cache hit: Before ``` 1014099 files, size 76895290022, time Duration { secs: 391, nanos: 739874992 } ``` After ``` 1014099 files, size 76895290022, time Duration { secs: 388, nanos: 431567396 } ``` ## Testing The tests were run on linux `x86_64` ``` python x.py test src/tools/tidy ./x.py test src/libstd ``` and the above benchmark. I did not test any other target.
2 parents 6de4ec6 + 8dec03b commit 5342d40

File tree

1 file changed

+33
-12
lines changed

1 file changed

+33
-12
lines changed

src/libstd/sys/unix/fs.rs

+33-12
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ use sys_common::{AsInner, FromInner};
2525

2626
#[cfg(any(target_os = "linux", target_os = "emscripten", target_os = "l4re"))]
2727
use libc::{stat64, fstat64, lstat64, off64_t, ftruncate64, lseek64, dirent64, readdir64_r, open64};
28+
#[cfg(any(target_os = "linux", target_os = "emscripten", target_os = "android"))]
29+
use libc::{fstatat, dirfd};
2830
#[cfg(target_os = "android")]
2931
use libc::{stat as stat64, fstat as fstat64, lstat as lstat64, lseek64,
3032
dirent as dirent64, open as open64};
@@ -48,20 +50,24 @@ pub struct FileAttr {
4850
stat: stat64,
4951
}
5052

51-
pub struct ReadDir {
53+
// all DirEntry's will have a reference to this struct
54+
struct InnerReadDir {
5255
dirp: Dir,
53-
root: Arc<PathBuf>,
56+
root: PathBuf,
5457
}
5558

59+
#[derive(Clone)]
60+
pub struct ReadDir(Arc<InnerReadDir>);
61+
5662
struct Dir(*mut libc::DIR);
5763

5864
unsafe impl Send for Dir {}
5965
unsafe impl Sync for Dir {}
6066

6167
pub struct DirEntry {
6268
entry: dirent64,
63-
root: Arc<PathBuf>,
64-
// We need to store an owned copy of the directory name
69+
dir: ReadDir,
70+
// We need to store an owned copy of the entry name
6571
// on Solaris and Fuchsia because a) it uses a zero-length
6672
// array to store the name, b) its lifetime between readdir
6773
// calls is not guaranteed.
@@ -207,7 +213,7 @@ impl fmt::Debug for ReadDir {
207213
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
208214
// This will only be called from std::fs::ReadDir, which will add a "ReadDir()" frame.
209215
// Thus the result will be e g 'ReadDir("/home")'
210-
fmt::Debug::fmt(&*self.root, f)
216+
fmt::Debug::fmt(&*self.0.root, f)
211217
}
212218
}
213219

@@ -223,7 +229,7 @@ impl Iterator for ReadDir {
223229
// is safe to use in threaded applications and it is generally preferred
224230
// over the readdir_r(3C) function.
225231
super::os::set_errno(0);
226-
let entry_ptr = libc::readdir(self.dirp.0);
232+
let entry_ptr = libc::readdir(self.0.dirp.0);
227233
if entry_ptr.is_null() {
228234
// NULL can mean either the end is reached or an error occurred.
229235
// So we had to clear errno beforehand to check for an error now.
@@ -240,7 +246,7 @@ impl Iterator for ReadDir {
240246
entry: *entry_ptr,
241247
name: ::slice::from_raw_parts(name as *const u8,
242248
namelen as usize).to_owned().into_boxed_slice(),
243-
root: self.root.clone()
249+
dir: self.clone()
244250
};
245251
if ret.name_bytes() != b"." && ret.name_bytes() != b".." {
246252
return Some(Ok(ret))
@@ -254,11 +260,11 @@ impl Iterator for ReadDir {
254260
unsafe {
255261
let mut ret = DirEntry {
256262
entry: mem::zeroed(),
257-
root: self.root.clone()
263+
dir: self.clone(),
258264
};
259265
let mut entry_ptr = ptr::null_mut();
260266
loop {
261-
if readdir64_r(self.dirp.0, &mut ret.entry, &mut entry_ptr) != 0 {
267+
if readdir64_r(self.0.dirp.0, &mut ret.entry, &mut entry_ptr) != 0 {
262268
return Some(Err(Error::last_os_error()))
263269
}
264270
if entry_ptr.is_null() {
@@ -281,13 +287,27 @@ impl Drop for Dir {
281287

282288
impl DirEntry {
283289
pub fn path(&self) -> PathBuf {
284-
self.root.join(OsStr::from_bytes(self.name_bytes()))
290+
self.dir.0.root.join(OsStr::from_bytes(self.name_bytes()))
285291
}
286292

287293
pub fn file_name(&self) -> OsString {
288294
OsStr::from_bytes(self.name_bytes()).to_os_string()
289295
}
290296

297+
#[cfg(any(target_os = "linux", target_os = "emscripten", target_os = "android"))]
298+
pub fn metadata(&self) -> io::Result<FileAttr> {
299+
let fd = cvt(unsafe {dirfd(self.dir.0.dirp.0)})?;
300+
let mut stat: stat64 = unsafe { mem::zeroed() };
301+
cvt(unsafe {
302+
fstatat(fd,
303+
self.entry.d_name.as_ptr(),
304+
&mut stat as *mut _ as *mut _,
305+
libc::AT_SYMLINK_NOFOLLOW)
306+
})?;
307+
Ok(FileAttr { stat: stat })
308+
}
309+
310+
#[cfg(not(any(target_os = "linux", target_os = "emscripten", target_os = "android")))]
291311
pub fn metadata(&self) -> io::Result<FileAttr> {
292312
lstat(&self.path())
293313
}
@@ -664,14 +684,15 @@ impl fmt::Debug for File {
664684
}
665685

666686
pub fn readdir(p: &Path) -> io::Result<ReadDir> {
667-
let root = Arc::new(p.to_path_buf());
687+
let root = p.to_path_buf();
668688
let p = cstr(p)?;
669689
unsafe {
670690
let ptr = libc::opendir(p.as_ptr());
671691
if ptr.is_null() {
672692
Err(Error::last_os_error())
673693
} else {
674-
Ok(ReadDir { dirp: Dir(ptr), root: root })
694+
let inner = InnerReadDir { dirp: Dir(ptr), root };
695+
Ok(ReadDir(Arc::new(inner)))
675696
}
676697
}
677698
}

0 commit comments

Comments
 (0)