Skip to content

Commit 2e3da88

Browse files
authored
Merge pull request #7438 from karlmcdowall/head_perf2
head: rework handling of non-seekable files
2 parents ae6d4de + e1275f4 commit 2e3da88

File tree

2 files changed

+583
-109
lines changed

2 files changed

+583
-109
lines changed

src/uu/head/src/head.rs

+32-45
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ use thiserror::Error;
1717
use uucore::display::Quotable;
1818
use uucore::error::{FromIo, UError, UResult};
1919
use uucore::line_ending::LineEnding;
20-
use uucore::lines::lines;
2120
use uucore::{format_usage, help_about, help_usage, show};
2221

2322
const BUF_SIZE: usize = 65536;
@@ -37,7 +36,8 @@ mod options {
3736

3837
mod parse;
3938
mod take;
40-
use take::take_all_but;
39+
use take::copy_all_but_n_bytes;
40+
use take::copy_all_but_n_lines;
4141
use take::take_lines;
4242

4343
#[derive(Error, Debug)]
@@ -274,14 +274,16 @@ fn read_n_lines(input: &mut impl std::io::BufRead, n: u64, separator: u8) -> std
274274
let mut reader = take_lines(input, n, separator);
275275

276276
// Write those bytes to `stdout`.
277-
let mut stdout = std::io::stdout();
277+
let stdout = std::io::stdout();
278+
let stdout = stdout.lock();
279+
let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout);
278280

279-
let bytes_written = io::copy(&mut reader, &mut stdout).map_err(wrap_in_stdout_error)?;
281+
let bytes_written = io::copy(&mut reader, &mut writer).map_err(wrap_in_stdout_error)?;
280282

281283
// Make sure we finish writing everything to the target before
282284
// exiting. Otherwise, when Rust is implicitly flushing, any
283285
// error will be silently ignored.
284-
stdout.flush().map_err(wrap_in_stdout_error)?;
286+
writer.flush().map_err(wrap_in_stdout_error)?;
285287

286288
Ok(bytes_written)
287289
}
@@ -296,43 +298,37 @@ fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize>
296298
}
297299
}
298300

299-
fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Result<u64> {
300-
let mut bytes_written = 0;
301+
fn read_but_last_n_bytes(mut input: impl Read, n: u64) -> std::io::Result<u64> {
302+
let mut bytes_written: u64 = 0;
301303
if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
302304
let stdout = std::io::stdout();
303-
let stdout = stdout.lock();
304-
// Even though stdout is buffered, it will flush on each newline in the
305-
// input stream. This can be costly, so add an extra layer of buffering
306-
// over the top. This gives a significant speedup (approx 4x).
307-
let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout);
308-
for byte in take_all_but(input.bytes(), n) {
309-
writer.write_all(&[byte?]).map_err(wrap_in_stdout_error)?;
310-
bytes_written += 1;
311-
}
305+
let mut stdout = stdout.lock();
306+
307+
bytes_written = copy_all_but_n_bytes(&mut input, &mut stdout, n)
308+
.map_err(wrap_in_stdout_error)?
309+
.try_into()
310+
.unwrap();
311+
312312
// Make sure we finish writing everything to the target before
313313
// exiting. Otherwise, when Rust is implicitly flushing, any
314314
// error will be silently ignored.
315-
writer.flush().map_err(wrap_in_stdout_error)?;
315+
stdout.flush().map_err(wrap_in_stdout_error)?;
316316
}
317317
Ok(bytes_written)
318318
}
319319

320-
fn read_but_last_n_lines(
321-
input: impl std::io::BufRead,
322-
n: u64,
323-
separator: u8,
324-
) -> std::io::Result<u64> {
320+
fn read_but_last_n_lines(mut input: impl Read, n: u64, separator: u8) -> std::io::Result<u64> {
321+
let stdout = std::io::stdout();
322+
let mut stdout = stdout.lock();
323+
if n == 0 {
324+
return io::copy(&mut input, &mut stdout).map_err(wrap_in_stdout_error);
325+
}
325326
let mut bytes_written: u64 = 0;
326327
if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
327-
let stdout = std::io::stdout();
328-
let mut stdout = stdout.lock();
329-
330-
for bytes in take_all_but(lines(input, separator), n) {
331-
let bytes = bytes?;
332-
bytes_written += u64::try_from(bytes.len()).unwrap();
333-
334-
stdout.write_all(&bytes).map_err(wrap_in_stdout_error)?;
335-
}
328+
bytes_written = copy_all_but_n_lines(input, &mut stdout, n, separator)
329+
.map_err(wrap_in_stdout_error)?
330+
.try_into()
331+
.unwrap();
336332
// Make sure we finish writing everything to the target before
337333
// exiting. Otherwise, when Rust is implicitly flushing, any
338334
// error will be silently ignored.
@@ -434,10 +430,9 @@ fn head_backwards_without_seek_file(
434430
input: &mut std::fs::File,
435431
options: &HeadOptions,
436432
) -> std::io::Result<u64> {
437-
let reader = std::io::BufReader::with_capacity(BUF_SIZE, &*input);
438433
match options.mode {
439-
Mode::AllButLastBytes(n) => read_but_last_n_bytes(reader, n),
440-
Mode::AllButLastLines(n) => read_but_last_n_lines(reader, n, options.line_ending.into()),
434+
Mode::AllButLastBytes(n) => read_but_last_n_bytes(input, n),
435+
Mode::AllButLastLines(n) => read_but_last_n_lines(input, n, options.line_ending.into()),
441436
_ => unreachable!(),
442437
}
443438
}
@@ -452,28 +447,20 @@ fn head_backwards_on_seekable_file(
452447
if n >= size {
453448
Ok(0)
454449
} else {
455-
read_n_bytes(
456-
&mut std::io::BufReader::with_capacity(BUF_SIZE, input),
457-
size - n,
458-
)
450+
read_n_bytes(input, size - n)
459451
}
460452
}
461453
Mode::AllButLastLines(n) => {
462454
let found = find_nth_line_from_end(input, n, options.line_ending.into())?;
463-
read_n_bytes(
464-
&mut std::io::BufReader::with_capacity(BUF_SIZE, input),
465-
found,
466-
)
455+
read_n_bytes(input, found)
467456
}
468457
_ => unreachable!(),
469458
}
470459
}
471460

472461
fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<u64> {
473462
match options.mode {
474-
Mode::FirstBytes(n) => {
475-
read_n_bytes(&mut std::io::BufReader::with_capacity(BUF_SIZE, input), n)
476-
}
463+
Mode::FirstBytes(n) => read_n_bytes(input, n),
477464
Mode::FirstLines(n) => read_n_lines(
478465
&mut std::io::BufReader::with_capacity(BUF_SIZE, input),
479466
n,

0 commit comments

Comments
 (0)