|
3 | 3 | // For the full copyright and license information, please view the LICENSE
|
4 | 4 | // file that was distributed with this source code.
|
5 | 5 |
|
6 |
| -// spell-checker:ignore (vars) seekable |
| 6 | +// spell-checker:ignore (vars) seekable memrchr |
7 | 7 |
|
8 | 8 | use clap::{Arg, ArgAction, ArgMatches, Command};
|
| 9 | +use memchr::memrchr_iter; |
9 | 10 | use std::ffi::OsString;
|
10 | 11 | #[cfg(unix)]
|
11 | 12 | use std::fs::File;
|
@@ -378,30 +379,50 @@ where
|
378 | 379 |
|
379 | 380 | let mut buffer = [0u8; BUF_SIZE];
|
380 | 381 |
|
381 |
| - let mut i = 0u64; |
382 | 382 | let mut lines = 0u64;
|
| 383 | + let mut check_last_byte_first_loop = true; |
| 384 | + let mut bytes_remaining_to_search = file_size; |
383 | 385 |
|
384 | 386 | loop {
|
385 | 387 | // the casts here are ok, `buffer.len()` should never be above a few k
|
386 |
| - let bytes_remaining_to_search = file_size - i; |
387 |
| - let bytes_to_read_this_loop = bytes_remaining_to_search.min(BUF_SIZE.try_into().unwrap()); |
| 388 | + let bytes_to_read_this_loop = |
| 389 | + bytes_remaining_to_search.min(buffer.len().try_into().unwrap()); |
388 | 390 | let read_start_offset = bytes_remaining_to_search - bytes_to_read_this_loop;
|
389 | 391 | let buffer = &mut buffer[..bytes_to_read_this_loop.try_into().unwrap()];
|
| 392 | + bytes_remaining_to_search -= bytes_to_read_this_loop; |
390 | 393 |
|
391 | 394 | input.seek(SeekFrom::Start(read_start_offset))?;
|
392 | 395 | input.read_exact(buffer)?;
|
393 |
| - for byte in buffer.iter().rev() { |
394 |
| - if byte == &separator { |
395 |
| - lines += 1; |
396 |
| - } |
397 |
| - // if it were just `n`, |
| 396 | + |
| 397 | + // Unfortunately need special handling for the case that the input file doesn't have |
| 398 | + // a terminating `separator` character. |
| 399 | + // If the input file doesn't end with a `separator` character, add an extra line to our |
| 400 | + // `lines` counter. In the case that `n` is 0 we need to return here since we've |
| 401 | + // obviously found our 0th-line-from-the-end offset. |
| 402 | + if check_last_byte_first_loop { |
| 403 | + check_last_byte_first_loop = false; |
| 404 | + if let Some(last_byte_of_file) = buffer.last() { |
| 405 | + if last_byte_of_file != &separator { |
| 406 | + if n == 0 { |
| 407 | + input.rewind()?; |
| 408 | + return Ok(file_size); |
| 409 | + } |
| 410 | + assert_eq!(lines, 0); |
| 411 | + lines = 1; |
| 412 | + } |
| 413 | + }; |
| 414 | + } |
| 415 | + |
| 416 | + for separator_offset in memrchr_iter(separator, &buffer[..]) { |
| 417 | + lines += 1; |
398 | 418 | if lines == n + 1 {
|
399 | 419 | input.rewind()?;
|
400 |
| - return Ok(file_size - i); |
| 420 | + return Ok(read_start_offset |
| 421 | + + TryInto::<u64>::try_into(separator_offset).unwrap() |
| 422 | + + 1); |
401 | 423 | }
|
402 |
| - i += 1; |
403 | 424 | }
|
404 |
| - if file_size - i == 0 { |
| 425 | + if read_start_offset == 0 { |
405 | 426 | input.rewind()?;
|
406 | 427 | return Ok(0);
|
407 | 428 | }
|
@@ -753,4 +774,23 @@ mod tests {
|
753 | 774 | 0
|
754 | 775 | );
|
755 | 776 | }
|
| 777 | + |
| 778 | + #[test] |
| 779 | + fn test_find_nth_line_from_end_non_terminated() { |
| 780 | + // Input without a trailing newline: the unterminated tail "b" counts as a line, so |
| 781 | + // n = 0 yields the file size (3) and n = 1 the offset just past the '\n' (2). |
| 782 | + let input_file = "a\nb"; |
| 783 | + let mut input = Cursor::new(input_file); |
| 784 | + assert_eq!(find_nth_line_from_end(&mut input, 0, b'\n').unwrap(), 3); |
| 785 | + assert_eq!(find_nth_line_from_end(&mut input, 1, b'\n').unwrap(), 2); |
| 786 | + } |
| 787 | + |
| 788 | + #[test] |
| 789 | + fn test_find_nth_line_from_end_empty() { |
| 790 | + // An empty input is expected to yield offset 0 whether `n` is 0 or 1. |
| 791 | + let input_file = ""; |
| 792 | + let mut input = Cursor::new(input_file); |
| 793 | + assert_eq!(find_nth_line_from_end(&mut input, 0, b'\n').unwrap(), 0); |
| 794 | + assert_eq!(find_nth_line_from_end(&mut input, 1, b'\n').unwrap(), 0); |
| 795 | + } |
756 | 796 | }
|
0 commit comments