Skip to content

Commit 8931d2c

Browse files
authored Mar 23, 2025
Merge pull request #7521 from usamoi/ptx
ptx: fixes
2 parents 53d2772 + 412d2b3 commit 8931d2c

File tree

3 files changed

+155
-53
lines changed

3 files changed

+155
-53
lines changed
 

‎src/uu/ptx/src/ptx.rs

+79-53
Original file line number | Diff line number | Diff line change
@@ -15,14 +15,12 @@ use std::fs::File;
1515
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
1616
use std::num::ParseIntError;
1717
use uucore::display::Quotable;
18-
use uucore::error::{FromIo, UError, UResult};
18+
use uucore::error::{FromIo, UError, UResult, UUsageError};
1919
use uucore::{format_usage, help_about, help_usage};
2020

2121
const USAGE: &str = help_usage!("ptx.md");
2222
const ABOUT: &str = help_about!("ptx.md");
2323

24-
const REGEX_CHARCLASS: &str = "^-]\\";
25-
2624
#[derive(Debug)]
2725
enum OutFormat {
2826
Dumb,
@@ -71,8 +69,12 @@ fn read_word_filter_file(
7169
.get_one::<String>(option)
7270
.expect("parsing options failed!")
7371
.to_string();
74-
let file = File::open(filename)?;
75-
let reader = BufReader::new(file);
72+
let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
73+
Box::new(stdin())
74+
} else {
75+
let file = File::open(filename)?;
76+
Box::new(file)
77+
});
7678
let mut words: HashSet<String> = HashSet::new();
7779
for word in reader.lines() {
7880
words.insert(word?);
@@ -88,7 +90,12 @@ fn read_char_filter_file(
8890
let filename = matches
8991
.get_one::<String>(option)
9092
.expect("parsing options failed!");
91-
let mut reader = File::open(filename)?;
93+
let mut reader: Box<dyn Read> = if filename == "-" {
94+
Box::new(stdin())
95+
} else {
96+
let file = File::open(filename)?;
97+
Box::new(file)
98+
};
9299
let mut buffer = String::new();
93100
reader.read_to_string(&mut buffer)?;
94101
Ok(buffer.chars().collect())
@@ -155,18 +162,10 @@ impl WordFilter {
155162
let reg = match arg_reg {
156163
Some(arg_reg) => arg_reg,
157164
None => {
158-
if break_set.is_some() {
165+
if let Some(break_set) = break_set {
159166
format!(
160167
"[^{}]+",
161-
break_set
162-
.unwrap()
163-
.into_iter()
164-
.map(|c| if REGEX_CHARCLASS.contains(c) {
165-
format!("\\{c}")
166-
} else {
167-
c.to_string()
168-
})
169-
.collect::<String>()
168+
regex::escape(&break_set.into_iter().collect::<String>())
170169
)
171170
} else if config.gnu_ext {
172171
"\\w+".to_owned()
@@ -260,10 +259,17 @@ fn get_config(matches: &clap::ArgMatches) -> UResult<Config> {
260259
.parse()
261260
.map_err(PtxError::ParseError)?;
262261
}
263-
if matches.get_flag(options::FORMAT_ROFF) {
262+
if let Some(format) = matches.get_one::<String>(options::FORMAT) {
263+
config.format = match format.as_str() {
264+
"roff" => OutFormat::Roff,
265+
"tex" => OutFormat::Tex,
266+
_ => unreachable!("should be caught by clap"),
267+
};
268+
}
269+
if matches.get_flag(options::format::ROFF) {
264270
config.format = OutFormat::Roff;
265271
}
266-
if matches.get_flag(options::FORMAT_TEX) {
272+
if matches.get_flag(options::format::TEX) {
267273
config.format = OutFormat::Tex;
268274
}
269275
Ok(config)
@@ -277,20 +283,10 @@ struct FileContent {
277283

278284
type FileMap = HashMap<String, FileContent>;
279285

280-
fn read_input(input_files: &[String], config: &Config) -> std::io::Result<FileMap> {
286+
fn read_input(input_files: &[String]) -> std::io::Result<FileMap> {
281287
let mut file_map: FileMap = HashMap::new();
282-
let mut files = Vec::new();
283-
if input_files.is_empty() {
284-
files.push("-");
285-
} else if config.gnu_ext {
286-
for file in input_files {
287-
files.push(file);
288-
}
289-
} else {
290-
files.push(&input_files[0]);
291-
}
292288
let mut offset: usize = 0;
293-
for filename in files {
289+
for filename in input_files {
294290
let reader: BufReader<Box<dyn Read>> = BufReader::new(if filename == "-" {
295291
Box::new(stdin())
296292
} else {
@@ -344,7 +340,7 @@ fn create_word_set(config: &Config, filter: &WordFilter, file_map: &FileMap) ->
344340
continue;
345341
}
346342
if config.ignore_case {
347-
word = word.to_lowercase();
343+
word = word.to_uppercase();
348344
}
349345
word_set.insert(WordRef {
350346
word,
@@ -693,15 +689,19 @@ fn write_traditional_output(
693689
}
694690

695691
mod options {
692+
pub mod format {
693+
pub static ROFF: &str = "roff";
694+
pub static TEX: &str = "tex";
695+
}
696+
696697
pub static FILE: &str = "file";
697698
pub static AUTO_REFERENCE: &str = "auto-reference";
698699
pub static TRADITIONAL: &str = "traditional";
699700
pub static FLAG_TRUNCATION: &str = "flag-truncation";
700701
pub static MACRO_NAME: &str = "macro-name";
701-
pub static FORMAT_ROFF: &str = "format=roff";
702+
pub static FORMAT: &str = "format";
702703
pub static RIGHT_SIDE_REFS: &str = "right-side-refs";
703704
pub static SENTENCE_REGEXP: &str = "sentence-regexp";
704-
pub static FORMAT_TEX: &str = "format=tex";
705705
pub static WORD_REGEXP: &str = "word-regexp";
706706
pub static BREAK_FILE: &str = "break-file";
707707
pub static IGNORE_CASE: &str = "ignore-case";
@@ -715,21 +715,40 @@ mod options {
715715
#[uucore::main]
716716
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
717717
let matches = uu_app().try_get_matches_from(args)?;
718+
let config = get_config(&matches)?;
718719

719-
let mut input_files: Vec<String> = match &matches.get_many::<String>(options::FILE) {
720-
Some(v) => v.clone().cloned().collect(),
721-
None => vec!["-".to_string()],
722-
};
720+
let input_files;
721+
let output_file;
722+
723+
let mut files = matches
724+
.get_many::<String>(options::FILE)
725+
.into_iter()
726+
.flatten()
727+
.cloned();
728+
729+
if !config.gnu_ext {
730+
input_files = vec![files.next().unwrap_or("-".to_string())];
731+
output_file = files.next().unwrap_or("-".to_string());
732+
if let Some(file) = files.next() {
733+
return Err(UUsageError::new(
734+
1,
735+
format!("extra operand {}", file.quote()),
736+
));
737+
}
738+
} else {
739+
input_files = {
740+
let mut files = files.collect::<Vec<_>>();
741+
if files.is_empty() {
742+
files.push("-".to_string());
743+
}
744+
files
745+
};
746+
output_file = "-".to_string();
747+
}
723748

724-
let config = get_config(&matches)?;
725749
let word_filter = WordFilter::new(&matches, &config)?;
726-
let file_map = read_input(&input_files, &config).map_err_context(String::new)?;
750+
let file_map = read_input(&input_files).map_err_context(String::new)?;
727751
let word_set = create_word_set(&config, &word_filter, &file_map);
728-
let output_file = if !config.gnu_ext && input_files.len() == 2 {
729-
input_files.pop().unwrap()
730-
} else {
731-
"-".to_string()
732-
};
733752
write_traditional_output(&config, &file_map, &word_set, &output_file)
734753
}
735754

@@ -774,10 +793,24 @@ pub fn uu_app() -> Command {
774793
.value_name("STRING"),
775794
)
776795
.arg(
777-
Arg::new(options::FORMAT_ROFF)
796+
Arg::new(options::FORMAT)
797+
.long(options::FORMAT)
798+
.hide(true)
799+
.value_parser(["roff", "tex"])
800+
.overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX]),
801+
)
802+
.arg(
803+
Arg::new(options::format::ROFF)
778804
.short('O')
779-
.long(options::FORMAT_ROFF)
780805
.help("generate output as roff directives")
806+
.overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX])
807+
.action(ArgAction::SetTrue),
808+
)
809+
.arg(
810+
Arg::new(options::format::TEX)
811+
.short('T')
812+
.help("generate output as TeX directives")
813+
.overrides_with_all([options::FORMAT, options::format::ROFF, options::format::TEX])
781814
.action(ArgAction::SetTrue),
782815
)
783816
.arg(
@@ -794,13 +827,6 @@ pub fn uu_app() -> Command {
794827
.help("for end of lines or end of sentences")
795828
.value_name("REGEXP"),
796829
)
797-
.arg(
798-
Arg::new(options::FORMAT_TEX)
799-
.short('T')
800-
.long(options::FORMAT_TEX)
801-
.help("generate output as TeX directives")
802-
.action(ArgAction::SetTrue),
803-
)
804830
.arg(
805831
Arg::new(options::WORD_REGEXP)
806832
.short('W')

‎tests/by-util/test_ptx.rs

+48
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
//
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
5+
// spell-checker:ignore roff
56
use crate::common::util::TestScenario;
67

78
#[test]
@@ -112,3 +113,50 @@ fn gnu_ext_disabled_empty_word_regexp_ignores_break_file() {
112113
.succeeds()
113114
.stdout_only_fixture("gnu_ext_disabled_rightward_no_ref.expected");
114115
}
116+
117+
#[test]
118+
fn test_reject_too_many_operands() {
119+
new_ucmd!().args(&["-G", "-", "-", "-"]).fails_with_code(1);
120+
}
121+
122+
#[test]
123+
fn test_break_file_regex_escaping() {
124+
new_ucmd!()
125+
.pipe_in("\\.+*?()|[]{}^$#&-~")
126+
.args(&["-G", "-b", "-", "input"])
127+
.succeeds()
128+
.stdout_only_fixture("break_file_regex_escaping.expected");
129+
}
130+
131+
#[test]
132+
fn test_ignore_case() {
133+
new_ucmd!()
134+
.args(&["-G", "-f"])
135+
.pipe_in("a _")
136+
.succeeds()
137+
.stdout_only(".xx \"\" \"\" \"a _\" \"\"\n.xx \"\" \"a\" \"_\" \"\"\n");
138+
}
139+
140+
#[test]
141+
fn test_format() {
142+
new_ucmd!()
143+
.args(&["-G", "-O"])
144+
.pipe_in("a")
145+
.succeeds()
146+
.stdout_only(".xx \"\" \"\" \"a\" \"\"\n");
147+
new_ucmd!()
148+
.args(&["-G", "-T"])
149+
.pipe_in("a")
150+
.succeeds()
151+
.stdout_only("\\xx {}{}{a}{}{}\n");
152+
new_ucmd!()
153+
.args(&["-G", "--format=roff"])
154+
.pipe_in("a")
155+
.succeeds()
156+
.stdout_only(".xx \"\" \"\" \"a\" \"\"\n");
157+
new_ucmd!()
158+
.args(&["-G", "--format=tex"])
159+
.pipe_in("a")
160+
.succeeds()
161+
.stdout_only("\\xx {}{}{a}{}{}\n");
162+
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,28 @@
1+
.xx "" "" """quotes"", for roff" ""
2+
.xx "" "and some other like" "%a, b#, c$c" ""
3+
.xx "" "and some other like %a, b#" ", c$c" ""
4+
.xx "" "maybe" "also~or^" ""
5+
.xx "" "" "and some other like %a, b#, c$c" ""
6+
.xx "" "oh," "and back\slash" ""
7+
.xx "" "and some other like %a," "b#, c$c" ""
8+
.xx "" "oh, and" "back\slash" ""
9+
.xx "" "{" "brackets} for tex" ""
10+
.xx "" "and some other like %a, b#," "c$c" ""
11+
.xx "" "and some other like %a, b#, c$" "c" ""
12+
.xx "" "let's check special" "characters:" ""
13+
.xx "" "let's" "check special characters:" ""
14+
.xx "" """quotes""," "for roff" ""
15+
.xx "" "{brackets}" "for tex" ""
16+
.xx "" "" "hello world!" ""
17+
.xx "" "" "let's check special characters:" ""
18+
.xx "" "and some other" "like %a, b#, c$c" ""
19+
.xx "" "" "maybe also~or^" ""
20+
.xx "" "" "oh, and back\slash" ""
21+
.xx "" "maybe also~" "or^" ""
22+
.xx "" "and some" "other like %a, b#, c$c" ""
23+
.xx "" """quotes"", for" "roff" ""
24+
.xx "" "oh, and back\" "slash" ""
25+
.xx "" "and" "some other like %a, b#, c$c" ""
26+
.xx "" "let's check" "special characters:" ""
27+
.xx "" "{brackets} for" "tex" ""
28+
.xx "" "hello" "world!" ""

0 commit comments

Comments
 (0)
Please sign in to comment.