Skip to content

Commit 7d03617

Browse files
committed
Auto merge of #56568 - notriddle:master, r=alexcrichton
Remove dependency on shell32.dll. Closes #56510 if it works on MinGW (I've only tested it on MSVC).
2 parents f4b07e0 + 83fe6e4 commit 7d03617

File tree

4 files changed

+210
-54
lines changed

4 files changed

+210
-54
lines changed

src/libstd/build.rs

-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ fn main() {
6868
println!("cargo:rustc-link-lib=advapi32");
6969
println!("cargo:rustc-link-lib=ws2_32");
7070
println!("cargo:rustc-link-lib=userenv");
71-
println!("cargo:rustc-link-lib=shell32");
7271
} else if target.contains("fuchsia") {
7372
println!("cargo:rustc-link-lib=zircon");
7473
println!("cargo:rustc-link-lib=fdio");

src/libstd/sys/windows/args.rs

+209-49
Original file line numberDiff line numberDiff line change
@@ -11,33 +11,161 @@
1111
#![allow(dead_code)] // runtime init functions not used during testing
1212

1313
use os::windows::prelude::*;
14+
use sys::windows::os::current_exe;
1415
use sys::c;
15-
use slice;
16-
use ops::Range;
1716
use ffi::OsString;
18-
use libc::{c_int, c_void};
1917
use fmt;
18+
use vec;
19+
use core::iter;
20+
use slice;
21+
use path::PathBuf;
2022

2123
pub unsafe fn init(_argc: isize, _argv: *const *const u8) { }
2224

2325
pub unsafe fn cleanup() { }
2426

2527
pub fn args() -> Args {
2628
unsafe {
27-
let mut nArgs: c_int = 0;
28-
let lpCmdLine = c::GetCommandLineW();
29-
let szArgList = c::CommandLineToArgvW(lpCmdLine, &mut nArgs);
30-
31-
// szArgList can be NULL if CommandLineToArgvW failed,
32-
// but in that case nArgs is 0 so we won't actually
33-
// try to read a null pointer
34-
Args { cur: szArgList, range: 0..(nArgs as isize) }
29+
let lp_cmd_line = c::GetCommandLineW();
30+
let parsed_args_list = parse_lp_cmd_line(
31+
lp_cmd_line as *const u16,
32+
|| current_exe().map(PathBuf::into_os_string).unwrap_or_else(|_| OsString::new()));
33+
34+
Args { parsed_args_list: parsed_args_list.into_iter() }
3535
}
3636
}
3737

38+
/// Implements the Windows command-line argument parsing algorithm.
39+
///
40+
/// Microsoft's documentation for the Windows CLI argument format can be found at
41+
/// <https://docs.microsoft.com/en-us/previous-versions//17w5ykft(v=vs.85)>.
42+
///
43+
/// Windows includes a function to do this in shell32.dll,
44+
/// but linking with that DLL causes the process to be registered as a GUI application.
45+
/// GUI applications add a bunch of overhead, even if no windows are drawn. See
46+
/// <https://randomascii.wordpress.com/2018/12/03/a-not-called-function-can-cause-a-5x-slowdown/>.
47+
///
48+
/// This function was tested for equivalence to the shell32.dll implementation in
49+
/// Windows 10 Pro v1803, using an exhaustive test suite available at
50+
/// <https://gist.github.com/notriddle/dde431930c392e428055b2dc22e638f5> or
51+
/// <https://paste.gg/p/anonymous/47d6ed5f5bd549168b1c69c799825223>.
52+
unsafe fn parse_lp_cmd_line<F: Fn() -> OsString>(lp_cmd_line: *const u16, exe_name: F)
53+
-> Vec<OsString> {
54+
const BACKSLASH: u16 = '\\' as u16;
55+
const QUOTE: u16 = '"' as u16;
56+
const TAB: u16 = '\t' as u16;
57+
const SPACE: u16 = ' ' as u16;
58+
let mut ret_val = Vec::new();
59+
if lp_cmd_line.is_null() || *lp_cmd_line == 0 {
60+
ret_val.push(exe_name());
61+
return ret_val;
62+
}
63+
let mut cmd_line = {
64+
let mut end = 0;
65+
while *lp_cmd_line.offset(end) != 0 {
66+
end += 1;
67+
}
68+
slice::from_raw_parts(lp_cmd_line, end as usize)
69+
};
70+
// The executable name at the beginning is special.
71+
cmd_line = match cmd_line[0] {
72+
// The executable name ends at the next quote mark,
73+
// no matter what.
74+
QUOTE => {
75+
let args = {
76+
let mut cut = cmd_line[1..].splitn(2, |&c| c == QUOTE);
77+
if let Some(exe) = cut.next() {
78+
ret_val.push(OsString::from_wide(exe));
79+
}
80+
cut.next()
81+
};
82+
if let Some(args) = args {
83+
args
84+
} else {
85+
return ret_val;
86+
}
87+
}
88+
// Implement quirk: when they say whitespace here,
89+
// they include the entire ASCII control plane:
90+
// "However, if lpCmdLine starts with any amount of whitespace, CommandLineToArgvW
91+
// will consider the first argument to be an empty string. Excess whitespace at the
92+
// end of lpCmdLine is ignored."
93+
0...SPACE => {
94+
ret_val.push(OsString::new());
95+
&cmd_line[1..]
96+
},
97+
// The executable name ends at the next whitespace,
98+
// no matter what.
99+
_ => {
100+
let args = {
101+
let mut cut = cmd_line.splitn(2, |&c| c > 0 && c <= SPACE);
102+
if let Some(exe) = cut.next() {
103+
ret_val.push(OsString::from_wide(exe));
104+
}
105+
cut.next()
106+
};
107+
if let Some(args) = args {
108+
args
109+
} else {
110+
return ret_val;
111+
}
112+
}
113+
};
114+
let mut cur = Vec::new();
115+
let mut in_quotes = false;
116+
let mut was_in_quotes = false;
117+
let mut backslash_count: usize = 0;
118+
for &c in cmd_line {
119+
match c {
120+
// backslash
121+
BACKSLASH => {
122+
backslash_count += 1;
123+
was_in_quotes = false;
124+
},
125+
QUOTE if backslash_count % 2 == 0 => {
126+
cur.extend(iter::repeat(b'\\' as u16).take(backslash_count / 2));
127+
backslash_count = 0;
128+
if was_in_quotes {
129+
cur.push('"' as u16);
130+
was_in_quotes = false;
131+
} else {
132+
was_in_quotes = in_quotes;
133+
in_quotes = !in_quotes;
134+
}
135+
}
136+
QUOTE if backslash_count % 2 != 0 => {
137+
cur.extend(iter::repeat(b'\\' as u16).take(backslash_count / 2));
138+
backslash_count = 0;
139+
was_in_quotes = false;
140+
cur.push(b'"' as u16);
141+
}
142+
SPACE | TAB if !in_quotes => {
143+
cur.extend(iter::repeat(b'\\' as u16).take(backslash_count));
144+
if !cur.is_empty() || was_in_quotes {
145+
ret_val.push(OsString::from_wide(&cur[..]));
146+
cur.truncate(0);
147+
}
148+
backslash_count = 0;
149+
was_in_quotes = false;
150+
}
151+
_ => {
152+
cur.extend(iter::repeat(b'\\' as u16).take(backslash_count));
153+
backslash_count = 0;
154+
was_in_quotes = false;
155+
cur.push(c);
156+
}
157+
}
158+
}
159+
cur.extend(iter::repeat(b'\\' as u16).take(backslash_count));
160+
// include empty quoted strings at the end of the arguments list
161+
if !cur.is_empty() || was_in_quotes || in_quotes {
162+
ret_val.push(OsString::from_wide(&cur[..]));
163+
}
164+
ret_val
165+
}
166+
38167
pub struct Args {
39-
range: Range<isize>,
40-
cur: *mut *mut u16,
168+
parsed_args_list: vec::IntoIter<OsString>,
41169
}
42170

43171
pub struct ArgsInnerDebug<'a> {
@@ -46,19 +174,7 @@ pub struct ArgsInnerDebug<'a> {
46174

47175
impl<'a> fmt::Debug for ArgsInnerDebug<'a> {
48176
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
49-
f.write_str("[")?;
50-
let mut first = true;
51-
for i in self.args.range.clone() {
52-
if !first {
53-
f.write_str(", ")?;
54-
}
55-
first = false;
56-
57-
// Here we do allocation which could be avoided.
58-
fmt::Debug::fmt(&unsafe { os_string_from_ptr(*self.args.cur.offset(i)) }, f)?;
59-
}
60-
f.write_str("]")?;
61-
Ok(())
177+
self.args.parsed_args_list.as_slice().fmt(f)
62178
}
63179
}
64180

@@ -70,38 +186,82 @@ impl Args {
70186
}
71187
}
72188

73-
unsafe fn os_string_from_ptr(ptr: *mut u16) -> OsString {
74-
let mut len = 0;
75-
while *ptr.offset(len) != 0 { len += 1; }
76-
77-
// Push it onto the list.
78-
let ptr = ptr as *const u16;
79-
let buf = slice::from_raw_parts(ptr, len as usize);
80-
OsStringExt::from_wide(buf)
81-
}
82-
83189
impl Iterator for Args {
84190
type Item = OsString;
85-
fn next(&mut self) -> Option<OsString> {
86-
self.range.next().map(|i| unsafe { os_string_from_ptr(*self.cur.offset(i)) } )
87-
}
88-
fn size_hint(&self) -> (usize, Option<usize>) { self.range.size_hint() }
191+
fn next(&mut self) -> Option<OsString> { self.parsed_args_list.next() }
192+
fn size_hint(&self) -> (usize, Option<usize>) { self.parsed_args_list.size_hint() }
89193
}
90194

91195
impl DoubleEndedIterator for Args {
92-
fn next_back(&mut self) -> Option<OsString> {
93-
self.range.next_back().map(|i| unsafe { os_string_from_ptr(*self.cur.offset(i)) } )
94-
}
196+
fn next_back(&mut self) -> Option<OsString> { self.parsed_args_list.next_back() }
95197
}
96198

97199
impl ExactSizeIterator for Args {
98-
fn len(&self) -> usize { self.range.len() }
200+
fn len(&self) -> usize { self.parsed_args_list.len() }
99201
}
100202

101-
impl Drop for Args {
102-
fn drop(&mut self) {
103-
// self.cur can be null if CommandLineToArgvW previously failed,
104-
// but LocalFree ignores NULL pointers
105-
unsafe { c::LocalFree(self.cur as *mut c_void); }
203+
#[cfg(test)]
204+
mod tests {
205+
use sys::windows::args::*;
206+
use ffi::OsString;
207+
208+
fn chk(string: &str, parts: &[&str]) {
209+
let mut wide: Vec<u16> = OsString::from(string).encode_wide().collect();
210+
wide.push(0);
211+
let parsed = unsafe {
212+
parse_lp_cmd_line(wide.as_ptr() as *const u16, || OsString::from("TEST.EXE"))
213+
};
214+
let expected: Vec<OsString> = parts.iter().map(|k| OsString::from(k)).collect();
215+
assert_eq!(parsed.as_slice(), expected.as_slice());
216+
}
217+
218+
#[test]
219+
fn empty() {
220+
chk("", &["TEST.EXE"]);
221+
chk("\0", &["TEST.EXE"]);
222+
}
223+
224+
#[test]
225+
fn single_words() {
226+
chk("EXE one_word", &["EXE", "one_word"]);
227+
chk("EXE a", &["EXE", "a"]);
228+
chk("EXE 😅", &["EXE", "😅"]);
229+
chk("EXE 😅🤦", &["EXE", "😅🤦"]);
230+
}
231+
232+
#[test]
233+
fn official_examples() {
234+
chk(r#"EXE "abc" d e"#, &["EXE", "abc", "d", "e"]);
235+
chk(r#"EXE a\\\b d"e f"g h"#, &["EXE", r#"a\\\b"#, "de fg", "h"]);
236+
chk(r#"EXE a\\\"b c d"#, &["EXE", r#"a\"b"#, "c", "d"]);
237+
chk(r#"EXE a\\\\"b c" d e"#, &["EXE", r#"a\\b c"#, "d", "e"]);
238+
}
239+
240+
#[test]
241+
fn whitespace_behavior() {
242+
chk(r#" test"#, &["", "test"]);
243+
chk(r#" test"#, &["", "test"]);
244+
chk(r#" test test2"#, &["", "test", "test2"]);
245+
chk(r#" test test2"#, &["", "test", "test2"]);
246+
chk(r#"test test2 "#, &["test", "test2"]);
247+
chk(r#"test test2 "#, &["test", "test2"]);
248+
chk(r#"test "#, &["test"]);
249+
}
250+
251+
#[test]
252+
fn genius_quotes() {
253+
chk(r#"EXE "" """#, &["EXE", "", ""]);
254+
chk(r#"EXE "" """"#, &["EXE", "", "\""]);
255+
chk(
256+
r#"EXE "this is """all""" in the same argument""#,
257+
&["EXE", "this is \"all\" in the same argument"]
258+
);
259+
chk(r#"EXE "a"""#, &["EXE", "a\""]);
260+
chk(r#"EXE "a"" a"#, &["EXE", "a\"", "a"]);
261+
// quotes cannot be escaped in command names
262+
chk(r#""EXE" check"#, &["EXE", "check"]);
263+
chk(r#""EXE check""#, &["EXE check"]);
264+
chk(r#""EXE """for""" check"#, &["EXE ", r#"for""#, "check"]);
265+
chk(r#""EXE \"for\" check"#, &[r#"EXE \"#, r#"for""#, "check"]);
106266
}
107267
}

src/libstd/sys/windows/c.rs

-3
Original file line numberDiff line numberDiff line change
@@ -1035,9 +1035,6 @@ extern "system" {
10351035

10361036
pub fn SetLastError(dwErrCode: DWORD);
10371037
pub fn GetCommandLineW() -> *mut LPCWSTR;
1038-
pub fn LocalFree(ptr: *mut c_void);
1039-
pub fn CommandLineToArgvW(lpCmdLine: *mut LPCWSTR,
1040-
pNumArgs: *mut c_int) -> *mut *mut u16;
10411038
pub fn GetTempPathW(nBufferLength: DWORD,
10421039
lpBuffer: LPCWSTR) -> DWORD;
10431040
pub fn OpenProcessToken(ProcessHandle: HANDLE,

src/test/run-make-fulldeps/tools.mk

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ endif
7676
# Extra flags needed to compile a working executable with the standard library
7777
ifdef IS_WINDOWS
7878
ifdef IS_MSVC
79-
EXTRACFLAGS := ws2_32.lib userenv.lib shell32.lib advapi32.lib
79+
EXTRACFLAGS := ws2_32.lib userenv.lib advapi32.lib
8080
else
8181
EXTRACFLAGS := -lws2_32 -luserenv
8282
endif

0 commit comments

Comments
 (0)