11
11
#![ allow( dead_code) ] // runtime init functions not used during testing
12
12
13
13
use os:: windows:: prelude:: * ;
14
+ use sys:: windows:: os:: current_exe;
14
15
use sys:: c;
15
- use slice;
16
- use ops:: Range ;
17
16
use ffi:: OsString ;
18
- use libc:: { c_int, c_void} ;
19
17
use fmt;
18
+ use vec;
19
+ use core:: iter;
20
+ use slice;
21
+ use path:: PathBuf ;
20
22
21
23
/// Runtime start-up hook for argument handling.
///
/// This implementation does nothing: both parameters are ignored and the
/// arguments are obtained on demand by `args()` instead.
pub unsafe fn init(_argc: isize, _argv: *const *const u8) {
    // Intentionally empty.
}
22
24
23
25
/// Runtime shutdown hook for argument handling.
///
/// This implementation does nothing; there is no state to release.
pub unsafe fn cleanup() {
    // Intentionally empty.
}
24
26
25
27
pub fn args ( ) -> Args {
26
28
unsafe {
27
- let mut nArgs: c_int = 0 ;
28
- let lpCmdLine = c:: GetCommandLineW ( ) ;
29
- let szArgList = c:: CommandLineToArgvW ( lpCmdLine, & mut nArgs) ;
30
-
31
- // szArcList can be NULL if CommandLinToArgvW failed,
32
- // but in that case nArgs is 0 so we won't actually
33
- // try to read a null pointer
34
- Args { cur : szArgList, range : 0 ..( nArgs as isize ) }
29
+ let lp_cmd_line = c:: GetCommandLineW ( ) ;
30
+ let parsed_args_list = parse_lp_cmd_line (
31
+ lp_cmd_line as * const u16 ,
32
+ || current_exe ( ) . map ( PathBuf :: into_os_string) . unwrap_or_else ( |_| OsString :: new ( ) ) ) ;
33
+
34
+ Args { parsed_args_list : parsed_args_list. into_iter ( ) }
35
35
}
36
36
}
37
37
38
+ /// Implements the Windows command-line argument parsing algorithm.
39
+ ///
40
+ /// Microsoft's documentation for the Windows CLI argument format can be found at
41
+ /// <https://docs.microsoft.com/en-us/previous-versions//17w5ykft(v=vs.85)>.
42
+ ///
43
+ /// Windows includes a function to do this in shell32.dll,
44
+ /// but linking with that DLL causes the process to be registered as a GUI application.
45
+ /// GUI applications add a bunch of overhead, even if no windows are drawn. See
46
+ /// <https://randomascii.wordpress.com/2018/12/03/a-not-called-function-can-cause-a-5x-slowdown/>.
47
+ ///
48
+ /// This function was tested for equivalence to the shell32.dll implementation in
49
+ /// Windows 10 Pro v1803, using an exhaustive test suite available at
50
+ /// <https://gist.github.com/notriddle/dde431930c392e428055b2dc22e638f5> or
51
+ /// <https://paste.gg/p/anonymous/47d6ed5f5bd549168b1c69c799825223>.
52
+ unsafe fn parse_lp_cmd_line < F : Fn ( ) -> OsString > ( lp_cmd_line : * const u16 , exe_name : F )
53
+ -> Vec < OsString > {
54
+ const BACKSLASH : u16 = '\\' as u16 ;
55
+ const QUOTE : u16 = '"' as u16 ;
56
+ const TAB : u16 = '\t' as u16 ;
57
+ const SPACE : u16 = ' ' as u16 ;
58
+ let mut ret_val = Vec :: new ( ) ;
59
+ if lp_cmd_line. is_null ( ) || * lp_cmd_line == 0 {
60
+ ret_val. push ( exe_name ( ) ) ;
61
+ return ret_val;
62
+ }
63
+ let mut cmd_line = {
64
+ let mut end = 0 ;
65
+ while * lp_cmd_line. offset ( end) != 0 {
66
+ end += 1 ;
67
+ }
68
+ slice:: from_raw_parts ( lp_cmd_line, end as usize )
69
+ } ;
70
+ // The executable name at the beginning is special.
71
+ cmd_line = match cmd_line[ 0 ] {
72
+ // The executable name ends at the next quote mark,
73
+ // no matter what.
74
+ QUOTE => {
75
+ let args = {
76
+ let mut cut = cmd_line[ 1 ..] . splitn ( 2 , |& c| c == QUOTE ) ;
77
+ if let Some ( exe) = cut. next ( ) {
78
+ ret_val. push ( OsString :: from_wide ( exe) ) ;
79
+ }
80
+ cut. next ( )
81
+ } ;
82
+ if let Some ( args) = args {
83
+ args
84
+ } else {
85
+ return ret_val;
86
+ }
87
+ }
88
+ // Implement quirk: when they say whitespace here,
89
+ // they include the entire ASCII control plane:
90
+ // "However, if lpCmdLine starts with any amount of whitespace, CommandLineToArgvW
91
+ // will consider the first argument to be an empty string. Excess whitespace at the
92
+ // end of lpCmdLine is ignored."
93
+ 0 ...SPACE => {
94
+ ret_val. push ( OsString :: new ( ) ) ;
95
+ & cmd_line[ 1 ..]
96
+ } ,
97
+ // The executable name ends at the next whitespace,
98
+ // no matter what.
99
+ _ => {
100
+ let args = {
101
+ let mut cut = cmd_line. splitn ( 2 , |& c| c > 0 && c <= SPACE ) ;
102
+ if let Some ( exe) = cut. next ( ) {
103
+ ret_val. push ( OsString :: from_wide ( exe) ) ;
104
+ }
105
+ cut. next ( )
106
+ } ;
107
+ if let Some ( args) = args {
108
+ args
109
+ } else {
110
+ return ret_val;
111
+ }
112
+ }
113
+ } ;
114
+ let mut cur = Vec :: new ( ) ;
115
+ let mut in_quotes = false ;
116
+ let mut was_in_quotes = false ;
117
+ let mut backslash_count: usize = 0 ;
118
+ for & c in cmd_line {
119
+ match c {
120
+ // backslash
121
+ BACKSLASH => {
122
+ backslash_count += 1 ;
123
+ was_in_quotes = false ;
124
+ } ,
125
+ QUOTE if backslash_count % 2 == 0 => {
126
+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count / 2 ) ) ;
127
+ backslash_count = 0 ;
128
+ if was_in_quotes {
129
+ cur. push ( '"' as u16 ) ;
130
+ was_in_quotes = false ;
131
+ } else {
132
+ was_in_quotes = in_quotes;
133
+ in_quotes = !in_quotes;
134
+ }
135
+ }
136
+ QUOTE if backslash_count % 2 != 0 => {
137
+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count / 2 ) ) ;
138
+ backslash_count = 0 ;
139
+ was_in_quotes = false ;
140
+ cur. push ( b'"' as u16 ) ;
141
+ }
142
+ SPACE | TAB if !in_quotes => {
143
+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count) ) ;
144
+ if !cur. is_empty ( ) || was_in_quotes {
145
+ ret_val. push ( OsString :: from_wide ( & cur[ ..] ) ) ;
146
+ cur. truncate ( 0 ) ;
147
+ }
148
+ backslash_count = 0 ;
149
+ was_in_quotes = false ;
150
+ }
151
+ _ => {
152
+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count) ) ;
153
+ backslash_count = 0 ;
154
+ was_in_quotes = false ;
155
+ cur. push ( c) ;
156
+ }
157
+ }
158
+ }
159
+ cur. extend ( iter:: repeat ( b'\\' as u16 ) . take ( backslash_count) ) ;
160
+ // include empty quoted strings at the end of the arguments list
161
+ if !cur. is_empty ( ) || was_in_quotes || in_quotes {
162
+ ret_val. push ( OsString :: from_wide ( & cur[ ..] ) ) ;
163
+ }
164
+ ret_val
165
+ }
166
+
38
167
/// Iterator over the command-line arguments of the current process.
///
/// The arguments are fully parsed up front; this type simply owns the
/// resulting strings and hands them out one at a time.
pub struct Args {
    /// Remaining, not yet yielded arguments.
    parsed_args_list: vec::IntoIter<OsString>,
}
42
170
43
171
pub struct ArgsInnerDebug < ' a > {
@@ -46,19 +174,7 @@ pub struct ArgsInnerDebug<'a> {
46
174
47
175
impl < ' a > fmt:: Debug for ArgsInnerDebug < ' a > {
48
176
fn fmt ( & self , f : & mut fmt:: Formatter ) -> fmt:: Result {
49
- f. write_str ( "[" ) ?;
50
- let mut first = true ;
51
- for i in self . args . range . clone ( ) {
52
- if !first {
53
- f. write_str ( ", " ) ?;
54
- }
55
- first = false ;
56
-
57
- // Here we do allocation which could be avoided.
58
- fmt:: Debug :: fmt ( & unsafe { os_string_from_ptr ( * self . args . cur . offset ( i) ) } , f) ?;
59
- }
60
- f. write_str ( "]" ) ?;
61
- Ok ( ( ) )
177
+ self . args . parsed_args_list . as_slice ( ) . fmt ( f)
62
178
}
63
179
}
64
180
@@ -70,38 +186,82 @@ impl Args {
70
186
}
71
187
}
72
188
73
- unsafe fn os_string_from_ptr ( ptr : * mut u16 ) -> OsString {
74
- let mut len = 0 ;
75
- while * ptr. offset ( len) != 0 { len += 1 ; }
76
-
77
- // Push it onto the list.
78
- let ptr = ptr as * const u16 ;
79
- let buf = slice:: from_raw_parts ( ptr, len as usize ) ;
80
- OsStringExt :: from_wide ( buf)
81
- }
82
-
83
189
impl Iterator for Args {
84
190
type Item = OsString ;
85
- fn next ( & mut self ) -> Option < OsString > {
86
- self . range . next ( ) . map ( |i| unsafe { os_string_from_ptr ( * self . cur . offset ( i) ) } )
87
- }
88
- fn size_hint ( & self ) -> ( usize , Option < usize > ) { self . range . size_hint ( ) }
191
+ fn next ( & mut self ) -> Option < OsString > { self . parsed_args_list . next ( ) }
192
+ fn size_hint ( & self ) -> ( usize , Option < usize > ) { self . parsed_args_list . size_hint ( ) }
89
193
}
90
194
91
195
impl DoubleEndedIterator for Args {
92
- fn next_back ( & mut self ) -> Option < OsString > {
93
- self . range . next_back ( ) . map ( |i| unsafe { os_string_from_ptr ( * self . cur . offset ( i) ) } )
94
- }
196
+ fn next_back ( & mut self ) -> Option < OsString > { self . parsed_args_list . next_back ( ) }
95
197
}
96
198
97
199
impl ExactSizeIterator for Args {
98
- fn len ( & self ) -> usize { self . range . len ( ) }
200
+ fn len ( & self ) -> usize { self . parsed_args_list . len ( ) }
99
201
}
100
202
101
// `Args` no longer needs a `Drop` impl: it owns a `vec::IntoIter<OsString>`
// rather than a system-allocated argument array.

#[cfg(test)]
mod tests {
    use sys::windows::args::*;
    use ffi::OsString;

    /// Parses `string` as a command line and asserts that the result equals
    /// `parts`. The fallback executable name is fixed to `TEST.EXE`.
    fn chk(string: &str, parts: &[&str]) {
        let mut wide: Vec<u16> = OsString::from(string).encode_wide().collect();
        // `parse_lp_cmd_line` expects a NUL-terminated buffer.
        wide.push(0);
        let parsed = unsafe {
            parse_lp_cmd_line(wide.as_ptr() as *const u16, || OsString::from("TEST.EXE"))
        };
        let expected: Vec<OsString> = parts.iter().map(|k| OsString::from(k)).collect();
        assert_eq!(parsed.as_slice(), expected.as_slice());
    }

    #[test]
    fn empty() {
        chk("", &["TEST.EXE"]);
        chk("\0", &["TEST.EXE"]);
    }

    #[test]
    fn single_words() {
        chk("EXE one_word", &["EXE", "one_word"]);
        chk("EXE a", &["EXE", "a"]);
        chk("EXE 😅", &["EXE", "😅"]);
        chk("EXE 😅🤦", &["EXE", "😅🤦"]);
    }

    #[test]
    fn official_examples() {
        chk(r#"EXE "abc" d e"#, &["EXE", "abc", "d", "e"]);
        chk(r#"EXE a\\\b d"e f"g h"#, &["EXE", r#"a\\\b"#, "de fg", "h"]);
        chk(r#"EXE a\\\"b c d"#, &["EXE", r#"a\"b"#, "c", "d"]);
        chk(r#"EXE a\\\\"b c" d e"#, &["EXE", r#"a\\b c"#, "d", "e"]);
    }

    #[test]
    fn whitespace_behavior() {
        // Leading whitespace yields an empty first argument; runs of
        // whitespace between or after arguments are collapsed.
        chk(r#" test"#, &["", "test"]);
        chk(r#"  test"#, &["", "test"]);
        chk(r#" test test2"#, &["", "test", "test2"]);
        chk(r#" test  test2"#, &["", "test", "test2"]);
        chk(r#"test test2 "#, &["test", "test2"]);
        chk(r#"test  test2 "#, &["test", "test2"]);
        chk(r#"test "#, &["test"]);
    }

    #[test]
    fn genius_quotes() {
        chk(r#"EXE "" """#, &["EXE", "", ""]);
        chk(r#"EXE "" """"#, &["EXE", "", "\""]);
        chk(
            r#"EXE "this is """all""" in the same argument""#,
            &["EXE", "this is \"all\" in the same argument"]
        );
        chk(r#"EXE "a"""#, &["EXE", "a\""]);
        chk(r#"EXE "a"" a"#, &["EXE", "a\"", "a"]);
        // quotes cannot be escaped in command names
        chk(r#""EXE" check"#, &["EXE", "check"]);
        chk(r#""EXE check""#, &["EXE check"]);
        chk(r#""EXE """for""" check"#, &["EXE ", r#"for""#, "check"]);
        chk(r#""EXE \"for\" check"#, &[r#"EXE \"#, r#"for""#, "check"]);
    }
}
0 commit comments