5
5
#include " node_i18n.h"
6
6
#include " util-inl.h"
7
7
8
+ #include < algorithm>
8
9
#include < cmath>
9
10
#include < cstdio>
10
11
#include < numeric>
@@ -58,7 +59,7 @@ class URLHost {
58
59
public:
59
60
~URLHost ();
60
61
61
- void ParseIPv4Host (const char * input, size_t length, bool * is_ipv4 );
62
+ void ParseIPv4Host (const char * input, size_t length);
62
63
void ParseIPv6Host (const char * input, size_t length);
63
64
void ParseOpaqueHost (const char * input, size_t length);
64
65
void ParseHost (const char * input,
@@ -165,6 +166,9 @@ enum url_cb_args {
165
166
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
166
167
CHAR_TEST (8 , IsASCIITabOrNewline, (ch == ' \t ' || ch == ' \n ' || ch == ' \r ' ))
167
168
169
+ // https://infra.spec.whatwg.org/#c0-control
170
+ CHAR_TEST (8 , IsC0Control, (ch >= ' \0 ' && ch <= ' \x1f ' ))
171
+
168
172
// https://infra.spec.whatwg.org/#c0-control-or-space
169
173
CHAR_TEST (8 , IsC0ControlOrSpace, (ch >= ' \0 ' && ch <= ' ' ))
170
174
@@ -190,12 +194,18 @@ T ASCIILowercase(T ch) {
190
194
}
191
195
192
196
// https://url.spec.whatwg.org/#forbidden-host-code-point
193
- CHAR_TEST (8 , IsForbiddenHostCodePoint,
194
- ch == ' \0 ' || ch == ' \t ' || ch == ' \n ' || ch == ' \r ' ||
195
- ch == ' ' || ch == ' #' || ch == ' %' || ch == ' /' ||
196
- ch == ' :' || ch == ' ?' || ch == ' @' || ch == ' [' ||
197
- ch == ' <' || ch == ' >' || ch == ' \\ ' || ch == ' ]' ||
198
- ch == ' ^' || ch == ' |' )
197
+ CHAR_TEST (8 ,
198
+ IsForbiddenHostCodePoint,
199
+ ch == ' \0 ' || ch == ' \t ' || ch == ' \n ' || ch == ' \r ' || ch == ' ' ||
200
+ ch == ' #' || ch == ' /' || ch == ' :' || ch == ' ?' || ch == ' @' ||
201
+ ch == ' [' || ch == ' <' || ch == ' >' || ch == ' \\ ' || ch == ' ]' ||
202
+ ch == ' ^' || ch == ' |' )
203
+
204
+ // https://url.spec.whatwg.org/#forbidden-domain-code-point
205
+ CHAR_TEST (8 ,
206
+ IsForbiddenDomainCodePoint,
207
+ IsForbiddenHostCodePoint (ch) || IsC0Control(ch) || ch == '%' ||
208
+ ch == '\x7f')
199
209
200
210
// https://url.spec.whatwg.org/#windows-drive-letter
201
211
TWO_CHAR_STRING_TEST(8 , IsWindowsDriveLetter,
@@ -359,18 +369,21 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) {
359
369
type_ = HostType::H_IPV6;
360
370
}
361
371
362
- int64_t ParseNumber (const char * start, const char * end) {
372
+ // https://url.spec.whatwg.org/#ipv4-number-parser
373
+ int64_t ParseIPv4Number (const char * start, const char * end) {
374
+ if (end - start == 0 ) return -1 ;
375
+
363
376
unsigned R = 10 ;
364
377
if (end - start >= 2 && start[0 ] == ' 0' && (start[1 ] | 0x20 ) == ' x' ) {
365
378
start += 2 ;
366
379
R = 16 ;
367
- }
368
- if (end - start == 0 ) {
369
- return 0 ;
370
- } else if (R == 10 && end - start > 1 && start[0 ] == ' 0' ) {
380
+ } else if (end - start >= 2 && start[0 ] == ' 0' ) {
371
381
start++;
372
382
R = 8 ;
373
383
}
384
+
385
+ if (end - start == 0 ) return 0 ;
386
+
374
387
const char * p = start;
375
388
376
389
while (p < end) {
@@ -394,9 +407,33 @@ int64_t ParseNumber(const char* start, const char* end) {
394
407
return strtoll (start, nullptr , R);
395
408
}
396
409
397
- void URLHost::ParseIPv4Host (const char * input, size_t length, bool * is_ipv4) {
410
+ // https://url.spec.whatwg.org/#ends-in-a-number-checker
411
+ bool EndsInANumber (const std::string& input) {
412
+ std::vector<std::string> parts = SplitString (input, ' .' , false );
413
+
414
+ if (parts.empty ()) return false ;
415
+
416
+ if (parts.back ().empty ()) {
417
+ if (parts.size () == 1 ) return false ;
418
+ parts.pop_back ();
419
+ }
420
+
421
+ const std::string& last = parts.back ();
422
+
423
+ // If last is non-empty and contains only ASCII digits, then return true
424
+ if (!last.empty () && std::all_of (last.begin (), last.end (), ::isdigit)) {
425
+ return true ;
426
+ }
427
+
428
+ const char * last_str = last.c_str ();
429
+ int64_t num = ParseIPv4Number (last_str, last_str + last.size ());
430
+ if (num >= 0 ) return true ;
431
+
432
+ return false ;
433
+ }
434
+
435
+ void URLHost::ParseIPv4Host (const char * input, size_t length) {
398
436
CHECK_EQ (type_, HostType::H_FAILED);
399
- *is_ipv4 = false ;
400
437
const char * pointer = input;
401
438
const char * mark = input;
402
439
const char * end = pointer + length;
@@ -415,7 +452,7 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
415
452
return ;
416
453
if (pointer == mark)
417
454
return ;
418
- int64_t n = ParseNumber (mark, pointer);
455
+ int64_t n = ParseIPv4Number (mark, pointer);
419
456
if (n < 0 )
420
457
return ;
421
458
@@ -430,7 +467,6 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
430
467
pointer++;
431
468
}
432
469
CHECK_GT (parts, 0 );
433
- *is_ipv4 = true ;
434
470
435
471
// If any but the last item in numbers is greater than 255, return failure.
436
472
// If the last item in numbers is greater than or equal to
@@ -458,7 +494,7 @@ void URLHost::ParseOpaqueHost(const char* input, size_t length) {
458
494
output.reserve (length);
459
495
for (size_t i = 0 ; i < length; i++) {
460
496
const char ch = input[i];
461
- if (ch != ' % ' && IsForbiddenHostCodePoint (ch)) {
497
+ if (IsForbiddenHostCodePoint (ch)) {
462
498
return ;
463
499
} else {
464
500
AppendOrEscape (&output, ch, C0_CONTROL_ENCODE_SET);
@@ -497,16 +533,15 @@ void URLHost::ParseHost(const char* input,
497
533
// If any of the following characters are still present, we have to fail
498
534
for (size_t n = 0 ; n < decoded.size (); n++) {
499
535
const char ch = decoded[n];
500
- if (IsForbiddenHostCodePoint (ch)) {
536
+ if (IsForbiddenDomainCodePoint (ch)) {
501
537
return ;
502
538
}
503
539
}
504
540
505
- // Check to see if it's an IPv4 IP address
506
- bool is_ipv4;
507
- ParseIPv4Host (decoded.c_str (), decoded.length (), &is_ipv4);
508
- if (is_ipv4)
509
- return ;
541
+ // If domain ends in a number, then return the result of IPv4 parsing domain
542
+ if (EndsInANumber (decoded)) {
543
+ return ParseIPv4Host (decoded.c_str (), decoded.length ());
544
+ }
510
545
511
546
// If the unicode flag is set, run the result through punycode ToUnicode
512
547
if (unicode && !ToUnicode (decoded, &decoded))
0 commit comments