5
5
#include " node_i18n.h"
6
6
#include " util-inl.h"
7
7
8
+ #include < algorithm>
8
9
#include < cmath>
9
10
#include < cstdio>
10
11
#include < numeric>
@@ -58,7 +59,7 @@ class URLHost {
58
59
public:
59
60
~URLHost ();
60
61
61
- void ParseIPv4Host (const char * input, size_t length, bool * is_ipv4 );
62
+ void ParseIPv4Host (const char * input, size_t length);
62
63
void ParseIPv6Host (const char * input, size_t length);
63
64
void ParseOpaqueHost (const char * input, size_t length);
64
65
void ParseHost (const char * input,
@@ -359,18 +360,21 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) {
359
360
type_ = HostType::H_IPV6;
360
361
}
361
362
362
- int64_t ParseNumber (const char * start, const char * end) {
363
+ // https://url.spec.whatwg.org/#ipv4-number-parser
364
+ int64_t ParseIPv4Number (const char * start, const char * end) {
365
+ if (end - start == 0 ) return -1 ;
366
+
363
367
unsigned R = 10 ;
364
368
if (end - start >= 2 && start[0 ] == ' 0' && (start[1 ] | 0x20 ) == ' x' ) {
365
369
start += 2 ;
366
370
R = 16 ;
367
- }
368
- if (end - start == 0 ) {
369
- return 0 ;
370
- } else if (R == 10 && end - start > 1 && start[0 ] == ' 0' ) {
371
+ } else if (end - start >= 2 && start[0 ] == ' 0' ) {
371
372
start++;
372
373
R = 8 ;
373
374
}
375
+
376
+ if (end - start == 0 ) return 0 ;
377
+
374
378
const char * p = start;
375
379
376
380
while (p < end) {
@@ -394,9 +398,33 @@ int64_t ParseNumber(const char* start, const char* end) {
394
398
return strtoll (start, nullptr , R);
395
399
}
396
400
397
- void URLHost::ParseIPv4Host (const char * input, size_t length, bool * is_ipv4) {
401
+ // https://url.spec.whatwg.org/#ends-in-a-number-checker
402
+ bool EndsInANumber (const std::string& input) {
403
+ std::vector<std::string> parts = SplitString (input, ' .' , false );
404
+
405
+ if (parts.empty ()) return false ;
406
+
407
+ if (parts.back ().empty ()) {
408
+ if (parts.size () == 1 ) return false ;
409
+ parts.pop_back ();
410
+ }
411
+
412
+ const std::string& last = parts.back ();
413
+
414
+ // If last is non-empty and contains only ASCII digits, then return true
415
+ if (!last.empty () && std::all_of (last.begin (), last.end (), ::isdigit)) {
416
+ return true ;
417
+ }
418
+
419
+ const char * last_str = last.c_str ();
420
+ int64_t num = ParseIPv4Number (last_str, last_str + last.size ());
421
+ if (num >= 0 ) return true ;
422
+
423
+ return false ;
424
+ }
425
+
426
+ void URLHost::ParseIPv4Host (const char * input, size_t length) {
398
427
CHECK_EQ (type_, HostType::H_FAILED);
399
- *is_ipv4 = false ;
400
428
const char * pointer = input;
401
429
const char * mark = input;
402
430
const char * end = pointer + length;
@@ -414,7 +442,7 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
414
442
if (++parts > static_cast <int >(arraysize (numbers))) return ;
415
443
if (pointer == mark)
416
444
return ;
417
- int64_t n = ParseNumber (mark, pointer);
445
+ int64_t n = ParseIPv4Number (mark, pointer);
418
446
if (n < 0 )
419
447
return ;
420
448
@@ -429,7 +457,6 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
429
457
pointer++;
430
458
}
431
459
CHECK_GT (parts, 0 );
432
- *is_ipv4 = true ;
433
460
434
461
// If any but the last item in numbers is greater than 255, return failure.
435
462
// If the last item in numbers is greater than or equal to
@@ -501,11 +528,10 @@ void URLHost::ParseHost(const char* input,
501
528
}
502
529
}
503
530
504
- // Check to see if it's an IPv4 IP address
505
- bool is_ipv4;
506
- ParseIPv4Host (decoded.c_str (), decoded.length (), &is_ipv4);
507
- if (is_ipv4)
508
- return ;
531
+ // If domain ends in a number, then return the result of IPv4 parsing domain
532
+ if (EndsInANumber (decoded)) {
533
+ return ParseIPv4Host (decoded.c_str (), decoded.length ());
534
+ }
509
535
510
536
// If the unicode flag is set, run the result through punycode ToUnicode
511
537
if (unicode && !ToUnicode (decoded, &decoded))
0 commit comments