@@ -175,6 +175,8 @@ CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
175
175
// https://infra.spec.whatwg.org/#ascii-digit
176
176
// Accepts `ch` in the inclusive range '0'..'9' (decimal digits).
// NOTE(review): CHAR_TEST is defined outside this view; presumably it expands
// to a predicate function named by its second argument -- confirm.
CHAR_TEST (8 , IsASCIIDigit, (ch >= ' 0' && ch <= ' 9' ))
177
177
178
// Accepts `ch` in the inclusive range '0'..'7' (octal digits).
// NOTE(review): CHAR_TEST is defined outside this view; presumably it expands
// to a predicate function named by its second argument -- confirm.
+ CHAR_TEST (8 , IsASCIIOcDigit, (ch >= ' 0' && ch <= ' 7' ))
179
+
178
180
// https://infra.spec.whatwg.org/#ascii-hex-digit
179
181
CHAR_TEST (8 , IsASCIIHexDigit, (IsASCIIDigit(ch) ||
180
182
(ch >= ' A' && ch <= ' F' ) ||
@@ -407,29 +409,67 @@ int64_t ParseIPv4Number(const char* start, const char* end) {
407
409
return strtoll (start, nullptr , R);
408
410
}
409
411
412
+ // https://url.spec.whatwg.org/#ipv4-number-parser
413
+ bool IsIPv4NumberValid (const std::string_view input) {
414
+ if (input.empty ()) {
415
+ return false ;
416
+ }
417
+
418
+ // If a number starts with '0' it might be a number with base 8 or base
419
+ // 16. If not, checking if all characters are digits proves that it is a
420
+ // base 10 number.
421
+ if (input.size () >= 2 && input[0 ] == ' 0' ) {
422
+ if (input[1 ] == ' X' || input[1 ] == ' x' ) {
423
+ if (input.size () == 2 ) {
424
+ return true ;
425
+ }
426
+
427
+ return std::all_of (input.begin () + 2 , input.end (), [](const char & c) {
428
+ return IsASCIIHexDigit (c);
429
+ });
430
+ }
431
+
432
+ return std::all_of (input.begin () + 1 , input.end (), [](const char & c) {
433
+ return IsASCIIOcDigit (c);
434
+ });
435
+ }
436
+
437
+ return std::all_of (input.begin (), input.end (), [](const char & c) {
438
+ return IsASCIIDigit (c);
439
+ });
440
+ }
441
+
410
442
// https://url.spec.whatwg.org/#ends-in-a-number-checker
411
- bool EndsInANumber (const std::string& input) {
412
- std::vector<std::string> parts = SplitString (input, ' .' , false );
443
+ inline bool EndsInANumber (const std::string_view input) {
444
+ if (input.empty ()) {
445
+ return false ;
446
+ }
413
447
414
- if (parts.empty ()) return false ;
448
+ char delimiter = ' .' ;
449
+ auto last_index = input.size () - 1 ;
450
+ if (input.back () == delimiter) {
451
+ --last_index;
452
+ }
415
453
416
- if (parts.back ().empty ()) {
417
- if (parts.size () == 1 ) return false ;
418
- parts.pop_back ();
454
+ std::string_view last{};
455
+ auto pos = input.find_last_of (delimiter, last_index);
456
+ if (pos == std::string_view::npos) {
457
+ last = input.substr (0 , last_index);
458
+ } else {
459
+ last = input.substr (pos + 1 , last_index - pos);
419
460
}
420
461
421
- const std::string& last = parts.back ();
462
+ if (last.empty ()) {
463
+ return false ;
464
+ }
422
465
423
- // If last is non-empty and contains only ASCII digits, then return true
424
- if (!last.empty () && std::all_of (last.begin (), last.end (), ::isdigit)) {
466
+ if (std::all_of (last.begin (), last.end (), [](const char & c) {
467
+ return IsASCIIDigit (c);
468
+ })) {
425
469
return true ;
426
470
}
427
471
428
- const char * last_str = last.c_str ();
429
- int64_t num = ParseIPv4Number (last_str, last_str + last.size ());
430
- if (num >= 0 ) return true ;
431
-
432
- return false ;
472
+ return IsIPv4NumberValid (last);
433
473
}
434
474
435
475
void URLHost::ParseIPv4Host (const char * input, size_t length) {
0 commit comments