Skip to content

Commit a1f1d3a

Browse files
F3n67u authored and
targos committed
url: update WHATWG URL parser to align with latest spec
PR-URL: #43190 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Daijiro Wachi <daijiro.wachi@gmail.com>
1 parent 23c5de3 commit a1f1d3a

32 files changed

+3657
-821
lines changed

src/node_url.cc

+58-23
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "node_i18n.h"
66
#include "util-inl.h"
77

8+
#include <algorithm>
89
#include <cmath>
910
#include <cstdio>
1011
#include <numeric>
@@ -58,7 +59,7 @@ class URLHost {
5859
public:
5960
~URLHost();
6061

61-
void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
62+
void ParseIPv4Host(const char* input, size_t length);
6263
void ParseIPv6Host(const char* input, size_t length);
6364
void ParseOpaqueHost(const char* input, size_t length);
6465
void ParseHost(const char* input,
@@ -165,6 +166,9 @@ enum url_cb_args {
165166
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
166167
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))
167168

169+
// https://infra.spec.whatwg.org/#c0-control
170+
CHAR_TEST(8, IsC0Control, (ch >= '\0' && ch <= '\x1f'))
171+
168172
// https://infra.spec.whatwg.org/#c0-control-or-space
169173
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
170174

@@ -190,12 +194,18 @@ T ASCIILowercase(T ch) {
190194
}
191195

192196
// https://url.spec.whatwg.org/#forbidden-host-code-point
193-
CHAR_TEST(8, IsForbiddenHostCodePoint,
194-
ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
195-
ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
196-
ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
197-
ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
198-
ch == '^' || ch == '|')
197+
CHAR_TEST(8,
198+
IsForbiddenHostCodePoint,
199+
ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' || ch == ' ' ||
200+
ch == '#' || ch == '/' || ch == ':' || ch == '?' || ch == '@' ||
201+
ch == '[' || ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
202+
ch == '^' || ch == '|')
203+
204+
// https://url.spec.whatwg.org/#forbidden-domain-code-point
205+
CHAR_TEST(8,
206+
IsForbiddenDomainCodePoint,
207+
IsForbiddenHostCodePoint(ch) || IsC0Control(ch) || ch == '%' ||
208+
ch == '\x7f')
199209

200210
// https://url.spec.whatwg.org/#windows-drive-letter
201211
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
@@ -359,18 +369,21 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) {
359369
type_ = HostType::H_IPV6;
360370
}
361371

362-
int64_t ParseNumber(const char* start, const char* end) {
372+
// https://url.spec.whatwg.org/#ipv4-number-parser
373+
int64_t ParseIPv4Number(const char* start, const char* end) {
374+
if (end - start == 0) return -1;
375+
363376
unsigned R = 10;
364377
if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
365378
start += 2;
366379
R = 16;
367-
}
368-
if (end - start == 0) {
369-
return 0;
370-
} else if (R == 10 && end - start > 1 && start[0] == '0') {
380+
} else if (end - start >= 2 && start[0] == '0') {
371381
start++;
372382
R = 8;
373383
}
384+
385+
if (end - start == 0) return 0;
386+
374387
const char* p = start;
375388

376389
while (p < end) {
@@ -394,9 +407,33 @@ int64_t ParseNumber(const char* start, const char* end) {
394407
return strtoll(start, nullptr, R);
395408
}
396409

397-
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
410+
// https://url.spec.whatwg.org/#ends-in-a-number-checker
411+
bool EndsInANumber(const std::string& input) {
412+
std::vector<std::string> parts = SplitString(input, '.', false);
413+
414+
if (parts.empty()) return false;
415+
416+
if (parts.back().empty()) {
417+
if (parts.size() == 1) return false;
418+
parts.pop_back();
419+
}
420+
421+
const std::string& last = parts.back();
422+
423+
// If last is non-empty and contains only ASCII digits, then return true
424+
if (!last.empty() && std::all_of(last.begin(), last.end(), ::isdigit)) {
425+
return true;
426+
}
427+
428+
const char* last_str = last.c_str();
429+
int64_t num = ParseIPv4Number(last_str, last_str + last.size());
430+
if (num >= 0) return true;
431+
432+
return false;
433+
}
434+
435+
void URLHost::ParseIPv4Host(const char* input, size_t length) {
398436
CHECK_EQ(type_, HostType::H_FAILED);
399-
*is_ipv4 = false;
400437
const char* pointer = input;
401438
const char* mark = input;
402439
const char* end = pointer + length;
@@ -415,7 +452,7 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
415452
return;
416453
if (pointer == mark)
417454
return;
418-
int64_t n = ParseNumber(mark, pointer);
455+
int64_t n = ParseIPv4Number(mark, pointer);
419456
if (n < 0)
420457
return;
421458

@@ -430,7 +467,6 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
430467
pointer++;
431468
}
432469
CHECK_GT(parts, 0);
433-
*is_ipv4 = true;
434470

435471
// If any but the last item in numbers is greater than 255, return failure.
436472
// If the last item in numbers is greater than or equal to
@@ -458,7 +494,7 @@ void URLHost::ParseOpaqueHost(const char* input, size_t length) {
458494
output.reserve(length);
459495
for (size_t i = 0; i < length; i++) {
460496
const char ch = input[i];
461-
if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
497+
if (IsForbiddenHostCodePoint(ch)) {
462498
return;
463499
} else {
464500
AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
@@ -497,16 +533,15 @@ void URLHost::ParseHost(const char* input,
497533
// If any of the following characters are still present, we have to fail
498534
for (size_t n = 0; n < decoded.size(); n++) {
499535
const char ch = decoded[n];
500-
if (IsForbiddenHostCodePoint(ch)) {
536+
if (IsForbiddenDomainCodePoint(ch)) {
501537
return;
502538
}
503539
}
504540

505-
// Check to see if it's an IPv4 IP address
506-
bool is_ipv4;
507-
ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
508-
if (is_ipv4)
509-
return;
541+
// If domain ends in a number, then return the result of IPv4 parsing domain
542+
if (EndsInANumber(decoded)) {
543+
return ParseIPv4Host(decoded.c_str(), decoded.length());
544+
}
510545

511546
// If the unicode flag is set, run the result through punycode ToUnicode
512547
if (unicode && !ToUnicode(decoded, &decoded))

src/util.cc

+4-2
Original file line numberDiff line numberDiff line change
@@ -164,15 +164,17 @@ std::string GetHumanReadableProcessName() {
164164
return SPrintF("%s[%d]", GetProcessTitle("Node.js"), uv_os_getpid());
165165
}
166166

167-
std::vector<std::string> SplitString(const std::string& in, char delim) {
167+
std::vector<std::string> SplitString(const std::string& in,
168+
char delim,
169+
bool skipEmpty) {
168170
std::vector<std::string> out;
169171
if (in.empty())
170172
return out;
171173
std::istringstream in_stream(in);
172174
while (in_stream.good()) {
173175
std::string item;
174176
std::getline(in_stream, item, delim);
175-
if (item.empty()) continue;
177+
if (item.empty() && skipEmpty) continue;
176178
out.emplace_back(std::move(item));
177179
}
178180
return out;

src/util.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,9 @@ struct FunctionDeleter {
645645
template <typename T, void (*function)(T*)>
646646
using DeleteFnPtr = typename FunctionDeleter<T, function>::Pointer;
647647

648-
std::vector<std::string> SplitString(const std::string& in, char delim);
648+
std::vector<std::string> SplitString(const std::string& in,
649+
char delim,
650+
bool skipEmpty = true);
649651

650652
inline v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
651653
std::string_view str,

test/common/wpt/worker.js

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ const resource = new ResourceLoader(workerData.wptPath);
88

99
global.self = global;
1010
global.GLOBAL = {
11-
isWindow() { return false; }
11+
isWindow() { return false; },
12+
isShadowRealm() { return false; }
1213
};
1314
global.require = require;
1415

test/fixtures/wpt/README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ Last update:
2323
- html/webappapis/timers: https://github.com/web-platform-tests/wpt/tree/5873f2d8f1/html/webappapis/timers
2424
- interfaces: https://github.com/web-platform-tests/wpt/tree/fc086c82d5/interfaces
2525
- performance-timeline: https://github.com/web-platform-tests/wpt/tree/17ebc3aea0/performance-timeline
26-
- resources: https://github.com/web-platform-tests/wpt/tree/fbee645164/resources
26+
- resources: https://github.com/web-platform-tests/wpt/tree/c5b428f15a/resources
2727
- streams: https://github.com/web-platform-tests/wpt/tree/8f60d94439/streams
28-
- url: https://github.com/web-platform-tests/wpt/tree/77d54aa9e0/url
28+
- url: https://github.com/web-platform-tests/wpt/tree/0e5b126cd0/url
2929
- user-timing: https://github.com/web-platform-tests/wpt/tree/df24fb604e/user-timing
3030
- wasm/jsapi: https://github.com/web-platform-tests/wpt/tree/1dd414c796/wasm/jsapi
3131
- wasm/webapi: https://github.com/web-platform-tests/wpt/tree/fd1b23eeaa/wasm/webapi
+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Function that sends an accesskey using the proper key combination depending on the browser and OS.
3+
*
4+
* This needs that the test imports the following scripts:
5+
* <script src="/resources/testdriver.js"></script>
6+
* <script src="/resources/testdriver-actions.js"></script>
7+
* <script src="/resources/testdriver-vendor.js"></script>
8+
*/
9+
function pressAccessKey(accessKey){
  // WebDriver key code points for the modifier keys used below.
  const controlKey = '\uE009'; // left Control key
  const altKey = '\uE00A'; // left Alt key
  const optionKey = altKey; // left Option key (same code point as Alt)
  const shiftKey = '\uE008'; // left Shift key

  // The accesskey modifier combination differs across browsers and OSes.
  // See: https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/accesskey
  const isMacOSX = navigator.userAgent.indexOf("Mac") != -1;
  const modifiers = isMacOSX ? [controlKey, optionKey] : [shiftKey, altKey];

  // Hold the modifiers down in order.
  const withModifiersDown = modifiers.reduce(
    (chain, key) => chain.keyDown(key),
    new test_driver.Actions()
  );

  // Tap the access key itself, with a tick between press and release.
  const withKeyTapped = withModifiersDown
    .keyDown(accessKey)
    .addTick()
    .keyUp(accessKey);

  // Release the modifiers in the opposite order they were pressed.
  modifiers.reverse();
  const withModifiersUp = modifiers.reduce(
    (chain, key) => chain.keyUp(key),
    withKeyTapped
  );

  return withModifiersUp.send();
}
33+
34+
+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<meta charset="utf-8" />
5+
<title>Blank Page</title>
6+
<script>
7+
window.onload = function(event) {
8+
// This is needed to ensure the onload event fires when this page is
9+
// opened as a popup.
10+
// See https://github.com/web-platform-tests/wpt/pull/18157
11+
};
12+
</script>
13+
</head>
14+
<body>
15+
</body>
16+
</html>

0 commit comments

Comments
 (0)