Skip to content

Commit 70157b9

Browse files
committedMay 14, 2021
url: forbid certain confusable changes from being introduced by toASCII
The legacy url.parse() function attempts to convert Unicode domains (IDNs) into their ASCII/Punycode form through the use of the toASCII function. However, toASCII can introduce or remove various characters that at best invalidate the parsed URL, and at worst cause hostname spoofing: url.parse('http://bad.c℀.good.com/').href === 'http://bad.ca/c.good.com/' (from [1]) url.parse('http://\u00AD/bad.com').href === 'http:///bad.com/' While changes to the legacy URL parser are discouraged in general, the security implications here outweigh the desire for strict compatibility. This is since this commit only changes behavior when non-ASCII characters appear in the hostname, an unusual situation for most use cases. Additionally, despite the availability of the WHATWG URL API, url.parse remain widely deployed in the Node.js ecosystem, as exemplified by the recent un-deprecation of the legacy API. This change is similar in spirit to CPython 3.8's change [2] fixing bpo-36216 [3] aka CVE-2019-9636, which also occurred despite potential compatibility concerns. [1]: https://hackerone.com/reports/678487 [2]: python/cpython@16e6f7d [3]: https://bugs.python.org/issue36216 PR-URL: #38631 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Rich Trott <rtrott@gmail.com> Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
1 parent 41fab0d commit 70157b9

File tree

4 files changed

+74
-6
lines changed

4 files changed

+74
-6
lines changed
 

‎doc/api/errors.md

+5-4
Original file line numberDiff line numberDiff line change
@@ -1677,10 +1677,10 @@ An invalid URI was passed.
16771677
<a id="ERR_INVALID_URL"></a>
16781678
### `ERR_INVALID_URL`
16791679

1680-
An invalid URL was passed to the [WHATWG][WHATWG URL API]
1681-
[`URL` constructor][`new URL(input)`] to be parsed. The thrown error object
1682-
typically has an additional property `'input'` that contains the URL that failed
1683-
to parse.
1680+
An invalid URL was passed to the [WHATWG][WHATWG URL API] [`URL`
1681+
constructor][`new URL(input)`] or the legacy [`url.parse()`][] to be parsed.
1682+
The thrown error object typically has an additional property `'input'` that
1683+
contains the URL that failed to parse.
16841684

16851685
<a id="ERR_INVALID_URL_SCHEME"></a>
16861686
### `ERR_INVALID_URL_SCHEME`
@@ -2824,6 +2824,7 @@ The native call from `process.cpuUsage` could not be processed.
28242824
[`stream.write()`]: stream.md#stream_writable_write_chunk_encoding_callback
28252825
[`subprocess.kill()`]: child_process.md#child_process_subprocess_kill_signal
28262826
[`subprocess.send()`]: child_process.md#child_process_subprocess_send_message_sendhandle_options_callback
2827+
[`url.parse()`]: url.md#url_url_parse_urlstring_parsequerystring_slashesdenotehost
28272828
[`util.getSystemErrorName(error.errno)`]: util.md#util_util_getsystemerrorname_err
28282829
[`zlib`]: zlib.md
28292830
[crypto digest algorithm]: crypto.md#crypto_crypto_gethashes

‎doc/api/url.md

+5
Original file line numberDiff line numberDiff line change
@@ -1232,6 +1232,11 @@ forward-slash characters (`/`) are required following the colon in the
12321232
<!-- YAML
12331233
added: v0.1.25
12341234
changes:
1235+
- version: REPLACEME
1236+
pr-url: https://github.com/nodejs/node/pull/38631
1237+
description: Now throws an `ERR_INVALID_URL` exception when Punycode
1238+
conversion of a hostname introduces changes that could cause
1239+
the URL to be re-parsed differently.
12351240
- version:
12361241
- v15.13.0
12371242
- v14.17.0

‎lib/url.js

+30-2
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ const { toASCII } = require('internal/idna');
3434
const { encodeStr, hexTable } = require('internal/querystring');
3535

3636
const {
37-
ERR_INVALID_ARG_TYPE
37+
ERR_INVALID_ARG_TYPE,
38+
ERR_INVALID_URL,
3839
} = require('internal/errors').codes;
3940
const { validateString } = require('internal/validators');
4041

@@ -167,6 +168,20 @@ function isIpv6Hostname(hostname) {
167168
);
168169
}
169170

171+
// This prevents some common spoofing bugs due to our use of IDNA toASCII. For
172+
// compatibility, the set of characters we use here is the *intersection* of
173+
// "forbidden host code point" in the WHATWG URL Standard [1] and the
174+
// characters in the host parsing loop in Url.prototype.parse, with the
175+
// following additions:
176+
//
177+
// - ':' since this could cause a "protocol spoofing" bug
178+
// - '@' since this could cause parts of the hostname to be confused with auth
179+
// - '[' and ']' since this could cause a non-IPv6 hostname to be interpreted
180+
// as IPv6 by isIpv6Hostname above
181+
//
182+
// [1]: https://url.spec.whatwg.org/#forbidden-host-code-point
183+
const forbiddenHostChars = /[\t\n\r #%/:<>?@[\\\]^|]/;
184+
170185
Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
171186
validateString(url, 'url');
172187

@@ -389,7 +404,7 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
389404
this.hostname = this.hostname.toLowerCase();
390405
}
391406

392-
if (!ipv6Hostname) {
407+
if (!ipv6Hostname && this.hostname !== '') {
393408
// IDNA Support: Returns a punycoded representation of "domain".
394409
// It only converts parts of the domain name that
395410
// have non-ASCII characters, i.e. it doesn't matter if
@@ -398,6 +413,19 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
398413
// Use lenient mode (`true`) to try to support even non-compliant
399414
// URLs.
400415
this.hostname = toASCII(this.hostname, true);
416+
417+
// Prevent two potential routes of hostname spoofing.
418+
// 1. If this.hostname is empty, it must have become empty due to toASCII
419+
// since we checked this.hostname above.
420+
// 2. If any of forbiddenHostChars appears in this.hostname, it must have
421+
// also gotten in due to toASCII. This is since getHostname would have
422+
// filtered them out otherwise.
423+
// Rather than trying to correct this by moving the non-host part into
424+
// the pathname as we've done in getHostname, throw an exception to
425+
// convey the severity of this issue.
426+
if (this.hostname === '' || forbiddenHostChars.test(this.hostname)) {
427+
throw new ERR_INVALID_URL(url);
428+
}
401429
}
402430

403431
const p = this.port ? ':' + this.port : '';

‎test/parallel/test-url-parse-invalid-input.js

+34
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,37 @@ assert.throws(() => { url.parse('http://%E0%A4%A@fail'); },
3636
// JS engine errors do not have the `code` property.
3737
return e.code === undefined;
3838
});
39+
40+
if (common.hasIntl) {
41+
// An array of Unicode code points whose Unicode NFKD contains a "bad
42+
// character".
43+
const badIDNA = (() => {
44+
const BAD_CHARS = '#%/:?@[\\]^|';
45+
const out = [];
46+
for (let i = 0x80; i < 0x110000; i++) {
47+
const cp = String.fromCodePoint(i);
48+
for (const badChar of BAD_CHARS) {
49+
if (cp.normalize('NFKD').includes(badChar)) {
50+
out.push(cp);
51+
}
52+
}
53+
}
54+
return out;
55+
})();
56+
57+
// The generation logic above should at a minimum produce these two
58+
// characters.
59+
assert(badIDNA.includes('℀'));
60+
assert(badIDNA.includes('@'));
61+
62+
for (const badCodePoint of badIDNA) {
63+
const badURL = `http://fail${badCodePoint}fail.com/`;
64+
assert.throws(() => { url.parse(badURL); },
65+
(e) => e.code === 'ERR_INVALID_URL',
66+
`parsing ${badURL}`);
67+
}
68+
69+
assert.throws(() => { url.parse('http://\u00AD/bad.com/'); },
70+
(e) => e.code === 'ERR_INVALID_URL',
71+
'parsing http://\u00AD/bad.com/');
72+
}

0 commit comments

Comments
 (0)
Please sign in to comment.