Skip to content

Commit 60b10f0

Browse files
TimothyGu authored and MylesBorins committed
url: update IDNA handling
Remove custom tests for invalid IDNA domains in url-idna.js in favor of the more comprehensive official set. Backport-PR-URL: #17365 PR-URL: #13362 Refs: whatwg/url#309 Refs: web-platform-tests/wpt#5976 Reviewed-By: Refael Ackermann <refack@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Daijiro Wachi <daijiro.wachi@gmail.com>
1 parent 699c663 commit 60b10f0

File tree

6 files changed

+509
-80
lines changed

6 files changed

+509
-80
lines changed

lib/url.js

+4-1
Original file line number | Diff line number | Diff line change
@@ -311,7 +311,10 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
311311
// It only converts parts of the domain name that
312312
// have non-ASCII characters, i.e. it doesn't matter if
313313
// you call it with a domain that already is ASCII-only.
314-
this.hostname = toASCII(this.hostname);
314+
315+
// Use lenient mode (`true`) to try to support even non-compliant
316+
// URLs.
317+
this.hostname = toASCII(this.hostname, true);
315318
}
316319

317320
var p = this.port ? ':' + this.port : '';

src/node_i18n.cc

+71-17
Original file line number | Diff line number | Diff line change
@@ -77,54 +77,72 @@ bool InitializeICUDirectory(const std::string& path) {
7777
}
7878
}
7979

80-
static int32_t ToUnicode(MaybeStackBuffer<char>* buf,
81-
const char* input,
82-
size_t length) {
80+
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
81+
const char* input,
82+
size_t length) {
8383
UErrorCode status = U_ZERO_ERROR;
84-
uint32_t options = UIDNA_DEFAULT;
85-
options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
84+
uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
8685
UIDNA* uidna = uidna_openUTS46(options, &status);
8786
if (U_FAILURE(status))
8887
return -1;
8988
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
9089

9190
int32_t len = uidna_nameToUnicodeUTF8(uidna,
9291
input, length,
93-
**buf, buf->length(),
92+
**buf, buf->capacity(),
9493
&info,
9594
&status);
9695

96+
// Do not check info.errors like we do with ToASCII since ToUnicode always
97+
// returns a string, despite any possible errors that may have occurred.
98+
9799
if (status == U_BUFFER_OVERFLOW_ERROR) {
98100
status = U_ZERO_ERROR;
99101
buf->AllocateSufficientStorage(len);
100102
len = uidna_nameToUnicodeUTF8(uidna,
101103
input, length,
102-
**buf, buf->length(),
104+
**buf, buf->capacity(),
103105
&info,
104106
&status);
105107
}
106108

107-
if (U_FAILURE(status))
109+
// info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
110+
// string, regardless of whether an error occurred.
111+
112+
if (U_FAILURE(status)) {
108113
len = -1;
114+
buf->SetLength(0);
115+
} else {
116+
buf->SetLength(len);
117+
}
109118

110119
uidna_close(uidna);
111120
return len;
112121
}
113122

114-
static int32_t ToASCII(MaybeStackBuffer<char>* buf,
115-
const char* input,
116-
size_t length) {
123+
int32_t ToASCII(MaybeStackBuffer<char>* buf,
124+
const char* input,
125+
size_t length,
126+
enum idna_mode mode) {
117127
UErrorCode status = U_ZERO_ERROR;
118-
uint32_t options = UIDNA_DEFAULT;
119-
options |= UIDNA_NONTRANSITIONAL_TO_ASCII;
128+
uint32_t options = // CheckHyphens = false; handled later
129+
UIDNA_CHECK_BIDI | // CheckBidi = true
130+
UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true
131+
UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing
132+
if (mode == IDNA_STRICT) {
133+
options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict
134+
// VerifyDnsLength = beStrict;
135+
// handled later
136+
}
137+
120138
UIDNA* uidna = uidna_openUTS46(options, &status);
121139
if (U_FAILURE(status))
122140
return -1;
123141
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
124142

125143
int32_t len = uidna_nameToASCII_UTF8(uidna,
126144
input, length,
127-
**buf, buf->length(),
145+
**buf, buf->capacity(),
128146
&info,
129147
&status);
130148

@@ -133,13 +151,45 @@ static int32_t ToASCII(MaybeStackBuffer<char>* buf,
133151
buf->AllocateSufficientStorage(len);
134152
len = uidna_nameToASCII_UTF8(uidna,
135153
input, length,
136-
**buf, buf->length(),
154+
**buf, buf->capacity(),
137155
&info,
138156
&status);
139157
}
140158

141-
if (U_FAILURE(status))
159+
// In UTS #46 which specifies ToASCII, certain error conditions are
160+
// configurable through options, and the WHATWG URL Standard promptly elects
161+
// to disable some of them to accommodate real-world use cases.
162+
// Unfortunately, ICU4C's IDNA module does not support disabling some of
163+
// these options through `options` above, and thus continues throwing
164+
// unnecessary errors. To counter this situation, we just filter out the
165+
// errors that may have happened afterwards, before deciding whether to
166+
// return an error from this function.
167+
168+
// CheckHyphens = false
169+
// (Specified in the current UTS #46 draft rev. 18.)
170+
// Refs:
171+
// - https://github.com/whatwg/url/issues/53
172+
// - https://github.com/whatwg/url/pull/309
173+
// - http://www.unicode.org/review/pri317/
174+
// - http://www.unicode.org/reports/tr46/tr46-18.html
175+
// - https://www.icann.org/news/announcement-2000-01-07-en
176+
info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
177+
info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
178+
info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
179+
180+
if (mode != IDNA_STRICT) {
181+
// VerifyDnsLength = beStrict
182+
info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
183+
info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
184+
info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
185+
}
186+
187+
if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) {
142188
len = -1;
189+
buf->SetLength(0);
190+
} else {
191+
buf->SetLength(len);
192+
}
143193

144194
uidna_close(uidna);
145195
return len;
@@ -169,8 +219,12 @@ static void ToASCII(const FunctionCallbackInfo<Value>& args) {
169219
CHECK_GE(args.Length(), 1);
170220
CHECK(args[0]->IsString());
171221
Utf8Value val(env->isolate(), args[0]);
222+
// optional arg
223+
bool lenient = args[1]->BooleanValue(env->context()).FromJust();
224+
enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;
225+
172226
MaybeStackBuffer<char> buf;
173-
int32_t len = ToASCII(&buf, *val, val.length());
227+
int32_t len = ToASCII(&buf, *val, val.length(), mode);
174228

175229
if (len < 0) {
176230
return env->ThrowError("Cannot convert name to ASCII");

src/node_i18n.h

+24
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,30 @@ namespace i18n {
1616

1717
bool InitializeICUDirectory(const std::string& path);
1818

19+
enum idna_mode {
20+
// Default mode for maximum compatibility.
21+
IDNA_DEFAULT,
22+
// Ignore all errors in IDNA conversion, if possible.
23+
IDNA_LENIENT,
24+
// Enforce STD3 rules (UseSTD3ASCIIRules) and DNS length restrictions
25+
// (VerifyDnsLength). Corresponds to `beStrict` flag in the "domain to ASCII"
26+
// algorithm.
27+
IDNA_STRICT
28+
};
29+
30+
// Implements the WHATWG URL Standard "domain to ASCII" algorithm.
31+
// https://url.spec.whatwg.org/#concept-domain-to-ascii
32+
int32_t ToASCII(MaybeStackBuffer<char>* buf,
33+
const char* input,
34+
size_t length,
35+
enum idna_mode mode = IDNA_DEFAULT);
36+
37+
// Implements the WHATWG URL Standard "domain to Unicode" algorithm.
38+
// https://url.spec.whatwg.org/#concept-domain-to-unicode
39+
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
40+
const char* input,
41+
size_t length);
42+
1943
} // namespace i18n
2044
} // namespace node
2145

0 commit comments

Comments
 (0)