@@ -77,54 +77,72 @@ bool InitializeICUDirectory(const std::string& path) {
77
77
}
78
78
}
79
79
80
- static int32_t ToUnicode (MaybeStackBuffer<char >* buf,
81
- const char * input,
82
- size_t length) {
80
+ int32_t ToUnicode (MaybeStackBuffer<char >* buf,
81
+ const char * input,
82
+ size_t length) {
83
83
UErrorCode status = U_ZERO_ERROR;
84
- uint32_t options = UIDNA_DEFAULT;
85
- options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
84
+ uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
86
85
UIDNA* uidna = uidna_openUTS46 (options, &status);
87
86
if (U_FAILURE (status))
88
87
return -1 ;
89
88
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
90
89
91
90
int32_t len = uidna_nameToUnicodeUTF8 (uidna,
92
91
input, length,
93
- **buf, buf->length (),
92
+ **buf, buf->capacity (),
94
93
&info,
95
94
&status);
96
95
96
+ // Do not check info.errors like we do with ToASCII since ToUnicode always
97
+ // returns a string, despite any possible errors that may have occurred.
98
+
97
99
if (status == U_BUFFER_OVERFLOW_ERROR) {
98
100
status = U_ZERO_ERROR;
99
101
buf->AllocateSufficientStorage (len);
100
102
len = uidna_nameToUnicodeUTF8 (uidna,
101
103
input, length,
102
- **buf, buf->length (),
104
+ **buf, buf->capacity (),
103
105
&info,
104
106
&status);
105
107
}
106
108
107
- if (U_FAILURE (status))
109
+ // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
110
+ // string, regardless of whether an error occurred.
111
+
112
+ if (U_FAILURE (status)) {
108
113
len = -1 ;
114
+ buf->SetLength (0 );
115
+ } else {
116
+ buf->SetLength (len);
117
+ }
109
118
110
119
uidna_close (uidna);
111
120
return len;
112
121
}
113
122
114
- static int32_t ToASCII (MaybeStackBuffer<char >* buf,
115
- const char * input,
116
- size_t length) {
123
+ int32_t ToASCII (MaybeStackBuffer<char >* buf,
124
+ const char * input,
125
+ size_t length,
126
+ enum idna_mode mode) {
117
127
UErrorCode status = U_ZERO_ERROR;
118
- uint32_t options = UIDNA_DEFAULT;
119
- options |= UIDNA_NONTRANSITIONAL_TO_ASCII;
128
+ uint32_t options = // CheckHyphens = false; handled later
129
+ UIDNA_CHECK_BIDI | // CheckBidi = true
130
+ UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true
131
+ UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing
132
+ if (mode == IDNA_STRICT) {
133
+ options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict
134
+ // VerifyDnsLength = beStrict;
135
+ // handled later
136
+ }
137
+
120
138
UIDNA* uidna = uidna_openUTS46 (options, &status);
121
139
if (U_FAILURE (status))
122
140
return -1 ;
123
141
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
124
142
125
143
int32_t len = uidna_nameToASCII_UTF8 (uidna,
126
144
input, length,
127
- **buf, buf->length (),
145
+ **buf, buf->capacity (),
128
146
&info,
129
147
&status);
130
148
@@ -133,13 +151,45 @@ static int32_t ToASCII(MaybeStackBuffer<char>* buf,
133
151
buf->AllocateSufficientStorage (len);
134
152
len = uidna_nameToASCII_UTF8 (uidna,
135
153
input, length,
136
- **buf, buf->length (),
154
+ **buf, buf->capacity (),
137
155
&info,
138
156
&status);
139
157
}
140
158
141
- if (U_FAILURE (status))
159
+ // In UTS #46 which specifies ToASCII, certain error conditions are
160
+ // configurable through options, and the WHATWG URL Standard promptly elects
161
+ // to disable some of them to accommodate for real-world use cases.
162
+ // Unfortunately, ICU4C's IDNA module does not support disabling some of
163
+ // these options through `options` above, and thus continues throwing
164
+ // unnecessary errors. To counter this situation, we just filter out the
165
+ // errors that may have happened afterwards, before deciding whether to
166
+ // return an error from this function.
167
+
168
+ // CheckHyphens = false
169
+ // (Specified in the current UTS #46 draft rev. 18.)
170
+ // Refs:
171
+ // - https://github.com/whatwg/url/issues/53
172
+ // - https://github.com/whatwg/url/pull/309
173
+ // - http://www.unicode.org/review/pri317/
174
+ // - http://www.unicode.org/reports/tr46/tr46-18.html
175
+ // - https://www.icann.org/news/announcement-2000-01-07-en
176
+ info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
177
+ info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
178
+ info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
179
+
180
+ if (mode != IDNA_STRICT) {
181
+ // VerifyDnsLength = beStrict
182
+ info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
183
+ info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
184
+ info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
185
+ }
186
+
187
+ if (U_FAILURE (status) || (mode != IDNA_LENIENT && info.errors != 0 )) {
142
188
len = -1 ;
189
+ buf->SetLength (0 );
190
+ } else {
191
+ buf->SetLength (len);
192
+ }
143
193
144
194
uidna_close (uidna);
145
195
return len;
@@ -169,8 +219,12 @@ static void ToASCII(const FunctionCallbackInfo<Value>& args) {
169
219
CHECK_GE (args.Length (), 1 );
170
220
CHECK (args[0 ]->IsString ());
171
221
Utf8Value val (env->isolate (), args[0 ]);
222
+ // optional arg
223
+ bool lenient = args[1 ]->BooleanValue (env->context ()).FromJust ();
224
+ enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;
225
+
172
226
MaybeStackBuffer<char > buf;
173
- int32_t len = ToASCII (&buf, *val, val.length ());
227
+ int32_t len = ToASCII (&buf, *val, val.length (), mode );
174
228
175
229
if (len < 0 ) {
176
230
return env->ThrowError (" Cannot convert name to ASCII" );
0 commit comments