Skip to content
This repository was archived by the owner on Aug 31, 2018. It is now read-only.

Commit f332483

Browse files
jasnell authored and addaleax committed
util: graduate TextEncoder/TextDecoder, tests
Add tests ported from Web Platform Tests.
Graduate TextEncoder / TextDecoder from experimental.

PR-URL: nodejs/node#15743
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: Refael Ackermann <refack@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
Reviewed-By: Timothy Gu <timothygu99@gmail.com>
1 parent 1b0e1c4 commit f332483

12 files changed

+563
-28
lines changed

doc/api/util.md

-4
Original file line numberDiff line numberDiff line change
@@ -551,8 +551,6 @@ see [Custom promisified functions][].
551551
added: v8.3.0
552552
-->
553553

554-
> Stability: 1 - Experimental
555-
556554
An implementation of the [WHATWG Encoding Standard][] `TextDecoder` API.
557555

558556
```js
@@ -690,8 +688,6 @@ mark.
690688
added: v8.3.0
691689
-->
692690

693-
> Stability: 1 - Experimental
694-
695691
An implementation of the [WHATWG Encoding Standard][] `TextEncoder` API. All
696692
instances of `TextEncoder` only support UTF-8 encoding.
697693

lib/internal/encoding.js

-20
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,6 @@ const kEncoding = Symbol('encoding');
1010
const kDecoder = Symbol('decoder');
1111
const kEncoder = Symbol('encoder');
1212

13-
let warned = false;
14-
const experimental =
15-
'The WHATWG Encoding Standard implementation is an experimental API. It ' +
16-
'should not yet be used in production applications.';
17-
1813
const {
1914
getConstructorOf,
2015
customInspectSymbol: inspect
@@ -289,11 +284,6 @@ function getEncodingFromLabel(label) {
289284

290285
class TextEncoder {
291286
constructor() {
292-
if (!warned) {
293-
warned = true;
294-
process.emitWarning(experimental, 'ExperimentalWarning');
295-
}
296-
297287
this[kEncoder] = true;
298288
}
299289

@@ -353,11 +343,6 @@ function makeTextDecoderICU() {
353343

354344
class TextDecoder {
355345
constructor(encoding = 'utf-8', options = {}) {
356-
if (!warned) {
357-
warned = true;
358-
process.emitWarning(experimental, 'ExperimentalWarning');
359-
}
360-
361346
encoding = `${encoding}`;
362347
if (typeof options !== 'object')
363348
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
@@ -430,11 +415,6 @@ function makeTextDecoderJS() {
430415

431416
class TextDecoder {
432417
constructor(encoding = 'utf-8', options = {}) {
433-
if (!warned) {
434-
warned = true;
435-
process.emitWarning(experimental, 'ExperimentalWarning');
436-
}
437-
438418
encoding = `${encoding}`;
439419
if (typeof options !== 'object')
440420
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
'use strict';
2+
3+
// From: https://github.com/w3c/web-platform-tests/blob/d74324b53c/encoding/textdecoder-fatal-streaming.html
4+
5+
const common = require('../common');
6+
7+
if (!common.hasIntl)
8+
common.skip('missing Intl');
9+
10+
const assert = require('assert');
11+
const {
12+
TextDecoder
13+
} = require('util');
14+
15+
16+
{
17+
[
18+
{ encoding: 'utf-8', sequence: [0xC0] },
19+
{ encoding: 'utf-16le', sequence: [0x00] },
20+
{ encoding: 'utf-16be', sequence: [0x00] }
21+
].forEach((testCase) => {
22+
const data = new Uint8Array([testCase.sequence]);
23+
common.expectsError(
24+
() => {
25+
const decoder = new TextDecoder(testCase.encoding, { fatal: true });
26+
decoder.decode(data);
27+
}, {
28+
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
29+
type: TypeError,
30+
message:
31+
`The encoded data was not valid for encoding ${testCase.encoding}`
32+
}
33+
);
34+
35+
assert.strictEqual(
36+
new TextDecoder(testCase.encoding).decode(data),
37+
'\uFFFD'
38+
);
39+
});
40+
}
41+
42+
{
43+
const decoder = new TextDecoder('utf-16le', { fatal: true });
44+
const odd = new Uint8Array([0x00]);
45+
const even = new Uint8Array([0x00, 0x00]);
46+
47+
assert.strictEqual(decoder.decode(odd, { stream: true }), '');
48+
assert.strictEqual(decoder.decode(odd), '\u0000');
49+
50+
common.expectsError(
51+
() => {
52+
decoder.decode(even, { stream: true });
53+
decoder.decode(odd);
54+
}, {
55+
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
56+
type: TypeError,
57+
message:
58+
'The encoded data was not valid for encoding utf-16le'
59+
}
60+
);
61+
62+
common.expectsError(
63+
() => {
64+
decoder.decode(odd, { stream: true });
65+
decoder.decode(even);
66+
}, {
67+
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
68+
type: TypeError,
69+
message:
70+
'The encoded data was not valid for encoding utf-16le'
71+
}
72+
);
73+
74+
assert.strictEqual(decoder.decode(even, { stream: true }), '\u0000');
75+
assert.strictEqual(decoder.decode(even), '\u0000');
76+
}

test/parallel/test-whatwg-encoding-internals.js

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
'use strict';
33

44
require('../common');
5+
56
const assert = require('assert');
67
const { getEncodingFromLabel } = require('internal/encoding');
78

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
'use strict';
2+
3+
// From: https://github.com/w3c/web-platform-tests/blob/fa9436d12c/encoding/api-surrogates-utf8.html
4+
5+
require('../common');
6+
7+
const assert = require('assert');
8+
const {
9+
TextDecoder,
10+
TextEncoder
11+
} = require('util');
12+
13+
const badStrings = [
14+
{
15+
input: 'abc123',
16+
expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33],
17+
decoded: 'abc123',
18+
name: 'Sanity check'
19+
},
20+
{
21+
input: '\uD800',
22+
expected: [0xef, 0xbf, 0xbd],
23+
decoded: '\uFFFD',
24+
name: 'Surrogate half (low)'
25+
},
26+
{
27+
input: '\uDC00',
28+
expected: [0xef, 0xbf, 0xbd],
29+
decoded: '\uFFFD',
30+
name: 'Surrogate half (high)'
31+
},
32+
{
33+
input: 'abc\uD800123',
34+
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
35+
decoded: 'abc\uFFFD123',
36+
name: 'Surrogate half (low), in a string'
37+
},
38+
{
39+
input: 'abc\uDC00123',
40+
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
41+
decoded: 'abc\uFFFD123',
42+
name: 'Surrogate half (high), in a string'
43+
},
44+
{
45+
input: '\uDC00\uD800',
46+
expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd],
47+
decoded: '\uFFFD\uFFFD',
48+
name: 'Wrong order'
49+
}
50+
];
51+
52+
badStrings.forEach((t) => {
53+
const encoded = new TextEncoder().encode(t.input);
54+
assert.deepStrictEqual([].slice.call(encoded), t.expected);
55+
assert.strictEqual(new TextDecoder('utf-8').decode(encoded), t.decoded);
56+
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
'use strict';
2+
3+
// From: https://github.com/w3c/web-platform-tests/blob/39a67e2fff/encoding/textdecoder-fatal.html
4+
5+
const common = require('../common');
6+
7+
if (!common.hasIntl)
8+
common.skip('missing Intl');
9+
10+
const assert = require('assert');
11+
const {
12+
TextDecoder
13+
} = require('util');
14+
15+
const bad = [
16+
{ encoding: 'utf-8', input: [0xFF], name: 'invalid code' },
17+
{ encoding: 'utf-8', input: [0xC0], name: 'ends early' },
18+
{ encoding: 'utf-8', input: [0xE0], name: 'ends early 2' },
19+
{ encoding: 'utf-8', input: [0xC0, 0x00], name: 'invalid trail' },
20+
{ encoding: 'utf-8', input: [0xC0, 0xC0], name: 'invalid trail 2' },
21+
{ encoding: 'utf-8', input: [0xE0, 0x00], name: 'invalid trail 3' },
22+
{ encoding: 'utf-8', input: [0xE0, 0xC0], name: 'invalid trail 4' },
23+
{ encoding: 'utf-8', input: [0xE0, 0x80, 0x00], name: 'invalid trail 5' },
24+
{ encoding: 'utf-8', input: [0xE0, 0x80, 0xC0], name: 'invalid trail 6' },
25+
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80],
26+
name: '> 0x10FFFF' },
27+
{ encoding: 'utf-8', input: [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80],
28+
name: 'obsolete lead byte' },
29+
// Overlong encodings
30+
{ encoding: 'utf-8', input: [0xC0, 0x80], name: 'overlong U+0000 - 2 bytes' },
31+
{ encoding: 'utf-8', input: [0xE0, 0x80, 0x80],
32+
name: 'overlong U+0000 - 3 bytes' },
33+
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x80, 0x80],
34+
name: 'overlong U+0000 - 4 bytes' },
35+
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x80, 0x80],
36+
name: 'overlong U+0000 - 5 bytes' },
37+
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80],
38+
name: 'overlong U+0000 - 6 bytes' },
39+
{ encoding: 'utf-8', input: [0xC1, 0xBF], name: 'overlong U+007F - 2 bytes' },
40+
{ encoding: 'utf-8', input: [0xE0, 0x81, 0xBF],
41+
name: 'overlong U+007F - 3 bytes' },
42+
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x81, 0xBF],
43+
name: 'overlong U+007F - 4 bytes' },
44+
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x81, 0xBF],
45+
name: 'overlong U+007F - 5 bytes' },
46+
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF],
47+
name: 'overlong U+007F - 6 bytes' },
48+
{ encoding: 'utf-8', input: [0xE0, 0x9F, 0xBF],
49+
name: 'overlong U+07FF - 3 bytes' },
50+
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x9F, 0xBF],
51+
name: 'overlong U+07FF - 4 bytes' },
52+
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x9F, 0xBF],
53+
name: 'overlong U+07FF - 5 bytes' },
54+
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF],
55+
name: 'overlong U+07FF - 6 bytes' },
56+
{ encoding: 'utf-8', input: [0xF0, 0x8F, 0xBF, 0xBF],
57+
name: 'overlong U+FFFF - 4 bytes' },
58+
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x8F, 0xBF, 0xBF],
59+
name: 'overlong U+FFFF - 5 bytes' },
60+
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF],
61+
name: 'overlong U+FFFF - 6 bytes' },
62+
{ encoding: 'utf-8', input: [0xF8, 0x84, 0x8F, 0xBF, 0xBF],
63+
name: 'overlong U+10FFFF - 5 bytes' },
64+
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF],
65+
name: 'overlong U+10FFFF - 6 bytes' },
66+
// UTF-16 surrogates encoded as code points in UTF-8
67+
{ encoding: 'utf-8', input: [0xED, 0xA0, 0x80], name: 'lead surrogate' },
68+
{ encoding: 'utf-8', input: [0xED, 0xB0, 0x80], name: 'trail surrogate' },
69+
{ encoding: 'utf-8', input: [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80],
70+
name: 'surrogate pair' },
71+
{ encoding: 'utf-16le', input: [0x00], name: 'truncated code unit' },
72+
// Mismatched UTF-16 surrogates are exercised in utf16-surrogates.html
73+
// FIXME: Add legacy encoding cases
74+
];
75+
76+
bad.forEach((t) => {
77+
common.expectsError(
78+
() => {
79+
new TextDecoder(t.encoding, { fatal: true })
80+
.decode(new Uint8Array(t.input));
81+
}, {
82+
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
83+
type: TypeError
84+
}
85+
);
86+
});
87+
88+
{
89+
assert('fatal' in new TextDecoder());
90+
assert.strictEqual(typeof new TextDecoder().fatal, 'boolean');
91+
assert(!new TextDecoder().fatal);
92+
assert(new TextDecoder('utf-8', { fatal: true }).fatal);
93+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
'use strict';
2+
3+
// From: https://github.com/w3c/web-platform-tests/blob/7f567fa29c/encoding/textdecoder-ignorebom.html
4+
5+
const common = require('../common');
6+
7+
const assert = require('assert');
8+
const {
9+
TextDecoder
10+
} = require('util');
11+
12+
const cases = [
13+
{
14+
encoding: 'utf-8',
15+
bytes: [0xEF, 0xBB, 0xBF, 0x61, 0x62, 0x63],
16+
skipNoIntl: false
17+
},
18+
{
19+
encoding: 'utf-16le',
20+
bytes: [0xFF, 0xFE, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00],
21+
skipNoIntl: false
22+
},
23+
{
24+
encoding: 'utf-16be',
25+
bytes: [0xFE, 0xFF, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63],
26+
skipNoIntl: true
27+
}
28+
];
29+
30+
cases.forEach((testCase) => {
31+
if (testCase.skipNoIntl && !common.hasIntl) {
32+
console.log(`skipping ${testCase.encoding} because missing Intl`);
33+
return; // skipping
34+
}
35+
const BOM = '\uFEFF';
36+
let decoder = new TextDecoder(testCase.encoding, { ignoreBOM: true });
37+
const bytes = new Uint8Array(testCase.bytes);
38+
assert.strictEqual(decoder.decode(bytes), `${BOM}abc`);
39+
decoder = new TextDecoder(testCase.encoding, { ignoreBOM: false });
40+
assert.strictEqual(decoder.decode(bytes), 'abc');
41+
decoder = new TextDecoder(testCase.encoding);
42+
assert.strictEqual(decoder.decode(bytes), 'abc');
43+
});
44+
45+
{
46+
assert('ignoreBOM' in new TextDecoder());
47+
assert.strictEqual(typeof new TextDecoder().ignoreBOM, 'boolean');
48+
assert(!new TextDecoder().ignoreBOM);
49+
assert(new TextDecoder('utf-8', { ignoreBOM: true }).ignoreBOM);
50+
}

0 commit comments

Comments
 (0)