Skip to content

Commit d9adcee

Browse files
joyeecheung authored and addaleax committed
test: pull encoding WPT test fixtures
PR-URL: #25321 Reviewed-By: Rich Trott <rtrott@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de>
1 parent 6778261 commit d9adcee

File tree

323 files changed

+24110
-10
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

323 files changed

+24110
-10
lines changed

test/fixtures/wpt/README.md

+3-2
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@ See [test/wpt](../../wpt/README.md) for information on how these tests are run.
1010

1111
Last update:
1212

13-
- resources: https://github.com/web-platform-tests/wpt/tree/679a364421/resources
14-
- interfaces: https://github.com/web-platform-tests/wpt/tree/db7f86289e/interfaces
1513
- console: https://github.com/web-platform-tests/wpt/tree/9786a4b131/console
14+
- encoding: https://github.com/web-platform-tests/wpt/tree/a093a659ed/encoding
1615
- url: https://github.com/web-platform-tests/wpt/tree/75b0f336c5/url
16+
- resources: https://github.com/web-platform-tests/wpt/tree/679a364421/resources
17+
- interfaces: https://github.com/web-platform-tests/wpt/tree/712c9f275e/interfaces
1718

1819
[Web Platform Tests]: https://github.com/web-platform-tests/wpt
1920
[`git node wpt`]: https://github.com/nodejs/node-core-utils/blob/master/docs/git-node.md#git-node-wpt

test/fixtures/wpt/encoding/META.yml

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
spec: https://encoding.spec.whatwg.org/
2+
suggested_reviewers:
3+
- inexorabletash
4+
- annevk
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// META: title=Encoding API: Basics
2+
3+
test(function() {
4+
assert_equals((new TextEncoder).encoding, 'utf-8', 'default encoding is utf-8');
5+
assert_equals((new TextDecoder).encoding, 'utf-8', 'default encoding is utf-8');
6+
}, 'Default encodings');
7+
8+
test(function() {
9+
assert_array_equals(new TextEncoder().encode(), [], 'input default should be empty string')
10+
assert_array_equals(new TextEncoder().encode(undefined), [], 'input default should be empty string')
11+
}, 'Default inputs');
12+
13+
14+
function testDecodeSample(encoding, string, bytes) {
15+
test(function() {
16+
assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes)), string);
17+
assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer), string);
18+
}, 'Decode sample: ' + encoding);
19+
}
20+
21+
// z (ASCII U+007A), cent (Latin-1 U+00A2), CJK water (BMP U+6C34),
22+
// G-Clef (non-BMP U+1D11E), PUA (BMP U+F8FF), PUA (non-BMP U+10FFFD)
23+
// byte-swapped BOM (non-character U+FFFE)
24+
var sample = 'z\xA2\u6C34\uD834\uDD1E\uF8FF\uDBFF\uDFFD\uFFFE';
25+
26+
test(function() {
27+
var encoding = 'utf-8';
28+
var string = sample;
29+
var bytes = [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xEF, 0xA3, 0xBF, 0xF4, 0x8F, 0xBF, 0xBD, 0xEF, 0xBF, 0xBE];
30+
var encoded = new TextEncoder().encode(string);
31+
assert_array_equals([].slice.call(encoded), bytes);
32+
assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes)), string);
33+
assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer), string);
34+
}, 'Encode/decode round trip: utf-8');
35+
36+
testDecodeSample(
37+
'utf-16le',
38+
sample,
39+
[0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]
40+
);
41+
42+
testDecodeSample(
43+
'utf-16be',
44+
sample,
45+
[0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xF8, 0xFF, 0xDB, 0xFF, 0xDF, 0xFD, 0xFF, 0xFE]
46+
);
47+
48+
testDecodeSample(
49+
'utf-16',
50+
sample,
51+
[0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]
52+
);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// META: title=Encoding API: invalid label
2+
// META: timeout=long
3+
// META: script=resources/encodings.js
4+
5+
var tests = ["invalid-invalidLabel"];
6+
setup(function() {
7+
encodings_table.forEach(function(section) {
8+
section.encodings.forEach(function(encoding) {
9+
encoding.labels.forEach(function(label) {
10+
["\u0000", "\u000b", "\u00a0", "\u2028", "\u2029"].forEach(function(ws) {
11+
tests.push(ws + label);
12+
tests.push(label + ws);
13+
tests.push(ws + label + ws);
14+
});
15+
});
16+
});
17+
});
18+
});
19+
20+
tests.forEach(function(input) {
21+
test(function() {
22+
assert_throws(new RangeError(), function() { new TextDecoder(input); });
23+
}, 'Invalid label ' + format_value(input) + ' should be rejected by TextDecoder.');
24+
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// META: title=Encoding API: replacement encoding
2+
// META: script=resources/encodings.js
3+
4+
encodings_table.forEach(function(section) {
5+
section.encodings.filter(function(encoding) {
6+
return encoding.name === 'replacement';
7+
}).forEach(function(encoding) {
8+
encoding.labels.forEach(function(label) {
9+
test(function() {
10+
assert_throws(new RangeError(), function() { new TextDecoder(label); });
11+
}, 'Label for "replacement" should be rejected by API: ' + label);
12+
});
13+
});
14+
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// META: title=Encoding API: Invalid UTF-16 surrogates with UTF-8 encoding
2+
3+
var badStrings = [
4+
{
5+
input: 'abc123',
6+
expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33],
7+
decoded: 'abc123',
8+
name: 'Sanity check'
9+
},
10+
{
11+
input: '\uD800',
12+
expected: [0xef, 0xbf, 0xbd],
13+
decoded: '\uFFFD',
14+
name: 'Surrogate half (low)'
15+
},
16+
{
17+
input: '\uDC00',
18+
expected: [0xef, 0xbf, 0xbd],
19+
decoded: '\uFFFD',
20+
name: 'Surrogate half (high)'
21+
},
22+
{
23+
input: 'abc\uD800123',
24+
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
25+
decoded: 'abc\uFFFD123',
26+
name: 'Surrogate half (low), in a string'
27+
},
28+
{
29+
input: 'abc\uDC00123',
30+
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
31+
decoded: 'abc\uFFFD123',
32+
name: 'Surrogate half (high), in a string'
33+
},
34+
{
35+
input: '\uDC00\uD800',
36+
expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd],
37+
decoded: '\uFFFD\uFFFD',
38+
name: 'Wrong order'
39+
}
40+
];
41+
42+
badStrings.forEach(function(t) {
43+
test(function() {
44+
var encoded = new TextEncoder().encode(t.input);
45+
assert_array_equals([].slice.call(encoded), t.expected);
46+
assert_equals(new TextDecoder('utf-8').decode(encoded), t.decoded);
47+
}, 'Invalid surrogates encoded into UTF-8: ' + t.name);
48+
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
<!doctype html>
2+
<meta charset=big5> <!-- test breaks if the server overrides this -->
3+
<script src=/resources/testharness.js></script>
4+
<script src=/resources/testharnessreport.js></script>
5+
<div id=log></div>
6+
<script>
7+
function encode(input, output, desc) {
8+
test(function() {
9+
var a = document.createElement("a"); // <a> uses document encoding for URL's query
10+
// Append and prepend X to test for off-by-one errors
11+
a.href = "https://example.com/?X" + input + "X";
12+
assert_equals(a.search.substr(1), "X" + output + "X"); // remove leading "?"
13+
}, "big5 encoder: " + desc);
14+
}
15+
16+
encode("ab", "ab", "very basic")
17+
// edge cases
18+
encode("\u9EA6", "%26%2340614%3B", "Highest-pointer BMP character excluded from encoder");
19+
encode("\uD858\uDE6B", "%26%23156267%3B", "Highest-pointer character excluded from encoder");
20+
encode("\u3000", "%A1@", "Lowest-pointer character included in encoder");
21+
encode("\u20AC", "%A3%E1", "Euro; the highest-pointer character before a range of 30 unmapped pointers");
22+
encode("\u4E00", "%A4@", "The lowest-pointer character after the range of 30 unmapped pointers");
23+
encode("\uD85D\uDE07", "%C8%A4", "The highest-pointer character before a range of 41 unmapped pointers");
24+
encode("\uFFE2", "%C8%CD", "The lowest-pointer character after the range of 41 unmapped pointers");
25+
encode("\u79D4", "%FE%FE", "The last character in the index");
26+
// not in index
27+
encode("\u2603", "%26%239731%3B", "The canonical BMP test character that is not in the index");
28+
encode("\uD83D\uDCA9", "%26%23128169%3B", "The canonical astral test character that is not in the index");
29+
// duplicate low bits
30+
encode("\uD840\uDFB5", "%FDj", "A Plane 2 character whose low 16 bits match a BMP character that has a lower pointer");
31+
// prefer last
32+
encode("\u2550", "%F9%F9", "A duplicate-mapped code point that prefers the highest pointer in the encoder");
33+
</script>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!doctype html>
2+
<meta charset=shift_jis>
3+
<title>Shift_JIS file ending with a truncated sequence</title>
4+
One-byte truncated sequence:&#xFFFD;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<!doctype html>
2+
<meta charset=shift_jis>
3+
<title>Shift_JIS file ending with a truncated sequence</title>
4+
<link rel=match href=/encoding/eof-shift_jis-ref.html>
5+
One-byte truncated sequence:�
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!doctype html>
2+
<meta charset=utf-8>
3+
<title>UTF-8 file ending with a one-byte truncated sequence</title>
4+
One-byte truncated sequence:&#xFFFD;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<!doctype html>
2+
<meta charset=utf-8>
3+
<title>UTF-8 file ending with a one-byte truncated sequence</title>
4+
<link rel=match href="eof-utf-8-one-ref.html">
5+
One-byte truncated sequence:�
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!doctype html>
2+
<meta charset=utf-8>
3+
<title>UTF-8 file ending with a three-byte truncated sequence</title>
4+
Three-byte truncated sequence:&#xFFFD;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<!doctype html>
2+
<meta charset=utf-8>
3+
<title>UTF-8 file ending with a three-byte truncated sequence</title>
4+
<link rel=match href="eof-utf-8-three-ref.html">
5+
Three-byte truncated sequence:�
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!doctype html>
2+
<meta charset=utf-8>
3+
<title>UTF-8 file ending with a two-byte truncated sequence</title>
4+
Two-byte truncated sequence:&#xFFFD;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<!doctype html>
2+
<meta charset=utf-8>
3+
<title>UTF-8 file ending with a two-byte truncated sequence</title>
4+
<link rel=match href="eof-utf-8-two-ref.html">
5+
Two-byte truncated sequence:�
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<!doctype html>
2+
<meta charset=gb18030> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
3+
<script src=/resources/testharness.js></script>
4+
<script src=/resources/testharnessreport.js></script>
5+
<div id=log></div>
6+
<script>
7+
function encode(input, output, desc) {
8+
test(function() {
9+
var a = document.createElement("a") // <a> uses document encoding for URL's query
10+
a.href = "https://example.com/?" + input
11+
assert_equals(a.search.substr(1), output) // remove leading "?"
12+
}, "gb18030 encoder: " + desc)
13+
}
14+
15+
encode("s", "s", "very basic")
16+
encode("\u20AC", "%A2%E3", "Euro")
17+
encode("\u4E02", "%81@", "character")
18+
encode("\uE4C6", "%A1@", "PUA")
19+
encode("\uE4C5", "%FE%FE", "PUA #2")
20+
encode("\ud83d\udca9", "%949%DA3", "poo")
21+
</script>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<!doctype html>
2+
<meta charset=gbk> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
3+
<script src=/resources/testharness.js></script>
4+
<script src=/resources/testharnessreport.js></script>
5+
<div id=log></div>
6+
<script>
7+
function encode(input, output, desc) {
8+
test(function() {
9+
var a = document.createElement("a") // <a> uses document encoding for URL's query
10+
a.href = "https://example.com/?" + input
11+
assert_equals(a.search.substr(1), output) // remove leading "?"
12+
}, "gbk encoder: " + desc)
13+
}
14+
15+
encode("s", "s", "very basic")
16+
encode("\u20AC", "%80", "Euro")
17+
encode("\u4E02", "%81@", "character")
18+
encode("\uE4C6", "%A1@", "PUA")
19+
encode("\uE4C5", "%FE%FE", "PUA #2")
20+
encode("\ud83d\udca9", "%26%23128169%3B", "poo")
21+
</script>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// META: global=window,worker
2+
// META: script=/resources/WebIDLParser.js
3+
// META: script=/resources/idlharness.js
4+
5+
idl_test(
6+
['encoding'],
7+
[], // No deps
8+
idl_array => {
9+
idl_array.add_objects({
10+
TextEncoder: ['new TextEncoder()'],
11+
TextDecoder: ['new TextDecoder()']
12+
});
13+
}
14+
);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
function decode(input, output, desc) {
2+
test(function() {
3+
var d = new TextDecoder("iso-2022-jp"),
4+
buffer = new ArrayBuffer(input.length),
5+
view = new Int8Array(buffer)
6+
for(var i = 0, l = input.length; i < l; i++) {
7+
view[i] = input[i]
8+
}
9+
assert_equals(d.decode(view), output)
10+
}, "iso-2022-jp decoder: " + desc)
11+
}
12+
decode([0x1b, 0x24], "�$", "Error ESC")
13+
decode([0x1b, 0x24, 0x50], "�$P", "Error ESC, character")
14+
decode([0x1b, 0x28, 0x42, 0x50], "P", "ASCII ESC, character")
15+
decode([0x1b, 0x28, 0x42, 0x1b, 0x28, 0x42, 0x50], "�P", "Double ASCII ESC, character")
16+
decode([0x50, 0x1b, 0x28, 0x42, 0x50], "PP", "character, ASCII ESC, character")
17+
decode([0x5C, 0x5D, 0x7E], "\\]~", "characters")
18+
decode([0x0D, 0x0E, 0x0F, 0x10], "\x0D��\x10", "SO / SI")
19+
20+
decode([0x1b, 0x28, 0x4A, 0x5C, 0x5D, 0x7E], "¥]‾", "Roman ESC, characters")
21+
decode([0x1b, 0x28, 0x4A, 0x0D, 0x0E, 0x0F, 0x10], "\x0D��\x10", "Roman ESC, SO / SI")
22+
decode([0x1b, 0x28, 0x4A, 0x1b, 0x1b, 0x28, 0x49, 0x50], "�ミ", "Roman ESC, error ESC, Katakana ESC")
23+
24+
decode([0x1b, 0x28, 0x49, 0x50], "ミ", "Katakana ESC, character")
25+
decode([0x1b, 0x28, 0x49, 0x1b, 0x24, 0x40, 0x50, 0x50], "�佩", "Katakana ESC, multibyte ESC, character")
26+
decode([0x1b, 0x28, 0x49, 0x1b, 0x50], "�ミ", "Katakana ESC, error ESC, character")
27+
decode([0x1b, 0x28, 0x49, 0x1b, 0x24, 0x50], "�、ミ", "Katakana ESC, error ESC #2, character")
28+
decode([0x1b, 0x28, 0x49, 0x50, 0x1b, 0x28, 0x49, 0x50], "ミミ", "Katakana ESC, character, Katakana ESC, character")
29+
decode([0x1b, 0x28, 0x49, 0x0D, 0x0E, 0x0F, 0x10], "����", "Katakana ESC, SO / SI")
30+
31+
decode([0x1b, 0x24, 0x40, 0x50, 0x50], "佩", "Multibyte ESC, character")
32+
decode([0x1b, 0x24, 0x42, 0x50, 0x50], "佩", "Multibyte ESC #2, character")
33+
decode([0x1b, 0x24, 0x42, 0x1b, 0x50, 0x50], "�佩", "Multibyte ESC, error ESC, character")
34+
decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x40], "�", "Double multibyte ESC")
35+
decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x40, 0x50, 0x50], "�佩", "Double multibyte ESC, character")
36+
decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x42, 0x50, 0x50], "�佩", "Double multibyte ESC #2, character")
37+
decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x50, 0x50], "�ば�", "Multibyte ESC, error ESC #2, character")
38+
39+
decode([0x1b, 0x24, 0x40, 0x50, 0x1b, 0x24, 0x40, 0x50, 0x50], "�佩", "Multibyte ESC, single byte, multibyte ESC, character")
40+
decode([0x1b, 0x24, 0x40, 0x20, 0x50], "��", "Multibyte ESC, lead error byte")
41+
decode([0x1b, 0x24, 0x40, 0x50, 0x20], "�", "Multibyte ESC, trail error byte")
42+
43+
decode([0x50, 0x1b], "P�", "character, error ESC")
44+
decode([0x50, 0x1b, 0x24], "P�$", "character, error ESC #2")
45+
decode([0x50, 0x1b, 0x50], "P�P", "character, error ESC #3")
46+
decode([0x50, 0x1b, 0x28, 0x42], "P", "character, ASCII ESC")
47+
decode([0x50, 0x1b, 0x28, 0x4A], "P", "character, Roman ESC")
48+
decode([0x50, 0x1b, 0x28, 0x49], "P", "character, Katakana ESC")
49+
decode([0x50, 0x1b, 0x24, 0x40], "P", "character, Multibyte ESC")
50+
decode([0x50, 0x1b, 0x24, 0x42], "P", "character, Multibyte ESC #2")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<!doctype html>
2+
<meta charset=iso-2022-jp> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
3+
<script src=/resources/testharness.js></script>
4+
<script src=/resources/testharnessreport.js></script>
5+
<div id=log></div>
6+
<script>
7+
function encode(input, output, desc) {
8+
test(function() {
9+
var a = document.createElement("a") // <a> uses document encoding for URL's query
10+
a.href = "https://example.com/?" + input
11+
assert_equals(a.search.substr(1), output) // remove leading "?"
12+
}, "iso-2022-jp encoder: " + desc)
13+
}
14+
15+
encode("s", "s", "very basic")
16+
encode("\u00A5\u203Es\\\uFF90\u4F69", "%1B(J\\~s%1B(B\\%1B$B%_PP%1B(B", "basics")
17+
encode("\x0E\x0F\x1Bx", "%0E%0F%1Bx", "SO/SI ESC")
18+
encode("\uFFFD", "%26%2365533%3B", "U+FFFD");
19+
</script>

0 commit comments

Comments
 (0)