@@ -19,12 +19,17 @@ void builderAppendQuotedString(StringBuilder& builder,
19
19
size_t expected_utf16_length =
20
20
simdutf::utf16_length_from_utf8 (string.data (), string.length ());
21
21
MaybeStackBuffer<char16_t > buffer (expected_utf16_length);
22
+ // simdutf::convert_utf8_to_utf16 returns zero in case of error.
22
23
size_t utf16_length = simdutf::convert_utf8_to_utf16 (
23
24
string.data (), string.length (), buffer.out ());
24
- CHECK_EQ (expected_utf16_length, utf16_length);
25
- escapeWideStringForJSON (reinterpret_cast <const uint16_t *>(buffer.out ()),
26
- utf16_length,
27
- &builder);
25
+ // We have that utf16_length == expected_utf16_length if and only
26
+ // if the input was a valid UTF-8 string.
27
+ if (utf16_length != 0 ) {
28
+ CHECK_EQ (expected_utf16_length, utf16_length);
29
+ escapeWideStringForJSON (reinterpret_cast <const uint16_t *>(buffer.out ()),
30
+ utf16_length,
31
+ &builder);
32
+ } // Otherwise, we had an invalid UTF-8 input.
28
33
}
29
34
builder.put (' "' );
30
35
}
@@ -35,8 +40,12 @@ std::unique_ptr<Value> parseJSON(const std::string_view string) {
35
40
size_t expected_utf16_length =
36
41
simdutf::utf16_length_from_utf8 (string.data (), string.length ());
37
42
MaybeStackBuffer<char16_t > buffer (expected_utf16_length);
43
+ // simdutf::convert_utf8_to_utf16 returns zero in case of error.
38
44
size_t utf16_length = simdutf::convert_utf8_to_utf16 (
39
45
string.data (), string.length (), buffer.out ());
46
+ // We have that utf16_length == expected_utf16_length if and only
47
+ // if the input was a valid UTF-8 string.
48
+ if (utf16_length == 0 ) return nullptr ; // We had an invalid UTF-8 input.
40
49
CHECK_EQ (expected_utf16_length, utf16_length);
41
50
return parseJSONCharacters (reinterpret_cast <const uint16_t *>(buffer.out ()),
42
51
utf16_length);
@@ -62,9 +71,14 @@ String StringViewToUtf8(v8_inspector::StringView view) {
62
71
size_t expected_utf8_length =
63
72
simdutf::utf8_length_from_utf16 (source, view.length ());
64
73
MaybeStackBuffer<char > buffer (expected_utf8_length);
74
+ // convert_utf16_to_utf8 returns zero in case of error.
65
75
size_t utf8_length =
66
76
simdutf::convert_utf16_to_utf8 (source, view.length (), buffer.out ());
67
- CHECK_EQ (expected_utf8_length, utf8_length);
77
+ // We have that utf8_length == expected_utf8_length if and only
78
+ // if the input was a valid UTF-16 string. Otherwise, utf8_length
79
+ // must be zero.
80
+ CHECK (utf8_length == 0 || utf8_length == expected_utf8_length);
81
+ // An invalid UTF-16 input will generate the empty string:
68
82
return String (buffer.out (), utf8_length);
69
83
}
70
84
@@ -112,9 +126,14 @@ String fromUTF16(const uint16_t* data, size_t length) {
112
126
size_t expected_utf8_length =
113
127
simdutf::utf8_length_from_utf16 (casted_data, length);
114
128
MaybeStackBuffer<char > buffer (expected_utf8_length);
129
+ // simdutf::convert_utf16_to_utf8 returns zero in case of error.
115
130
size_t utf8_length =
116
131
simdutf::convert_utf16_to_utf8 (casted_data, length, buffer.out ());
117
- CHECK_EQ (expected_utf8_length, utf8_length);
132
+ // We have that utf8_length == expected_utf8_length if and only
133
+ // if the input was a valid UTF-16 string. Otherwise, utf8_length
134
+ // must be zero.
135
+ CHECK (utf8_length == 0 || utf8_length == expected_utf8_length);
136
+ // An invalid UTF-16 input will generate the empty string:
118
137
return String (buffer.out (), utf8_length);
119
138
}
120
139
@@ -123,6 +142,9 @@ const uint8_t* CharactersUTF8(const std::string_view s) {
123
142
}
124
143
125
144
size_t CharacterCount (const std::string_view s) {
145
+ // The utf32_length_from_utf8 function calls count_utf8.
146
+ // The count_utf8 function counts the number of code points
147
+ // (characters) in the string, assuming that the string is valid Unicode.
126
148
// TODO(@anonrig): Test to make sure CharacterCount returns correctly.
127
149
return simdutf::utf32_length_from_utf8 (s.data (), s.length ());
128
150
}
0 commit comments