Skip to content

Commit b34de64

Browse files
lemire authored and MoLow committed
src: allow simdutf::convert_* functions to return zero
PR-URL: #47471
Reviewed-By: Rich Trott <rtrott@gmail.com>
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
1 parent 3902be8 commit b34de64

File tree

1 file changed

+28
-6
lines changed

1 file changed

+28
-6
lines changed

src/inspector/node_string.cc

+28 -6
Original file line number | Diff line number | Diff line change
@@ -19,12 +19,17 @@ void builderAppendQuotedString(StringBuilder& builder,
1919
size_t expected_utf16_length =
2020
simdutf::utf16_length_from_utf8(string.data(), string.length());
2121
MaybeStackBuffer<char16_t> buffer(expected_utf16_length);
22+
// simdutf::convert_utf8_to_utf16 returns zero in case of error.
2223
size_t utf16_length = simdutf::convert_utf8_to_utf16(
2324
string.data(), string.length(), buffer.out());
24-
CHECK_EQ(expected_utf16_length, utf16_length);
25-
escapeWideStringForJSON(reinterpret_cast<const uint16_t*>(buffer.out()),
26-
utf16_length,
27-
&builder);
25+
// We have that utf16_length == expected_utf16_length if and only
26+
// if the input was a valid UTF-8 string.
27+
if (utf16_length != 0) {
28+
CHECK_EQ(expected_utf16_length, utf16_length);
29+
escapeWideStringForJSON(reinterpret_cast<const uint16_t*>(buffer.out()),
30+
utf16_length,
31+
&builder);
32+
} // Otherwise, we had an invalid UTF-8 input.
2833
}
2934
builder.put('"');
3035
}
@@ -35,8 +40,12 @@ std::unique_ptr<Value> parseJSON(const std::string_view string) {
3540
size_t expected_utf16_length =
3641
simdutf::utf16_length_from_utf8(string.data(), string.length());
3742
MaybeStackBuffer<char16_t> buffer(expected_utf16_length);
43+
// simdutf::convert_utf8_to_utf16 returns zero in case of error.
3844
size_t utf16_length = simdutf::convert_utf8_to_utf16(
3945
string.data(), string.length(), buffer.out());
46+
// We have that utf16_length == expected_utf16_length if and only
47+
// if the input was a valid UTF-8 string.
48+
if (utf16_length == 0) return nullptr; // We had an invalid UTF-8 input.
4049
CHECK_EQ(expected_utf16_length, utf16_length);
4150
return parseJSONCharacters(reinterpret_cast<const uint16_t*>(buffer.out()),
4251
utf16_length);
@@ -62,9 +71,14 @@ String StringViewToUtf8(v8_inspector::StringView view) {
6271
size_t expected_utf8_length =
6372
simdutf::utf8_length_from_utf16(source, view.length());
6473
MaybeStackBuffer<char> buffer(expected_utf8_length);
74+
// convert_utf16_to_utf8 returns zero in case of error.
6575
size_t utf8_length =
6676
simdutf::convert_utf16_to_utf8(source, view.length(), buffer.out());
67-
CHECK_EQ(expected_utf8_length, utf8_length);
77+
// We have that utf8_length == expected_utf8_length if and only
78+
// if the input was a valid UTF-16 string. Otherwise, utf8_length
79+
// must be zero.
80+
CHECK(utf8_length == 0 || utf8_length == expected_utf8_length);
81+
// An invalid UTF-16 input will generate the empty string:
6882
return String(buffer.out(), utf8_length);
6983
}
7084

@@ -112,9 +126,14 @@ String fromUTF16(const uint16_t* data, size_t length) {
112126
size_t expected_utf8_length =
113127
simdutf::utf8_length_from_utf16(casted_data, length);
114128
MaybeStackBuffer<char> buffer(expected_utf8_length);
129+
// simdutf::convert_utf16_to_utf8 returns zero in case of error.
115130
size_t utf8_length =
116131
simdutf::convert_utf16_to_utf8(casted_data, length, buffer.out());
117-
CHECK_EQ(expected_utf8_length, utf8_length);
132+
// We have that utf8_length == expected_utf8_length if and only
133+
// if the input was a valid UTF-16 string. Otherwise, utf8_length
134+
// must be zero.
135+
CHECK(utf8_length == 0 || utf8_length == expected_utf8_length);
136+
// An invalid UTF-16 input will generate the empty string:
118137
return String(buffer.out(), utf8_length);
119138
}
120139

@@ -123,6 +142,9 @@ const uint8_t* CharactersUTF8(const std::string_view s) {
123142
}
124143

125144
size_t CharacterCount(const std::string_view s) {
145+
// The utf32_length_from_utf8 function calls count_utf8.
146+
// The count_utf8 function counts the number of code points
147+
// (characters) in the string, assuming that the string is valid Unicode.
126148
// TODO(@anonrig): Test to make sure CharacterCount returns correctly.
127149
return simdutf::utf32_length_from_utf8(s.data(), s.length());
128150
}

0 commit comments

Comments
 (0)