Skip to content

Commit 06f6ac1

Browse files
committed
string_decoder: fix number of replacement chars
Fixes: #22626 PR-URL: #22709 Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Rich Trott <rtrott@gmail.com> Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
1 parent ab6ddc0 commit 06f6ac1

File tree

2 files changed

+18
-6
lines changed

2 files changed

+18
-6
lines changed

src/string_decoder.cc

+7-6
Original file line numberDiff line numberDiff line change
@@ -71,16 +71,17 @@ MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
7171
kIncompleteCharactersEnd);
7272
if (Encoding() == UTF8) {
7373
// For UTF-8, we need special treatment to align with the V8 decoder:
74-
// If an incomplete character is found at a chunk boundary, we turn
75-
// that character into a single invalid one.
74+
// If an incomplete character is found at a chunk boundary, we use
75+
// its remainder and pass it to V8 as-is.
7676
for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
7777
if ((data[i] & 0xC0) != 0x80) {
7878
// This byte is not a continuation byte even though it should have
79-
// been one.
80-
// Act as if there was a 1-byte incomplete character, which does
81-
// not make sense but works here because we know it's invalid.
79+
// been one. We stop decoding of the incomplete character at this
80+
// point (but still use the rest of the incomplete bytes from this
81+
// chunk) and assume that the new, unexpected byte starts a new one.
8282
state_[kMissingBytes] = 0;
83-
state_[kBufferedBytes] = 1;
83+
memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i);
84+
state_[kBufferedBytes] += i;
8485
data += i;
8586
nread -= i;
8687
break;

test/parallel/test-string-decoder.js

+11
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,17 @@ assert.strictEqual(decoder.write(Buffer.alloc(20)), '\0'.repeat(10));
162162
assert.strictEqual(decoder.write(Buffer.alloc(48)), '\0'.repeat(24));
163163
assert.strictEqual(decoder.end(), '');
164164

165+
// Regression tests for https://github.com/nodejs/node/issues/22626
166+
// (not enough replacement chars when having seen more than one byte of an
167+
// incomplete multibyte character).
168+
decoder = new StringDecoder('utf8');
169+
assert.strictEqual(decoder.write(Buffer.from('f69b', 'hex')), '');
170+
assert.strictEqual(decoder.write(Buffer.from('d1', 'hex')), '\ufffd\ufffd');
171+
assert.strictEqual(decoder.end(), '\ufffd');
172+
assert.strictEqual(decoder.write(Buffer.from('f4', 'hex')), '');
173+
assert.strictEqual(decoder.write(Buffer.from('bde5', 'hex')), '\ufffd\ufffd');
174+
assert.strictEqual(decoder.end(), '\ufffd');
175+
165176
common.expectsError(
166177
() => new StringDecoder(1),
167178
{

0 commit comments

Comments
 (0)