Skip to content

Commit ae842a4

Browse files
anonrig authored and RafaelGSS committed
util: add fast path for text-decoder fatal flag
PR-URL: #45803
Reviewed-By: Robert Nagy <ronagy@icloud.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Michael Dawson <midawson@redhat.com>
1 parent 344c5ec commit ae842a4

File tree

3 files changed, with 25 additions and 9 deletions.

benchmark/util/text-decoder.js

+8 -3
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,14 @@ const common = require('../common.js');
55
const bench = common.createBenchmark(main, {
66
encoding: ['utf-8', 'latin1', 'iso-8859-3'],
77
ignoreBOM: [0, 1],
8+
fatal: [0, 1],
89
len: [256, 1024 * 16, 1024 * 512],
910
n: [1e2],
1011
type: ['SharedArrayBuffer', 'ArrayBuffer', 'Buffer']
1112
});
1213

13-
function main({ encoding, len, n, ignoreBOM, type }) {
14-
const decoder = new TextDecoder(encoding, { ignoreBOM });
14+
function main({ encoding, len, n, ignoreBOM, type, fatal }) {
15+
const decoder = new TextDecoder(encoding, { ignoreBOM, fatal });
1516
let buf;
1617

1718
switch (type) {
@@ -31,7 +32,11 @@ function main({ encoding, len, n, ignoreBOM, type }) {
3132

3233
bench.start();
3334
for (let i = 0; i < n; i++) {
34-
decoder.decode(buf);
35+
try {
36+
decoder.decode(buf);
37+
} catch {
38+
// eslint-disable no-empty
39+
}
3540
}
3641
bench.end(n);
3742
}

lib/internal/encoding.js

+6 -6
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ const kFlags = Symbol('flags');
2929
const kEncoding = Symbol('encoding');
3030
const kDecoder = Symbol('decoder');
3131
const kEncoder = Symbol('encoder');
32+
const kFatal = Symbol('kFatal');
3233
const kUTF8FastPath = Symbol('kUTF8FastPath');
3334
const kIgnoreBOM = Symbol('kIgnoreBOM');
3435

@@ -396,17 +397,16 @@ function makeTextDecoderICU() {
396397
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
397398
}
398399

399-
// Only support fast path for UTF-8 without FATAL flag
400-
const fastPathAvailable = enc === 'utf-8' && !(options?.fatal);
401-
402400
this[kDecoder] = true;
403401
this[kFlags] = flags;
404402
this[kEncoding] = enc;
405403
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
406-
this[kUTF8FastPath] = fastPathAvailable;
404+
this[kFatal] = Boolean(options?.fatal);
405+
// Only support fast path for UTF-8.
406+
this[kUTF8FastPath] = enc === 'utf-8';
407407
this[kHandle] = undefined;
408408

409-
if (!fastPathAvailable) {
409+
if (!this[kUTF8FastPath]) {
410410
this.#prepareConverter();
411411
}
412412
}
@@ -425,7 +425,7 @@ function makeTextDecoderICU() {
425425
this[kUTF8FastPath] &&= !(options?.stream);
426426

427427
if (this[kUTF8FastPath]) {
428-
return decodeUTF8(input, this[kIgnoreBOM]);
428+
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
429429
}
430430

431431
this.#prepareConverter();

src/node_buffer.cc

+11 -0
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
#include "node_internals.h"
2929

3030
#include "env-inl.h"
31+
#include "simdutf.h"
3132
#include "string_bytes.h"
3233
#include "string_search.h"
3334
#include "util-inl.h"
@@ -583,10 +584,20 @@ void DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
583584
ArrayBufferViewContents<char> buffer(args[0]);
584585

585586
bool ignore_bom = args[1]->IsTrue();
587+
bool has_fatal = args[2]->IsTrue();
586588

587589
const char* data = buffer.data();
588590
size_t length = buffer.length();
589591

592+
if (has_fatal) {
593+
auto result = simdutf::validate_utf8_with_errors(data, length);
594+
595+
if (result.error) {
596+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
597+
env->isolate(), "The encoded data was not valid for encoding utf-8");
598+
}
599+
}
600+
590601
if (!ignore_bom && length >= 3) {
591602
if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
592603
data += 3;

0 commit comments

Comments
 (0)