Skip to content

Commit 7c79ba7

Browse files
anonrig, addaleax
authored and committed
util: add fast path for utf8 encoding
Co-authored-by: Anna Henningsen <anna@addaleax.net>
PR-URL: #45412
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Rich Trott <rtrott@gmail.com>
Reviewed-By: Santiago Gimeno <santiago.gimeno@gmail.com>
1 parent a6fe707 commit 7c79ba7

File tree

3 files changed

+76
-6
lines changed

3 files changed

+76
-6
lines changed

lib/internal/encoding.js

+30-5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
// https://encoding.spec.whatwg.org
55

66
const {
7+
Boolean,
78
ObjectCreate,
89
ObjectDefineProperties,
910
ObjectGetOwnPropertyDescriptors,
@@ -28,6 +29,8 @@ const kFlags = Symbol('flags');
2829
const kEncoding = Symbol('encoding');
2930
const kDecoder = Symbol('decoder');
3031
const kEncoder = Symbol('encoder');
32+
const kUTF8FastPath = Symbol('kUTF8FastPath');
33+
const kIgnoreBOM = Symbol('kIgnoreBOM');
3134

3235
const {
3336
getConstructorOf,
@@ -49,7 +52,8 @@ const {
4952

5053
const {
5154
encodeInto,
52-
encodeUtf8String
55+
encodeUtf8String,
56+
decodeUTF8,
5357
} = internalBinding('buffer');
5458

5559
let Buffer;
@@ -397,19 +401,40 @@ function makeTextDecoderICU() {
397401
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
398402
}
399403

400-
const handle = getConverter(enc, flags);
401-
if (handle === undefined)
402-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
404+
// Only support fast path for UTF-8 without FATAL flag
405+
const fastPathAvailable = enc === 'utf-8' && !(options?.fatal);
403406

404407
this[kDecoder] = true;
405-
this[kHandle] = handle;
406408
this[kFlags] = flags;
407409
this[kEncoding] = enc;
410+
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
411+
this[kUTF8FastPath] = fastPathAvailable;
412+
this[kHandle] = undefined;
413+
414+
if (!fastPathAvailable) {
415+
this.#prepareConverter();
416+
}
408417
}
409418

419+
#prepareConverter() {
420+
if (this[kHandle] !== undefined) return;
421+
const handle = getConverter(this[kEncoding], this[kFlags]);
422+
if (handle === undefined)
423+
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
424+
this[kHandle] = handle;
425+
}
410426

411427
decode(input = empty, options = kEmptyObject) {
412428
validateDecoder(this);
429+
430+
this[kUTF8FastPath] &&= !(options?.stream);
431+
432+
if (this[kUTF8FastPath]) {
433+
return decodeUTF8(input, this[kIgnoreBOM]);
434+
}
435+
436+
this.#prepareConverter();
437+
413438
validateObject(options, 'options', {
414439
nullable: true,
415440
allowArray: true,

src/node_buffer.cc

+45
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "node_blob.h"
2525
#include "node_errors.h"
2626
#include "node_external_reference.h"
27+
#include "node_i18n.h"
2728
#include "node_internals.h"
2829

2930
#include "env-inl.h"
@@ -565,6 +566,48 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
565566
args.GetReturnValue().Set(ret);
566567
}
567568

569+
// Convert the input into an encoded string
570+
void DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
571+
Environment* env = Environment::GetCurrent(args); // list, flags
572+
573+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
574+
args[0]->IsArrayBufferView())) {
575+
return node::THROW_ERR_INVALID_ARG_TYPE(
576+
env->isolate(),
577+
"The \"list\" argument must be an instance of SharedArrayBuffer, "
578+
"ArrayBuffer or ArrayBufferView.");
579+
}
580+
581+
ArrayBufferViewContents<char> buffer(args[0]);
582+
583+
CHECK(args[1]->IsBoolean());
584+
bool ignore_bom = args[1]->IsTrue();
585+
586+
const char* data = buffer.data();
587+
size_t length = buffer.length();
588+
589+
if (!ignore_bom && length >= 3) {
590+
if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
591+
data += 3;
592+
length -= 3;
593+
}
594+
}
595+
596+
if (length == 0) return args.GetReturnValue().SetEmptyString();
597+
598+
Local<Value> error;
599+
MaybeLocal<Value> maybe_ret =
600+
StringBytes::Encode(env->isolate(), data, length, UTF8, &error);
601+
Local<Value> ret;
602+
603+
if (!maybe_ret.ToLocal(&ret)) {
604+
CHECK(!error.IsEmpty());
605+
env->isolate()->ThrowException(error);
606+
return;
607+
}
608+
609+
args.GetReturnValue().Set(ret);
610+
}
568611

569612
// bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd])
570613
void Copy(const FunctionCallbackInfo<Value> &args) {
@@ -1282,6 +1325,7 @@ void Initialize(Local<Object> target,
12821325

12831326
SetMethod(context, target, "setBufferPrototype", SetBufferPrototype);
12841327
SetMethodNoSideEffect(context, target, "createFromString", CreateFromString);
1328+
SetMethodNoSideEffect(context, target, "decodeUTF8", DecodeUTF8);
12851329

12861330
SetMethodNoSideEffect(context, target, "byteLengthUtf8", ByteLengthUtf8);
12871331
SetMethod(context, target, "copy", Copy);
@@ -1339,6 +1383,7 @@ void Initialize(Local<Object> target,
13391383
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
13401384
registry->Register(SetBufferPrototype);
13411385
registry->Register(CreateFromString);
1386+
registry->Register(DecodeUTF8);
13421387

13431388
registry->Register(ByteLengthUtf8);
13441389
registry->Register(Copy);

test/parallel/test-whatwg-encoding-custom-textdecoder.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ if (common.hasIntl) {
113113
' fatal: false,\n' +
114114
' ignoreBOM: true,\n' +
115115
' [Symbol(flags)]: 4,\n' +
116-
' [Symbol(handle)]: Converter {}\n' +
116+
' [Symbol(handle)]: undefined\n' +
117117
'}'
118118
);
119119
} else {

0 commit comments

Comments
 (0)