Skip to content

Commit 2dd0714

Browse files
author
Mert Can Altin
committed
util: add fast path for Latin1 decoding
1 parent e1852b5 commit 2dd0714

File tree

3 files changed

+46
-0
lines changed

3 files changed

+46
-0
lines changed

lib/internal/encoding.js

+5
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ const {
5555
encodeIntoResults,
5656
encodeUtf8String,
5757
decodeUTF8,
58+
decodeLatin1,
5859
} = binding;
5960

6061
const { Buffer } = require('buffer');
@@ -443,6 +444,10 @@ function makeTextDecoderICU() {
443444
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
444445
}
445446

447+
if (this[kEncoding] === 'windows-1252') {
448+
return decodeLatin1(input);
449+
}
450+
446451
this.#prepareConverter();
447452

448453
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

src/encoding_binding.cc

+40
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "encoding_binding.h"
22
#include "ada.h"
33
#include "env-inl.h"
4+
#include "node_buffer.h"
45
#include "node_errors.h"
56
#include "node_external_reference.h"
67
#include "simdutf.h"
@@ -226,6 +227,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
226227
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
227228
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
228229
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
230+
SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1);
229231
}
230232

231233
void BindingData::CreatePerContextProperties(Local<Object> target,
@@ -243,6 +245,44 @@ void BindingData::RegisterTimerExternalReferences(
243245
registry->Register(DecodeUTF8);
244246
registry->Register(ToASCII);
245247
registry->Register(ToUnicode);
248+
registry->Register(DecodeLatin1);
249+
}
250+
251+
void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
252+
Environment* env = Environment::GetCurrent(args);
253+
254+
CHECK_GE(args.Length(), 1);
255+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
256+
args[0]->IsArrayBufferView())) {
257+
return node::THROW_ERR_INVALID_ARG_TYPE(
258+
env->isolate(),
259+
"The \"input\" argument must be an instance of ArrayBuffer, "
260+
"SharedArrayBuffer, or ArrayBufferView.");
261+
}
262+
263+
ArrayBufferViewContents<uint8_t> buffer(args[0]);
264+
const uint8_t* data = buffer.data();
265+
size_t length = buffer.length();
266+
267+
if (length == 0) {
268+
return args.GetReturnValue().SetEmptyString();
269+
}
270+
271+
std::string result(length * 2, '\0');
272+
273+
size_t written = simdutf::convert_latin1_to_utf8(
274+
reinterpret_cast<const char*>(data), length, &result[0]);
275+
276+
if (written == 0) {
277+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
278+
env->isolate(), "The encoded data was not valid for encoding latin1");
279+
}
280+
281+
result.resize(written);
282+
283+
Local<Object> buffer_result =
284+
node::Buffer::Copy(env, result.c_str(), result.length()).ToLocalChecked();
285+
args.GetReturnValue().Set(buffer_result);
246286
}
247287

248288
} // namespace encoding_binding

src/encoding_binding.h

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
3131
static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
3232
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
3333
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
34+
static void DecodeLatin1(const v8::FunctionCallbackInfo<v8::Value>& args);
3435

3536
static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
3637
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);

0 commit comments

Comments
 (0)