Skip to content

Commit b871573

Browse files
author
Mert Can Altin
committed
util: add fast path for Latin1 decoding
1 parent bbdfeeb commit b871573

File tree

4 files changed

+119
-0
lines changed

4 files changed

+119
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#include "encoding_binding.h"
2+
#include "node_test_fixture.h"
3+
#include "env-inl.h"
4+
#include "v8.h"
5+
#include "gtest/gtest.h"
6+
7+
namespace node {
8+
namespace encoding_binding {
9+
10+
// Test helper: invokes BindingData::DecodeLatin1 through a real V8 function
// call and reports whether it completed without throwing.
//
// env:    environment whose isolate and context are used for the call.
// args:   argument array; only the first element is forwarded, which matches
//         every caller in this file.
// result: receives the value returned by DecodeLatin1 on success; left
//         untouched on failure.
//
// Returns true when the call returned a value, false when it threw (or the
// wrapper function could not be created).
bool RunDecodeLatin1(Environment* env, Local<Value> args[], Local<Value>* result) {
  v8::Isolate* isolate = env->isolate();
  v8::Local<v8::Context> context = env->context();
  v8::Context::Scope context_scope(context);
  // Keeps an exception thrown by the callback from escaping the helper.
  v8::TryCatch try_catch(isolate);

  // A FunctionCallbackInfo cannot be built by hand from an argument array, so
  // wrap the native callback in a function object and let V8 construct it.
  v8::Local<v8::Function> decode;
  if (!v8::Function::New(context, BindingData::DecodeLatin1).ToLocal(&decode)) {
    return false;
  }

  constexpr int kArgc = 1;
  v8::Local<v8::Value> returned;
  if (!decode->Call(context, v8::Undefined(isolate), kArgc, args)
           .ToLocal(&returned)) {
    // An empty result means the callback threw; try_catch holds the exception.
    return false;
  }

  *result = returned;
  return true;
}
23+
24+
// Fixture for the DecodeLatin1 tests below; adds nothing on top of
// NodeTestFixture and exists only to give the TEST_F cases a common name.
class EncodingBindingTest : public NodeTestFixture {};
25+
26+
// Decoding the Latin-1 bytes C1 E9 F3 must succeed and produce "Áéó".
TEST_F(EncodingBindingTest, DecodeLatin1_ValidInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  const uint8_t latin1_data[] = {0xC1, 0xE9, 0xF3};
  Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, sizeof(latin1_data));
  memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data));

  Local<Uint8Array> array = Uint8Array::New(ab, 0, sizeof(latin1_data));
  Local<Value> args[] = {array};

  Local<Value> result;
  // ASSERT rather than EXPECT: the code below reads `result`, which must not
  // be touched if the call failed or produced an empty handle.
  ASSERT_TRUE(RunDecodeLatin1(env, args, &result));
  ASSERT_FALSE(result.IsEmpty());

  String::Utf8Value utf8_result(isolate, result);
  // *utf8_result is null when the conversion fails; EXPECT_STREQ would then
  // compare against a null pointer.
  ASSERT_NE(*utf8_result, nullptr);
  EXPECT_STREQ(*utf8_result, "Áéó");
}
44+
45+
// Decoding a zero-length Uint8Array must succeed and produce "".
TEST_F(EncodingBindingTest, DecodeLatin1_EmptyInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, 0);
  Local<Uint8Array> array = Uint8Array::New(ab, 0, 0);
  Local<Value> args[] = {array};

  Local<Value> result;
  // ASSERT rather than EXPECT: the code below reads `result`, which must not
  // be touched if the call failed or produced an empty handle.
  ASSERT_TRUE(RunDecodeLatin1(env, args, &result));
  ASSERT_FALSE(result.IsEmpty());

  String::Utf8Value utf8_result(isolate, result);
  // *utf8_result is null when the conversion fails; EXPECT_STREQ would then
  // compare against a null pointer.
  ASSERT_NE(*utf8_result, nullptr);
  EXPECT_STREQ(*utf8_result, "");
}
60+
61+
// Passing a plain string instead of a buffer must make the helper report
// failure.
TEST_F(EncodingBindingTest, DecodeLatin1_InvalidInput) {
  Environment* environment = CreateEnvironment();
  Isolate* iso = environment->isolate();
  HandleScope scope(iso);

  Local<Value> not_a_buffer = String::NewFromUtf8Literal(iso, "Invalid input");
  Local<Value> call_args[] = {not_a_buffer};

  Local<Value> unused_result;
  const bool decoded = RunDecodeLatin1(environment, call_args, &unused_result);
  EXPECT_FALSE(decoded);
}
71+
72+
} // namespace encoding_binding
73+
} // namespace node

lib/internal/encoding.js

+5
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ const {
5555
encodeIntoResults,
5656
encodeUtf8String,
5757
decodeUTF8,
58+
decodeLatin1,
5859
} = binding;
5960

6061
const { Buffer } = require('buffer');
@@ -443,6 +444,10 @@ function makeTextDecoderICU() {
443444
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
444445
}
445446

447+
if (this[kEncoding] === 'windows-1252') {
448+
return decodeLatin1(input);
449+
}
450+
446451
this.#prepareConverter();
447452

448453
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

src/encoding_binding.cc

+40
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "encoding_binding.h"
22
#include "ada.h"
33
#include "env-inl.h"
4+
#include "node_buffer.h"
45
#include "node_errors.h"
56
#include "node_external_reference.h"
67
#include "simdutf.h"
@@ -226,6 +227,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
226227
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
227228
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
228229
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
230+
SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1);
229231
}
230232

231233
void BindingData::CreatePerContextProperties(Local<Object> target,
@@ -243,6 +245,44 @@ void BindingData::RegisterTimerExternalReferences(
243245
registry->Register(DecodeUTF8);
244246
registry->Register(ToASCII);
245247
registry->Register(ToUnicode);
248+
registry->Register(DecodeLatin1);
249+
}
250+
251+
void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
252+
Environment* env = Environment::GetCurrent(args);
253+
254+
CHECK_GE(args.Length(), 1);
255+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
256+
args[0]->IsArrayBufferView())) {
257+
return node::THROW_ERR_INVALID_ARG_TYPE(
258+
env->isolate(),
259+
"The \"input\" argument must be an instance of ArrayBuffer, "
260+
"SharedArrayBuffer, or ArrayBufferView.");
261+
}
262+
263+
ArrayBufferViewContents<uint8_t> buffer(args[0]);
264+
const uint8_t* data = buffer.data();
265+
size_t length = buffer.length();
266+
267+
if (length == 0) {
268+
return args.GetReturnValue().SetEmptyString();
269+
}
270+
271+
std::string result(length * 2, '\0');
272+
273+
size_t written = simdutf::convert_latin1_to_utf8(
274+
reinterpret_cast<const char*>(data), length, &result[0]);
275+
276+
if (written == 0) {
277+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
278+
env->isolate(), "The encoded data was not valid for encoding latin1");
279+
}
280+
281+
result.resize(written);
282+
283+
Local<Object> buffer_result =
284+
node::Buffer::Copy(env, result.c_str(), result.length()).ToLocalChecked();
285+
args.GetReturnValue().Set(buffer_result);
246286
}
247287

248288
} // namespace encoding_binding

src/encoding_binding.h

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
3131
static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
3232
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
3333
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
34+
static void DecodeLatin1(const v8::FunctionCallbackInfo<v8::Value>& args);
3435

3536
static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
3637
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);

0 commit comments

Comments
 (0)