Skip to content

Commit 6f504b7

Browse files
anonrig and lemire
authored and committed
buffer: use simdutf for atob implementation
Co-authored-by: Daniel Lemire <daniel@lemire.me>
PR-URL: #52381
Refs: #51670
Reviewed-By: Daniel Lemire <daniel@lemire.me>
Reviewed-By: Vinícius Lourenço Claro Cardoso <contact@viniciusl.com.br>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Robert Nagy <ronagy@icloud.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
Reviewed-By: Filip Skokan <panva.ip@gmail.com>
1 parent cf62936 commit 6f504b7

File tree

3 files changed

+93
-73
lines changed

3 files changed

+93
-73
lines changed

benchmark/buffers/buffer-atob.js

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
'use strict';
2+
const common = require('../common.js');
3+
const assert = require('node:assert');
4+
5+
const bench = common.createBenchmark(main, {
6+
size: [16, 32, 64, 128],
7+
n: [1e6],
8+
});
9+
10+
function main({ n, size }) {
11+
const input = btoa('A'.repeat(size));
12+
let out = 0;
13+
14+
bench.start();
15+
for (let i = 0; i < n; i++) {
16+
out += atob(input).length;
17+
}
18+
bench.end(n);
19+
assert(out > 0);
20+
}

lib/buffer.js

+13-73
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,8 @@
2323

2424
const {
2525
Array,
26-
ArrayFrom,
2726
ArrayIsArray,
2827
ArrayPrototypeForEach,
29-
ArrayPrototypeIndexOf,
3028
MathFloor,
3129
MathMin,
3230
MathTrunc,
@@ -70,6 +68,7 @@ const {
7068
swap64: _swap64,
7169
kMaxLength,
7270
kStringMaxLength,
71+
atob: _atob,
7372
} = internalBinding('buffer');
7473
const {
7574
constants: {
@@ -1259,85 +1258,26 @@ function btoa(input) {
12591258
return buf.toString('base64');
12601259
}
12611260

1262-
// Refs: https://infra.spec.whatwg.org/#forgiving-base64-decode
1263-
const kForgivingBase64AllowedChars = [
1264-
// ASCII whitespace
1265-
// Refs: https://infra.spec.whatwg.org/#ascii-whitespace
1266-
0x09, 0x0A, 0x0C, 0x0D, 0x20,
1267-
1268-
// Uppercase letters
1269-
...ArrayFrom({ length: 26 }, (_, i) => StringPrototypeCharCodeAt('A') + i),
1270-
1271-
// Lowercase letters
1272-
...ArrayFrom({ length: 26 }, (_, i) => StringPrototypeCharCodeAt('a') + i),
1273-
1274-
// Decimal digits
1275-
...ArrayFrom({ length: 10 }, (_, i) => StringPrototypeCharCodeAt('0') + i),
1276-
1277-
0x2B, // +
1278-
0x2F, // /
1279-
0x3D, // =
1280-
];
1281-
const kEqualSignIndex = ArrayPrototypeIndexOf(kForgivingBase64AllowedChars,
1282-
0x3D);
1283-
12841261
function atob(input) {
1285-
// The implementation here has not been performance optimized in any way and
1286-
// should not be.
1287-
// Refs: https://github.com/nodejs/node/pull/38433#issuecomment-828426932
12881262
if (arguments.length === 0) {
12891263
throw new ERR_MISSING_ARGS('input');
12901264
}
12911265

1292-
input = `${input}`;
1293-
let nonAsciiWhitespaceCharCount = 0;
1294-
let equalCharCount = 0;
1266+
const result = _atob(`${input}`);
12951267

1296-
for (let n = 0; n < input.length; n++) {
1297-
const index = ArrayPrototypeIndexOf(
1298-
kForgivingBase64AllowedChars,
1299-
StringPrototypeCharCodeAt(input, n));
1300-
1301-
if (index > 4) {
1302-
// The first 5 elements of `kForgivingBase64AllowedChars` are
1303-
// ASCII whitespace char codes.
1304-
nonAsciiWhitespaceCharCount++;
1305-
1306-
if (index === kEqualSignIndex) {
1307-
equalCharCount++;
1308-
} else if (equalCharCount) {
1309-
// The `=` char is only allowed at the end.
1310-
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
1311-
}
1312-
1313-
if (equalCharCount > 2) {
1314-
// Only one more `=` is permitted after the first equal sign.
1315-
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
1316-
}
1317-
} else if (index === -1) {
1268+
switch (result) {
1269+
case -2: // Invalid character
13181270
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
1319-
}
1320-
}
1321-
1322-
let reminder = nonAsciiWhitespaceCharCount % 4;
1323-
1324-
// See #2, #3, #4 - https://infra.spec.whatwg.org/#forgiving-base64
1325-
if (!reminder) {
1326-
// Remove all trailing `=` characters and get the new reminder.
1327-
reminder = (nonAsciiWhitespaceCharCount - equalCharCount) % 4;
1328-
} else if (equalCharCount) {
1329-
// `=` should not in the input if there's a reminder.
1330-
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
1331-
}
1332-
1333-
// See #3 - https://infra.spec.whatwg.org/#forgiving-base64
1334-
if (reminder === 1) {
1335-
throw lazyDOMException(
1336-
'The string to be decoded is not correctly encoded.',
1337-
'InvalidCharacterError');
1271+
case -1: // Single character remained
1272+
throw lazyDOMException(
1273+
'The string to be decoded is not correctly encoded.',
1274+
'InvalidCharacterError');
1275+
case -3: // Possible overflow
1276+
// TODO(@anonrig): Throw correct error in here.
1277+
throw lazyDOMException('The input causes overflow.', 'InvalidCharacterError');
1278+
default:
1279+
return result;
13381280
}
1339-
1340-
return Buffer.from(input, 'base64').toString('latin1');
13411281
}
13421282

13431283
function isUtf8(input) {

src/node_buffer.cc

+60
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ using v8::Just;
6767
using v8::Local;
6868
using v8::Maybe;
6969
using v8::MaybeLocal;
70+
using v8::NewStringType;
7071
using v8::Nothing;
7172
using v8::Number;
7273
using v8::Object;
@@ -1210,6 +1211,61 @@ void DetachArrayBuffer(const FunctionCallbackInfo<Value>& args) {
12101211
}
12111212
}
12121213

1214+
// In case of success, the decoded string is returned.
1215+
// In case of error, a negative value is returned:
1216+
// * -1 indicates a single character remained,
1217+
// * -2 indicates an invalid character,
1218+
// * -3 indicates a possible overflow (i.e., more than 2 GB output).
1219+
static void Atob(const FunctionCallbackInfo<Value>& args) {
1220+
CHECK_EQ(args.Length(), 1);
1221+
Environment* env = Environment::GetCurrent(args);
1222+
THROW_AND_RETURN_IF_NOT_STRING(env, args[0], "argument");
1223+
1224+
Local<String> input = args[0].As<String>();
1225+
MaybeStackBuffer<char> buffer;
1226+
simdutf::result result;
1227+
1228+
if (input->IsExternalOneByte()) { // 8-bit case
1229+
auto ext = input->GetExternalOneByteStringResource();
1230+
size_t expected_length =
1231+
simdutf::maximal_binary_length_from_base64(ext->data(), ext->length());
1232+
buffer.AllocateSufficientStorage(expected_length + 1);
1233+
buffer.SetLengthAndZeroTerminate(expected_length);
1234+
result = simdutf::base64_to_binary(
1235+
ext->data(), ext->length(), buffer.out(), simdutf::base64_default);
1236+
} else { // 16-bit case
1237+
String::Value value(env->isolate(), input);
1238+
auto data = reinterpret_cast<const char16_t*>(*value);
1239+
size_t expected_length =
1240+
simdutf::maximal_binary_length_from_base64(data, value.length());
1241+
buffer.AllocateSufficientStorage(expected_length + 1);
1242+
buffer.SetLengthAndZeroTerminate(expected_length);
1243+
result = simdutf::base64_to_binary(
1244+
data, value.length(), buffer.out(), simdutf::base64_default);
1245+
}
1246+
1247+
if (result.error == simdutf::error_code::SUCCESS) {
1248+
auto value =
1249+
String::NewFromOneByte(env->isolate(),
1250+
reinterpret_cast<const uint8_t*>(buffer.out()),
1251+
NewStringType::kNormal,
1252+
result.count)
1253+
.ToLocalChecked();
1254+
return args.GetReturnValue().Set(value);
1255+
}
1256+
1257+
// Default value is: "possible overflow"
1258+
int32_t error_code = -3;
1259+
1260+
if (result.error == simdutf::error_code::INVALID_BASE64_CHARACTER) {
1261+
error_code = -2;
1262+
} else if (result.error == simdutf::error_code::BASE64_INPUT_REMAINDER) {
1263+
error_code = -1;
1264+
}
1265+
1266+
args.GetReturnValue().Set(error_code);
1267+
}
1268+
12131269
namespace {
12141270

12151271
std::pair<void*, size_t> DecomposeBufferToParts(Local<Value> buffer) {
@@ -1272,6 +1328,8 @@ void Initialize(Local<Object> target,
12721328
Environment* env = Environment::GetCurrent(context);
12731329
Isolate* isolate = env->isolate();
12741330

1331+
SetMethodNoSideEffect(context, target, "atob", Atob);
1332+
12751333
SetMethod(context, target, "setBufferPrototype", SetBufferPrototype);
12761334
SetMethodNoSideEffect(context, target, "createFromString", CreateFromString);
12771335

@@ -1373,6 +1431,8 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
13731431

13741432
registry->Register(DetachArrayBuffer);
13751433
registry->Register(CopyArrayBuffer);
1434+
1435+
registry->Register(Atob);
13761436
}
13771437

13781438
} // namespace Buffer

0 commit comments

Comments
 (0)