Skip to content

Commit 70bbc02

Browse files
jasnelltargos
authored andcommitted
src, deps: add nbytes library
Projects that seek to implement Node.js compatible APIs end up needing to reproduce various bits of functionality internally in order to faithfully replicate the Node.js behaviors. This is particularly true for things like byte manipulation, base64 and hex encoding, and other low-level operations. This change proposes moving much of this low-level byte manipulation code out of nodejs/src and into a new `nbytes` library. Initially this new library will exist in the `deps` directory but the intent is to spin out a new separate repository to be its home in the future. Doing so will allow other projects to use the nbytes library with exactly the same implementation as Node.js. This commit moves only the byte swapping and legacy base64 handling code. Additional commits will move additional byte manipulation logic into the library. PR-URL: #53507 Reviewed-By: Yagiz Nizipli <yagiz.nizipli@sentry.io> Reviewed-By: Robert Nagy <ronagy@icloud.com> Reviewed-By: Chengzhong Wu <legendecas@gmail.com>
1 parent e95af74 commit 70bbc02

27 files changed

+669
-593
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ with-code-cache test-code-cache:
174174

175175
out/Makefile: config.gypi common.gypi common_node.gypi node.gyp \
176176
deps/uv/uv.gyp deps/llhttp/llhttp.gyp deps/zlib/zlib.gyp \
177-
deps/simdutf/simdutf.gyp deps/ada/ada.gyp \
177+
deps/simdutf/simdutf.gyp deps/ada/ada.gyp deps/nbytes/nbytes.gyp \
178178
tools/v8_gypfiles/toolchain.gypi tools/v8_gypfiles/features.gypi \
179179
tools/v8_gypfiles/inspector.gypi tools/v8_gypfiles/v8.gyp
180180
$(PYTHON) tools/gyp_node.py -f make

deps/nbytes/README.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Node.js bytes (nbytes) library
2+
3+
The `nbytes` library extracts certain Node.js-specific byte manipulation
4+
functions from the core of Node.js itself and makes them available for
5+
use in other projects that need to emulate Node.js' behavior.

deps/nbytes/nbytes.cpp

+251
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
#include "nbytes.h"
2+
#include <string.h>
3+
#include <cmath>
4+
#include <cstddef>
5+
6+
namespace nbytes {
7+
8+
// ============================================================================
9+
// Byte Swapping
10+
11+
namespace {
// Byte-order reversal helpers for 16-, 32- and 64-bit unsigned values.
//
// These are defined by <sys/byteorder.h> or <netinet/in.h> on some systems.
// To avoid warnings, undefine them before redefining them.
#ifdef BSWAP_2
# undef BSWAP_2
#endif
#ifdef BSWAP_4
# undef BSWAP_4
#endif
#ifdef BSWAP_8
# undef BSWAP_8
#endif

#if defined(_MSC_VER)
#include <intrin.h>
#define BSWAP_2(x) _byteswap_ushort(x)
#define BSWAP_4(x) _byteswap_ulong(x)
#define BSWAP_8(x) _byteswap_uint64(x)
#else
// Every expansion is wrapped in an outer pair of parentheses so that it
// behaves as a single operand wherever it is used. Without them, an expression
// such as `BSWAP_4(v) == w` or `BSWAP_4(v) & m` would bind the trailing
// operator to the last `|` term only.
//
// Note: when `x` is a uint16_t it is promoted to int before shifting, so
// BSWAP_2 leaves bits above bit 15 set; store or cast the result to uint16_t
// to discard them.
#define BSWAP_2(x) (((x) << 8) | ((x) >> 8))
#define BSWAP_4(x)                                                             \
  ((((x) & 0xFF) << 24) |                                                      \
   (((x) & 0xFF00) << 8) |                                                     \
   (((x) >> 8) & 0xFF00) |                                                     \
   (((x) >> 24) & 0xFF))
#define BSWAP_8(x)                                                             \
  ((((x) & 0xFF00000000000000ull) >> 56) |                                     \
   (((x) & 0x00FF000000000000ull) >> 40) |                                     \
   (((x) & 0x0000FF0000000000ull) >> 24) |                                     \
   (((x) & 0x000000FF00000000ull) >> 8) |                                      \
   (((x) & 0x00000000FF000000ull) << 8) |                                      \
   (((x) & 0x0000000000FF0000ull) << 24) |                                     \
   (((x) & 0x000000000000FF00ull) << 40) |                                     \
   (((x) & 0x00000000000000FFull) << 56))
#endif
}  // namespace
47+
48+
bool SwapBytes16(void* data, size_t nbytes) {
49+
if (nbytes % sizeof(uint16_t) != 0) return false;
50+
51+
#if defined(_MSC_VER)
52+
if (AlignUp(data, sizeof(uint16_t)) == data) {
53+
// MSVC has no strict aliasing, and is able to highly optimize this case.
54+
uint16_t* data16 = reinterpret_cast<uint16_t*>(data);
55+
size_t len16 = nbytes / sizeof(uint16_t);
56+
for (size_t i = 0; i < len16; i++) {
57+
data16[i] = BSWAP_2(data16[i]);
58+
}
59+
return true;
60+
}
61+
#endif
62+
63+
uint16_t temp;
64+
uint8_t* ptr = reinterpret_cast<uint8_t*>(data);
65+
for (size_t i = 0; i < nbytes; i += sizeof(uint16_t)) {
66+
memcpy(&temp, &ptr[i], sizeof(uint16_t));
67+
temp = BSWAP_2(temp);
68+
memcpy(&ptr[i], &temp, sizeof(uint16_t));
69+
}
70+
71+
return true;
72+
}
73+
74+
bool SwapBytes32(void* data, size_t nbytes) {
75+
if (nbytes % sizeof(uint32_t) != 0) return false;
76+
77+
#if defined(_MSC_VER)
78+
// MSVC has no strict aliasing, and is able to highly optimize this case.
79+
if (AlignUp(data, sizeof(uint32_t)) == data) {
80+
uint32_t* data32 = reinterpret_cast<uint32_t*>(data);
81+
size_t len32 = nbytes / sizeof(uint32_t);
82+
for (size_t i = 0; i < len32; i++) {
83+
data32[i] = BSWAP_4(data32[i]);
84+
}
85+
return true;
86+
}
87+
#endif
88+
89+
uint32_t temp = 0;
90+
uint8_t* ptr = reinterpret_cast<uint8_t*>(data);
91+
for (size_t i = 0; i < nbytes; i += sizeof(uint32_t)) {
92+
memcpy(&temp, &ptr[i], sizeof(uint32_t));
93+
temp = BSWAP_4(temp);
94+
memcpy(&ptr[i], &temp, sizeof(uint32_t));
95+
}
96+
97+
return true;
98+
}
99+
100+
bool SwapBytes64(void* data, size_t nbytes) {
101+
if (nbytes % sizeof(uint64_t) != 0) return false;
102+
103+
#if defined(_MSC_VER)
104+
if (AlignUp(data, sizeof(uint64_t)) == data) {
105+
// MSVC has no strict aliasing, and is able to highly optimize this case.
106+
uint64_t* data64 = reinterpret_cast<uint64_t*>(data);
107+
size_t len64 = nbytes / sizeof(uint64_t);
108+
for (size_t i = 0; i < len64; i++) {
109+
data64[i] = BSWAP_8(data64[i]);
110+
}
111+
return true;
112+
}
113+
#endif
114+
115+
uint64_t temp = 0;
116+
uint8_t* ptr = reinterpret_cast<uint8_t*>(data);
117+
for (size_t i = 0; i < nbytes; i += sizeof(uint64_t)) {
118+
memcpy(&temp, &ptr[i], sizeof(uint64_t));
119+
temp = BSWAP_8(temp);
120+
memcpy(&ptr[i], &temp, sizeof(uint64_t));
121+
}
122+
123+
return true;
124+
}
125+
126+
// ============================================================================
127+
// Base64 (legacy)
128+
129+
// Base64 decode lookup, indexed by the unsigned value of an input character.
// Both the regular alphabet ('+' and '/') and the URL-safe alphabet ('-' and
// '_') map to their 6-bit values (0..63). Line feed, carriage return and space
// map to -2; every other character maps to -1.
const int8_t unbase64_table[256] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, -2, -1, -1,  // 0x00
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x10
    -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,  // 0x20
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,  // 0x30
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,  // 0x40
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,  // 0x50
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,  // 0x60
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,  // 0x70
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x80
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x90
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xA0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xB0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xC0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xD0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xE0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xF0
};
148+
149+
// ============================================================================
150+
// Hex
151+
152+
// Hex digit lookup, indexed by the unsigned value of an input character.
// '0'..'9' map to 0..9, and both 'A'..'F' and 'a'..'f' map to 10..15; every
// other character maps to -1.
const int8_t unhex_table[256] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x00
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x10
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x20
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,  // 0x30
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x40
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x50
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x60
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x70
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x80
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x90
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xA0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xB0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xC0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xD0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xE0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xF0
};
170+
171+
size_t HexEncode(
172+
const char* src,
173+
size_t slen,
174+
char* dst,
175+
size_t dlen) {
176+
// We know how much we'll write, just make sure that there's space.
177+
NBYTES_ASSERT_TRUE(
178+
dlen >= MultiplyWithOverflowCheck<size_t>(slen, 2u) &&
179+
"not enough space provided for hex encode");
180+
181+
dlen = slen * 2;
182+
for (size_t i = 0, k = 0; k < dlen; i += 1, k += 2) {
183+
static const char hex[] = "0123456789abcdef";
184+
uint8_t val = static_cast<uint8_t>(src[i]);
185+
dst[k + 0] = hex[val >> 4];
186+
dst[k + 1] = hex[val & 15];
187+
}
188+
189+
return dlen;
190+
}
191+
192+
std::string HexEncode(const char* src, size_t slen) {
193+
size_t dlen = slen * 2;
194+
std::string dst(dlen, '\0');
195+
HexEncode(src, slen, dst.data(), dlen);
196+
return dst;
197+
}
198+
199+
// ============================================================================
200+
201+
// Byte-at-a-time fallback: copies `len` bytes from `src` to `dst`, clearing
// the most significant bit of every byte.
void ForceAsciiSlow(const char* src, char* dst, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    dst[i] = src[i] & 0x7f;
  }
}

// Copies `len` bytes from `src` to `dst`, clearing the most significant bit of
// every byte.
//
// Inputs shorter than 16 bytes, and inputs whose `src` and `dst` addresses are
// misaligned by different amounts, are handled entirely by ForceAsciiSlow.
// Otherwise the leading bytes up to the next word boundary are handled
// byte-wise, the body is masked one uintptr_t at a time, and any trailing
// bytes that do not fill a word are handled byte-wise again.
void ForceAscii(const char* src, char* dst, size_t len) {
  if (len < 16) {
    ForceAsciiSlow(src, dst, len);
    return;
  }

  const unsigned bytes_per_word = sizeof(uintptr_t);
  const unsigned align_mask = bytes_per_word - 1;
  const unsigned src_unalign = reinterpret_cast<uintptr_t>(src) & align_mask;
  const unsigned dst_unalign = reinterpret_cast<uintptr_t>(dst) & align_mask;

  if (src_unalign > 0) {
    if (src_unalign == dst_unalign) {
      // Number of bytes needed to reach the next word boundary.
      const unsigned unalign = bytes_per_word - src_unalign;
      ForceAsciiSlow(src, dst, unalign);
      src += unalign;
      dst += unalign;
      // Shrink the remaining length by the number of bytes just consumed
      // (`unalign`), not by the misalignment itself; using `src_unalign` here
      // would make the code below either run past the end of the buffers or
      // leave tail bytes unconverted.
      len -= unalign;
    } else {
      // Both pointers cannot be word-aligned at once; stay byte-wise.
      ForceAsciiSlow(src, dst, len);
      return;
    }
  }

#if defined(_WIN64) || defined(_LP64)
  const uintptr_t mask = ~0x8080808080808080ll;
#else
  const uintptr_t mask = ~0x80808080l;
#endif

  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
  uintptr_t* dstw = reinterpret_cast<uintptr_t*>(dst);

  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
    dstw[i] = srcw[i] & mask;
  }

  // Trailing bytes that do not fill a whole word.
  const unsigned remainder = len & align_mask;
  if (remainder > 0) {
    const size_t offset = len - remainder;
    ForceAsciiSlow(src + offset, dst + offset, remainder);
  }
}
250+
251+
} // namespace nbytes

deps/nbytes/nbytes.gyp

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
  'variables': {
    # Source files compiled into the nbytes library.
    'nbytes_sources': [
      'nbytes.cpp',
    ],
  },
  'targets': [
    {
      'target_name': 'nbytes',
      'type': 'static_library',
      'sources': [
        '<@(nbytes_sources)',
      ],
      # Header search path used when building the library itself.
      'include_dirs': [
        '.',
      ],
      # Header search path exported to targets that depend on this one.
      'direct_dependent_settings': {
        'include_dirs': [
          '.',
        ],
      },
    },
  ],
}

0 commit comments

Comments
 (0)