Skip to content

Commit cde6dcc

Browse files
authored
tools: refactor js2c.cc to use c++20
PR-URL: #54849 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Daniel Lemire <daniel@lemire.me>
1 parent 291d90a commit cde6dcc

File tree

1 file changed

+64
-52
lines changed

1 file changed

+64
-52
lines changed

tools/js2c.cc

+64-52
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,11 @@
11
#include <algorithm>
2+
#include <array>
23
#include <cassert>
34
#include <cctype>
4-
#include <cinttypes>
55
#include <cstdarg>
66
#include <cstdio>
77
#include <functional>
8-
#include <iostream>
98
#include <map>
10-
#include <set>
119
#include <string>
1210
#include <string_view>
1311
#include <vector>
@@ -72,42 +70,24 @@ size_t GetFileSize(const std::string& filename, int* error) {
7270
return result;
7371
}
7472

75-
bool EndsWith(const std::string& str, std::string_view suffix) {
76-
size_t suffix_len = suffix.length();
77-
size_t str_len = str.length();
78-
if (str_len < suffix_len) {
79-
return false;
80-
}
81-
return str.compare(str_len - suffix_len, suffix_len, suffix) == 0;
82-
}
83-
84-
bool StartsWith(const std::string& str, std::string_view prefix) {
85-
size_t prefix_len = prefix.length();
86-
size_t str_len = str.length();
87-
if (str_len < prefix_len) {
88-
return false;
89-
}
90-
return str.compare(0, prefix_len, prefix) == 0;
91-
}
92-
93-
bool FilenameIsConfigGypi(const std::string& path) {
94-
return path == "config.gypi" || EndsWith(path, "/config.gypi");
73+
constexpr bool FilenameIsConfigGypi(const std::string_view path) {
74+
return path == "config.gypi" || path.ends_with("/config.gypi");
9575
}
9676

9777
typedef std::vector<std::string> FileList;
9878
typedef std::map<std::string, FileList> FileMap;
9979

10080
bool SearchFiles(const std::string& dir,
10181
FileMap* file_map,
102-
const std::string& extension) {
82+
std::string_view extension) {
10383
uv_fs_t scan_req;
10484
int result = uv_fs_scandir(nullptr, &scan_req, dir.c_str(), 0, nullptr);
10585
bool errored = false;
10686
if (result < 0) {
10787
PrintUvError("scandir", dir.c_str(), result);
10888
errored = true;
10989
} else {
110-
auto it = file_map->insert({extension, FileList()}).first;
90+
auto it = file_map->insert({std::string(extension), FileList()}).first;
11191
FileList& files = it->second;
11292
files.reserve(files.size() + result);
11393
uv_dirent_t dent;
@@ -124,7 +104,7 @@ bool SearchFiles(const std::string& dir,
124104
}
125105

126106
std::string path = dir + '/' + dent.name;
127-
if (EndsWith(path, extension)) {
107+
if (path.ends_with(extension)) {
128108
files.emplace_back(path);
129109
continue;
130110
}
@@ -153,12 +133,11 @@ constexpr std::string_view kJsSuffix = ".js";
153133
constexpr std::string_view kGypiSuffix = ".gypi";
154134
constexpr std::string_view depsPrefix = "deps/";
155135
constexpr std::string_view libPrefix = "lib/";
156-
std::set<std::string_view> kAllowedExtensions{
157-
kGypiSuffix, kJsSuffix, kMjsSuffix};
158136

159-
std::string_view HasAllowedExtensions(const std::string& filename) {
160-
for (const auto& ext : kAllowedExtensions) {
161-
if (EndsWith(filename, ext)) {
137+
constexpr std::string_view HasAllowedExtensions(
138+
const std::string_view filename) {
139+
for (const auto& ext : {kGypiSuffix, kJsSuffix, kMjsSuffix}) {
140+
if (filename.ends_with(ext)) {
162141
return ext;
163142
}
164143
}
@@ -350,17 +329,17 @@ std::string GetFileId(const std::string& filename) {
350329
size_t start = 0;
351330
std::string prefix;
352331
// Strip .mjs and .js suffix
353-
if (EndsWith(filename, kMjsSuffix)) {
332+
if (filename.ends_with(kMjsSuffix)) {
354333
end -= kMjsSuffix.size();
355-
} else if (EndsWith(filename, kJsSuffix)) {
334+
} else if (filename.ends_with(kJsSuffix)) {
356335
end -= kJsSuffix.size();
357336
}
358337

359338
// deps/acorn/acorn/dist/acorn.js -> internal/deps/acorn/acorn/dist/acorn
360-
if (StartsWith(filename, depsPrefix)) {
339+
if (filename.starts_with(depsPrefix)) {
361340
start = depsPrefix.size();
362341
prefix = "internal/deps/";
363-
} else if (StartsWith(filename, libPrefix)) {
342+
} else if (filename.starts_with(libPrefix)) {
364343
// lib/internal/url.js -> internal/url
365344
start = libPrefix.size();
366345
prefix = "";
@@ -381,18 +360,52 @@ std::string GetVariableName(const std::string& id) {
381360
return result;
382361
}
383362

384-
std::vector<std::string> GetCodeTable() {
385-
size_t size = 1 << 16;
386-
std::vector<std::string> code_table(size);
387-
for (size_t i = 0; i < size; ++i) {
388-
code_table[i] = std::to_string(i) + ',';
363+
// The function returns a string buffer and an array of
364+
// offsets. The string is just "0,1,2,3,...,65535,".
365+
// The second array contains the offsets indicating the
366+
// start of each substring ("0,", "1,", etc.) and the final
367+
// offset points just beyond the end of the string.
368+
// 382106 is the length of the string "0,1,2,3,...,65535,".
369+
// 65537 is 2**16 + 1
370+
// This function could be constexpr, but it might become too expensive to
371+
// compile.
372+
std::pair<std::array<char, 382106>, std::array<uint32_t, 65537>>
373+
precompute_string() {
374+
// the string "0,1,2,3,...,65535,".
375+
std::array<char, 382106> str;
376+
// the offsets in the string pointing at the beginning of each substring
377+
std::array<uint32_t, 65537> off;
378+
off[0] = 0;
379+
char* p = &str[0];
380+
constexpr auto const_int_to_str = [](uint16_t value, char* s) -> uint32_t {
381+
uint32_t index = 0;
382+
do {
383+
s[index++] = '0' + (value % 10);
384+
value /= 10;
385+
} while (value != 0);
386+
387+
for (uint32_t i = 0; i < index / 2; ++i) {
388+
char temp = s[i];
389+
s[i] = s[index - i - 1];
390+
s[index - i - 1] = temp;
391+
}
392+
s[index] = ',';
393+
return index + 1;
394+
};
395+
for (int i = 0; i < 65536; ++i) {
396+
size_t offset = const_int_to_str(i, p);
397+
p += offset;
398+
off[i + 1] = off[i] + offset;
389399
}
390-
return code_table;
400+
return {str, off};
391401
}
392402

393-
const std::string& GetCode(uint16_t index) {
394-
static std::vector<std::string> table = GetCodeTable();
395-
return table[index];
403+
const std::string_view GetCode(uint16_t index) {
404+
// We use about 644254 bytes of memory. An array of 65536 strings might use
405+
// 2097152 bytes so we use about 3x less memory.
406+
static auto [backing_string, offsets] = precompute_string();
407+
return std::string_view(&backing_string[offsets[index]],
408+
offsets[index + 1] - offsets[index]);
396409
}
397410

398411
#ifdef NODE_JS2C_USE_STRING_LITERALS
@@ -532,8 +545,7 @@ Fragment GetDefinitionImpl(const std::vector<char>& code,
532545
// Avoid using snprintf on large chunks of data because it's much slower.
533546
// It's fine to use it on small amount of data though.
534547
if constexpr (is_two_byte) {
535-
std::vector<uint16_t> utf16_codepoints;
536-
utf16_codepoints.resize(count);
548+
std::vector<uint16_t> utf16_codepoints(count);
537549
size_t utf16_count = simdutf::convert_utf8_to_utf16(
538550
code.data(),
539551
code.size(),
@@ -542,8 +554,8 @@ Fragment GetDefinitionImpl(const std::vector<char>& code,
542554
utf16_codepoints.resize(utf16_count);
543555
Debug("static size %zu\n", utf16_count);
544556
for (size_t i = 0; i < utf16_count; ++i) {
545-
const std::string& str = GetCode(utf16_codepoints[i]);
546-
memcpy(result.data() + cur, str.c_str(), str.size());
557+
std::string_view str = GetCode(utf16_codepoints[i]);
558+
memcpy(result.data() + cur, str.data(), str.size());
547559
cur += str.size();
548560
}
549561
} else {
@@ -556,8 +568,8 @@ Fragment GetDefinitionImpl(const std::vector<char>& code,
556568
i,
557569
ch);
558570
}
559-
const std::string& str = GetCode(ch);
560-
memcpy(result.data() + cur, str.c_str(), str.size());
571+
std::string_view str = GetCode(ch);
572+
memcpy(result.data() + cur, str.data(), str.size());
561573
cur += str.size();
562574
}
563575
}
@@ -895,8 +907,8 @@ int Main(int argc, char* argv[]) {
895907
int error = 0;
896908
const std::string& file = args[i];
897909
if (IsDirectory(file, &error)) {
898-
if (!SearchFiles(file, &file_map, std::string(kJsSuffix)) ||
899-
!SearchFiles(file, &file_map, std::string(kMjsSuffix))) {
910+
if (!SearchFiles(file, &file_map, kJsSuffix) ||
911+
!SearchFiles(file, &file_map, kMjsSuffix)) {
900912
return 1;
901913
}
902914
} else if (error != 0) {

0 commit comments

Comments
 (0)