Skip to content

Commit 55a5442

Browse files
committed
src: register external references for source code
Currently, we use external strings for internalized builtin source code. However, when a snapshot is taken, any external string whose resource is not registered is flattened into a SeqString (see ref). The result is that any module source code stored in the snapshot does not use external strings after deserialization.

This patch registers an external string resource for each internalized builtin's source. The savings are substantial: ~1.9 MB of heap memory per isolate, or ~44% of an otherwise empty isolate's heap usage:

```bash
$ node --expose-gc -p 'gc(),process.memoryUsage().heapUsed'
4190968
$ ./node --expose-gc -p 'gc(),process.memoryUsage().heapUsed'
2327536
```

The savings can be even higher for user snapshots, which may include more internal modules.

Doing this with the existing UnionBytes abstraction was tricky, because UnionBytes only creates an external string resource when ToStringChecked is called. However, we need to collate a list of external resources before isolate construction. UnionBytes can also be deallocated, which isn't ideal, since registering an external string resource which is later deallocated would be very bad.

Rather than further complicate UnionBytes, we introduce a new class called EternalBytes, which assumes that the data has static lifetime and creates a single external string resource on construction. It reuses this original external string resource across V8 isolates by simply ignoring Dispose calls from V8.

In order to distinguish between EternalBytes and UnionBytes, we bifurcate the sources map into two maps: the internalized builtins map (which is never modified) and the sources map (which can be changed through externalized builtins or by the embedder).

Refs: https://github.com/v8/v8/blob/d2c8fbe9ccd1a6ce5591bb7dd319c3c00d6bf489/src/snapshot/serializer.cc#L633
1 parent 2660a32 commit 55a5442

7 files changed

+193
-36
lines changed

node.gyp

+1
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,7 @@
654654
'src/node_sockaddr-inl.h',
655655
'src/node_stat_watcher.h',
656656
'src/node_union_bytes.h',
657+
'src/node_eternal_bytes.h',
657658
'src/node_url.h',
658659
'src/node_util.h',
659660
'src/node_version.h',

src/node_builtins.cc

+39-10
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@ using v8::Undefined;
3434
using v8::Value;
3535

3636
BuiltinLoader::BuiltinLoader()
37-
: config_(GetConfig()), code_cache_(std::make_shared<BuiltinCodeCache>()) {
38-
LoadJavaScriptSource();
37+
: code_cache_(std::make_shared<BuiltinCodeCache>()) {
3938
#ifdef NODE_SHARED_BUILTIN_CJS_MODULE_LEXER_LEXER_PATH
4039
AddExternalizedBuiltin(
4140
"internal/deps/cjs-module-lexer/lexer",
@@ -55,18 +54,33 @@ BuiltinLoader::BuiltinLoader()
5554
}
5655

5756
bool BuiltinLoader::Exists(const char* id) {
57+
auto internalized_builtins = GetInternalizedBuiltinSourceMap();
58+
if (internalized_builtins->find(id) != internalized_builtins->end()) {
59+
return true;
60+
}
5861
auto source = source_.read();
5962
return source->find(id) != source->end();
6063
}
6164

6265
bool BuiltinLoader::Add(const char* id, const UnionBytes& source) {
66+
auto internalized_builtins = GetInternalizedBuiltinSourceMap();
67+
if (internalized_builtins->find(id) != internalized_builtins->end()) {
68+
// Cannot add this builtin as it would conflict with an
69+
// existing internalized builtin.
70+
return false;
71+
}
6372
auto result = source_.write()->emplace(id, source);
6473
return result.second;
6574
}
6675

6776
Local<Object> BuiltinLoader::GetSourceObject(Local<Context> context) {
77+
auto internalized_builtins = GetInternalizedBuiltinSourceMap();
6878
Isolate* isolate = context->GetIsolate();
6979
Local<Object> out = Object::New(isolate);
80+
for (auto const& x : *internalized_builtins) {
81+
Local<String> key = OneByteString(isolate, x.first.c_str(), x.first.size());
82+
out->Set(context, key, x.second.ToStringChecked(isolate)).FromJust();
83+
}
7084
auto source = source_.read();
7185
for (auto const& x : *source) {
7286
Local<String> key = OneByteString(isolate, x.first.c_str(), x.first.size());
@@ -76,13 +90,17 @@ Local<Object> BuiltinLoader::GetSourceObject(Local<Context> context) {
7690
}
7791

7892
Local<String> BuiltinLoader::GetConfigString(Isolate* isolate) {
79-
return config_.ToStringChecked(isolate);
93+
return GetConfig()->ToStringChecked(isolate);
8094
}
8195

8296
std::vector<std::string> BuiltinLoader::GetBuiltinIds() const {
8397
std::vector<std::string> ids;
98+
auto internalized_builtins = GetInternalizedBuiltinSourceMap();
8499
auto source = source_.read();
85-
ids.reserve(source->size());
100+
ids.reserve(internalized_builtins->size() + source->size());
101+
for (auto const& x : *internalized_builtins) {
102+
ids.emplace_back(x.first);
103+
}
86104
for (auto const& x : *source) {
87105
ids.emplace_back(x.first);
88106
}
@@ -184,14 +202,20 @@ static std::string OnDiskFileName(const char* id) {
184202

185203
MaybeLocal<String> BuiltinLoader::LoadBuiltinSource(Isolate* isolate,
186204
const char* id) const {
187-
auto source = source_.read();
188205
#ifndef NODE_BUILTIN_MODULES_PATH
189-
const auto source_it = source->find(id);
190-
if (UNLIKELY(source_it == source->end())) {
191-
fprintf(stderr, "Cannot find native builtin: \"%s\".\n", id);
192-
ABORT();
206+
auto internalized_builtins = GetInternalizedBuiltinSourceMap();
207+
const auto internalized_builtins_it = internalized_builtins->find(id);
208+
if (LIKELY(internalized_builtins_it != internalized_builtins->end())) {
209+
return internalized_builtins_it->second.ToStringChecked(isolate);
210+
}
211+
212+
auto source = source_.read();
213+
auto source_it = source->find(id);
214+
if (LIKELY(source_it != source->end())) {
215+
return source_it->second.ToStringChecked(isolate);
193216
}
194-
return source_it->second.ToStringChecked(isolate);
217+
fprintf(stderr, "Cannot find native builtin: \"%s\".\n", id);
218+
ABORT();
195219
#else // !NODE_BUILTIN_MODULES_PATH
196220
std::string filename = OnDiskFileName(id);
197221

@@ -710,6 +734,11 @@ void BuiltinLoader::RegisterExternalReferences(
710734
registry->Register(GetCacheUsage);
711735
registry->Register(CompileFunction);
712736
registry->Register(HasCachedBuiltins);
737+
auto internalized_builtins = GetInternalizedBuiltinSourceMap();
738+
for (auto const& x : *internalized_builtins) {
739+
auto resource = x.second.AsResource();
740+
registry->Register(resource);
741+
}
713742
}
714743

715744
} // namespace builtins

src/node_builtins.h

+9-7
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <set>
1111
#include <string>
1212
#include <vector>
13+
#include "node_eternal_bytes.h"
1314
#include "node_mutex.h"
1415
#include "node_threadsafe_cow.h"
1516
#include "node_union_bytes.h"
@@ -25,11 +26,15 @@ class Realm;
2526

2627
namespace builtins {
2728

28-
using BuiltinSourceMap = std::map<std::string, UnionBytes>;
29+
using InternalizedBuiltinSourceMap = std::map<std::string, EternalBytes>;
30+
using NonInternalizedBuiltinSourceMap = std::map<std::string, UnionBytes>;
2931
using BuiltinCodeCacheMap =
3032
std::unordered_map<std::string,
3133
std::unique_ptr<v8::ScriptCompiler::CachedData>>;
3234

35+
const InternalizedBuiltinSourceMap* GetInternalizedBuiltinSourceMap();
36+
const EternalBytes* GetConfig();
37+
3338
struct CodeCacheInfo {
3439
std::string id;
3540
std::vector<uint8_t> data;
@@ -84,10 +89,6 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
8489
// Only allow access from friends.
8590
friend class CodeCacheBuilder;
8691

87-
// Generated by tools/js2c.py as node_javascript.cc
88-
void LoadJavaScriptSource(); // Loads data into source_
89-
UnionBytes GetConfig(); // Return data for config.gypi
90-
9192
std::vector<std::string> GetBuiltinIds() const;
9293

9394
struct BuiltinCategories {
@@ -133,9 +134,10 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
133134

134135
void AddExternalizedBuiltin(const char* id, const char* filename);
135136

136-
ThreadsafeCopyOnWrite<BuiltinSourceMap> source_;
137+
ThreadsafeCopyOnWrite<NonInternalizedBuiltinSourceMap> source_;
137138

138-
const UnionBytes config_;
139+
static void RegisterSourcesAsExternalReferences(
140+
ExternalReferenceRegistry* registry);
139141

140142
struct BuiltinCodeCache {
141143
RwLock mutex;

src/node_eternal_bytes.h

+120
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#ifndef SRC_NODE_ETERNAL_BYTES_H_
2+
#define SRC_NODE_ETERNAL_BYTES_H_
3+
4+
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
5+
6+
// A pointer to const uint8_t* or const uint16_t* data that can be turned into
7+
// an external v8::String when given an isolate. Unlike UnionBytes, this assumes
8+
// that the underlying buffer is eternal (static lifetime, never freed) and
9+
// reuses the same v8::String::ExternalStringResourceBase for all materialized
10+
// strings. This allows the resource to be registered as an external reference
11+
// for snapshotting.
12+
13+
#include <variant>
14+
#include "v8.h"
15+
16+
namespace node {
17+
18+
class EternalBytes;
19+
20+
template <typename Char, typename IChar, typename Base>
21+
class EternalExternalByteResource : public Base {
22+
static_assert(sizeof(IChar) == sizeof(Char),
23+
"incompatible interface and internal pointers");
24+
25+
public:
26+
explicit EternalExternalByteResource(const Char* data, size_t length)
27+
: data_(data), length_(length) {}
28+
29+
const IChar* data() const override {
30+
return reinterpret_cast<const IChar*>(data_);
31+
}
32+
size_t length() const override { return length_; }
33+
34+
void Dispose() override {
35+
// Do nothing. This class is owned by the EternalBytes instance and so
36+
// should not be destroyed. It may also be in use by other external strings
37+
// besides the one which was collected.
38+
}
39+
40+
EternalExternalByteResource(const EternalExternalByteResource&) = delete;
41+
EternalExternalByteResource& operator=(const EternalExternalByteResource&) =
42+
delete;
43+
44+
friend class EternalBytes;
45+
46+
private:
47+
const Char* data_;
48+
const size_t length_;
49+
};
50+
51+
using EternalExternalOneByteResource =
52+
EternalExternalByteResource<uint8_t,
53+
char,
54+
v8::String::ExternalOneByteStringResource>;
55+
using EternalExternalTwoByteResource =
56+
EternalExternalByteResource<uint16_t,
57+
uint16_t,
58+
v8::String::ExternalStringResource>;
59+
60+
template <class... Ts>
61+
struct overloaded : Ts... {
62+
using Ts::operator()...;
63+
};
64+
template <class... Ts>
65+
overloaded(Ts...) -> overloaded<Ts...>;
66+
67+
class EternalBytes {
68+
public:
69+
EternalBytes(const uint8_t* data, size_t length)
70+
: resource_(new EternalExternalOneByteResource(data, length)) {}
71+
EternalBytes(const uint16_t* data, size_t length)
72+
: resource_(new EternalExternalTwoByteResource(data, length)) {}
73+
74+
EternalBytes(const EternalBytes&) = default;
75+
EternalBytes& operator=(const EternalBytes&) = default;
76+
EternalBytes(EternalBytes&&) = default;
77+
EternalBytes& operator=(EternalBytes&&) = default;
78+
79+
bool IsOneByte() const {
80+
return std::holds_alternative<EternalExternalOneByteResource*>(resource_);
81+
}
82+
83+
const v8::String::ExternalStringResourceBase* AsResource() const {
84+
return std::visit(
85+
overloaded{
86+
[](EternalExternalOneByteResource* resource) {
87+
return static_cast<const v8::String::ExternalStringResourceBase*>(
88+
resource);
89+
},
90+
[](EternalExternalTwoByteResource* resource) {
91+
return static_cast<const v8::String::ExternalStringResourceBase*>(
92+
resource);
93+
}},
94+
resource_);
95+
}
96+
97+
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) const {
98+
return std::visit(
99+
overloaded{[=](EternalExternalOneByteResource* resource) {
100+
return v8::String::NewExternalOneByte(isolate, resource)
101+
.ToLocalChecked();
102+
},
103+
[=](EternalExternalTwoByteResource* resource) {
104+
return v8::String::NewExternalTwoByte(isolate, resource)
105+
.ToLocalChecked();
106+
}},
107+
resource_);
108+
}
109+
110+
private:
111+
const std::variant<EternalExternalOneByteResource*,
112+
EternalExternalTwoByteResource*>
113+
resource_;
114+
};
115+
116+
} // namespace node
117+
118+
#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
119+
120+
#endif // SRC_NODE_ETERNAL_BYTES_H_

src/node_external_reference.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ class ExternalReferenceRegistry {
4747
V(v8::IndexedPropertyDefinerCallback) \
4848
V(v8::IndexedPropertyDeleterCallback) \
4949
V(v8::IndexedPropertyQueryCallback) \
50-
V(v8::IndexedPropertyDescriptorCallback)
50+
V(v8::IndexedPropertyDescriptorCallback) \
51+
V(const v8::String::ExternalStringResourceBase*)
5152

5253
#define V(ExternalReferenceType) \
5354
void Register(ExternalReferenceType addr) { RegisterT(addr); }

test/cctest/test_per_process.cc

+13-11
Original file line numberDiff line numberDiff line change
@@ -7,26 +7,28 @@
77
#include <string>
88

99
using node::builtins::BuiltinLoader;
10-
using node::builtins::BuiltinSourceMap;
10+
using node::builtins::InternalizedBuiltinSourceMap;
1111

1212
class PerProcessTest : public ::testing::Test {
1313
protected:
14-
static const BuiltinSourceMap get_sources_for_test() {
15-
return *BuiltinLoader().source_.read();
14+
static const InternalizedBuiltinSourceMap*
15+
get_internalized_builtin_source_map_for_test() {
16+
return node::builtins::GetInternalizedBuiltinSourceMap();
1617
}
1718
};
1819

1920
namespace {
2021

2122
TEST_F(PerProcessTest, EmbeddedSources) {
22-
const auto& sources = PerProcessTest::get_sources_for_test();
23-
ASSERT_TRUE(std::any_of(sources.cbegin(), sources.cend(), [](auto p) {
24-
return p.second.is_one_byte();
25-
})) << "BuiltinLoader::source_ should have some 8bit items";
26-
27-
ASSERT_TRUE(std::any_of(sources.cbegin(), sources.cend(), [](auto p) {
28-
return !p.second.is_one_byte();
29-
})) << "BuiltinLoader::source_ should have some 16bit items";
23+
const auto& sources =
24+
PerProcessTest::get_internalized_builtin_source_map_for_test();
25+
ASSERT_TRUE(std::any_of(sources->cbegin(), sources->cend(), [](auto p) {
26+
return p.second.IsOneByte();
27+
})) << "internalized_builtin_source_map should have some 8bit items";
28+
29+
ASSERT_TRUE(std::any_of(sources->cbegin(), sources->cend(), [](auto p) {
30+
return !p.second.IsOneByte();
31+
})) << "internalized_builtin_source_map should have some 16bit items";
3032
}
3133

3234
} // end namespace

tools/js2c.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,19 @@ def ReadFile(filename):
5858
{0}
5959
6060
namespace {{
61-
const ThreadsafeCopyOnWrite<BuiltinSourceMap> global_source_map {{
62-
BuiltinSourceMap{{ {1} }}
61+
const InternalizedBuiltinSourceMap internalized_builtin_source_map {{
62+
InternalizedBuiltinSourceMap{{ {1} }}
6363
}};
64+
65+
const EternalBytes config(config_raw, {2});
6466
}}
6567
66-
void BuiltinLoader::LoadJavaScriptSource() {{
67-
source_ = global_source_map;
68+
const InternalizedBuiltinSourceMap* GetInternalizedBuiltinSourceMap() {{
69+
return &internalized_builtin_source_map;
6870
}}
6971
70-
UnionBytes BuiltinLoader::GetConfig() {{
71-
return UnionBytes(config_raw, {2}); // config.gypi
72+
const EternalBytes* GetConfig() {{
73+
return &config;
7274
}}
7375
7476
}} // namespace builtins
@@ -88,7 +90,7 @@ def ReadFile(filename):
8890
}};
8991
"""
9092

91-
INITIALIZER = '{{"{0}", UnionBytes{{{1}, {2}}} }},'
93+
INITIALIZER = '{{"{0}", {{{1}, {2}}} }},'
9294

9395
CONFIG_GYPI_ID = 'config_raw'
9496

0 commit comments

Comments
 (0)