Skip to content

Commit e70b8a4

Browse files
committed
src: register external references for source code
Currently we use external strings for internalized builtin source code. However when a snapshot is taken, any external string whose resource is not registered is flattened into a SeqString (see ref). The result is that module source code stored in the snapshot does not use external strings after deserialization. This patch registers an external string resource for each internalized builtin's source. The savings are substantial: ~1.9 MB of heap memory per isolate, or ~44% of an otherwise empty isolate's heap usage: ```console $ node --expose-gc -p 'gc(),process.memoryUsage().heapUsed' 4190968 $ ./node --expose-gc -p 'gc(),process.memoryUsage().heapUsed' 2327536 ``` The savings can be even higher for user snapshots which may include more internal modules. The existing UnionBytes implementation was ill-suited, because it only created an external string resource when ToStringChecked was called, but we need to register the external string resources before the isolate even exists. We change UnionBytes to no longer own the data, and shift ownership of the data to a new external resource class called StaticExternalByteResource. StaticExternalByteResource are either statically allocated (for internalized builtin code) or owned by the static `externalized_builtin_sources` map, so they will only be destructed when static resources are destructed. We change JS2C to emit statements to register a string resource for each internalized builtin. Refs: https://github.com/v8/v8/blob/d2c8fbe9ccd1a6ce5591bb7dd319c3c00d6bf489/src/snapshot/serializer.cc#L633
1 parent 9e9f68a commit e70b8a4

6 files changed

+123
-116
lines changed

src/node_builtins.cc

+27-19
Original file line numberDiff line numberDiff line change
@@ -213,18 +213,18 @@ MaybeLocal<String> BuiltinLoader::LoadBuiltinSource(Isolate* isolate,
213213

214214
namespace {
215215
static Mutex externalized_builtins_mutex;
216-
std::unordered_map<std::string, std::string> externalized_builtin_sources;
216+
std::unordered_map<std::string, std::unique_ptr<StaticExternalTwoByteResource>>
217+
externalized_builtin_sources;
217218
} // namespace
218219

219220
void BuiltinLoader::AddExternalizedBuiltin(const char* id,
220221
const char* filename) {
221-
std::string source;
222+
StaticExternalTwoByteResource* resource;
222223
{
223224
Mutex::ScopedLock lock(externalized_builtins_mutex);
224225
auto it = externalized_builtin_sources.find(id);
225-
if (it != externalized_builtin_sources.end()) {
226-
source = it->second;
227-
} else {
226+
if (it == externalized_builtin_sources.end()) {
227+
std::string source;
228228
int r = ReadFileSync(&source, filename);
229229
if (r != 0) {
230230
fprintf(stderr,
@@ -233,23 +233,29 @@ void BuiltinLoader::AddExternalizedBuiltin(const char* id,
233233
filename);
234234
ABORT();
235235
}
236-
externalized_builtin_sources[id] = source;
236+
size_t expected_u16_length =
237+
simdutf::utf16_length_from_utf8(source.data(), source.length());
238+
auto out = std::make_shared<std::vector<uint16_t>>(expected_u16_length);
239+
size_t u16_length = simdutf::convert_utf8_to_utf16(
240+
source.data(),
241+
source.length(),
242+
reinterpret_cast<char16_t*>(out->data()));
243+
out->resize(u16_length);
244+
245+
auto result = externalized_builtin_sources.emplace(
246+
id,
247+
std::make_unique<StaticExternalTwoByteResource>(
248+
out->data(), out->size(), out));
249+
CHECK(result.second);
250+
it = result.first;
237251
}
252+
// OK to get the raw pointer, since externalized_builtin_sources owns
253+
// the resource, resources are never removed from the map, and
254+
// externalized_builtin_sources has static lifetime.
255+
resource = it->second.get();
238256
}
239257

240-
Add(id, source);
241-
}
242-
243-
bool BuiltinLoader::Add(const char* id, std::string_view utf8source) {
244-
size_t expected_u16_length =
245-
simdutf::utf16_length_from_utf8(utf8source.data(), utf8source.length());
246-
auto out = std::make_shared<std::vector<uint16_t>>(expected_u16_length);
247-
size_t u16_length =
248-
simdutf::convert_utf8_to_utf16(utf8source.data(),
249-
utf8source.length(),
250-
reinterpret_cast<char16_t*>(out->data()));
251-
out->resize(u16_length);
252-
return Add(id, UnionBytes(out));
258+
Add(id, UnionBytes(resource));
253259
}
254260

255261
MaybeLocal<Function> BuiltinLoader::LookupAndCompileInternal(
@@ -703,6 +709,8 @@ void BuiltinLoader::RegisterExternalReferences(
703709
registry->Register(GetCacheUsage);
704710
registry->Register(CompileFunction);
705711
registry->Register(HasCachedBuiltins);
712+
713+
RegisterExternalReferencesForInternalizedBuiltinCode(registry);
706714
}
707715

708716
} // namespace builtins

src/node_builtins.h

+5
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <set>
1111
#include <string>
1212
#include <vector>
13+
#include "node_external_reference.h"
1314
#include "node_mutex.h"
1415
#include "node_threadsafe_cow.h"
1516
#include "node_union_bytes.h"
@@ -30,6 +31,10 @@ using BuiltinCodeCacheMap =
3031
std::unordered_map<std::string,
3132
std::unique_ptr<v8::ScriptCompiler::CachedData>>;
3233

34+
// Generated by tools/js2c.py as node_javascript.cc
35+
void RegisterExternalReferencesForInternalizedBuiltinCode(
36+
ExternalReferenceRegistry* registry);
37+
3338
struct CodeCacheInfo {
3439
std::string id;
3540
std::vector<uint8_t> data;

src/node_external_reference.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ class ExternalReferenceRegistry {
4747
V(v8::IndexedPropertyDefinerCallback) \
4848
V(v8::IndexedPropertyDeleterCallback) \
4949
V(v8::IndexedPropertyQueryCallback) \
50-
V(v8::IndexedPropertyDescriptorCallback)
50+
V(v8::IndexedPropertyDescriptorCallback) \
51+
V(const v8::String::ExternalStringResourceBase*)
5152

5253
#define V(ExternalReferenceType) \
5354
void Register(ExternalReferenceType addr) { RegisterT(addr); }

src/node_union_bytes.h

+50-26
Original file line numberDiff line numberDiff line change
@@ -4,50 +4,74 @@
44

55
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
66

7-
// A union of const uint8_t* or const uint16_t* data that can be
8-
// turned into external v8::String when given an isolate.
9-
107
#include "v8.h"
118

129
namespace node {
1310

11+
// An external resource intended to be used with static lifetime.
12+
template <typename Char, typename IChar, typename Base>
13+
class StaticExternalByteResource : public Base {
14+
static_assert(sizeof(IChar) == sizeof(Char),
15+
"incompatible interface and internal pointers");
16+
17+
public:
18+
explicit StaticExternalByteResource(const Char* data,
19+
size_t length,
20+
std::shared_ptr<void> owning_ptr)
21+
: data_(data), length_(length), owning_ptr_(owning_ptr) {}
22+
23+
const IChar* data() const override {
24+
return reinterpret_cast<const IChar*>(data_);
25+
}
26+
size_t length() const override { return length_; }
27+
28+
void Dispose() override {
29+
// We ignore Dispose calls from V8, even if we "own" a resource via
30+
// owning_ptr_. All instantiations of this class are static or owned by a
31+
// static map, and will be destructed when static variables are destructed.
32+
}
33+
34+
StaticExternalByteResource(const StaticExternalByteResource&) = delete;
35+
StaticExternalByteResource& operator=(const StaticExternalByteResource&) =
36+
delete;
37+
38+
private:
39+
const Char* data_;
40+
const size_t length_;
41+
std::shared_ptr<void> owning_ptr_;
42+
};
43+
44+
using StaticExternalOneByteResource =
45+
StaticExternalByteResource<uint8_t,
46+
char,
47+
v8::String::ExternalOneByteStringResource>;
48+
using StaticExternalTwoByteResource =
49+
StaticExternalByteResource<uint16_t,
50+
uint16_t,
51+
v8::String::ExternalStringResource>;
52+
1453
// Similar to a v8::String, but it's independent from Isolates
1554
// and can be materialized in Isolates as external Strings
1655
// via ToStringChecked.
1756
class UnionBytes {
1857
public:
19-
UnionBytes(const uint16_t* data, size_t length)
20-
: one_bytes_(nullptr), two_bytes_(data), length_(length) {}
21-
UnionBytes(const uint8_t* data, size_t length)
22-
: one_bytes_(data), two_bytes_(nullptr), length_(length) {}
23-
template <typename T> // T = uint8_t or uint16_t
24-
explicit UnionBytes(std::shared_ptr<std::vector</*const*/ T>> data)
25-
: UnionBytes(data->data(), data->size()) {
26-
owning_ptr_ = data;
27-
}
58+
explicit UnionBytes(StaticExternalOneByteResource* one_byte_resource)
59+
: one_byte_resource_(one_byte_resource), two_byte_resource_(nullptr) {}
60+
explicit UnionBytes(StaticExternalTwoByteResource* two_byte_resource)
61+
: one_byte_resource_(nullptr), two_byte_resource_(two_byte_resource) {}
2862

2963
UnionBytes(const UnionBytes&) = default;
3064
UnionBytes& operator=(const UnionBytes&) = default;
3165
UnionBytes(UnionBytes&&) = default;
3266
UnionBytes& operator=(UnionBytes&&) = default;
3367

34-
bool is_one_byte() const { return one_bytes_ != nullptr; }
35-
const uint16_t* two_bytes_data() const {
36-
CHECK_NOT_NULL(two_bytes_);
37-
return two_bytes_;
38-
}
39-
const uint8_t* one_bytes_data() const {
40-
CHECK_NOT_NULL(one_bytes_);
41-
return one_bytes_;
42-
}
68+
bool is_one_byte() const { return one_byte_resource_ != nullptr; }
69+
4370
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) const;
44-
size_t length() const { return length_; }
4571

4672
private:
47-
const uint8_t* one_bytes_;
48-
const uint16_t* two_bytes_;
49-
size_t length_;
50-
std::shared_ptr<void> owning_ptr_;
73+
StaticExternalOneByteResource* one_byte_resource_;
74+
StaticExternalTwoByteResource* two_byte_resource_;
5175
};
5276

5377
} // namespace node

src/util.cc

+4-56
Original file line numberDiff line numberDiff line change
@@ -551,65 +551,13 @@ void SetConstructorFunction(Isolate* isolate,
551551
that->Set(name, tmpl);
552552
}
553553

554-
namespace {
555-
556-
class NonOwningExternalOneByteResource
557-
: public v8::String::ExternalOneByteStringResource {
558-
public:
559-
explicit NonOwningExternalOneByteResource(const UnionBytes& source)
560-
: source_(source) {}
561-
~NonOwningExternalOneByteResource() override = default;
562-
563-
const char* data() const override {
564-
return reinterpret_cast<const char*>(source_.one_bytes_data());
565-
}
566-
size_t length() const override { return source_.length(); }
567-
568-
NonOwningExternalOneByteResource(const NonOwningExternalOneByteResource&) =
569-
delete;
570-
NonOwningExternalOneByteResource& operator=(
571-
const NonOwningExternalOneByteResource&) = delete;
572-
573-
private:
574-
const UnionBytes source_;
575-
};
576-
577-
class NonOwningExternalTwoByteResource
578-
: public v8::String::ExternalStringResource {
579-
public:
580-
explicit NonOwningExternalTwoByteResource(const UnionBytes& source)
581-
: source_(source) {}
582-
~NonOwningExternalTwoByteResource() override = default;
583-
584-
const uint16_t* data() const override { return source_.two_bytes_data(); }
585-
size_t length() const override { return source_.length(); }
586-
587-
NonOwningExternalTwoByteResource(const NonOwningExternalTwoByteResource&) =
588-
delete;
589-
NonOwningExternalTwoByteResource& operator=(
590-
const NonOwningExternalTwoByteResource&) = delete;
591-
592-
private:
593-
const UnionBytes source_;
594-
};
595-
596-
} // anonymous namespace
597-
598554
Local<String> UnionBytes::ToStringChecked(Isolate* isolate) const {
599-
if (UNLIKELY(length() == 0)) {
600-
// V8 requires non-null data pointers for empty external strings,
601-
// but we don't guarantee that. Solve this by not creating an
602-
// external string at all in that case.
603-
return String::Empty(isolate);
604-
}
605555
if (is_one_byte()) {
606-
NonOwningExternalOneByteResource* source =
607-
new NonOwningExternalOneByteResource(*this);
608-
return String::NewExternalOneByte(isolate, source).ToLocalChecked();
556+
return String::NewExternalOneByte(isolate, one_byte_resource_)
557+
.ToLocalChecked();
609558
} else {
610-
NonOwningExternalTwoByteResource* source =
611-
new NonOwningExternalTwoByteResource(*this);
612-
return String::NewExternalTwoByte(isolate, source).ToLocalChecked();
559+
return String::NewExternalTwoByte(isolate, two_byte_resource_)
560+
.ToLocalChecked();
613561
}
614562
}
615563

0 commit comments

Comments
 (0)