Skip to content

Commit 190596c

Browse files
kvakiltargos
authored andcommitted
src: register external references for source code
Currently we use external strings for internalized builtin source code. However when a snapshot is taken, any external string whose resource is not registered is flattened into a SeqString (see ref). The result is that module source code stored in the snapshot does not use external strings after deserialization. This patch registers an external string resource for each internalized builtin's source. The savings are substantial: ~1.9 MB of heap memory per isolate, or ~44% of an otherwise empty isolate's heap usage: ```console $ node --expose-gc -p 'gc(),process.memoryUsage().heapUsed' 4190968 $ ./node --expose-gc -p 'gc(),process.memoryUsage().heapUsed' 2327536 ``` The savings can be even higher for user snapshots which may include more internal modules. The existing UnionBytes implementation was ill-suited, because it only created an external string resource when ToStringChecked was called, but we need to register the external string resources before the isolate even exists. We change UnionBytes to no longer own the data, and shift ownership of the data to a new external resource class called StaticExternalByteResource. StaticExternalByteResource are either statically allocated (for internalized builtin code) or owned by the static `externalized_builtin_sources` map, so they will only be destructed when static resources are destructed. We change JS2C to emit statements to register a string resource for each internalized builtin. Refs: https://github.com/v8/v8/blob/d2c8fbe9ccd1a6ce5591bb7dd319c3c00d6bf489/src/snapshot/serializer.cc#L633 PR-URL: #47055 Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com> Reviewed-By: Chengzhong Wu <legendecas@gmail.com>
1 parent 106dc61 commit 190596c

6 files changed

+123
-117
lines changed

src/node_builtins.cc

+27-19
Original file line numberDiff line numberDiff line change
@@ -213,18 +213,18 @@ MaybeLocal<String> BuiltinLoader::LoadBuiltinSource(Isolate* isolate,
213213

214214
namespace {
215215
static Mutex externalized_builtins_mutex;
216-
std::unordered_map<std::string, std::string> externalized_builtin_sources;
216+
std::unordered_map<std::string, std::unique_ptr<StaticExternalTwoByteResource>>
217+
externalized_builtin_sources;
217218
} // namespace
218219

219220
void BuiltinLoader::AddExternalizedBuiltin(const char* id,
220221
const char* filename) {
221-
std::string source;
222+
StaticExternalTwoByteResource* resource;
222223
{
223224
Mutex::ScopedLock lock(externalized_builtins_mutex);
224225
auto it = externalized_builtin_sources.find(id);
225-
if (it != externalized_builtin_sources.end()) {
226-
source = it->second;
227-
} else {
226+
if (it == externalized_builtin_sources.end()) {
227+
std::string source;
228228
int r = ReadFileSync(&source, filename);
229229
if (r != 0) {
230230
fprintf(stderr,
@@ -233,23 +233,29 @@ void BuiltinLoader::AddExternalizedBuiltin(const char* id,
233233
filename);
234234
ABORT();
235235
}
236-
externalized_builtin_sources[id] = source;
236+
size_t expected_u16_length =
237+
simdutf::utf16_length_from_utf8(source.data(), source.length());
238+
auto out = std::make_shared<std::vector<uint16_t>>(expected_u16_length);
239+
size_t u16_length = simdutf::convert_utf8_to_utf16(
240+
source.data(),
241+
source.length(),
242+
reinterpret_cast<char16_t*>(out->data()));
243+
out->resize(u16_length);
244+
245+
auto result = externalized_builtin_sources.emplace(
246+
id,
247+
std::make_unique<StaticExternalTwoByteResource>(
248+
out->data(), out->size(), out));
249+
CHECK(result.second);
250+
it = result.first;
237251
}
252+
// OK to get the raw pointer, since externalized_builtin_sources owns
253+
// the resource, resources are never removed from the map, and
254+
// externalized_builtin_sources has static lifetime.
255+
resource = it->second.get();
238256
}
239257

240-
Add(id, source);
241-
}
242-
243-
bool BuiltinLoader::Add(const char* id, std::string_view utf8source) {
244-
size_t expected_u16_length =
245-
simdutf::utf16_length_from_utf8(utf8source.data(), utf8source.length());
246-
auto out = std::make_shared<std::vector<uint16_t>>(expected_u16_length);
247-
size_t u16_length =
248-
simdutf::convert_utf8_to_utf16(utf8source.data(),
249-
utf8source.length(),
250-
reinterpret_cast<char16_t*>(out->data()));
251-
out->resize(u16_length);
252-
return Add(id, UnionBytes(out));
258+
Add(id, UnionBytes(resource));
253259
}
254260

255261
MaybeLocal<Function> BuiltinLoader::LookupAndCompileInternal(
@@ -719,6 +725,8 @@ void BuiltinLoader::RegisterExternalReferences(
719725
registry->Register(CompileFunction);
720726
registry->Register(HasCachedBuiltins);
721727
registry->Register(SetInternalLoaders);
728+
729+
RegisterExternalReferencesForInternalizedBuiltinCode(registry);
722730
}
723731

724732
} // namespace builtins

src/node_builtins.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <set>
1111
#include <string>
1212
#include <vector>
13+
#include "node_external_reference.h"
1314
#include "node_mutex.h"
1415
#include "node_threadsafe_cow.h"
1516
#include "node_union_bytes.h"
@@ -30,6 +31,10 @@ using BuiltinCodeCacheMap =
3031
std::unordered_map<std::string,
3132
std::unique_ptr<v8::ScriptCompiler::CachedData>>;
3233

34+
// Generated by tools/js2c.py as node_javascript.cc
35+
void RegisterExternalReferencesForInternalizedBuiltinCode(
36+
ExternalReferenceRegistry* registry);
37+
3338
struct CodeCacheInfo {
3439
std::string id;
3540
std::vector<uint8_t> data;
@@ -72,7 +77,6 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
7277
v8::Local<v8::String> GetConfigString(v8::Isolate* isolate);
7378
bool Exists(const char* id);
7479
bool Add(const char* id, const UnionBytes& source);
75-
bool Add(const char* id, std::string_view utf8source);
7680

7781
bool CompileAllBuiltins(v8::Local<v8::Context> context);
7882
void RefreshCodeCache(const std::vector<CodeCacheInfo>& in);

src/node_external_reference.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ class ExternalReferenceRegistry {
5050
V(v8::IndexedPropertyDefinerCallback) \
5151
V(v8::IndexedPropertyDeleterCallback) \
5252
V(v8::IndexedPropertyQueryCallback) \
53-
V(v8::IndexedPropertyDescriptorCallback)
53+
V(v8::IndexedPropertyDescriptorCallback) \
54+
V(const v8::String::ExternalStringResourceBase*)
5455

5556
#define V(ExternalReferenceType) \
5657
void Register(ExternalReferenceType addr) { RegisterT(addr); }

src/node_union_bytes.h

+50-26
Original file line numberDiff line numberDiff line change
@@ -4,50 +4,74 @@
44

55
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
66

7-
// A union of const uint8_t* or const uint16_t* data that can be
8-
// turned into external v8::String when given an isolate.
9-
107
#include "v8.h"
118

129
namespace node {
1310

11+
// An external resource intended to be used with static lifetime.
12+
template <typename Char, typename IChar, typename Base>
13+
class StaticExternalByteResource : public Base {
14+
static_assert(sizeof(IChar) == sizeof(Char),
15+
"incompatible interface and internal pointers");
16+
17+
public:
18+
explicit StaticExternalByteResource(const Char* data,
19+
size_t length,
20+
std::shared_ptr<void> owning_ptr)
21+
: data_(data), length_(length), owning_ptr_(owning_ptr) {}
22+
23+
const IChar* data() const override {
24+
return reinterpret_cast<const IChar*>(data_);
25+
}
26+
size_t length() const override { return length_; }
27+
28+
void Dispose() override {
29+
// We ignore Dispose calls from V8, even if we "own" a resource via
30+
// owning_ptr_. All instantiations of this class are static or owned by a
31+
// static map, and will be destructed when static variables are destructed.
32+
}
33+
34+
StaticExternalByteResource(const StaticExternalByteResource&) = delete;
35+
StaticExternalByteResource& operator=(const StaticExternalByteResource&) =
36+
delete;
37+
38+
private:
39+
const Char* data_;
40+
const size_t length_;
41+
std::shared_ptr<void> owning_ptr_;
42+
};
43+
44+
using StaticExternalOneByteResource =
45+
StaticExternalByteResource<uint8_t,
46+
char,
47+
v8::String::ExternalOneByteStringResource>;
48+
using StaticExternalTwoByteResource =
49+
StaticExternalByteResource<uint16_t,
50+
uint16_t,
51+
v8::String::ExternalStringResource>;
52+
1453
// Similar to a v8::String, but it's independent from Isolates
1554
// and can be materialized in Isolates as external Strings
1655
// via ToStringChecked.
1756
class UnionBytes {
1857
public:
19-
UnionBytes(const uint16_t* data, size_t length)
20-
: one_bytes_(nullptr), two_bytes_(data), length_(length) {}
21-
UnionBytes(const uint8_t* data, size_t length)
22-
: one_bytes_(data), two_bytes_(nullptr), length_(length) {}
23-
template <typename T> // T = uint8_t or uint16_t
24-
explicit UnionBytes(std::shared_ptr<std::vector</*const*/ T>> data)
25-
: UnionBytes(data->data(), data->size()) {
26-
owning_ptr_ = data;
27-
}
58+
explicit UnionBytes(StaticExternalOneByteResource* one_byte_resource)
59+
: one_byte_resource_(one_byte_resource), two_byte_resource_(nullptr) {}
60+
explicit UnionBytes(StaticExternalTwoByteResource* two_byte_resource)
61+
: one_byte_resource_(nullptr), two_byte_resource_(two_byte_resource) {}
2862

2963
UnionBytes(const UnionBytes&) = default;
3064
UnionBytes& operator=(const UnionBytes&) = default;
3165
UnionBytes(UnionBytes&&) = default;
3266
UnionBytes& operator=(UnionBytes&&) = default;
3367

34-
bool is_one_byte() const { return one_bytes_ != nullptr; }
35-
const uint16_t* two_bytes_data() const {
36-
CHECK_NOT_NULL(two_bytes_);
37-
return two_bytes_;
38-
}
39-
const uint8_t* one_bytes_data() const {
40-
CHECK_NOT_NULL(one_bytes_);
41-
return one_bytes_;
42-
}
68+
bool is_one_byte() const { return one_byte_resource_ != nullptr; }
69+
4370
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) const;
44-
size_t length() const { return length_; }
4571

4672
private:
47-
const uint8_t* one_bytes_;
48-
const uint16_t* two_bytes_;
49-
size_t length_;
50-
std::shared_ptr<void> owning_ptr_;
73+
StaticExternalOneByteResource* one_byte_resource_;
74+
StaticExternalTwoByteResource* two_byte_resource_;
5175
};
5276

5377
} // namespace node

src/util.cc

+4-56
Original file line numberDiff line numberDiff line change
@@ -606,65 +606,13 @@ void SetConstructorFunction(Isolate* isolate,
606606
that->Set(name, tmpl);
607607
}
608608

609-
namespace {
610-
611-
class NonOwningExternalOneByteResource
612-
: public v8::String::ExternalOneByteStringResource {
613-
public:
614-
explicit NonOwningExternalOneByteResource(const UnionBytes& source)
615-
: source_(source) {}
616-
~NonOwningExternalOneByteResource() override = default;
617-
618-
const char* data() const override {
619-
return reinterpret_cast<const char*>(source_.one_bytes_data());
620-
}
621-
size_t length() const override { return source_.length(); }
622-
623-
NonOwningExternalOneByteResource(const NonOwningExternalOneByteResource&) =
624-
delete;
625-
NonOwningExternalOneByteResource& operator=(
626-
const NonOwningExternalOneByteResource&) = delete;
627-
628-
private:
629-
const UnionBytes source_;
630-
};
631-
632-
class NonOwningExternalTwoByteResource
633-
: public v8::String::ExternalStringResource {
634-
public:
635-
explicit NonOwningExternalTwoByteResource(const UnionBytes& source)
636-
: source_(source) {}
637-
~NonOwningExternalTwoByteResource() override = default;
638-
639-
const uint16_t* data() const override { return source_.two_bytes_data(); }
640-
size_t length() const override { return source_.length(); }
641-
642-
NonOwningExternalTwoByteResource(const NonOwningExternalTwoByteResource&) =
643-
delete;
644-
NonOwningExternalTwoByteResource& operator=(
645-
const NonOwningExternalTwoByteResource&) = delete;
646-
647-
private:
648-
const UnionBytes source_;
649-
};
650-
651-
} // anonymous namespace
652-
653609
Local<String> UnionBytes::ToStringChecked(Isolate* isolate) const {
654-
if (UNLIKELY(length() == 0)) {
655-
// V8 requires non-null data pointers for empty external strings,
656-
// but we don't guarantee that. Solve this by not creating an
657-
// external string at all in that case.
658-
return String::Empty(isolate);
659-
}
660610
if (is_one_byte()) {
661-
NonOwningExternalOneByteResource* source =
662-
new NonOwningExternalOneByteResource(*this);
663-
return String::NewExternalOneByte(isolate, source).ToLocalChecked();
611+
return String::NewExternalOneByte(isolate, one_byte_resource_)
612+
.ToLocalChecked();
664613
} else {
665-
NonOwningExternalTwoByteResource* source =
666-
new NonOwningExternalTwoByteResource(*this);
667-
return String::NewExternalTwoByte(isolate, source).ToLocalChecked();
614+
return String::NewExternalTwoByte(isolate, two_byte_resource_)
615+
.ToLocalChecked();
668616
}
669617
}
670618

0 commit comments

Comments
 (0)