Skip to content

Commit b5e805f

Browse files
committed
module: detect ESM syntax by trying to recompile as SourceTextModule
Instead of using an async function wrapper, just try compiling code with unknown module format as SourceTextModule when it cannot be compiled as CJS and the error message indicates that it's worth a retry. If it can be parsed as SourceTextModule then it's considered ESM. Also, move shouldRetryAsESM() to C++ completely so that we can reuse it in the CJS module loader for require(esm). Drive-by: move methods that don't belong to ContextifyContext out as static methods and move GetHostDefinedOptions to ModuleWrap. PR-URL: nodejs#52413 Reviewed-By: Geoffrey Booth <webadmin@geoffreybooth.com> Reviewed-By: Jacob Smith <jacob@frende.me>
1 parent 2d35d3f commit b5e805f

File tree

7 files changed

+304
-339
lines changed

7 files changed

+304
-339
lines changed

lib/internal/modules/esm/get_format.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
114114
// but this gets called again from `defaultLoad`/`defaultLoadSync`.
115115
if (getOptionValue('--experimental-detect-module')) {
116116
const format = source ?
117-
(containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs') :
117+
(containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs') :
118118
null;
119119
if (format === 'module') {
120120
// This module has a .js extension, a package.json with no `type` field, and ESM syntax.
@@ -158,7 +158,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
158158
if (!source) { return null; }
159159
const format = getFormatOfExtensionlessFile(url);
160160
if (format === 'module') {
161-
return containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs';
161+
return containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs';
162162
}
163163
return format;
164164
}

lib/internal/modules/helpers.js

-34
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,6 @@ const {
1919
} = require('internal/errors').codes;
2020
const { BuiltinModule } = require('internal/bootstrap/realm');
2121

22-
const {
23-
shouldRetryAsESM: contextifyShouldRetryAsESM,
24-
constants: {
25-
syntaxDetectionErrors: {
26-
esmSyntaxErrorMessages,
27-
throwsOnlyInCommonJSErrorMessages,
28-
},
29-
},
30-
} = internalBinding('contextify');
3122
const { validateString } = require('internal/validators');
3223
const fs = require('fs'); // Import all of `fs` so that it can be monkey-patched.
3324
const internalFS = require('internal/fs/utils');
@@ -338,30 +329,6 @@ function urlToFilename(url) {
338329
return url;
339330
}
340331

341-
let esmSyntaxErrorMessagesSet; // Declared lazily in shouldRetryAsESM
342-
let throwsOnlyInCommonJSErrorMessagesSet; // Declared lazily in shouldRetryAsESM
343-
/**
344-
* After an attempt to parse a module as CommonJS throws an error, should we try again as ESM?
345-
* We only want to try again as ESM if the error is due to syntax that is only valid in ESM; and if the CommonJS parse
346-
* throws on an error that would not have been a syntax error in ESM (like via top-level `await` or a lexical
347-
* redeclaration of one of the CommonJS variables) then we need to parse again to see if it would have thrown in ESM.
348-
* @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS
349-
* @param {string} source Module contents
350-
*/
351-
function shouldRetryAsESM(errorMessage, source) {
352-
esmSyntaxErrorMessagesSet ??= new SafeSet(esmSyntaxErrorMessages);
353-
if (esmSyntaxErrorMessagesSet.has(errorMessage)) {
354-
return true;
355-
}
356-
357-
throwsOnlyInCommonJSErrorMessagesSet ??= new SafeSet(throwsOnlyInCommonJSErrorMessages);
358-
if (throwsOnlyInCommonJSErrorMessagesSet.has(errorMessage)) {
359-
return /** @type {boolean} */(contextifyShouldRetryAsESM(source));
360-
}
361-
362-
return false;
363-
}
364-
365332

366333
// Whether we have started executing any user-provided CJS code.
367334
// This is set right before we call the wrapped CJS code (not after,
@@ -396,7 +363,6 @@ module.exports = {
396363
loadBuiltinModule,
397364
makeRequireFunction,
398365
normalizeReferrerURL,
399-
shouldRetryAsESM,
400366
stripBOM,
401367
toRealPath,
402368
hasStartedUserCJSExecution() {

lib/internal/modules/run_main.js

+16-10
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
'use strict';
22

33
const {
4+
ObjectGetPrototypeOf,
45
StringPrototypeEndsWith,
6+
SyntaxErrorPrototype,
57
} = primordials;
68

79
const { getOptionValue } = require('internal/options');
@@ -156,27 +158,29 @@ function runEntryPointWithESMLoader(callback) {
156158
function executeUserEntryPoint(main = process.argv[1]) {
157159
const resolvedMain = resolveMainPath(main);
158160
const useESMLoader = shouldUseESMLoader(resolvedMain);
159-
161+
let mainURL;
160162
// Unless we know we should use the ESM loader to handle the entry point per the checks in `shouldUseESMLoader`, first
161163
// try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM.
162164
let retryAsESM = false;
163165
if (!useESMLoader) {
164166
const cjsLoader = require('internal/modules/cjs/loader');
165167
const { Module } = cjsLoader;
166168
if (getOptionValue('--experimental-detect-module')) {
169+
// TODO(joyeecheung): handle this in the CJS loader. Don't try-catch here.
167170
try {
168171
// Module._load is the monkey-patchable CJS module loader.
169172
Module._load(main, null, true);
170173
} catch (error) {
171-
const source = cjsLoader.entryPointSource;
172-
const { shouldRetryAsESM } = require('internal/modules/helpers');
173-
retryAsESM = shouldRetryAsESM(error.message, source);
174-
// In case the entry point is a large file, such as a bundle,
175-
// ensure no further references can prevent it being garbage-collected.
176-
cjsLoader.entryPointSource = undefined;
174+
if (error != null && ObjectGetPrototypeOf(error) === SyntaxErrorPrototype) {
175+
const { shouldRetryAsESM } = internalBinding('contextify');
176+
const mainPath = resolvedMain || main;
177+
mainURL = pathToFileURL(mainPath).href;
178+
retryAsESM = shouldRetryAsESM(error.message, cjsLoader.entryPointSource, mainURL);
179+
// In case the entry point is a large file, such as a bundle,
180+
// ensure no further references can prevent it being garbage-collected.
181+
cjsLoader.entryPointSource = undefined;
182+
}
177183
if (!retryAsESM) {
178-
const { enrichCJSError } = require('internal/modules/esm/translators');
179-
enrichCJSError(error, source, resolvedMain);
180184
throw error;
181185
}
182186
}
@@ -187,7 +191,9 @@ function executeUserEntryPoint(main = process.argv[1]) {
187191

188192
if (useESMLoader || retryAsESM) {
189193
const mainPath = resolvedMain || main;
190-
const mainURL = pathToFileURL(mainPath).href;
194+
if (mainURL === undefined) {
195+
mainURL = pathToFileURL(mainPath).href;
196+
}
191197

192198
runEntryPointWithESMLoader((cascadedLoader) => {
193199
// Note that if the graph contains unfinished TLA, this may never resolve

src/module_wrap.cc

+93-30
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,18 @@ ModuleWrap* ModuleWrap::GetFromModule(Environment* env,
102102
return nullptr;
103103
}
104104

105-
// new ModuleWrap(url, context, source, lineOffset, columnOffset, cachedData)
105+
106+
Local<PrimitiveArray> ModuleWrap::GetHostDefinedOptions(
107+
Isolate* isolate, Local<Symbol> id_symbol) {
108+
Local<PrimitiveArray> host_defined_options =
109+
PrimitiveArray::New(isolate, HostDefinedOptions::kLength);
110+
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
111+
return host_defined_options;
112+
}
113+
114+
// new ModuleWrap(url, context, source, lineOffset, columnOffset[, cachedData]);
106115
// new ModuleWrap(url, context, source, lineOffset, columOffset,
107-
// hostDefinedOption)
116+
// idSymbol);
108117
// new ModuleWrap(url, context, exportNames, evaluationCallback[, cjsModule])
109118
void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
110119
CHECK(args.IsConstructCall());
@@ -134,7 +143,7 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
134143
int column_offset = 0;
135144

136145
bool synthetic = args[2]->IsArray();
137-
146+
bool can_use_builtin_cache = false;
138147
Local<PrimitiveArray> host_defined_options =
139148
PrimitiveArray::New(isolate, HostDefinedOptions::kLength);
140149
Local<Symbol> id_symbol;
@@ -143,20 +152,24 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
143152
// cjsModule])
144153
CHECK(args[3]->IsFunction());
145154
} else {
146-
// new ModuleWrap(url, context, source, lineOffset, columOffset, cachedData)
155+
// new ModuleWrap(url, context, source, lineOffset, columOffset[,
156+
// cachedData]);
147157
// new ModuleWrap(url, context, source, lineOffset, columOffset,
148-
// hostDefinedOption)
158+
// idSymbol);
149159
CHECK(args[2]->IsString());
150160
CHECK(args[3]->IsNumber());
151161
line_offset = args[3].As<Int32>()->Value();
152162
CHECK(args[4]->IsNumber());
153163
column_offset = args[4].As<Int32>()->Value();
154164
if (args[5]->IsSymbol()) {
155165
id_symbol = args[5].As<Symbol>();
166+
can_use_builtin_cache =
167+
(id_symbol ==
168+
realm->isolate_data()->source_text_module_default_hdo());
156169
} else {
157170
id_symbol = Symbol::New(isolate, url);
158171
}
159-
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
172+
host_defined_options = GetHostDefinedOptions(isolate, id_symbol);
160173

161174
if (that->SetPrivate(context,
162175
realm->isolate_data()->host_defined_option_symbol(),
@@ -189,36 +202,34 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
189202
module = Module::CreateSyntheticModule(isolate, url, export_names,
190203
SyntheticModuleEvaluationStepsCallback);
191204
} else {
192-
ScriptCompiler::CachedData* cached_data = nullptr;
205+
// When we are compiling for the default loader, this will be
206+
// std::nullopt, and CompileSourceTextModule() should use
207+
// on-disk cache (not present on v20.x).
208+
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data;
209+
if (id_symbol !=
210+
realm->isolate_data()->source_text_module_default_hdo()) {
211+
user_cached_data = nullptr;
212+
}
193213
if (args[5]->IsArrayBufferView()) {
214+
CHECK(!can_use_builtin_cache); // We don't use this option internally.
194215
Local<ArrayBufferView> cached_data_buf = args[5].As<ArrayBufferView>();
195216
uint8_t* data =
196217
static_cast<uint8_t*>(cached_data_buf->Buffer()->Data());
197-
cached_data =
218+
user_cached_data =
198219
new ScriptCompiler::CachedData(data + cached_data_buf->ByteOffset(),
199220
cached_data_buf->ByteLength());
200221
}
201-
202222
Local<String> source_text = args[2].As<String>();
203-
ScriptOrigin origin(isolate,
204-
url,
205-
line_offset,
206-
column_offset,
207-
true, // is cross origin
208-
-1, // script id
209-
Local<Value>(), // source map URL
210-
false, // is opaque (?)
211-
false, // is WASM
212-
true, // is ES Module
213-
host_defined_options);
214-
ScriptCompiler::Source source(source_text, origin, cached_data);
215-
ScriptCompiler::CompileOptions options;
216-
if (source.GetCachedData() == nullptr) {
217-
options = ScriptCompiler::kNoCompileOptions;
218-
} else {
219-
options = ScriptCompiler::kConsumeCodeCache;
220-
}
221-
if (!ScriptCompiler::CompileModule(isolate, &source, options)
223+
224+
bool cache_rejected = false;
225+
if (!CompileSourceTextModule(realm,
226+
source_text,
227+
url,
228+
line_offset,
229+
column_offset,
230+
host_defined_options,
231+
user_cached_data,
232+
&cache_rejected)
222233
.ToLocal(&module)) {
223234
if (try_catch.HasCaught() && !try_catch.HasTerminated()) {
224235
CHECK(!try_catch.Message().IsEmpty());
@@ -231,8 +242,9 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
231242
}
232243
return;
233244
}
234-
if (options == ScriptCompiler::kConsumeCodeCache &&
235-
source.GetCachedData()->rejected) {
245+
246+
if (user_cached_data.has_value() && user_cached_data.value() != nullptr &&
247+
cache_rejected) {
236248
THROW_ERR_VM_MODULE_CACHED_DATA_REJECTED(
237249
realm, "cachedData buffer was rejected");
238250
try_catch.ReThrow();
@@ -275,6 +287,57 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
275287
args.GetReturnValue().Set(that);
276288
}
277289

290+
MaybeLocal<Module> ModuleWrap::CompileSourceTextModule(
291+
Realm* realm,
292+
Local<String> source_text,
293+
Local<String> url,
294+
int line_offset,
295+
int column_offset,
296+
Local<PrimitiveArray> host_defined_options,
297+
std::optional<ScriptCompiler::CachedData*> user_cached_data,
298+
bool* cache_rejected) {
299+
Isolate* isolate = realm->isolate();
300+
EscapableHandleScope scope(isolate);
301+
ScriptOrigin origin(isolate,
302+
url,
303+
line_offset,
304+
column_offset,
305+
true, // is cross origin
306+
-1, // script id
307+
Local<Value>(), // source map URL
308+
false, // is opaque (?)
309+
false, // is WASM
310+
true, // is ES Module
311+
host_defined_options);
312+
ScriptCompiler::CachedData* cached_data = nullptr;
313+
// When compiling for the default loader, user_cached_data is std::nullptr.
314+
// When compiling for vm.Module, it's either nullptr or a pointer to the
315+
// cached data.
316+
if (user_cached_data.has_value()) {
317+
cached_data = user_cached_data.value();
318+
}
319+
320+
ScriptCompiler::Source source(source_text, origin, cached_data);
321+
ScriptCompiler::CompileOptions options;
322+
if (cached_data == nullptr) {
323+
options = ScriptCompiler::kNoCompileOptions;
324+
} else {
325+
options = ScriptCompiler::kConsumeCodeCache;
326+
}
327+
328+
Local<Module> module;
329+
if (!ScriptCompiler::CompileModule(isolate, &source, options)
330+
.ToLocal(&module)) {
331+
return scope.EscapeMaybe(MaybeLocal<Module>());
332+
}
333+
334+
if (options == ScriptCompiler::kConsumeCodeCache) {
335+
*cache_rejected = source.GetCachedData()->rejected;
336+
}
337+
338+
return scope.Escape(module);
339+
}
340+
278341
static Local<Object> createImportAttributesContainer(
279342
Realm* realm,
280343
Isolate* isolate,

src/module_wrap.h

+20-1
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33

44
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
55

6-
#include <unordered_map>
6+
#include <optional>
77
#include <string>
8+
#include <unordered_map>
89
#include <vector>
910
#include "base_object.h"
11+
#include "v8-script.h"
1012

1113
namespace node {
1214

@@ -68,6 +70,23 @@ class ModuleWrap : public BaseObject {
6870
return true;
6971
}
7072

73+
static v8::Local<v8::PrimitiveArray> GetHostDefinedOptions(
74+
v8::Isolate* isolate, v8::Local<v8::Symbol> symbol);
75+
76+
// When user_cached_data is not std::nullopt, use the code cache if it's not
77+
// nullptr, otherwise don't use code cache.
78+
// TODO(joyeecheung): when it is std::nullopt, use on-disk cache
79+
// See: https://github.com/nodejs/node/issues/47472
80+
static v8::MaybeLocal<v8::Module> CompileSourceTextModule(
81+
Realm* realm,
82+
v8::Local<v8::String> source_text,
83+
v8::Local<v8::String> url,
84+
int line_offset,
85+
int column_offset,
86+
v8::Local<v8::PrimitiveArray> host_defined_options,
87+
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data,
88+
bool* cache_rejected);
89+
7190
private:
7291
ModuleWrap(Realm* realm,
7392
v8::Local<v8::Object> object,

0 commit comments

Comments
 (0)