Skip to content

Commit 179dabd

Browse files
committed
module: detect ESM syntax by trying to recompile as SourceTextModule
Instead of using an async function wrapper, just try compiling code with unknown module format as SourceTextModule when it cannot be compiled as CJS and the error message indicates that it's worth a retry. If it can be parsed as SourceTextModule then it's considered ESM. Also, move shouldRetryAsESM() to C++ completely so that we can reuse it in the CJS module loader for require(esm). Drive-by: move methods that don't belong to ContextifyContext out as static methods and move GetHostDefinedOptions to ModuleWrap.
1 parent 756acd0 commit 179dabd

File tree

7 files changed

+287
-337
lines changed

7 files changed

+287
-337
lines changed

lib/internal/modules/esm/get_format.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
114114
// but this gets called again from `defaultLoad`/`defaultLoadSync`.
115115
if (getOptionValue('--experimental-detect-module')) {
116116
const format = source ?
117-
(containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs') :
117+
(containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs') :
118118
null;
119119
if (format === 'module') {
120120
// This module has a .js extension, a package.json with no `type` field, and ESM syntax.
@@ -158,7 +158,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
158158
if (!source) { return null; }
159159
const format = getFormatOfExtensionlessFile(url);
160160
if (format === 'module') {
161-
return containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs';
161+
return containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs';
162162
}
163163
return format;
164164
}

lib/internal/modules/helpers.js

-35
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,6 @@ const {
1919
} = require('internal/errors').codes;
2020
const { BuiltinModule } = require('internal/bootstrap/realm');
2121

22-
const {
23-
shouldRetryAsESM: contextifyShouldRetryAsESM,
24-
constants: {
25-
syntaxDetectionErrors: {
26-
esmSyntaxErrorMessages,
27-
throwsOnlyInCommonJSErrorMessages,
28-
},
29-
},
30-
} = internalBinding('contextify');
3122
const { validateString } = require('internal/validators');
3223
const fs = require('fs'); // Import all of `fs` so that it can be monkey-patched.
3324
const internalFS = require('internal/fs/utils');
@@ -329,31 +320,6 @@ function normalizeReferrerURL(referrerName) {
329320
}
330321

331322

332-
let esmSyntaxErrorMessagesSet; // Declared lazily in shouldRetryAsESM
333-
let throwsOnlyInCommonJSErrorMessagesSet; // Declared lazily in shouldRetryAsESM
334-
/**
335-
* After an attempt to parse a module as CommonJS throws an error, should we try again as ESM?
336-
* We only want to try again as ESM if the error is due to syntax that is only valid in ESM; and if the CommonJS parse
337-
* throws on an error that would not have been a syntax error in ESM (like via top-level `await` or a lexical
338-
* redeclaration of one of the CommonJS variables) then we need to parse again to see if it would have thrown in ESM.
339-
* @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS
340-
* @param {string} source Module contents
341-
*/
342-
function shouldRetryAsESM(errorMessage, source) {
343-
esmSyntaxErrorMessagesSet ??= new SafeSet(esmSyntaxErrorMessages);
344-
if (esmSyntaxErrorMessagesSet.has(errorMessage)) {
345-
return true;
346-
}
347-
348-
throwsOnlyInCommonJSErrorMessagesSet ??= new SafeSet(throwsOnlyInCommonJSErrorMessages);
349-
if (throwsOnlyInCommonJSErrorMessagesSet.has(errorMessage)) {
350-
return /** @type {boolean} */(contextifyShouldRetryAsESM(source));
351-
}
352-
353-
return false;
354-
}
355-
356-
357323
// Whether we have started executing any user-provided CJS code.
358324
// This is set right before we call the wrapped CJS code (not after,
359325
// in case we are half-way in the execution when internals check this).
@@ -373,7 +339,6 @@ module.exports = {
373339
loadBuiltinModule,
374340
makeRequireFunction,
375341
normalizeReferrerURL,
376-
shouldRetryAsESM,
377342
stripBOM,
378343
toRealPath,
379344
hasStartedUserCJSExecution() {

lib/internal/modules/run_main.js

+16-10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
'use strict';
22

33
const {
4+
ObjectGetPrototypeOf,
5+
SyntaxErrorPrototype,
46
StringPrototypeEndsWith,
57
globalThis,
68
} = primordials;
@@ -159,27 +161,29 @@ function runEntryPointWithESMLoader(callback) {
159161
function executeUserEntryPoint(main = process.argv[1]) {
160162
const resolvedMain = resolveMainPath(main);
161163
const useESMLoader = shouldUseESMLoader(resolvedMain);
162-
164+
let mainURL;
163165
// Unless we know we should use the ESM loader to handle the entry point per the checks in `shouldUseESMLoader`, first
164166
// try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM.
165167
let retryAsESM = false;
166168
if (!useESMLoader) {
167169
const cjsLoader = require('internal/modules/cjs/loader');
168170
const { Module } = cjsLoader;
169171
if (getOptionValue('--experimental-detect-module')) {
172+
// TODO(joyeecheung): handle this in the CJS loader. Don't try-catch here.
170173
try {
171174
// Module._load is the monkey-patchable CJS module loader.
172175
Module._load(main, null, true);
173176
} catch (error) {
174-
const source = cjsLoader.entryPointSource;
175-
const { shouldRetryAsESM } = require('internal/modules/helpers');
176-
retryAsESM = shouldRetryAsESM(error.message, source);
177-
// In case the entry point is a large file, such as a bundle,
178-
// ensure no further references can prevent it being garbage-collected.
179-
cjsLoader.entryPointSource = undefined;
177+
if (ObjectGetPrototypeOf(error) === SyntaxErrorPrototype) {
178+
const { shouldRetryAsESM } = internalBinding('contextify');
179+
const mainPath = resolvedMain || main;
180+
mainURL = pathToFileURL(mainPath).href;
181+
retryAsESM = shouldRetryAsESM(error.message, cjsLoader.entryPointSource, mainPath);
182+
// In case the entry point is a large file, such as a bundle,
183+
// ensure no further references can prevent it being garbage-collected.
184+
cjsLoader.entryPointSource = undefined;
185+
}
180186
if (!retryAsESM) {
181-
const { enrichCJSError } = require('internal/modules/esm/translators');
182-
enrichCJSError(error, source, resolvedMain);
183187
throw error;
184188
}
185189
}
@@ -190,7 +194,9 @@ function executeUserEntryPoint(main = process.argv[1]) {
190194

191195
if (useESMLoader || retryAsESM) {
192196
const mainPath = resolvedMain || main;
193-
const mainURL = pathToFileURL(mainPath).href;
197+
if (mainURL === undefined) {
198+
mainURL = pathToFileURL(mainPath).href;
199+
}
194200

195201
runEntryPointWithESMLoader((cascadedLoader) => {
196202
// Note that if the graph contains unsettled TLA, this may never resolve

src/module_wrap.cc

+82-33
Original file line numberDiff line numberDiff line change
@@ -142,10 +142,18 @@ v8::Maybe<bool> ModuleWrap::CheckUnsettledTopLevelAwait() {
142142
return v8::Just(false);
143143
}
144144

145-
// new ModuleWrap(url, context, source, lineOffset, columnOffset, cachedData)
145+
Local<PrimitiveArray> ModuleWrap::GetHostDefinedOptions(
146+
Isolate* isolate, Local<Symbol> id_symbol) {
147+
Local<PrimitiveArray> host_defined_options =
148+
PrimitiveArray::New(isolate, HostDefinedOptions::kLength);
149+
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
150+
return host_defined_options;
151+
}
152+
153+
// new ModuleWrap(url, context, source, lineOffset, columnOffset[, cachedData]);
146154
// new ModuleWrap(url, context, source, lineOffset, columOffset,
147-
// hostDefinedOption) new ModuleWrap(url, context, exportNames,
148-
// syntheticExecutionFunction)
155+
// idSymbol);
156+
// new ModuleWrap(url, context, exportNames, syntheticExecutionFunction)
149157
void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
150158
CHECK(args.IsConstructCall());
151159
CHECK_GE(args.Length(), 3);
@@ -174,17 +182,16 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
174182
int column_offset = 0;
175183

176184
bool synthetic = args[2]->IsArray();
177-
178-
Local<PrimitiveArray> host_defined_options =
179-
PrimitiveArray::New(isolate, HostDefinedOptions::kLength);
185+
Local<PrimitiveArray> host_defined_options;
180186
Local<Symbol> id_symbol;
181187
if (synthetic) {
182188
// new ModuleWrap(url, context, exportNames, syntheticExecutionFunction)
183189
CHECK(args[3]->IsFunction());
184190
} else {
185-
// new ModuleWrap(url, context, source, lineOffset, columOffset, cachedData)
191+
// new ModuleWrap(url, context, source, lineOffset, columOffset[,
192+
// cachedData]);
186193
// new ModuleWrap(url, context, source, lineOffset, columOffset,
187-
// hostDefinedOption)
194+
// idSymbol);
188195
CHECK(args[2]->IsString());
189196
CHECK(args[3]->IsNumber());
190197
line_offset = args[3].As<Int32>()->Value();
@@ -195,7 +202,7 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
195202
} else {
196203
id_symbol = Symbol::New(isolate, url);
197204
}
198-
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
205+
host_defined_options = GetHostDefinedOptions(isolate, id_symbol);
199206

200207
if (that->SetPrivate(context,
201208
realm->isolate_data()->host_defined_option_symbol(),
@@ -230,36 +237,32 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
230237
module = Module::CreateSyntheticModule(
231238
isolate, url, span, SyntheticModuleEvaluationStepsCallback);
232239
} else {
233-
ScriptCompiler::CachedData* cached_data = nullptr;
240+
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data;
241+
if (id_symbol !=
242+
realm->isolate_data()->source_text_module_default_hdo()) {
243+
// TODO(joyeecheung): when we are compiling for the default loader, this
244+
// will be std::nullopt, and CompileSourceTextModule() should use
245+
// on-disk cache. See: https://github.com/nodejs/node/issues/47472
246+
user_cached_data = nullptr;
247+
}
234248
if (args[5]->IsArrayBufferView()) {
235249
Local<ArrayBufferView> cached_data_buf = args[5].As<ArrayBufferView>();
236250
uint8_t* data =
237251
static_cast<uint8_t*>(cached_data_buf->Buffer()->Data());
238-
cached_data =
252+
user_cached_data =
239253
new ScriptCompiler::CachedData(data + cached_data_buf->ByteOffset(),
240254
cached_data_buf->ByteLength());
241255
}
242-
243256
Local<String> source_text = args[2].As<String>();
244-
ScriptOrigin origin(isolate,
245-
url,
246-
line_offset,
247-
column_offset,
248-
true, // is cross origin
249-
-1, // script id
250-
Local<Value>(), // source map URL
251-
false, // is opaque (?)
252-
false, // is WASM
253-
true, // is ES Module
254-
host_defined_options);
255-
ScriptCompiler::Source source(source_text, origin, cached_data);
256-
ScriptCompiler::CompileOptions options;
257-
if (source.GetCachedData() == nullptr) {
258-
options = ScriptCompiler::kNoCompileOptions;
259-
} else {
260-
options = ScriptCompiler::kConsumeCodeCache;
261-
}
262-
if (!ScriptCompiler::CompileModule(isolate, &source, options)
257+
bool cache_rejected = false;
258+
if (!CompileSourceTextModule(realm,
259+
source_text,
260+
url,
261+
line_offset,
262+
column_offset,
263+
host_defined_options,
264+
user_cached_data,
265+
&cache_rejected)
263266
.ToLocal(&module)) {
264267
if (try_catch.HasCaught() && !try_catch.HasTerminated()) {
265268
CHECK(!try_catch.Message().IsEmpty());
@@ -272,8 +275,9 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
272275
}
273276
return;
274277
}
275-
if (options == ScriptCompiler::kConsumeCodeCache &&
276-
source.GetCachedData()->rejected) {
278+
279+
if (user_cached_data.has_value() && user_cached_data.value() != nullptr &&
280+
cache_rejected) {
277281
THROW_ERR_VM_MODULE_CACHED_DATA_REJECTED(
278282
realm, "cachedData buffer was rejected");
279283
try_catch.ReThrow();
@@ -310,6 +314,51 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
310314
args.GetReturnValue().Set(that);
311315
}
312316

317+
MaybeLocal<Module> ModuleWrap::CompileSourceTextModule(
318+
Realm* realm,
319+
Local<String> source_text,
320+
Local<String> url,
321+
int line_offset,
322+
int column_offset,
323+
Local<PrimitiveArray> host_defined_options,
324+
std::optional<ScriptCompiler::CachedData*> user_cached_data,
325+
bool* cache_rejected) {
326+
Isolate* isolate = realm->isolate();
327+
EscapableHandleScope scope(isolate);
328+
ScriptOrigin origin(isolate,
329+
url,
330+
line_offset,
331+
column_offset,
332+
true, // is cross origin
333+
-1, // script id
334+
Local<Value>(), // source map URL
335+
false, // is opaque (?)
336+
false, // is WASM
337+
true, // is ES Module
338+
host_defined_options);
339+
ScriptCompiler::CachedData* cached_data = nullptr;
340+
if (user_cached_data.has_value()) {
341+
cached_data = user_cached_data.value();
342+
}
343+
ScriptCompiler::Source source(source_text, origin, cached_data);
344+
ScriptCompiler::CompileOptions options;
345+
if (cached_data == nullptr) {
346+
options = ScriptCompiler::kNoCompileOptions;
347+
} else {
348+
options = ScriptCompiler::kConsumeCodeCache;
349+
}
350+
Local<Module> module;
351+
if (!ScriptCompiler::CompileModule(isolate, &source, options)
352+
.ToLocal(&module)) {
353+
return scope.EscapeMaybe(MaybeLocal<Module>());
354+
}
355+
if (cached_data != nullptr) {
356+
*cache_rejected = source.GetCachedData()->rejected;
357+
}
358+
359+
return scope.Escape(module);
360+
}
361+
313362
static Local<Object> createImportAttributesContainer(
314363
Realm* realm,
315364
Isolate* isolate,

src/module_wrap.h

+20-1
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33

44
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
55

6-
#include <unordered_map>
6+
#include <optional>
77
#include <string>
8+
#include <unordered_map>
89
#include <vector>
910
#include "base_object.h"
11+
#include "v8-script.h"
1012

1113
namespace node {
1214

@@ -69,6 +71,23 @@ class ModuleWrap : public BaseObject {
6971
return true;
7072
}
7173

74+
static v8::Local<v8::PrimitiveArray> GetHostDefinedOptions(
75+
v8::Isolate* isolate, v8::Local<v8::Symbol> symbol);
76+
77+
// When user_cached_data is not std::nullopt, use the code cache if it's not
78+
// nullptr, otherwise don't use code cache.
79+
// TODO(joyeecheung): when it is std::nullopt, use on-disk cache
80+
// See: https://github.com/nodejs/node/issues/47472
81+
static v8::MaybeLocal<v8::Module> CompileSourceTextModule(
82+
Realm* realm,
83+
v8::Local<v8::String> source_text,
84+
v8::Local<v8::String> url,
85+
int line_offset,
86+
int column_offset,
87+
v8::Local<v8::PrimitiveArray> host_defined_options,
88+
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data,
89+
bool* cache_rejected);
90+
7291
private:
7392
ModuleWrap(Realm* realm,
7493
v8::Local<v8::Object> object,

0 commit comments

Comments
 (0)