Skip to content

Commit b3dd0fd

Browse files
authored
Merge pull request #5142 from cloudflare/jasnell/start-enabling-nmr-internal
2 parents e4a9b57 + dd00a33 commit b3dd0fd

File tree

6 files changed

+339
-134
lines changed

6 files changed

+339
-134
lines changed

src/workerd/api/tests/new-module-registry-test.js

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,54 @@ await rejects(import('module-not-found'), {
139139
message: /Module not found: file:\/\/\/bundle\/module-not-found/,
140140
});
141141

142+
await rejects(import('file:///outside'), {
143+
message: /Module not found/,
144+
});
145+
146+
await import('file:///bundle/outside');
147+
148+
const abc123 = await import('abc123');
149+
strictEqual(abc123.default, 1);
150+
151+
// Full URLs can be used as module specifiers. These would not
152+
// actually resolve to network requests.
153+
const mod = await import('https://example.com/mod');
154+
strictEqual(mod.default, 'example');
155+
156+
// Whitespace and leading ./, ../, and multiple slashes are
157+
// stripped and ignored when setting up the module registry,
158+
// so they should resolve as expected here as being relative
159+
// to the bundle base URL.
160+
const mod2 = await import(' ./should/be/ok ');
161+
strictEqual(mod2.default, 1);
162+
163+
// UTF-8 percent-encoded of 部品 (Japanese for "component")
164+
const mod3 = await import('%E9%83%A8%E5%93%81');
165+
const mod4 = await import('部品'); // Get's converted into UTF-8 bytes in source
166+
const mod5 = await import('\u90e8\u54c1'); // Specifically UTF-16 code units in source
167+
const mod6 = await import('\xE9\x83\xA8\xE5\x93\x81');
168+
import { default as mod7 } from '部品';
169+
import { default as mod8 } from '\u90e8\u54c1';
170+
import { default as mod9 } from '\xE9\x83\xA8\xE5\x93\x81';
171+
import { default as mod10 } from '%E9%83%A8%E5%93%81';
172+
173+
strictEqual(mod3.default, 1);
174+
strictEqual(mod4.default, 1);
175+
strictEqual(mod5.default, 1);
176+
strictEqual(mod6.default, 1);
177+
strictEqual(mod7, 1);
178+
strictEqual(mod8, 1);
179+
strictEqual(mod9, 1);
180+
strictEqual(mod10, 1);
181+
182+
// The percent-encoded UTF-16 form of 部品 should not work.
183+
await rejects(import('%E8%90%C1%54'), {
184+
message: /Module not found/,
185+
});
186+
await rejects(import('%90%E8%54%C1'), {
187+
message: /Module not found/,
188+
});
189+
142190
// Verify that a module is unable to perform IO operations at the top level, even if
143191
// the dynamic import is initiated within the scope of an active IoContext.
144192
export const noTopLevelIo = {
@@ -260,6 +308,17 @@ export const wasmDynamicSourcePhaseFailureTest = {
260308
},
261309
};
262310

311+
export const complexModuleTest = {
312+
async test() {
313+
const { abc } = await import('complex');
314+
strictEqual(abc.foo, 1);
315+
strictEqual(abc.def.bar, 2);
316+
317+
const { default: abc2 } = await import('abc');
318+
strictEqual(abc2, 'file:///bundle/abc');
319+
},
320+
};
321+
263322
// TODO(now): Tests
264323
// * [x] Include tests for all known module types
265324
// * [x] ESM

src/workerd/api/tests/new-module-registry-test.wd-test

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,33 @@ const unitTests :Workerd.Config = (
4141
(name = "json", json = "{ \"foo\": 1 }"),
4242
(name = "invalid-json", json = "1n"),
4343

44-
(name = "wasm", wasm = embed "test.wasm")
44+
(name = "wasm", wasm = embed "test.wasm"),
45+
46+
(name = "complex", esModule = "import * as abc from '././././././complex/complex2'; export { abc };"),
47+
(name = "complex/complex2", esModule = "import * as def from '/bundle/complex/complex3'; export { def }; export const foo = 1;"),
48+
(name = "complex/complex3", esModule = "export const bar = 2;"),
49+
50+
# When the user bundle name begins with a slash, it is stripped before
51+
# processing, ensuring that the module is still processed relative to
52+
# the bundle base URL. Query strings and fragments are ignored.
53+
(name = "/%61bc?abc#123", esModule = "export default import.meta.url"),
54+
55+
# A module name should not be able to escape the bundle base URL,
56+
# and leading/trailing whitespace should be trimmed.
57+
(name = " ..//////outside ", esModule = "export default 1;"),
58+
59+
# Should resolve to "abc123" after normalization.
60+
(name = "/foo/../bar/../../../../////abc123", esModule = "export default 1;"),
61+
62+
# It should be possible to have a module whose name is a valid URL.
63+
(name = "https://example.com/mod", esModule = "export default 'example';"),
64+
65+
# Percent-encoded characters in the path should be normalized, and absolute
66+
# file URLs are accepted as long as they are under the bundle base URL.
67+
(name = "file:///bundl%65/should/b%65/ok", esModule = "export default 1;"),
68+
69+
# Unicode characters in the path should be handled as UTF-8 and supported.
70+
(name = "部品", esModule = "export default 1;"),
4571
],
4672
compatibilityDate = "2025-05-01",
4773
compatibilityFlags = [

src/workerd/io/worker-modules.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ static kj::Arc<jsg::modules::ModuleRegistry> newWorkerModuleRegistry(
104104
const CompatibilityFlags::Reader& featureFlags,
105105
const jsg::Url& bundleBase,
106106
auto setupForApi,
107-
jsg::modules::ModuleRegistry::Builder::Options options) {
107+
jsg::modules::ModuleRegistry::Builder::Options options =
108+
jsg::modules::ModuleRegistry::Builder::Options::NONE) {
108109
jsg::modules::ModuleRegistry::Builder builder(resolveObserver, bundleBase, options);
109110

110111
// This callback is used when a module is being loaded to arrange evaluating the
@@ -206,13 +207,14 @@ static kj::Arc<jsg::modules::ModuleRegistry> newWorkerModuleRegistry(
206207
break;
207208
}
208209
KJ_CASE_ONEOF(content, Worker::Script::PythonModule) {
209-
KJ_REQUIRE(featureFlags.getPythonWorkers(),
210-
"The python_workers compatibility flag is required to use Python.");
211-
firstEsm = false;
212-
hasPythonModules = true;
213-
kj::StringPtr entry = PYTHON_ENTRYPOINT;
214-
bundleBuilder.addEsmModule(def.name, entry);
215-
break;
210+
KJ_FAIL_ASSERT("Python modules are not currently supported with the new module registry");
211+
// KJ_REQUIRE(featureFlags.getPythonWorkers(),
212+
// "The python_workers compatibility flag is required to use Python.");
213+
// firstEsm = false;
214+
// hasPythonModules = true;
215+
// kj::StringPtr entry = PYTHON_ENTRYPOINT;
216+
// bundleBuilder.addEsmModule(def.name, entry);
217+
// break;
216218
}
217219
KJ_CASE_ONEOF(content, Worker::Script::PythonRequirement) {
218220
// Handled separately

src/workerd/jsg/modules-new.c++

Lines changed: 114 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,22 @@ kj::Maybe<const Module&> checkModule(const ResolveContext& context, const Module
2525
return module;
2626
};
2727

28+
kj::String specifierToString(jsg::Lock& js, v8::Local<v8::String> spec) {
29+
// Source files in workers end up being converted to UTF-8 bytes, so if the specifier
30+
// string contains non-ASCII unicode characters, those will be directly encoded as UTF-8
31+
// bytes, which unfortunately end up double-encoded if we try to read them using the
32+
// regular js.toString() method. Doh! Fortunately they come through as one-byte strings,
33+
// so we can detect that case and handle those correctly here.
34+
if (spec->ContainsOnlyOneByte()) {
35+
auto buf = kj::heapArray<char>(spec->Length() + 1);
36+
spec->WriteOneByteV2(js.v8Isolate, 0, spec->Length(), buf.asBytes().begin(),
37+
v8::String::WriteFlags::kNullTerminate);
38+
KJ_ASSERT(buf[buf.size() - 1] == '\0');
39+
return kj::String(kj::mv(buf));
40+
}
41+
return js.toString(spec);
42+
}
43+
2844
// Ensure that the given module has been instantiated or errored.
2945
// If false is returned, then an exception should have been scheduled
3046
// on the isolate.
@@ -746,7 +762,7 @@ v8::MaybeLocal<v8::Promise> dynamicImportModuleCallback(v8::Local<v8::Context> c
746762
auto& registry = IsolateModuleRegistry::from(js.v8Isolate);
747763
try {
748764
return js.tryCatch([&]() -> v8::MaybeLocal<v8::Promise> {
749-
auto spec = js.toString(specifier);
765+
auto spec = specifierToString(js, specifier);
750766

751767
// The proposed specification for import attributes strongly recommends that
752768
// embedders reject import attributes and types they do not understand/implement.
@@ -862,7 +878,7 @@ v8::MaybeLocal<std::conditional_t<IsSourcePhase, v8::Object, v8::Module>> resolv
862878
auto& registry = IsolateModuleRegistry::from(js.v8Isolate);
863879

864880
return js.tryCatch([&]() -> v8::MaybeLocal<ReturnType> {
865-
auto spec = kj::str(specifier);
881+
auto spec = specifierToString(js, specifier);
866882

867883
// The proposed specification for import attributes strongly recommends that
868884
// embedders reject import attributes and types they do not understand/implement.
@@ -1201,11 +1217,100 @@ ModuleBundle::BundleBuilder::BundleBuilder(const jsg::Url& bundleBase)
12011217
: ModuleBundle::Builder(Type::BUNDLE),
12021218
bundleBase(bundleBase) {}
12031219

1220+
namespace {
1221+
static constexpr auto BUNDLE_CLONE_OPTIONS = jsg::Url::EquivalenceOption::IGNORE_FRAGMENTS |
1222+
jsg::Url::EquivalenceOption::IGNORE_SEARCH | jsg::Url::EquivalenceOption::NORMALIZE_PATH;
1223+
1224+
// Takes the user-provided module name and normalizes it to a form that can be
1225+
// resolved relative to the bundle base. This involves pre-parsing the name as a URL
1226+
// relative to a dummy base URL in order to normalize out dot and double-dot segments,
1227+
// then stripping off any leading slashes so that the name is always relative and cannot
1228+
// be interpreted as an absolute path.
1229+
jsg::Url normalizeModuleName(kj::StringPtr name, const jsg::Url& base) {
1230+
// This first step normalizes out path segments like "." and "..", drops query
1231+
// strings and fragments, and normalizes percent-encoding in the path.
1232+
auto url = KJ_ASSERT_NONNULL(base.tryResolve(name)).clone(BUNDLE_CLONE_OPTIONS);
1233+
1234+
// If the protocol is not file:, then we don't need to do any more processing
1235+
// here. We will check the validity of the result as a module URL in the next
1236+
// step.
1237+
if (url.getProtocol() != "file:"_kj) {
1238+
return kj::mv(url);
1239+
}
1240+
1241+
auto urlPath = url.getPathname();
1242+
auto basePath = base.getPathname();
1243+
1244+
// The url path must not be identical to the base...
1245+
KJ_REQUIRE(urlPath != basePath, "Invalid empty module name");
1246+
1247+
// If the url path starts with the base path, then we're good!
1248+
if (urlPath.startsWith(basePath)) {
1249+
return kj::mv(url);
1250+
}
1251+
1252+
// Otherwise, let's make sure that the url path is processed as
1253+
// relative to the base path. We do this by stripping off any
1254+
// leading slashes from the front of the URL then re-resolve
1255+
// against the base. This should be an exceedingly rare edge
1256+
// case if the worker bundle is being constructed properly. It's
1257+
// meant only to handle cases where silliness like "///foo" is
1258+
// given as a module name.
1259+
while (urlPath.startsWith("/"_kj) && urlPath.size() > 0) {
1260+
urlPath = urlPath.slice(1);
1261+
}
1262+
KJ_REQUIRE(urlPath.size() > 0, "Invalid empty module name");
1263+
1264+
return KJ_ASSERT_NONNULL(base.tryResolve(urlPath));
1265+
}
1266+
1267+
bool isValidBundleModuleUrl(const jsg::Url& url, const jsg::Url& base) {
1268+
KJ_DASSERT(base.getProtocol() == "file:"_kj);
1269+
KJ_DASSERT(base.getPathname().endsWith("/"_kj));
1270+
1271+
// Let's forbid users from using cloudflare: and workerd: URLs in bundles so that
1272+
// we can protect those namespaces for our own future use. Specifically, these
1273+
// should only be used by the runtime to refer to built-in modules. We don't
1274+
// restrict other non-standard protocols like node:
1275+
KJ_REQUIRE(url.getProtocol() != "cloudflare:"_kj,
1276+
"The cloudflare: protocol is reserved and cannot be used in module bundles");
1277+
KJ_REQUIRE(url.getProtocol() != "workerd:"_kj,
1278+
"The workerd: protocol is reserved and cannot be used in module bundles");
1279+
1280+
if (url.getProtocol() != "file:"_kj) {
1281+
// Different protocols are always OK
1282+
return true;
1283+
}
1284+
1285+
// Module file: URLs must not have a host component.
1286+
// We already know the protocol is "file:" here because of the check above.
1287+
if (url.getHost() != ""_kj) {
1288+
return false;
1289+
}
1290+
1291+
// Check if url is subordinate to the base.
1292+
// This means url's path should start with base's path as a prefix
1293+
auto aPath = url.getPathname();
1294+
auto bPath = base.getPathname();
1295+
1296+
return aPath.startsWith(bPath);
1297+
}
1298+
1299+
// Converts the name given for a user-bundle module into a fully qualified module url.
1300+
// This involves normalizing the name such that it is relative to the bundle base, removes
1301+
// any query parameters or fragments, removes dot and double-dot path segments, normalizes
1302+
// percent-encoding, and otherwise validates that the resulting URL is a valid URL.
1303+
const jsg::Url processModuleName(kj::StringPtr name, const jsg::Url& base) {
1304+
auto url = normalizeModuleName(name, base);
1305+
KJ_REQUIRE(isValidBundleModuleUrl(url, base), "Invalid module name: ", name);
1306+
return url;
1307+
}
1308+
1309+
} // namespace
1310+
12041311
ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::addSyntheticModule(
12051312
kj::StringPtr name, EvaluateCallback callback, kj::Array<kj::String> namedExports) {
1206-
auto url = KJ_ASSERT_NONNULL(bundleBase.tryResolve(name));
1207-
// Make sure that percent-encoding in the path is normalized so we can match correctly.
1208-
url = url.clone(Url::EquivalenceOption::NORMALIZE_PATH);
1313+
const auto url = processModuleName(name, bundleBase);
12091314
add(url,
12101315
[url = url.clone(), callback = kj::mv(callback), namedExports = kj::mv(namedExports),
12111316
type = type()](const ResolveContext& context) mutable
@@ -1219,9 +1324,7 @@ ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::addSyntheticModule(
12191324

12201325
ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::addEsmModule(
12211326
kj::StringPtr name, kj::ArrayPtr<const char> source, Module::Flags flags) {
1222-
auto url = KJ_ASSERT_NONNULL(bundleBase.tryResolve(name));
1223-
// Make sure that percent-encoding in the path is normalized so we can match correctly.
1224-
url = url.clone(Url::EquivalenceOption::NORMALIZE_PATH);
1327+
const auto url = processModuleName(name, bundleBase);
12251328
add(url,
12261329
[url = url.clone(), source, flags, type = type()](const ResolveContext& context) mutable
12271330
-> kj::Maybe<kj::OneOf<kj::String, kj::Own<Module>>> {
@@ -1233,10 +1336,8 @@ ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::addEsmModule(
12331336

12341337
ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::addWasmModule(
12351338
kj::StringPtr name, kj::ArrayPtr<const kj::byte> data) {
1339+
const auto url = processModuleName(name, bundleBase);
12361340
auto callback = jsg::modules::Module::newWasmModuleHandler(data);
1237-
auto url = KJ_ASSERT_NONNULL(bundleBase.tryResolve(name));
1238-
// Make sure that percent-encoding in the path is normalized so we can match correctly.
1239-
url = url.clone(Url::EquivalenceOption::NORMALIZE_PATH);
12401341
add(url,
12411342
[url = url.clone(), callback = kj::mv(callback), type = type()](
12421343
const ResolveContext& context) mutable
@@ -1250,8 +1351,8 @@ ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::addWasmModule(
12501351

12511352
ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::alias(
12521353
kj::StringPtr alias, kj::StringPtr name) {
1253-
auto aliasUrl = KJ_ASSERT_NONNULL(bundleBase.tryResolve(alias));
1254-
auto id = KJ_ASSERT_NONNULL(bundleBase.tryResolve(name));
1354+
const auto id = processModuleName(name, bundleBase);
1355+
const auto aliasUrl = processModuleName(alias, bundleBase);
12551356
Builder::alias(aliasUrl, id);
12561357
return *this;
12571358
}

src/workerd/server/server.c++

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4255,8 +4255,19 @@ kj::Promise<kj::Own<Server::WorkerService>> Server::makeWorkerImpl(kj::StringPtr
42554255
// config. So for now this just uses the defaults.
42564256
auto workerFs = newWorkerFileSystem(kj::heap<FsMap>(), getBundleDirectory(def.source));
42574257

4258+
// TODO(soon): Either make python workers support the new module registry before
4259+
// NMR is defaulted on, or disable NMR by default when python workers are enabled.
4260+
// While NMR is experimental, we'll just throw an error if both are enabled.
4261+
if (def.featureFlags.getPythonWorkers()) {
4262+
KJ_REQUIRE(!def.featureFlags.getNewModuleRegistry(),
4263+
"Python workers do not currently support the new ModuleRegistry implementation. "
4264+
"Please disable the new ModuleRegistry feature flag to use Python workers.");
4265+
}
4266+
4267+
bool usingNewModuleRegistry = def.featureFlags.getNewModuleRegistry();
42584268
kj::Maybe<kj::Arc<jsg::modules::ModuleRegistry>> newModuleRegistry;
4259-
if (def.featureFlags.getNewModuleRegistry()) {
4269+
// TODO(soon): Python workers do not currently support the new module registry.
4270+
if (usingNewModuleRegistry) {
42604271
KJ_REQUIRE(experimental,
42614272
"The new ModuleRegistry implementation is an experimental feature. "
42624273
"You must run workerd with `--experimental` to use this feature.");
@@ -4297,8 +4308,8 @@ kj::Promise<kj::Own<Server::WorkerService>> Server::makeWorkerImpl(kj::StringPtr
42974308
inspectorPolicy = Worker::Isolate::InspectorPolicy::ALLOW_FULLY_TRUSTED;
42984309
}
42994310
Worker::LoggingOptions isolateLoggingOptions = loggingOptions;
4300-
isolateLoggingOptions.consoleMode = def.source.variant.is<WorkerSource::ScriptSource>() &&
4301-
!def.featureFlags.getNewModuleRegistry()
4311+
isolateLoggingOptions.consoleMode =
4312+
def.source.variant.is<WorkerSource::ScriptSource>() && !usingNewModuleRegistry
43024313
? Worker::ConsoleMode::INSPECTOR_ONLY
43034314
: loggingOptions.consoleMode;
43044315
auto isolate = kj::atomicRefcounted<Worker::Isolate>(kj::mv(api), kj::mv(observer), name,
@@ -4310,7 +4321,7 @@ kj::Promise<kj::Own<Server::WorkerService>> Server::makeWorkerImpl(kj::StringPtr
43104321
isolateRegistrar->registerIsolate(name, isolate.get());
43114322
}
43124323

4313-
if (!def.featureFlags.getNewModuleRegistry()) {
4324+
if (!usingNewModuleRegistry) {
43144325
KJ_IF_SOME(moduleFallback, def.moduleFallback) {
43154326
KJ_REQUIRE(experimental,
43164327
"The module fallback service is an experimental feature. "

0 commit comments

Comments
 (0)