@@ -25,6 +25,22 @@ kj::Maybe<const Module&> checkModule(const ResolveContext& context, const Module
25
25
return module ;
26
26
};
27
27
28
+ kj::String specifierToString (jsg::Lock& js, v8::Local<v8::String> spec) {
29
+ // Source files in workers end up being converted to UTF-8 bytes, so if the specifier
30
+ // string contains non-ASCII unicode characters, those will be directly encoded as UTF-8
31
+ // bytes, which unfortunately end up double-encoded if we try to read them using the
32
+ // regular js.toString() method. Doh! Fortunately they come through as one-byte strings,
33
+ // so we can detect that case and handle those correctly here.
34
+ if (spec->ContainsOnlyOneByte ()) {
35
+ auto buf = kj::heapArray<char >(spec->Length () + 1 );
36
+ spec->WriteOneByteV2 (js.v8Isolate , 0 , spec->Length (), buf.asBytes ().begin (),
37
+ v8::String::WriteFlags::kNullTerminate );
38
+ KJ_ASSERT (buf[buf.size () - 1 ] == ' \0 ' );
39
+ return kj::String (kj::mv (buf));
40
+ }
41
+ return js.toString (spec);
42
+ }
43
+
28
44
// Ensure that the given module has been instantiated or errored.
29
45
// If false is returned, then an exception should have been scheduled
30
46
// on the isolate.
@@ -746,7 +762,7 @@ v8::MaybeLocal<v8::Promise> dynamicImportModuleCallback(v8::Local<v8::Context> c
746
762
auto & registry = IsolateModuleRegistry::from (js.v8Isolate );
747
763
try {
748
764
return js.tryCatch ([&]() -> v8::MaybeLocal<v8::Promise> {
749
- auto spec = js. toString ( specifier);
765
+ auto spec = specifierToString (js, specifier);
750
766
751
767
// The proposed specification for import attributes strongly recommends that
752
768
// embedders reject import attributes and types they do not understand/implement.
@@ -862,7 +878,7 @@ v8::MaybeLocal<std::conditional_t<IsSourcePhase, v8::Object, v8::Module>> resolv
862
878
auto & registry = IsolateModuleRegistry::from (js.v8Isolate );
863
879
864
880
return js.tryCatch ([&]() -> v8::MaybeLocal<ReturnType> {
865
- auto spec = kj::str ( specifier);
881
+ auto spec = specifierToString (js, specifier);
866
882
867
883
// The proposed specification for import attributes strongly recommends that
868
884
// embedders reject import attributes and types they do not understand/implement.
@@ -1201,11 +1217,100 @@ ModuleBundle::BundleBuilder::BundleBuilder(const jsg::Url& bundleBase)
1201
1217
: ModuleBundle::Builder(Type::BUNDLE),
1202
1218
bundleBase(bundleBase) {}
1203
1219
1220
+ namespace {
1221
+ static constexpr auto BUNDLE_CLONE_OPTIONS = jsg::Url::EquivalenceOption::IGNORE_FRAGMENTS |
1222
+ jsg::Url::EquivalenceOption::IGNORE_SEARCH | jsg::Url::EquivalenceOption::NORMALIZE_PATH;
1223
+
1224
+ // Takes the user-provided module name and normalizes it to a form that can be
1225
+ // resolved relative to the bundle base. This involves pre-parsing the name as a URL
1226
+ // relative to a dummy base URL in order to normalize out dot and double-dot segments,
1227
+ // then stripping off any leading slashes so that the name is always relative and cannot
1228
+ // be interpreted as an absolute path.
1229
+ jsg::Url normalizeModuleName (kj::StringPtr name, const jsg::Url& base) {
1230
+ // This first step normalizes out path segments like "." and "..", drops query
1231
+ // strings and fragments, and normalizes percent-encoding in the path.
1232
+ auto url = KJ_ASSERT_NONNULL (base.tryResolve (name)).clone (BUNDLE_CLONE_OPTIONS);
1233
+
1234
+ // If the protocol is not file:, then we don't need to do any more processing
1235
+ // here. We will check the validity of the result as a module URL in the next
1236
+ // step.
1237
+ if (url.getProtocol () != " file:" _kj) {
1238
+ return kj::mv (url);
1239
+ }
1240
+
1241
+ auto urlPath = url.getPathname ();
1242
+ auto basePath = base.getPathname ();
1243
+
1244
+ // The url path must not be identical to the base...
1245
+ KJ_REQUIRE (urlPath != basePath, " Invalid empty module name" );
1246
+
1247
+ // If the url path starts with the base path, then we're good!
1248
+ if (urlPath.startsWith (basePath)) {
1249
+ return kj::mv (url);
1250
+ }
1251
+
1252
+ // Otherwise, let's make sure that the url path is processed as
1253
+ // relative to the base path. We do this by stripping off any
1254
+ // leading slashes from the front of the URL then re-resolve
1255
+ // against the base. This should be an exceedingly rare edge
1256
+ // case if the worker bundle is being constructed properly. It's
1257
+ // meant only to handle cases where silliness like "///foo" is
1258
+ // given as a module name.
1259
+ while (urlPath.startsWith (" /" _kj) && urlPath.size () > 0 ) {
1260
+ urlPath = urlPath.slice (1 );
1261
+ }
1262
+ KJ_REQUIRE (urlPath.size () > 0 , " Invalid empty module name" );
1263
+
1264
+ return KJ_ASSERT_NONNULL (base.tryResolve (urlPath));
1265
+ }
1266
+
1267
+ bool isValidBundleModuleUrl (const jsg::Url& url, const jsg::Url& base) {
1268
+ KJ_DASSERT (base.getProtocol () == " file:" _kj);
1269
+ KJ_DASSERT (base.getPathname ().endsWith (" /" _kj));
1270
+
1271
+ // Let's forbid users from using cloudflare: and workerd: URLs in bundles so that
1272
+ // we can protect those namespaces for our own future use. Specifically, these
1273
+ // should only be used by the runtime to refer to built-in modules. We don't
1274
+ // restrict other non-standard protocols like node:
1275
+ KJ_REQUIRE (url.getProtocol () != " cloudflare:" _kj,
1276
+ " The cloudflare: protocol is reserved and cannot be used in module bundles" );
1277
+ KJ_REQUIRE (url.getProtocol () != " workerd:" _kj,
1278
+ " The workerd: protocol is reserved and cannot be used in module bundles" );
1279
+
1280
+ if (url.getProtocol () != " file:" _kj) {
1281
+ // Different protocols are always OK
1282
+ return true ;
1283
+ }
1284
+
1285
+ // Module file: URLs must not have a host component.
1286
+ // We already know the protocol is "file:" here because of the check above.
1287
+ if (url.getHost () != " " _kj) {
1288
+ return false ;
1289
+ }
1290
+
1291
+ // Check if url is subordinate to the base.
1292
+ // This means url's path should start with base's path as a prefix
1293
+ auto aPath = url.getPathname ();
1294
+ auto bPath = base.getPathname ();
1295
+
1296
+ return aPath.startsWith (bPath);
1297
+ }
1298
+
1299
+ // Converts the name given for a user-bundle module into a fully qualified module url.
1300
+ // This involves normalizing the name such that it is relative to the bundle base, removes
1301
+ // any query parameters or fragments, removes dot and double-dot path segments, normalizes
1302
+ // percent-encoding, and otherwise validates that the resulting URL is a valid URL.
1303
+ const jsg::Url processModuleName (kj::StringPtr name, const jsg::Url& base) {
1304
+ auto url = normalizeModuleName (name, base);
1305
+ KJ_REQUIRE (isValidBundleModuleUrl (url, base), " Invalid module name: " , name);
1306
+ return url;
1307
+ }
1308
+
1309
+ } // namespace
1310
+
1204
1311
ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::addSyntheticModule (
1205
1312
kj::StringPtr name, EvaluateCallback callback, kj::Array<kj::String> namedExports) {
1206
- auto url = KJ_ASSERT_NONNULL (bundleBase.tryResolve (name));
1207
- // Make sure that percent-encoding in the path is normalized so we can match correctly.
1208
- url = url.clone (Url::EquivalenceOption::NORMALIZE_PATH);
1313
+ const auto url = processModuleName (name, bundleBase);
1209
1314
add (url,
1210
1315
[url = url.clone (), callback = kj::mv (callback), namedExports = kj::mv (namedExports),
1211
1316
type = type ()](const ResolveContext& context) mutable
@@ -1219,9 +1324,7 @@ ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::addSyntheticModule(
1219
1324
1220
1325
ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::addEsmModule (
1221
1326
kj::StringPtr name, kj::ArrayPtr<const char > source, Module::Flags flags) {
1222
- auto url = KJ_ASSERT_NONNULL (bundleBase.tryResolve (name));
1223
- // Make sure that percent-encoding in the path is normalized so we can match correctly.
1224
- url = url.clone (Url::EquivalenceOption::NORMALIZE_PATH);
1327
+ const auto url = processModuleName (name, bundleBase);
1225
1328
add (url,
1226
1329
[url = url.clone (), source, flags, type = type ()](const ResolveContext& context) mutable
1227
1330
-> kj::Maybe<kj::OneOf<kj::String, kj::Own<Module>>> {
@@ -1233,10 +1336,8 @@ ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::addEsmModule(
1233
1336
1234
1337
ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::addWasmModule (
1235
1338
kj::StringPtr name, kj::ArrayPtr<const kj::byte> data) {
1339
+ const auto url = processModuleName (name, bundleBase);
1236
1340
auto callback = jsg::modules::Module::newWasmModuleHandler (data);
1237
- auto url = KJ_ASSERT_NONNULL (bundleBase.tryResolve (name));
1238
- // Make sure that percent-encoding in the path is normalized so we can match correctly.
1239
- url = url.clone (Url::EquivalenceOption::NORMALIZE_PATH);
1240
1341
add (url,
1241
1342
[url = url.clone (), callback = kj::mv (callback), type = type ()](
1242
1343
const ResolveContext& context) mutable
@@ -1250,8 +1351,8 @@ ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::addWasmModule(
1250
1351
1251
1352
ModuleBundle::BundleBuilder& ModuleBundle::BundleBuilder::alias (
1252
1353
kj::StringPtr alias, kj::StringPtr name) {
1253
- auto aliasUrl = KJ_ASSERT_NONNULL ( bundleBase. tryResolve (alias) );
1254
- auto id = KJ_ASSERT_NONNULL ( bundleBase. tryResolve (name) );
1354
+ const auto id = processModuleName (name, bundleBase);
1355
+ const auto aliasUrl = processModuleName (alias, bundleBase);
1255
1356
Builder::alias (aliasUrl, id);
1256
1357
return *this ;
1257
1358
}
0 commit comments