Skip to content

Commit a2d05c6

Browse files
committed
Mount NVIDIA firmware directory if present
This change mounts the NVIDIA firmware directory into the container if present on the host. Starting with A100 devices the firmware from /lib/firmware/nvidia/<driver_version> is loaded when a device is initialised. This means that this folder is required in the container when persistence mode is not enabled on the host. Signed-off-by: Evan Lezar <[email protected]>
1 parent 5decca3 commit a2d05c6

File tree

4 files changed

+74
-4
lines changed

4 files changed

+74
-4
lines changed

src/nvc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ struct nvc_driver_info {
5959
size_t nipcs;
6060
struct nvc_device_node *devs;
6161
size_t ndevs;
62+
char **dirs;
63+
size_t ndirs;
6264
};
6365

6466
struct nvc_mig_device {

src/nvc_info.c

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,10 @@ static int find_library_paths(struct error *, struct dxcore_context *, struct nv
4747
static int find_binary_paths(struct error *, struct dxcore_context*, struct nvc_driver_info *, const char *, const char * const [], size_t);
4848
static int find_device_node(struct error *, const char *, const char *, struct nvc_device_node *);
4949
static int find_ipc_path(struct error *, const char *, const char *, char **);
50+
static int lookup_paths(struct error *, struct dxcore_context *, struct nvc_driver_info *, const char *, int32_t, const char *);
5051
static int lookup_libraries(struct error *, struct dxcore_context *, struct nvc_driver_info *, const char *, int32_t, const char *);
5152
static int lookup_binaries(struct error *, struct dxcore_context *, struct nvc_driver_info *, const char *, int32_t);
53+
static int lookup_directories(struct error *, struct dxcore_context *, struct nvc_driver_info *, const char *, int32_t);
5254
static int lookup_devices(struct error *, const struct nvc_context *, struct nvc_driver_info *, const char *, int32_t);
5355
static int lookup_ipcs(struct error *, struct nvc_driver_info *, const char *, int32_t);
5456
static int fill_mig_device_info(struct nvc_context *, bool mig_enabled, struct driver_device *, struct nvc_device *);
@@ -350,6 +352,27 @@ find_ipc_path(struct error *err, const char *root, const char *ipc, char **buf)
350352
return (0);
351353
}
352354

355+
/*
 * Run every path-based lookup for the driver in a fixed order: libraries,
 * then binaries, then additional directories. The chain stops at the first
 * lookup that reports failure (a negative return), logs which stage failed,
 * and returns -1. Returns 0 only when all three lookups succeed.
 */
static int
lookup_paths(struct error *err, struct dxcore_context *dxcore, struct nvc_driver_info *info, const char *root, int32_t flags, const char *ldcache)
{
        int rv = -1;

        if (lookup_libraries(err, dxcore, info, root, flags, ldcache) < 0) {
                log_err("error looking up libraries");
        } else if (lookup_binaries(err, dxcore, info, root, flags) < 0) {
                log_err("error looking up binaries");
        } else if (lookup_directories(err, dxcore, info, root, flags) < 0) {
                log_err("error looking up additional paths");
        } else {
                rv = 0;
        }

        return (rv);
}
375+
353376
static int
354377
lookup_libraries(struct error *err, struct dxcore_context *dxcore, struct nvc_driver_info *info, const char *root, int32_t flags, const char *ldcache)
355378
{
@@ -406,6 +429,38 @@ lookup_binaries(struct error *err, struct dxcore_context* dxcore, struct nvc_dri
406429
return (0);
407430
}
408431

432+
static int
433+
lookup_directories(struct error *err, struct dxcore_context *dxcore, struct nvc_driver_info *info, const char *root, int32_t flags) {
434+
int fd;
435+
char *firmware_path = NULL;
436+
437+
if (dxcore->initialized) {
438+
log_info("skipping path lookup for dxcore");
439+
return 0;
440+
}
441+
442+
// If the NVIDIA driver firmware path exists, include this in the mounted folders.
443+
if (xasprintf(err, &firmware_path, NV_FIRMWARE_DRIVER_PATH, info->nvrm_version) < 0) {
444+
log_errf("error constructing firmware path for %s", info->nvrm_version);
445+
return (-1);
446+
}
447+
if ((fd = xopen(err, firmware_path, O_PATH|O_DIRECTORY)) < 0) {
448+
log_infof("missing firmware path %s", firmware_path);
449+
return (0);
450+
}
451+
close(fd);
452+
453+
info->dirs = array_new(err, 1);
454+
if (info->dirs == NULL) {
455+
log_err("error creating path array");
456+
return (-1);
457+
}
458+
info->dirs[0] = firmware_path;
459+
info->ndirs = 1;
460+
461+
return (0);
462+
}
463+
409464
static int
410465
lookup_devices(struct error *err, const struct nvc_context *ctx, struct nvc_driver_info *info, const char *root, int32_t flags)
411466
{
@@ -762,9 +817,7 @@ nvc_driver_info_new(struct nvc_context *ctx, const char *opts)
762817
goto fail;
763818
if (driver_get_cuda_version(&ctx->drv, &info->cuda_version) < 0)
764819
goto fail;
765-
if (lookup_libraries(&ctx->err, &ctx->dxcore, info, ctx->cfg.root, flags, ctx->cfg.ldcache) < 0)
766-
goto fail;
767-
if (lookup_binaries(&ctx->err, &ctx->dxcore, info, ctx->cfg.root, flags) < 0)
820+
if (lookup_paths(&ctx->err, &ctx->dxcore, info, ctx->cfg.root, flags, ctx->cfg.ldcache) < 0)
768821
goto fail;
769822
if (lookup_devices(&ctx->err, ctx, info, ctx->cfg.root, flags) < 0)
770823
goto fail;

src/nvc_internal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@
6464

6565
#define CUDA_RUNTIME_DIR "/usr/local/cuda"
6666

67+
/* Base directory holding NVIDIA firmware; per-driver-version subdirectories live below it. */
#define NV_FIRMWARE_PATH "/lib/firmware/nvidia"
68+
/* printf-style template for one driver version's firmware directory; "%s" receives the driver version string. */
#define NV_FIRMWARE_DRIVER_PATH NV_FIRMWARE_PATH "/%s"
69+
6770
#define MSFT_DXG_DEVICE_PATH _PATH_DEV "dxg"
6871

6972
struct nvc_context {

src/nvc_mount.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -720,7 +720,7 @@ nvc_driver_mount(struct nvc_context *ctx, const struct nvc_container *cnt, const
720720
if (ns_enter(&ctx->err, cnt->mnt_ns, CLONE_NEWNS) < 0)
721721
return (-1);
722722

723-
nmnt = 2 + info->nbins + info->nlibs + cnt->nlibs + info->nlibs32 + info->nipcs + info->ndevs;
723+
nmnt = 2 + info->nbins + info->nlibs + cnt->nlibs + info->nlibs32 + info->nipcs + info->ndevs + info->ndirs;
724724
mnt = ptr = (const char **)array_new(&ctx->err, nmnt);
725725
if (mnt == NULL)
726726
goto fail;
@@ -778,6 +778,18 @@ nvc_driver_mount(struct nvc_context *ctx, const struct nvc_container *cnt, const
778778
free(libs);
779779
}
780780

781+
/* Directory mounts */
782+
for (size_t i = 0; i < info->ndirs; ++i) {
783+
if (str_has_prefix(NV_FIRMWARE_PATH, info->dirs[i])) {
784+
if (!(cnt->flags & OPT_UTILITY_LIBS))
785+
continue;
786+
}
787+
if ((*ptr++ = mount_directory(&ctx->err, ctx->cfg.root, cnt, info->dirs[i])) == NULL) {
788+
log_errf("error mounting directory %s", info->dirs[i]);
789+
goto fail;
790+
}
791+
}
792+
781793
/* IPC mounts */
782794
for (size_t i = 0; i < info->nipcs; ++i) {
783795
/* XXX Only utility libraries require persistenced or fabricmanager IPC, everything else is compute only. */

0 commit comments

Comments
 (0)