Skip to content

Commit d3dcb86

Browse files
committed
Fix update of ldcache for non-matching host and container distros
This change ensures that updating the ldcache in a container also includes the system search paths for the container ldconfig. In most cases, the hook will be executing a host ldconfig that may be configured widely differently from what the container image expects. The common case is Debian vs non-Debian. But there are also hosts that configure ldconfig to search in a glibc prefix (e.g. /usr/lib/glibc). To avoid all these cases, write the container's expected system search paths to a drop-in conf file that is likely to be last in lexicographic order. Entries in the top-level ld.so.conf file may be processed after this drop-in, but this hook does not modify the top-level file if it exists. Signed-off-by: Jean-Francois Roy <[email protected]> Signed-off-by: Evan Lezar <[email protected]>
1 parent e1075cb commit d3dcb86

File tree

2 files changed

+102
-19
lines changed

2 files changed

+102
-19
lines changed

internal/ldconfig/ldconfig.go

Lines changed: 83 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ const (
3737
// higher precedence than other libraries on the system, but lower than
3838
// the 00-cuda-compat that is included in some containers.
3939
ldsoconfdFilenamePattern = "00-nvcr-*.conf"
40+
// ldsoconfdSystemDirsFilenamePattern specifies the filename pattern for the drop-in conf file
41+
// that includes the expected system directories for the container.
42+
// This is chosen to have a high likelihood of being lexicographically last
43+
// in the list of config files, since system search paths should be
44+
// considered last.
45+
ldsoconfdSystemDirsFilenamePattern = "zz-nvcr-*.conf"
4046
// defaultTopLevelLdsoconfFilePath is the standard location of the top-level ld.so.conf file.
4147
// Most container images based on a distro will have this file, but distroless container images
4248
// may not.
@@ -76,7 +82,8 @@ func NewRunner(id string, ldconfigPath string, containerRoot string, additionala
7682
// This struct is used to perform operations on the ldcache and libraries in a
7783
// particular root (e.g. a container).
7884
//
79-
// args[0] is the reexec initializer function name
85+
// args[0] is the reexec initializer function name and is required.
86+
//
8087
// The following flags are required:
8188
//
8289
// --ldconfig-path=LDCONFIG_PATH the path to ldconfig on the host
@@ -85,16 +92,20 @@ func NewRunner(id string, ldconfigPath string, containerRoot string, additionala
8592
// The following flags are optional:
8693
//
8794
// --is-debian-like-host Indicates that the host system is debian-based.
95+
// See https://github.com/NVIDIA/nvidia-container-toolkit/pull/1444
8896
//
8997
// The remaining args are folders where soname symlinks need to be created.
9098
func NewFromArgs(args ...string) (*Ldconfig, error) {
9199
if len(args) < 1 {
92100
return nil, fmt.Errorf("incorrect arguments: %v", args)
93101
}
94-
fs := flag.NewFlagSet(args[1], flag.ExitOnError)
102+
fs := flag.NewFlagSet("ldconfig-options", flag.ExitOnError)
95103
ldconfigPath := fs.String("ldconfig-path", "", "the path to ldconfig on the host")
96104
containerRoot := fs.String("container-root", "", "the path in which ldconfig must be run")
97-
isDebianLikeHost := fs.Bool("is-debian-like-host", false, "the hook is running from a Debian-like host")
105+
isDebianLikeHost := fs.Bool("is-debian-like-host", false, `indicates that the host system is debian-based.
106+
This allows us to handle the case where there are differences in behavior
107+
between the ldconfig from the host (as executed from an update-ldcache hook) and
108+
ldconfig in the container. Such differences include system search paths.`)
98109
if err := fs.Parse(args[1:]); err != nil {
99110
return nil, err
100111
}
@@ -124,8 +135,12 @@ func (l *Ldconfig) UpdateLDCache() error {
124135
// `prepareRoot` pivots to the container root, so can now set the container "debian-ness".
125136
l.isDebianLikeContainer = isDebian()
126137

127-
// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
128-
// be configured to use a different config file by default.
138+
// Ensure that the top-level config file used includes the
139+
// defaultLdsoconfdDir drop-in config folder.
140+
if err := ensureLdsoconfFile(defaultTopLevelLdsoconfFilePath, defaultLdsoconfdDir); err != nil {
141+
return fmt.Errorf("failed to ensure ld.so.conf file: %w", err)
142+
}
143+
129144
filteredDirectories, err := l.filterDirectories(defaultTopLevelLdsoconfFilePath, l.directories...)
130145
if err != nil {
131146
return err
@@ -137,11 +152,21 @@ func (l *Ldconfig) UpdateLDCache() error {
137152
"-C", "/etc/ld.so.cache",
138153
}
139154

140-
if err := ensureLdsoconfFile(defaultTopLevelLdsoconfFilePath, defaultLdsoconfdDir); err != nil {
141-
return fmt.Errorf("failed to ensure ld.so.conf file: %w", err)
142-
}
143155
if err := createLdsoconfdFile(defaultLdsoconfdDir, ldsoconfdFilenamePattern, filteredDirectories...); err != nil {
144-
return fmt.Errorf("failed to create ld.so.conf.d drop-in file: %w", err)
156+
return fmt.Errorf("failed to write %s drop-in: %w", ldsoconfdFilenamePattern, err)
157+
}
158+
159+
if l.isDebianLikeHost != l.isDebianLikeContainer {
160+
// In most cases, the hook will be executing a host ldconfig that may be configured widely
161+
// differently from what the container image expects. The common case is Debian vs non-Debian.
162+
// But there are also hosts that configure ldconfig to search in a glibc prefix
163+
// (e.g. /usr/lib/glibc). To avoid all these cases, write the container's expected system search
164+
// paths to a drop-in conf file that is likely to be last in lexicographic order. Entries in the
165+
// top-level ld.so.conf file may be processed after this drop-in, but this hook does not modify
166+
// the top-level file if it exists.
167+
if err := createLdsoconfdFile(defaultLdsoconfdDir, ldsoconfdSystemDirsFilenamePattern, l.getSystemSearchPaths()...); err != nil {
168+
return fmt.Errorf("failed to write %s drop-in: %w", ldsoconfdSystemDirsFilenamePattern, err)
169+
}
145170
}
146171

147172
return SafeExec(ldconfigPath, args, nil)
@@ -188,6 +213,7 @@ func (l *Ldconfig) filterDirectories(configFilePath string, directories ...strin
188213
continue
189214
}
190215
filtered = append(filtered, d)
216+
ldconfigDirs[d] = struct{}{}
191217
}
192218
return filtered, nil
193219
}
@@ -331,22 +357,61 @@ func isDebian() bool {
331357
return !info.IsDir()
332358
}
333359

334-
// nonDebianSystemSearchPaths returns the system search paths for non-Debian
335-
// systems.
360+
// nonDebianSystemSearchPaths returns the system search paths for non-Debian systems.
361+
//
362+
// glibc's ldconfig calls `add_system_dir` with `SLIBDIR` and `LIBDIR` (if they are not equal). On
363+
// aarch64 and x86_64, `add_system_dir` is a macro that scans the provided path. If the path ends
364+
// with "/lib64" (or "/libx32", x86_64 only), it strips those suffixes. Then it registers the
365+
// resulting path. Then if the path ends with "/lib", it registers "path"+"64" (and "path"+"x32",
366+
// x86_64 only).
336367
//
337-
// This list was taken from the output of:
368+
// By default, "LIBDIR" is "/usr/lib" and "SLIBDIR" is "/lib". Note that on modern distributions,
369+
// "/lib" is usually a symlink to "/usr/lib" and "/lib64" to "/usr/lib64". ldconfig resolves
370+
// symlinks and skips duplicate directory entries.
338371
//
339-
// docker run --rm -ti redhat/ubi9 /usr/lib/ld-linux-aarch64.so.1 --help | grep -A6 "Shared library search path"
372+
// To get the list of system paths, you can invoke the dynamic linker with `--list-diagnostics` and
373+
// look for "path.system_dirs". For example
374+
// `docker run --rm -ti fedora:latest /lib64/ld-linux-x86-64.so.2 --list-diagnostics | grep path.system_dirs`.
375+
//
376+
// On most distributions, including Fedora and derivatives, this yields the following
377+
// ldconfig system search paths.
378+
//
379+
// TODO: Add other architectures that have custom `add_system_dir` macros (e.g. riscv)
380+
// TODO: Replace with executing the container's dynamic linker with `--list-diagnostics`?
340381
func nonDebianSystemSearchPaths() []string {
	// SLIBDIR ("/lib") and LIBDIR ("/usr/lib") are registered on every
	// architecture.
	paths := []string{"/lib", "/usr/lib"}

	// On aarch64 and x86_64, glibc's `add_system_dir` additionally registers
	// "path"+"64" (and "path"+"x32" on x86_64 only) for every path ending in
	// "/lib". For "/lib" this yields "/lib64" -- a sibling of "/lib", not the
	// nested "/lib/lib64".
	switch runtime.GOARCH {
	case "amd64":
		paths = append(paths,
			"/lib64",
			"/usr/lib64",
			"/libx32",
			"/usr/libx32",
		)
	case "arm64":
		paths = append(paths,
			"/lib64",
			"/usr/lib64",
		)
	}
	return paths
}
343400

344-
// debianSystemSearchPaths returns the system search paths for Debian-like
345-
// systems.
401+
// debianSystemSearchPaths returns the system search paths for Debian-like systems.
346402
//
347-
// This list was taken from the output of:
403+
// Debian (and derivatives) apply their multi-arch patch to glibc, which modifies ldconfig to
404+
// use the same set of system paths as the dynamic linker. These paths are going to include the
405+
// multi-arch directory _and_ by default "/lib" and "/usr/lib" for compatibility.
348406
//
349-
// docker run --rm -ti ubuntu /usr/lib/aarch64-linux-gnu/ld-linux-aarch64.so.1 --help | grep -A6 "Shared library search path"
407+
// To get the list of system paths, you can invoke the dynamic linker with `--list-diagnostics` and
408+
// look for "path.system_dirs". For example
409+
// `docker run --rm -ti ubuntu:latest /lib64/ld-linux-x86-64.so.2 --list-diagnostics | grep path.system_dirs`.
410+
//
411+
// This yields the following ldconfig system search paths.
412+
//
413+
// TODO: Add other architectures that have custom `add_system_dir` macros (e.g. riscv)
414+
// TODO: Replace with executing the container's dynamic linker with `--list-diagnostics`?
350415
func debianSystemSearchPaths() []string {
351416
var paths []string
352417
switch runtime.GOARCH {
@@ -362,6 +427,5 @@ func debianSystemSearchPaths() []string {
362427
)
363428
}
364429
paths = append(paths, "/lib", "/usr/lib")
365-
366430
return paths
367431
}

tests/e2e/nvidia-container-toolkit_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,4 +570,23 @@ EOF`)
570570
Expect(output).To(Equal(expectedOutput))
571571
})
572572
})
573+
574+
When("running a ubi9 container", Ordered, func() {
575+
var (
576+
expectedOutput string
577+
)
578+
BeforeAll(func(ctx context.Context) {
579+
_, _, err := runner.Run(`docker pull redhat/ubi9`)
580+
Expect(err).ToNot(HaveOccurred())
581+
582+
expectedOutput, _, err = runner.Run(`docker run --rm --runtime=runc redhat/ubi9 bash -c "ldconfig -p | grep libc.so."`)
583+
Expect(err).ToNot(HaveOccurred())
584+
})
585+
586+
It("should include the system libraries when using the nvidia-container-runtime", func(ctx context.Context) {
587+
output, _, err := runner.Run(`docker run --rm --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all redhat/ubi9 bash -c "ldconfig -p | grep libc.so."`)
588+
Expect(err).ToNot(HaveOccurred())
589+
Expect(output).To(Equal(expectedOutput))
590+
})
591+
})
573592
})

0 commit comments

Comments
 (0)