Skip to content

Commit dcb05d0

Browse files
authored
Merge pull request #1444 from elezar/fix-ldcache-update-on-non-debian
Fix ldcache update when host and container distributions do not match
2 parents b05f110 + 6592021 commit dcb05d0

File tree

2 files changed

+133
-23
lines changed

2 files changed

+133
-23
lines changed

internal/ldconfig/ldconfig.go

Lines changed: 114 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ const (
3737
// higher precedence than other libraries on the system, but lower than
3838
// the 00-cuda-compat that is included in some containers.
3939
ldsoconfdFilenamePattern = "00-nvcr-*.conf"
40+
// ldsoconfdSystemDirsFilenamePattern specifies the filename pattern for the drop-in conf file
41+
// that includes the expected system directories for the container.
42+
// This is chosen to have a high likelihood of being lexicographically last in
43+
// the list of config files, since system search paths should be
44+
// considered last.
45+
ldsoconfdSystemDirsFilenamePattern = "zz-nvcr-*.conf"
4046
// defaultTopLevelLdsoconfFilePath is the standard location of the top-level ld.so.conf file.
4147
// Most container images based on a distro will have this file, but distroless container images
4248
// may not.
@@ -63,7 +69,7 @@ func NewRunner(id string, ldconfigPath string, containerRoot string, additionala
6369
"--ldconfig-path", strings.TrimPrefix(config.NormalizeLDConfigPath("@"+ldconfigPath), "@"),
6470
"--container-root", containerRoot,
6571
}
66-
if isDebian() {
72+
if isDebianLike() {
6773
args = append(args, "--is-debian-like-host")
6874
}
6975
args = append(args, additionalargs...)
@@ -76,25 +82,31 @@ func NewRunner(id string, ldconfigPath string, containerRoot string, additionala
7682
// This struct is used to perform operations on the ldcache and libraries in a
7783
// particular root (e.g. a container).
7884
//
79-
// args[0] is the reexec initializer function name
85+
// args[0] is the reexec initializer function name and is required.
86+
//
8087
// The following flags are required:
8188
//
8289
// --ldconfig-path=LDCONFIG_PATH the path to ldconfig on the host
8390
// --container-root=CONTAINER_ROOT the path in which ldconfig must be run
8491
//
8592
// The following flags are optional:
8693
//
87-
// --is-debian-like-host Indicates that the host system is debian-based.
94+
// --is-debian-like-host Indicates that the host system is debian-like (e.g. Debian, Ubuntu)
95+
// as opposed to non-Debian-like (e.g. RHEL, Fedora)
96+
// See https://github.com/NVIDIA/nvidia-container-toolkit/pull/1444
8897
//
8998
// The remaining args are folders where soname symlinks need to be created.
9099
func NewFromArgs(args ...string) (*Ldconfig, error) {
91100
if len(args) < 1 {
92101
return nil, fmt.Errorf("incorrect arguments: %v", args)
93102
}
94-
fs := flag.NewFlagSet(args[1], flag.ExitOnError)
103+
fs := flag.NewFlagSet("ldconfig-options", flag.ExitOnError)
95104
ldconfigPath := fs.String("ldconfig-path", "", "the path to ldconfig on the host")
96105
containerRoot := fs.String("container-root", "", "the path in which ldconfig must be run")
97-
isDebianLikeHost := fs.Bool("is-debian-like-host", false, "the hook is running from a Debian-like host")
106+
isDebianLikeHost := fs.Bool("is-debian-like-host", false, `indicates that the host system is debian-based.
107+
This allows us to handle the case where there are differences in behavior
108+
between the ldconfig from the host (as executed from an update-ldcache hook) and
109+
ldconfig in the container. Such differences include system search paths.`)
98110
if err := fs.Parse(args[1:]); err != nil {
99111
return nil, err
100112
}
@@ -122,10 +134,14 @@ func (l *Ldconfig) UpdateLDCache() error {
122134
}
123135

124136
// `prepareRoot` pivots to the container root, so can now set the container "debian-ness".
125-
l.isDebianLikeContainer = isDebian()
137+
l.isDebianLikeContainer = isDebianLike()
138+
139+
// Ensure that the top-level config file includes the
140+
// defaultLdsoconfdDir drop-in config folder.
141+
if err := ensureLdsoconfFile(defaultTopLevelLdsoconfFilePath, defaultLdsoconfdDir); err != nil {
142+
return fmt.Errorf("failed to ensure ld.so.conf file: %w", err)
143+
}
126144

127-
// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
128-
// be configured to use a different config file by default.
129145
filteredDirectories, err := l.filterDirectories(defaultTopLevelLdsoconfFilePath, l.directories...)
130146
if err != nil {
131147
return err
@@ -137,11 +153,20 @@ func (l *Ldconfig) UpdateLDCache() error {
137153
"-C", "/etc/ld.so.cache",
138154
}
139155

140-
if err := ensureLdsoconfFile(defaultTopLevelLdsoconfFilePath, defaultLdsoconfdDir); err != nil {
141-
return fmt.Errorf("failed to ensure ld.so.conf file: %w", err)
142-
}
143156
if err := createLdsoconfdFile(defaultLdsoconfdDir, ldsoconfdFilenamePattern, filteredDirectories...); err != nil {
144-
return fmt.Errorf("failed to create ld.so.conf.d drop-in file: %w", err)
157+
return fmt.Errorf("failed to write %s drop-in: %w", ldsoconfdFilenamePattern, err)
158+
}
159+
160+
// In most cases, the hook will be executing a host ldconfig that may be configured very
161+
// differently from what the container image expects.
162+
// The common case is Debian-like (e.g. Debian, Ubuntu) vs non-Debian-like (e.g. RHEL, Fedora).
163+
// But there are also hosts that configure ldconfig to search in a glibc prefix
164+
// (e.g. /usr/lib/glibc). To avoid all these cases, write the container's expected system search
165+
// paths to a drop-in conf file that is likely to be last in lexicographic order. Entries in the
166+
// top-level ld.so.conf file may be processed after this drop-in, but this hook does not modify
167+
// the top-level file if it exists.
168+
if err := createLdsoconfdFile(defaultLdsoconfdDir, ldsoconfdSystemDirsFilenamePattern, l.getSystemSearchPaths()...); err != nil {
169+
return fmt.Errorf("failed to write %s drop-in: %w", ldsoconfdSystemDirsFilenamePattern, err)
145170
}
146171

147172
return SafeExec(ldconfigPath, args, nil)
@@ -188,6 +213,7 @@ func (l *Ldconfig) filterDirectories(configFilePath string, directories ...strin
188213
continue
189214
}
190215
filtered = append(filtered, d)
216+
ldconfigDirs[d] = struct{}{}
191217
}
192218
return filtered, nil
193219
}
@@ -323,30 +349,96 @@ func processLdsoconfFile(ldsoconfFilename string) ([]string, []string, error) {
323349
return directories, includedFilenames, nil
324350
}
325351

326-
func isDebian() bool {
352+
// isDebianLike returns true if a Debian-like distribution is detected.
353+
// Debian-like distributions include Debian and Ubuntu, whereas non-Debian-like
354+
// distributions include RHEL and Fedora.
355+
func isDebianLike() bool {
327356
info, err := os.Stat("/etc/debian_version")
328357
if err != nil {
329358
return false
330359
}
331360
return !info.IsDir()
332361
}
333362

334-
// nonDebianSystemSearchPaths returns the system search paths for non-Debian
335-
// systems.
363+
// nonDebianSystemSearchPaths returns the system search paths for non-Debian-like systems.
364+
// (note that Debian-like systems include Ubuntu systems)
365+
//
366+
// glibc's ldconfig calls `add_system_dir` with `SLIBDIR` and `LIBDIR` (if they are not equal). On
367+
// aarch64 and x86_64, `add_system_dir` is a macro that scans the provided path. If the path ends
368+
// with "/lib64" (or "/libx32", x86_64 only), it strips those suffixes. Then it registers the
369+
// resulting path. Then if the path ends with "/lib", it registers "path"+"64" (and "path"+"x32",
370+
// x86_64 only).
371+
//
372+
// By default, "LIBDIR" is "/usr/lib" and "SLIBDIR" is "/lib". Note that on modern distributions,
373+
// "/lib" is usually a symlink to "/usr/lib" and "/lib64" to "/usr/lib64". ldconfig resolves
374+
// symlinks and skips duplicate directory entries.
375+
//
376+
// To get the list of system paths, you can invoke the dynamic linker with `--list-diagnostics` and
377+
// look for "path.system_dirs". For example
378+
//
379+
// $ docker run --rm -ti fedora bash -c "uname -m;\$(find . | grep /ld-linux) --list-diagnostics | grep path.system_dirs"
380+
// x86_64
381+
// path.system_dirs[0x0]="/lib64/"
382+
// path.system_dirs[0x1]="/usr/lib64/"
383+
//
384+
// $ docker run --rm -ti redhat/ubi9 bash -c "uname -m;\$(find . | grep /ld-linux) --list-diagnostics | grep path.system_dirs"
385+
// x86_64
386+
// path.system_dirs[0x0]="/lib64/"
387+
// path.system_dirs[0x1]="/usr/lib64/"
336388
//
337-
// This list was taken from the output of:
389+
// On most distributions, including Fedora and derivatives, this yields the following
390+
// ldconfig system search paths.
338391
//
339-
// docker run --rm -ti redhat/ubi9 /usr/lib/ld-linux-aarch64.so.1 --help | grep -A6 "Shared library search path"
392+
// TODO: Add other architectures that have custom `add_system_dir` macros (e.g. riscv)
393+
// TODO: Replace with executing the container's dynamic linker with `--list-diagnostics`?
340394
func nonDebianSystemSearchPaths() []string {
341-
return []string{"/lib64", "/usr/lib64"}
395+
var paths []string
396+
paths = append(paths, "/lib", "/usr/lib")
397+
switch runtime.GOARCH {
398+
case "amd64":
399+
paths = append(paths,
400+
"/lib64",
401+
"/usr/lib64",
402+
"/libx32",
403+
"/usr/libx32",
404+
)
405+
case "arm64":
406+
paths = append(paths,
407+
"/lib64",
408+
"/usr/lib64",
409+
)
410+
}
411+
return paths
342412
}
343413

344-
// debianSystemSearchPaths returns the system search paths for Debian-like
345-
// systems.
414+
// debianSystemSearchPaths returns the system search paths for Debian-like systems.
415+
// (note that Debian-like systems include Ubuntu systems)
416+
//
417+
// Debian (and derivatives) apply their multi-arch patch to glibc, which modifies ldconfig to
418+
// use the same set of system paths as the dynamic linker. These paths are going to include the
419+
// multi-arch directory _and_ by default "/lib" and "/usr/lib" for compatibility.
346420
//
347-
// This list was taken from the output of:
421+
// To get the list of system paths, you can invoke the dynamic linker with `--list-diagnostics` and
422+
// look for "path.system_dirs". For example
348423
//
349-
// docker run --rm -ti ubuntu /usr/lib/aarch64-linux-gnu/ld-linux-aarch64.so.1 --help | grep -A6 "Shared library search path"
424+
// $ docker run --rm -ti ubuntu bash -c "uname -m;\$(find . | grep /ld-linux | head -1) --list-diagnostics | grep path.system_dirs"
425+
// x86_64
426+
// path.system_dirs[0x0]="/lib/x86_64-linux-gnu/"
427+
// path.system_dirs[0x1]="/usr/lib/x86_64-linux-gnu/"
428+
// path.system_dirs[0x2]="/lib/"
429+
// path.system_dirs[0x3]="/usr/lib/"
430+
//
431+
// $ docker run --rm -ti debian bash -c "uname -m;\$(find . | grep /ld-linux | head -1) --list-diagnostics | grep path.system_dirs"
432+
// x86_64
433+
// path.system_dirs[0x0]="/lib/x86_64-linux-gnu/"
434+
// path.system_dirs[0x1]="/usr/lib/x86_64-linux-gnu/"
435+
// path.system_dirs[0x2]="/lib/"
436+
// path.system_dirs[0x3]="/usr/lib/"
437+
//
438+
// This yields the following ldconfig system search paths.
439+
//
440+
// TODO: Add other architectures that have custom `add_system_dir` macros (e.g. riscv)
441+
// TODO: Replace with executing the container's dynamic linker with `--list-diagnostics`?
350442
func debianSystemSearchPaths() []string {
351443
var paths []string
352444
switch runtime.GOARCH {
@@ -362,6 +454,5 @@ func debianSystemSearchPaths() []string {
362454
)
363455
}
364456
paths = append(paths, "/lib", "/usr/lib")
365-
366457
return paths
367458
}

tests/e2e/nvidia-container-toolkit_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,4 +570,23 @@ EOF`)
570570
Expect(output).To(Equal(expectedOutput))
571571
})
572572
})
573+
574+
When("running a ubi9 container", Ordered, func() {
575+
var (
576+
expectedOutput string
577+
)
578+
BeforeAll(func(ctx context.Context) {
579+
_, _, err := runner.Run(`docker pull redhat/ubi9`)
580+
Expect(err).ToNot(HaveOccurred())
581+
582+
expectedOutput, _, err = runner.Run(`docker run --rm --runtime=runc redhat/ubi9 bash -c "ldconfig -p | grep libc.so."`)
583+
Expect(err).ToNot(HaveOccurred())
584+
})
585+
586+
It("should include the system libraries when using the nvidia-container-runtime", func(ctx context.Context) {
587+
output, _, err := runner.Run(`docker run --rm --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all redhat/ubi9 bash -c "ldconfig -p | grep libc.so."`)
588+
Expect(err).ToNot(HaveOccurred())
589+
Expect(output).To(Equal(expectedOutput))
590+
})
591+
})
573592
})

0 commit comments

Comments
 (0)