Skip to content

Commit 4b73a12

Browse files
committed
Create .so and SONAME symlinks for driver libraries
This change ensures that .so and SONAME symlinks are created for driver libraries in the container. Signed-off-by: Evan Lezar <[email protected]>
1 parent 94ce80a commit 4b73a12

File tree

4 files changed

+290
-11
lines changed

4 files changed

+290
-11
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## v1.18.0-rc.2
44

5+
- Ensure that .so symlinks are created for driver libraries in the container
56
- Load settings from config.toml file during CDI generation
67
- Use securejoin to resolve /proc
78
- Refactor nvml CDI spec generation for consistency

internal/discover/symlinks.go

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,21 @@
1717
package discover
1818

1919
import (
20+
"debug/elf"
2021
"fmt"
2122
"path/filepath"
23+
"strings"
2224
)
2325

26+
// Symlink describes a symbolic link as a pair of paths: the target that the
// link refers to and the location of the link itself.
type Symlink struct {
	// target is what the link points at.
	target string
	// link is the path at which the link is created.
	link string
}

// String renders the symlink in the form "<target>::<link>".
func (s *Symlink) String() string {
	return s.target + "::" + s.link
}
34+
2435
type additionalSymlinks struct {
2536
Discover
2637
version string
@@ -60,7 +71,14 @@ func (d *additionalSymlinks) Hooks() ([]Hook, error) {
6071
}
6172
processedPaths[mount.Path] = true
6273

63-
for _, link := range d.getLinksForMount(mount.Path) {
74+
linksForMount := d.getLinksForMount(mount.Path)
75+
soSymlinks, err := d.getDotSoSymlinks(mount.HostPath)
76+
if err != nil {
77+
soSymlinks = nil
78+
}
79+
linksForMount = append(linksForMount, soSymlinks...)
80+
81+
for _, link := range linksForMount {
6482
if processedLinks[link] {
6583
continue
6684
}
@@ -110,3 +128,73 @@ func (d additionalSymlinks) isDriverLibrary(libraryName string, filename string)
110128
match, _ := filepath.Match(pattern, filename)
111129
return match
112130
}
131+
132+
func (d *additionalSymlinks) getDotSoSymlinks(libraryPath string) ([]string, error) {
133+
libraryDir, libraryName := filepath.Split(libraryPath)
134+
if !d.isDriverLibrary("*", libraryName) {
135+
return nil, nil
136+
}
137+
138+
soname, err := getSoname(libraryPath)
139+
if err != nil {
140+
return nil, err
141+
}
142+
if soname == "" || soname == libraryName {
143+
return nil, nil
144+
}
145+
146+
var soSymlinks []string
147+
// Create the SONAME -> libraryName symlink.
148+
sonameLinkPath := filepath.Join(libraryDir, soname)
149+
s := Symlink{
150+
target: libraryName,
151+
link: sonameLinkPath,
152+
}
153+
soSymlinks = append(soSymlinks, s.String())
154+
155+
// Create the .so -> SONAME symlink
156+
soLink := getSoLink(soname)
157+
if soLink != "" && soLink != soname && soLink != libraryName {
158+
s := Symlink{
159+
target: filepath.Base(soname),
160+
link: filepath.Join(libraryDir, soLink),
161+
}
162+
soSymlinks = append(soSymlinks, s.String())
163+
}
164+
return soSymlinks, nil
165+
}
166+
167+
// getSoname reads the DT_SONAME entry of the ELF file at libraryPath.
//
// It returns an empty string and a nil error when the file has no SONAME
// entry, and an error when the file cannot be opened as ELF, its dynamic
// section cannot be read, or more than one SONAME entry is present.
//
// This is a function variable so that tests can substitute their own
// implementation.
var getSoname = func(libraryPath string) (string, error) {
	f, err := elf.Open(libraryPath)
	if err != nil {
		return "", err
	}
	defer f.Close()

	entries, err := f.DynString(elf.DT_SONAME)
	if err != nil {
		return "", err
	}

	switch len(entries) {
	case 0:
		return "", nil
	case 1:
		return entries[0], nil
	default:
		return "", fmt.Errorf("multiple SONAMEs detected for %v: %v", libraryPath, entries)
	}
}
188+
189+
// getSoLink derives the name of the .so symlink for the given soname by
// stripping trailing extensions one at a time until the name ends in ".so".
// For example, "libcuda.so.1" yields "libcuda.so". A name that already ends
// in ".so" is returned as is, and the empty string is returned when no ".so"
// extension is found.
func getSoLink(soname string) string {
	name := soname
	for {
		ext := filepath.Ext(name)
		switch ext {
		case "":
			// Ran out of extensions without encountering ".so".
			return ""
		case ".so":
			return name
		default:
			name = strings.TrimSuffix(name, ext)
		}
	}
}

internal/discover/symlinks_test.go

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,3 +334,118 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) {
334334
})
335335
}
336336
}
337+
338+
func TestGetDotSoSymlinks(t *testing.T) {
339+
testCases := []struct {
340+
description string
341+
libraryPath string
342+
getSonameFunc func(string) (string, error)
343+
expectedError error
344+
expectedSymlinks []string
345+
}{
346+
{
347+
description: "libcuda.soname links",
348+
libraryPath: "/usr/lib/libcuda.so.999.88.77",
349+
getSonameFunc: func(s string) (string, error) {
350+
return "libcuda.so.1", nil
351+
},
352+
expectedError: nil,
353+
expectedSymlinks: []string{
354+
"libcuda.so.999.88.77::/usr/lib/libcuda.so.1",
355+
"libcuda.so.1::/usr/lib/libcuda.so",
356+
},
357+
},
358+
{
359+
description: "empty soname is ignored",
360+
libraryPath: "/usr/lib/libcuda.so.999.88.77",
361+
getSonameFunc: func(s string) (string, error) {
362+
return "", nil
363+
},
364+
expectedError: nil,
365+
expectedSymlinks: nil,
366+
},
367+
{
368+
description: "equal soname is ignored",
369+
libraryPath: "/usr/lib/libcuda.so.999.88.77",
370+
getSonameFunc: func(s string) (string, error) {
371+
return "libcuda.so.999.88.77", nil
372+
},
373+
expectedError: nil,
374+
expectedSymlinks: nil,
375+
},
376+
}
377+
378+
for _, tc := range testCases {
379+
t.Run(tc.description, func(t *testing.T) {
380+
defer setGetSoname(tc.getSonameFunc)()
381+
382+
sut := &additionalSymlinks{version: "*.*"}
383+
symlinks, err := sut.getDotSoSymlinks(tc.libraryPath)
384+
385+
if tc.expectedError == nil {
386+
require.NoError(t, err)
387+
} else {
388+
require.EqualError(t, err, tc.expectedError.Error())
389+
}
390+
391+
require.EqualValues(t, tc.expectedSymlinks, symlinks)
392+
})
393+
}
394+
}
395+
396+
func TestGetSoLink(t *testing.T) {
397+
testCases := []struct {
398+
description string
399+
input string
400+
expectedSoLink string
401+
}{
402+
{
403+
description: "empty string",
404+
input: "",
405+
expectedSoLink: "",
406+
},
407+
{
408+
description: "cuda driver library",
409+
input: "libcuda.so.999.88.77",
410+
expectedSoLink: "libcuda.so",
411+
},
412+
{
413+
description: "beta cuda driver library",
414+
input: "libcuda.so.999.88",
415+
expectedSoLink: "libcuda.so",
416+
},
417+
{
418+
description: "no .so in libname",
419+
input: "foo.bar.baz",
420+
expectedSoLink: "",
421+
},
422+
{
423+
description: "multiple .so in libname",
424+
input: "foo.so.so.566",
425+
expectedSoLink: "foo.so.so",
426+
},
427+
{
428+
description: "no suffix after so",
429+
input: "foo.so",
430+
expectedSoLink: "foo.so",
431+
},
432+
}
433+
434+
for _, tc := range testCases {
435+
t.Run(tc.description, func(t *testing.T) {
436+
437+
soLink := getSoLink(tc.input)
438+
439+
require.Equal(t, tc.expectedSoLink, soLink)
440+
})
441+
}
442+
}
443+
444+
func setGetSoname(override func(string) (string, error)) func() {
445+
original := getSoname
446+
getSoname = override
447+
448+
return func() {
449+
getSoname = original
450+
}
451+
}

tests/e2e/nvidia-container-toolkit_test.go

Lines changed: 85 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ import (
2929
// Integration tests for Docker runtime
3030
var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
3131
var runner Runner
32+
var hostDriverVersion string
33+
var hostDriverMajor string
3234

3335
// Install the NVIDIA Container Toolkit
3436
BeforeAll(func(ctx context.Context) {
@@ -50,6 +52,15 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
5052
err = installer.Install()
5153
Expect(err).ToNot(HaveOccurred())
5254
}
55+
56+
driverOutput, _, err := runner.Run("nvidia-smi -q | grep \"Driver Version\"")
57+
Expect(err).ToNot(HaveOccurred())
58+
parts := strings.SplitN(driverOutput, ":", 2)
59+
Expect(parts).To(HaveLen(2))
60+
61+
hostDriverVersion = strings.TrimSpace(parts[1])
62+
Expect(hostDriverVersion).ToNot(BeEmpty())
63+
hostDriverMajor = strings.SplitN(hostDriverVersion, ".", 2)[0]
5364
})
5465

5566
// GPUs are accessible in a container: Running nvidia-smi -L inside the
@@ -184,16 +195,7 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
184195
compatDriverVersion := strings.TrimPrefix(filepath.Base(compatOutput), "libcuda.so.")
185196
compatMajor := strings.SplitN(compatDriverVersion, ".", 2)[0]
186197

187-
driverOutput, _, err := runner.Run("nvidia-smi -q | grep \"Driver Version\"")
188-
Expect(err).ToNot(HaveOccurred())
189-
parts := strings.SplitN(driverOutput, ":", 2)
190-
Expect(parts).To(HaveLen(2))
191-
192-
hostDriverVersion := strings.TrimSpace(parts[1])
193-
Expect(hostDriverVersion).ToNot(BeEmpty())
194-
driverMajor := strings.SplitN(hostDriverVersion, ".", 2)[0]
195-
196-
if driverMajor >= compatMajor {
198+
if hostDriverMajor >= compatMajor {
197199
GinkgoLogr.Info("CUDA Forward Compatibility tests require an older driver version", "hostDriverVersion", hostDriverVersion, "compatDriverVersion", compatDriverVersion)
198200
Skip("CUDA Forward Compatibility tests require an older driver version")
199201
}
@@ -241,6 +243,8 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
241243
BeforeAll(func(ctx context.Context) {
242244
_, _, err := runner.Run("docker pull ubuntu")
243245
Expect(err).ToNot(HaveOccurred())
246+
_, _, err = runner.Run("docker pull busybox")
247+
Expect(err).ToNot(HaveOccurred())
244248
})
245249

246250
It("should include libcuda.so in the ldcache", func(ctx context.Context) {
@@ -257,6 +261,77 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
257261

258262
Expect(libs).To(ContainElements([]string{"libcuda.so", "libcuda.so.1"}))
259263
})
264+
265+
It("should include .so and SONAME symlinks", func(ctx context.Context) {
266+
symlinkOutput, _, err := runner.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all busybox ls -l /usr/lib/x86_64-linux-gnu/ | awk '{print $9, $11}'")
267+
Expect(err).ToNot(HaveOccurred())
268+
Expect(symlinkOutput).ToNot(BeEmpty())
269+
270+
// This produces output similar to:
271+
// We check this to ensure that we have valid driver library symlink
272+
// chains.
273+
// libcuda.so libcuda.so.1
274+
// libcuda.so.1 libcuda.so.570.133.20
275+
// libcuda.so.570.133.20
276+
// libcudadebugger.so libcudadebugger.so.1
277+
// libcudadebugger.so.1 libcudadebugger.so.570.133.20
278+
// libcudadebugger.so.570.133.20
279+
// libnvidia-ml.so libnvidia-ml.so.1
280+
// libnvidia-ml.so.1 libnvidia-ml.so.570.133.20
281+
// libnvidia-ml.so.570.133.20
282+
// libnvidia-nvvm.so libnvidia-nvvm.so.4
283+
// libnvidia-nvvm.so.4 libnvidia-nvvm.so.570.133.20
284+
// libnvidia-nvvm.so.570.133.20
285+
// libnvidia-opencl.so libnvidia-opencl.so.1
286+
// libnvidia-opencl.so.1 libnvidia-opencl.so.570.133.20
287+
// libnvidia-opencl.so.570.133.20
288+
// libnvidia-pkcs11-openssl3.so.570.133.20
289+
// libnvidia-pkcs11.so.570.133.20
290+
// libnvidia-ptxjitcompiler.so libnvidia-ptxjitcompiler.so.1
291+
// libnvidia-ptxjitcompiler.so.1 libnvidia-ptxjitcompiler.so.570.133.20
292+
// libnvidia-ptxjitcompiler.so.570.133.20
293+
294+
symlinkOutputLines := strings.Split(symlinkOutput, "\n")
295+
var symlinks []string
296+
var chain []string
297+
for _, line := range symlinkOutputLines {
298+
if strings.TrimSpace(line) == "" {
299+
continue
300+
}
301+
parts := strings.SplitN(line, " ", 2)
302+
chain = append(chain, parts...)
303+
if len(parts) == 1 {
304+
Expect(line).To(HaveSuffix(hostDriverMajor))
305+
Expect(chain).To(Or(HaveLen(5), HaveLen(1)))
306+
307+
if len(chain) == 1 {
308+
continue
309+
}
310+
311+
// The symlink chains have the pattern:
312+
// [A, A.1, A.1, A.driverVersion, A.driverVersion]
313+
// A has the suffix .so.
314+
Expect(chain[0]).To(HaveSuffix(".so"))
315+
for i, c := range chain {
316+
if i == 0 {
317+
continue
318+
}
319+
Expect(c).To(HavePrefix(chain[0]))
320+
if i == 2 || i == 4 {
321+
Expect(c).To(Equal(chain[i-1]))
322+
}
323+
if i == 3 {
324+
Expect(c).To(HaveSuffix(hostDriverVersion))
325+
}
326+
}
327+
328+
symlinks = append(symlinks, chain[0])
329+
chain = nil
330+
}
331+
}
332+
Expect(chain).To(BeNil())
333+
Expect(symlinks).To(ContainElements("libcuda.so", "libnvidia-ml.so"))
334+
})
260335
})
261336

262337
When("Running containers with shared mount propagation", Ordered, func() {

0 commit comments

Comments
 (0)