Skip to content

Commit 01fe641

Browse files
committed
Create .so symlinks for driver libraries in container
This change adds an opt-in feature for creating .so symlinks to all injected driver files in a container. If features.dot-so-symlinks = true is set in the config.toml, the creation of symlinks for driver files is enabled. This can also be triggered on a per-container basis using the envvar NVIDIA_DOT_SO_SYMLINKS=enabled. Signed-off-by: Evan Lezar <[email protected]>
1 parent 3fa03d0 commit 01fe641

File tree

11 files changed

+253
-24
lines changed

11 files changed

+253
-24
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# NVIDIA Container Toolkit Changelog
22

3+
* Add a hook to create `.so` symlinks for driver libraries in a container.
4+
35
## v1.15.0-rc.4
46
* Add a `--spec-dir` option to the `nvidia-ctk cdi generate` command. This allows specs outside of `/etc/cdi` and `/var/run/cdi` to be processed.
57
* Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`.
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/**
2+
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
**/
16+
17+
package dotsosymlinks
18+
19+
import (
20+
"fmt"
21+
"os"
22+
"path/filepath"
23+
"strings"
24+
25+
"github.com/urfave/cli/v2"
26+
27+
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
28+
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
29+
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
30+
)
31+
32+
type command struct {
33+
logger logger.Interface
34+
}
35+
36+
// config holds the values parsed from the command line flags of the
// create-dot-so-symlinks hook.
type config struct {
	// containerSpec is the destination of the --container-spec flag and is
	// passed to oci.LoadContainerState.
	containerSpec string
	// driverVersion is the destination of the required --driver-version flag.
	// Libraries are matched on the ".so.<driverVersion>" suffix.
	driverVersion string
}
40+
41+
// NewCommand constructs a hook command with the specified logger
42+
func NewCommand(logger logger.Interface) *cli.Command {
43+
c := command{
44+
logger: logger,
45+
}
46+
return c.build()
47+
}
48+
49+
// build
50+
func (m command) build() *cli.Command {
51+
cfg := config{}
52+
53+
// Create the '' command
54+
c := cli.Command{
55+
Name: "create-dot-so-symlinks",
56+
Usage: "A hook to create .so symlinks in the container.",
57+
Action: func(c *cli.Context) error {
58+
return m.run(c, &cfg)
59+
},
60+
}
61+
62+
c.Flags = []cli.Flag{
63+
&cli.StringFlag{
64+
Name: "container-spec",
65+
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
66+
Destination: &cfg.containerSpec,
67+
},
68+
&cli.StringFlag{
69+
Name: "driver-version",
70+
Usage: "specify the driver version for which the symlinks are to be created. This assumes driver libraries have the .so.`VERSION` suffix.",
71+
Destination: &cfg.driverVersion,
72+
Required: true,
73+
},
74+
}
75+
76+
return &c
77+
}
78+
79+
func (m command) run(c *cli.Context, cfg *config) error {
80+
s, err := oci.LoadContainerState(cfg.containerSpec)
81+
if err != nil {
82+
return fmt.Errorf("failed to load container state: %v", err)
83+
}
84+
85+
containerRoot, err := s.GetContainerRoot()
86+
if err != nil {
87+
return fmt.Errorf("failed to determined container root: %v", err)
88+
}
89+
90+
locator := lookup.NewLibraryLocator(
91+
lookup.WithLogger(m.logger),
92+
lookup.WithRoot(containerRoot),
93+
lookup.WithOptional(true),
94+
)
95+
libs, err := locator.Locate("*.so." + cfg.driverVersion)
96+
if err != nil {
97+
return fmt.Errorf("failed to locate libraries for driver version %v: %v", cfg.driverVersion, err)
98+
}
99+
100+
for _, lib := range libs {
101+
if !strings.HasSuffix(lib, ".so."+cfg.driverVersion) {
102+
continue
103+
}
104+
libSoPath := strings.TrimSuffix(lib, "."+cfg.driverVersion)
105+
libSoXPaths, err := filepath.Glob(libSoPath + ".[0-9]")
106+
if len(libSoXPaths) != 1 || err != nil {
107+
continue
108+
}
109+
err = os.Symlink(filepath.Base(libSoXPaths[0]), libSoPath)
110+
if err != nil {
111+
continue
112+
}
113+
}
114+
return nil
115+
}

cmd/nvidia-ctk/hook/hook.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
"github.com/urfave/cli/v2"
2424

25+
createdotsosymlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-dot-so-symlinks"
2526
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
2627
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
2728
)
@@ -50,6 +51,7 @@ func (m hookCommand) build() *cli.Command {
5051
ldcache.NewCommand(m.logger),
5152
symlinks.NewCommand(m.logger),
5253
chmod.NewCommand(m.logger),
54+
createdotsosymlinks.NewCommand(m.logger),
5355
}
5456

5557
return &hook

internal/config/features.go

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@ package config
1919
type featureName string
2020

2121
const (
22-
FeatureGDS = featureName("gds")
23-
FeatureMOFED = featureName("mofed")
24-
FeatureNVSWITCH = featureName("nvswitch")
25-
FeatureGDRCopy = featureName("gdrcopy")
22+
FeatureGDS = featureName("gds")
23+
FeatureMOFED = featureName("mofed")
24+
FeatureNVSWITCH = featureName("nvswitch")
25+
FeatureGDRCopy = featureName("gdrcopy")
26+
FeatureDotSoSymlinks = featureName("dot-so-symlinks")
2627
)
2728

2829
// features specifies a set of named features.
@@ -31,6 +32,9 @@ type features struct {
3132
MOFED *feature `toml:"mofed,omitempty"`
3233
NVSWITCH *feature `toml:"nvswitch,omitempty"`
3334
GDRCopy *feature `toml:"gdrcopy,omitempty"`
35+
// DotSoSymlinks allows for the creation of .so symlinks to .so.1 driver
36+
// files to be opted in to.
37+
DotSoSymlinks *feature `toml:"dot-so-symlinks,omitempty"`
3438
}
3539

3640
type feature bool
@@ -40,10 +44,11 @@ type feature bool
4044
// variables can also be supplied.
4145
func (fs features) IsEnabled(n featureName, in ...getenver) bool {
4246
featureEnvvars := map[featureName]string{
43-
FeatureGDS: "NVIDIA_GDS",
44-
FeatureMOFED: "NVIDIA_MOFED",
45-
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
46-
FeatureGDRCopy: "NVIDIA_GDRCOPY",
47+
FeatureGDS: "NVIDIA_GDS",
48+
FeatureMOFED: "NVIDIA_MOFED",
49+
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
50+
FeatureGDRCopy: "NVIDIA_GDRCOPY",
51+
FeatureDotSoSymlinks: "NVIDIA_DOT_SO_SYMLINKS",
4752
}
4853

4954
envvar := featureEnvvars[n]
@@ -56,6 +61,8 @@ func (fs features) IsEnabled(n featureName, in ...getenver) bool {
5661
return fs.NVSWITCH.isEnabled(envvar, in...)
5762
case FeatureGDRCopy:
5863
return fs.GDRCopy.isEnabled(envvar, in...)
64+
case FeatureDotSoSymlinks:
65+
return fs.DotSoSymlinks.isEnabled(envvar, in...)
5966
default:
6067
return false
6168
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/**
2+
# Copyright 2024 NVIDIA CORPORATION
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
**/
16+
17+
package discover
18+
19+
// NewDotSoSymlinksDiscoverer creates a discoverer that generates a hook to create .so symlinks in
20+
// a container.
21+
func NewDotSoSymlinksDiscoverer(nvidiaCTKPath string, version string) Discover {
22+
return CreateNvidiaCTKHook(
23+
nvidiaCTKPath,
24+
"create-dot-so-symlinks",
25+
"--driver-version", version,
26+
)
27+
}

internal/discover/graphics.go

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ import (
2727
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
2828
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
2929
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
30-
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
3130
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
3231
)
3332

@@ -252,20 +251,16 @@ func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidia
252251
}
253252

254253
func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) {
255-
libCudaPaths, err := cuda.New(
256-
driver.Libraries(),
257-
).Locate(".*.*")
254+
libRoot, err := driver.LibraryRoot()
258255
if err != nil {
259-
return nil, fmt.Errorf("failed to locate libcuda.so: %v", err)
256+
return nil, fmt.Errorf("failed to determine driver library root: %w", err)
260257
}
261-
libcudaPath := libCudaPaths[0]
262258

263-
version := strings.TrimPrefix(filepath.Base(libcudaPath), "libcuda.so.")
264-
if version == "" {
265-
return nil, fmt.Errorf("failed to determine libcuda.so version from path: %q", libcudaPath)
259+
version, err := driver.Version()
260+
if err != nil {
261+
return nil, fmt.Errorf("failed to determine driver version: %w", err)
266262
}
267263

268-
libRoot := filepath.Dir(libcudaPath)
269264
xorgLibs := NewMounts(
270265
logger,
271266
lookup.NewFileLocator(

internal/lookup/root/options.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,9 @@ func WithConfigSearchPaths(paths ...string) Option {
4343
d.configSearchPaths = paths
4444
}
4545
}
46+
47+
func WithVersion(version string) Option {
48+
return func(d *Driver) {
49+
d.version = version
50+
}
51+
}

internal/lookup/root/root.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,30 @@
1717
package root
1818

1919
import (
20+
"fmt"
2021
"os"
2122
"path/filepath"
23+
"strings"
24+
"sync"
2225

2326
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
2427
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
2528
)
2629

2730
// Driver represents a filesystem in which a set of drivers or devices is defined.
2831
type Driver struct {
32+
sync.Mutex
2933
logger logger.Interface
3034
// Root represents the root from the perspective of the driver libraries and binaries.
3135
Root string
3236
// librarySearchPaths specifies explicit search paths for discovering libraries.
3337
librarySearchPaths []string
3438
// configSearchPaths specified explicit search paths for discovering driver config files.
3539
configSearchPaths []string
40+
// version stores the driver version. This can be specified at construction or cached on subsequent calls.
41+
version string
42+
// libraryRoot stores the absolute path where the driver libraries (libcuda.so.<VERSION>) can be found.
43+
libraryRoot string
3644
}
3745

3846
// New creates a new Driver root using the specified options.
@@ -80,6 +88,62 @@ func (r *Driver) configSearchOptions() []lookup.Option {
8088
}
8189
}
8290

91+
// Version returns the driver version as a string.
92+
func (r *Driver) Version() (string, error) {
93+
r.Lock()
94+
defer r.Unlock()
95+
if r.version != "" {
96+
return r.version, nil
97+
}
98+
99+
libcudaPath, err := r.libcudaPath()
100+
if err != nil {
101+
return "", fmt.Errorf("failed to locate libcuda.so: %v", err)
102+
}
103+
104+
version := strings.TrimPrefix(filepath.Base(libcudaPath), "libcuda.so.")
105+
if version == "" {
106+
return "", fmt.Errorf("failed to determine libcuda.so version from path: %q", libcudaPath)
107+
}
108+
109+
r.version = version
110+
return r.version, nil
111+
}
112+
113+
// LibraryRoot returns the folder in which the driver libraries can be found.
114+
func (r *Driver) LibraryRoot() (string, error) {
115+
r.Lock()
116+
defer r.Unlock()
117+
if r.libraryRoot != "" {
118+
return r.libraryRoot, nil
119+
}
120+
121+
libcudaPath, err := r.libcudaPath()
122+
if err != nil {
123+
return "", fmt.Errorf("failed to locate libcuda.so: %v", err)
124+
}
125+
126+
r.libraryRoot = filepath.Dir(libcudaPath)
127+
return r.libraryRoot, nil
128+
}
129+
130+
// libcudaPath returns the path to libcuda.so.*.* in the driver root.
131+
func (r *Driver) libcudaPath() (string, error) {
132+
pattern := "libcuda.so.*.*"
133+
134+
locator := r.Libraries()
135+
paths, err := locator.Locate(pattern)
136+
if err != nil {
137+
return "", fmt.Errorf("failed to locate %v: %v", pattern, err)
138+
}
139+
140+
libcudaPath := paths[0]
141+
if len(paths) > 1 {
142+
r.logger.Warningf("Selecting %v out of multiple libcuda.so paths.", libcudaPath, paths)
143+
}
144+
return libcudaPath, nil
145+
}
146+
83147
// normalizeSearchPaths takes a list of paths and normalized these.
84148
// Each of the elements in the list is expanded if it is a path list and the
85149
// resultant list is returned.

0 commit comments

Comments
 (0)