Skip to content

Commit baa5096

Browse files
committed
Create .so symlinks for driver libraries in container
This change adds an opt-in feature for creating .so symlinks to all injected driver files in a container. If features.dot-so-symlinks = true is set in the config.toml, the creation of symlinks for driver files is enabled. This can also be triggered on a per-container basis using the envvar NVIDIA_DOT_SO_SYMLINKS=enabled. Signed-off-by: Evan Lezar <[email protected]>
1 parent 26e52b8 commit baa5096

File tree

12 files changed

+252
-25
lines changed

12 files changed

+252
-25
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# NVIDIA Container Toolkit Changelog
22

3+
* Add a hook to create `.so` symlinks for driver libraries in a container.
4+
35
## v1.15.0-rc.4
46
* Add a `--spec-dir` option to the `nvidia-ctk cdi generate` command. This allows specs outside of `/etc/cdi` and `/var/run/cdi` to be processed.
57
* Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`.
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/**
2+
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
**/
16+
17+
package dotsosymlinks
18+
19+
import (
20+
"fmt"
21+
"os"
22+
"path/filepath"
23+
"strings"
24+
25+
"github.com/urfave/cli/v2"
26+
27+
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
28+
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
29+
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
30+
)
31+
32+
// command is the receiver for the create-dot-so-symlinks hook command. It
// holds the logger passed to NewCommand.
type command struct {
33+
logger logger.Interface
34+
}
35+
36+
// config holds the values bound to the command's flags: containerSpec is the
// destination of --container-spec and driverVersion is the destination of
// --driver-version.
type config struct {
37+
containerSpec string
38+
driverVersion string
39+
}
40+
41+
// NewCommand constructs a hook command with the specified logger
42+
func NewCommand(logger logger.Interface) *cli.Command {
43+
c := command{
44+
logger: logger,
45+
}
46+
return c.build()
47+
}
48+
49+
// build
50+
func (m command) build() *cli.Command {
51+
cfg := config{}
52+
53+
// Create the '' command
54+
c := cli.Command{
55+
Name: "create-dot-so-symlinks",
56+
Usage: "A hook to create symlinks in the container. This can be used to process CSV mount specs",
57+
Action: func(c *cli.Context) error {
58+
return m.run(c, &cfg)
59+
},
60+
}
61+
62+
c.Flags = []cli.Flag{
63+
&cli.StringFlag{
64+
Name: "container-spec",
65+
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
66+
Destination: &cfg.containerSpec,
67+
},
68+
&cli.StringFlag{
69+
Name: "driver-version",
70+
Usage: "specify the driver version for which the symlinks are to be created. This assumes driver libraries have the .so.`VERSION` suffix.",
71+
Destination: &cfg.driverVersion,
72+
Required: true,
73+
},
74+
}
75+
76+
return &c
77+
}
78+
79+
func (m command) run(c *cli.Context, cfg *config) error {
80+
s, err := oci.LoadContainerState(cfg.containerSpec)
81+
if err != nil {
82+
return fmt.Errorf("failed to load container state: %v", err)
83+
}
84+
85+
containerRoot, err := s.GetContainerRoot()
86+
if err != nil {
87+
return fmt.Errorf("failed to determined container root: %v", err)
88+
}
89+
90+
libs, err := lookup.NewLibraryLocator(
91+
lookup.WithLogger(m.logger),
92+
lookup.WithRoot(containerRoot),
93+
lookup.WithOptional(true),
94+
).Locate("*.so." + cfg.driverVersion)
95+
if err != nil {
96+
return fmt.Errorf("failed to locate libraries for driver version %v: %v", cfg.driverVersion, err)
97+
}
98+
99+
for _, lib := range libs {
100+
if !strings.HasSuffix(lib, ".so."+cfg.driverVersion) {
101+
continue
102+
}
103+
libSoPath := strings.TrimSuffix(lib, "."+cfg.driverVersion)
104+
libSoXPaths, err := filepath.Glob(libSoPath + ".[0-9]")
105+
if len(libSoXPaths) != 1 || err != nil {
106+
continue
107+
}
108+
err = os.Symlink(filepath.Base(libSoXPaths[0]), libSoPath)
109+
if err != nil {
110+
continue
111+
}
112+
}
113+
return nil
114+
}

cmd/nvidia-ctk/hook/hook.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
"github.com/urfave/cli/v2"
2424

25+
createdotsosymlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-dot-so-symlinks"
2526
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
2627
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
2728
)
@@ -50,6 +51,7 @@ func (m hookCommand) build() *cli.Command {
5051
ldcache.NewCommand(m.logger),
5152
symlinks.NewCommand(m.logger),
5253
chmod.NewCommand(m.logger),
54+
createdotsosymlinks.NewCommand(m.logger),
5355
}
5456

5557
return &hook

internal/config/features.go

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,15 @@ package config
1919
type featureName string
2020

2121
const (
22-
FeatureGDS = featureName("gds")
23-
FeatureMOFED = featureName("mofed")
24-
FeatureNVSWITCH = featureName("nvswitch")
25-
FeatureGDRCopy = featureName("gdrcopy")
22+
FeatureGDS = featureName("gds")
23+
FeatureMOFED = featureName("mofed")
24+
FeatureNVSWITCH = featureName("nvswitch")
25+
FeatureGDRCopy = featureName("gdrcopy")
26+
FeatureDotSoSymlinks = featureName("dot-so-symlinks")
27+
28+
// featureNotControlledByEnvvar is used for features that have no envvar to
29+
// allow per-container opt-in.
30+
featureNotControlledByEnvvar = ""
2631
)
2732

2833
// features specifies a set of named features.
@@ -31,6 +36,9 @@ type features struct {
3136
MOFED *feature `toml:"mofed,omitempty"`
3237
NVSWITCH *feature `toml:"nvswitch,omitempty"`
3338
GDRCopy *feature `toml:"gdrcopy,omitempty"`
39+
// DotSoSymlinks allows for the creation of .so symlinks to .so.1 driver
40+
// files to be opted in to.
41+
DotSoSymlinks *feature `toml:"dot-so-symlinks,omitempty"`
3442
}
3543

3644
type feature bool
@@ -40,10 +48,11 @@ type feature bool
4048
// variables can also be supplied.
4149
func (fs features) IsEnabled(n featureName, in ...getenver) bool {
4250
featureEnvvars := map[featureName]string{
43-
FeatureGDS: "NVIDIA_GDS",
44-
FeatureMOFED: "NVIDIA_MOFED",
45-
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
46-
FeatureGDRCopy: "NVIDIA_GDRCOPY",
51+
FeatureGDS: "NVIDIA_GDS",
52+
FeatureMOFED: "NVIDIA_MOFED",
53+
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
54+
FeatureGDRCopy: "NVIDIA_GDRCOPY",
55+
FeatureDotSoSymlinks: "NVIDIA_DOT_SO_SYMLINKS",
4756
}
4857

4958
envvar := featureEnvvars[n]
@@ -56,6 +65,8 @@ func (fs features) IsEnabled(n featureName, in ...getenver) bool {
5665
return fs.NVSWITCH.isEnabled(envvar, in...)
5766
case FeatureGDRCopy:
5867
return fs.GDRCopy.isEnabled(envvar, in...)
68+
case FeatureDotSoSymlinks:
69+
return fs.DotSoSymlinks.isEnabled(envvar, in...)
5970
default:
6071
return false
6172
}
@@ -69,7 +80,7 @@ func (f *feature) isEnabled(envvar string, ins ...getenver) bool {
6980
if f != nil {
7081
return bool(*f)
7182
}
72-
if envvar == "" {
83+
if envvar == featureNotControlledByEnvvar {
7384
return false
7485
}
7586
for _, in := range ins {
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/**
2+
# Copyright 2024 NVIDIA CORPORATION
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
**/
16+
17+
package discover
18+
19+
// NewDotSoSymlinksDiscoverer creates a discoverer that generates a hook to create .so symlinks in
20+
// a container.
21+
func NewDotSoSymlinksDiscoverer(nvidiaCTKPath string, version string) Discover {
22+
return CreateNvidiaCTKHook(
23+
nvidiaCTKPath,
24+
"create-dot-so-symlinks",
25+
"--driver-version", version,
26+
)
27+
}

internal/discover/graphics.go

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ import (
2727
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
2828
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
2929
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
30-
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
3130
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
3231
)
3332

@@ -256,20 +255,16 @@ func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidia
256255
}
257256

258257
func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) {
259-
libCudaPaths, err := cuda.New(
260-
driver.Libraries(),
261-
).Locate(".*.*")
258+
libRoot, err := driver.LibraryRoot()
262259
if err != nil {
263-
return nil, fmt.Errorf("failed to locate libcuda.so: %v", err)
260+
return nil, fmt.Errorf("failed to determine driver library root: %w", err)
264261
}
265-
libcudaPath := libCudaPaths[0]
266262

267-
version := strings.TrimPrefix(filepath.Base(libcudaPath), "libcuda.so.")
268-
if version == "" {
269-
return nil, fmt.Errorf("failed to determine libcuda.so version from path: %q", libcudaPath)
263+
version, err := driver.Version()
264+
if err != nil {
265+
return nil, fmt.Errorf("failed to determine driver version: %w", err)
270266
}
271267

272-
libRoot := filepath.Dir(libcudaPath)
273268
xorgLibs := NewMounts(
274269
logger,
275270
lookup.NewFileLocator(

internal/lookup/root/options.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,9 @@ func WithLibrarySearchPaths(paths ...string) Option {
3737
d.librarySearchPaths = paths
3838
}
3939
}
40+
41+
func WithVersion(version string) Option {
42+
return func(d *Driver) {
43+
d.version = version
44+
}
45+
}

internal/lookup/root/root.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,28 @@
1717
package root
1818

1919
import (
20+
"fmt"
2021
"path/filepath"
22+
"strings"
23+
"sync"
2124

2225
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
2326
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
27+
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
2428
)
2529

2630
// Driver represents a filesystem in which a set of drivers or devices is defined.
2731
type Driver struct {
32+
// Mutex is held by Version and LibraryRoot while they read and populate
// the cached version and libraryRoot fields.
sync.Mutex
2833
logger logger.Interface
2934
// Root represents the root from the perspective of the driver libraries and binaries.
3035
Root string
3136
// librarySearchPaths specifies explicit search paths for discovering libraries.
3237
librarySearchPaths []string
38+
// version stores the driver version. This can be specified at construction or cached on subsequent calls.
39+
version string
40+
// libraryRoot stores the absolute path where the driver libraries (libcuda.so.<VERSION>) can be found.
41+
libraryRoot string
3342
}
3443

3544
// New creates a new Driver root using the specified options.
@@ -53,6 +62,50 @@ func (r *Driver) Libraries() lookup.Locator {
5362
)
5463
}
5564

65+
// Version returns the driver version as a string.
66+
func (r *Driver) Version() (string, error) {
67+
r.Lock()
68+
defer r.Unlock()
69+
if r.version != "" {
70+
return r.version, nil
71+
}
72+
73+
libCudaPaths, err := cuda.New(
74+
r.Libraries(),
75+
).Locate(".*.*")
76+
if err != nil {
77+
return "", fmt.Errorf("failed to locate libcuda.so: %v", err)
78+
}
79+
libcudaPath := libCudaPaths[0]
80+
81+
version := strings.TrimPrefix(filepath.Base(libcudaPath), "libcuda.so.")
82+
if version == "" {
83+
return "", fmt.Errorf("failed to determine libcuda.so version from path: %q", libcudaPath)
84+
}
85+
86+
r.version = version
87+
return r.version, nil
88+
}
89+
90+
// LibraryRoot returns the folder in which the driver libraries can be found.
91+
func (r *Driver) LibraryRoot() (string, error) {
92+
r.Lock()
93+
defer r.Unlock()
94+
if r.libraryRoot != "" {
95+
return r.libraryRoot, nil
96+
}
97+
98+
libCudaPaths, err := cuda.New(
99+
r.Libraries(),
100+
).Locate(".*.*")
101+
if err != nil {
102+
return "", fmt.Errorf("failed to locate libcuda.so: %v", err)
103+
}
104+
105+
r.libraryRoot = filepath.Dir(libCudaPaths[0])
106+
return r.libraryRoot, nil
107+
}
108+
56109
// normalizeSearchPaths takes a list of paths and normalized these.
57110
// Each of the elements in the list is expanded if it is a path list and the
58111
// resultant list is returned.

0 commit comments

Comments
 (0)