Skip to content

Commit b934c68

Browse files
authored
Merge pull request #1103 from elezar/reenable-nvsandboxutils
Reenable nvsandboxutils for driver discovery
2 parents be6a36c + 7bd65da commit b934c68

File tree

3 files changed

+48
-18
lines changed

3 files changed

+48
-18
lines changed

pkg/nvcdi/api.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,13 @@ const (
4545
// This was added with v1.17.5 of the NVIDIA Container Toolkit.
4646
HookEnableCudaCompat = HookName("enable-cuda-compat")
4747
)
48+
49+
// A FeatureFlag names a specific feature that can be toggled in the CDI API.
50+
// All features are off by default; a feature is switched on with WithFeatureFlag.
51+
type FeatureFlag string
52+
53+
const (
54+
// FeatureDisableNvsandboxUtils disables the use of nvsandboxutils when
55+
// querying devices. When it is set, New leaves the nvsandboxutils library unset (nil).
56+
FeatureDisableNvsandboxUtils = FeatureFlag("disable-nvsandbox-utils")
57+
)

pkg/nvcdi/lib.go

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ type nvcdilib struct {
5656

5757
mergedDeviceOptions []transform.MergedDeviceOption
5858

59+
featureFlags map[FeatureFlag]bool
60+
5961
disabledHooks disabledHooks
6062
hookCreator discover.HookCreator
6163
}
@@ -64,6 +66,7 @@ type nvcdilib struct {
6466
func New(opts ...Option) (Interface, error) {
6567
l := &nvcdilib{
6668
disabledHooks: make(disabledHooks),
69+
featureFlags: make(map[FeatureFlag]bool),
6770
}
6871
for _, opt := range opts {
6972
opt(l)
@@ -108,24 +111,7 @@ func New(opts ...Option) (Interface, error) {
108111
}
109112
l.nvmllib = nvml.New(nvmlOpts...)
110113
}
111-
// TODO: Repeated calls to nvsandboxutils.Init and Shutdown are causing
112-
// segmentation violations. Here we disabled nvsandbox utils unless explicitly
113-
// specified.
114-
// This will be reenabled as soon as we have more visibility into why this is
115-
// happening and a mechanism to detect and disable this if required.
116-
// if l.nvsandboxutilslib == nil {
117-
// var nvsandboxutilsOpts []nvsandboxutils.LibraryOption
118-
// // Set the library path for libnvidia-sandboxutils
119-
// candidates, err := l.driver.Libraries().Locate("libnvidia-sandboxutils.so.1")
120-
// if err != nil {
121-
// l.logger.Warningf("Ignoring error in locating libnvidia-sandboxutils.so.1: %v", err)
122-
// } else {
123-
// libNvidiaSandboxutilsPath := candidates[0]
124-
// l.logger.Infof("Using %v", libNvidiaSandboxutilsPath)
125-
// nvsandboxutilsOpts = append(nvsandboxutilsOpts, nvsandboxutils.WithLibraryPath(libNvidiaSandboxutilsPath))
126-
// }
127-
// l.nvsandboxutilslib = nvsandboxutils.New(nvsandboxutilsOpts...)
128-
// }
114+
l.nvsandboxutilslib = l.getNvsandboxUtilsLib()
129115
if l.devicelib == nil {
130116
l.devicelib = device.New(l.nvmllib)
131117
}
@@ -231,3 +217,26 @@ func (l *nvcdilib) getCudaVersionNvsandboxutils() (string, error) {
231217
}
232218
return version, nil
233219
}
220+
221+
// getNvsandboxUtilsLib returns the nvsandboxutilslib to use for CDI spec
222+
// generation. It returns nil when FeatureDisableNvsandboxUtils is set and
// reuses an already-configured library when one is present; otherwise a new
// library is constructed.
223+
func (l *nvcdilib) getNvsandboxUtilsLib() nvsandboxutils.Interface {
224+
if l.featureFlags[FeatureDisableNvsandboxUtils] {
225+
return nil
226+
}
227+
if l.nvsandboxutilslib != nil {
228+
return l.nvsandboxutilslib
229+
}
230+
231+
var nvsandboxutilsOpts []nvsandboxutils.LibraryOption
232+
// Set the library path for libnvidia-sandboxutils. A lookup error is only
// logged; the library is then created without an explicit path.
// NOTE(review): candidates[0] assumes Locate returns a non-empty slice when
// err is nil -- confirm against the locator implementation.
233+
candidates, err := l.driver.Libraries().Locate("libnvidia-sandboxutils.so.1")
234+
if err != nil {
235+
l.logger.Warningf("Ignoring error in locating libnvidia-sandboxutils.so.1: %v", err)
236+
} else {
237+
libNvidiaSandboxutilsPath := candidates[0]
238+
l.logger.Infof("Using %v", libNvidiaSandboxutilsPath)
239+
nvsandboxutilsOpts = append(nvsandboxutilsOpts, nvsandboxutils.WithLibraryPath(libNvidiaSandboxutilsPath))
240+
}
241+
return nvsandboxutils.New(nvsandboxutilsOpts...)
242+
}

pkg/nvcdi/options.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,3 +166,14 @@ func WithDisabledHook(hook HookName) Option {
166166
o.disabledHooks[hook] = true
167167
}
168168
}
169+
170+
// WithFeatureFlag returns an Option that toggles the specified feature on.
171+
// This option can be specified multiple times, once for each feature flag to
// enable. The flag map is allocated on first use if it is still nil.
172+
func WithFeatureFlag(featureFlag FeatureFlag) Option {
173+
return func(o *nvcdilib) {
174+
if o.featureFlags == nil {
175+
o.featureFlags = make(map[FeatureFlag]bool)
176+
}
177+
o.featureFlags[featureFlag] = true
178+
}
179+
}

0 commit comments

Comments
 (0)