Skip to content

Commit 26057d8

Browse files
committed
address comments (2)
Signed-off-by: Varun Ramachandra Sekar <[email protected]>
1 parent cd9c8e1 commit 26057d8

File tree

3 files changed

+32
-15
lines changed

3 files changed

+32
-15
lines changed

cmd/gpu-kubelet-plugin/cdi.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ import (
3131
cdiapi "tags.cncf.io/container-device-interface/pkg/cdi"
3232
cdiparser "tags.cncf.io/container-device-interface/pkg/parser"
3333
cdispec "tags.cncf.io/container-device-interface/specs-go"
34+
35+
"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/featuregates"
3436
)
3537

3638
const (
@@ -168,9 +170,12 @@ func (cdi *CDIHandler) CreateStandardDeviceSpecFile(allocatable AllocatableDevic
168170
klog.Errorf("failed to create standard nvidia device spec file: %v", err)
169171
return err
170172
}
171-
if err := cdi.createStandardVfioDeviceSpecFile(allocatable); err != nil {
172-
klog.Errorf("failed to create standard vfio device spec file: %v", err)
173-
return err
173+
174+
if featuregates.Enabled(featuregates.PassthroughSupport) {
175+
if err := cdi.createStandardVfioDeviceSpecFile(allocatable); err != nil {
176+
klog.Errorf("failed to create standard vfio device spec file: %v", err)
177+
return err
178+
}
174179
}
175180
return nil
176181
}

cmd/gpu-kubelet-plugin/device_state.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
9898

9999
var vfioPciManager *VfioPciManager
100100
if featuregates.Enabled(featuregates.PassthroughSupport) {
101-
manager := NewVfioPciManager(string(containerDriverRoot), nvdevlib)
101+
manager := NewVfioPciManager(string(containerDriverRoot), nvdevlib, true /* nvidiaEnabled */)
102102
if err := manager.Prechecks(); err == nil {
103103
vfioPciManager = manager
104104
} else {
@@ -505,7 +505,7 @@ func (s *DeviceState) unprepareVfioDevices(ctx context.Context, devices Prepared
505505
if err != nil {
506506
return fmt.Errorf("error getting allocatable device for vfio device: %w", err)
507507
}
508-
if err := s.vfioPciManager.Unconfigure(vfioAllocatable.Vfio); err != nil {
508+
if err := s.vfioPciManager.Unconfigure(ctx, vfioAllocatable.Vfio); err != nil {
509509
return fmt.Errorf("error unconfiguring vfio device: %w", err)
510510
}
511511
}

cmd/gpu-kubelet-plugin/vfio-device.go

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,23 +47,26 @@ const (
4747
)
4848

4949
type VfioPciManager struct {
50-
driverRoot string
51-
driver string
52-
nvlib *deviceLib
50+
driverRoot string
51+
driver string
52+
nvlib *deviceLib
53+
nvidiaEnabled bool
5354
}
5455

55-
func NewVfioPciManager(driverRoot string, nvlib *deviceLib) *VfioPciManager {
56+
func NewVfioPciManager(driverRoot string, nvlib *deviceLib, nvidiaEnabled bool) *VfioPciManager {
5657
vm := &VfioPciManager{
57-
driverRoot: driverRoot,
58-
driver: vfioPciDriver,
59-
nvlib: nvlib,
58+
driverRoot: driverRoot,
59+
driver: vfioPciDriver,
60+
nvlib: nvlib,
61+
nvidiaEnabled: nvidiaEnabled,
6062
}
6163
if !vm.isVfioPCIModuleLoaded() {
6264
err := vm.loadVfioPciModule()
6365
if err != nil {
6466
klog.Fatalf("failed to load vfio_pci module: %v", err)
6567
}
6668
}
69+
6770
return vm
6871
}
6972

@@ -124,7 +127,7 @@ func (vm *VfioPciManager) loadVfioPciModule() error {
124127
return nil
125128
}
126129

127-
func (vm *VfioPciManager) WaitForGPUFree(info *VfioDeviceInfo) error {
130+
func (vm *VfioPciManager) WaitForGPUFree(ctx context.Context, info *VfioDeviceInfo) error {
128131
if info.parent == nil {
129132
return nil
130133
}
@@ -175,7 +178,11 @@ func (vm *VfioPciManager) Configure(ctx context.Context, info *VfioDeviceInfo) e
175178
if driver == vm.driver {
176179
return nil
177180
}
178-
err = vm.WaitForGPUFree(info)
181+
// Only the vfio-pci driver, or the nvidia driver when vm.nvidiaEnabled is set, is supported.
182+
if !vm.nvidiaEnabled || driver != nvidiaDriver {
183+
return fmt.Errorf("gpu is bound to %q driver, expected %q or %q", driver, vm.driver, nvidiaDriver)
184+
}
185+
err = vm.WaitForGPUFree(ctx, info)
179186
if err != nil {
180187
return err
181188
}
@@ -191,10 +198,15 @@ func (vm *VfioPciManager) Configure(ctx context.Context, info *VfioDeviceInfo) e
191198
}
192199

193200
// Unconfigure binds the GPU to the nvidia driver. It does nothing when nvidiaEnabled is false.
194-
func (vm *VfioPciManager) Unconfigure(info *VfioDeviceInfo) error {
201+
func (vm *VfioPciManager) Unconfigure(ctx context.Context, info *VfioDeviceInfo) error {
195202
perGpuLock.Get(info.pcieBusID).Lock()
196203
defer perGpuLock.Get(info.pcieBusID).Unlock()
197204

205+
// Do nothing if we don't expect to switch to the nvidia driver.
206+
if !vm.nvidiaEnabled {
207+
return nil
208+
}
209+
198210
driver, err := getDriver(pciDevicesRoot, info.pcieBusID)
199211
if err != nil {
200212
return err

0 commit comments

Comments
 (0)