Skip to content

Commit bab614a

Browse files
committed
soft check for VFs before attempting unbind
Signed-off-by: Varun Ramachandra Sekar <[email protected]>
1 parent 298a471 commit bab614a

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed

cmd/gpu-kubelet-plugin/device_state.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
9898

9999
var vfioPciManager *VfioPciManager
100100
if featuregates.Enabled(featuregates.PassthroughSupport) {
101-
manager := NewVfioPciManager(string(containerDriverRoot))
101+
manager := NewVfioPciManager(string(containerDriverRoot), nvdevlib)
102102
if err := manager.Prechecks(); err == nil {
103103
vfioPciManager = manager
104104
} else {

cmd/gpu-kubelet-plugin/vfio-device.go

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,14 @@ const (
4747
// VfioPciManager carries the state used when binding GPUs to the vfio-pci
// driver (see Configure).
type VfioPciManager struct {
4848
// driverRoot is the driver root path handed to NewVfioPciManager.
driverRoot string
4949
// driver is the name of the driver GPUs are switched to; NewVfioPciManager
// initializes it to vfioPciDriver.
driver string
50+
// nvlib is the device library handle; its nvpci member is used to look up
// a GPU by PCI bus ID when checking for enabled VFs.
nvlib *deviceLib
5051
}
5152

52-
func NewVfioPciManager(driverRoot string) *VfioPciManager {
53+
func NewVfioPciManager(driverRoot string, nvlib *deviceLib) *VfioPciManager {
5354
vm := &VfioPciManager{
5455
driverRoot: driverRoot,
5556
driver: vfioPciDriver,
57+
nvlib: nvlib,
5658
}
5759
if !vm.isVfioPCIModuleLoaded() {
5860
err := vm.loadVfioPciModule()
@@ -149,6 +151,19 @@ func (vm *VfioPciManager) WaitForGPUFree(info *VfioDeviceInfo) error {
149151
}
150152
}
151153

154+
// Verify there are no VFs on the GPU.
155+
func (vm *VfioPciManager) verifyDisabledVFs(pcieBusID string) error {
156+
gpu, err := vm.nvlib.nvpci.GetGPUByPciBusID(pcieBusID)
157+
if err != nil {
158+
return err
159+
}
160+
numVFs := gpu.SriovInfo.PhysicalFunction.NumVFs
161+
if numVFs > 0 {
162+
return fmt.Errorf("gpu has %d VFs, cannot unbind", numVFs)
163+
}
164+
return nil
165+
}
166+
152167
// Configure binds the GPU to the vfio-pci driver.
153168
func (vm *VfioPciManager) Configure(info *VfioDeviceInfo) error {
154169
perGpuLock.Get(info.pcieBusID).Lock()
@@ -165,6 +180,10 @@ func (vm *VfioPciManager) Configure(info *VfioDeviceInfo) error {
165180
if err != nil {
166181
return err
167182
}
183+
err = vm.verifyDisabledVFs(info.pcieBusID)
184+
if err != nil {
185+
return err
186+
}
168187
err = vm.changeDriver(info.pcieBusID, vm.driver)
169188
if err != nil {
170189
return err

0 commit comments

Comments
 (0)