Skip to content

Commit 298a471

Browse files
committed
deadvertise sibling devices on preparation
Signed-off-by: Varun Ramachandra Sekar <[email protected]>
1 parent ac29067 commit 298a471

File tree

4 files changed

+199
-57
lines changed

4 files changed

+199
-57
lines changed

cmd/gpu-kubelet-plugin/allocatable.go

Lines changed: 94 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,6 @@ import (
2222
resourceapi "k8s.io/api/resource/v1"
2323
)
2424

25-
type AllocatableDevices map[string]*AllocatableDevice
26-
27-
func (d AllocatableDevices) GetGPUByPCIeBusID(pcieBusID string) *AllocatableDevice {
28-
for _, device := range d {
29-
if device.Type() != GpuDeviceType {
30-
continue
31-
}
32-
if device.Gpu.pcieBusID == pcieBusID {
33-
return device
34-
}
35-
}
36-
return nil
37-
}
38-
3925
type AllocatableDevice struct {
4026
Gpu *GpuInfo
4127
Mig *MigDeviceInfo
@@ -92,6 +78,73 @@ func (d AllocatableDevice) UUID() string {
9278
panic("unexpected type for AllocatableDevice")
9379
}
9480

81+
// AllocatableDeviceList is a slice of pointers to AllocatableDevice.
type AllocatableDeviceList []*AllocatableDevice
82+
83+
// AllocatableDevices is a set of allocatable devices indexed by a string key.
// Callers visible in this change store entries under the device's
// CanonicalName().
type AllocatableDevices map[string]*AllocatableDevice
84+
85+
func (d AllocatableDevices) getDevicesByGPUPCIBusID(pcieBusID string) AllocatableDeviceList {
86+
var devices AllocatableDeviceList
87+
for _, device := range d {
88+
switch device.Type() {
89+
case GpuDeviceType:
90+
if device.Gpu.pcieBusID == pcieBusID {
91+
devices = append(devices, device)
92+
}
93+
case MigDeviceType:
94+
if device.Mig.parent.pcieBusID == pcieBusID {
95+
devices = append(devices, device)
96+
}
97+
case VfioDeviceType:
98+
if device.Vfio.pcieBusID == pcieBusID {
99+
devices = append(devices, device)
100+
}
101+
}
102+
}
103+
return devices
104+
}
105+
106+
func (d AllocatableDevices) GetGPUByPCIeBusID(pcieBusID string) *AllocatableDevice {
107+
for _, device := range d {
108+
if device.Type() != GpuDeviceType {
109+
continue
110+
}
111+
if device.Gpu.pcieBusID == pcieBusID {
112+
return device
113+
}
114+
}
115+
return nil
116+
}
117+
118+
func (d AllocatableDevices) GetGPUs() AllocatableDeviceList {
119+
var devices AllocatableDeviceList
120+
for _, device := range d {
121+
if device.Type() == GpuDeviceType {
122+
devices = append(devices, device)
123+
}
124+
}
125+
return devices
126+
}
127+
128+
func (d AllocatableDevices) GetMigDevices() AllocatableDeviceList {
129+
var devices AllocatableDeviceList
130+
for _, device := range d {
131+
if device.Type() == MigDeviceType {
132+
devices = append(devices, device)
133+
}
134+
}
135+
return devices
136+
}
137+
138+
func (d AllocatableDevices) GetVfioDevices() AllocatableDeviceList {
139+
var devices AllocatableDeviceList
140+
for _, device := range d {
141+
if device.Type() == VfioDeviceType {
142+
devices = append(devices, device)
143+
}
144+
}
145+
return devices
146+
}
147+
95148
func (d AllocatableDevices) GpuUUIDs() []string {
96149
var uuids []string
97150
for _, device := range d {
@@ -131,3 +184,30 @@ func (d AllocatableDevices) UUIDs() []string {
131184
slices.Sort(uuids)
132185
return uuids
133186
}
187+
188+
func (d AllocatableDevices) RemoveSiblingDevices(device *AllocatableDevice) {
189+
var pciBusID string
190+
switch device.Type() {
191+
case GpuDeviceType:
192+
pciBusID = device.Gpu.pcieBusID
193+
case MigDeviceType:
194+
pciBusID = device.Mig.parent.pcieBusID
195+
case VfioDeviceType:
196+
pciBusID = device.Vfio.pcieBusID
197+
}
198+
199+
siblings := d.getDevicesByGPUPCIBusID(pciBusID)
200+
for _, sibling := range siblings {
201+
if sibling.Type() == device.Type() {
202+
continue
203+
}
204+
switch sibling.Type() {
205+
case GpuDeviceType:
206+
delete(d, sibling.Gpu.CanonicalName())
207+
case MigDeviceType:
208+
delete(d, sibling.Mig.CanonicalName())
209+
case VfioDeviceType:
210+
delete(d, sibling.Vfio.CanonicalName())
211+
}
212+
}
213+
}

cmd/gpu-kubelet-plugin/cdi.go

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,11 @@ func (cdi *CDIHandler) writeSpec(spec spec.Interface, specName string) error {
165165

166166
func (cdi *CDIHandler) CreateStandardDeviceSpecFile(allocatable AllocatableDevices) error {
167167
if err := cdi.createStandardNvidiaDeviceSpecFile(allocatable); err != nil {
168+
klog.Errorf("failed to create standard nvidia device spec file: %v", err)
168169
return err
169170
}
170171
if err := cdi.createStandardVfioDeviceSpecFile(allocatable); err != nil {
172+
klog.Errorf("failed to create standard vfio device spec file: %v", err)
171173
return err
172174
}
173175
return nil
@@ -199,6 +201,7 @@ func (cdi *CDIHandler) createStandardVfioDeviceSpecFile(allocatable AllocatableD
199201
}
200202

201203
specName := cdiapi.GenerateTransientSpecName(cdiVendor, cdiDeviceClass, cdiVfioSpecIdentifier)
204+
klog.Infof("Writing vfio spec for %s to %s", specName, cdi.cdiRoot)
202205
return cdi.writeSpec(spec, specName)
203206
}
204207

@@ -252,7 +255,8 @@ func (cdi *CDIHandler) createStandardNvidiaDeviceSpecFile(allocatable Allocatabl
252255
return fmt.Errorf("failed to creat CDI spec: %w", err)
253256
}
254257

255-
specName := cdiapi.GenerateTransientSpecName(cdiVendor, cdiDeviceClass, cdiVfioSpecIdentifier)
258+
specName := cdiapi.GenerateTransientSpecName(cdiVendor, cdiDeviceClass, cdiBaseSpecIdentifier)
259+
klog.Infof("Writing spec for %s to %s", specName, cdi.cdiRoot)
256260
return cdi.writeSpec(spec, specName)
257261
}
258262

@@ -291,26 +295,10 @@ func (cdi *CDIHandler) CreateClaimSpecFile(claimUID string, preparedDevices Prep
291295
return fmt.Errorf("failed to creat CDI spec: %w", err)
292296
}
293297

294-
// Transform the spec to make it aware that it is running inside a container.
295-
err = transformroot.New(
296-
transformroot.WithRoot(cdi.driverRoot),
297-
transformroot.WithTargetRoot(cdi.targetDriverRoot),
298-
transformroot.WithRelativeTo("host"),
299-
).Transform(spec.Raw())
300-
if err != nil {
301-
return fmt.Errorf("failed to transform driver root in CDI spec: %w", err)
302-
}
303-
304-
// Update the spec to include only the minimum version necessary.
305-
minVersion, err := cdispec.MinimumRequiredVersion(spec.Raw())
306-
if err != nil {
307-
return fmt.Errorf("failed to get minimum required CDI spec version: %w", err)
308-
}
309-
spec.Raw().Version = minVersion
310-
311298
// Write the spec out to disk.
312299
specName := cdiapi.GenerateTransientSpecName(cdiVendor, cdiClaimClass, claimUID)
313-
return cdi.cache.WriteSpec(spec.Raw(), specName)
300+
klog.Infof("Writing claim spec for %s to %s", specName, cdi.cdiRoot)
301+
return cdi.writeSpec(spec, specName)
314302
}
315303

316304
func (cdi *CDIHandler) DeleteClaimSpecFile(claimUID string) error {

cmd/gpu-kubelet-plugin/device_state.go

Lines changed: 46 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,17 @@ func (s *DeviceState) Prepare(ctx context.Context, claim *resourceapi.ResourceCl
180180
return nil, fmt.Errorf("prepare devices failed: %w", err)
181181
}
182182

183+
if featuregates.Enabled(featuregates.PassthroughSupport) {
184+
for _, device := range preparedDevices.GetDevices() {
185+
allocatableDevice, ok := s.allocatable[device.DeviceName]
186+
if !ok {
187+
klog.Warningf("allocatable not found for device: %v", device.DeviceName)
188+
continue
189+
}
190+
s.allocatable.RemoveSiblingDevices(allocatableDevice)
191+
}
192+
}
193+
183194
if err := s.cdi.CreateClaimSpecFile(claimUID, preparedDevices); err != nil {
184195
return nil, fmt.Errorf("unable to create CDI spec file for claim: %w", err)
185196
}
@@ -230,6 +241,19 @@ func (s *DeviceState) Unprepare(ctx context.Context, claimUID string) error {
230241
return fmt.Errorf("unsupported ClaimCheckpointState: %v", pc.CheckpointState)
231242
}
232243

244+
if featuregates.Enabled(featuregates.PassthroughSupport) {
245+
for _, device := range pc.PreparedDevices.GetDevices() {
246+
allocatableDevice, ok := s.allocatable[device.DeviceName]
247+
if !ok {
248+
klog.Warningf("allocatable not found for device: %v", device.DeviceName)
249+
continue
250+
}
251+
err := s.discoverSiblingAllocatables(allocatableDevice)
252+
if err != nil {
253+
return fmt.Errorf("error discovering sibling allocatables: %w", err)
254+
}
255+
}
256+
}
233257
if err := s.cdi.DeleteClaimSpecFile(claimUID); err != nil {
234258
return fmt.Errorf("unable to delete CDI spec file for claim: %w", err)
235259
}
@@ -472,26 +496,40 @@ func (s *DeviceState) getAllocatableVfioDevice(uuid string) (*AllocatableDevice,
472496
return allocatable, nil
473497
}
474498
}
475-
return nil, fmt.Errorf("allocatable device not found for vfio-pci device: %v", uuid)
499+
return nil, fmt.Errorf("allocatable device not found for vfio device: %v", uuid)
476500
}
477501

478502
func (s *DeviceState) unprepareVfioDevices(ctx context.Context, devices PreparedDeviceList) error {
479503
for _, device := range devices {
480504
vfioAllocatable, err := s.getAllocatableVfioDevice(device.Vfio.Info.UUID)
481505
if err != nil {
482-
return fmt.Errorf("error getting allocatable device for vfio-pci device: %w", err)
506+
return fmt.Errorf("error getting allocatable device for vfio device: %w", err)
483507
}
484508
if err := s.vfioPciManager.Unconfigure(vfioAllocatable.Vfio); err != nil {
485-
return fmt.Errorf("error unconfiguring vfio-pci device: %w", err)
509+
return fmt.Errorf("error unconfiguring vfio device: %w", err)
486510
}
511+
}
512+
return nil
513+
}
487514

488-
// Rediscover the GPU to account for possible device minor changes.
489-
allocatableDevice, err := s.nvdevlib.discoverGPUByPCIBusID(vfioAllocatable.Vfio.pcieBusID)
515+
func (s *DeviceState) discoverSiblingAllocatables(device *AllocatableDevice) error {
516+
switch device.Type() {
517+
case GpuDeviceType, MigDeviceType:
518+
vfio, err := s.nvdevlib.discoverVfioDevice(device.Gpu)
519+
if err != nil {
520+
return fmt.Errorf("error discovering vfio device: %w", err)
521+
}
522+
s.allocatable[vfio.CanonicalName()] = vfio
523+
case VfioDeviceType:
524+
gpu, migs, err := s.nvdevlib.discoverGPUByPCIBusID(device.Vfio.pcieBusID)
490525
if err != nil {
491-
return fmt.Errorf("error rediscovering GPU by PCIe bus ID: %w", err)
526+
return fmt.Errorf("error discovering gpu by pci bus id: %w", err)
527+
}
528+
s.allocatable[gpu.CanonicalName()] = gpu
529+
device.Vfio.parent = gpu.Gpu
530+
for _, mig := range migs {
531+
s.allocatable[mig.CanonicalName()] = mig
492532
}
493-
vfioAllocatable.Vfio.parent = allocatableDevice.Gpu
494-
s.allocatable[allocatableDevice.Gpu.CanonicalName()] = allocatableDevice
495533
}
496534
return nil
497535
}
@@ -572,7 +610,6 @@ func (s *DeviceState) applyVfioDeviceConfig(ctx context.Context, config *configa
572610
if err != nil {
573611
return nil, err
574612
}
575-
delete(s.allocatable, info.Vfio.parent.CanonicalName())
576613
}
577614

578615
return &configState, nil

cmd/gpu-kubelet-plugin/nvlib.go

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -149,16 +149,12 @@ func (l deviceLib) enumerateGpusAndMigDevices(config *Config) (AllocatableDevice
149149
}
150150
devices[gpuInfo.CanonicalName()] = deviceInfo
151151

152-
migs, err := l.getMigDevices(gpuInfo)
152+
migs, err := l.discoverMigDevicesByGPU(gpuInfo)
153153
if err != nil {
154-
return fmt.Errorf("error getting MIG devices for GPU %d: %w", i, err)
154+
return fmt.Errorf("error discovering MIG devices for GPU %q: %w", gpuInfo.CanonicalName(), err)
155155
}
156-
157156
for _, migDeviceInfo := range migs {
158-
deviceInfo := &AllocatableDevice{
159-
Mig: migDeviceInfo,
160-
}
161-
devices[migDeviceInfo.CanonicalName()] = deviceInfo
157+
devices[migDeviceInfo.CanonicalName()] = migDeviceInfo
162158
}
163159

164160
return nil
@@ -170,13 +166,31 @@ func (l deviceLib) enumerateGpusAndMigDevices(config *Config) (AllocatableDevice
170166
return devices, nil
171167
}
172168

173-
func (l deviceLib) discoverGPUByPCIBusID(pcieBusID string) (*AllocatableDevice, error) {
169+
func (l deviceLib) discoverMigDevicesByGPU(gpuInfo *GpuInfo) (AllocatableDeviceList, error) {
170+
var devices AllocatableDeviceList
171+
migs, err := l.getMigDevices(gpuInfo)
172+
if err != nil {
173+
return nil, fmt.Errorf("error getting MIG devices for GPU %q: %w", gpuInfo.CanonicalName(), err)
174+
}
175+
176+
for _, migDeviceInfo := range migs {
177+
mig := &AllocatableDevice{
178+
Mig: migDeviceInfo,
179+
}
180+
devices = append(devices, mig)
181+
}
182+
return devices, nil
183+
}
184+
185+
// TODO: Need go-nvlib util for this.
186+
func (l deviceLib) discoverGPUByPCIBusID(pcieBusID string) (*AllocatableDevice, AllocatableDeviceList, error) {
174187
if err := l.Init(); err != nil {
175-
return nil, err
188+
return nil, nil, err
176189
}
177190
defer l.alwaysShutdown()
178191

179-
var allocatableDevice *AllocatableDevice
192+
var gpu *AllocatableDevice
193+
var migs AllocatableDeviceList
180194
err := l.VisitDevices(func(i int, d nvdev.Device) error {
181195
gpuPCIBusID, err := d.GetPCIBusID()
182196
if err != nil {
@@ -189,18 +203,41 @@ func (l deviceLib) discoverGPUByPCIBusID(pcieBusID string) (*AllocatableDevice,
189203
if err != nil {
190204
return fmt.Errorf("error getting info for GPU %d: %w", i, err)
191205
}
192-
allocatableDevice = &AllocatableDevice{
206+
gpu = &AllocatableDevice{
193207
Gpu: gpuInfo,
194208
}
209+
migs, err = l.discoverMigDevicesByGPU(gpuInfo)
210+
if err != nil {
211+
return fmt.Errorf("error discovering MIG devices for GPU %q: %w", gpuInfo.CanonicalName(), err)
212+
}
195213
return nil
196214
})
197215
if err != nil {
198-
return nil, fmt.Errorf("error visiting devices: %w", err)
216+
return nil, nil, fmt.Errorf("error visiting devices: %w", err)
217+
}
218+
return gpu, migs, nil
219+
}
220+
221+
// TODO: Need go-nvlib util for this.
222+
func (l deviceLib) discoverVfioDevice(gpuInfo *GpuInfo) (*AllocatableDevice, error) {
223+
gpus, err := l.nvpci.GetGPUs()
224+
if err != nil {
225+
return nil, fmt.Errorf("error getting GPU PCI devices: %w", err)
199226
}
200-
if allocatableDevice == nil {
201-
return nil, fmt.Errorf("error discovering GPU by PCI bus ID: %s", pcieBusID)
227+
for idx, gpu := range gpus {
228+
if gpu.Address != gpuInfo.pcieBusID {
229+
continue
230+
}
231+
vfioDeviceInfo, err := l.getVfioDeviceInfo(idx, gpu)
232+
if err != nil {
233+
return nil, fmt.Errorf("error getting VFIO device info: %w", err)
234+
}
235+
vfioDeviceInfo.parent = gpuInfo
236+
return &AllocatableDevice{
237+
Vfio: vfioDeviceInfo,
238+
}, nil
202239
}
203-
return allocatableDevice, nil
240+
return nil, fmt.Errorf("error discovering VFIO device by PCIe bus ID: %s", gpuInfo.pcieBusID)
204241
}
205242

206243
func (l deviceLib) getGpuInfo(index int, device nvdev.Device) (*GpuInfo, error) {

0 commit comments

Comments
 (0)