Skip to content

Commit 55fc7b0

Browse files
authored
Merge pull request #668 from varunrsekar/vfio-support-1.33
Support VFIO passthrough
2 parents 5443e0f + ef23484 commit 55fc7b0

File tree

36 files changed

+44223
-81
lines changed

36 files changed

+44223
-81
lines changed

api/nvidia.com/resource/v1beta1/api.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ const (
2929

3030
GpuConfigKind = "GpuConfig"
3131
MigDeviceConfigKind = "MigDeviceConfig"
32+
VfioDeviceConfigKind = "VfioDeviceConfig"
3233
ComputeDomainChannelConfigKind = "ComputeDomainChannelConfig"
3334
ComputeDomainDaemonConfigKind = "ComputeDomainDaemonConfig"
3435
ComputeDomainKind = "ComputeDomain"
@@ -66,6 +67,7 @@ func init() {
6667
scheme.AddKnownTypes(schemeGroupVersion,
6768
&GpuConfig{},
6869
&MigDeviceConfig{},
70+
&VfioDeviceConfig{},
6971
&ComputeDomainChannelConfig{},
7072
&ComputeDomainDaemonConfig{},
7173
&ComputeDomain{},
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package v1beta1
18+
19+
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
22+
"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/featuregates"
23+
)
24+
25+
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
26+
27+
// VfioDeviceConfig holds the set of parameters for configuring a VFIO device.
28+
type VfioDeviceConfig struct {
29+
metav1.TypeMeta `json:",inline"`
30+
}
31+
32+
// DefaultVfioDeviceConfig provides the default configuration of a VFIO device.
33+
func DefaultVfioDeviceConfig() *VfioDeviceConfig {
34+
if !featuregates.Enabled(featuregates.PassthroughSupport) {
35+
return nil
36+
}
37+
return &VfioDeviceConfig{
38+
TypeMeta: metav1.TypeMeta{
39+
APIVersion: GroupName + "/" + Version,
40+
Kind: VfioDeviceConfigKind,
41+
},
42+
}
43+
}
44+
45+
// Normalize updates a VfioDeviceConfig config with implied default values based on other settings.
46+
func (c *VfioDeviceConfig) Normalize() error {
47+
return nil
48+
}
49+
50+
// Validate ensures that VfioDeviceConfig has a valid set of values.
51+
func (c *VfioDeviceConfig) Validate() error {
52+
return nil
53+
}

api/nvidia.com/resource/v1beta1/zz_generated.deepcopy.go

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cmd/gpu-kubelet-plugin/allocatable.go

Lines changed: 122 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
2+
* Copyright (c) 2022-2025, NVIDIA CORPORATION. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -22,11 +22,10 @@ import (
2222
resourceapi "k8s.io/api/resource/v1"
2323
)
2424

25-
type AllocatableDevices map[string]*AllocatableDevice
26-
2725
type AllocatableDevice struct {
28-
Gpu *GpuInfo
29-
Mig *MigDeviceInfo
26+
Gpu *GpuInfo
27+
Mig *MigDeviceInfo
28+
Vfio *VfioDeviceInfo
3029
}
3130

3231
func (d AllocatableDevice) Type() string {
@@ -36,6 +35,9 @@ func (d AllocatableDevice) Type() string {
3635
if d.Mig != nil {
3736
return MigDeviceType
3837
}
38+
if d.Vfio != nil {
39+
return VfioDeviceType
40+
}
3941
return UnknownDeviceType
4042
}
4143

@@ -45,6 +47,8 @@ func (d *AllocatableDevice) CanonicalName() string {
4547
return d.Gpu.CanonicalName()
4648
case MigDeviceType:
4749
return d.Mig.CanonicalName()
50+
case VfioDeviceType:
51+
return d.Vfio.CanonicalName()
4852
}
4953
panic("unexpected type for AllocatableDevice")
5054
}
@@ -55,6 +59,8 @@ func (d *AllocatableDevice) GetDevice() resourceapi.Device {
5559
return d.Gpu.GetDevice()
5660
case MigDeviceType:
5761
return d.Mig.GetDevice()
62+
case VfioDeviceType:
63+
return d.Vfio.GetDevice()
5864
}
5965
panic("unexpected type for AllocatableDevice")
6066
}
@@ -66,9 +72,79 @@ func (d AllocatableDevice) UUID() string {
6672
if d.Mig != nil {
6773
return d.Mig.UUID
6874
}
75+
if d.Vfio != nil {
76+
return d.Vfio.UUID
77+
}
6978
panic("unexpected type for AllocatableDevice")
7079
}
7180

81+
// AllocatableDeviceList is a slice of allocatable device pointers.
type AllocatableDeviceList []*AllocatableDevice
82+
83+
// AllocatableDevices is a set of allocatable devices indexed by a string key.
// NOTE(review): the key appears to be the device's canonical name, since
// RemoveSiblingDevices deletes entries by CanonicalName() — confirm against
// the code that populates the map.
type AllocatableDevices map[string]*AllocatableDevice
84+
85+
func (d AllocatableDevices) getDevicesByGPUPCIBusID(pcieBusID string) AllocatableDeviceList {
86+
var devices AllocatableDeviceList
87+
for _, device := range d {
88+
switch device.Type() {
89+
case GpuDeviceType:
90+
if device.Gpu.pcieBusID == pcieBusID {
91+
devices = append(devices, device)
92+
}
93+
case MigDeviceType:
94+
if device.Mig.parent.pcieBusID == pcieBusID {
95+
devices = append(devices, device)
96+
}
97+
case VfioDeviceType:
98+
if device.Vfio.pcieBusID == pcieBusID {
99+
devices = append(devices, device)
100+
}
101+
}
102+
}
103+
return devices
104+
}
105+
106+
func (d AllocatableDevices) GetGPUByPCIeBusID(pcieBusID string) *AllocatableDevice {
107+
for _, device := range d {
108+
if device.Type() != GpuDeviceType {
109+
continue
110+
}
111+
if device.Gpu.pcieBusID == pcieBusID {
112+
return device
113+
}
114+
}
115+
return nil
116+
}
117+
118+
func (d AllocatableDevices) GetGPUs() AllocatableDeviceList {
119+
var devices AllocatableDeviceList
120+
for _, device := range d {
121+
if device.Type() == GpuDeviceType {
122+
devices = append(devices, device)
123+
}
124+
}
125+
return devices
126+
}
127+
128+
func (d AllocatableDevices) GetMigDevices() AllocatableDeviceList {
129+
var devices AllocatableDeviceList
130+
for _, device := range d {
131+
if device.Type() == MigDeviceType {
132+
devices = append(devices, device)
133+
}
134+
}
135+
return devices
136+
}
137+
138+
func (d AllocatableDevices) GetVfioDevices() AllocatableDeviceList {
139+
var devices AllocatableDeviceList
140+
for _, device := range d {
141+
if device.Type() == VfioDeviceType {
142+
devices = append(devices, device)
143+
}
144+
}
145+
return devices
146+
}
147+
72148
func (d AllocatableDevices) GpuUUIDs() []string {
73149
var uuids []string
74150
for _, device := range d {
@@ -91,8 +167,49 @@ func (d AllocatableDevices) MigDeviceUUIDs() []string {
91167
return uuids
92168
}
93169

170+
func (d AllocatableDevices) VfioDeviceUUIDs() []string {
171+
var uuids []string
172+
for _, device := range d {
173+
if device.Type() == VfioDeviceType {
174+
uuids = append(uuids, device.Vfio.UUID)
175+
}
176+
}
177+
slices.Sort(uuids)
178+
return uuids
179+
}
180+
94181
// UUIDs returns the UUIDs of every GPU, MIG, and VFIO device in d as a single
// list sorted in ascending order.
func (d AllocatableDevices) UUIDs() []string {
	all := d.GpuUUIDs()
	all = append(all, d.MigDeviceUUIDs()...)
	all = append(all, d.VfioDeviceUUIDs()...)
	// Each per-type list is collected separately, so re-sort the merged list.
	slices.Sort(all)
	return all
}
187+
188+
func (d AllocatableDevices) RemoveSiblingDevices(device *AllocatableDevice) {
189+
var pciBusID string
190+
switch device.Type() {
191+
case GpuDeviceType:
192+
pciBusID = device.Gpu.pcieBusID
193+
case VfioDeviceType:
194+
pciBusID = device.Vfio.pcieBusID
195+
case MigDeviceType:
196+
// TODO: Implement once dynamic MIG is supported.
197+
return
198+
}
199+
200+
siblings := d.getDevicesByGPUPCIBusID(pciBusID)
201+
for _, sibling := range siblings {
202+
if sibling.Type() == device.Type() {
203+
continue
204+
}
205+
switch sibling.Type() {
206+
case GpuDeviceType:
207+
delete(d, sibling.Gpu.CanonicalName())
208+
case VfioDeviceType:
209+
delete(d, sibling.Vfio.CanonicalName())
210+
case MigDeviceType:
211+
// TODO: Implement once dynamic MIG is supported.
212+
continue
213+
}
214+
}
215+
}

0 commit comments

Comments
 (0)