Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ echo 8 > /sys/class/net/ib0/device/sriov_numvfs
* `rdmaIsolation` (boolean, optional): Enable RDMA network namespace isolation for RDMA workloads. More information
about the system requirements to support this mode of operation can be found [here](https://github.com/Mellanox/rdma-cni)
* `ibKubernetesEnabled` (bool, optional): Enforces ib-sriov-cni to work with [ib-kubernetes](https://www.github.com/Mellanox/ib-kubernetes).
* `vfioPciMode` (boolean, optional): Enable VFIO mode for VF devices bound to the `vfio-pci` driver. When enabled, the CNI skips network interface configuration, because VFIO devices are used for direct device assignment (e.g., for KubeVirt/VM workloads). Defaults to false; when it is not set to true, the mode is auto-detected from the VF's driver binding. Setting it to true for a VF that is not bound to `vfio-pci` is an error.

> *__Note__*: If `rdmaIsolation` is set to _true_, [`rdma-cni`](https://github.com/Mellanox/rdma-cni) should not be used.

Expand Down
72 changes: 59 additions & 13 deletions cmd/ib-sriov-cni/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,30 @@ func unlockCNIExecution(lock *flock.Flock) {
_ = lock.Unlock()
}

// handleVfioPciDetection reconciles netConf.VfioPciMode with the driver the VF
// is actually bound to.
//
// It returns an error when netConf.DeviceID is empty, when the driver binding
// cannot be determined, or when vfioPciMode is explicitly enabled for a device
// that is not bound to vfio-pci. When vfioPciMode is not enabled and the device
// is bound to vfio-pci, the mode is switched on in netConf (auto-detection).
func handleVfioPciDetection(netConf *localtypes.NetConf) error {
	if netConf.DeviceID == "" {
		return fmt.Errorf("device ID is required for VFIO PCI detection")
	}

	isVfioPci, err := utils.IsVfioPciDevice(netConf.DeviceID)
	if err != nil {
		// %w keeps the underlying error reachable through errors.Is / errors.As.
		return fmt.Errorf("failed to check vfio-pci driver binding for device %s: %w", netConf.DeviceID, err)
	}

	// An explicitly enabled vfioPciMode must match the real driver binding.
	if netConf.VfioPciMode && !isVfioPci {
		return fmt.Errorf("vfioPciMode is enabled but device %s is not bound to vfio-pci driver", netConf.DeviceID)
	}

	// At this point the mode is either explicitly enabled and verified, or it
	// is auto-detected; in both cases it equals the detected binding.
	netConf.VfioPciMode = isVfioPci
	return nil
}

// Get network config, updated with GUID, device info and network namespace.
func getNetConfNetns(args *skel.CmdArgs) (*localtypes.NetConf, ns.NetNS, error) {
netConf, err := config.LoadConf(args.StdinData)
Expand All @@ -116,13 +140,18 @@ func getNetConfNetns(args *skel.CmdArgs) (*localtypes.NetConf, ns.NetNS, error)
"infiniband SRIOV-CNI failed, Unexpected error. GUID must be provided by ib-kubernetes")
}

if netConf.RdmaIso {
if netConf.RdmaIsolation {
err = utils.EnsureRdmaSystemMode()
if err != nil {
return nil, nil, err
}
}

// Handle vfio-pci detection
if err := handleVfioPciDetection(netConf); err != nil {
return nil, nil, err
}

err = config.LoadDeviceInfo(netConf)
if err != nil {
return nil, nil, fmt.Errorf("failed to get device specific information. %v", err)
Expand All @@ -135,18 +164,23 @@ func getNetConfNetns(args *skel.CmdArgs) (*localtypes.NetConf, ns.NetNS, error)
return netConf, netns, nil
}

// Applies VF config and performs VF setup. if RdmaIso is configured, moves RDMA device into namespace
// Applies VF config and performs VF setup. if RdmaIsolation is configured, moves RDMA device into namespace
func doVFConfig(sm localtypes.Manager, netConf *localtypes.NetConf, netns ns.NetNS, args *skel.CmdArgs) (retErr error) {
err := sm.ApplyVFConfig(netConf)
if err != nil {
return fmt.Errorf("infiniBand SRI-OV CNI failed to configure VF %q", err)
}

// VFIO devices don't have network interfaces, skip SetupVF
if netConf.VfioPciMode {
return nil
}

// Note(adrianc): We do this here as ApplyVFConfig is rebinding the VF, causing the RDMA device to be recreated.
// We do this here due to some un-intuitive kernel behavior (which I hope will change), moving an RDMA device
// to namespace causes all of its associated ULP devices (IPoIB) to be recreated in the default namespace,
// hence SetupVF needs to occur after moving RDMA device to namespace
if netConf.RdmaIso {
if netConf.RdmaIsolation {
var rdmaDev string
rdmaDev, err = utils.MoveRdmaDevToNsPci(netConf.DeviceID, netns)
if err != nil {
Expand Down Expand Up @@ -244,7 +278,7 @@ func cmdAdd(args *skel.CmdArgs) (retErr error) {
if nsErr == nil {
_ = sm.ReleaseVF(netConf, args.IfName, args.ContainerID, netns)
}
if netConf.RdmaIso {
if netConf.RdmaIsolation {
_ = utils.MoveRdmaDevFromNs(netConf.RdmaNetState.ContainerRdmaDevName, netns)
}
}
Expand All @@ -256,7 +290,8 @@ func cmdAdd(args *skel.CmdArgs) (retErr error) {
Sandbox: netns.Path(),
}}

if netConf.IPAM.Type != "" {
// VFIO devices don't have network interfaces, skip IPAM configuration
if netConf.IPAM.Type != "" && !netConf.VfioPciMode {
var newResult *current.Result
newResult, err = runIPAMPlugin(args.StdinData, netConf)
if err != nil {
Expand Down Expand Up @@ -294,6 +329,17 @@ func cmdAdd(args *skel.CmdArgs) (retErr error) {
return types.PrintResult(result, netConf.CNIVersion)
}

func handleIPAMCleanup(netConf *localtypes.NetConf, stdinData []byte) error {
// VFIO devices don't use IPAM
if netConf.VfioPciMode {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried that, but it will trigger lint error for cyclomatic complexity.
cmd/ib-sriov-cni/main.go:326:1: cyclomatic complexity 16 of func cmdDel is high (> 15) (gocyclo)

return nil
}
if netConf.IPAM.Type == ipamDHCP {
return fmt.Errorf("ipam type dhcp is not supported")
}
return ipam.ExecDel(netConf.IPAM.Type, stdinData)
}

func cmdDel(args *skel.CmdArgs) (retErr error) {
// https://github.com/kubernetes/kubernetes/pull/35240
if args.Netns == "" {
Expand All @@ -320,10 +366,7 @@ func cmdDel(args *skel.CmdArgs) (retErr error) {
sm := sriov.NewSriovManager()

if netConf.IPAM.Type != "" {
if netConf.IPAM.Type == ipamDHCP {
return fmt.Errorf("ipam type dhcp is not supported")
}
err = ipam.ExecDel(netConf.IPAM.Type, args.StdinData)
err = handleIPAMCleanup(netConf, args.StdinData)
if err != nil {
return err
}
Expand Down Expand Up @@ -352,9 +395,12 @@ func cmdDel(args *skel.CmdArgs) (retErr error) {
}
defer unlockCNIExecution(lock)

err = sm.ReleaseVF(netConf, args.IfName, args.ContainerID, netns)
if err != nil {
return err
// VFIO devices don't have network interfaces to release
if !netConf.VfioPciMode {
err = sm.ReleaseVF(netConf, args.IfName, args.ContainerID, netns)
if err != nil {
return err
}
}

// Move RDMA device to default namespace
Expand All @@ -364,7 +410,7 @@ func cmdDel(args *skel.CmdArgs) (retErr error) {
// 1. netdev cleanup during ReleaseVF.
// 2. rdma dev netns cleanup as ResetVFConfig will rebind the VF.
// Doing anything would have yielded the same results however ResetVFConfig will eventually not trigger VF rebind.
if netConf.RdmaIso {
if netConf.RdmaIsolation {
err = utils.MoveRdmaDevFromNs(netConf.RdmaNetState.ContainerRdmaDevName, netns)
if err != nil {
return fmt.Errorf(
Expand Down
6 changes: 6 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ func LoadDeviceInfo(netConf *types.NetConf) error {
return fmt.Errorf("load config: vf pci addr is required")
}

// VFIO devices don't have network interfaces, skip getting interface name
if netConf.VfioPciMode {
netConf.HostIFNames = ""
return nil
}

// Get interface name
hostIFNames, err := utils.GetVFLinkNames(netConf.DeviceID)
if err != nil || hostIFNames == "" {
Expand Down
91 changes: 54 additions & 37 deletions pkg/sriov/sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,45 @@ func (s *sriovManager) ReleaseVF(conf *types.NetConf, podifName, cid string, net
})
}

// applyVFGuid applies or validates the VF GUID for both VFIO and regular VFs.
//
// When conf.GUID is set, it is validated, the GUID to restore on deletion is
// recorded in conf.HostIFGUID, and the requested GUID is programmed through
// setVfGUID. When conf.GUID is empty, a regular VF must already carry a usable
// GUID (non-empty, not all zeros, not all ones); VFIO VFs are not checked
// because the VF interface cannot be accessed.
//
// It returns an error when the GUID is invalid, when the VF link cannot be
// looked up, when the VF hardware address is too short to hold a GUID, or
// when setVfGUID fails.
func (s *sriovManager) applyVFGuid(conf *types.NetConf, pfLink netlink.Link) error {
	// guidOffset is the position in the textual hardware address ("xx:xx:...")
	// where the saved GUID starts: 12 bytes * 3 characters per byte.
	// NOTE(review): presumably the address is a 20-byte IPoIB address, whose
	// last 8 bytes are the GUID - confirm.
	const guidOffset = 36

	if conf.GUID == "" {
		// Nothing to program. VFIO VFs expose no interface to verify against.
		if conf.VfioPciMode {
			return nil
		}

		// Verify the VF already has a valid GUID.
		vfLink, err := s.nLink.LinkByName(conf.HostIFNames)
		if err != nil {
			return fmt.Errorf("failed to lookup vf %q: %w", conf.HostIFNames, err)
		}

		guid := utils.GetGUIDFromHwAddr(vfLink.Attrs().HardwareAddr)
		if guid == "" || utils.IsAllZeroGUID(guid) || utils.IsAllOnesGUID(guid) {
			return fmt.Errorf("VF %s GUID is not valid", conf.HostIFNames)
		}
		return nil
	}

	if !utils.IsValidGUID(conf.GUID) {
		return fmt.Errorf("invalid guid %s", conf.GUID)
	}

	if conf.VfioPciMode {
		// For VFIO VF devices the current GUID cannot be read from the VF
		// interface; save the all-F GUID to reset to during deletion.
		conf.HostIFGUID = utils.DefaultGUID
	} else {
		// Regular VF: save the current GUID from the VF link.
		vfLink, err := s.nLink.LinkByName(conf.HostIFNames)
		if err != nil {
			return fmt.Errorf("failed to lookup vf %q: %w", conf.HostIFNames, err)
		}

		hwAddr := vfLink.Attrs().HardwareAddr.String()
		// Slicing past the end of the string would panic; report it instead.
		if len(hwAddr) < guidOffset {
			return fmt.Errorf("vf %q has unexpected hardware address %q", conf.HostIFNames, hwAddr)
		}
		conf.HostIFGUID = hwAddr[guidOffset:]
	}

	// Set link guid
	return s.setVfGUID(conf, pfLink, conf.GUID)
}

// ApplyVFConfig configure a VF with parameters given in NetConf
func (s *sriovManager) ApplyVFConfig(conf *types.NetConf) error {
pfLink, err := s.nLink.LinkByName(conf.Master)
Expand All @@ -248,37 +287,8 @@ func (s *sriovManager) ApplyVFConfig(conf *types.NetConf) error {
}
}

// Set link guid
if conf.GUID != "" {
if !utils.IsValidGUID(conf.GUID) {
return fmt.Errorf("invalid guid %s", conf.GUID)
}
// save link guid
vfLink, err := s.nLink.LinkByName(conf.HostIFNames)
if err != nil {
return fmt.Errorf("failed to lookup vf %q: %v", conf.HostIFNames, err)
}

conf.HostIFGUID = vfLink.Attrs().HardwareAddr.String()[36:]

// Set link guid
if err := s.setVfGUID(conf, pfLink, conf.GUID); err != nil {
return err
}
} else {
// Verify VF have valid GUID.
vfLink, err := s.nLink.LinkByName(conf.HostIFNames)
if err != nil {
return fmt.Errorf("failed to lookup vf %q: %v", conf.HostIFNames, err)
}

guid := utils.GetGUIDFromHwAddr(vfLink.Attrs().HardwareAddr)
if guid == "" || utils.IsAllZeroGUID(guid) || utils.IsAllOnesGUID(guid) {
return fmt.Errorf("VF %s GUID is not valid", conf.HostIFNames)
}
}

return nil
// Handle VF GUID configuration
return s.applyVFGuid(conf, pfLink)
}

// restoreVFName restores VF name from conf
Expand Down Expand Up @@ -329,15 +339,18 @@ func (s *sriovManager) ResetVFConfig(conf *types.NetConf) error {
// This happens when a VF is created, as its GUID is then all zeros
if conf.HostIFGUID != "" {
if utils.IsAllZeroGUID(conf.HostIFGUID) {
conf.HostIFGUID = "FF:FF:FF:FF:FF:FF:FF:FF"
conf.HostIFGUID = utils.DefaultGUID
}

if err := s.setVfGUID(conf, pfLink, conf.HostIFGUID); err != nil {
return err
}
// setVfGUID cause VF to rebind, which change its name. Lets restore it.
// For VFIO devices, skip VF name restoration since no rebind occurs
// Once setVfGUID wouldn't do rebind to apply GUID this function should be removed
return s.restoreVFName(conf)
if !conf.VfioPciMode {
return s.restoreVFName(conf)
}
}

return nil
Expand All @@ -356,10 +369,14 @@ func (s *sriovManager) setVfGUID(conf *types.NetConf, pfLink netlink.Link, guidA
if err != nil {
return fmt.Errorf("failed to add port guid %s: %v", guid, err)
}
// unbind vf then bind it to apply the guid
err = s.utils.RebindVf(conf.Master, conf.DeviceID)
if err != nil {
return err
// For VFIO devices, skip rebind as the device is bound to vfio-pci driver
// and doesn't have a network interface that can be unbound/rebound
if !conf.VfioPciMode {
// unbind vf then bind it to apply the guid
err = s.utils.RebindVf(conf.Master, conf.DeviceID)
if err != nil {
return err
}
}
return nil
}
22 changes: 22 additions & 0 deletions pkg/sriov/sriov_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,28 @@ var _ = Describe("Sriov", func() {
Expect(err.Error()).To(Equal("mocked failed"))
Expect(netconf.HostIFGUID).To(Equal(hostGUID))
})
It("ApplyVFConfig with valid GUID and VfioPciMode VF (no network interface) - should return success after setting GUID", func() {
mockedNetLinkManger := &mocks.NetlinkManager{}
mockedPciUtils := &mocks.PciUtils{}

fakeLink := &FakeLink{netlink.LinkAttrs{}}
netconf.GUID = "01:23:45:67:89:ab:cd:ef"
netconf.VfioPciMode = true
netconf.HostIFNames = "" // VFIO VF has no network interface

// Only PF link is needed for VFIO VF
mockedNetLinkManger.On("LinkByName", netconf.Master).Return(fakeLink, nil)
mockedNetLinkManger.On("LinkSetVfNodeGUID", fakeLink, mock.AnythingOfType("int"), mock.Anything).Return(nil)
mockedNetLinkManger.On("LinkSetVfPortGUID", fakeLink, mock.AnythingOfType("int"), mock.Anything).Return(nil)

mockedPciUtils.On("RebindVf", mock.AnythingOfType("string"), mock.AnythingOfType("string")).Return(nil)

sm := sriovManager{nLink: mockedNetLinkManger, utils: mockedPciUtils}
err := sm.ApplyVFConfig(netconf)
Expect(err).NotTo(HaveOccurred())
// For VFIO VF, HostIFGUID should be set to all-F for reset during deletion
Expect(netconf.HostIFGUID).To(Equal("FF:FF:FF:FF:FF:FF:FF:FF"))
})
})
Context("Checking SetupVF function", func() {
var (
Expand Down
3 changes: 2 additions & 1 deletion pkg/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ type IbSriovNetConf struct {
GUID string `json:"-"` // Taken from either CNI_ARGS "guid" attribute or from RuntimeConfig
PKey string `json:"pkey"`
LinkState string `json:"link_state,omitempty"` // auto|enable|disable
RdmaIso bool `json:"rdmaIsolation,omitempty"`
RdmaIsolation bool `json:"rdmaIsolation,omitempty"`
IBKubernetesEnabled bool `json:"ibKubernetesEnabled,omitempty"`
VfioPciMode bool `json:"vfioPciMode,omitempty"` // Skip SR-IOV network setup, default false
RdmaNetState rdmatypes.RdmaNetState
RuntimeConfig RuntimeConf `json:"runtimeConfig,omitempty"`
Args struct {
Expand Down
2 changes: 1 addition & 1 deletion pkg/types/types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ var _ = Describe("Types", func() {
ContIFNames: "net1",
PKey: "0x8001",
LinkState: "enable",
RdmaIso: true,
RdmaIsolation: true,
IBKubernetesEnabled: false,
RdmaNetState: rdmatypes.RdmaNetState{
Version: "1.0",
Expand Down
Loading
Loading