Skip to content

Commit cca6323

Browse files
authored
Fix rshim drop mode driver binding (#286)
This commit resolves two issues where devices in drop mode incorrectly had PCIe drivers bound, violating the drop mode principle that such devices should remain unbound. Issue 1: Cross-device driver binding with vfio-pci driver. - Taking one rshim device out of drop mode caused ALL rshim devices to bind drivers, even those still in drop mode. Root cause: per-device vendor ID registration triggered kernel auto-binding for all matching devices system-wide, specifically for vfio-pci driver. Issue 2: Driver persistence with uio_pci_generic - Devices remained bound when returning to drop mode due to an outdated workaround for Linux kernels <4.18 that prevented UIO device unbinding. Key fixes: - Use driver_override for precise per-device control - Move device ID registration to one-time initialization - Remove obsolete UIO workaround The fix ensures drop mode devices remain truly unbound while providing robust per-device driver management for mixed drop/active scenarios. RM #4560999
1 parent d970a21 commit cca6323

File tree

1 file changed

+60
-26
lines changed

1 file changed

+60
-26
lines changed

src/rshim_pcie.c

Lines changed: 60 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -323,20 +323,14 @@ static void rshim_pcie_mmap_release(rshim_pcie_t *dev)
323323

324324
static void rshim_pcie_bind(rshim_pcie_t *dev, bool enable)
325325
{
326-
uint16_t device_id[] = {BLUEFIELD1_DEVICE_ID, BLUEFIELD2_DEVICE_ID,
327-
BLUEFIELD3_DEVICE_ID, BLUEFIELD3_DEVICE_ID2};
328326
char cmd[RSHIM_CMD_MAX];
329-
int i, rc;
327+
const char *driver_name = NULL;
330328

331-
/*
332-
* Linux kernel prior 4.18 has a bug which could cause crash when uio is
333-
* unregistered (see commit 57c5f4df0a5a uio: fix crash after the device
334-
* is unregistered). Below is a workaround to avoid such crash for uio.
335-
* The rshim probing order is vfio->uio->direct. The uio unbind won't
336-
* affect the operation of direct mapping.
337-
*/
338-
if (!enable && (dev->mmap_mode == RSHIM_PCIE_MMAP_UIO))
339-
return;
329+
if (dev->mmap_mode == RSHIM_PCIE_MMAP_VFIO) {
330+
driver_name = "vfio-pci";
331+
} else if (dev->mmap_mode == RSHIM_PCIE_MMAP_UIO) {
332+
driver_name = "uio_pci_generic";
333+
}
340334

341335
if (dev->mmap_mode == RSHIM_PCIE_MMAP_VFIO ||
342336
dev->mmap_mode == RSHIM_PCIE_MMAP_UIO) {
@@ -347,26 +341,29 @@ static void rshim_pcie_bind(rshim_pcie_t *dev, bool enable)
347341
dev->pci_path);
348342
if (system(cmd) == -1)
349343
RSHIM_DBG("Failed to unbind device\n");
350-
} else if (dev->mmap_mode == RSHIM_PCIE_MMAP_VFIO) {
351-
/* Clear driver_override. */
344+
352345
snprintf(cmd, sizeof(cmd),
353346
"echo \"\" > %s/%04x:%02x:%02x.%1u/driver_override 2>/dev/null",
354347
SYS_BUS_PCI_PATH, dev->domain, dev->bus,
355348
dev->dev, dev->func);
356349
if (system(cmd) == -1)
357-
RSHIM_DBG("Failed to enable pcie\n");
358-
}
350+
RSHIM_DBG("Failed to clear driver_override\n");
351+
} else {
352+
snprintf(cmd, sizeof(cmd),
353+
"echo %s > %s/%04x:%02x:%02x.%1u/driver_override 2>/dev/null",
354+
driver_name,
355+
SYS_BUS_PCI_PATH, dev->domain, dev->bus,
356+
dev->dev, dev->func);
357+
if (system(cmd) == -1)
358+
RSHIM_DBG("Failed to set driver_override\n");
359359

360-
for (i = 0; i < sizeof(device_id) / sizeof(uint16_t); i++) {
361-
snprintf(cmd, sizeof(cmd), "echo '%x %x' > %s/%s 2>/dev/null",
362-
TILERA_VENDOR_ID, device_id[i], dev->pci_path,
363-
enable ? "new_id" : "remove_id");
364-
rc = system(cmd);
365-
if (rc == -1)
366-
RSHIM_DBG("Failed to write device id %m\n");
367-
}
360+
/* Unbind if bound to a diffrent driver. Won't hurt if it's not bound. */
361+
snprintf(cmd, sizeof(cmd),
362+
"echo %04x:%02x:%02x.%1u > %s/unbind 2>/dev/null",
363+
dev->domain, dev->bus, dev->dev, dev->func,
364+
dev->pci_path);
365+
system(cmd); /* Ignore errors - device may already be unbound */
368366

369-
if (enable) {
370367
snprintf(cmd, sizeof(cmd),
371368
"echo %04x:%02x:%02x.%1u > %s/bind 2>/dev/null",
372369
dev->domain, dev->bus, dev->dev, dev->func,
@@ -395,7 +392,7 @@ static void rshim_pcie_bind(rshim_pcie_t *dev, bool enable)
395392
SYS_BUS_PCI_PATH, dev->domain, dev->bus,
396393
dev->dev, dev->func);
397394
if (system(cmd) == -1)
398-
RSHIM_DBG("Failed to enable pcie\n");
395+
RSHIM_DBG("Failed to set driver_override\n");
399396
}
400397
}
401398

@@ -1103,6 +1100,24 @@ static void rshim_pcie_delete(rshim_backend_t *bd)
11031100
free(dev);
11041101
}
11051102

1103+
static void rshim_pcie_register_device_ids(const char *driver_path)
1104+
{
1105+
uint16_t device_id[] = {BLUEFIELD1_DEVICE_ID, BLUEFIELD2_DEVICE_ID,
1106+
BLUEFIELD3_DEVICE_ID, BLUEFIELD3_DEVICE_ID2};
1107+
char cmd[RSHIM_CMD_MAX];
1108+
int i, rc;
1109+
1110+
/* Register all BF vendor IDs with the driver */
1111+
for (i = 0; i < sizeof(device_id) / sizeof(uint16_t); i++) {
1112+
snprintf(cmd, sizeof(cmd), "echo '%x %x' > %s/new_id 2>/dev/null",
1113+
TILERA_VENDOR_ID, device_id[i], driver_path);
1114+
rc = system(cmd);
1115+
if (rc == -1)
1116+
RSHIM_DBG("Failed to write device id %04x to %s\n", device_id[i],
1117+
driver_path);
1118+
}
1119+
}
1120+
11061121
/* Enable RSHIM PF and setup memory map. */
11071122
static int rshim_pcie_enable(rshim_backend_t *bd, bool enable)
11081123
{
@@ -1141,6 +1156,7 @@ static int rshim_pcie_enable(rshim_backend_t *bd, bool enable)
11411156
rshim_pcie_bind(dev, false);
11421157
dev->pci_path = SYS_UIO_PCI_PATH;
11431158
dev->mmap_mode = RSHIM_PCIE_MMAP_UIO;
1159+
rshim_pcie_register_device_ids(SYS_UIO_PCI_PATH);
11441160
rshim_pcie_bind(dev, true);
11451161
rc = rshim_pcie_mmap(dev, true);
11461162
}
@@ -1446,6 +1462,7 @@ int rshim_pcie_init(void)
14461462
if (rshim_pcie_has_vfio()) {
14471463
rshim_pcie_mmap_mode = RSHIM_PCIE_MMAP_VFIO;
14481464
rshim_sys_pci_path = SYS_VFIO_PCI_PATH;
1465+
rshim_pcie_register_device_ids(SYS_VFIO_PCI_PATH);
14491466
} else {
14501467
/* Linux kernel lock_down requires VFIO. */
14511468
if (kernel_lock_down_enabled()) {
@@ -1456,6 +1473,7 @@ int rshim_pcie_init(void)
14561473
if (rshim_pcie_has_uio()) {
14571474
rshim_pcie_mmap_mode = RSHIM_PCIE_MMAP_UIO;
14581475
rshim_sys_pci_path = SYS_UIO_PCI_PATH;
1476+
rshim_pcie_register_device_ids(SYS_UIO_PCI_PATH);
14591477
}
14601478
}
14611479

@@ -1479,6 +1497,22 @@ int rshim_pcie_init(void)
14791497
!rshim_is_bluefield3(dev->device_id)))
14801498
continue;
14811499

1500+
/*
1501+
* If DROP mode is set and this device got auto-bound during ID
1502+
* registration, unbind it now.
1503+
*/
1504+
if (rshim_drop_mode > 0 && rshim_sys_pci_path) {
1505+
char pci_addr[32];
1506+
char cmd[RSHIM_CMD_MAX];
1507+
snprintf(pci_addr, sizeof(pci_addr), "%04x:%02x:%02x.%x",
1508+
dev->domain, dev->bus, dev->dev, dev->func);
1509+
snprintf(cmd, sizeof(cmd),
1510+
"[ -e /sys/bus/pci/devices/%s/driver ] && "
1511+
"echo %s > /sys/bus/pci/devices/%s/driver/unbind 2>/dev/null",
1512+
pci_addr, pci_addr, pci_addr);
1513+
system(cmd);
1514+
}
1515+
14821516
rc = rshim_pcie_probe(dev);
14831517
if (rc)
14841518
continue;

0 commit comments

Comments
 (0)