Skip to content

Commit 2a350bd

Browse files
authored
Merge pull request #56 from almaslennikov/resource-prefix
Extend resource pool configuration with resource prefix option
2 parents 47a7a5d + eea08e4 commit 2a350bd

File tree

7 files changed

+92
-71
lines changed

7 files changed

+92
-71
lines changed

README.md

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,22 @@
44
[![Coverage Status](https://coveralls.io/repos/github/Mellanox/k8s-rdma-shared-dev-plugin/badge.svg)](https://coveralls.io/github/Mellanox/k8s-rdma-shared-dev-plugin)
55

66
# k8s-rdma-shared-dev-plugin
7+
78
(https://hub.docker.com/r/mellanox/k8s-rdma-shared-dev-plugin)
89

9-
This is simple rdma device plugin that support IB and RoCE HCA.
10-
This plugin runs as daemonset.
11-
Its container image is available at mellanox/k8s-rdma-shared-dev-plugin.
10+
This is a simple RDMA device plugin that supports IB and RoCE HCAs. This plugin runs as a daemonset. Its container image is
11+
available at mellanox/k8s-rdma-shared-dev-plugin.
1212

1313
# How to use device plugin
1414

1515
**1.** Use CNI plugin such as Contiv, Calico, Cluster
1616

17-
Make sure to configure ib0 or appropriate IPoIB netdevice as the parent netdevice for creating overlay/virtual netdevices.
17+
Make sure to configure ib0 or appropriate IPoIB netdevice as the parent netdevice for creating overlay/virtual
18+
netdevices.
1819

1920
**2.** Create ConfigMap
2021

21-
Create config map to describe mode as "hca" mode.
22-
This is per node configuration.
22+
Create config map to describe mode as "hca" mode. This is per node configuration.
2323

2424
```
2525
kubectl create -f images/k8s-rdma-shared-dev-plugin-config-map.yaml
@@ -34,6 +34,7 @@ kubectl create -f images/k8s-rdma-shared-dev-plugin-ds.yaml
3434
**4.** Create Test pod
3535

3636
Create test pod which requests 1 vhca resource.
37+
3738
```
3839
kubectl create -f example/test-hca-pod.yaml
3940
```
@@ -75,16 +76,18 @@ kubectl create -f <rdma-app.yaml>
7576
```
7677

7778
# RDMA Shared Device Plugin Configurations
79+
7880
The plugin has several configuration fields; this section explains the usage of each field.
7981

8082
```json
8183
{
8284
"periodicUpdateInterval": 300,
8385
"configList": [{
84-
"resourceName": "hca_shared_devices_a",
85-
"rdmaHcaMax": 1000,
86-
"devices": ["ib0", "ib1"]
87-
},
86+
"resourceName": "hca_shared_devices_a",
87+
"resourcePrefix": "example_prefix",
88+
"rdmaHcaMax": 1000,
89+
"devices": ["ib0", "ib1"]
90+
},
8891
{
8992
"resourceName": "hca_shared_devices_b",
9093
"rdmaHcaMax": 500,
@@ -98,26 +101,27 @@ The plugin has several configuration fields, this section explains each field us
98101
}
99102
```
100103

101-
`periodicUpdateInterval` is the time interval in seconds to update the resources according to host devices in case of changes.
102-
Notes:
103-
- if `periodicUpdateInterval` is 0 then periodic update for host devices will be disabled.
104-
- if `periodicUpdateInterval` is not set then default periodic update interval of 60 seconds will be used.
104+
`periodicUpdateInterval` is the time interval in seconds to update the resources according to host devices in case of
105+
changes. Notes:
105106

106-
`"configList"` should contain a list of config objects. Each config object may consist of following fields:
107+
- if `periodicUpdateInterval` is 0 then periodic update for host devices will be disabled.
108+
- if `periodicUpdateInterval` is not set then default periodic update interval of 60 seconds will be used.
107109

110+
`"configList"` should contain a list of config objects. Each config object may consist of following fields:
108111

109-
| Field | Required | Description | Type | Example |
110-
|----------------|----------|------------------------------------------------------------------------------------------------------------------------------------|------------------|---------------------------------------------------------|
111-
| "resourceName" | Y | Endpoint resource name. Should not contain special characters, must be unique in the scope of the resource prefix | string | "hca_shared_devices_a" |
112-
| "rdmaHcaMax" | Y | Maximum number of RDMA resources that can be provided by the device plugin resource | Integer | 1000 |
113-
| "selectors" | N | A map of device selectors for filtering the devices. refer to [Device Selectors](#devices-selectors) section for more information | json object | selectors": {"vendors": ["15b3"],"deviceIDs": ["1017"]} |
114-
| "devices" | N | A list of devices names to be selected, same as "ifNames" selector | `string` list | ["ib0", "ib1"] |
112+
| Field | Required | Description | Type | Default value | Example |
113+
|------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------|------------------|---------------|---------------------------------------------------------|
114+
| "resourceName" | Y | Endpoint resource name. Should not contain special characters, must be unique in the scope of the resource prefix | string | - | "hca_shared_devices_a" |
115+
| "resourcePrefix" | N | Endpoint resource prefix. Should not contain special characters | string | "rdma" | "example_prefix" |
116+
| "rdmaHcaMax" | Y | Maximum number of RDMA resources that can be provided by the device plugin resource | Integer | - | 1000 |
117+
| "selectors" | N | A map of device selectors for filtering the devices. Refer to the [Device Selectors](#devices-selectors) section for more information | json object | - | "selectors": {"vendors": ["15b3"],"deviceIDs": ["1017"]} |
118+
| "devices" | N | A list of devices names to be selected, same as "ifNames" selector | `string` list | - | ["ib0", "ib1"] |
115119

116120
Note: Either `selectors` or `devices` must be specified for a given resource; `selectors` is recommended.
117121

118122
## Devices Selectors
119-
The following selectors are used for filtering the desired devices.
120123

124+
The following selectors are used for filtering the desired devices.
121125

122126
| Field | Description | Type | Example |
123127
|-------------|----------------------------------------------------------------|---------------|--------------------------|
@@ -130,7 +134,10 @@ The following selectors are used for filtering the desired devices.
130134
[//]: # (The tables above generated using: https://ozh.github.io/ascii-tables/)
131135

132136
## Selectors Matching Process
133-
The device plugin filters the host devices based on the provided selectors, if there are any missing selectors, the device plugin ignores them. Device plugin performs logical OR between elements of a specific selector and logical AND is performed between selectors.
137+
138+
The device plugin filters the host devices based on the provided selectors. If any selectors are missing, the
139+
device plugin ignores them. Device plugin performs logical OR between elements of a specific selector and logical AND is
140+
performed between selectors.
134141

135142
# RDMA shared device plugin deployment with node labels
136143

@@ -139,7 +146,8 @@ RDMA shared device plugin should be deployed on nodes that:
139146
1. Have RDMA capable hardware
140147
2. RDMA kernel stack is loaded
141148

142-
To allow proper node selection [Node Feature Discovery (NFD)](https://github.com/kubernetes-sigs/node-feature-discovery) can be used to discover the node capabilities, and expose them as node labels.
149+
To allow proper node selection [Node Feature Discovery (NFD)](https://github.com/kubernetes-sigs/node-feature-discovery)
150+
can be used to discover the node capabilities, and expose them as node labels.
143151

144152
1. Deploy NFD, release `v0.6.0` or newer
145153

@@ -150,15 +158,18 @@ To allow proper node selection [Node Feature Discovery (NFD)](https://github.com
150158
```
151159

152160
2. Check the new labels added to the node
161+
153162
```
154163
# kubectl get nodes --show-labels
155164
```
156165

157-
RDMA device plugin can then be deployed on nodes with `feature.node.kubernetes.io/custom-rdma.available=true`, which indicates that the node is RDMA capable and RDMA modules are loaded.
166+
RDMA device plugin can then be deployed on nodes with `feature.node.kubernetes.io/custom-rdma.available=true`, which
167+
indicates that the node is RDMA capable and RDMA modules are loaded.
158168

159169
# Docker image
160-
RDMA shared device plugin uses `alpine` base image by default. To build RDMA shared device plugin with
161-
another base image you need to pass `BASE_IMAGE` argument:
170+
171+
RDMA shared device plugin uses `alpine` base image by default. To build RDMA shared device plugin with another base
172+
image you need to pass `BASE_IMAGE` argument:
162173

163174
```
164175
docker build -t k8s-rdma-shared-dev-plugin \

go.sum

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,6 @@ golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7w
137137
golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
138138
golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
139139
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
140-
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
141140
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
142141
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
143142
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=

pkg/resources/resources_manager.go

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ var (
4343
// resourceManager for plugin
4444
type resourceManager struct {
4545
configFile string
46-
resourcePrefix string
46+
defaultResourcePrefix string
4747
socketSuffix string
4848
watchMode bool
4949
configList []*types.UserConfig
@@ -62,12 +62,12 @@ func NewResourceManager() types.ResourceManager {
6262
fmt.Println("Using Deprecated Devie Plugin Registry Path")
6363
}
6464
return &resourceManager{
65-
configFile: configFilePath,
66-
resourcePrefix: rdmaHcaResourcePrefix,
67-
socketSuffix: socketSuffix,
68-
watchMode: watcherMode,
69-
netlinkManager: &netlinkManager{},
70-
rds: NewRdmaDeviceSpec(requiredRdmaDevices),
65+
configFile: configFilePath,
66+
defaultResourcePrefix: rdmaHcaResourcePrefix,
67+
socketSuffix: socketSuffix,
68+
watchMode: watcherMode,
69+
netlinkManager: &netlinkManager{},
70+
rds: NewRdmaDeviceSpec(requiredRdmaDevices),
7171
}
7272
}
7373

@@ -120,7 +120,7 @@ func (rm *resourceManager) ValidateConfigs() error {
120120

121121
for _, conf := range rm.configList {
122122
// check if name contains acceptable characters
123-
if !validResourceName(conf.ResourceName) {
123+
if !validResourceNameOrPrefix(conf.ResourceName) {
124124
return fmt.Errorf("error: resource name \"%s\" contains invalid characters", conf.ResourceName)
125125
}
126126
// check resource names are unique
@@ -129,6 +129,12 @@ func (rm *resourceManager) ValidateConfigs() error {
129129
// resource name already exist
130130
return fmt.Errorf("error: resource name \"%s\" already exists", conf.ResourceName)
131131
}
132+
// If prefix is not configured - use the default one. Otherwise validate if it contains acceptable characters
133+
if conf.ResourcePrefix == "" {
134+
conf.ResourcePrefix = rm.defaultResourcePrefix
135+
} else if !validResourceNameOrPrefix(conf.ResourcePrefix) {
136+
return fmt.Errorf("error: resource prefix \"%s\" contains invalid characters", conf.ResourcePrefix)
137+
}
132138

133139
if conf.RdmaHcaMax < 0 {
134140
return fmt.Errorf("error: Invalid value for rdmaHcaMax < 0: %d", conf.RdmaHcaMax)
@@ -197,7 +203,7 @@ func (rm *resourceManager) InitServers() error {
197203
log.Printf("Warning: no devices in device pool, creating empty resource server for %s", config.ResourceName)
198204
}
199205

200-
rs, err := newResourceServer(config, filteredDevices, rm.watchMode, rm.resourcePrefix, rm.socketSuffix)
206+
rs, err := newResourceServer(config, filteredDevices, rm.watchMode, rm.socketSuffix)
201207
if err != nil {
202208
return err
203209
}
@@ -241,7 +247,7 @@ func (rm *resourceManager) RestartAllServers() error {
241247
return nil
242248
}
243249

244-
func validResourceName(name string) bool {
250+
func validResourceNameOrPrefix(name string) bool {
245251
// name regex
246252
var validString = regexp.MustCompile(`^[a-zA-Z0-9_]+$`)
247253
return validString.MatchString(name)

pkg/resources/resources_manager_test.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -414,9 +414,10 @@ var _ = Describe("ResourcesManger", func() {
414414
rm := &resourceManager{}
415415

416416
configlist = append(configlist, &types.UserConfig{
417-
ResourceName: "test_config",
418-
RdmaHcaMax: 100,
419-
Devices: []string{"ib0"}})
417+
ResourceName: "test_config",
418+
ResourcePrefix: "test_prefix",
419+
RdmaHcaMax: 100,
420+
Devices: []string{"ib0"}})
420421

421422
rm.configList = configlist
422423
err := rm.InitServers()

pkg/resources/server.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ func (rsc *resourcesServerPort) Dial(unixSocketPath string, timeout time.Duratio
106106
}
107107

108108
// newResourceServer returns an initialized server
109-
func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, watcherMode bool, resourcePrefix,
109+
func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, watcherMode bool,
110110
socketSuffix string) (types.ResourceServer, error) {
111111
var devs []*pluginapi.Device
112112

@@ -115,6 +115,9 @@ func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, w
115115
if config.RdmaHcaMax < 0 {
116116
return nil, fmt.Errorf("error: Invalid value for rdmaHcaMax < 0: %d", config.RdmaHcaMax)
117117
}
118+
if config.ResourcePrefix == "" {
119+
return nil, fmt.Errorf("error: Empty resourcePrefix")
120+
}
118121

119122
deviceSpec := getDevicesSpec(devices)
120123

@@ -138,7 +141,7 @@ func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, w
138141
socketName := fmt.Sprintf("%s.%s", config.ResourceName, socketSuffix)
139142

140143
return &resourceServer{
141-
resourceName: fmt.Sprintf("%s/%s", resourcePrefix, config.ResourceName),
144+
resourceName: fmt.Sprintf("%s/%s", config.ResourcePrefix, config.ResourceName),
142145
socketName: socketName,
143146
socketPath: filepath.Join(sockDir, socketName),
144147
watchMode: watcherMode,

0 commit comments

Comments
 (0)