Skip to content

Commit 47a107c

Browse files
committed
fix: kubelet --root-dir can be used
Signed-off-by: niuli33 <[email protected]>
1 parent be484a8 commit 47a107c

File tree

6 files changed

+42
-13
lines changed

6 files changed

+42
-13
lines changed

cmd/k8s-rdma-shared-dp/main.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ func main() {
7575
log.Fatalf("Exiting.. one or more invalid configuration(s) given: %v", err)
7676
}
7777

78+
if err := rm.SetWatchMode(); err != nil {
79+
log.Fatalln(err.Error())
80+
}
81+
7882
if err := rm.ValidateRdmaSystemMode(); err != nil {
7983
log.Fatalf("Exiting.. can not change : %v", err)
8084
}

deployment/k8s/base/configmap.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ metadata:
66
data:
77
config.json: |
88
{
9+
"kubeletRootDir": "/var/lib/kubelet",
910
"periodicUpdateInterval": 300,
1011
"configList": [{
1112
"resourceName": "hca_shared_devices_a",

deployment/k8s/base/daemonset.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ spec:
2121
securityContext:
2222
privileged: true
2323
volumeMounts:
24+
# 如果configmap.yaml中指定了kubeletRootDir,则需要调整为相同的路径
25+
# 如:kubeletRootDir=/data/kubelet,则mountPath=/data/kubelet/device-plugins
2426
- name: device-plugin
2527
mountPath: /var/lib/kubelet/device-plugins
2628
readOnly: false

pkg/resources/resources_manager.go

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"fmt"
2424
"log"
2525
"os"
26+
"path"
2627
"regexp"
2728
"strconv"
2829
"time"
@@ -52,6 +53,9 @@ const (
5253

5354
// RDMA subsystem network namespace mode
5455
rdmaExclusive = "exclusive"
56+
57+
// Default kubelet root dir
58+
defaultKubeletRootDir = "/var/lib/kubelet"
5559
)
5660

5761
var (
@@ -72,20 +76,14 @@ type resourceManager struct {
7276
rds types.RdmaDeviceSpec
7377
PeriodicUpdateInterval time.Duration
7478
useCdi bool
79+
kubeletRootDir string
7580
}
7681

7782
func NewResourceManager(configFile string, useCdi bool) types.ResourceManager {
78-
watcherMode := detectPluginWatchMode(activeSockDir)
79-
if watcherMode {
80-
fmt.Println("Using Kubelet Plugin Registry Mode")
81-
} else {
82-
fmt.Println("Using Deprecated Devie Plugin Registry Path")
83-
}
8483
return &resourceManager{
8584
configFile: configFile,
8685
defaultResourcePrefix: rdmaHcaResourcePrefix,
8786
socketSuffix: socketSuffix,
88-
watchMode: watcherMode,
8987
netlinkManager: &netlinkManager{},
9088
rds: NewRdmaDeviceSpec(requiredRdmaDevices),
9189
useCdi: useCdi,
@@ -107,6 +105,12 @@ func (rm *resourceManager) ReadConfig() error {
107105

108106
log.Printf("loaded config: %+v \n", config.ConfigList)
109107

108+
// check kubelet root dir config
109+
rm.kubeletRootDir = defaultKubeletRootDir
110+
if config.KubeletRootDir != "" {
111+
rm.kubeletRootDir = config.KubeletRootDir
112+
}
113+
110114
// if periodic update is not set then use the default value
111115
if config.PeriodicUpdateInterval == nil {
112116
log.Println("no periodic update interval is set, use default interval 60 seconds")
@@ -228,7 +232,7 @@ func (rm *resourceManager) InitServers() error {
228232
return err
229233
}
230234
}
231-
rs, err := newResourceServer(config, filteredDevices, rm.watchMode, rm.socketSuffix, rm.useCdi)
235+
rs, err := newResourceServer(config, filteredDevices, rm.watchMode, rm.socketSuffix, rm.useCdi, rm.kubeletRootDir)
232236
if err != nil {
233237
return err
234238
}
@@ -418,3 +422,16 @@ func (rm *resourceManager) PeriodicUpdate() func() {
418422
}
419423
}
420424
}
425+
426+
func (rm *resourceManager) SetWatchMode() error {
427+
sockPathDir := path.Join(rm.kubeletRootDir, "plugins_registry")
428+
watcherMode := detectPluginWatchMode(sockPathDir)
429+
if watcherMode {
430+
fmt.Println("Using Kubelet Plugin Registry Mode")
431+
} else {
432+
fmt.Println("Using Deprecated Devie Plugin Registry Path")
433+
}
434+
rm.watchMode = watcherMode
435+
436+
return nil
437+
}

pkg/resources/server.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"log"
2424
"net"
2525
"os"
26+
"path"
2627
"path/filepath"
2728
"strconv"
2829
"sync"
@@ -68,6 +69,7 @@ type resourceServer struct {
6869
useCdi bool
6970
cdi cdi.CDI
7071
cdiResourceName string
72+
kubeletRootDir string
7173
}
7274

7375
func (rsc *resourcesServerPort) GetServer() *grpc.Server {
@@ -123,10 +125,10 @@ func (rsc *resourcesServerPort) Dial(unixSocketPath string, timeout time.Duratio
123125

124126
// newResourceServer returns an initialized server
125127
func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, watcherMode bool,
126-
socketSuffix string, useCdi bool) (types.ResourceServer, error) {
128+
socketSuffix string, useCdi bool, kubeletRootDir string) (types.ResourceServer, error) {
127129
var devs []*pluginapi.Device
128130

129-
sockDir := activeSockDir
131+
sockDir := path.Join(kubeletRootDir, "plugins_registry")
130132

131133
if config.RdmaHcaMax < 0 {
132134
return nil, fmt.Errorf("error: Invalid value for rdmaHcaMax < 0: %d", config.RdmaHcaMax)
@@ -151,7 +153,7 @@ func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, w
151153
}
152154

153155
if !watcherMode {
154-
sockDir = deprecatedSockDir
156+
sockDir = path.Join(kubeletRootDir, "device-plugins")
155157
}
156158

157159
socketName := fmt.Sprintf("%s.%s", config.ResourceName, socketSuffix)
@@ -172,6 +174,7 @@ func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, w
172174
useCdi: useCdi,
173175
cdi: cdi.New(),
174176
cdiResourceName: config.ResourceName,
177+
kubeletRootDir: kubeletRootDir,
175178
}, nil
176179
}
177180

@@ -277,7 +280,7 @@ func (rs *resourceServer) Watch() {
277280

278281
// Register registers the device plugin for the given resourceName with Kubelet.
279282
func (rs *resourceServer) register() error {
280-
kubeletEndpoint := filepath.Join(deprecatedSockDir, kubeEndPoint)
283+
kubeletEndpoint := filepath.Join(rs.kubeletRootDir, "device-plugins", kubeEndPoint)
281284
conn, err := rs.rsConnector.Dial(kubeletEndpoint, cDialTimeout)
282285
if err != nil {
283286
return err
@@ -424,7 +427,7 @@ func (rs *resourceServer) GetInfo(ctx context.Context, rqt *registerapi.InfoRequ
424427
pluginInfoResponse := &registerapi.PluginInfo{
425428
Type: registerapi.DevicePlugin,
426429
Name: rs.resourceName,
427-
Endpoint: filepath.Join(activeSockDir, rs.socketName),
430+
Endpoint: filepath.Join(rs.kubeletRootDir, "plugins_registry", rs.socketName),
428431
SupportedVersions: []string{"v1alpha1", "v1beta1"},
429432
}
430433
return pluginInfoResponse, nil

pkg/types/types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ type UserConfig struct {
4848

4949
// UserConfigList config list for servers
5050
type UserConfigList struct {
51+
KubeletRootDir string `json:"kubeletRootDir"`
5152
PeriodicUpdateInterval *int `json:"periodicUpdateInterval"`
5253
ConfigList []UserConfig `json:"configList"`
5354
}
@@ -75,6 +76,7 @@ type ResourceManager interface {
7576
RestartAllServers() error
7677
GetFilteredDevices(devices []PciNetDevice, selector *Selectors) []PciNetDevice
7778
PeriodicUpdate() func()
79+
SetWatchMode() error
7880
}
7981

8082
// ResourceServerPort to connect the resources server to k8s

0 commit comments

Comments
 (0)