Skip to content

Commit c0046b4

Browse files
authored
Merge pull request NVIDIA#464 from guptaNswati/kubelet-dir-flags
make kubelet plugin dir configurable
2 parents 577c844 + ba49202 commit c0046b4

File tree

10 files changed

+115
-45
lines changed

10 files changed

+115
-45
lines changed

cmd/compute-domain-kubelet-plugin/computedomain.go

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ const (
4343
informerResyncPeriod = 10 * time.Minute
4444
cleanupInterval = 10 * time.Minute
4545

46-
ComputeDomainDaemonSettingsRoot = DriverPluginPath + "/domains"
46+
ComputeDomainDaemonConfigFilesDirName = "domains"
4747
ComputeDomainDaemonConfigTemplatePath = "/templates/compute-domain-daemon-config.tmpl.cfg"
4848
)
4949

@@ -67,9 +67,10 @@ type ComputeDomainDaemonSettings struct {
6767
nodesConfigPath string
6868
}
6969

70-
func NewComputeDomainManager(config *Config, configFilesRoot, cliqueID string) *ComputeDomainManager {
70+
func NewComputeDomainManager(config *Config, cliqueID string) *ComputeDomainManager {
7171
factory := nvinformers.NewSharedInformerFactory(config.clientsets.Nvidia, informerResyncPeriod)
7272
informer := factory.Resource().V1beta1().ComputeDomains().Informer()
73+
configFilesRoot := filepath.Join(config.DriverPluginPath(), ComputeDomainDaemonConfigFilesDirName)
7374

7475
m := &ComputeDomainManager{
7576
config: config,

cmd/compute-domain-kubelet-plugin/device_state.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -90,13 +90,13 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
9090
return nil, fmt.Errorf("error getting cliqueID: %w", err)
9191
}
9292

93-
computeDomainManager := NewComputeDomainManager(config, ComputeDomainDaemonSettingsRoot, cliqueID)
93+
computeDomainManager := NewComputeDomainManager(config, cliqueID)
9494

9595
if err := cdi.CreateStandardDeviceSpecFile(allocatable); err != nil {
9696
return nil, fmt.Errorf("unable to create base CDI spec file: %v", err)
9797
}
9898

99-
checkpointManager, err := checkpointmanager.NewCheckpointManager(DriverPluginPath)
99+
checkpointManager, err := checkpointmanager.NewCheckpointManager(config.DriverPluginPath())
100100
if err != nil {
101101
return nil, fmt.Errorf("unable to create checkpoint manager: %v", err)
102102
}

cmd/compute-domain-kubelet-plugin/driver.go

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"errors"
2222
"fmt"
23+
"path/filepath"
2324
"sync"
2425
"time"
2526

@@ -45,7 +46,7 @@ const (
4546
// DriverPrepUprepFlockFileName is the name of the lock file (located under the driver plugin path) used to make sure
4647
// that calls to nodePrepareResource() / nodeUnprepareResource() never
4748
// interleave, node-globally.
48-
DriverPrepUprepFlockPath = DriverPluginPath + "/pu.lock"
49+
DriverPrepUprepFlockFileName = "pu.lock"
4950
)
5051

5152
// permanentError defines an error indicating that it is permanent.
@@ -70,10 +71,12 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) {
7071
return nil, err
7172
}
7273

74+
puLockPath := filepath.Join(config.DriverPluginPath(), DriverPrepUprepFlockFileName)
75+
7376
driver := &driver{
7477
client: config.clientsets.Core,
7578
state: state,
76-
pulock: flock.NewFlock(DriverPrepUprepFlockPath),
79+
pulock: flock.NewFlock(puLockPath),
7780
}
7881

7982
helper, err := kubeletplugin.Start(
@@ -89,6 +92,8 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) {
8992
// prepare() must be incoming). Concurrency management for incoming
9093
// requests is done with this driver's work queue abstraction.
9194
kubeletplugin.Serialize(false),
95+
kubeletplugin.RegistrarDirectoryPath(config.flags.kubeletRegistrarDirectoryPath),
96+
kubeletplugin.PluginDataDirectoryPath(config.DriverPluginPath()),
9297
)
9398
if err != nil {
9499
return nil, err

cmd/compute-domain-kubelet-plugin/main.go

Lines changed: 33 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ import (
2626

2727
"github.com/urfave/cli/v2"
2828

29+
"k8s.io/dynamic-resource-allocation/kubeletplugin"
2930
"k8s.io/klog/v2"
3031

3132
"github.com/NVIDIA/k8s-dra-driver-gpu/internal/info"
@@ -34,7 +35,6 @@ import (
3435

3536
const (
3637
DriverName = "compute-domain.nvidia.com"
37-
DriverPluginPath = "/var/lib/kubelet/plugins/" + DriverName
3838
DriverPluginCheckpointFileBasename = "checkpoint.json"
3939
)
4040

@@ -43,19 +43,25 @@ type Flags struct {
4343
loggingConfig *flags.LoggingConfig
4444
featureGateConfig *flags.FeatureGateConfig
4545

46-
nodeName string
47-
namespace string
48-
cdiRoot string
49-
containerDriverRoot string
50-
hostDriverRoot string
51-
nvidiaCDIHookPath string
46+
nodeName string
47+
namespace string
48+
cdiRoot string
49+
containerDriverRoot string
50+
hostDriverRoot string
51+
nvidiaCDIHookPath string
52+
kubeletRegistrarDirectoryPath string
53+
kubeletPluginsDirectoryPath string
5254
}
5355

5456
type Config struct {
5557
flags *Flags
5658
clientsets flags.ClientSets
5759
}
5860

61+
func (c Config) DriverPluginPath() string {
62+
return filepath.Join(c.flags.kubeletPluginsDirectoryPath, DriverName)
63+
}
64+
5965
func main() {
6066
if err := newApp().Run(os.Args); err != nil {
6167
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
@@ -111,6 +117,20 @@ func newApp() *cli.App {
111117
Destination: &flags.nvidiaCDIHookPath,
112118
EnvVars: []string{"NVIDIA_CDI_HOOK_PATH"},
113119
},
120+
&cli.StringFlag{
121+
Name: "kubelet-registrar-directory-path",
122+
Usage: "Absolute path to the directory where kubelet stores plugin registrations.",
123+
Value: kubeletplugin.KubeletRegistryDir,
124+
Destination: &flags.kubeletRegistrarDirectoryPath,
125+
EnvVars: []string{"KUBELET_REGISTRAR_DIRECTORY_PATH"},
126+
},
127+
&cli.StringFlag{
128+
Name: "kubelet-plugins-directory-path",
129+
Usage: "Absolute path to the directory where kubelet stores plugin data.",
130+
Value: kubeletplugin.KubeletPluginsDir,
131+
Destination: &flags.kubeletPluginsDirectoryPath,
132+
EnvVars: []string{"KUBELET_PLUGINS_DIRECTORY_PATH"},
133+
},
114134
}
115135
cliFlags = append(cliFlags, flags.kubeClientConfig.Flags()...)
116136
cliFlags = append(cliFlags, flags.featureGateConfig.Flags()...)
@@ -158,13 +178,13 @@ func newApp() *cli.App {
158178
// StartPlugin initializes and runs the compute domain kubelet plugin.
159179
func StartPlugin(ctx context.Context, config *Config) error {
160180
// Create the plugin directory
161-
err := os.MkdirAll(DriverPluginPath, 0750)
181+
err := os.MkdirAll(config.DriverPluginPath(), 0750)
162182
if err != nil {
163183
return err
164184
}
165185

166186
// Setup nvidia-cdi-hook binary
167-
if err := config.flags.setNvidiaCDIHookPath(); err != nil {
187+
if err := config.setNvidiaCDIHookPath(); err != nil {
168188
return fmt.Errorf("error setting up nvidia-cdi-hook: %w", err)
169189
}
170190

@@ -215,13 +235,13 @@ func StartPlugin(ctx context.Context, config *Config) error {
215235
// to this path. The /usr/bin/nvidia-cdi-hook is present in the current
216236
// container image because it is copied from the toolkit image into this
217237
// container at build time.
218-
func (f *Flags) setNvidiaCDIHookPath() error {
219-
if f.nvidiaCDIHookPath != "" {
238+
func (c Config) setNvidiaCDIHookPath() error {
239+
if c.flags.nvidiaCDIHookPath != "" {
220240
return nil
221241
}
222242

223243
sourcePath := "/usr/bin/nvidia-cdi-hook"
224-
targetPath := filepath.Join(DriverPluginPath, "nvidia-cdi-hook")
244+
targetPath := filepath.Join(c.DriverPluginPath(), "nvidia-cdi-hook")
225245

226246
input, err := os.ReadFile(sourcePath)
227247
if err != nil {
@@ -232,7 +252,7 @@ func (f *Flags) setNvidiaCDIHookPath() error {
232252
return fmt.Errorf("error copying nvidia-cdi-hook: %w", err)
233253
}
234254

235-
f.nvidiaCDIHookPath = targetPath
255+
c.flags.nvidiaCDIHookPath = targetPath
236256

237257
return nil
238258
}

cmd/gpu-kubelet-plugin/device_state.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -85,13 +85,13 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
8585
}
8686

8787
tsManager := NewTimeSlicingManager(nvdevlib)
88-
mpsManager := NewMpsManager(config, nvdevlib, MpsRoot, hostDriverRoot, MpsControlDaemonTemplatePath)
88+
mpsManager := NewMpsManager(config, nvdevlib, hostDriverRoot, MpsControlDaemonTemplatePath)
8989

9090
if err := cdi.CreateStandardDeviceSpecFile(allocatable); err != nil {
9191
return nil, fmt.Errorf("unable to create base CDI spec file: %v", err)
9292
}
9393

94-
checkpointManager, err := checkpointmanager.NewCheckpointManager(DriverPluginPath)
94+
checkpointManager, err := checkpointmanager.NewCheckpointManager(config.DriverPluginPath())
9595
if err != nil {
9696
return nil, fmt.Errorf("unable to create checkpoint manager: %v", err)
9797
}

cmd/gpu-kubelet-plugin/driver.go

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ package main
1919
import (
2020
"context"
2121
"fmt"
22+
"path/filepath"
2223
"time"
2324

2425
resourceapi "k8s.io/api/resource/v1"
@@ -35,7 +36,7 @@ import (
3536
// DriverPrepUprepFlockFileName is the name of the lock file (located under the driver plugin path) used to make sure
3637
// that calls to nodePrepareResource() / nodeUnprepareResource() never
3738
// interleave, node-globally.
38-
const DriverPrepUprepFlockPath = DriverPluginPath + "/pu.lock"
39+
const DriverPrepUprepFlockFileName = "pu.lock"
3940

4041
type driver struct {
4142
client coreclientset.Interface
@@ -49,10 +50,13 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) {
4950
if err != nil {
5051
return nil, err
5152
}
53+
54+
puLockPath := filepath.Join(config.DriverPluginPath(), DriverPrepUprepFlockFileName)
55+
5256
driver := &driver{
5357
client: config.clientsets.Core,
5458
state: state,
55-
pulock: flock.NewFlock(DriverPrepUprepFlockPath),
59+
pulock: flock.NewFlock(puLockPath),
5660
}
5761

5862
helper, err := kubeletplugin.Start(
@@ -62,6 +66,8 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) {
6266
kubeletplugin.NodeName(config.flags.nodeName),
6367
kubeletplugin.DriverName(DriverName),
6468
kubeletplugin.Serialize(false),
69+
kubeletplugin.RegistrarDirectoryPath(config.flags.kubeletRegistrarDirectoryPath),
70+
kubeletplugin.PluginDataDirectoryPath(config.DriverPluginPath()),
6571
)
6672
if err != nil {
6773
return nil, err

cmd/gpu-kubelet-plugin/main.go

Lines changed: 35 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ import (
2626

2727
"github.com/urfave/cli/v2"
2828

29+
"k8s.io/dynamic-resource-allocation/kubeletplugin"
2930
"k8s.io/klog/v2"
3031

3132
"github.com/NVIDIA/k8s-dra-driver-gpu/internal/info"
@@ -34,7 +35,6 @@ import (
3435

3536
const (
3637
DriverName = "gpu.nvidia.com"
37-
DriverPluginPath = "/var/lib/kubelet/plugins/" + DriverName
3838
DriverPluginCheckpointFileBasename = "checkpoint.json"
3939
)
4040

@@ -43,20 +43,26 @@ type Flags struct {
4343
loggingConfig *flags.LoggingConfig
4444
featureGateConfig *flags.FeatureGateConfig
4545

46-
nodeName string
47-
namespace string
48-
cdiRoot string
49-
containerDriverRoot string
50-
hostDriverRoot string
51-
nvidiaCDIHookPath string
52-
imageName string
46+
nodeName string
47+
namespace string
48+
cdiRoot string
49+
containerDriverRoot string
50+
hostDriverRoot string
51+
nvidiaCDIHookPath string
52+
imageName string
53+
kubeletRegistrarDirectoryPath string
54+
kubeletPluginsDirectoryPath string
5355
}
5456

5557
type Config struct {
5658
flags *Flags
5759
clientsets flags.ClientSets
5860
}
5961

62+
func (c Config) DriverPluginPath() string {
63+
return filepath.Join(c.flags.kubeletPluginsDirectoryPath, DriverName)
64+
}
65+
6066
func main() {
6167
if err := newApp().Run(os.Args); err != nil {
6268
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
@@ -119,6 +125,20 @@ func newApp() *cli.App {
119125
Destination: &flags.imageName,
120126
EnvVars: []string{"IMAGE_NAME"},
121127
},
128+
&cli.StringFlag{
129+
Name: "kubelet-registrar-directory-path",
130+
Usage: "Absolute path to the directory where kubelet stores plugin registrations.",
131+
Value: kubeletplugin.KubeletRegistryDir,
132+
Destination: &flags.kubeletRegistrarDirectoryPath,
133+
EnvVars: []string{"KUBELET_REGISTRAR_DIRECTORY_PATH"},
134+
},
135+
&cli.StringFlag{
136+
Name: "kubelet-plugins-directory-path",
137+
Usage: "Absolute path to the directory where kubelet stores plugin data.",
138+
Value: kubeletplugin.KubeletPluginsDir,
139+
Destination: &flags.kubeletPluginsDirectoryPath,
140+
EnvVars: []string{"KUBELET_PLUGINS_DIRECTORY_PATH"},
141+
},
122142
}
123143
cliFlags = append(cliFlags, flags.kubeClientConfig.Flags()...)
124144
cliFlags = append(cliFlags, flags.featureGateConfig.Flags()...)
@@ -166,13 +186,13 @@ func newApp() *cli.App {
166186
// StartPlugin initializes and runs the GPU kubelet plugin.
167187
func StartPlugin(ctx context.Context, config *Config) error {
168188
// Create the plugin directory
169-
err := os.MkdirAll(DriverPluginPath, 0750)
189+
err := os.MkdirAll(config.DriverPluginPath(), 0750)
170190
if err != nil {
171191
return err
172192
}
173193

174194
// Setup nvidia-cdi-hook binary
175-
if err := config.flags.setNvidiaCDIHookPath(); err != nil {
195+
if err := config.setNvidiaCDIHookPath(); err != nil {
176196
return fmt.Errorf("error setting up nvidia-cdi-hook: %w", err)
177197
}
178198

@@ -216,19 +236,20 @@ func StartPlugin(ctx context.Context, config *Config) error {
216236
return nil
217237
}
218238

239+
// setNvidiaCDIHookPath ensures 'c.flags.nvidiaCDIHookPath' is populated.
219240
// If 'c.flags.nvidiaCDIHookPath' is already set (from the command line), do nothing.
220241
// If 'c.flags.nvidiaCDIHookPath' is empty, it copies the nvidia-cdi-hook binary from
221242
// /usr/bin/nvidia-cdi-hook to c.DriverPluginPath() and sets 'c.flags.nvidiaCDIHookPath'
222243
// to this path. The /usr/bin/nvidia-cdi-hook is present in the current
223244
// container image because it is copied from the toolkit image into this
224245
// container at build time.
225-
func (f *Flags) setNvidiaCDIHookPath() error {
226-
if f.nvidiaCDIHookPath != "" {
246+
func (c Config) setNvidiaCDIHookPath() error {
247+
if c.flags.nvidiaCDIHookPath != "" {
227248
return nil
228249
}
229250

230251
sourcePath := "/usr/bin/nvidia-cdi-hook"
231-
targetPath := filepath.Join(DriverPluginPath, "nvidia-cdi-hook")
252+
targetPath := filepath.Join(c.DriverPluginPath(), "nvidia-cdi-hook")
232253

233254
input, err := os.ReadFile(sourcePath)
234255
if err != nil {
@@ -239,7 +260,7 @@ func (f *Flags) setNvidiaCDIHookPath() error {
239260
return fmt.Errorf("error copying nvidia-cdi-hook: %w", err)
240261
}
241262

242-
f.nvidiaCDIHookPath = targetPath
263+
c.flags.nvidiaCDIHookPath = targetPath
243264

244265
return nil
245266
}

cmd/gpu-kubelet-plugin/sharing.go

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ import (
2525
"fmt"
2626
"os"
2727
"os/exec"
28+
"path/filepath"
2829
"slices"
2930
"strconv"
3031
"strings"
@@ -51,7 +52,7 @@ import (
5152
)
5253

5354
const (
54-
MpsRoot = DriverPluginPath + "/mps"
55+
MpsControlFilesDirName = "mps"
5556
MpsControlDaemonTemplatePath = "/templates/mps-control-daemon.tmpl.yaml"
5657
MpsControlDaemonNameFmt = "mps-control-daemon-%v" // Fill with ClaimUID
5758
)
@@ -124,7 +125,9 @@ func (t *TimeSlicingManager) SetTimeSlice(devices UUIDProvider, config *configap
124125
return nil
125126
}
126127

127-
func NewMpsManager(config *Config, deviceLib *deviceLib, controlFilesRoot, hostDriverRoot, templatePath string) *MpsManager {
128+
func NewMpsManager(config *Config, deviceLib *deviceLib, hostDriverRoot, templatePath string) *MpsManager {
129+
controlFilesRoot := filepath.Join(config.DriverPluginPath(), MpsControlFilesDirName)
130+
128131
return &MpsManager{
129132
controlFilesRoot: controlFilesRoot,
130133
hostDriverRoot: hostDriverRoot,

0 commit comments

Comments
 (0)