Skip to content

Commit 6d964b1

Browse files
committed
Make the compute-domain kubelet plugin and registrar directories configurable
Signed-off-by: Swati Gupta <[email protected]>
1 parent b86a33a commit 6d964b1

File tree

4 files changed

+50
-21
lines changed

4 files changed

+50
-21
lines changed

cmd/compute-domain-kubelet-plugin/computedomain.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ const (
4343
informerResyncPeriod = 10 * time.Minute
4444
cleanupInterval = 10 * time.Minute
4545

46-
ComputeDomainDaemonSettingsRoot = DriverPluginPath + "/domains"
4746
ComputeDomainDaemonConfigTemplatePath = "/templates/compute-domain-daemon-config.tmpl.cfg"
4847
)
4948

@@ -67,6 +66,10 @@ type ComputeDomainDaemonSettings struct {
6766
nodesConfigPath string
6867
}
6968

69+
func (c Config) ComputeDomainDaemonSettingsRoot() string {
70+
return filepath.Join(c.DriverPluginPath(), "domains")
71+
}
72+
7073
func NewComputeDomainManager(config *Config, configFilesRoot, cliqueID string) *ComputeDomainManager {
7174
factory := nvinformers.NewSharedInformerFactory(config.clientsets.Nvidia, informerResyncPeriod)
7275
informer := factory.Resource().V1beta1().ComputeDomains().Informer()

cmd/compute-domain-kubelet-plugin/device_state.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,13 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
9090
return nil, fmt.Errorf("error getting cliqueID: %w", err)
9191
}
9292

93-
computeDomainManager := NewComputeDomainManager(config, ComputeDomainDaemonSettingsRoot, cliqueID)
93+
computeDomainManager := NewComputeDomainManager(config, config.ComputeDomainDaemonSettingsRoot(), cliqueID)
9494

9595
if err := cdi.CreateStandardDeviceSpecFile(allocatable); err != nil {
9696
return nil, fmt.Errorf("unable to create base CDI spec file: %v", err)
9797
}
9898

99-
checkpointManager, err := checkpointmanager.NewCheckpointManager(DriverPluginPath)
99+
checkpointManager, err := checkpointmanager.NewCheckpointManager(config.DriverPluginPath())
100100
if err != nil {
101101
return nil, fmt.Errorf("unable to create checkpoint manager: %v", err)
102102
}

cmd/compute-domain-kubelet-plugin/driver.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"errors"
2222
"fmt"
23+
"path/filepath"
2324
"sync"
2425
"time"
2526

@@ -42,10 +43,6 @@ const (
4243
// that deadline, retryable errors are retried (with backoff) via the
4344
// workqueue abstraction.
4445
ErrorRetryMaxTimeout = 45 * time.Second
45-
// DriverPrepUprepFlockPath is the path to a lock file used to make sure
46-
// that calls to nodePrepareResource() / nodeUnprepareResource() never
47-
// interleave, node-globally.
48-
DriverPrepUprepFlockPath = DriverPluginPath + "/pu.lock"
4946
)
5047

5148
// permanentError defines an error indicating that it is permanent.
@@ -64,6 +61,13 @@ type driver struct {
6461
pulock *flock.Flock
6562
}
6663

64+
// DriverPrepUprepFlockPath is the path to a lock file used to make sure
65+
// that calls to nodePrepareResource() / nodeUnprepareResource() never
66+
// interleave, node-globally.
67+
func (c Config) DriverPrepUprepFlockPath() string {
68+
return filepath.Join(c.DriverPluginPath(), "pu.lock")
69+
}
70+
6771
func NewDriver(ctx context.Context, config *Config) (*driver, error) {
6872
state, err := NewDeviceState(ctx, config)
6973
if err != nil {
@@ -73,7 +77,7 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) {
7377
driver := &driver{
7478
client: config.clientsets.Core,
7579
state: state,
76-
pulock: flock.NewFlock(DriverPrepUprepFlockPath),
80+
pulock: flock.NewFlock(config.DriverPrepUprepFlockPath()),
7781
}
7882

7983
helper, err := kubeletplugin.Start(
@@ -89,6 +93,8 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) {
8993
// prepare() must be incoming). Concurrency management for incoming
9094
// requests is done with this driver's work queue abstraction.
9195
kubeletplugin.Serialize(false),
96+
kubeletplugin.RegistrarDirectoryPath(config.flags.kubeletRegistrarDirectoryPath),
97+
kubeletplugin.PluginDataDirectoryPath(config.DriverPluginPath()),
9298
)
9399
if err != nil {
94100
return nil, err

cmd/compute-domain-kubelet-plugin/main.go

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626

2727
"github.com/urfave/cli/v2"
2828

29+
"k8s.io/dynamic-resource-allocation/kubeletplugin"
2930
"k8s.io/klog/v2"
3031

3132
"github.com/NVIDIA/k8s-dra-driver-gpu/internal/info"
@@ -34,7 +35,6 @@ import (
3435

3536
const (
3637
DriverName = "compute-domain.nvidia.com"
37-
DriverPluginPath = "/var/lib/kubelet/plugins/" + DriverName
3838
DriverPluginCheckpointFileBasename = "checkpoint.json"
3939
)
4040

@@ -43,19 +43,25 @@ type Flags struct {
4343
loggingConfig *flags.LoggingConfig
4444
featureGateConfig *flags.FeatureGateConfig
4545

46-
nodeName string
47-
namespace string
48-
cdiRoot string
49-
containerDriverRoot string
50-
hostDriverRoot string
51-
nvidiaCDIHookPath string
46+
nodeName string
47+
namespace string
48+
cdiRoot string
49+
containerDriverRoot string
50+
hostDriverRoot string
51+
nvidiaCDIHookPath string
52+
kubeletRegistrarDirectoryPath string
53+
kubeletPluginsDirectoryPath string
5254
}
5355

5456
type Config struct {
5557
flags *Flags
5658
clientsets flags.ClientSets
5759
}
5860

61+
func (c Config) DriverPluginPath() string {
62+
return filepath.Join(c.flags.kubeletPluginsDirectoryPath, DriverName)
63+
}
64+
5965
func main() {
6066
if err := newApp().Run(os.Args); err != nil {
6167
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
@@ -111,6 +117,20 @@ func newApp() *cli.App {
111117
Destination: &flags.nvidiaCDIHookPath,
112118
EnvVars: []string{"NVIDIA_CDI_HOOK_PATH"},
113119
},
120+
&cli.StringFlag{
121+
Name: "kubelet-registrar-directory-path",
122+
Usage: "Absolute path to the directory where kubelet stores plugin registrations.",
123+
Value: kubeletplugin.KubeletRegistryDir,
124+
Destination: &flags.kubeletRegistrarDirectoryPath,
125+
EnvVars: []string{"KUBELET_REGISTRAR_DIRECTORY_PATH"},
126+
},
127+
&cli.StringFlag{
128+
Name: "kubelet-plugins-directory-path",
129+
Usage: "Absolute path to the directory where kubelet stores plugin data.",
130+
Value: kubeletplugin.KubeletPluginsDir,
131+
Destination: &flags.kubeletPluginsDirectoryPath,
132+
EnvVars: []string{"KUBELET_PLUGINS_DIRECTORY_PATH"},
133+
},
114134
}
115135
cliFlags = append(cliFlags, flags.kubeClientConfig.Flags()...)
116136
cliFlags = append(cliFlags, flags.featureGateConfig.Flags()...)
@@ -158,13 +178,13 @@ func newApp() *cli.App {
158178
// StartPlugin initializes and runs the compute domain kubelet plugin.
159179
func StartPlugin(ctx context.Context, config *Config) error {
160180
// Create the plugin directory
161-
err := os.MkdirAll(DriverPluginPath, 0750)
181+
err := os.MkdirAll(config.DriverPluginPath(), 0750)
162182
if err != nil {
163183
return err
164184
}
165185

166186
// Setup nvidia-cdi-hook binary
167-
if err := config.flags.setNvidiaCDIHookPath(); err != nil {
187+
if err := config.setNvidiaCDIHookPath(); err != nil {
168188
return fmt.Errorf("error setting up nvidia-cdi-hook: %w", err)
169189
}
170190

@@ -215,13 +235,13 @@ func StartPlugin(ctx context.Context, config *Config) error {
215235
// to this path. The /usr/bin/nvidia-cdi-hook is present in the current
216236
// container image because it is copied from the toolkit image into this
217237
// container at build time.
218-
func (f *Flags) setNvidiaCDIHookPath() error {
219-
if f.nvidiaCDIHookPath != "" {
238+
func (c Config) setNvidiaCDIHookPath() error {
239+
if c.flags.nvidiaCDIHookPath != "" {
220240
return nil
221241
}
222242

223243
sourcePath := "/usr/bin/nvidia-cdi-hook"
224-
targetPath := filepath.Join(DriverPluginPath, "nvidia-cdi-hook")
244+
targetPath := filepath.Join(c.DriverPluginPath(), "nvidia-cdi-hook")
225245

226246
input, err := os.ReadFile(sourcePath)
227247
if err != nil {
@@ -232,7 +252,7 @@ func (f *Flags) setNvidiaCDIHookPath() error {
232252
return fmt.Errorf("error copying nvidia-cdi-hook: %w", err)
233253
}
234254

235-
f.nvidiaCDIHookPath = targetPath
255+
c.flags.nvidiaCDIHookPath = targetPath
236256

237257
return nil
238258
}

0 commit comments

Comments
 (0)