Skip to content

Commit 3feb86b

Browse files
committed
Invoke create-device-nodes directly
Signed-off-by: Evan Lezar <[email protected]>
1 parent f9c0907 commit 3feb86b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+8891
-9
lines changed

cmd/nvidia-mig-manager/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ func migReconfigure(ctx context.Context, migConfigValue string, clientset *kuber
418418
return fmt.Errorf("error creating reconfigure instance: %w", err)
419419
}
420420

421-
return rcfg.Run()
421+
return rcfg.Run(ctx)
422422
}
423423

424424
func ContinuouslySyncMigConfigChanges(clientset *kubernetes.Clientset, migConfig *SyncableMigConfig) chan struct{} {

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ require (
4646
github.com/russross/blackfriday/v2 v2.1.0 // indirect
4747
github.com/spf13/pflag v1.0.5 // indirect
4848
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
49+
github.com/urfave/cli/v3 v3.4.1 // indirect
4950
github.com/x448/float16 v0.8.4 // indirect
5051
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
5152
go.yaml.in/yaml/v2 v2.4.2 // indirect

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG
121121
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
122122
github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU=
123123
github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4=
124+
github.com/urfave/cli/v3 v3.4.1 h1:1M9UOCy5bLmGnuu1yn3t3CB4rG79Rtoxuv1sPhnm6qM=
125+
github.com/urfave/cli/v3 v3.4.1/go.mod h1:FJSKtM/9AiiTOJL4fJ6TbMUkxBXn7GO9guZqoZtpYpo=
124126
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
125127
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
126128
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=

pkg/mig/reconfigure/reconfigure.go

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"k8s.io/client-go/kubernetes"
3232
"tags.cncf.io/container-device-interface/pkg/cdi"
3333

34+
devicenodes "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-device-nodes"
3435
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
3536
transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
3637

@@ -122,7 +123,7 @@ func New(ctx context.Context, clientset *kubernetes.Clientset, migPartedBinary [
122123
}
123124

124125
// Run executes the complete MIG reconfiguration process
125-
func (r *Reconfigure) Run() error {
126+
func (r *Reconfigure) Run(ctx context.Context) error {
126127

127128
// Ensure systemd managers are cleaned up
128129
defer r.cleanup()
@@ -212,7 +213,7 @@ func (r *Reconfigure) Run() error {
212213
_ = r.setState(migStateFailed)
213214
return fmt.Errorf("failed to run nvidia-smi: %w", err)
214215
}
215-
if err := r.handleCDI(); err != nil {
216+
if err := r.handleCDI(ctx); err != nil {
216217
_ = r.setState(migStateFailed)
217218
return fmt.Errorf("failed to handle CDI: %w", err)
218219
}
@@ -469,14 +470,16 @@ func (r *Reconfigure) applyMigConfig() error {
469470
}
470471

471472
// handleCDI handles CDI operations if enabled
472-
func (r *Reconfigure) handleCDI() error {
473+
func (r *Reconfigure) handleCDI(ctx context.Context) error {
473474

474475
log.Info("Creating NVIDIA control device nodes")
475-
// TODO: Instead of shelling out, we need to invoke the method via Go. The Toolkit code needs to be refactored first.
476-
cmd := exec.Command("nvidia-ctk", "system", "create-device-nodes", "--control-devices", "--dev-root="+r.opts.DevRootCtrPath)
477-
cmd.Stdout = os.Stdout
478-
cmd.Stderr = os.Stderr
479-
if err := cmd.Run(); err != nil {
476+
477+
// TODO: Instead of abusing CLI command we generate here, we should expose
478+
// this API and use that instead. This would require refactoring in the
479+
// toolkit.
480+
cmd := devicenodes.NewCommand(log.StandardLogger())
481+
err := cmd.Run(ctx, []string{"create-device-nodes", "--control-devices", "--dev-root=" + r.opts.DevRootCtrPath})
482+
if err != nil {
480483
return fmt.Errorf("failed to create control device nodes: %w", err)
481484
}
482485

vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-device-nodes/create-device-nodes.go

Lines changed: 142 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices/builder.go

Lines changed: 62 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)