Skip to content

Commit f9c0907

Browse files
committed
Use nvcdi API to generate management CDI spec
This change replaces the direct invocation of the `nvidia-ctk cdi generate` command with calls to the nvcdi API. This matches how the toolkit container generates the CDI spec for management devices. Signed-off-by: Evan Lezar <[email protected]>
1 parent 9d3ba86 commit f9c0907

File tree

1 file changed

+35
-42
lines changed

1 file changed

+35
-42
lines changed

pkg/mig/reconfigure/reconfigure.go

Lines changed: 35 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ import (
2929
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3030
"k8s.io/apimachinery/pkg/util/wait"
3131
"k8s.io/client-go/kubernetes"
32+
"tags.cncf.io/container-device-interface/pkg/cdi"
33+
34+
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
35+
transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root"
3236

3337
"github.com/NVIDIA/mig-parted/internal/systemd"
3438
)
@@ -641,59 +645,48 @@ func (r *Reconfigure) runNvidiaSMI() error {
641645
func (r *Reconfigure) createCDISpec() error {
642646
log.Info("Creating management CDI spec (simplified implementation)")
643647

644-
cdiGenerateCommand := exec.Command("nvidia-ctk", "cdi", "generate",
645-
"--driver-root="+r.opts.DriverRootCtrPath,
646-
"--dev-root="+r.opts.DevRootCtrPath,
647-
"--vendor=management.nvidia.com",
648-
"--class=gpu",
649-
"--nvidia-cdi-hook-path="+r.opts.NvidiaCDIHookPath,
650-
)
648+
if !r.opts.CDIEnabled {
649+
return nil
650+
}
651651

652-
stdout1, err := cdiGenerateCommand.StdoutPipe()
652+
cdilib, err := nvcdi.New(
653+
// TODO: We may want to switch to klog for logging here.
654+
nvcdi.WithLogger(log.StandardLogger()),
655+
nvcdi.WithMode(nvcdi.ModeManagement),
656+
nvcdi.WithDriverRoot(r.opts.DriverRootCtrPath),
657+
nvcdi.WithDevRoot(r.opts.DevRootCtrPath),
658+
nvcdi.WithNVIDIACDIHookPath(r.opts.NvidiaCDIHookPath),
659+
nvcdi.WithVendor("management.nvidia.com"),
660+
nvcdi.WithClass("gpu"),
661+
)
653662
if err != nil {
654-
return fmt.Errorf("failed to get stdout pipe for the nvidia-ctk command: %w", err)
663+
return fmt.Errorf("failed to create CDI library for management containers: %v", err)
655664
}
656665

657-
cdiTransformDriverRootCommand := exec.Command("nvidia-ctk", "cdi", "transform", "root",
658-
"--from="+r.opts.DriverRootCtrPath,
659-
"--to="+r.opts.DriverRoot,
660-
"--input=-")
661-
cdiTransformDriverRootCommand.Stdin = stdout1
662-
663-
stdout2, err := cdiTransformDriverRootCommand.StdoutPipe()
666+
spec, err := cdilib.GetSpec()
664667
if err != nil {
665-
return fmt.Errorf("failed to get stdout pipe for the cdiTransformDriverRootCommand: %w", err)
668+
return fmt.Errorf("failed to genereate CDI spec for management containers: %v", err)
666669
}
667670

668-
cdiTransformDevRootCommand := exec.Command("nvidia-ctk", "cdi", "transform", "root",
669-
"--from="+r.opts.DevRootCtrPath,
670-
"--to="+r.opts.DevRoot,
671-
"--input=-",
672-
"--output=/var/run/cdi/management.nvidia.com-gpu.yaml")
673-
674-
cdiTransformDevRootCommand.Stdin = stdout2
675-
676-
err = cdiGenerateCommand.Start()
677-
if err != nil {
678-
return fmt.Errorf("cmd.Start error for cdiGenerateCommand: %w", err)
671+
transformer := transformroot.NewDriverTransformer(
672+
transformroot.WithDriverRoot(r.opts.DriverRootCtrPath),
673+
transformroot.WithTargetDriverRoot(r.opts.DriverRoot),
674+
transformroot.WithDevRoot(r.opts.DevRootCtrPath),
675+
transformroot.WithTargetDevRoot(r.opts.DevRoot),
676+
)
677+
if err := transformer.Transform(spec.Raw()); err != nil {
678+
return fmt.Errorf("failed to transform driver root in CDI spec: %v", err)
679679
}
680-
err = cdiTransformDriverRootCommand.Start()
680+
681+
name, err := cdi.GenerateNameForSpec(spec.Raw())
681682
if err != nil {
682-
return fmt.Errorf("cmd.Start error for cdiTransformDriverRootCommand: %w", err)
683+
return fmt.Errorf("failed to generate CDI name for management containers: %v", err)
683684
}
684-
err = cdiTransformDevRootCommand.Start()
685+
// TODO: Should this path be configurable? What's important is that this
686+
// file path is the same as the one generated in the NVIDIA Container Toolkit.
687+
err = spec.Save(filepath.Join("/var/run/cdi/", name))
685688
if err != nil {
686-
return fmt.Errorf("cmd.Start error for running cdiTransformDevRootCommand: %w", err)
687-
}
688-
689-
if err = cdiGenerateCommand.Wait(); err != nil {
690-
return fmt.Errorf("cmd.Wait error for cdiGenerateCommand: %w", err)
691-
}
692-
if err = cdiTransformDriverRootCommand.Wait(); err != nil {
693-
return fmt.Errorf("cmd.Wait error for cdiTransformDriverRootCommand: %w", err)
694-
}
695-
if err = cdiTransformDevRootCommand.Wait(); err != nil {
696-
return fmt.Errorf("cmd.Wait error for cdiTransformDevRootCommand: %w", err)
689+
return fmt.Errorf("failed to save CDI spec for management containers: %v", err)
697690
}
698691

699692
return nil

0 commit comments

Comments
 (0)