Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions cmd/nvidia-ctk-installer/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func (o Options) Configure(cfg engine.Interface) error {
if err != nil {
return fmt.Errorf("unable to update config: %v", err)
}
return o.flush(cfg)
return o.Flush(cfg)
}

// Unconfigure removes the options from the specified config
Expand All @@ -75,7 +75,7 @@ func (o Options) Unconfigure(cfg engine.Interface) error {
return fmt.Errorf("unable to update config: %v", err)
}

if err := o.flush(cfg); err != nil {
if err := o.Flush(cfg); err != nil {
return err
}

Expand All @@ -93,8 +93,8 @@ func (o Options) Unconfigure(cfg engine.Interface) error {
return nil
}

// flush flushes the specified config to disk
func (o Options) flush(cfg engine.Interface) error {
// Flush flushes the specified config to disk
func (o Options) Flush(cfg engine.Interface) error {
filepath := o.DropInConfig
if filepath == "" {
filepath = o.TopLevelConfigPath
Expand Down
117 changes: 112 additions & 5 deletions cmd/nvidia-ctk-installer/container/runtime/crio/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,30 @@ func TestCrioConfigLifecycle(t *testing.T) {
},
assertCleanupPostConditions: func(t *testing.T, co *container.Options, _ *Options) error {
require.NoFileExists(t, co.TopLevelConfigPath)
require.NoFileExists(t, co.DropInConfig)
// drop-in file not removed on cleanup
actual, err := os.ReadFile(co.DropInConfig)
require.NoError(t, err)

expected := `
[crio]

[crio.runtime]

[crio.runtime.runtimes]

[crio.runtime.runtimes.nvidia]
runtime_path = "/usr/bin/nvidia-container-runtime"
runtime_type = "oci"

[crio.runtime.runtimes.nvidia-cdi]
runtime_path = "/usr/bin/nvidia-container-runtime.cdi"
runtime_type = "oci"

[crio.runtime.runtimes.nvidia-legacy]
runtime_path = "/usr/bin/nvidia-container-runtime.legacy"
runtime_type = "oci"
`
require.Equal(t, expected, string(actual))
return nil
},
},
Expand Down Expand Up @@ -182,8 +205,6 @@ signature_policy = "/etc/crio/policy.json"
assertCleanupPostConditions: func(t *testing.T, co *container.Options, o *Options) error {
require.FileExists(t, co.TopLevelConfigPath)

require.NoFileExists(t, co.DropInConfig)

actualTopLevel, err := os.ReadFile(co.TopLevelConfigPath)
require.NoError(t, err)

Expand All @@ -203,6 +224,37 @@ signature_policy = "/etc/crio/policy.json"
`
require.Equal(t, expectedTopLevel, string(actualTopLevel))

// drop-in file not removed on cleanup
require.FileExists(t, co.DropInConfig)
actual, err := os.ReadFile(co.DropInConfig)
require.NoError(t, err)

expected := `
[crio]

[crio.runtime]

[crio.runtime.runtimes]

[crio.runtime.runtimes.nvidia]
monitor_path = "/usr/libexec/crio/conmon"
runtime_path = "/usr/bin/nvidia-container-runtime"
runtime_root = "/run/crun"
runtime_type = "oci"

[crio.runtime.runtimes.nvidia-cdi]
monitor_path = "/usr/libexec/crio/conmon"
runtime_path = "/usr/bin/nvidia-container-runtime.cdi"
runtime_root = "/run/crun"
runtime_type = "oci"

[crio.runtime.runtimes.nvidia-legacy]
monitor_path = "/usr/libexec/crio/conmon"
runtime_path = "/usr/bin/nvidia-container-runtime.legacy"
runtime_root = "/run/crun"
runtime_type = "oci"
`
require.Equal(t, expected, string(actual))
return nil
},
},
Expand Down Expand Up @@ -312,8 +364,33 @@ runtime_type = "oci"

require.Equal(t, expectedTopLevel, string(actualTopLevel))

require.NoFileExists(t, co.DropInConfig)
// drop-in file not removed on cleanup
// default_runtime setting removed from drop-in
require.FileExists(t, co.DropInConfig)
actual, err := os.ReadFile(co.DropInConfig)
require.NoError(t, err)

expected := `
[crio]

[crio.runtime]

[crio.runtime.runtimes]

[crio.runtime.runtimes.nvidia]
runtime_path = "/usr/bin/nvidia-container-runtime"
runtime_type = "oci"

[crio.runtime.runtimes.nvidia-cdi]
runtime_path = "/usr/bin/nvidia-container-runtime.cdi"
runtime_type = "oci"

[crio.runtime.runtimes.nvidia-legacy]
runtime_path = "/usr/bin/nvidia-container-runtime.legacy"
runtime_type = "oci"
`

require.Equal(t, expected, string(actual))
return nil
},
},
Expand Down Expand Up @@ -480,7 +557,37 @@ plugin_dirs = [
`
require.Equal(t, expected, string(actual))

require.NoFileExists(t, co.DropInConfig)
// drop-in file not removed on cleanup
require.FileExists(t, co.DropInConfig)
actualDropIn, err := os.ReadFile(co.DropInConfig)
require.NoError(t, err)

expectedDropIn := `
[crio]

[crio.runtime]

[crio.runtime.runtimes]

[crio.runtime.runtimes.nvidia]
monitor_path = "/usr/libexec/crio/conmon"
runtime_path = "/usr/bin/nvidia-container-runtime"
runtime_root = "/run/crun"
runtime_type = "oci"

[crio.runtime.runtimes.nvidia-cdi]
monitor_path = "/usr/libexec/crio/conmon"
runtime_path = "/usr/bin/nvidia-container-runtime.cdi"
runtime_root = "/run/crun"
runtime_type = "oci"

[crio.runtime.runtimes.nvidia-legacy]
monitor_path = "/usr/libexec/crio/conmon"
runtime_path = "/usr/bin/nvidia-container-runtime.legacy"
runtime_root = "/run/crun"
runtime_type = "oci"
`
require.Equal(t, expectedDropIn, string(actualDropIn))

return nil
},
Expand Down
35 changes: 27 additions & 8 deletions cmd/nvidia-ctk-installer/container/runtime/crio/crio.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ func setupHook(o *container.Options, co *Options) error {
func setupConfig(o *container.Options) error {
log.Infof("Updating config file")

cfg, err := getRuntimeConfig(o)
cfg, err := getRuntimeConfig(o, false)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}
Expand Down Expand Up @@ -180,16 +180,24 @@ func cleanupHook(co *Options) error {

// cleanupConfig removes the NVIDIA container runtime from the cri-o config
func cleanupConfig(o *container.Options) error {
if !o.SetAsDefault {
return nil
}
Comment on lines +183 to +185
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we still need to check whether we're using a drop-in file? We could be modifying the top-level config directly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure I follow why that matters? Regardless of whether we use a drop-in file or not, removing the nvidia runtime from the cri-o config can lead to the issues described in the PR description.

The reason I thought to add this conditional is that we must attempt to revert the config if nvidia is the default runtime, or else all future pods will fail to function (after the nvidia runtime binaries are removed).


log.Infof("Reverting config file modifications")

cfg, err := getRuntimeConfig(o)
cfg, err := getRuntimeConfig(o, true)
if err != nil {
return fmt.Errorf("unable to load config: %v", err)
}

err = o.Unconfigure(cfg)
err = cfg.UpdateDefaultRuntime(o.RuntimeName, engine.UpdateActionUnset)
if err != nil {
return fmt.Errorf("unable to unconfigure cri-o: %v", err)
return fmt.Errorf("failed to unset %q as the default runtime: %w", o.RuntimeName, err)
}

if err := o.Flush(cfg); err != nil {
return err
}

err = RestartCrio(o)
Expand All @@ -206,24 +214,35 @@ func RestartCrio(o *container.Options) error {
}

func GetLowlevelRuntimePaths(o *container.Options) ([]string, error) {
cfg, err := getRuntimeConfig(o)
cfg, err := getRuntimeConfig(o, false)
if err != nil {
return nil, fmt.Errorf("unable to load crio config: %w", err)
}
return engine.GetBinaryPathsForRuntimes(cfg), nil
}

func getRuntimeConfig(o *container.Options) (engine.Interface, error) {
func getRuntimeConfig(o *container.Options, loadDestinationConfig bool) (engine.Interface, error) {
loaders, err := o.GetConfigLoaders(crio.CommandLineSource)
if err != nil {
return nil, err
}
return crio.New(

options := []crio.Option{
crio.WithTopLevelConfigPath(o.TopLevelConfigPath),
crio.WithConfigSource(
toml.LoadFirst(
loaders...,
),
),
)
}

if loadDestinationConfig {
destinationConfigPath := o.TopLevelConfigPath
if o.DropInConfig != "" {
destinationConfigPath = o.DropInConfig
}
options = append(options, crio.WithConfigDestination(toml.FromFile(destinationConfigPath)))
}

return crio.New(options...)
}
7 changes: 7 additions & 0 deletions pkg/config/engine/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ const (
// SaveToSTDOUT is used to write the specified config to stdout instead of
// to a file on disk.
SaveToSTDOUT = ""
// UpdateActionSet is used as an argument to UpdateDefaultRuntime
// when setting a runtime handler as the default in the config
UpdateActionSet = "set"
// UpdateActionUnset is used as an argument to UpdateDefaultRuntime
// when unsetting a runtime handler as the default in the config
UpdateActionUnset = "unset"
)

// Interface defines the API for a runtime config updater.
Expand All @@ -29,6 +35,7 @@ type Interface interface {
EnableCDI()
GetRuntimeConfig(string) (RuntimeConfig, error)
RemoveRuntime(string) error
UpdateDefaultRuntime(string, string) error
Save(string) (int64, error)
String() string
}
Expand Down
9 changes: 9 additions & 0 deletions pkg/config/engine/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ type RuntimeConfigDestination interface {
AddRuntimeWithOptions(string, string, bool, interface{}) error
EnableCDI()
RemoveRuntime(string) error
UpdateDefaultRuntime(string, string) error
Save(string) (int64, error)
String() string
}
Expand All @@ -60,6 +61,14 @@ func (c *Config) RemoveRuntime(runtime string) error {
return c.Destination.RemoveRuntime(runtime)
}

// UpdateDefaultRuntime updates the default runtime setting in the destination config.
// When action is 'set' the provided runtime name is set as the default.
// When action is 'unset' we make sure the provided runtime name is not
// the default.
func (c *Config) UpdateDefaultRuntime(runtime string, action string) error {
return c.Destination.UpdateDefaultRuntime(runtime, action)
}

// EnableCDI enables CDI in the destination config.
func (c *Config) EnableCDI() {
c.Destination.EnableCDI()
Expand Down
30 changes: 30 additions & 0 deletions pkg/config/engine/containerd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,33 @@ func (c *Config) RemoveRuntime(name string) error {
*c.Tree = config
return nil
}

// UpdateDefaultRuntime updates the default runtime setting in the config.
// When action is 'set' the provided runtime name is set as the default.
// When action is 'unset' we make sure the provided runtime name is not
// the default.
func (c *Config) UpdateDefaultRuntime(name string, action string) error {
if action != engine.UpdateActionSet && action != engine.UpdateActionUnset {
return fmt.Errorf("invalid action %q, valid actions are %q and %q", action, engine.UpdateActionSet, engine.UpdateActionUnset)
}

if c == nil || c.Tree == nil {
if action == engine.UpdateActionSet {
return fmt.Errorf("config toml is nil")
}
return nil
}

config := *c.Tree
if action == engine.UpdateActionSet {
config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "default_runtime_name"}, name)
} else {
defaultRuntime, ok := config.GetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "default_runtime_name"}).(string)
if ok && defaultRuntime == name {
config.DeletePath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "default_runtime_name"})
}
}

*c.Tree = config
return nil
}
8 changes: 8 additions & 0 deletions pkg/config/engine/containerd/config_drop_in.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,14 @@ func (c *ConfigWithDropIn) RemoveRuntime(name string) error {
return c.Interface.RemoveRuntime(name)
}

// UpdateDefaultRuntime updates the default runtime setting in the drop-in config.
// When action is 'set' the provided runtime name is set as the default.
// When action is 'unset' we make sure the provided runtime name is not
// the default.
func (c *ConfigWithDropIn) UpdateDefaultRuntime(name string, action string) error {
return c.Interface.UpdateDefaultRuntime(name, action)
}

// flush saves the top-level config to its path.
// If the config is empty, the file will be deleted.
func (c *topLevelConfig) Save(dropInPath string) (int64, error) {
Expand Down
4 changes: 4 additions & 0 deletions pkg/config/engine/containerd/config_v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ func (c *ConfigV1) RemoveRuntime(name string) error {
return nil
}

func (c *ConfigV1) UpdateDefaultRuntime(name string, action string) error {
return fmt.Errorf("this method is not implemented")
}

// Save writes the config to a file
func (c ConfigV1) Save(path string) (int64, error) {
return (Config)(c).Save(path)
Expand Down
Loading