-
Notifications
You must be signed in to change notification settings - Fork 435
Implement Drop-In Lifecycle Support for Containerd and Crio #1272
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about the following: and the drop in dirs can be inferred as the PARENT dir of the Note that IFF Does it make sense to set up tests for the various combinations here and then use that to drive development?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have implemented this approach. PTAL |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -18,6 +18,8 @@ package containerd | |||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| import ( | ||||||||||||||||||||||||||||||
| "fmt" | ||||||||||||||||||||||||||||||
| "os" | ||||||||||||||||||||||||||||||
| "path/filepath" | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine" | ||||||||||||||||||||||||||||||
| "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml" | ||||||||||||||||||||||||||||||
|
|
@@ -123,12 +125,38 @@ func (c *Config) EnableCDI() { | |||||||||||||||||||||||||||||
| *c.Tree = config | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| // RemoveRuntime removes a runtime from the docker config | ||||||||||||||||||||||||||||||
| // RemoveRuntime removes a runtime from the containerd config | ||||||||||||||||||||||||||||||
| func (c *Config) RemoveRuntime(name string) error { | ||||||||||||||||||||||||||||||
| if c == nil || c.Tree == nil { | ||||||||||||||||||||||||||||||
| return nil | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| // If using NVIDIA-specific configuration, handle file cleanup | ||||||||||||||||||||||||||||||
| if c.nvidiaConfig != "" { | ||||||||||||||||||||||||||||||
| // Check if all NVIDIA runtimes are being removed | ||||||||||||||||||||||||||||||
| remainingNvidiaRuntimes := 0 | ||||||||||||||||||||||||||||||
| if runtimes := c.GetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes"}); runtimes != nil { | ||||||||||||||||||||||||||||||
| if runtimesTree, ok := runtimes.(*toml.Tree); ok { | ||||||||||||||||||||||||||||||
| for _, runtimeName := range runtimesTree.Keys() { | ||||||||||||||||||||||||||||||
| if c.isNvidiaRuntime(runtimeName) && runtimeName != name { | ||||||||||||||||||||||||||||||
| remainingNvidiaRuntimes++ | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
|
Comment on lines
+134
to
+146
|
||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| // If this is the last NVIDIA runtime, remove the NVIDIA config file | ||||||||||||||||||||||||||||||
| if remainingNvidiaRuntimes == 0 { | ||||||||||||||||||||||||||||||
| if err := os.Remove(c.nvidiaConfig); err != nil && !os.IsNotExist(err) { | ||||||||||||||||||||||||||||||
| c.Logger.Warningf("Failed to remove NVIDIA config file %s: %v", c.nvidiaConfig, err) | ||||||||||||||||||||||||||||||
| } else { | ||||||||||||||||||||||||||||||
| c.Logger.Infof("Removed NVIDIA config file: %s", c.nvidiaConfig) | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
| // Don't modify the in-memory tree when using NVIDIA-specific configuration | ||||||||||||||||||||||||||||||
| return nil | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| config := *c.Tree | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| config.DeletePath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name}) | ||||||||||||||||||||||||||||||
|
|
@@ -154,3 +182,134 @@ func (c *Config) RemoveRuntime(name string) error { | |||||||||||||||||||||||||||||
| *c.Tree = config | ||||||||||||||||||||||||||||||
| return nil | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| // Save writes the config to the specified path or NVIDIA-specific config file | ||||||||||||||||||||||||||||||
| func (c *Config) Save(path string) (int64, error) { | ||||||||||||||||||||||||||||||
| if c.nvidiaConfig == "" { | ||||||||||||||||||||||||||||||
| // Backward compatibility: save to main config | ||||||||||||||||||||||||||||||
| return c.Tree.Save(path) | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| // Ensure directory for NVIDIA config file exists | ||||||||||||||||||||||||||||||
| dir := filepath.Dir(c.nvidiaConfig) | ||||||||||||||||||||||||||||||
| if err := os.MkdirAll(dir, 0755); err != nil { | ||||||||||||||||||||||||||||||
| return 0, fmt.Errorf("failed to create directory for NVIDIA config: %w", err) | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| // Save runtime configs to NVIDIA config file | ||||||||||||||||||||||||||||||
| nvidiaConfig := c.extractRuntimeConfig() | ||||||||||||||||||||||||||||||
| n, err := nvidiaConfig.Save(c.nvidiaConfig) | ||||||||||||||||||||||||||||||
| if err != nil { | ||||||||||||||||||||||||||||||
| return n, fmt.Errorf("failed to save NVIDIA config: %w", err) | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| // Update main config with imports directive | ||||||||||||||||||||||||||||||
| if err := c.updateMainConfigImports(path); err != nil { | ||||||||||||||||||||||||||||||
| // Try to clean up the NVIDIA config file on error | ||||||||||||||||||||||||||||||
| os.Remove(c.nvidiaConfig) | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
| os.Remove(c.nvidiaConfig) | |
| if removeErr := os.Remove(c.nvidiaConfig); removeErr != nil { | |
| c.Logger.Errorf("failed to remove NVIDIA config file during cleanup: %v", removeErr) | |
| } |
Copilot
AI
Sep 9, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The imports handling logic has complex type checking with multiple nested conditions. Consider refactoring this into separate helper functions to improve readability and maintainability.
Copilot
AI
Sep 18, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The hardcoded runtime names are duplicated between containerd and crio implementations. Consider extracting these to a shared constant or configuration to avoid inconsistencies.
| // isNvidiaRuntime checks if the runtime name is an NVIDIA runtime | |
| func (c *Config) isNvidiaRuntime(name string) bool { | |
| return name == "nvidia" || name == "nvidia-cdi" || name == "nvidia-legacy" | |
| // nvidiaRuntimeNames contains the recognized NVIDIA runtime names. | |
| var nvidiaRuntimeNames = []string{"nvidia", "nvidia-cdi", "nvidia-legacy"} | |
| // isNvidiaRuntime checks if the runtime name is an NVIDIA runtime | |
| func (c *Config) isNvidiaRuntime(name string) bool { | |
| for _, n := range nvidiaRuntimeNames { | |
| if name == n { | |
| return true | |
| } | |
| } | |
| return false |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -122,7 +122,7 @@ func (c *ConfigV1) RemoveRuntime(name string) error { | |
|
|
||
| // Save writes the config to a file | ||
| func (c ConfigV1) Save(path string) (int64, error) { | ||
| return (Config)(c).Save(path) | ||
| return (*Config)(&c).Save(path) | ||
|
Comment on lines
124
to
+125
|
||
| } | ||
|
|
||
| func (c *ConfigV1) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Question: How does this work for crio? Do we also have the ability to update the imports there? Is this something that we want to do?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is possible to point to a diff location than the default one (see https://www.ibm.com/docs/en/zoscp/1.1.0?topic=options-crio-commands-flags#crio-commands__title__4)
--config-dir | Path to the configuration drop-in directory.So we could offer the same
drop-in-configflag on thecrioside.Let me see how that would look like
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be clear. I'm ok for runtime-specific defaults here. We already do this for other settings here:
nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/runtime/runtime.go
Lines 128 to 159 in 470b841
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I think is ok to go with defaults for now