@@ -21,6 +21,7 @@ import (
2121 "fmt"
2222 "os"
2323 "os/signal"
24+ "path/filepath"
2425 "syscall"
2526
2627 "github.com/urfave/cli/v2"
@@ -46,7 +47,7 @@ type Flags struct {
4647 cdiRoot string
4748 containerDriverRoot string
4849 hostDriverRoot string
49- nvidiaCTKPath string
50+ nvidiaCDIHookPath string
5051}
5152
5253type Config struct {
@@ -103,11 +104,10 @@ func newApp() *cli.App {
103104 EnvVars : []string {"CONTAINER_DRIVER_ROOT" },
104105 },
105106 & cli.StringFlag {
106- Name : "nvidia-ctk-path" ,
107- Value : "/usr/bin/nvidia-ctk" ,
108- Usage : "the path to use for the nvidia-ctk in the generated CDI specification. Note that this represents the path on the host." ,
109- Destination : & flags .nvidiaCTKPath ,
110- EnvVars : []string {"NVIDIA_CTK_PATH" },
107+ Name : "nvidia-cdi-hook-path" ,
108+ Usage : "Absolute path to the nvidia-cdi-hook executable in the host file system. Used in the generated CDI specification." ,
109+ Destination : & flags .nvidiaCDIHookPath ,
110+ EnvVars : []string {"NVIDIA_CDI_HOOK_PATH" },
111111 },
112112 }
113113 cliFlags = append (cliFlags , flags .kubeClientConfig .Flags ()... )
@@ -152,12 +152,20 @@ func newApp() *cli.App {
152152 return app
153153}
154154
155+ // StartPlugin initializes and runs the compute domain kubelet plugin.
155156func StartPlugin (ctx context.Context , config * Config ) error {
157+ // Create the plugin directory
156158 err := os .MkdirAll (DriverPluginPath , 0750 )
157159 if err != nil {
158160 return err
159161 }
160162
163+ // Setup nvidia-cdi-hook binary
164+ if err := config .flags .setNvidiaCDIHookPath (); err != nil {
165+ return fmt .Errorf ("error setting up nvidia-cdi-hook: %w" , err )
166+ }
167+
168+ // Initialize CDI root directory
161169 info , err := os .Stat (config .flags .cdiRoot )
162170 switch {
163171 case err != nil && os .IsNotExist (err ):
@@ -171,9 +179,11 @@ func StartPlugin(ctx context.Context, config *Config) error {
171179 return fmt .Errorf ("path for cdi file generation is not a directory: '%v'" , config .flags .cdiRoot )
172180 }
173181
182+ // Setup signal handling for graceful shutdown
174183 sigs := make (chan os.Signal , 1 )
175184 signal .Notify (sigs , syscall .SIGHUP , syscall .SIGINT , syscall .SIGTERM , syscall .SIGQUIT )
176185
186+ // Create a cancellable context for cleanup
177187 var driver * driver
178188 ctx , cancel := context .WithCancel (ctx )
179189 defer func () {
@@ -183,12 +193,43 @@ func StartPlugin(ctx context.Context, config *Config) error {
183193 }
184194 }()
185195
196+ // Create and start the driver
186197 driver , err = NewDriver (ctx , config )
187198 if err != nil {
188199 return fmt .Errorf ("error creating driver: %w" , err )
189200 }
190201
202+ // Wait for shutdown signal
191203 <- sigs
192204
193205 return nil
194206}
207+
208+ // setNvidiaCDIHookPath ensures the proper flag is set with the host path for the nvidia-cdi-hook binary.
209+ // If 'f.nvidiaCDIHookPath' is already set (from the command line), do nothing.
210+ // If 'f.nvidiaCDIHookPath' is empty, it copies the nvidia-cdi-hook binary from
211+ // /usr/bin/nvidia-cdi-hook to DriverPluginPath and sets 'f.nvidiaCDIHookPath'
212+ // to this path. The /usr/bin/nvidia-cdi-hook is present in the current
213+ // container image because it is copied from the toolkit image into this
214+ // container at build time.
215+ func (f * Flags ) setNvidiaCDIHookPath () error {
216+ if f .nvidiaCDIHookPath != "" {
217+ return nil
218+ }
219+
220+ sourcePath := "/usr/bin/nvidia-cdi-hook"
221+ targetPath := filepath .Join (DriverPluginPath , "nvidia-cdi-hook" )
222+
223+ input , err := os .ReadFile (sourcePath )
224+ if err != nil {
225+ return fmt .Errorf ("error reading nvidia-cdi-hook: %w" , err )
226+ }
227+
228+ if err := os .WriteFile (targetPath , input , 0755 ); err != nil {
229+ return fmt .Errorf ("error copying nvidia-cdi-hook: %w" , err )
230+ }
231+
232+ f .nvidiaCDIHookPath = targetPath
233+
234+ return nil
235+ }
0 commit comments