@@ -19,6 +19,7 @@ package reconfigure
1919
2020import (
2121 "context"
22+ "errors"
2223 "fmt"
2324 "os"
2425 "os/exec"
@@ -61,15 +62,13 @@ func New(opts ...Option) (Reconfigurer, error) {
6162func (opts * reconfigureMIGOptions ) Reconfigure () error {
6263 log .Info ("Asserting that the requested configuration is present in the configuration file" )
6364 if err := opts .assertValidMIGConfig (); err != nil {
64- log .Error ("Unable to validate the selected MIG configuration" )
65- return err
65+ return fmt .Errorf ("error validating the selected MIG configuration: %w" , err )
6666 }
6767
6868 log .Infof ("Getting current value of the '%s' node label" , opts .configStateLabel )
6969 state , err := opts .getNodeLabelValue (opts .configStateLabel )
7070 if err != nil {
71- log .Errorf ("Unable to get the value of the '%s' label" , opts .configStateLabel )
72- return err
71+ return fmt .Errorf ("unable to get the value of the %q label: %w" , opts .configStateLabel , err )
7372 }
7473 log .Infof ("Current value of '%s=%s'" , opts .configStateLabel , state )
7574
@@ -84,8 +83,7 @@ func (opts *reconfigureMIGOptions) Reconfigure() error {
8483 if _ , err := os .Stat (stateFilePath ); err == nil {
8584 log .Infof ("Persisting %s to %s" , opts .SelectedMIGConfig , opts .HostMIGManagerStateFile )
8685 if err := opts .hostPersistConfig (); err != nil {
87- log .Errorf ("Unable to persist %s to %s" , opts .SelectedMIGConfig , opts .HostMIGManagerStateFile )
88- return err
86+ return fmt .Errorf ("unable to persist %s to %s: %w" , opts .SelectedMIGConfig , opts .HostMIGManagerStateFile , err )
8987 }
9088 }
9189 }
@@ -95,8 +93,7 @@ func (opts *reconfigureMIGOptions) Reconfigure() error {
9593 migModeChangeRequired := false
9694 if err := opts .assertMIGModeOnly (); err != nil {
9795 if state == configStateRebooting {
98- log .Error ("MIG mode change did not take effect after rebooting" )
99- return fmt .Errorf ("MIG mode change failed after reboot" )
96+ return fmt .Errorf ("MIG mode change failed after reboot: %w" , err )
10097 }
10198 if opts .WithShutdownHostGPUClients {
10299 opts .HostGPUClientServices = append (opts .HostGPUClientServices , opts .HostKubeletService )
@@ -107,8 +104,7 @@ func (opts *reconfigureMIGOptions) Reconfigure() error {
107104 if opts .WithShutdownHostGPUClients {
108105 log .Info ("Shutting down all GPU clients on the host by stopping their systemd services" )
109106 if err := opts .hostStopSystemdServices (); err != nil {
110- log .Error ("Unable to shutdown GPU clients on host by stopping their systemd services" )
111- return err
107+ return fmt .Errorf ("unable to shutdown host GPU clients: %w" , err )
112108 }
113109 if migModeChangeRequired {
114110 log .Info ("Waiting 30 seconds for services to settle" )
@@ -124,7 +120,7 @@ func (opts *reconfigureMIGOptions) Reconfigure() error {
124120 if err := opts .setNodeLabelValue (opts .configStateLabel , configStateRebooting ); err != nil {
125121 log .Errorf ("Unable to set the value of '%s' to '%s'" , opts .configStateLabel , configStateRebooting )
126122 log .Error ("Exiting so as not to reboot multiple times unexpectedly" )
127- return err
123+ return fmt . Errorf ( "unable to set the value of %q to %q: %w" , opts . configStateLabel , configStateRebooting , err )
128124 }
129125 return rebootHost (opts .HostRootMount )
130126 }
@@ -138,8 +134,7 @@ func (opts *reconfigureMIGOptions) Reconfigure() error {
138134 if opts .WithShutdownHostGPUClients {
139135 log .Info ("Restarting all GPU clients previously shutdown on the host by restarting their systemd services" )
140136 if err := opts .hostStartSystemdServices (); err != nil {
141- log .Error ("Unable to restart GPU clients on host by restarting their systemd services" )
142- return err
137+ return fmt .Errorf ("unable to restart host GPU clients: %w" , err )
143138 }
144139 }
145140
@@ -234,18 +229,20 @@ func (opts *reconfigureMIGOptions) hostStartSystemdServices() error {
234229 }
235230 }
236231
237- retCode := 0
232+ var errs error
238233 for _ , service := range opts .hostGPUClientServicesStopped {
239234 log .Infof ("Starting %s" , service )
240235 cmd := exec .Command ("chroot" , opts .HostRootMount , "systemctl" , "start" , service ) // #nosec G204 -- HostRootMount validated via dirpath, service validated via systemd_service_name.
241236 if err := cmd .Run (); err != nil {
242- log .Errorf ("Error Starting %s: skipping, but continuing..." , service )
243- retCode = 1
237+ serviceError := fmt .Errorf ("error starting %q: %w" , err )
238+ log .Errorf ("%v; skipping, but continuing..." , serviceError )
239+
240+ errs = errors .Join (errs , serviceError )
244241 }
245242 }
246243
247- if retCode != 0 {
248- return fmt .Errorf ("some services failed to start" )
244+ if errs != nil {
245+ return fmt .Errorf ("some services failed to start: %w" , errs )
249246 }
250247 return nil
251248}
0 commit comments