Skip to content

Commit d4ed26a

Browse files
committed
Add commandRunnner interface and a basic test
Signed-off-by: Evan Lezar <[email protected]>
1 parent 89e8bfc commit d4ed26a

File tree

4 files changed

+214
-21
lines changed

4 files changed

+214
-21
lines changed

pkg/mig/reconfigure/api.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package reconfigure
22

3+
import "os/exec"
4+
35
const (
46
MIGConfigStateLabel = "nvidia.com/mig.config.state"
57
VGPUConfigStateLabel = "nvidia.com/vgpu.config.state"
@@ -9,3 +11,11 @@ const (
911
type Reconfigurer interface {
1012
Reconfigure() error
1113
}
14+
15+
// A commandRunner is used to run a constructed command.
16+
// This interface allows us to inject a runner for testing.
17+
//
18+
//go:generate moq -rm -fmt=goimports -out command-runner_mock.go . commandRunner
19+
type commandRunner interface {
20+
Run(*exec.Cmd) error
21+
}

pkg/mig/reconfigure/command-runner_mock.go

Lines changed: 75 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/mig/reconfigure/reconfigure.go

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,17 @@ const (
3939
ldPreloadEnvVar = "LD_PRELOAD"
4040
)
4141

42+
type reconfigurer struct {
43+
*reconfigureMIGOptions
44+
commandRunner
45+
}
46+
47+
// A commandWithOutput runs a command and ensures that STDERR and STDOUT are
48+
// set.
49+
type commandWithOutput struct{}
50+
51+
var _ commandRunner = (*commandWithOutput)(nil)
52+
4253
// New creates a MIG Reconfigurer with the supplied options.
4354
func New(opts ...Option) (Reconfigurer, error) {
4455
o := &reconfigureMIGOptions{}
@@ -51,26 +62,31 @@ func New(opts ...Option) (Reconfigurer, error) {
5162
return nil, err
5263
}
5364

54-
return o, nil
65+
r := &reconfigurer{
66+
reconfigureMIGOptions: o,
67+
commandRunner: &commandWithOutput{},
68+
}
69+
70+
return r, nil
5571
}
5672

5773
// Reconfigure configures MIG (Multi-Instance GPU) settings on a Kubernetes
5874
// node. It validates the requested configuration, checks the current state,
5975
// applies MIG mode changes, manages host GPU client services, and handles
6076
// reboots when necessary. The function ensures that MIG configurations are
6177
// applied safely with proper service lifecycle management.
62-
func (opts *reconfigureMIGOptions) Reconfigure() error {
78+
func (opts *reconfigurer) Reconfigure() error {
6379
log.Info("Asserting that the requested configuration is present in the configuration file")
6480
if err := opts.assertValidMIGConfig(); err != nil {
6581
return fmt.Errorf("error validating the selected MIG configuration: %w", err)
6682
}
6783

68-
log.Infof("Getting current value of the '%s' node label", opts.configStateLabel)
69-
state, err := opts.getNodeLabelValue(opts.configStateLabel)
84+
log.Infof("Getting current value of the '%s' node label", opts.ConfigStateLabel)
85+
state, err := opts.getNodeLabelValue(opts.ConfigStateLabel)
7086
if err != nil {
71-
return fmt.Errorf("unable to get the value of the %q label: %w", opts.configStateLabel, err)
87+
return fmt.Errorf("unable to get the value of the %q label: %w", opts.ConfigStateLabel, err)
7288
}
73-
log.Infof("Current value of '%s=%s'", opts.configStateLabel, state)
89+
log.Infof("Current value of '%s=%s'", opts.ConfigStateLabel, state)
7490

7591
log.Info("Checking if the selected MIG config is currently applied or not")
7692
if err := opts.assertMIGConfig(); err == nil {
@@ -116,11 +132,11 @@ func (opts *reconfigureMIGOptions) Reconfigure() error {
116132
log.Info("If the -r option was passed, the node will be automatically rebooted if this is not successful")
117133
if err := opts.applyMIGModeOnly(); err != nil || opts.assertMIGModeOnly() != nil {
118134
if opts.WithReboot {
119-
log.Infof("Changing the '%s' node label to '%s'", opts.configStateLabel, configStateRebooting)
120-
if err := opts.setNodeLabelValue(opts.configStateLabel, configStateRebooting); err != nil {
121-
log.Errorf("Unable to set the value of '%s' to '%s'", opts.configStateLabel, configStateRebooting)
135+
log.Infof("Changing the '%s' node label to '%s'", opts.ConfigStateLabel, configStateRebooting)
136+
if err := opts.setNodeLabelValue(opts.ConfigStateLabel, configStateRebooting); err != nil {
137+
log.Errorf("Unable to set the value of '%s' to '%s'", opts.ConfigStateLabel, configStateRebooting)
122138
log.Error("Exiting so as not to reboot multiple times unexpectedly")
123-
return fmt.Errorf("unable to set the value of %q to %q: %w", opts.configStateLabel, configStateRebooting, err)
139+
return fmt.Errorf("unable to set the value of %q to %q: %w", opts.ConfigStateLabel, configStateRebooting, err)
124140
}
125141
return rebootHost(opts.HostRootMount)
126142
}
@@ -141,7 +157,7 @@ func (opts *reconfigureMIGOptions) Reconfigure() error {
141157
return nil
142158
}
143159

144-
func (opts *reconfigureMIGOptions) assertValidMIGConfig() error {
160+
func (opts *reconfigurer) assertValidMIGConfig() error {
145161
args := []string{
146162
"--debug",
147163
"assert",
@@ -152,7 +168,7 @@ func (opts *reconfigureMIGOptions) assertValidMIGConfig() error {
152168
return opts.runMigParted(args...)
153169
}
154170

155-
func (opts *reconfigureMIGOptions) assertMIGConfig() error {
171+
func (opts *reconfigurer) assertMIGConfig() error {
156172
args := []string{
157173
"--debug",
158174
"assert",
@@ -162,7 +178,7 @@ func (opts *reconfigureMIGOptions) assertMIGConfig() error {
162178
return opts.runMigParted(args...)
163179
}
164180

165-
func (opts *reconfigureMIGOptions) assertMIGModeOnly() error {
181+
func (opts *reconfigurer) assertMIGModeOnly() error {
166182
args := []string{
167183
"--debug",
168184
"assert",
@@ -173,7 +189,7 @@ func (opts *reconfigureMIGOptions) assertMIGModeOnly() error {
173189
return opts.runMigParted(args...)
174190
}
175191

176-
func (opts *reconfigureMIGOptions) applyMIGModeOnly() error {
192+
func (opts *reconfigurer) applyMIGModeOnly() error {
177193
args := []string{
178194
"--debug",
179195
"apply",
@@ -184,7 +200,7 @@ func (opts *reconfigureMIGOptions) applyMIGModeOnly() error {
184200
return opts.runMigParted(args...)
185201
}
186202

187-
func (opts *reconfigureMIGOptions) applyMIGConfig() error {
203+
func (opts *reconfigurer) applyMIGConfig() error {
188204
args := []string{
189205
"--debug",
190206
"apply",
@@ -194,7 +210,7 @@ func (opts *reconfigureMIGOptions) applyMIGConfig() error {
194210
return opts.runMigParted(args...)
195211
}
196212

197-
func (opts *reconfigureMIGOptions) hostPersistConfig() error {
213+
func (opts *reconfigurer) hostPersistConfig() error {
198214
config := fmt.Sprintf(`[Service]
199215
Environment="MIG_PARTED_SELECTED_CONFIG=%s"
200216
`, opts.SelectedMIGConfig)
@@ -206,7 +222,7 @@ Environment="MIG_PARTED_SELECTED_CONFIG=%s"
206222
}
207223

208224
cmd := exec.Command("chroot", opts.HostRootMount, "systemctl", "daemon-reload") // #nosec G204 -- HostRootMount is validated via dirpath validator.
209-
return runCommandWithOutput(cmd)
225+
return opts.Run(cmd)
210226
}
211227

212228
func (opts *reconfigureMIGOptions) hostStopSystemdServices() error {
@@ -234,7 +250,7 @@ func (opts *reconfigureMIGOptions) hostStartSystemdServices() error {
234250
log.Infof("Starting %s", service)
235251
cmd := exec.Command("chroot", opts.HostRootMount, "systemctl", "start", service) // #nosec G204 -- HostRootMount validated via dirpath, service validated via systemd_service_name.
236252
if err := cmd.Run(); err != nil {
237-
serviceError := fmt.Errorf("error starting %q: %w", err)
253+
serviceError := fmt.Errorf("error starting %q: %w", service, err)
238254
log.Errorf("%v; skipping, but continuing...", serviceError)
239255

240256
errs = errors.Join(errs, serviceError)
@@ -332,15 +348,15 @@ func shouldRestartService(opts *reconfigureMIGOptions, service string) bool {
332348
return true
333349
}
334350

335-
func runCommandWithOutput(cmd *exec.Cmd) error {
351+
func (c *commandWithOutput) Run(cmd *exec.Cmd) error {
336352
cmd.Stdout = os.Stdout
337353
cmd.Stderr = os.Stderr
338354
return cmd.Run()
339355
}
340356

341-
func (opts *reconfigureMIGOptions) runMigParted(args ...string) error {
357+
func (opts *reconfigurer) runMigParted(args ...string) error {
342358
cmd := opts.migPartedCmd(args...)
343-
return runCommandWithOutput(cmd)
359+
return opts.Run(cmd)
344360
}
345361

346362
func (opts *reconfigureMIGOptions) migPartedCmd(args ...string) *exec.Cmd {
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/**
2+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
**/
17+
18+
package reconfigure
19+
20+
import (
21+
"fmt"
22+
"os/exec"
23+
"testing"
24+
25+
"github.com/stretchr/testify/require"
26+
)
27+
28+
type commandRunnerWithCLI struct {
29+
mock *commandRunnerMock
30+
calls [][]string
31+
}
32+
33+
func (c *commandRunnerWithCLI) Run(cmd *exec.Cmd) error {
34+
c.calls = append(c.calls, append([]string{cmd.Path}, cmd.Args...))
35+
return c.mock.Run(cmd)
36+
}
37+
38+
func TestReconfigure(t *testing.T) {
39+
testCases := []struct {
40+
description string
41+
options reconfigureMIGOptions
42+
commandRunner *commandRunnerWithCLI
43+
expectedError error
44+
expectedCalls [][]string
45+
}{
46+
{
47+
description: "test command failure",
48+
options: reconfigureMIGOptions{
49+
NodeName: "NodeName",
50+
MIGPartedConfigFile: "/path/to/config/file.yaml",
51+
SelectedMIGConfig: "selected-mig-config",
52+
DriverLibraryPath: "/path/to/libnvidia-ml.so.1",
53+
HostRootMount: "/host/",
54+
},
55+
commandRunner: &commandRunnerWithCLI{
56+
mock: &commandRunnerMock{
57+
RunFunc: func(cmd *exec.Cmd) error {
58+
return fmt.Errorf("error running command %v", cmd.Path)
59+
},
60+
},
61+
},
62+
expectedError: fmt.Errorf("error validating the selected MIG configuration: error running command nvidia-mig-parted"),
63+
expectedCalls: [][]string{
64+
{"nvidia-mig-parted", "nvidia-mig-parted", "--debug", "assert", "--valid-config", "--config-file", "/path/to/config/file.yaml", "--selected-config", "selected-mig-config"},
65+
},
66+
},
67+
}
68+
69+
for _, tc := range testCases {
70+
t.Run(tc.description, func(t *testing.T) {
71+
// TODO: Once we have better mocks in place for the following
72+
// functionality, we can update this.
73+
require.False(t, tc.options.WithReboot)
74+
require.False(t, tc.options.WithShutdownHostGPUClients)
75+
76+
// We test explicit validation in a separate test.
77+
// For now we only ensure that the options are valid.
78+
require.NoError(t, tc.options.Validate())
79+
80+
r := &reconfigurer{
81+
reconfigureMIGOptions: &tc.options,
82+
commandRunner: tc.commandRunner,
83+
}
84+
85+
err := r.Reconfigure()
86+
require.EqualValues(t, tc.expectedError.Error(), err.Error())
87+
88+
require.EqualValues(t, tc.expectedCalls, tc.commandRunner.calls)
89+
90+
})
91+
}
92+
}

0 commit comments

Comments
 (0)