Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 13 additions & 12 deletions cmd/compute-domain-controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ import (
_ "k8s.io/component-base/metrics/prometheus/workqueue" // register work queues in the default legacy registry

"github.com/NVIDIA/k8s-dra-driver-gpu/internal/info"
"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/flags"
pkgflags "github.com/NVIDIA/k8s-dra-driver-gpu/pkg/flags"
Copy link
Collaborator Author

@jgehrcke jgehrcke Oct 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted a way to refer to the imported `flags` package in a scope where a local variable named `flags` was already defined (the local variable shadows the package name), hence the `pkgflags` import alias.

)

const (
Expand All @@ -54,9 +54,7 @@ const (
)

type Flags struct {
kubeClientConfig flags.KubeClientConfig
loggingConfig *flags.LoggingConfig
featureGateConfig *flags.FeatureGateConfig
kubeClientConfig pkgflags.KubeClientConfig

podName string
namespace string
Expand All @@ -74,7 +72,7 @@ type Flags struct {
type Config struct {
driverName string
flags *Flags
clientsets flags.ClientSets
clientsets pkgflags.ClientSets
mux *http.ServeMux
}

Expand All @@ -86,10 +84,10 @@ func main() {
}

func newApp() *cli.App {
flags := &Flags{
loggingConfig: flags.NewLoggingConfig(),
featureGateConfig: flags.NewFeatureGateConfig(),
}
loggingConfig := pkgflags.NewLoggingConfig()
featureGateConfig := pkgflags.NewFeatureGateConfig()
flags := &Flags{}

cliFlags := []cli.Flag{
&cli.StringFlag{
Name: "pod-name",
Expand Down Expand Up @@ -157,8 +155,8 @@ func newApp() *cli.App {
}

cliFlags = append(cliFlags, flags.kubeClientConfig.Flags()...)
cliFlags = append(cliFlags, flags.featureGateConfig.Flags()...)
cliFlags = append(cliFlags, flags.loggingConfig.Flags()...)
cliFlags = append(cliFlags, featureGateConfig.Flags()...)
cliFlags = append(cliFlags, loggingConfig.Flags()...)

app := &cli.App{
Name: "compute-domain-controller",
Expand All @@ -170,7 +168,10 @@ func newApp() *cli.App {
if c.Args().Len() > 0 {
return fmt.Errorf("arguments not supported: %v", c.Args().Slice())
}
return flags.loggingConfig.Apply()
// `loggingConfig` must be applied before doing any logging
err := loggingConfig.Apply()
pkgflags.LogStartupConfig(flags, loggingConfig)
return err
},
Action: func(c *cli.Context) error {
mux := http.NewServeMux()
Expand Down
23 changes: 11 additions & 12 deletions cmd/compute-domain-daemon/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (

nvapi "github.com/NVIDIA/k8s-dra-driver-gpu/api/nvidia.com/resource/v1beta1"
"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/featuregates"
"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/flags"
pkgflags "github.com/NVIDIA/k8s-dra-driver-gpu/pkg/flags"
)

const (
Expand All @@ -55,8 +55,6 @@ type Flags struct {
podName string
podNamespace string
maxNodesPerIMEXDomain int
loggingConfig *flags.LoggingConfig
featureGateConfig *flags.FeatureGateConfig
}

type IMEXConfigTemplateData struct {
Expand All @@ -71,10 +69,9 @@ func main() {
}

func newApp() *cli.App {
flags := Flags{
loggingConfig: flags.NewLoggingConfig(),
featureGateConfig: flags.NewFeatureGateConfig(),
}
loggingConfig := pkgflags.NewLoggingConfig()
featureGateConfig := pkgflags.NewFeatureGateConfig()
flags := &Flags{}

// Create a wrapper that will be used to gracefully shut down all subcommands
wrapper := func(ctx context.Context, f func(ctx context.Context, cancel context.CancelFunc, flags *Flags) error) error {
Expand All @@ -91,7 +88,7 @@ func newApp() *cli.App {
}()

// Call the wrapped function
return f(ctx, cancel, &flags)
return f(ctx, cancel, flags)
}

cliFlags := []cli.Flag{
Expand Down Expand Up @@ -151,16 +148,19 @@ func newApp() *cli.App {
Destination: &flags.maxNodesPerIMEXDomain,
},
}
cliFlags = append(cliFlags, flags.featureGateConfig.Flags()...)
cliFlags = append(cliFlags, flags.loggingConfig.Flags()...)
cliFlags = append(cliFlags, featureGateConfig.Flags()...)
cliFlags = append(cliFlags, loggingConfig.Flags()...)

// Create the app
app := &cli.App{
Name: "compute-domain-daemon",
Usage: "compute-domain-daemon manages the IMEX daemon for NVIDIA compute domains.",
Flags: cliFlags,
Before: func(c *cli.Context) error {
return flags.loggingConfig.Apply()
// `loggingConfig` must be applied before doing any logging
err := loggingConfig.Apply()
pkgflags.LogStartupConfig(flags, loggingConfig)
return err
},
Commands: []*cli.Command{
{
Expand Down Expand Up @@ -197,7 +197,6 @@ func run(ctx context.Context, cancel context.CancelFunc, flags *Flags) error {
podNamespace: flags.podNamespace,
maxNodesPerIMEXDomain: flags.maxNodesPerIMEXDomain,
}
klog.Infof("config: %v", config)

// Support heterogeneous ComputeDomains. That means that a CD may contain
// nodes that do not take part in Multi-Node NVLink communication. On such
Expand Down
29 changes: 14 additions & 15 deletions cmd/compute-domain-kubelet-plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import (
"k8s.io/component-base/logs"

"github.com/NVIDIA/k8s-dra-driver-gpu/internal/info"
"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/flags"
pkgflags "github.com/NVIDIA/k8s-dra-driver-gpu/pkg/flags"
)

const (
Expand All @@ -43,9 +43,7 @@ const (
)

type Flags struct {
kubeClientConfig flags.KubeClientConfig
loggingConfig *flags.LoggingConfig
featureGateConfig *flags.FeatureGateConfig
kubeClientConfig pkgflags.KubeClientConfig

nodeName string
namespace string
Expand All @@ -60,7 +58,7 @@ type Flags struct {

type Config struct {
flags *Flags
clientsets flags.ClientSets
clientsets pkgflags.ClientSets
}

func (c Config) DriverPluginPath() string {
Expand All @@ -75,10 +73,10 @@ func main() {
}

func newApp() *cli.App {
flags := &Flags{
loggingConfig: flags.NewLoggingConfig(),
featureGateConfig: flags.NewFeatureGateConfig(),
}
loggingConfig := pkgflags.NewLoggingConfig()
featureGateConfig := pkgflags.NewFeatureGateConfig()
flags := &Flags{}

cliFlags := []cli.Flag{
&cli.StringFlag{
Name: "node-name",
Expand Down Expand Up @@ -145,8 +143,8 @@ func newApp() *cli.App {
},
}
cliFlags = append(cliFlags, flags.kubeClientConfig.Flags()...)
cliFlags = append(cliFlags, flags.featureGateConfig.Flags()...)
cliFlags = append(cliFlags, flags.loggingConfig.Flags()...)
cliFlags = append(cliFlags, featureGateConfig.Flags()...)
cliFlags = append(cliFlags, loggingConfig.Flags()...)

app := &cli.App{
Name: "compute-domain-kubelet-plugin",
Expand All @@ -158,11 +156,12 @@ func newApp() *cli.App {
if c.Args().Len() > 0 {
return fmt.Errorf("arguments not supported: %v", c.Args().Slice())
}
return flags.loggingConfig.Apply()
// `loggingConfig` must be applied before doing any logging
err := loggingConfig.Apply()
pkgflags.LogStartupConfig(flags, loggingConfig)
return err
},
Action: func(c *cli.Context) error {
ctx := c.Context

clientSets, err := flags.kubeClientConfig.NewClientSets()
if err != nil {
return fmt.Errorf("create client: %w", err)
Expand All @@ -173,7 +172,7 @@ func newApp() *cli.App {
clientsets: clientSets,
}

return RunPlugin(ctx, config)
return RunPlugin(c.Context, config)
},
After: func(c *cli.Context) error {
// Runs after `Action` (regardless of success/error). In urfave cli
Expand Down
29 changes: 14 additions & 15 deletions cmd/gpu-kubelet-plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import (
"k8s.io/klog/v2"

"github.com/NVIDIA/k8s-dra-driver-gpu/internal/info"
"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/flags"
pkgflags "github.com/NVIDIA/k8s-dra-driver-gpu/pkg/flags"
)

const (
Expand All @@ -41,9 +41,7 @@ const (
)

type Flags struct {
kubeClientConfig flags.KubeClientConfig
loggingConfig *flags.LoggingConfig
featureGateConfig *flags.FeatureGateConfig
kubeClientConfig pkgflags.KubeClientConfig

nodeName string
namespace string
Expand All @@ -59,7 +57,7 @@ type Flags struct {

type Config struct {
flags *Flags
clientsets flags.ClientSets
clientsets pkgflags.ClientSets
}

func (c Config) DriverPluginPath() string {
Expand All @@ -74,10 +72,10 @@ func main() {
}

func newApp() *cli.App {
flags := &Flags{
loggingConfig: flags.NewLoggingConfig(),
featureGateConfig: flags.NewFeatureGateConfig(),
}
loggingConfig := pkgflags.NewLoggingConfig()
featureGateConfig := pkgflags.NewFeatureGateConfig()
flags := &Flags{}

cliFlags := []cli.Flag{
&cli.StringFlag{
Name: "node-name",
Expand Down Expand Up @@ -151,8 +149,8 @@ func newApp() *cli.App {
},
}
cliFlags = append(cliFlags, flags.kubeClientConfig.Flags()...)
cliFlags = append(cliFlags, flags.featureGateConfig.Flags()...)
cliFlags = append(cliFlags, flags.loggingConfig.Flags()...)
cliFlags = append(cliFlags, featureGateConfig.Flags()...)
cliFlags = append(cliFlags, loggingConfig.Flags()...)

app := &cli.App{
Name: "gpu-kubelet-plugin",
Expand All @@ -164,11 +162,12 @@ func newApp() *cli.App {
if c.Args().Len() > 0 {
return fmt.Errorf("arguments not supported: %v", c.Args().Slice())
}
return flags.loggingConfig.Apply()
// `loggingConfig` must be applied before doing any logging
err := loggingConfig.Apply()
pkgflags.LogStartupConfig(flags, loggingConfig)
return err
},
Action: func(c *cli.Context) error {
ctx := c.Context

clientSets, err := flags.kubeClientConfig.NewClientSets()
if err != nil {
return fmt.Errorf("create client: %w", err)
Expand All @@ -179,7 +178,7 @@ func newApp() *cli.App {
clientsets: clientSets,
}

return RunPlugin(ctx, config)
return RunPlugin(c.Context, config)
},
After: func(c *cli.Context) error {
// Runs after `Action` (regardless of success/error). In urfave cli
Expand Down
22 changes: 22 additions & 0 deletions pkg/flags/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ import (

"github.com/spf13/pflag"
"github.com/urfave/cli/v2"
"k8s.io/apimachinery/pkg/util/dump"
"k8s.io/klog/v2"

"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/featuregates"
)

func pflagToCLI(flag *pflag.Flag, category string) cli.Flag {
Expand All @@ -33,3 +37,21 @@ func pflagToCLI(flag *pflag.Flag, category string) cli.Flag {
EnvVars: []string{strings.ToUpper(strings.ReplaceAll(flag.Name, "-", "_"))},
}
}

// LogStartupConfig logs the startup configuration of a component.
//
// At verbosity level 0 (i.e. always) it emits a single message containing the
// feature gate map, the configured log verbosity and a pretty-printed dump of
// parsedFlags. At verbosity level 6 and above it additionally dumps the
// complete logging configuration.
//
// parsedFlags is the component-specific flags value to dump. loggingConfig
// must be non-nil (its inner config is dereferenced) and should already have
// been applied, so that the messages are emitted with the final logging setup.
func LogStartupConfig(parsedFlags any, loggingConfig *LoggingConfig) {
	// Always log component startup config (level 0).
	klog.Infof("\nFeature gates: %#v\nVerbosity: %d\nFlags: %s",
		// Flat boolean map -- no pretty-printing needed.
		featuregates.ToMap(),
		loggingConfig.config.Verbosity,
		// Based on go-spew's Sdump(), with indentation. Type information is
		// always displayed (cannot be disabled).
		dump.Pretty(parsedFlags),
	)

	// This is a complex object, comprised of largely static default klog
	// component configuration. Various parts can be overridden via environment
	// variables or CLI flags: it makes sense to log the interpolated config,
	// but only on a high verbosity level.
	//
	// Arguments to Infof are evaluated eagerly, so check the level first to
	// avoid building the (reflection-based) dump when it would be discarded.
	if v := klog.V(6); v.Enabled() {
		v.Infof("Logging config: %s", dump.Pretty(loggingConfig))
	}
}
16 changes: 16 additions & 0 deletions tests/bats/tests.bats
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,22 @@ log_objects() {
echo "${output}" | grep -E '^.*SUM multinode_device_to_device_memcpy_read_ce [0-9]+\.[0-9]+.*$'
}

@test "Confirm startup config / detail in logs on level 0" {
# Purpose: with the chart's logVerbosity set to 0, the components must still
# log their startup configuration and a few key detail messages.

# Extra chart arguments; the array is handed to iupgrade_wait by name.
local _iargs=("--set" "logVerbosity=0")
iupgrade_wait "${TEST_CHART_REPO}" "${TEST_CHART_VERSION}" _iargs

# Controller: fetch the complete log (--tail=-1 disables the line limit) and
# look for fragments of the startup config message.
run kubectl logs -l nvidia-dra-driver-gpu-component=controller -n nvidia-dra-driver-gpu --tail=-1
assert_output --partial "Verbosity:"
# Feature gate map entry, as rendered in the "Feature gates" section.
assert_output --partial '"MPSSupport":false'
# Presumably a field of the dumped controller flags -- verify against the
# controller's Flags struct.
assert_output --partial 'additionalNamespaces:'

# Kubelet plugin: same check for the startup config, plus two detail messages
# that are expected to be visible at level 0.
run kubectl logs -l nvidia-dra-driver-gpu-component=kubelet-plugin -n nvidia-dra-driver-gpu --tail=-1
assert_output --partial "Verbosity"
assert_output --partial "nodeName"
assert_output --partial "identified fabric clique"
assert_output --partial "driver version validation"
}

@test "CD controller: test log verbosity levels" {
iupgrade_wait "${TEST_CHART_REPO}" "${TEST_CHART_VERSION}" NOARGS

Expand Down