Skip to content

Commit 5b00c8f

Browse files
Replace kubectl usage with utility program to apply CRDs
kubectl is heavyweight and often has CVEs, so we are forced to bump its version even if the final gpu-operator image does not contain kubectl bits. This change lets us remove that dependency and use client-go functions to manage CRDs. Signed-off-by: Rajath Agasthya <[email protected]>
1 parent 41f3501 commit 5b00c8f

File tree

72 files changed

+19615
-28
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+19615
-28
lines changed

cmd/manage-crds/main.go

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
Copyright (c), NVIDIA CORPORATION. All rights reserved.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package main
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"os"
23+
24+
"github.com/NVIDIA/k8s-operator-libs/pkg/crdutil"
25+
log "github.com/sirupsen/logrus"
26+
"github.com/urfave/cli/v2"
27+
28+
"github.com/NVIDIA/gpu-operator/internal/info"
29+
)
30+
31+
// logger is the logrus instance used by this command for its log output.
// Its level is set in the app's Before hook in main, based on the debug flag.
var logger = log.New()
32+
33+
type config struct {
34+
Debug bool
35+
crdsPaths *cli.StringSlice
36+
}
37+
38+
func main() {
39+
config := config{
40+
crdsPaths: cli.NewStringSlice(),
41+
}
42+
43+
// Create the top-level CLI
44+
c := cli.NewApp()
45+
c.Name = "manage-crds"
46+
c.Usage = "Tools for managing Custom Resource Definitions (CRDs) for NVIDIA GPU Operator"
47+
c.Version = info.GetVersionString()
48+
49+
// Setup the flags for this command
50+
c.Flags = []cli.Flag{
51+
&cli.BoolFlag{
52+
Name: "debug",
53+
Aliases: []string{"d"},
54+
Usage: "Enable debug-level logging",
55+
Destination: &config.Debug,
56+
EnvVars: []string{"DEBUG"},
57+
},
58+
}
59+
60+
// Set log-level for all subcommands
61+
c.Before = func(c *cli.Context) error {
62+
logLevel := log.InfoLevel
63+
if config.Debug {
64+
logLevel = log.DebugLevel
65+
}
66+
logger.SetLevel(logLevel)
67+
return nil
68+
}
69+
70+
// Common flags for both apply and delete subcommands
71+
commonFlags := []cli.Flag{
72+
&cli.StringSliceFlag{
73+
Name: "filepath",
74+
Aliases: []string{"f"},
75+
Usage: "Path to CRD manifest file or directory (can be specified multiple times, directories are searched recursively)",
76+
Required: true,
77+
Destination: config.crdsPaths,
78+
},
79+
}
80+
81+
// Define the subcommands
82+
c.Commands = []*cli.Command{
83+
{
84+
Name: "apply",
85+
Usage: "Apply CRDs from the specified path",
86+
Flags: commonFlags,
87+
Action: func(c *cli.Context) error {
88+
return runApply(c.Context, config)
89+
},
90+
},
91+
{
92+
Name: "delete",
93+
Usage: "Delete CRDs from the specified path",
94+
Flags: commonFlags,
95+
Action: func(c *cli.Context) error {
96+
return runDelete(c.Context, config)
97+
},
98+
},
99+
}
100+
101+
err := c.Run(os.Args)
102+
if err != nil {
103+
log.Errorf("%v", err)
104+
log.Exit(1)
105+
}
106+
}
107+
108+
func runApply(ctx context.Context, cfg config) error {
109+
paths := cfg.crdsPaths.Value()
110+
logger.Infof("Applying CRDs from %d path(s): %v", len(paths), paths)
111+
112+
if err := crdutil.ProcessCRDs(ctx, crdutil.CRDOperationApply, paths...); err != nil {
113+
return fmt.Errorf("failed to apply CRDs: %w", err)
114+
}
115+
116+
logger.Info("Successfully applied CRDs")
117+
return nil
118+
}
119+
120+
func runDelete(ctx context.Context, cfg config) error {
121+
paths := cfg.crdsPaths.Value()
122+
logger.Infof("Deleting CRDs from %d path(s): %v", len(paths), paths)
123+
124+
if err := crdutil.ProcessCRDs(ctx, crdutil.CRDOperationDelete, paths...); err != nil {
125+
return fmt.Errorf("failed to delete CRDs: %w", err)
126+
}
127+
128+
logger.Info("Successfully deleted CRDs")
129+
return nil
130+
}

deployments/gpu-operator/templates/cleanup_crd.yaml

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,13 @@ spec:
3535
image: {{ include "gpu-operator.fullimage" . }}
3636
imagePullPolicy: {{ .Values.operator.imagePullPolicy }}
3737
command:
38-
- sh
39-
- -c
40-
- >
41-
kubectl delete clusterpolicy cluster-policy;
42-
kubectl delete crd clusterpolicies.nvidia.com;
43-
kubectl delete crd nvidiadrivers.nvidia.com --ignore-not-found=true;
44-
{{- if .Values.nfd.enabled -}}
45-
kubectl delete crd nodefeatures.nfd.k8s-sigs.io --ignore-not-found=true;
46-
kubectl delete crd nodefeaturegroups.nfd.k8s-sigs.io --ignore-not-found=true;
47-
kubectl delete crd nodefeaturerules.nfd.k8s-sigs.io --ignore-not-found=true;
48-
{{- end }}
38+
- /usr/bin/manage-crds
39+
args:
40+
- delete
41+
- --filepath=/opt/gpu-operator/nvidia.com_clusterpolicies.yaml
42+
- --filepath=/opt/gpu-operator/nvidia.com_nvidiadrivers.yaml
43+
{{- if .Values.nfd.enabled }}
44+
- --filepath=/opt/gpu-operator/nfd-api-crds.yaml
45+
{{- end }}
4946
restartPolicy: OnFailure
5047
{{- end }}

deployments/gpu-operator/templates/upgrade_crd.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,13 @@ spec:
8383
image: {{ include "gpu-operator.fullimage" . }}
8484
imagePullPolicy: {{ .Values.operator.imagePullPolicy }}
8585
command:
86-
- sh
87-
- -c
88-
- >
89-
kubectl apply -f /opt/gpu-operator/nvidia.com_clusterpolicies.yaml;
90-
kubectl apply -f /opt/gpu-operator/nvidia.com_nvidiadrivers.yaml;
86+
- /usr/bin/manage-crds
87+
args:
88+
- apply
89+
- --filepath=/opt/gpu-operator/nvidia.com_clusterpolicies.yaml
90+
- --filepath=/opt/gpu-operator/nvidia.com_nvidiadrivers.yaml
9191
{{- if .Values.nfd.enabled }}
92-
kubectl apply -f /opt/gpu-operator/nfd-api-crds.yaml;
92+
- --filepath=/opt/gpu-operator/nfd-api-crds.yaml
9393
{{- end }}
9494
restartPolicy: OnFailure
9595
{{- end }}

docker/Dockerfile

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,6 @@ RUN dnf install -y --allowerasing \
4747

4848
WORKDIR /workspace
4949

50-
# Install must-gather dependency: `kubectl`
51-
ARG TARGETARCH
52-
RUN OS_ARCH=${TARGETARCH/x86_64/amd64} && OS_ARCH=${OS_ARCH/aarch64/arm64} && \
53-
curl -LO https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/${OS_ARCH}/kubectl && \
54-
chmod +x ./kubectl
55-
5650
FROM nvcr.io/nvidia/cuda:12.9.1-base-ubi9 AS sample-builder
5751

5852
RUN dnf install -y --allowerasing \
@@ -96,7 +90,7 @@ LABEL vsc-ref=${GIT_COMMIT}
9690

9791
WORKDIR /
9892
COPY --from=builder /workspace/gpu-operator /usr/bin/
99-
COPY --from=cuda-base /workspace/kubectl /usr/bin/
93+
COPY --from=builder /workspace/manage-crds /usr/bin/
10094
COPY --from=builder /workspace/nvidia-validator /usr/bin/
10195
COPY --from=sample-builder /build/vectorAdd /usr/bin/vectorAdd
10296
# TODO: Copy the compat libs from the 'sample-builder' image instead.

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ require (
66
github.com/Masterminds/sprig/v3 v3.3.0
77
github.com/NVIDIA/go-nvlib v0.8.1
88
github.com/NVIDIA/k8s-kata-manager v0.2.3
9-
github.com/NVIDIA/k8s-operator-libs v0.0.0-20250709180754-c80af13d73e3
9+
github.com/NVIDIA/k8s-operator-libs v0.0.0-20251027171627-45ccd0c3dd32
1010
github.com/NVIDIA/nvidia-container-toolkit v1.18.0
1111
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc
1212
github.com/go-logr/logr v1.4.3

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ github.com/NVIDIA/go-nvlib v0.8.1 h1:OPEHVvn3zcV5OXB68A7WRpeCnYMRSPl7LdeJH/d3gZI
1616
github.com/NVIDIA/go-nvlib v0.8.1/go.mod h1:7mzx9FSdO9fXWP9NKuZmWkCwhkEcSWQFe2tmFwtLb9c=
1717
github.com/NVIDIA/k8s-kata-manager v0.2.3 h1:d5+gRFqU5el/fKMXhHUaPY7haj+dbHL4nDsO/q05LBo=
1818
github.com/NVIDIA/k8s-kata-manager v0.2.3/go.mod h1:xx5OUiMsHyKbyX0JjKHqAftvqS8vx00LFn/5EaMdtB4=
19-
github.com/NVIDIA/k8s-operator-libs v0.0.0-20250709180754-c80af13d73e3 h1:vGT+oyUY7kOGLd71Cz0NfRVEep23jdd4fi+PYsZEj88=
20-
github.com/NVIDIA/k8s-operator-libs v0.0.0-20250709180754-c80af13d73e3/go.mod h1:0GPZJRwr6nY1IVhGUyzG9YfKhNFQq8UlhYe4u7jVF0U=
19+
github.com/NVIDIA/k8s-operator-libs v0.0.0-20251027171627-45ccd0c3dd32 h1:TWudaaTt7QwN/cQwPOm1wgesGLOc8hoik9GubKgnph0=
20+
github.com/NVIDIA/k8s-operator-libs v0.0.0-20251027171627-45ccd0c3dd32/go.mod h1:WbVhWGKqRcwjRKj8MYsYJas73G1YdU3oLW5ggDvTWXs=
2121
github.com/NVIDIA/nvidia-container-toolkit v1.18.0 h1:bXoKq9C1WHU5fF6VqXvX3RkMzpp4ihTUgBPrh66vTf0=
2222
github.com/NVIDIA/nvidia-container-toolkit v1.18.0/go.mod h1:ZxWSG7fnFo2Z7xSGtMyZVF7WnTbj1lgx4dMrBLUq90g=
2323
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=

vendor/github.com/NVIDIA/k8s-operator-libs/pkg/crdutil/README.md

Lines changed: 67 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)