diff --git a/tests/e2e/go.mod b/tests/e2e/go.mod index 275b5b10d..e5893fdf6 100644 --- a/tests/e2e/go.mod +++ b/tests/e2e/go.mod @@ -1,15 +1,19 @@ module github.com/NVIDIA/gpu-operator/tests/e2e -go 1.24.3 +go 1.24.6 + +toolchain go1.24.10 require ( + github.com/NVIDIA/gpu-operator v0.0.0-00010101000000-000000000000 github.com/mittwald/go-helm-client v0.12.18 - github.com/onsi/ginkgo/v2 v2.26.0 + github.com/onsi/ginkgo/v2 v2.27.2 github.com/onsi/gomega v1.38.2 - k8s.io/api v0.33.2 - k8s.io/apimachinery v0.33.2 - k8s.io/client-go v0.33.2 + k8s.io/api v0.34.2 + k8s.io/apimachinery v0.34.2 + k8s.io/client-go v0.34.2 k8s.io/klog/v2 v2.130.1 + k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 ) require ( @@ -21,6 +25,8 @@ require ( github.com/Masterminds/semver/v3 v3.4.0 // indirect github.com/Masterminds/sprig/v3 v3.3.0 // indirect github.com/Masterminds/squirrel v1.5.4 // indirect + github.com/NVIDIA/k8s-kata-manager v0.2.3 // indirect + github.com/NVIDIA/k8s-operator-libs v0.0.0-20251027171627-45ccd0c3dd32 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/blang/semver/v4 v4.0.0 // indirect github.com/chai2010/gettext-go v1.0.2 // indirect @@ -30,11 +36,12 @@ require ( github.com/containerd/platforms v0.2.1 // indirect github.com/cyphar/filepath-securejoin v0.5.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect - github.com/emicklei/go-restful/v3 v3.11.2 // indirect + github.com/emicklei/go-restful/v3 v3.12.2 // indirect github.com/evanphx/json-patch v5.9.11+incompatible // indirect + github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect github.com/fatih/color v1.16.0 // indirect - github.com/fxamacker/cbor/v2 v2.8.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/go-errors/errors v1.5.1 // indirect github.com/go-gorp/gorp/v3 v3.1.0 // indirect github.com/go-logr/logr v1.4.3 // 
indirect @@ -45,7 +52,7 @@ require ( github.com/gobwas/glob v0.2.3 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/google/btree v1.1.3 // indirect - github.com/google/gnostic-models v0.6.9 // indirect + github.com/google/gnostic-models v0.7.0 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect @@ -75,7 +82,7 @@ require ( github.com/moby/spdystream v0.5.0 // indirect github.com/moby/term v0.5.2 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect @@ -83,18 +90,17 @@ require ( github.com/opencontainers/image-spec v1.1.1 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.65.0 // indirect - github.com/prometheus/procfs v0.16.1 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.83.0 // indirect + github.com/regclient/regclient v0.9.2 // indirect github.com/rivo/uniseg v0.4.4 // indirect github.com/rubenv/sql-migrate v1.8.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/shopspring/decimal v1.4.0 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/spf13/cast v1.7.0 // indirect - github.com/spf13/cobra v1.9.1 // indirect - github.com/spf13/pflag v1.0.6 // indirect - github.com/stretchr/testify 
v1.11.1 // indirect + github.com/spf13/cobra v1.10.1 // indirect + github.com/spf13/pflag v1.0.9 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect @@ -104,40 +110,39 @@ require ( go.opentelemetry.io/otel/metric v1.36.0 // indirect go.opentelemetry.io/otel/sdk v1.36.0 // indirect go.opentelemetry.io/otel/trace v1.36.0 // indirect - go.uber.org/automaxprocs v1.6.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/crypto v0.42.0 // indirect - golang.org/x/mod v0.29.0 // indirect - golang.org/x/net v0.44.0 // indirect + golang.org/x/crypto v0.43.0 // indirect + golang.org/x/mod v0.30.0 // indirect + golang.org/x/net v0.46.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/sync v0.17.0 // indirect - golang.org/x/sys v0.36.0 // indirect - golang.org/x/term v0.35.0 // indirect - golang.org/x/text v0.29.0 // indirect + golang.org/x/sys v0.37.0 // indirect + golang.org/x/term v0.36.0 // indirect + golang.org/x/text v0.30.0 // indirect golang.org/x/time v0.12.0 // indirect - golang.org/x/tools v0.37.0 // indirect + golang.org/x/tools v0.38.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect google.golang.org/grpc v1.73.0 // indirect - google.golang.org/protobuf v1.36.7 // indirect + google.golang.org/protobuf v1.36.8 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect helm.sh/helm/v3 v3.18.4 // indirect - k8s.io/apiextensions-apiserver v0.33.2 // indirect - k8s.io/apiserver v0.33.2 // indirect + k8s.io/apiextensions-apiserver v0.34.2 // indirect + k8s.io/apiserver v0.34.2 // indirect k8s.io/cli-runtime v0.33.2 // indirect - k8s.io/component-base v0.33.2 // indirect - k8s.io/kube-openapi 
v0.0.0-20250610211856-8b98d1ed966a // indirect + k8s.io/component-base v0.34.2 // indirect + k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect k8s.io/kubectl v0.33.2 // indirect - k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect oras.land/oras-go/v2 v2.6.0 // indirect + sigs.k8s.io/controller-runtime v0.22.4 // indirect sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect sigs.k8s.io/kustomize/api v0.19.0 // indirect sigs.k8s.io/kustomize/kyaml v0.19.0 // indirect sigs.k8s.io/randfill v1.0.0 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect - sigs.k8s.io/yaml v1.5.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect ) // Use local operator module during development diff --git a/tests/e2e/go.sum b/tests/e2e/go.sum index 8c226456f..95b562130 100644 --- a/tests/e2e/go.sum +++ b/tests/e2e/go.sum @@ -20,6 +20,10 @@ github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0= github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM= github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10= +github.com/NVIDIA/k8s-kata-manager v0.2.3 h1:d5+gRFqU5el/fKMXhHUaPY7haj+dbHL4nDsO/q05LBo= +github.com/NVIDIA/k8s-kata-manager v0.2.3/go.mod h1:xx5OUiMsHyKbyX0JjKHqAftvqS8vx00LFn/5EaMdtB4= +github.com/NVIDIA/k8s-operator-libs v0.0.0-20251027171627-45ccd0c3dd32 h1:TWudaaTt7QwN/cQwPOm1wgesGLOc8hoik9GubKgnph0= +github.com/NVIDIA/k8s-operator-libs v0.0.0-20251027171627-45ccd0c3dd32/go.mod h1:WbVhWGKqRcwjRKj8MYsYJas73G1YdU3oLW5ggDvTWXs= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/asaskevich/govalidator 
v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= @@ -67,10 +71,12 @@ github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= github.com/docker/go-metrics v0.0.1 h1:AgB/0SvBxihN0X8OR4SjsblXkbMvalQ8cjmtKQ2rQV8= github.com/docker/go-metrics v0.0.1/go.mod h1:cG1hvH2utMXtqgqqYE9plW6lDxS3/5ayHzueweSI3Vw= -github.com/emicklei/go-restful/v3 v3.11.2 h1:1onLa9DcsMYO9P+CXaL0dStDqQ2EHHXLiz+BtnqkLAU= -github.com/emicklei/go-restful/v3 v3.11.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= +github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/evanphx/json-patch v5.9.11+incompatible h1:ixHHqfcGvxhWkniF1tWxBHA0yb4Z+d1UQi45df52xW8= github.com/evanphx/json-patch v5.9.11+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= +github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f h1:Wl78ApPPB2Wvf/TIe2xdyJxTlb6obmF18d8QdkxNDu4= github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f/go.mod h1:OSYXu++VVOHnXeitef/D8n/6y4QV8uLHSFXX4NeXMGc= github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= @@ -81,14 +87,14 @@ github.com/foxcpp/go-mockdns v1.1.0 h1:jI0rD8M0wuYAxL7r/ynTrCQQq0BVqfB99Vgk7Dlme github.com/foxcpp/go-mockdns v1.1.0/go.mod h1:IhLeSFGed3mJIAXPH2aiRQB+kqz7oqu8ld2qVbOu7Wk= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= -github.com/fxamacker/cbor/v2 v2.8.0 
h1:fFtUGXUzXPHTIUdne5+zzMPTfffl3RD5qYnkY40vtxU= -github.com/fxamacker/cbor/v2 v2.8.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/gkampitakis/ciinfo v0.3.2 h1:JcuOPk8ZU7nZQjdUhctuhQofk7BGHuIy0c9Ez8BNhXs= github.com/gkampitakis/ciinfo v0.3.2/go.mod h1:1NIwaOcFChN4fa/B0hEBdAb6npDlFL8Bwx4dfRLRqAo= github.com/gkampitakis/go-diff v1.3.2 h1:Qyn0J9XJSDTgnsgHRdz9Zp24RaJeKMUHg2+PDZZdC4M= github.com/gkampitakis/go-diff v1.3.2/go.mod h1:LLgOrpqleQe26cte8s36HTWcTmMEur6OPYerdAAS9tk= -github.com/gkampitakis/go-snaps v0.5.14 h1:3fAqdB6BCPKHDMHAKRwtPUwYexKtGrNuw8HX/T/4neo= -github.com/gkampitakis/go-snaps v0.5.14/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc= +github.com/gkampitakis/go-snaps v0.5.15 h1:amyJrvM1D33cPHwVrjo9jQxX8g/7E2wYdZ+01KS3zGE= +github.com/gkampitakis/go-snaps v0.5.15/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc= github.com/go-errors/errors v1.5.1 h1:ZwEMSLRCapFLflTpT7NKaAc7ukJ8ZPEjzlxt8rPN8bk= github.com/go-errors/errors v1.5.1/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= github.com/go-gorp/gorp/v3 v3.1.0 h1:ItKF/Vbuj31dmV4jxA1qblpSwkl9g1typ24xoe70IGs= @@ -97,6 +103,8 @@ github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= github.com/go-openapi/jsonpointer v0.21.1 h1:whnzv/pNXtK2FbX/W9yJfRmE2gsmkfahjMKB0fZvcic= github.com/go-openapi/jsonpointer v0.21.1/go.mod 
h1:50I1STOfbY1ycR8jGz8DaMeLCdXiI6aDteEdRNNzpdk= github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= @@ -117,9 +125,8 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= -github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= -github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -139,9 +146,8 @@ github.com/gosuri/uitable v0.0.4 h1:IG2xLKRvErL3uhY6e1BylFzG+aJiwQviDDTfOKeKTpY= github.com/gosuri/uitable v0.0.4/go.mod h1:tKR86bXuXPZazfOTG1FIzvjIdXzd0mo4Vtn16vt0PJo= github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA= github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0 h1:TmHmbvxPmaegwhDubVz0lICL0J5Ka2vwTzhoePEXsGE= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0/go.mod h1:qztMSjm835F2bXf+5HKAPIS5qsmQDqZna/PgVt4rWtI= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 
h1:5ZPtiqj0JL5oKWmcsq4VMaAW5ukBEgSGXEN89zeH1Jo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3/go.mod h1:ndYquD05frm2vACXE1nsccT4oJzjhw2arTS2cpUD1PI= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= @@ -211,16 +217,17 @@ github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFL github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0= github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod 
h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= -github.com/onsi/ginkgo/v2 v2.26.0 h1:1J4Wut1IlYZNEAWIV3ALrT9NfiaGW2cDCJQSFQMs/gE= -github.com/onsi/ginkgo/v2 v2.26.0/go.mod h1:qhEywmzWTBUY88kfO0BRvX4py7scov9yR+Az2oavUzw= +github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns= +github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo= github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= @@ -238,14 +245,14 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/poy/onpar v1.1.2 h1:QaNrNiZx0+Nar5dLgTVp5mXkyoVFIbepjyEoGSnhbAY= github.com/poy/onpar v1.1.2/go.mod h1:6X8FLNoxyr9kkmnlqpK6LSoiOtrO6MICtWwEuWkLjzg= -github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= -github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= -github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= -github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.83.0 h1:j9Ce3W6X6Tzi0QnSap+YzGwpqJLJGP/7xV6P9f86jjM= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.83.0/go.mod h1:sSxwdmprUfmRfTknPc4KIjUd2ZIc/kirw4UdXNhOauM= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod 
h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE= -github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/redis/go-redis/extra/rediscmd/v9 v9.0.5 h1:EaDatTxkdHG+U3Bk4EUr+DZ7fOGwTfezUiUJMaIcaho= @@ -254,6 +261,8 @@ github.com/redis/go-redis/extra/redisotel/v9 v9.0.5 h1:EfpWLLCyXw8PSM2/XNJLjI3Pb github.com/redis/go-redis/extra/redisotel/v9 v9.0.5/go.mod h1:WZjPDy7VNzn77AAfnAfVjZNvfJTYfPetfZk5yoSTLaQ= github.com/redis/go-redis/v9 v9.7.3 h1:YpPyAayJV+XErNsatSElgRZZVCwXX9QzkKYNvO7x0wM= github.com/redis/go-redis/v9 v9.7.3/go.mod h1:bGUrSggJ9X9GUmZpZNEOQKaANxSGgOEBRltRTZHSvrA= +github.com/regclient/regclient v0.9.2 h1:5mJYY3NSV7xtBCv+Me3mbfcNJg9u7nrNt/Z6Od7QjVM= +github.com/regclient/regclient v0.9.2/go.mod h1:QOi29pa84xH+AA56bQwQbzw3RZDwqHrG15KTXGeO+Q8= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis= github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= @@ -271,10 +280,10 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w= github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= -github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= -github.com/spf13/cobra v1.9.1/go.mod 
h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= -github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= -github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= +github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= +github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= @@ -323,10 +332,10 @@ go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0 h1:j7Z go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0/go.mod h1:WXbYJTUaZXAbYd8lbgGuvih0yuCfOFC5RJoYnoLcGz8= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 h1:t/Qur3vKSkUCcDVaSumWF2PKHt85pc7fRvFuoVT8qFU= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0/go.mod h1:Rl61tySSdcOJWoEgYZVtmnKdA0GeKrSqkHC1t+91CH8= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 h1:Vh5HayB/0HHfOQA7Ctx69E/Y/DcQSMPpKANYVMQ7fBA= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0/go.mod h1:cpgtDBaqD/6ok/UG0jT15/uKjAY8mRA53diogHBg3UI= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 h1:5pojmb1U1AogINhN3SurB+zm/nIcusopeBNp42f45QM= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0/go.mod h1:57gTHJSE5S1tqg+EKsLPlTWhpHMsWlVmer+LA926XiA= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0 h1:OeNbIYk/2C15ckl7glBlOBp5+WlYsOElzTNmiPW/x60= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0/go.mod h1:7Bept48yIeqxP2OZ9/AqIpYS94h2or0aB4FypJTc8ZM= 
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 h1:tgJ0uaNS4c98WRNUEx5U3aDlrDOI5Rs+1Vifcw4DJ8U= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0/go.mod h1:U7HYyW0zt/a9x5J1Kjs+r1f/d4ZHnYFclhYY2+YbeoE= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.32.0 h1:cMyu9O88joYEaI47CnQkxO1XZdpoTF9fEnW2duIddhw= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.32.0/go.mod h1:6Am3rn7P9TVVeXYG+wtcGE7IE1tsQ+bP3AuWcKt/gOI= go.opentelemetry.io/otel/exporters/prometheus v0.54.0 h1:rFwzp68QMgtzu9PgP3jm9XaMICI6TsofWWPcBDKwlsU= @@ -349,12 +358,14 @@ go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5J go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w= go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w= go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= -go.opentelemetry.io/proto/otlp v1.4.0 h1:TA9WRvW6zMwP+Ssb6fLoUIuirti1gGbP28GcKG1jgeg= -go.opentelemetry.io/proto/otlp v1.4.0/go.mod h1:PPBWZIP98o2ElSqI35IHfu7hIhSwvc5N38Jw8pXuGFY= -go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= -go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= +go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4= +go.opentelemetry.io/proto/otlp v1.5.0/go.mod h1:keN8WnHxOy8PG0rQZjJJ5A2ebUoafqWp0eVQ4yIXvJ4= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= 
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= @@ -362,18 +373,18 @@ go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI= -golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8= +golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= +golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= -golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= +golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk= +golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I= -golang.org/x/net v0.44.0/go.mod 
h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= +golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= +golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -388,35 +399,35 @@ golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= -golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= -golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= +golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= -golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= +golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= golang.org/x/time v0.12.0 
h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= -golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= +golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= +golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 h1:KAeGQVN3M9nD0/bQXnr/ClcEMJ968gUXJQ9pwfSynuQ= +google.golang.org/genproto v0.0.0-20231211222908-989df2bf70f3 h1:1hfbdAfFbkmpg41000wDVqr7jUpK/Yo+LPnIxxGzmkg= google.golang.org/genproto/googleapis/api v0.0.0-20250324211829-b45e905df463 h1:hE3bRWtU6uceqlh4fhrSnUyjKHMKB9KrTLLG+bc0ddM= google.golang.org/genproto/googleapis/api v0.0.0-20250324211829-b45e905df463/go.mod h1:U90ffi8eUL9MwPcrJylN5+Mk2v3vuPDptd5yyNUiRR8= google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 h1:fc6jSaCT0vBduLYZHYrBBNY4dsWuvgyff9noRNDdBeE= google.golang.org/genproto/googleapis/rpc 
v0.0.0-20250603155806-513f23925822/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= google.golang.org/grpc v1.73.0 h1:VIWSmpI2MegBtTuFt5/JWy2oXxtjJ/e89Z70ImfD2ok= google.golang.org/grpc v1.73.0/go.mod h1:50sbHOUqWoCQGI8V2HQLJM0B+LMlIUjNSZmow7EVBQc= -google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= -google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -431,41 +442,41 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= helm.sh/helm/v3 v3.18.4 h1:pNhnHM3nAmDrxz6/UC+hfjDY4yeDATQCka2/87hkZXQ= helm.sh/helm/v3 v3.18.4/go.mod h1:WVnwKARAw01iEdjpEkP7Ii1tT1pTPYfM1HsakFKM3LI= -k8s.io/api v0.33.2 h1:YgwIS5jKfA+BZg//OQhkJNIfie/kmRsO0BmNaVSimvY= -k8s.io/api v0.33.2/go.mod h1:fhrbphQJSM2cXzCWgqU29xLDuks4mu7ti9vveEnpSXs= -k8s.io/apiextensions-apiserver v0.33.2 h1:6gnkIbngnaUflR3XwE1mCefN3YS8yTD631JXQhsU6M8= -k8s.io/apiextensions-apiserver v0.33.2/go.mod h1:IvVanieYsEHJImTKXGP6XCOjTwv2LUMos0YWc9O+QP8= -k8s.io/apimachinery v0.33.2 h1:IHFVhqg59mb8PJWTLi8m1mAoepkUNYmptHsV+Z1m5jY= -k8s.io/apimachinery v0.33.2/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= -k8s.io/apiserver v0.33.2 h1:KGTRbxn2wJagJowo29kKBp4TchpO1DRO3g+dB/KOJN4= -k8s.io/apiserver v0.33.2/go.mod h1:9qday04wEAMLPWWo9AwqCZSiIn3OYSZacDyu/AcoM/M= +k8s.io/api v0.34.2 h1:fsSUNZhV+bnL6Aqrp6O7lMTy6o5x2C4XLjnh//8SLYY= +k8s.io/api v0.34.2/go.mod 
h1:MMBPaWlED2a8w4RSeanD76f7opUoypY8TFYkSM+3XHw= +k8s.io/apiextensions-apiserver v0.34.2 h1:WStKftnGeoKP4AZRz/BaAAEJvYp4mlZGN0UCv+uvsqo= +k8s.io/apiextensions-apiserver v0.34.2/go.mod h1:398CJrsgXF1wytdaanynDpJ67zG4Xq7yj91GrmYN2SE= +k8s.io/apimachinery v0.34.2 h1:zQ12Uk3eMHPxrsbUJgNF8bTauTVR2WgqJsTmwTE/NW4= +k8s.io/apimachinery v0.34.2/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/apiserver v0.34.2 h1:2/yu8suwkmES7IzwlehAovo8dDE07cFRC7KMDb1+MAE= +k8s.io/apiserver v0.34.2/go.mod h1:gqJQy2yDOB50R3JUReHSFr+cwJnL8G1dzTA0YLEqAPI= k8s.io/cli-runtime v0.33.2 h1:koNYQKSDdq5AExa/RDudXMhhtFasEg48KLS2KSAU74Y= k8s.io/cli-runtime v0.33.2/go.mod h1:gnhsAWpovqf1Zj5YRRBBU7PFsRc6NkEkwYNQE+mXL88= -k8s.io/client-go v0.33.2 h1:z8CIcc0P581x/J1ZYf4CNzRKxRvQAwoAolYPbtQes+E= -k8s.io/client-go v0.33.2/go.mod h1:9mCgT4wROvL948w6f6ArJNb7yQd7QsvqavDeZHvNmHo= -k8s.io/component-base v0.33.2 h1:sCCsn9s/dG3ZrQTX/Us0/Sx2R0G5kwa0wbZFYoVp/+0= -k8s.io/component-base v0.33.2/go.mod h1:/41uw9wKzuelhN+u+/C59ixxf4tYQKW7p32ddkYNe2k= +k8s.io/client-go v0.34.2 h1:Co6XiknN+uUZqiddlfAjT68184/37PS4QAzYvQvDR8M= +k8s.io/client-go v0.34.2/go.mod h1:2VYDl1XXJsdcAxw7BenFslRQX28Dxz91U9MWKjX97fE= +k8s.io/component-base v0.34.2 h1:HQRqK9x2sSAsd8+R4xxRirlTjowsg6fWCPwWYeSvogQ= +k8s.io/component-base v0.34.2/go.mod h1:9xw2FHJavUHBFpiGkZoKuYZ5pdtLKe97DEByaA+hHbM= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-openapi v0.0.0-20250610211856-8b98d1ed966a h1:ZV3Zr+/7s7aVbjNGICQt+ppKWsF1tehxggNfbM7XnG8= -k8s.io/kube-openapi v0.0.0-20250610211856-8b98d1ed966a/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= +k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA= +k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts= k8s.io/kubectl v0.33.2 h1:7XKZ6DYCklu5MZQzJe+CkCjoGZwD1wWl7t/FxzhMz7Y= 
k8s.io/kubectl v0.33.2/go.mod h1:8rC67FB8tVTYraovAGNi/idWIK90z2CHFNMmGJZJ3KI= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck= +k8s.io/utils v0.0.0-20251002143259-bc988d571ff4/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= oras.land/oras-go/v2 v2.6.0 h1:X4ELRsiGkrbeox69+9tzTu492FMUu7zJQW6eJU+I2oc= oras.land/oras-go/v2 v2.6.0/go.mod h1:magiQDfG6H1O9APp+rOsvCPcW1GD2MM7vgnKY0Y+u1o= +sigs.k8s.io/controller-runtime v0.22.4 h1:GEjV7KV3TY8e+tJ2LCTxUTanW4z/FmNB7l327UfMq9A= +sigs.k8s.io/controller-runtime v0.22.4/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/kustomize/api v0.19.0 h1:F+2HB2mU1MSiR9Hp1NEgoU2q9ItNOaBJl0I4Dlus5SQ= sigs.k8s.io/kustomize/api v0.19.0/go.mod h1:/BbwnivGVcBh1r+8m3tH1VNxJmHSk1PzP5fkP6lbL1o= sigs.k8s.io/kustomize/kyaml v0.19.0 h1:RFge5qsO1uHhwJsu3ipV7RNolC7Uozc0jUBC/61XSlA= sigs.k8s.io/kustomize/kyaml v0.19.0/go.mod h1:FeKD5jEOH+FbZPpqUghBP8mrLjJ3+zD3/rf9NNu1cwY= -sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= -sigs.k8s.io/structured-merge-diff/v4 v4.7.0 h1:qPeWmscJcXP0snki5IYF79Z8xrl8ETFxgMd7wez1XkI= -sigs.k8s.io/structured-merge-diff/v4 v4.7.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= -sigs.k8s.io/yaml v1.5.0 h1:M10b2U7aEUY6hRtU870n2VTPgR5RZiL/I6Lcc2F4NUQ= 
-sigs.k8s.io/yaml v1.5.0/go.mod h1:wZs27Rbxoai4C0f8/9urLZtZtF3avA3gKvGyPdDqTO4= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/tests/e2e/gpu_operator_test.go b/tests/e2e/gpu_operator_test.go index a6b812c39..8b4f2f0d1 100644 --- a/tests/e2e/gpu_operator_test.go +++ b/tests/e2e/gpu_operator_test.go @@ -28,8 +28,8 @@ import ( "github.com/NVIDIA/gpu-operator/tests/e2e/framework" e2elog "github.com/NVIDIA/gpu-operator/tests/e2e/framework/logs" - k8stest "github.com/NVIDIA/gpu-operator/tests/e2e/kubernetes" - "github.com/NVIDIA/gpu-operator/tests/e2e/operator" + + "github.com/NVIDIA/gpu-operator/tests/e2e/helpers" ) var _ = Describe(e2eTestPrefix+"-premerge-suite", func() { @@ -38,21 +38,21 @@ var _ = Describe(e2eTestPrefix+"-premerge-suite", func() { Describe("GPU Operator ClusterPolicy", func() { Context("When deploying gpu-operator", Ordered, func() { - if tcfg.helmChart == "" { - Fail("No helm-chart for gpu-operator specified") - } - // Init global suite vars vars var ( - operatorClient *operator.Client + operatorClient *helpers.OperatorClient helmReleaseName string - k8sClient *k8stest.Client + k8sClient *helpers.PodClient testNamespace *corev1.Namespace ) BeforeAll(func(ctx context.Context) { + if tcfg.helmChart == "" { + Fail("No helm-chart for gpu-operator specified") + } + var err error - k8sClient = k8stest.NewClient(f.ClientSet.CoreV1()) + k8sClient = helpers.NewPodClient(f.ClientSet.CoreV1()) nsLabels := map[string]string{ "e2e-run": string(framework.RunID), } @@ -62,10 +62,10 @@ var _ = Describe(e2eTestPrefix+"-premerge-suite", func() { Fail(fmt.Sprintf("failed to create gpu operator namespace %s: %v", tcfg.namespace, err)) } - operatorClient, err = 
operator.NewClient( - operator.WithNamespace(testNamespace.Name), - operator.WithKubeConfig(framework.TestContext.KubeConfig), - operator.WithChart(tcfg.helmChart), + operatorClient, err = helpers.NewOperatorClient( + helpers.WithNamespace(testNamespace.Name), + helpers.WithKubeConfig(framework.TestContext.KubeConfig), + helpers.WithChart(tcfg.helmChart), ) if err != nil { Fail(fmt.Sprintf("failed to instantiate gpu operator client: %v", err)) @@ -79,7 +79,7 @@ var _ = Describe(e2eTestPrefix+"-premerge-suite", func() { fmt.Sprintf("validator.image=%s", tcfg.validatorImage), fmt.Sprintf("validator.version=%s", tcfg.validatorVersion), } - helmReleaseName, err = operatorClient.Install(ctx, values, operator.ChartOptions{ + helmReleaseName, err = operatorClient.Install(ctx, values, helpers.ChartOptions{ CleanupOnFail: true, GenerateName: true, Timeout: 5 * time.Minute, @@ -157,8 +157,12 @@ var _ = Describe(e2eTestPrefix+"-premerge-suite", func() { hasRestarts, err := k8sClient.EnsureNoPodRestarts(ctx, pod.Name, pod.Namespace) Expect(err).NotTo(HaveOccurred()) if !hasRestarts { - errLogs := k8sClient.GetPodLogs(ctx, pod) - e2elog.Logf("printing logs from the pod %s/%s: %s", pod.Namespace, pod.Name, errLogs) + errLogs, err := k8sClient.GetPodLogs(ctx, pod) + if err != nil { + e2elog.Logf("WARN: failed to retrieve logs from pod %s/%s: %v", pod.Namespace, pod.Name, err) + } else { + e2elog.Logf("printing logs from the pod %s/%s: %s", pod.Namespace, pod.Name, errLogs) + } e2elog.Failf("pod %s/%s has unexpected restarts", pod.Namespace, pod.Name) } } diff --git a/tests/e2e/helpers/clusterpolicy.go b/tests/e2e/helpers/clusterpolicy.go new file mode 100644 index 000000000..606a42cc3 --- /dev/null +++ b/tests/e2e/helpers/clusterpolicy.go @@ -0,0 +1,130 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package helpers
+
+import (
+	"context"
+	"time"
+
+	nvidiav1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
+	gpuclientset "github.com/NVIDIA/gpu-operator/api/versioned"
+	"github.com/NVIDIA/gpu-operator/internal/conditions"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/util/retry"
+	"k8s.io/utils/ptr"
+)
+
+// ClusterPolicyClient wraps the generated GPU operator clientset with
+// helpers for reading, mutating and waiting on ClusterPolicy objects.
+type ClusterPolicyClient struct {
+	client gpuclientset.Interface
+}
+
+// NewClusterPolicyClient returns a ClusterPolicyClient backed by client.
+func NewClusterPolicyClient(client gpuclientset.Interface) *ClusterPolicyClient {
+	return &ClusterPolicyClient{client: client}
+}
+
+// Get fetches the ClusterPolicy called name.
+func (h *ClusterPolicyClient) Get(ctx context.Context, name string) (*nvidiav1.ClusterPolicy, error) {
+	policies := h.client.NvidiaV1().ClusterPolicies()
+	return policies.Get(ctx, name, metav1.GetOptions{})
+}
+
+// Update writes cp back to the API server and returns the stored object.
+func (h *ClusterPolicyClient) Update(ctx context.Context, cp *nvidiav1.ClusterPolicy) (*nvidiav1.ClusterPolicy, error) {
+	policies := h.client.NvidiaV1().ClusterPolicies()
+	return policies.Update(ctx, cp, metav1.UpdateOptions{})
+}
+
+// modify fetches the named ClusterPolicy, applies mutate to it and persists
+// the result. The whole read-modify-write cycle runs under
+// retry.RetryOnConflict so that a concurrent write (for example by the
+// operator controller) triggers a fresh attempt instead of a failure.
+func (h *ClusterPolicyClient) modify(ctx context.Context, name string, mutate func(*nvidiav1.ClusterPolicy)) error {
+	attempt := func() error {
+		current, err := h.Get(ctx, name)
+		if err != nil {
+			return err
+		}
+
+		mutate(current)
+
+		if _, err := h.Update(ctx, current); err != nil {
+			return err
+		}
+		return nil
+	}
+
+	return retry.RetryOnConflict(retry.DefaultBackoff, attempt)
+}
+
+// UpdateDriverVersion sets Spec.Driver.Version of the named ClusterPolicy.
+func (h *ClusterPolicyClient) UpdateDriverVersion(ctx context.Context, name, version string) error {
+	setVersion := func(policy *nvidiav1.ClusterPolicy) {
+		policy.Spec.Driver.Version = version
+	}
+	return h.modify(ctx, name, setVersion)
+}
+
+// EnableDCGM sets Spec.DCGM.Enabled to true on the named ClusterPolicy.
+func (h *ClusterPolicyClient) EnableDCGM(ctx context.Context, name string) error {
+	enable := func(policy *nvidiav1.ClusterPolicy) {
+		policy.Spec.DCGM.Enabled = ptr.To(true)
+	}
+	return h.modify(ctx, name, enable)
+}
+
+// DisableDCGM sets Spec.DCGM.Enabled to false on the named ClusterPolicy.
+func (h *ClusterPolicyClient) DisableDCGM(ctx context.Context, name string) error {
+	disable := func(policy *nvidiav1.ClusterPolicy) {
+		policy.Spec.DCGM.Enabled = ptr.To(false)
+	}
+	return h.modify(ctx, name, disable)
+}
+
+// EnableDCGMExporter sets Spec.DCGMExporter.Enabled to true on the named
+// ClusterPolicy.
+func (h *ClusterPolicyClient) EnableDCGMExporter(ctx context.Context, name string) error {
+	enable := func(policy *nvidiav1.ClusterPolicy) {
+		policy.Spec.DCGMExporter.Enabled = ptr.To(true)
+	}
+	return h.modify(ctx, name, enable)
+}
+
+// DisableDCGMExporter sets Spec.DCGMExporter.Enabled to false on the named
+// ClusterPolicy.
+func (h *ClusterPolicyClient) DisableDCGMExporter(ctx context.Context, name string) error {
+	disable := func(policy *nvidiav1.ClusterPolicy) {
+		policy.Spec.DCGMExporter.Enabled = ptr.To(false)
+	}
+	return h.modify(ctx, name, disable)
+}
+
+// EnableGFD sets Spec.GPUFeatureDiscovery.Enabled to true on the named
+// ClusterPolicy.
+func (h *ClusterPolicyClient) EnableGFD(ctx context.Context, name string) error {
+	enable := func(policy *nvidiav1.ClusterPolicy) {
+		policy.Spec.GPUFeatureDiscovery.Enabled = ptr.To(true)
+	}
+	return h.modify(ctx, name, enable)
+}
+
+// DisableGFD sets Spec.GPUFeatureDiscovery.Enabled to false on the named
+// ClusterPolicy.
+func (h *ClusterPolicyClient) DisableGFD(ctx context.Context, name string) error {
+	disable := func(policy *nvidiav1.ClusterPolicy) {
+		policy.Spec.GPUFeatureDiscovery.Enabled = ptr.To(false)
+	}
+	return h.modify(ctx, name, disable)
+}
+
+// SetMIGStrategy sets Spec.MIG.Strategy of the named ClusterPolicy to
+// strategy, converted to nvidiav1.MIGStrategy without validation.
+func (h *ClusterPolicyClient) SetMIGStrategy(ctx context.Context, name, strategy string) error {
+	setStrategy := func(policy *nvidiav1.ClusterPolicy) {
+		policy.Spec.MIG.Strategy = nvidiav1.MIGStrategy(strategy)
+	}
+	return h.modify(ctx, name, setStrategy)
+}
+
+// WaitForReady polls the named ClusterPolicy every defaultPollingInterval
+// (starting immediately) until one of its status conditions has type
+// conditions.Ready and status True, or until timeout elapses. Any error
+// from fetching the ClusterPolicy stops the wait and is returned.
+func (h *ClusterPolicyClient) WaitForReady(ctx context.Context, name string, timeout time.Duration) error {
+	isReady := func(pollCtx context.Context) (bool, error) {
+		policy, err := h.Get(pollCtx, name)
+		if err != nil {
+			return false, err
+		}
+
+		for i := range policy.Status.Conditions {
+			cond := &policy.Status.Conditions[i]
+			if cond.Type != conditions.Ready {
+				continue
+			}
+			if cond.Status == metav1.ConditionTrue {
+				return true, nil
+			}
+		}
+
+		return false, nil
+	}
+
+	return wait.PollUntilContextTimeout(ctx, defaultPollingInterval, timeout, true, isReady)
+}
+
diff --git a/tests/e2e/helpers/constants.go b/tests/e2e/helpers/constants.go
new file mode 100644
index 000000000..4a1573055
--- /dev/null
+++ b/tests/e2e/helpers/constants.go
@@ -0,0 +1,28 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package helpers
+
+import "time"
+
+const (
+	// defaultPollingInterval is the interval the Wait* helpers in this
+	// package pass to wait.PollUntilContextTimeout between successive checks.
+	defaultPollingInterval = 5 * time.Second
+
+	// upgradeDoneState is the value of the "nvidia.com/gpu-driver-upgrade-state"
+	// node label that WaitForPodsUpgradeDone treats as a completed driver
+	// upgrade on that node.
+	upgradeDoneState = "upgrade-done"
+)
+
diff --git a/tests/e2e/helpers/daemonset.go b/tests/e2e/helpers/daemonset.go
new file mode 100644
index 000000000..38fc84188
--- /dev/null
+++ b/tests/e2e/helpers/daemonset.go
@@ -0,0 +1,131 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package helpers
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	appsv1 "k8s.io/api/apps/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/kubernetes"
+)
+
+// DaemonSetClient provides helpers for looking up DaemonSets and checking
+// the state of the pods they manage.
+type DaemonSetClient struct {
+	client kubernetes.Interface
+}
+
+// NewDaemonSetClient returns a DaemonSetClient backed by client.
+func NewDaemonSetClient(client kubernetes.Interface) *DaemonSetClient {
+	return &DaemonSetClient{
+		client: client,
+	}
+}
+
+// isDaemonSetReady reports whether the DaemonSet has at least one ready pod
+// and every pod it wants scheduled is ready. It is the single readiness
+// predicate shared by IsReady and WaitForReady.
+func isDaemonSetReady(daemonSet *appsv1.DaemonSet) bool {
+	status := daemonSet.Status
+	return status.NumberReady > 0 && status.NumberReady == status.DesiredNumberScheduled
+}
+
+// GetByLabel returns the single DaemonSet in namespace carrying the label
+// labelKey=labelValue. It returns an error when the list call fails, when
+// no DaemonSet matches, or when more than one matches.
+func (h *DaemonSetClient) GetByLabel(ctx context.Context, namespace, labelKey, labelValue string) (*appsv1.DaemonSet, error) {
+	labelSelector := labels.SelectorFromSet(map[string]string{
+		labelKey: labelValue,
+	}).String()
+
+	daemonSetList, err := h.client.AppsV1().DaemonSets(namespace).List(ctx, metav1.ListOptions{
+		LabelSelector: labelSelector,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("failed to list DaemonSets: %w", err)
+	}
+
+	switch len(daemonSetList.Items) {
+	case 0:
+		return nil, fmt.Errorf("no DaemonSet found with label %s=%s", labelKey, labelValue)
+	case 1:
+		return &daemonSetList.Items[0], nil
+	default:
+		return nil, fmt.Errorf("multiple DaemonSets found with label %s=%s", labelKey, labelValue)
+	}
+}
+
+// Get fetches the DaemonSet namespace/name.
+func (h *DaemonSetClient) Get(ctx context.Context, namespace, name string) (*appsv1.DaemonSet, error) {
+	return h.client.AppsV1().DaemonSets(namespace).Get(ctx, name, metav1.GetOptions{})
+}
+
+// WaitForReady polls IsReady every defaultPollingInterval (starting
+// immediately) until it reports true or timeout elapses. An error from
+// fetching the DaemonSet stops the wait and is returned.
+func (h *DaemonSetClient) WaitForReady(ctx context.Context, namespace, name string, timeout time.Duration) error {
+	return wait.PollUntilContextTimeout(ctx, defaultPollingInterval, timeout, true, func(ctx context.Context) (bool, error) {
+		return h.IsReady(ctx, namespace, name)
+	})
+}
+
+// IsReady reports whether the DaemonSet namespace/name currently has at
+// least one ready pod and as many ready pods as desired scheduled pods.
+func (h *DaemonSetClient) IsReady(ctx context.Context, namespace, name string) (bool, error) {
+	daemonSet, err := h.Get(ctx, namespace, name)
+	if err != nil {
+		return false, err
+	}
+
+	return isDaemonSetReady(daemonSet), nil
+}
+
+// GetImage returns the image of the first container in the DaemonSet's pod
+// template. It returns an error when the template has no containers.
+func (h *DaemonSetClient) GetImage(ctx context.Context, namespace, name string) (string, error) {
+	daemonSet, err := h.Get(ctx, namespace, name)
+	if err != nil {
+		return "", fmt.Errorf("failed to get DaemonSet: %w", err)
+	}
+
+	containers := daemonSet.Spec.Template.Spec.Containers
+	if len(containers) == 0 {
+		return "", fmt.Errorf("DaemonSet has no containers")
+	}
+
+	return containers[0].Image, nil
+}
+
+// CheckNoRestarts lists the pods in namespace selected by the DaemonSet's
+// pod selector and returns an error naming the first container found with
+// a non-zero restart count. It returns nil when no container has restarted.
+func (h *DaemonSetClient) CheckNoRestarts(ctx context.Context, namespace, name string) error {
+	daemonSet, err := h.Get(ctx, namespace, name)
+	if err != nil {
+		return fmt.Errorf("failed to get DaemonSet: %w", err)
+	}
+
+	// Guard explicitly: a nil selector would otherwise panic here, and
+	// LabelSelectorAsSelector(nil) stringifies to "", which lists every pod.
+	if daemonSet.Spec.Selector == nil {
+		return fmt.Errorf("DaemonSet %s/%s has no pod selector", namespace, name)
+	}
+
+	// Convert the full selector so matchExpressions are honoured, not only
+	// matchLabels.
+	selector, err := metav1.LabelSelectorAsSelector(daemonSet.Spec.Selector)
+	if err != nil {
+		return fmt.Errorf("failed to convert pod selector of DaemonSet %s/%s: %w", namespace, name, err)
+	}
+
+	podList, err := h.client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
+		LabelSelector: selector.String(),
+	})
+	if err != nil {
+		return fmt.Errorf("failed to list pods: %w", err)
+	}
+
+	for i := range podList.Items {
+		pod := &podList.Items[i]
+		for _, containerStatus := range pod.Status.ContainerStatuses {
+			if containerStatus.RestartCount > 0 {
+				return fmt.Errorf("pod %s/%s container %s has %d restarts",
+					pod.Namespace, pod.Name, containerStatus.Name, containerStatus.RestartCount)
+			}
+		}
+	}
+
+	return nil
+}
diff --git a/tests/e2e/helpers/node.go b/tests/e2e/helpers/node.go
new file mode 100644
index 000000000..2cb299828
--- /dev/null
+++ b/tests/e2e/helpers/node.go
@@ -0,0 +1,130 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package helpers
+
+import (
+	"context"
+	"fmt"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/util/retry"
+)
+
+// NodeClient provides helpers for listing nodes and editing their labels.
+type NodeClient struct {
+	client kubernetes.Interface
+}
+
+// NewNodeClient returns a NodeClient backed by client.
+func NewNodeClient(client kubernetes.Interface) *NodeClient {
+	return &NodeClient{
+		client: client,
+	}
+}
+
+// updateLabels fetches the node, applies mutate to its (never nil) label
+// map and writes the node back. The read-modify-write cycle is retried on
+// an update conflict, which is the same handling ClusterPolicyClient.modify
+// applies; without it a concurrent write to the Node between Get and Update
+// fails the call.
+func (h *NodeClient) updateLabels(ctx context.Context, nodeName string, mutate func(nodeLabels map[string]string)) error {
+	return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
+		node, err := h.client.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
+		if err != nil {
+			return fmt.Errorf("failed to get node: %w", err)
+		}
+
+		if node.Labels == nil {
+			node.Labels = make(map[string]string)
+		}
+		mutate(node.Labels)
+
+		// Wrapped with %w so the conflict status stays discoverable by
+		// RetryOnConflict through the error chain.
+		if _, err := h.client.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}); err != nil {
+			return fmt.Errorf("failed to update node: %w", err)
+		}
+
+		return nil
+	})
+}
+
+// LabelNode sets the label key=value on the node called nodeName,
+// overwriting any existing value for key.
+func (h *NodeClient) LabelNode(ctx context.Context, nodeName, key, value string) error {
+	return h.updateLabels(ctx, nodeName, func(nodeLabels map[string]string) {
+		nodeLabels[key] = value
+	})
+}
+
+// UnlabelNode removes the label key from the node called nodeName. Removing
+// a label that is not present is not an error.
+func (h *NodeClient) UnlabelNode(ctx context.Context, nodeName, key string) error {
+	return h.updateLabels(ctx, nodeName, func(nodeLabels map[string]string) {
+		delete(nodeLabels, key)
+	})
+}
+
+// GetNodesByLabel returns the nodes carrying the label labelKey=labelValue.
+func (h *NodeClient) GetNodesByLabel(ctx context.Context, labelKey, labelValue string) ([]corev1.Node, error) {
+	labelSelector := labels.SelectorFromSet(map[string]string{
+		labelKey: labelValue,
+	}).String()
+
+	nodeList, err := h.client.CoreV1().Nodes().List(ctx, metav1.ListOptions{
+		LabelSelector: labelSelector,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("failed to list nodes: %w", err)
+	}
+
+	return nodeList.Items, nil
+}
+
+// ListNodes returns every node in the cluster.
+func (h *NodeClient) ListNodes(ctx context.Context) ([]corev1.Node, error) {
+	nodeList, err := h.client.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
+	if err != nil {
+		return nil, fmt.Errorf("failed to list nodes: %w", err)
+	}
+
+	return nodeList.Items, nil
+}
+
+// LabelAllNodes applies the label key=value to every node, stopping at the
+// first node that cannot be labelled.
+func (h *NodeClient) LabelAllNodes(ctx context.Context, key, value string) error {
+	nodes, err := h.ListNodes(ctx)
+	if err != nil {
+		return err
+	}
+
+	for i := range nodes {
+		nodeName := nodes[i].Name
+		if err := h.LabelNode(ctx, nodeName, key, value); err != nil {
+			return fmt.Errorf("failed to label node %s: %w", nodeName, err)
+		}
+	}
+
+	return nil
+}
+
+// UnlabelAllNodes removes the label key from every node, stopping at the
+// first node that cannot be updated.
+func (h *NodeClient) UnlabelAllNodes(ctx context.Context, key string) error {
+	nodes, err := h.ListNodes(ctx)
+	if err != nil {
+		return err
+	}
+
+	for i := range nodes {
+		nodeName := nodes[i].Name
+		if err := h.UnlabelNode(ctx, nodeName, key); err != nil {
+			return fmt.Errorf("failed to unlabel node %s: %w", nodeName, err)
+		}
+	}
+
+	return nil
+}
diff --git a/tests/e2e/helpers/nvidiadriver.go b/tests/e2e/helpers/nvidiadriver.go
new file mode 100644
index 000000000..cbba03dd5
--- /dev/null
+++ b/tests/e2e/helpers/nvidiadriver.go
@@ -0,0 +1,158 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package helpers
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"time"
+
+	nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
+	gpuclientset "github.com/NVIDIA/gpu-operator/api/versioned"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/util/retry"
+)
+
+// NvidiaDriverClient provides helpers for managing NVIDIADriver custom
+// resources and for waiting on the driver pods and nodes they affect.
+type NvidiaDriverClient struct {
+	client     gpuclientset.Interface
+	k8sClient  kubernetes.Interface
+	nodeClient *NodeClient
+}
+
+// NewNvidiaDriverClient returns an NvidiaDriverClient that uses client for
+// NVIDIADriver resources and k8sClient for pods and nodes.
+func NewNvidiaDriverClient(client gpuclientset.Interface, k8sClient kubernetes.Interface) *NvidiaDriverClient {
+	return &NvidiaDriverClient{
+		client:     client,
+		k8sClient:  k8sClient,
+		nodeClient: NewNodeClient(k8sClient),
+	}
+}
+
+// Get fetches the NVIDIADriver called name.
+func (h *NvidiaDriverClient) Get(ctx context.Context, name string) (*nvidiav1alpha1.NVIDIADriver, error) {
+	return h.client.NvidiaV1alpha1().NVIDIADrivers().Get(ctx, name, metav1.GetOptions{})
+}
+
+// Create creates driver and returns the stored object.
+func (h *NvidiaDriverClient) Create(ctx context.Context, driver *nvidiav1alpha1.NVIDIADriver) (*nvidiav1alpha1.NVIDIADriver, error) {
+	return h.client.NvidiaV1alpha1().NVIDIADrivers().Create(ctx, driver, metav1.CreateOptions{})
+}
+
+// Update writes driver back to the API server and returns the stored object.
+func (h *NvidiaDriverClient) Update(ctx context.Context, driver *nvidiav1alpha1.NVIDIADriver) (*nvidiav1alpha1.NVIDIADriver, error) {
+	return h.client.NvidiaV1alpha1().NVIDIADrivers().Update(ctx, driver, metav1.UpdateOptions{})
+}
+
+// Delete deletes the NVIDIADriver called name.
+func (h *NvidiaDriverClient) Delete(ctx context.Context, name string) error {
+	return h.client.NvidiaV1alpha1().NVIDIADrivers().Delete(ctx, name, metav1.DeleteOptions{})
+}
+
+// List returns all NVIDIADriver resources.
+func (h *NvidiaDriverClient) List(ctx context.Context) (*nvidiav1alpha1.NVIDIADriverList, error) {
+	return h.client.NvidiaV1alpha1().NVIDIADrivers().List(ctx, metav1.ListOptions{})
+}
+
+// UpdateDriverVersion sets Spec.Version of the named NVIDIADriver. The
+// read-modify-write cycle is retried on an update conflict, matching
+// ClusterPolicyClient.modify, so a concurrent write to the resource does
+// not fail the call.
+func (h *NvidiaDriverClient) UpdateDriverVersion(ctx context.Context, name, version string) error {
+	return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
+		nvidiaDriver, err := h.Get(ctx, name)
+		if err != nil {
+			return fmt.Errorf("failed to get NVIDIADriver: %w", err)
+		}
+
+		nvidiaDriver.Spec.Version = version
+
+		// Wrapped with %w so the conflict status stays discoverable by
+		// RetryOnConflict through the error chain.
+		if _, err := h.Update(ctx, nvidiaDriver); err != nil {
+			return fmt.Errorf("failed to update NVIDIADriver: %w", err)
+		}
+
+		return nil
+	})
+}
+
+// driverPodReady reports whether pod has a PodReady condition with status
+// True.
+func driverPodReady(pod *corev1.Pod) bool {
+	for _, condition := range pod.Status.Conditions {
+		if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue {
+			return true
+		}
+	}
+	return false
+}
+
+// WaitForPodsReady waits for the nvidia driver pods to be ready and not terminating.
+// This checks actual pod readiness similar to check_nvidia_driver_pods_ready() in the bash tests.
+//
+// Driver pods are those in namespace labelled
+// app.kubernetes.io/component=nvidia-driver. The wait keeps polling while
+// no such pod exists and fails only when listing pods fails or timeout
+// elapses.
+func (h *NvidiaDriverClient) WaitForPodsReady(ctx context.Context, namespace string, timeout time.Duration) error {
+	return wait.PollUntilContextTimeout(ctx, defaultPollingInterval, timeout, true, func(ctx context.Context) (bool, error) {
+		log.Println("Checking nvidia driver pods")
+
+		labelSelector := labels.SelectorFromSet(map[string]string{
+			"app.kubernetes.io/component": "nvidia-driver",
+		}).String()
+
+		podList, err := h.k8sClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
+			LabelSelector: labelSelector,
+		})
+		if err != nil {
+			return false, fmt.Errorf("failed to list driver pods: %w", err)
+		}
+
+		if len(podList.Items) == 0 {
+			log.Println("No nvidia driver pods found")
+			return false, nil
+		}
+
+		log.Printf("Found %d nvidia driver pod(s)\n", len(podList.Items))
+
+		// Check if all pods are ready and not terminating
+		for i := range podList.Items {
+			pod := &podList.Items[i]
+
+			if !driverPodReady(pod) {
+				log.Printf("Pod %s/%s is not ready yet\n", pod.Namespace, pod.Name)
+				return false, nil
+			}
+
+			// DeletionTimestamp is the documented marker of a pod that is
+			// being deleted; the grace period is only auxiliary data that
+			// accompanies it.
+			if pod.DeletionTimestamp != nil {
+				log.Printf("Pod %s/%s is in terminating state\n", pod.Namespace, pod.Name)
+				return false, nil
+			}
+		}
+
+		log.Println("All nvidia driver pods are ready")
+		return true, nil
+	})
+}
+
+// WaitForPodsUpgradeDone waits for the driver upgrade to complete on all GPU nodes.
+//
+// GPU nodes are those labelled nvidia.com/gpu.present=true; the upgrade is
+// complete once every one of them carries the label
+// nvidia.com/gpu-driver-upgrade-state=upgrade-done. Finding no GPU node
+// at all is reported as an error and ends the wait immediately. The name
+// parameter is currently unused.
+func (h *NvidiaDriverClient) WaitForPodsUpgradeDone(ctx context.Context, name string, timeout time.Duration) error {
+	return wait.PollUntilContextTimeout(ctx, defaultPollingInterval, timeout, true, func(ctx context.Context) (bool, error) {
+		// Get all GPU nodes
+		gpuNodes, err := h.nodeClient.GetNodesByLabel(ctx, "nvidia.com/gpu.present", "true")
+		if err != nil {
+			return false, fmt.Errorf("failed to get GPU nodes: %w", err)
+		}
+
+		if len(gpuNodes) == 0 {
+			return false, fmt.Errorf("no GPU nodes found")
+		}
+
+		// Check if all GPU nodes have the upgrade-done state. A missing
+		// label reads as "", which never equals upgradeDoneState.
+		for i := range gpuNodes {
+			if gpuNodes[i].Labels["nvidia.com/gpu-driver-upgrade-state"] != upgradeDoneState {
+				return false, nil
+			}
+		}
+
+		return true, nil
+	})
+}
diff --git a/tests/e2e/helpers/operator.go b/tests/e2e/helpers/operator.go
new file mode 100644
index 000000000..6c5360f6f
--- /dev/null
+++ b/tests/e2e/helpers/operator.go
@@ -0,0 +1,128 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package helpers
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"time"
+
+	helm "github.com/mittwald/go-helm-client"
+	helmValues "github.com/mittwald/go-helm-client/values"
+)
+
+// OperatorClientOption configures an OperatorClient during construction.
+type OperatorClientOption func(client *OperatorClient)
+
+// OperatorClient installs and uninstalls the GPU operator Helm chart.
+type OperatorClient struct {
+	helmClient helm.Client
+	chart      string
+	namespace  string
+	kubeconfig string
+}
+
+// NewOperatorClient applies opts and builds a Helm client from the
+// kubeconfig file set with WithKubeConfig. The Helm repository cache and
+// repository config are placed under os.TempDir(). It returns an error when
+// the kubeconfig file cannot be read or the Helm client cannot be created.
+func NewOperatorClient(opts ...OperatorClientOption) (*OperatorClient, error) {
+	operatorClient := &OperatorClient{}
+
+	for _, option := range opts {
+		option(operatorClient)
+	}
+
+	// filepath.Join builds OS-correct paths and tolerates a TempDir value
+	// that already ends in a separator.
+	tempDir := os.TempDir()
+	helmOptions := &helm.KubeConfClientOptions{
+		Options: &helm.Options{
+			Namespace:        operatorClient.namespace,
+			RepositoryCache:  filepath.Join(tempDir, ".helmcache"),
+			RepositoryConfig: filepath.Join(tempDir, ".helmrepo"),
+		},
+	}
+
+	kubeconfigBytes, err := os.ReadFile(operatorClient.kubeconfig)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read kubeconfig %q: %w", operatorClient.kubeconfig, err)
+	}
+	helmOptions.KubeConfig = kubeconfigBytes
+
+	helmClient, err := helm.NewClientFromKubeConf(helmOptions)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create helm client: %w", err)
+	}
+	operatorClient.helmClient = helmClient
+
+	return operatorClient, nil
+}
+
+// WithChart sets the Helm chart that Install deploys.
+func WithChart(chart string) OperatorClientOption {
+	return func(operatorClient *OperatorClient) {
+		operatorClient.chart = chart
+	}
+}
+
+// WithKubeConfig sets the path of the kubeconfig file used to reach the
+// cluster.
+func WithKubeConfig(kubeconfig string) OperatorClientOption {
+	return func(operatorClient *OperatorClient) {
+		operatorClient.kubeconfig = kubeconfig
+	}
+}
+
+// WithNamespace sets the namespace the chart is installed into.
+func WithNamespace(namespace string) OperatorClientOption {
+	return func(operatorClient *OperatorClient) {
+		operatorClient.namespace = namespace
+	}
+}
+
+// ChartOptions holds the per-install settings that Install copies into the
+// Helm chart spec.
+type ChartOptions struct {
+	CleanupOnFail bool
+	GenerateName  bool
+	ReleaseName   string
+	Timeout       time.Duration
+	Wait          bool
+}
+
+// Install installs the configured chart with params as its values and
+// returns the name of the resulting release. When chartOpts.GenerateName is
+// false, chartOpts.ReleaseName must be non-empty.
+func (op *OperatorClient) Install(ctx context.Context, params []string, chartOpts ChartOptions) (string, error) {
+	values := helmValues.Options{
+		Values: params,
+	}
+
+	chartSpec := helm.ChartSpec{
+		ChartName:     op.chart,
+		Namespace:     op.namespace,
+		GenerateName:  chartOpts.GenerateName,
+		Wait:          chartOpts.Wait,
+		Timeout:       chartOpts.Timeout,
+		CleanupOnFail: chartOpts.CleanupOnFail,
+		ValuesOptions: values,
+	}
+
+	if !chartOpts.GenerateName {
+		if len(chartOpts.ReleaseName) == 0 {
+			return "", fmt.Errorf("release name must be provided when the GenerateName chart option is unset")
+		}
+		chartSpec.ReleaseName = chartOpts.ReleaseName
+	}
+
+	release, err := op.helmClient.InstallChart(ctx, &chartSpec, nil)
+	if err != nil {
+		return "", fmt.Errorf("error installing operator: %w", err)
+	}
+
+	return release.Name, nil
+}
+
+// Uninstall removes the Helm release called releaseName.
+func (op *OperatorClient) Uninstall(releaseName string) error {
+	return op.helmClient.UninstallReleaseByName(releaseName)
+}
diff --git a/tests/e2e/kubernetes/pod.go b/tests/e2e/helpers/pod.go similarity index 58% rename from tests/e2e/kubernetes/pod.go rename to tests/e2e/helpers/pod.go index 987ee3cb5..cbffb48e6 100644 --- a/tests/e2e/kubernetes/pod.go +++ b/tests/e2e/helpers/pod.go @@ -14,7 +14,7 @@ # limitations under the License. **/ -package kubernetes +package helpers import ( "bytes" @@ -28,17 +28,17 @@ import ( corev1client "k8s.io/client-go/kubernetes/typed/core/v1" ) -type Client struct { +type PodClient struct { k8sClient corev1client.CoreV1Interface } -func NewClient(k8sClient corev1client.CoreV1Interface) *Client { - return &Client{ +func NewPodClient(k8sClient corev1client.CoreV1Interface) *PodClient { + return &PodClient{ k8sClient: k8sClient, } } -func (c *Client) GetPodsByLabel(ctx context.Context, namespace string, labelMap map[string]string) ([]corev1.Pod, error) { +func (c *PodClient) GetPodsByLabel(ctx context.Context, namespace string, labelMap map[string]string) ([]corev1.Pod, error) { podList, err := c.k8sClient.Pods(namespace).List(ctx, metav1.ListOptions{ LabelSelector: labels.SelectorFromSet(labelMap).String(), }) @@ -48,17 +48,17 @@ func (c *Client) GetPodsByLabel(ctx context.Context, namespace string, labelMap return podList.Items, nil } -func (c *Client) IsPodReady(ctx context.Context, podName, namespace string) (bool,
error) { +func (c *PodClient) IsPodReady(ctx context.Context, podName, namespace string) (bool, error) { pod, err := c.k8sClient.Pods(namespace).Get(ctx, podName, metav1.GetOptions{}) if err != nil { - return false, fmt.Errorf("unexpected error getting pod %s: %w", podName, err) + return false, fmt.Errorf("unexpected error getting pod %s: %w", podName, err) } - for _, c := range pod.Status.Conditions { - if c.Type != corev1.PodReady { + for _, condition := range pod.Status.Conditions { + if condition.Type != corev1.PodReady { continue } - if c.Status == corev1.ConditionTrue { + if condition.Status == corev1.ConditionTrue { return true, nil } } @@ -66,40 +66,39 @@ func (c *Client) IsPodReady(ctx context.Context, podName, namespace string) (boo return false, nil } -func (c *Client) EnsureNoPodRestarts(ctx context.Context, podName, namespace string) (bool, error) { +func (c *PodClient) EnsureNoPodRestarts(ctx context.Context, podName, namespace string) (bool, error) { pod, err := c.k8sClient.Pods(namespace).Get(ctx, podName, metav1.GetOptions{}) if err != nil { - return false, fmt.Errorf("unexpected error getting pod %s: %w", podName, err) + return false, fmt.Errorf("unexpected error getting pod %s: %w", podName, err) } - for _, cs := range pod.Status.ContainerStatuses { - if cs.RestartCount > 0 { + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.RestartCount > 0 { return false, nil } } return true, nil } -func (c *Client) GetPodLogs(ctx context.Context, pod corev1.Pod) string { +func (c *PodClient) GetPodLogs(ctx context.Context, pod corev1.Pod) (string, error) { podLogOpts := corev1.PodLogOptions{} req := c.k8sClient.Pods(pod.Namespace).GetLogs(pod.Name, &podLogOpts) podLogs, err := req.Stream(ctx) if err != nil { - return "error in opening stream" + return "", fmt.Errorf("failed to open log stream: %w", err) } defer podLogs.Close() - buf := new(bytes.Buffer) - _, err = io.Copy(buf, podLogs) + buffer := new(bytes.Buffer) + _, err 
= io.Copy(buffer, podLogs) if err != nil { - return "error in copy information from podLogs to buf" + return "", fmt.Errorf("failed to copy log stream: %w", err) } - str := buf.String() - return str + return buffer.String(), nil } -func (c *Client) CreateNamespace(ctx context.Context, namespaceName string, labels map[string]string) (*corev1.Namespace, error) { +func (c *PodClient) CreateNamespace(ctx context.Context, namespaceName string, labels map[string]string) (*corev1.Namespace, error) { namespaceObj := &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ Name: namespaceName, @@ -111,6 +110,7 @@ func (c *Client) CreateNamespace(ctx context.Context, namespaceName string, labe return c.k8sClient.Namespaces().Create(ctx, namespaceObj, metav1.CreateOptions{}) } -func (c *Client) DeleteNamespace(ctx context.Context, namespaceName string) error { +func (c *PodClient) DeleteNamespace(ctx context.Context, namespaceName string) error { return c.k8sClient.Namespaces().Delete(ctx, namespaceName, metav1.DeleteOptions{}) } + diff --git a/tests/e2e/helpers/workload.go b/tests/e2e/helpers/workload.go new file mode 100644 index 000000000..5e10cc439 --- /dev/null +++ b/tests/e2e/helpers/workload.go @@ -0,0 +1,151 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package helpers + +import ( + "bytes" + "context" + "fmt" + "io" + "strings" + "time" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" +) + +type WorkloadClient struct { + client kubernetes.Interface +} + +func NewWorkloadClient(client kubernetes.Interface) *WorkloadClient { + return &WorkloadClient{ + client: client, + } +} + +func (h *WorkloadClient) DeployPod(ctx context.Context, namespace string, podSpec *corev1.Pod) (*corev1.Pod, error) { + return h.client.CoreV1().Pods(namespace).Create(ctx, podSpec, metav1.CreateOptions{}) +} + +func (h *WorkloadClient) WaitForCompletion(ctx context.Context, namespace, name string, timeout time.Duration) error { + return wait.PollUntilContextTimeout(ctx, defaultPollingInterval, timeout, true, func(ctx context.Context) (bool, error) { + workloadPod, err := h.client.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return false, err + } + + if workloadPod.Status.Phase == corev1.PodSucceeded { + return true, nil + } + + if workloadPod.Status.Phase == corev1.PodFailed { + return false, fmt.Errorf("pod %s/%s failed", namespace, name) + } + + return false, nil + }) +} + +func (h *WorkloadClient) WaitForRunning(ctx context.Context, namespace, name string, timeout time.Duration) error { + return wait.PollUntilContextTimeout(ctx, defaultPollingInterval, timeout, true, func(ctx context.Context) (bool, error) { + workloadPod, err := h.client.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return false, err + } + + if workloadPod.Status.Phase == corev1.PodRunning { + for _, condition := range workloadPod.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + return true, nil + } + } + } + + if workloadPod.Status.Phase == corev1.PodFailed { + return false, fmt.Errorf("pod 
%s/%s failed", namespace, name) + } + + return false, nil + }) +} + +func (h *WorkloadClient) GetLogs(ctx context.Context, namespace, name string) (string, error) { + podLogOpts := corev1.PodLogOptions{} + req := h.client.CoreV1().Pods(namespace).GetLogs(name, &podLogOpts) + podLogs, err := req.Stream(ctx) + if err != nil { + return "", fmt.Errorf("failed to open log stream: %w", err) + } + defer podLogs.Close() + + buffer := new(bytes.Buffer) + _, err = io.Copy(buffer, podLogs) + if err != nil { + return "", fmt.Errorf("failed to copy log stream: %w", err) + } + + return buffer.String(), nil +} + +// VerifyGPUAccess checks pod logs for evidence of GPU access. +// TODO: Improve this by exec'ing into the container and invoking nvidia-smi directly +func (h *WorkloadClient) VerifyGPUAccess(ctx context.Context, namespace, name string) error { + logs, err := h.GetLogs(ctx, namespace, name) + if err != nil { + return fmt.Errorf("failed to get pod logs: %w", err) + } + + if !strings.Contains(logs, "NVIDIA") && !strings.Contains(logs, "GPU") { + return fmt.Errorf("pod logs do not contain evidence of GPU access") + } + + return nil +} + +func (h *WorkloadClient) Delete(ctx context.Context, namespace, name string) error { + return h.client.CoreV1().Pods(namespace).Delete(ctx, name, metav1.DeleteOptions{}) +} + +func CreateSimpleGPUPod(name, namespace string, gpuLimit int) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + Containers: []corev1.Container{ + { + Name: "gpu-test", + Image: "nvcr.io/nvidia/cuda:12.6.0-base-ubuntu22.04", + Command: []string{ + "nvidia-smi", + }, + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(int64(gpuLimit), resource.DecimalSI), + }, + }, + }, + }, + }, + } +} diff --git a/tests/e2e/operator/helm.go b/tests/e2e/operator/helm.go deleted file mode 100644 
index c2dcaa47e..000000000 --- a/tests/e2e/operator/helm.go +++ /dev/null @@ -1,130 +0,0 @@ -/** -# Copyright (c) NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package operator - -import ( - "context" - "fmt" - "os" - "time" - - helm "github.com/mittwald/go-helm-client" - helmValues "github.com/mittwald/go-helm-client/values" -) - -// ClientOption is a function that can be used to set the fields of the operator helm Client -type ClientOption func(client *Client) - -// Client represents the struct which holds the helm client -type Client struct { - helmClient helm.Client - chart string - namespace string - kubeconfig string -} - -func NewClient(opts ...ClientOption) (*Client, error) { - client := &Client{} - - for _, o := range opts { - o(client) - } - - opt := &helm.KubeConfClientOptions{ - Options: &helm.Options{ - Namespace: client.namespace, - RepositoryCache: "/tmp/.helmcache", - RepositoryConfig: "/tmp/.helmrepo", - }, - } - - kubeconfigBytes, err := os.ReadFile(client.kubeconfig) - if err != nil { - return nil, err - } - opt.KubeConfig = kubeconfigBytes - - helmClient, err := helm.NewClientFromKubeConf(opt) - if err != nil { - return nil, err - } - client.helmClient = helmClient - - return client, nil -} - -func WithChart(chart string) ClientOption { - return func(c *Client) { - c.chart = chart - } -} - -func WithKubeConfig(kubeconfig string) ClientOption { - return func(c *Client) { - c.kubeconfig = kubeconfig - 
} -} - -func WithNamespace(namespace string) ClientOption { - return func(c *Client) { - c.namespace = namespace - } -} - -type ChartOptions struct { - CleanupOnFail bool - GenerateName bool - ReleaseName string - Timeout time.Duration - Wait bool -} - -// Install deploys the helm chart -func (c *Client) Install(ctx context.Context, params []string, chartOpts ChartOptions) (string, error) { - values := helmValues.Options{ - Values: params, - } - - chartSpec := helm.ChartSpec{ - ChartName: c.chart, - Namespace: c.namespace, - GenerateName: chartOpts.GenerateName, - Wait: chartOpts.Wait, - Timeout: chartOpts.Timeout, - CleanupOnFail: chartOpts.CleanupOnFail, - ValuesOptions: values, - } - - if !chartOpts.GenerateName { - if len(chartOpts.ReleaseName) == 0 { - return "", fmt.Errorf("release name must be provided the GenerateName chart option is unset") - } - chartSpec.ReleaseName = chartOpts.ReleaseName - } - - res, err := c.helmClient.InstallChart(ctx, &chartSpec, nil) - - if err != nil { - return "", fmt.Errorf("error installing operator: %w", err) - } - - return res.Name, err -} - -func (c *Client) Uninstall(releaseName string) error { - return c.helmClient.UninstallReleaseByName(releaseName) -} diff --git a/tests/e2e/suites/clusterpolicy_test.go b/tests/e2e/suites/clusterpolicy_test.go new file mode 100644 index 000000000..92cdbde43 --- /dev/null +++ b/tests/e2e/suites/clusterpolicy_test.go @@ -0,0 +1,408 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +// Package suites contains end-to-end test suites for GPU Operator ClusterPolicy management. +// These tests verify ClusterPolicy updates, component toggling, and configuration changes. +package suites + +import ( + "context" + "fmt" + "strings" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + + nvidiav1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" + gpuclientset "github.com/NVIDIA/gpu-operator/api/versioned" + "github.com/NVIDIA/gpu-operator/tests/e2e/framework" + e2elog "github.com/NVIDIA/gpu-operator/tests/e2e/framework/logs" + "github.com/NVIDIA/gpu-operator/tests/e2e/helpers" +) + +const ( + defaultNamespace = "gpu-operator" + defaultPolicyName = "cluster-policy" + specUpdateTimeout = 30 * time.Second + componentReadyTimeout = 3 * time.Minute + podDeletionTimeout = 2 * time.Minute + daemonsetUpdateTimeout = 1 * time.Minute +) + +// getClusterPolicyOrSkip retrieves the ClusterPolicy or skips the test if not found. +func getClusterPolicyOrSkip(ctx context.Context, client *helpers.ClusterPolicyClient, name string) *nvidiav1.ClusterPolicy { + clusterPolicy, err := client.Get(ctx, name) + if err != nil { + Skip("ClusterPolicy not deployed - skipping test") + } + return clusterPolicy +} + +// waitForDaemonSetReady waits until the specified DaemonSet is ready. 
+func waitForDaemonSetReady(ctx context.Context, client *helpers.DaemonSetClient, namespace, name string) { + Eventually(func() bool { + isReady, err := client.IsReady(ctx, namespace, name) + if err != nil { + e2elog.Logf("WARN: error checking daemonset %s: %v", name, err) + return false + } + return isReady + }).WithPolling(5 * time.Second).Within(componentReadyTimeout).WithContext(ctx).Should(BeTrue()) +} + +// waitForPodsDeleted waits until all pods matching the label selector are deleted. +func waitForPodsDeleted(ctx context.Context, clientset kubernetes.Interface, namespace, labelSelector string) { + Eventually(func() bool { + pods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return false + } + return len(pods.Items) == 0 + }).WithPolling(5 * time.Second).Within(podDeletionTimeout).WithContext(ctx).Should(BeTrue()) +} + +// waitForSpecUpdate waits until the ClusterPolicy spec matches the provided check function. +func waitForSpecUpdate(ctx context.Context, client *helpers.ClusterPolicyClient, name string, checkFn func(*nvidiav1.ClusterPolicy) bool) { + Eventually(func() bool { + clusterPolicy, err := client.Get(ctx, name) + if err != nil { + return false + } + return checkFn(clusterPolicy) + }).WithPolling(2 * time.Second).Within(specUpdateTimeout).WithContext(ctx).Should(BeTrue()) +} + +// verifyEnvInDaemonSet verifies that a DaemonSet has the specified environment variable. 
+func verifyEnvInDaemonSet(ctx context.Context, client *helpers.DaemonSetClient, namespace, dsName, envName, envValue string) { + Eventually(func() bool { + ds, err := client.GetByLabel(ctx, namespace, "app", dsName) + if err != nil { + e2elog.Logf("WARN: error getting daemonset %s: %v", dsName, err) + return false + } + if len(ds.Spec.Template.Spec.Containers) == 0 { + return false + } + for _, env := range ds.Spec.Template.Spec.Containers[0].Env { + if env.Name == envName && env.Value == envValue { + return true + } + } + return false + }).WithPolling(5 * time.Second).Within(daemonsetUpdateTimeout).WithContext(ctx).Should(BeTrue()) +} + +// waitForPodsReady waits until all pods matching the label selector are ready. +func waitForPodsReady(ctx context.Context, clientset kubernetes.Interface, namespace, labelSelector string) { + Eventually(func() bool { + pods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil || len(pods.Items) == 0 { + return false + } + for _, pod := range pods.Items { + podReady := false + for _, condition := range pod.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + podReady = true + break + } + } + if !podReady { + return false + } + } + return true + }).WithPolling(5 * time.Second).Within(componentReadyTimeout).WithContext(ctx).Should(BeTrue()) +} + +var _ = Describe("ClusterPolicy Management", Label("clusterPolicy"), func() { + f := framework.NewFramework("clusterpolicy-suite") + f.SkipNamespaceCreation = true + + var ( + clusterPolicyClient *helpers.ClusterPolicyClient + daemonSetClient *helpers.DaemonSetClient + testNamespace string + policyName string + ) + + BeforeEach(func() { + config := f.ClientConfig() + gpuClient, err := gpuclientset.NewForConfig(config) + Expect(err).NotTo(HaveOccurred()) + + clusterPolicyClient = helpers.NewClusterPolicyClient(gpuClient) + daemonSetClient = 
helpers.NewDaemonSetClient(f.ClientSet) + testNamespace = defaultNamespace + policyName = defaultPolicyName + }) + + // test_image_updates - Update driver image version + When("Updating driver image version", Label("driver", "upgrade"), func() { + It("should update driver daemonset image and complete upgrade", func(ctx context.Context) { + Skip("Requires specific driver version availability and upgrade flow") + + clusterPolicy := getClusterPolicyOrSkip(ctx, clusterPolicyClient, policyName) + originalVersion := clusterPolicy.Spec.Driver.Version + targetVersion := "550.90.07" + DeferCleanup(func(ctx context.Context) { + _ = clusterPolicyClient.UpdateDriverVersion(ctx, policyName, originalVersion) + }) + + err := clusterPolicyClient.UpdateDriverVersion(ctx, policyName, targetVersion) + Expect(err).NotTo(HaveOccurred(), "Failed to update driver version in ClusterPolicy") + + Eventually(func() bool { + image, err := daemonSetClient.GetImage(ctx, testNamespace, "nvidia-driver-daemonset") + if err != nil { + return false + } + return strings.Contains(image, targetVersion) + }).WithPolling(5 * time.Second).Within(daemonsetUpdateTimeout).WithContext(ctx).Should(BeTrue()) + + waitForDaemonSetReady(ctx, daemonSetClient, testNamespace, "nvidia-driver-daemonset") + }) + }) + + // test_env_updates - Add ENV to Device Plugin + When("Updating device plugin environment variables", Label("config", "envvars"), func() { + It("should add env variable to device plugin daemonset", func(ctx context.Context) { + clusterPolicy := getClusterPolicyOrSkip(ctx, clusterPolicyClient, policyName) + originalEnv := clusterPolicy.Spec.DevicePlugin.Env + DeferCleanup(func(ctx context.Context) { + clusterPolicy, _ := clusterPolicyClient.Get(ctx, policyName) + if clusterPolicy != nil { + clusterPolicy.Spec.DevicePlugin.Env = originalEnv + _, _ = clusterPolicyClient.Update(ctx, clusterPolicy) + } + }) + + testEnvName := "MY_TEST_ENV_NAME" + testEnvValue := "test" + + clusterPolicy.Spec.DevicePlugin.Env = 
append(clusterPolicy.Spec.DevicePlugin.Env, nvidiav1.EnvVar{ + Name: testEnvName, + Value: testEnvValue, + }) + + _, err := clusterPolicyClient.Update(ctx, clusterPolicy) + Expect(err).NotTo(HaveOccurred(), "Failed to update ClusterPolicy with new environment variable") + + verifyEnvInDaemonSet(ctx, daemonSetClient, testNamespace, "nvidia-device-plugin-daemonset", testEnvName, testEnvValue) + waitForDaemonSetReady(ctx, daemonSetClient, testNamespace, "nvidia-device-plugin-daemonset") + }) + }) + + // test_mig_strategy_updates - Test MIG strategy updates + When("Updating MIG strategy", Label("mig", "config"), func() { + It("should apply MIG_STRATEGY to both GFD and device plugin daemonsets", func(ctx context.Context) { + clusterPolicy := getClusterPolicyOrSkip(ctx, clusterPolicyClient, policyName) + originalStrategy := clusterPolicy.Spec.MIG.Strategy + newStrategy := nvidiav1.MIGStrategyMixed + DeferCleanup(func(ctx context.Context) { + _ = clusterPolicyClient.SetMIGStrategy(ctx, policyName, string(originalStrategy)) + }) + + err := clusterPolicyClient.SetMIGStrategy(ctx, policyName, string(newStrategy)) + Expect(err).NotTo(HaveOccurred(), "Failed to update MIG strategy in ClusterPolicy") + + verifyEnvInDaemonSet(ctx, daemonSetClient, testNamespace, "gpu-feature-discovery", "MIG_STRATEGY", string(newStrategy)) + verifyEnvInDaemonSet(ctx, daemonSetClient, testNamespace, "nvidia-device-plugin-daemonset", "MIG_STRATEGY", string(newStrategy)) + }) + }) + + // test_enable_dcgm - Enable standalone DCGM and verify service + When("Enabling standalone DCGM", Label("dcgm"), func() { + It("should enable DCGM and verify service with local traffic policy", func(ctx context.Context) { + getClusterPolicyOrSkip(ctx, clusterPolicyClient, policyName) + + err := clusterPolicyClient.EnableDCGM(ctx, policyName) + Expect(err).NotTo(HaveOccurred(), "Failed to enable DCGM in ClusterPolicy") + + waitForPodsReady(ctx, f.ClientSet, testNamespace, "app=nvidia-dcgm") + 
waitForDaemonSetReady(ctx, daemonSetClient, testNamespace, "nvidia-dcgm-exporter") + + Eventually(func() bool { + svc, err := f.ClientSet.CoreV1().Services(testNamespace).Get(ctx, "nvidia-dcgm", metav1.GetOptions{}) + if err != nil { + e2elog.Logf("WARN: error getting nvidia-dcgm service: %v", err) + return false + } + + if svc.Spec.InternalTrafficPolicy == nil { + return false + } + + return *svc.Spec.InternalTrafficPolicy == corev1.ServiceInternalTrafficPolicyLocal + }).WithPolling(5 * time.Second).Within(daemonsetUpdateTimeout).WithContext(ctx).Should(BeTrue()) + }) + }) + + // test_disable_enable_gfd - Disable and re-enable GFD + When("Toggling GPU Feature Discovery", Label("gfd", "toggle"), func() { + It("should disable GFD and verify pods deleted", func(ctx context.Context) { + clusterPolicy := getClusterPolicyOrSkip(ctx, clusterPolicyClient, policyName) + originalState := clusterPolicy.Spec.GPUFeatureDiscovery.Enabled + DeferCleanup(func(ctx context.Context) { + if originalState != nil && *originalState { + _ = clusterPolicyClient.EnableGFD(ctx, policyName) + waitForDaemonSetReady(ctx, daemonSetClient, testNamespace, "gpu-feature-discovery") + } + }) + + err := clusterPolicyClient.DisableGFD(ctx, policyName) + Expect(err).NotTo(HaveOccurred(), "Failed to disable GFD in ClusterPolicy") + + waitForPodsDeleted(ctx, f.ClientSet, testNamespace, "app=gpu-feature-discovery") + }) + + It("should re-enable GFD and verify pods running", func(ctx context.Context) { + clusterPolicy := getClusterPolicyOrSkip(ctx, clusterPolicyClient, policyName) + originalState := clusterPolicy.Spec.GPUFeatureDiscovery.Enabled + DeferCleanup(func(ctx context.Context) { + if originalState != nil && !*originalState { + _ = clusterPolicyClient.DisableGFD(ctx, policyName) + } + }) + + err := clusterPolicyClient.EnableGFD(ctx, policyName) + Expect(err).NotTo(HaveOccurred(), "Failed to enable GFD in ClusterPolicy") + + waitForDaemonSetReady(ctx, daemonSetClient, testNamespace, 
"gpu-feature-discovery") + }) + }) + + // test_disable_enable_dcgm_exporter - Disable and re-enable DCGM Exporter + When("Toggling DCGM Exporter", Label("dcgm", "toggle"), func() { + It("should disable DCGM Exporter and verify pods deleted", func(ctx context.Context) { + clusterPolicy := getClusterPolicyOrSkip(ctx, clusterPolicyClient, policyName) + originalState := clusterPolicy.Spec.DCGMExporter.Enabled + DeferCleanup(func(ctx context.Context) { + if originalState != nil && *originalState { + _ = clusterPolicyClient.EnableDCGMExporter(ctx, policyName) + waitForDaemonSetReady(ctx, daemonSetClient, testNamespace, "nvidia-dcgm-exporter") + } + }) + + err := clusterPolicyClient.DisableDCGMExporter(ctx, policyName) + Expect(err).NotTo(HaveOccurred(), "Failed to disable DCGM Exporter in ClusterPolicy") + + waitForPodsDeleted(ctx, f.ClientSet, testNamespace, "app=nvidia-dcgm-exporter") + }) + + It("should re-enable DCGM Exporter and verify pods running", func(ctx context.Context) { + clusterPolicy := getClusterPolicyOrSkip(ctx, clusterPolicyClient, policyName) + originalState := clusterPolicy.Spec.DCGMExporter.Enabled + DeferCleanup(func(ctx context.Context) { + if originalState != nil && !*originalState { + _ = clusterPolicyClient.DisableDCGMExporter(ctx, policyName) + } + }) + + err := clusterPolicyClient.EnableDCGMExporter(ctx, policyName) + Expect(err).NotTo(HaveOccurred(), "Failed to enable DCGM Exporter in ClusterPolicy") + + waitForDaemonSetReady(ctx, daemonSetClient, testNamespace, "nvidia-dcgm-exporter") + }) + }) + + // test_custom_labels_override - Test custom labels on daemonsets + When("Updating daemonset custom labels", Label("labels", "config"), func() { + It("should apply custom labels to all operand pods", func(ctx context.Context) { + clusterPolicy := getClusterPolicyOrSkip(ctx, clusterPolicyClient, policyName) + originalLabels := clusterPolicy.Spec.Daemonsets.Labels + DeferCleanup(func(ctx context.Context) { + clusterPolicy, _ := 
clusterPolicyClient.Get(ctx, policyName) + if clusterPolicy != nil { + clusterPolicy.Spec.Daemonsets.Labels = originalLabels + _, _ = clusterPolicyClient.Update(ctx, clusterPolicy) + } + }) + + customLabels := map[string]string{ + "cloudprovider": "aws", + "platform": "kubernetes", + } + + clusterPolicy.Spec.Daemonsets.Labels = customLabels + _, err := clusterPolicyClient.Update(ctx, clusterPolicy) + Expect(err).NotTo(HaveOccurred(), "Failed to update ClusterPolicy with custom labels") + + // Wait for spec update to be applied + waitForSpecUpdate(ctx, clusterPolicyClient, policyName, func(clusterPolicy *nvidiav1.ClusterPolicy) bool { + if len(clusterPolicy.Spec.Daemonsets.Labels) != len(customLabels) { + return false + } + for k, v := range customLabels { + if clusterPolicy.Spec.Daemonsets.Labels[k] != v { + return false + } + } + return true + }) + + // DaemonSet operands that should have custom labels + daemonsetOperands := []string{ + "nvidia-driver-daemonset", + "nvidia-container-toolkit-daemonset", + "nvidia-device-plugin-daemonset", + "gpu-feature-discovery", + "nvidia-dcgm-exporter", + } + + for _, operand := range daemonsetOperands { + e2elog.Logf("Waiting for daemonset %s to be ready", operand) + waitForDaemonSetReady(ctx, daemonSetClient, testNamespace, operand) + } + + // Validator pods (may be Jobs/Pods, not DaemonSets) + e2elog.Logf("Waiting for validator pods to be ready") + waitForPodsReady(ctx, f.ClientSet, testNamespace, "app=nvidia-operator-validator") + + // Verify labels on all operand pods + allOperands := append(daemonsetOperands, "nvidia-operator-validator") + for _, operand := range allOperands { + e2elog.Logf("Checking %s labels", operand) + labelSelector := fmt.Sprintf("app=%s", operand) + pods, err := f.ClientSet.CoreV1().Pods(testNamespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + + if err != nil || len(pods.Items) == 0 { + e2elog.Logf("Skipping label check for %s - no pods found", operand) + continue + } + + 
for _, pod := range pods.Items { + for key, expectedValue := range customLabels { + actualValue, exists := pod.Labels[key] + Expect(exists).To(BeTrue(), fmt.Sprintf("Label %s missing on %s pod %s", key, operand, pod.Name)) + Expect(actualValue).To(Equal(expectedValue), fmt.Sprintf("Label %s has wrong value on %s pod %s", key, operand, pod.Name)) + } + } + } + }) + }) +})