Skip to content

Commit 2758a69

Browse files
committed
fix: cut v0.1.0 with warning about experimental release
1 parent e5419d1 commit 2758a69

File tree

12 files changed

+17
-766
lines changed

12 files changed

+17
-766
lines changed

.github/actions/replace-gitlab-imports/action.yml

Lines changed: 0 additions & 40 deletions
This file was deleted.

.github/actions/setup-build-env/action.yml

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,6 @@ inputs:
4444
description: 'Whether to install DCGM (requires sudo, may fail in some environments)'
4545
required: false
4646
default: 'false'
47-
replace-imports:
48-
description: 'Whether to replace GitLab imports with GitHub imports'
49-
required: false
50-
default: 'true'
51-
generate-protos:
52-
description: 'Whether to generate protocol buffers'
53-
required: false
54-
default: 'true'
5547

5648
runs:
5749
using: 'composite'
@@ -67,11 +59,4 @@ runs:
6759
protoc-gen-go-version: ${{ inputs.protoc-gen-go-version }}
6860
protoc-gen-go-grpc-version: ${{ inputs.protoc-gen-go-grpc-version }}
6961
shellcheck-version: ${{ inputs.shellcheck-version }}
70-
install-dcgm: ${{ inputs.install-dcgm }}
71-
72-
- name: Replace GitLab imports
73-
if: inputs.replace-imports == 'true'
74-
uses: ./.github/actions/replace-gitlab-imports
75-
with:
76-
generate-protos: ${{ inputs.generate-protos }}
77-
62+
install-dcgm: ${{ inputs.install-dcgm }}

.github/actions/setup-ci-tools/action.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ runs:
130130
echo "Installing Protocol Buffers ${{ inputs.protobuf-version }}"
131131
PROTOBUF_VERSION="${{ inputs.protobuf-version }}"
132132
PROTOBUF_VERSION_NO_V="${PROTOBUF_VERSION#v}"
133-
wget https://github.com/protocolbuffers/protobuf/releases/download/${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION_NO_V}-linux-x86_64.zip
133+
wget -q https://github.com/protocolbuffers/protobuf/releases/download/${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION_NO_V}-linux-x86_64.zip
134134
unzip protoc-${PROTOBUF_VERSION_NO_V}-linux-x86_64.zip -d protoc-${PROTOBUF_VERSION_NO_V}-linux-x86_64
135135
sudo cp protoc-${PROTOBUF_VERSION_NO_V}-linux-x86_64/bin/protoc /usr/local/bin/
136136
sudo mkdir -p /usr/local/include/google
@@ -199,6 +199,6 @@ runs:
199199
echo "gotestsum: $(gotestsum --version)"
200200
echo "protoc: $(protoc --version)"
201201
echo "shellcheck: $(shellcheck --version | head -n 2)"
202-
echo "addlicense: $(addlicense --version 2>&1 || echo 'installed')"
203-
echo "goimports: $(goimports --help 2>&1 | head -n 1 || echo 'installed')"
202+
echo "addlicense: $(addlicense >/dev/null 2>&1 || echo 'installed')"
203+
echo "goimports: $(goimports --help >/dev/null 2>&1 || echo 'installed')"
204204
echo "==================================="

.github/workflows/lint-test.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ jobs:
8383
protoc-gen-go-version: ${{ needs.prepare-environment.outputs.protoc_gen_go_version }}
8484
protoc-gen-go-grpc-version: ${{ needs.prepare-environment.outputs.protoc_gen_go_grpc_version }}
8585
shellcheck-version: ${{ needs.prepare-environment.outputs.shellcheck_version }}
86-
replace-imports: ${{ matrix.replace_imports || 'true' }}
8786

8887
- name: ${{ matrix.step_name }}
8988
run: ${{ matrix.make_command }}
@@ -116,7 +115,6 @@ jobs:
116115
protoc-gen-go-grpc-version: ${{ needs.prepare-environment.outputs.protoc_gen_go_grpc_version }}
117116
shellcheck-version: ${{ needs.prepare-environment.outputs.shellcheck_version }}
118117
install-dcgm: ${{ matrix.install_dcgm || 'false' }}
119-
replace-imports: ${{ matrix.replace_imports || 'true' }}
120118

121119
- name: Run lint and test
122120
run: make -C health-monitors/${{ matrix.component }} lint-test

.github/workflows/publish.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,6 @@ jobs:
6060
protoc-gen-go-version: ${{ needs.prepare-environment.outputs.protoc_gen_go_version }}
6161
protoc-gen-go-grpc-version: ${{ needs.prepare-environment.outputs.protoc_gen_go_grpc_version }}
6262
shellcheck-version: ${{ needs.prepare-environment.outputs.shellcheck_version }}
63-
replace-imports: 'true'
64-
generate-protos: 'true'
6563

6664
- name: Build image list
6765
id: build-list
@@ -128,8 +126,6 @@ jobs:
128126
protoc-gen-go-version: ${{ needs.prepare-environment.outputs.protoc_gen_go_version }}
129127
protoc-gen-go-grpc-version: ${{ needs.prepare-environment.outputs.protoc_gen_go_grpc_version }}
130128
shellcheck-version: ${{ needs.prepare-environment.outputs.shellcheck_version }}
131-
replace-imports: 'true'
132-
generate-protos: 'true'
133129

134130
- name: Publish container for ${{ matrix.component }}
135131
uses: ./.github/actions/publish-container

.github/workflows/release.yml

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -47,18 +47,6 @@ jobs:
4747
with:
4848
ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.tag || github.ref }}
4949

50-
- name: Run import replacement script
51-
env:
52-
GITHUB_REPOSITORY: ${{ github.repository }}
53-
run: |
54-
chmod +x ./scripts/replace-imports.sh
55-
./scripts/replace-imports.sh
56-
57-
- name: Run docker registry replacement script
58-
run: |
59-
chmod +x ./scripts/replace-docker-registry.sh
60-
./scripts/replace-docker-registry.sh
61-
6250
- name: Build image list
6351
env:
6452
SAFE_REF_NAME: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.tag || needs.prepare-environment.outputs.safe_ref_name }}
@@ -87,18 +75,6 @@ jobs:
8775
with:
8876
ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.tag || github.ref }}
8977

90-
- name: Run import replacement script
91-
env:
92-
GITHUB_REPOSITORY: ${{ github.repository }}
93-
run: |
94-
chmod +x ./scripts/replace-imports.sh
95-
./scripts/replace-imports.sh
96-
97-
- name: Run docker registry replacement script
98-
run: |
99-
chmod +x ./scripts/replace-docker-registry.sh
100-
./scripts/replace-docker-registry.sh
101-
10278
- name: Install helm
10379
uses: azure/setup-helm@v4
10480
with:

DEVELOPMENT.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
6767
helm plugin install https://github.com/chartmuseum/helm-push || true
6868

6969
# Install Protocol Buffers (version matching CI)
70-
wget https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip
70+
wget -q https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip
7171
unzip -o protoc-27.1-linux-x86_64.zip -d protoc-27.1-linux-x86_64
7272
cp protoc-27.1-linux-x86_64/bin/protoc /usr/local/bin/
7373
mkdir -p /usr/local/bin/include/google
@@ -78,7 +78,7 @@ rm -rf protoc*
7878
python3 -m pip install --break-system-packages grpcio grpcio-tools
7979

8080
# Install NVIDIA DCGM
81-
wget https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb
81+
wget -q https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb
8282
dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb
8383
apt-get install -y datacenter-gpu-manager=1:3.3.5
8484

README.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88

99
NVSentinel is a comprehensive collection of Kubernetes services that automatically detect, classify, and remediate hardware and software faults in GPU nodes. Designed for GPU clusters, it ensures maximum uptime and seamless fault recovery in high-performance computing environments.
1010

11+
> [!WARNING]
12+
> **Experimental Preview Release**
13+
> This is an experimental/preview release of NVSentinel. Use at your own risk in production environments. The software is provided "as is" without warranties of any kind. Features, APIs, and configurations may change without notice in future releases. For production deployments, thoroughly test in non-critical environments first.
14+
1115
## 🚀 Quick Start
1216

1317
### Prerequisites
@@ -48,7 +52,7 @@ helm search repo oci://ghcr.io/nvidia/nvsentinel --versions
4852
- **📊 Persistent Storage**: MongoDB-based event store with change streams for real-time updates
4953
- **🛡️ Graceful Handling**: Coordinated workload eviction with configurable timeouts
5054
51-
## Getting started
55+
## 🧪 Quick test with any Kubernetes cluster
5256
5357
**Prerequisites**: Kubernetes 1.25+, Helm 3.0+, NVIDIA GPU Operator installed
5458
@@ -87,7 +91,7 @@ helm upgrade --install prometheus prometheus-community/kube-prometheus-stack \
8791
Now install NVSentinel itself. Use the latest stable version from the releases page:
8892
8993
```bash
90-
NVSENTINEL_VERSION=v0.5.0 # Check releases for latest version
94+
NVSENTINEL_VERSION=v0.1.0 # Check releases for latest version
9195
BASE_DIR="$(pwd)"
9296
9397
helm upgrade --install nvsentinel oci://ghcr.io/nvidia/nvsentinel \
@@ -111,7 +115,7 @@ kubectl get pods -n nvsentinel
111115
kubectl get nodes # Should show your GPU nodes
112116
113117
# Run comprehensive validation
114-
./scripts/validate-nvsentinel.sh --version v0.5.0 --verbose
118+
./scripts/validate-nvsentinel.sh --version v0.1.0 --verbose
115119
```
116120
117121
> **Tip**: Use the [`scripts/validate-nvsentinel.sh`](scripts/validate-nvsentinel.sh) script for comprehensive deployment validation including image versions, pod health, and certificate status.

ci.Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,22 @@ RUN apt-get update && \
1919
pip install --break-system-packages poetry==1.8.2 && \
2020
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash && \
2121
helm plugin install https://github.com/chartmuseum/helm-push && \
22-
wget https://go.dev/dl/go1.24.8.linux-amd64.tar.gz && tar -C /usr/local -xzf go1.24.8.linux-amd64.tar.gz
22+
wget -q https://go.dev/dl/go1.24.8.linux-amd64.tar.gz && tar -C /usr/local -xzf go1.24.8.linux-amd64.tar.gz
2323

2424
ENV PATH="${PATH}:/usr/local/go/bin:/root/go/bin"
2525

2626
RUN go install github.com/boumenot/gocover-cobertura@latest && \
2727
go install gotest.tools/gotestsum@latest && \
2828
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.64.8
2929

30-
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip && \
30+
RUN wget -q https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip && \
3131
unzip protoc-27.1-linux-x86_64.zip -d protoc-27.1-linux-x86_64 && \
3232
cp protoc-27.1-linux-x86_64/bin/protoc /usr/local/bin/ && mkdir -p /usr/local/bin/include/google && cp -r protoc-27.1-linux-x86_64/include/google /usr/local/bin/include && \
3333
go install google.golang.org/protobuf/cmd/[email protected] && go install google.golang.org/grpc/cmd/[email protected] && \
3434
python3 -m pip install --break-system-packages grpcio grpcio-tools
3535

3636
RUN apt-get update && apt-get install -y wget && \
37-
wget https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb && \
37+
wget -q https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb && \
3838
dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb && \
3939
apt-get update && apt-get install -y datacenter-gpu-manager=1:3.3.5 && \
4040
apt-get clean

0 commit comments

Comments
 (0)