Skip to content

Commit 935cb87

Browse files
committed
Merge remote-tracking branch 'origin/main' into feature/oidc-gcp
2 parents b074b87 + 19d00f2 commit 935cb87

File tree

182 files changed

+11247
-2785
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

182 files changed

+11247
-2785
lines changed

.coderabbit.yaml

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
reviews:
16+
profile: chill
17+
auto_review:
18+
enabled: true

.github/actions/setup-ci-env/action.yml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ runs:
4646
echo "gocover_cobertura_version=$(yq '.go_tools.gocover_cobertura' .versions.yaml)" >> $GITHUB_OUTPUT
4747
echo "goimports_version=$(yq '.go_tools.goimports' .versions.yaml)" >> $GITHUB_OUTPUT
4848
echo "crane_version=$(yq '.go_tools.crane' .versions.yaml)" >> $GITHUB_OUTPUT
49+
echo "helm_version=$(yq '.testing_tools.helm' .versions.yaml)" >> $GITHUB_OUTPUT
4950
5051
- name: Install base dependencies
5152
shell: bash
@@ -78,7 +79,7 @@ runs:
7879
- name: Install Helm
7980
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4.3.1
8081
with:
81-
version: 'latest'
82+
version: ${{ steps.versions.outputs.helm_version }}
8283

8384
- name: Cache Helm plugins
8485
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0

.github/copy-pr-bot.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,3 +20,4 @@ additional_trustees:
2020
- nitz2407
2121
- XRFXLP
2222
- mchmarny
23+
- tanishagoyal2

.github/workflows/code-scanning.yml

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -19,14 +19,6 @@ on:
1919
branches:
2020
- main
2121
- "pull-request/[0-9]+"
22-
paths-ignore:
23-
- '**/*.md'
24-
- 'docs/**'
25-
- 'LICENSE'
26-
- '.github/ISSUE_TEMPLATE/**'
27-
- '.github/*.yaml'
28-
- '.github/*.yml'
29-
- '.github/headers/**'
3022
tags:
3123
- 'v*'
3224
workflow_call: {}

.github/workflows/container-build-test.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,8 @@ jobs:
5959
make_command: 'make -C health-monitors/gpu-health-monitor docker-build-dcgm4'
6060
- component: syslog-health-monitor
6161
make_command: 'make -C health-monitors/syslog-health-monitor docker-build'
62+
- component: kubernetes-object-monitor
63+
make_command: 'make -C health-monitors/kubernetes-object-monitor docker-build'
6264
# Log Collection (Docker-based)
6365
- component: log-collector
6466
make_command: 'make -C log-collector docker-build-log-collector'

.github/workflows/e2e-test.yml

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -27,14 +27,6 @@ on:
2727
branches:
2828
- main
2929
- "pull-request/[0-9]+"
30-
paths-ignore:
31-
- '**/*.md'
32-
- 'docs/**'
33-
- 'LICENSE'
34-
- '.github/ISSUE_TEMPLATE/**'
35-
- '.github/*.yaml'
36-
- '.github/*.yml'
37-
- '.github/headers/**'
3830
workflow_dispatch:
3931

4032
concurrency:

.github/workflows/lint-test.yml

Lines changed: 42 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -19,14 +19,6 @@ on:
1919
branches:
2020
- main
2121
- "pull-request/[0-9]+"
22-
paths-ignore:
23-
- '**/*.md'
24-
- 'docs/**'
25-
- 'LICENSE'
26-
- '.github/ISSUE_TEMPLATE/**'
27-
- '.github/*.yaml'
28-
- '.github/*.yml'
29-
- '.github/headers/**'
3022
tags:
3123
- 'v*'
3224
workflow_dispatch:
@@ -75,6 +67,9 @@ jobs:
7567
- component: helm-charts
7668
make_command: 'make helm-lint'
7769
step_name: 'Validate Helm charts'
70+
- component: scripts
71+
make_command: 'make -C scripts lint'
72+
step_name: 'Run shellcheck on scripts'
7873
steps:
7974
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
8075

@@ -84,11 +79,18 @@ jobs:
8479
- name: ${{ matrix.step_name }}
8580
run: ${{ matrix.make_command }}
8681

82+
- name: Load Helm version from .versions.yaml
83+
if: matrix.component == 'helm-charts'
84+
id: helm-version
85+
run: |
86+
HELM_VERSION=$(yq eval '.testing_tools.helm' .versions.yaml)
87+
echo "helm_version=${HELM_VERSION}" >> $GITHUB_OUTPUT
88+
8789
- name: Setup Helm
8890
if: matrix.component == 'helm-charts'
8991
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4.3.1
9092
with:
91-
version: 'v3.14.4'
93+
version: ${{ steps.helm-version.outputs.helm_version }}
9294

9395
- name: Validate Helm Charts
9496
if: matrix.component == 'helm-charts'
@@ -102,6 +104,7 @@ jobs:
102104
include:
103105
- component: syslog-health-monitor
104106
- component: csp-health-monitor
107+
- component: kubernetes-object-monitor
105108
- component: gpu-health-monitor
106109
install_dcgm: 'true'
107110
python_required: 'true'
@@ -134,12 +137,15 @@ jobs:
134137
- platform-connectors
135138
- store-client
136139
- commons
140+
- data-models
137141
- health-events-analyzer
138142
- fault-quarantine
139143
- labeler
144+
- metadata-collector
140145
- node-drainer
141146
- fault-remediation
142147
- janitor
148+
- tests
143149
steps:
144150
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
145151

@@ -158,11 +164,36 @@ jobs:
158164
${{ matrix.component }}/coverage.txt
159165
${{ matrix.component }}/report.xml
160166
167+
tilt-modules-lint-test:
168+
runs-on: linux-amd64-cpu16
169+
timeout-minutes: 30
170+
strategy:
171+
matrix:
172+
component:
173+
- tilt/simple-health-client
174+
steps:
175+
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
176+
177+
- name: Setup build environment
178+
uses: ./.github/actions/setup-ci-env
179+
180+
- name: Run lint and test
181+
run: make -C ${{ matrix.component }} lint-test
182+
183+
- name: Upload artifacts
184+
uses: ./.github/actions/upload-test-artifacts
185+
with:
186+
component-name: simple-health-client
187+
file-paths: |
188+
${{ matrix.component }}/coverage.xml
189+
${{ matrix.component }}/coverage.txt
190+
${{ matrix.component }}/report.xml
191+
161192
consolidated-coverage-report:
162193
if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/heads/pull-request/')
163194
runs-on: linux-amd64-cpu16
164195
timeout-minutes: 15
165-
needs: [health-monitors-lint-test, modules-lint-test]
196+
needs: [health-monitors-lint-test, modules-lint-test, tilt-modules-lint-test]
166197
steps:
167198
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
168199

@@ -359,7 +390,7 @@ jobs:
359390
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
360391
runs-on: linux-amd64-cpu16
361392
timeout-minutes: 15
362-
needs: [health-monitors-lint-test, modules-lint-test]
393+
needs: [health-monitors-lint-test, modules-lint-test, tilt-modules-lint-test]
363394
steps:
364395
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
365396

.github/workflows/publish.yml

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -20,14 +20,6 @@ on:
2020
- 'v*'
2121
branches:
2222
- main
23-
paths-ignore:
24-
- '**/*.md'
25-
- 'docs/**'
26-
- 'LICENSE'
27-
- '.github/ISSUE_TEMPLATE/**'
28-
- '.github/*.yaml'
29-
- '.github/*.yml'
30-
- '.github/headers/**'
3123
workflow_dispatch:
3224
inputs:
3325
tag:
@@ -114,6 +106,9 @@ jobs:
114106
- component: syslog-health-monitor
115107
make_command: 'make -C health-monitors/syslog-health-monitor docker-publish'
116108
container_name: 'nvsentinel/syslog-health-monitor'
109+
- component: metadata-collector
110+
make_command: 'make -C metadata-collector docker-publish'
111+
container_name: 'nvsentinel/metadata-collector'
117112
- component: log-collector
118113
make_command: 'make -C log-collector docker-publish-log-collector'
119114
container_name: 'nvsentinel/log-collector'

.github/workflows/release.yml

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,6 @@ on:
1818
push:
1919
tags:
2020
- 'v*'
21-
paths-ignore:
22-
- '**/*.md'
23-
- 'docs/**'
24-
- 'LICENSE'
25-
- '.github/ISSUE_TEMPLATE/**'
26-
- '.github/*.yaml'
27-
- '.github/*.yml'
28-
- '.github/headers/**'
2921
workflow_dispatch:
3022
inputs:
3123
tag:
@@ -90,10 +82,16 @@ jobs:
9082
with:
9183
ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.tag || github.ref }}
9284

85+
- name: Load Helm version from .versions.yaml
86+
id: helm-version
87+
run: |
88+
HELM_VERSION=$(yq eval '.testing_tools.helm' .versions.yaml)
89+
echo "helm_version=${HELM_VERSION}" >> $GITHUB_OUTPUT
90+
9391
- name: Install helm
9492
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4.3.1
9593
with:
96-
version: 'latest'
94+
version: ${{ steps.helm-version.outputs.helm_version }}
9795

9896
- name: Configure Helm for GitHub Packages
9997
run: |
@@ -143,7 +141,7 @@ jobs:
143141
path: .
144142

145143
- name: Create GitHub Release
146-
uses: softprops/action-gh-release@6da8fa9354ddfdc4aeace5fc48d7f679b5214090 # v2.4.1
144+
uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2.4.2
147145
with:
148146
tag_name: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.tag || github.ref_name }}
149147
name: Release ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.tag || github.ref_name }}

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -447,5 +447,6 @@ fault-remediation/fault-remediation
447447
health-events-analyzer/health-events-analyzer
448448
health-monitors/syslog-health-monitor/syslog-health-monitor
449449
labeler/labeler
450+
metadata-collector/metadata-collector
450451
node-drainer/node-drainer
451452
platform-connectors/platform-connectors

0 commit comments

Comments
 (0)