Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 58 additions & 1 deletion .github/workflows/operator-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,67 @@ jobs:
make setup-kind-cluster
make test

# CI job: run the deployment-policy end-to-end suite on a dedicated
# multi-node Kind cluster (see the kind-config.yaml referenced below).
deployment-policy-tests:
  runs-on: ubuntu-latest
  steps:
    # Check out with full history and tags.
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
        fetch-tags: true

    - name: Setup Go ${{ env.GO_VERSION }}
      uses: actions/setup-go@v5
      with:
        cache-dependency-path: operator/go.sum
        go-version: ${{ env.GO_VERSION }}

    - name: Log in to the Container registry
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Create 15-node Kind Cluster
      id: kind
      uses: helm/kind-action@v1
      with:
        cluster_name: skyhook-dp-test
        config: k8s-tests/chainsaw/deployment-policy/kind-config.yaml
        node_image: kindest/node:v1.34.0
        version: v0.30.0

    # Build tools and the Go build cache are cached to speed up builds.
    # The cache key changes whenever operator/deps.mk changes.
    - name: Restore cached Binaries
      id: cached-binaries
      uses: actions/cache/restore@v4
      with:
        path: |
          ${{ github.workspace }}/operator/bin
          ~/.cache/go-build
        key: ${{ env.GO_VERSION }}-${{ runner.os }}-${{ runner.arch }}-bin-${{ hashFiles('operator/deps.mk') }}
        restore-keys: ${{ env.GO_VERSION }}-${{ runner.os }}-${{ runner.arch }}-bin-

    # Only install (and then save) when the exact cache key was not restored.
    - name: Install dependencies
      if: steps.cached-binaries.outputs.cache-hit != 'true'
      run: |
        cd operator
        make install-deps

    - name: Save cached Binaries
      id: save-cached-binaries
      if: steps.cached-binaries.outputs.cache-hit != 'true'
      uses: actions/cache/save@v4
      with:
        path: |
          ${{ github.workspace }}/operator/bin
          ~/.cache/go-build
        key: ${{ env.GO_VERSION }}-${{ runner.os }}-${{ runner.arch }}-bin-${{ hashFiles('operator/deps.mk') }}

    # Run the deployment policy E2E tests against the cluster created above.
    - name: deployment-policy-e2e-tests
      run: |
        cd operator
        make deployment-policy-tests

# Build multi-platform container image and push to registry
build-and-push-operator:
runs-on: ubuntu-latest
needs: [tests] # Don't run the build and push if the k8s tests fail
needs: [tests, deployment-policy-tests] # Don't run the build and push if tests fail
# Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
permissions:
contents: read
Expand Down
44 changes: 44 additions & 0 deletions k8s-tests/chainsaw/deployment-policy/kind-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Kind cluster shared by all deployment policy tests:
# 1 control-plane node plus 15 worker nodes.
#   - Multi-compartment test uses all 15 workers
#   - Linear strategy test uses the first 8 workers
#   - Overlapping selectors test uses the first 6 workers

apiVersion: kind.x-k8s.io/v1alpha4
kind: Cluster
nodes:
  - role: control-plane
  # 15 workers
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
90 changes: 90 additions & 0 deletions k8s-tests/chainsaw/deployment-policy/label-nodes.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/bin/bash

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -euo pipefail

# Add or remove labels on a contiguous slice of the cluster's worker nodes.
#
# Usage: label-nodes.sh <operation> <node_range> <label1=value1> [label2=value2] ...
#
# Operations:
#   add       <node_range> <key=value>...  set (overwrite) labels on the selected workers
#   remove    <node_range> <key>...        remove label keys from the selected workers (best effort)
#   clean-all <label_key>                  remove one label key from every node (best effort)
#
# <node_range> is a zero-based index ("3") or an inclusive range ("0-4") into
# the list of non-control-plane nodes sorted by name.
#
# Examples:
#   label-nodes.sh add 0-4 priority=critical skyhook.nvidia.com/test-node=skyhooke2e
#   label-nodes.sh remove 0-14 priority env region
#   label-nodes.sh clean-all skyhook.nvidia.com/test-node
#
# Exit status: 0 on success, 1 on invalid arguments or when the cluster has
# fewer workers than the range requires; a failing kubectl call in `add` or
# while listing nodes aborts the script with kubectl's own status.

usage() {
  echo "Usage: $0 {add|remove|clean-all} ..."
}

# Print an error message to stderr and stop.
die() {
  echo "ERROR: $*" >&2
  exit 1
}

OPERATION=${1:-}
if [ -z "$OPERATION" ]; then
  echo "ERROR: Missing operation" >&2
  usage >&2
  exit 1
fi
shift

# Reject unknown operations up front, before touching the cluster.
case "$OPERATION" in
  clean-all)
    LABEL_PREFIX=${1:-}
    if [ -z "$LABEL_PREFIX" ]; then
      die "clean-all requires a label key"
    fi
    echo "Cleaning all labels matching: $LABEL_PREFIX"
    # Best effort: errors are deliberately ignored so cleanup never fails a run.
    kubectl label nodes --all "${LABEL_PREFIX}-" --overwrite 2>/dev/null || true
    echo "✓ Cleanup complete"
    exit 0
    ;;
  add | remove) ;;
  *)
    echo "ERROR: Unknown operation: $OPERATION" >&2
    usage >&2
    exit 1
    ;;
esac

# Parse node range: either "N" or "N-M", digits only.
NODE_RANGE=${1:-}
range_re='^([0-9]+)(-([0-9]+))?$'
if [[ $NODE_RANGE =~ $range_re ]]; then
  range_start=${BASH_REMATCH[1]}
  range_end=${BASH_REMATCH[3]:-$range_start}
else
  die "Invalid node range '$NODE_RANGE' (expected N or N-M)"
fi
shift

# Force base 10 so a zero-padded index such as "08" is not parsed as octal.
START=$((10#$range_start))
END=$((10#$range_end))
if [ "$START" -gt "$END" ]; then
  die "Invalid node range '$NODE_RANGE': start is greater than end"
fi

# `add` without labels can never succeed; fail before querying the cluster.
if [ "$OPERATION" = "add" ] && [ "$#" -eq 0 ]; then
  die "add requires at least one label=value argument"
fi

# Get all worker nodes (excluding control-plane), sorted by name. The listing
# is captured first so a kubectl failure aborts the script instead of being
# hidden behind the exit status of the rest of the pipeline.
NODE_NAMES=$(kubectl get nodes --no-headers -o custom-columns=NAME:.metadata.name)
WORKERS=()
# `read` without IFS= trims surrounding whitespace; blank lines are skipped.
while read -r node; do
  if [ -n "$node" ]; then
    WORKERS+=("$node")
  fi
done < <(printf '%s\n' "$NODE_NAMES" | grep -v control-plane | sort)

# Validate we have enough nodes
REQUIRED=$((END + 1))
if [ "${#WORKERS[@]}" -lt "$REQUIRED" ]; then
  echo "ERROR: Need at least $REQUIRED worker nodes for this operation" >&2
  echo "Found: ${#WORKERS[@]} workers" >&2
  exit 1
fi

case "$OPERATION" in
  add)
    echo "Adding labels to nodes [$START-$END]: $*"
    for ((i = START; i <= END; i++)); do
      # Labels are passed as separate, quoted arguments.
      kubectl label node "${WORKERS[$i]}" "$@" --overwrite
    done
    ;;
  remove)
    # kubectl removes a label when its key is given with a trailing "-".
    LABELS_TO_REMOVE=()
    for label in "$@"; do
      LABELS_TO_REMOVE+=("${label}-")
    done
    echo "Removing labels from nodes [$START-$END]: $*"
    if [ "${#LABELS_TO_REMOVE[@]}" -gt 0 ]; then
      for ((i = START; i <= END; i++)); do
        # Best effort: removal errors are deliberately ignored.
        kubectl label node "${WORKERS[$i]}" "${LABELS_TO_REMOVE[@]}" --overwrite 2>/dev/null || true
      done
    fi
    ;;
esac

echo "✓ Operation complete"

Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Expected status of the legacy-interruption-budget-test Skyhook:
# a `__default__` compartment that matched 6 nodes with a ceiling of 3.
apiVersion: skyhook.nvidia.com/v1alpha1
kind: Skyhook
metadata:
  name: legacy-interruption-budget-test
status:
  compartmentStatuses:
    __default__:
      ceiling: 3
      matched: 6
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# yaml-language-server: $schema=https://raw.githubusercontent.com/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json
apiVersion: chainsaw.kyverno.io/v1alpha1
kind: Test
metadata:
  name: legacy-interruption-budget-compatibility
spec:
  description: |
    Tests backwards compatibility with legacy InterruptionBudget.
    - Creates Skyhook with interruptionBudget (count: 3) instead of deploymentPolicy
    - Verifies that a synthetic __default__ compartment is created
    - Verifies that the budget ceiling is respected (max 3 nodes in progress)
    - Ensures existing customers' configurations continue to work
  timeouts:
    exec: 180s
    assert: 120s
  steps:
    # Label the worker nodes this test's Skyhook selects.
    # `set -e` makes the step fail on the first failing command; without it the
    # script's exit status is that of its last command, so a failed labeling
    # call would be hidden by the trailing echo / kubectl listing.
    - name: setup-nodes
      try:
        - script:
            content: |
              set -e
              chmod +x ../label-nodes.sh
              # Clean up any existing labels from previous tests
              ../label-nodes.sh clean-all skyhook.nvidia.com/test-node
              # Label first 6 worker nodes
              ../label-nodes.sh add 0-5 skyhook.nvidia.com/test-node=skyhooke2e
              echo "✓ Node labeling complete"
              kubectl get nodes -L skyhook.nvidia.com/test-node --sort-by=.metadata.name | head -8

    # Create the Skyhook, then pause briefly before asserting on its status.
    - name: apply-skyhook
      try:
        - apply:
            file: skyhook.yaml
        - sleep:
            duration: 10s

    - name: verify-default-compartment
      try:
        - assert:
            file: assert-default-compartment.yaml

    # `set -e` is required here: each metrics check must fail the step on its
    # own, otherwise the final echo lines would always report success.
    - name: verify-metrics
      try:
        - script:
            content: |
              set -e
              echo "=== Verifying legacy compatibility metrics ==="

              # Verify metrics for synthetic __default__ compartment
              ../metrics_test.py skyhook_rollout_matched_nodes 6 -t skyhook_name=legacy-interruption-budget-test -t policy_name=legacy -t compartment_name=__default__ -t strategy=fixed
              ../metrics_test.py skyhook_rollout_ceiling 3 -t skyhook_name=legacy-interruption-budget-test -t policy_name=legacy -t compartment_name=__default__ -t strategy=fixed

              # Verify that the legacy policy name is "legacy" and not a real policy
              echo "✓ Legacy compatibility metrics verified!"
              echo "✓ Metrics use policy_name=legacy for backwards compatibility"

    # Remove the test label from all nodes again.
    - name: cleanup
      try:
        - script:
            content: |
              ../label-nodes.sh clean-all skyhook.nvidia.com/test-node
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Skyhook that uses the legacy InterruptionBudget instead of a DeploymentPolicy.
apiVersion: skyhook.nvidia.com/v1alpha1
kind: Skyhook
metadata:
  name: legacy-interruption-budget-test
spec:
  # Legacy budget: max 3 nodes at once.
  interruptionBudget:
    count: 3
  # Targets the nodes carrying the test label.
  nodeSelectors:
    matchLabels:
      skyhook.nvidia.com/test-node: skyhooke2e
  packages:
    test-pkg:
      image: 'ghcr.io/nvidia/skyhook/agentless'
      version: '6.2.0'
  priority: 100

Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Expected status of the linear-strategy-test Skyhook:
# the `production` compartment matched 8 nodes with a ceiling of 4.
apiVersion: skyhook.nvidia.com/v1alpha1
kind: Skyhook
metadata:
  name: linear-strategy-test
status:
  compartmentStatuses:
    production:
      ceiling: 4
      matched: 8
Loading