Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions .github/workflows/agentless-container.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,6 @@
# LICENSE END
#







name: Build and push agentless container image

# Configures this workflow to run every time a tag is created
Expand Down
6 changes: 0 additions & 6 deletions .github/workflows/commit-linting.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,6 @@
# LICENSE END
#







name: Commit Linting
on: pull_request
jobs:
Expand Down
34 changes: 17 additions & 17 deletions .github/workflows/operator-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,26 +49,26 @@ on:
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
GO_VERSION: 1.23.7
GO_VERSION: 1.23.8
PLATFORMS: linux/amd64,linux/arm64

# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu.
jobs:
unit-test:
## Moved into the `tests` job below, which runs all the tests in one place so dependencies are not installed twice.
# unit-test:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v4
# - name: Setup Go 1.23
# uses: actions/setup-go@v5
# with:
# go-version: 1.23
# - name: Unit tests
# run: |
# cd operator
# make unit-tests
tests:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup Go 1.23
uses: actions/setup-go@v5
with:
go-version: 1.23
- name: Unit tests
run: |
cd operator
make unit-tests
k8s-tests:
runs-on: ubuntu-latest
needs: [unit-test] # Don't run the k8s tests if the unit tests fail
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -94,10 +94,10 @@ jobs:
run: |
cd operator
GITHUB_TOKEN=${{ secrets.github_token }} make create-kind-cluster
make e2e-tests
make test
build-and-push-operator:
runs-on: ubuntu-latest
needs: [k8s-tests] # Don't run the build and push if the k8s tests fail
needs: [tests] # Don't run the build and push if the k8s tests fail
# Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
permissions:
contents: read
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
.cursorignore
.pytest_cache
.vscode
.idea
35 changes: 35 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Debug Manager",
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceRoot}/operator/cmd/main.go",
"buildFlags": "--ldflags '-X github.com/NVIDIA/skyhook/internal/version.GIT_SHA=foobars -X github.com/NVIDIA/skyhook/internal/version.VERSION=v0.5.0'",
"env": {
"ENABLE_WEBHOOKS": "false",
"LOG_ENCODER": "console",
"REAPPLY_ON_REBOOT": "false",
// "AGENT_IMAGE": "ghcr.io/nvidia/skyhook/agent:latest", // the real agent image, for testing actual packages
"AGENT_IMAGE": "ghcr.io/nvidia/skyhook/agentless:6.2.0", // this is the mock image, if you need to test for real, use the real image.
},
"args": [],
"showLog": true
},
{
"name": "Test Current File",
"type": "go",
"request": "launch",
"mode": "test",
"program": "${file}",
"env": {},
"args": [],
"showLog": true
}
]
}
7 changes: 0 additions & 7 deletions agent/skyhook-agent/src/skyhook_agent/__about__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,4 @@
# LICENSE END
#








__version__ = "0.0.0"
8 changes: 0 additions & 8 deletions agent/skyhook-agent/src/skyhook_agent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,3 @@
#
# LICENSE END
#








38 changes: 35 additions & 3 deletions agent/skyhook-agent/src/skyhook_agent/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,26 @@
import os
import shutil
import glob
import signal

from skyhook_agent.step import Step, UpgradeStep, Idempotence, Mode, CHECK_TO_APPLY
from skyhook_agent import interrupts, config
from typing import List

import logging as logger

# Global flag to track if we received SIGTERM
received_sigterm = False

def sigterm_handler(signum, frame):
    """Signal callback for SIGTERM: record that a shutdown was requested.

    Sets the module-level ``received_sigterm`` flag to True and then emits an
    info-level log line. ``signum`` and ``frame`` are accepted because the
    callback is registered through ``signal.signal``; neither is used here.
    """
    global received_sigterm
    shutdown_notice = "Received SIGTERM signal - initiating graceful shutdown"
    # Record the request before logging, so the flag is set first.
    received_sigterm = True
    logger.info(shutdown_notice)

# Register the SIGTERM handler
signal.signal(signal.SIGTERM, sigterm_handler)

class SkyhookValidationError(Exception):
pass

Expand Down Expand Up @@ -414,7 +427,11 @@ def remove_flags(step_data: dict[Mode, list[Step|UpgradeStep]], config_data: dic
if os.path.exists(flag_file): # Check if the file exists before trying to remove it
os.remove(flag_file)

def main(mode: Mode, root_mount: str, copy_dir: str, interrupt_data: None|str, always_run_step=False):
def main(mode: Mode, root_mount: str, copy_dir: str, interrupt_data: None|str, always_run_step=False) -> bool:
'''
returns True if there is a failure in the steps, otherwise returns False
'''

if mode not in set(map(str, Mode)):
logger.warning(f"This version of the Agent doesn't support the {mode} mode. Options are: {','.join(map(str, Mode))}.")
return False
Expand Down Expand Up @@ -448,9 +465,19 @@ def main(mode: Mode, root_mount: str, copy_dir: str, interrupt_data: None|str, a
if not os.path.exists(f"{root_mount}/{copy_dir}/configmaps/{f}"):
raise SkyhookValidationError(f"Expected config file {f} not found in configmaps directory.")

return agent_main(mode, root_mount, copy_dir, config_data, interrupt_data, always_run_step)
try:
return agent_main(mode, root_mount, copy_dir, config_data, interrupt_data, always_run_step)
except Exception as e:
if received_sigterm:
logger.info("Gracefully shutting down due to SIGTERM")
# Perform any cleanup if needed
return True
raise

def agent_main(mode: Mode, root_mount: str, copy_dir: str, config_data: dict, interrupt_data: None|str, always_run_step=False):
def agent_main(mode: Mode, root_mount: str, copy_dir: str, config_data: dict, interrupt_data: None|str, always_run_step=False) -> bool:
'''
returns True if there is a failure in the steps, otherwise returns False
'''

# Pull out step_data so it matches with existing code
step_data = config_data["modes"]
Expand All @@ -464,6 +491,11 @@ def agent_main(mode: Mode, root_mount: str, copy_dir: str, config_data: dict, in
logger.warning(f" There are no {mode} steps defined. This will be ran as a no-op.")

for step in step_data.get(mode, []):
# Check for SIGTERM
if received_sigterm:
logger.info("SIGTERM received, stopping step execution")
return True

# Make the flag file without the host path argument (first one). This is because in operator world
# the host path is going to change every time the Skyhook Custom Resource changes so it would
# look like a step hasn't been run when in fact it had.
Expand Down
7 changes: 0 additions & 7 deletions agent/skyhook-agent/src/skyhook_agent/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,6 @@
# LICENSE END
#








from enum import Enum

class SortableEnum(Enum):
Expand Down
6 changes: 0 additions & 6 deletions agent/skyhook-agent/src/skyhook_agent/interrupts.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,6 @@
#








import string
import base64
import json
Expand Down
6 changes: 0 additions & 6 deletions agent/skyhook-agent/src/skyhook_agent/step.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,6 @@
#








from typing import IO
from enum import Enum
import json
Expand Down
8 changes: 0 additions & 8 deletions agent/skyhook-agent/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,3 @@
#
# LICENSE END
#








7 changes: 0 additions & 7 deletions agent/skyhook-agent/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,6 @@
# LICENSE END
#








import unittest
from tempfile import TemporaryDirectory

Expand Down
1 change: 1 addition & 0 deletions agent/skyhook-agent/tests/test_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# LICENSE END
#


import unittest
import tempfile
import os
Expand Down
6 changes: 0 additions & 6 deletions agent/skyhook-agent/tests/test_enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,6 @@
#








import unittest

from skyhook_agent.enums import SortableEnum, get_latest_schema
Expand Down
6 changes: 0 additions & 6 deletions agent/skyhook-agent/tests/test_interrupts.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,6 @@
#








import unittest
import base64
import json
Expand Down
6 changes: 0 additions & 6 deletions agent/skyhook-agent/tests/test_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,6 @@
#








import unittest, os

from tempfile import TemporaryDirectory
Expand Down
4 changes: 4 additions & 0 deletions chart/templates/skyhook-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,10 @@ spec:
- name
type: object
type: array
gracefulShutdown:
description: GracefulShutdown is the graceful shutdown timeout
for the package, if not set, uses k8s default
type: string
image:
description: Image is the container image to run. Do not included
the tag, that is set in the version.
Expand Down
1 change: 0 additions & 1 deletion containers/agentless/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
# LICENSE END
#


ARG BUSYBOX_TAG=1.36.1

FROM busybox:${BUSYBOX_TAG}
Expand Down
7 changes: 7 additions & 0 deletions containers/agentless/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@
# LICENSE END
#

# Handle SIGTERM gracefully
# Trap target: announce the shutdown, pause for three seconds, then exit
# with status 0.
cleanup() {
    printf '%s\n' "Received SIGTERM signal, shutting down gracefully..."
    sleep 3
    exit 0
}
trap cleanup SIGTERM

SLEEP_LEN=${SLEEP_LEN:-$(($RANDOM % 5 + 5))}

Expand Down
4 changes: 4 additions & 0 deletions containers/ci.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
# LICENSE END
#

## This container is not used in GitHub CI; it dates from before we open-sourced this project.
## We should move to doing something like this in the GitHub Actions workflow to save time by not installing all the deps on every run,
## but for now this is kept only for when we get to that.

ARG GO_VERSION

FROM golang:${GO_VERSION}-bookworm as builder
Expand Down
6 changes: 0 additions & 6 deletions demos/interrupt-wait-for-pod/non-workload.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,6 @@
# LICENSE END
#







apiVersion: apps/v1
kind: ReplicaSet
metadata:
Expand Down
Loading
Loading