Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,14 @@ metadata:
kind: Skyhook
name: config-skyhook
data:
(length(@)): 2
(length(@)): 3
labels.json:
(contains(@, 'skyhook.nvidia.com/test-node')): true
(contains(@, 'skyhook.nvidia.com/status_config-skyhook')): true
annotations.json:
(contains(@, 'skyhook.nvidia.com/status_config-skyhook')): true
(contains(@, 'skyhook.nvidia.com/nodeState_config-skyhook')): true
packages.json:
(contains(@, '"agentVersion"')): true
(contains(@, '"dexter"')): true
(contains(@, '"3.2.3"')): true
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,14 @@ metadata:
kind: Skyhook
name: config-skyhook
data:
(length(@)): 2
(length(@)): 3
labels.json:
(contains(@, 'skyhook.nvidia.com/test-node')): true
(contains(@, 'skyhook.nvidia.com/status_config-skyhook')): true
annotations.json:
(contains(@, 'skyhook.nvidia.com/status_config-skyhook')): true
(contains(@, 'skyhook.nvidia.com/nodeState_config-skyhook')): true
packages.json:
(contains(@, '"agentVersion"')): true
(contains(@, '"dexter"')): true
(contains(@, '"3.2.3"')): true
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,16 @@ metadata:
kind: Skyhook
name: config-skyhook
data:
(length(@)): 2
(length(@)): 3
labels.json:
(contains(@, 'skyhook.nvidia.com/test-node')): true
(contains(@, 'skyhook.nvidia.com/status_config-skyhook')): true
annotations.json:
(contains(@, 'skyhook.nvidia.com/status_config-skyhook')): true
(contains(@, 'skyhook.nvidia.com/nodeState_config-skyhook')): true
packages.json:
(contains(@, '"agentVersion"')): true
(contains(@, '"dexter"')): true
(contains(@, '"baxter"')): true
(contains(@, '"spencer"')): true
(contains(@, '"3.2.3"')): true
8 changes: 7 additions & 1 deletion k8s-tests/chainsaw/skyhook/config-skyhook/assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,16 @@ metadata:
kind: Skyhook
name: config-skyhook
data:
(length(@)): 2
(length(@)): 3
labels.json:
(contains(@, 'skyhook.nvidia.com/test-node')): true
(contains(@, 'skyhook.nvidia.com/status_config-skyhook')): true
annotations.json:
(contains(@, 'skyhook.nvidia.com/status_config-skyhook')): true
(contains(@, 'skyhook.nvidia.com/nodeState_config-skyhook')): true
packages.json:
(contains(@, '"agentVersion"')): true
(contains(@, '"dexter"')): true
(contains(@, '"baxter"')): true
(contains(@, '"spencer"')): true
(contains(@, '"3.2.3"')): true
6 changes: 5 additions & 1 deletion k8s-tests/chainsaw/skyhook/delete-skyhook/assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,14 @@ metadata:
kind: Skyhook
name: delete-skyhook
data:
(length(@)): 2
(length(@)): 3
labels.json:
(contains(@, 'skyhook.nvidia.com/test-node')): true
(contains(@, 'skyhook.nvidia.com/status_delete-skyhook')): true
annotations.json:
(contains(@, 'skyhook.nvidia.com/status_delete-skyhook')): true
(contains(@, 'skyhook.nvidia.com/nodeState_delete-skyhook')): true
packages.json:
(contains(@, '"agentVersion"')): true
(contains(@, '"dexter"')): true
(contains(@, '"3.2.3"')): true
5 changes: 4 additions & 1 deletion k8s-tests/chainsaw/skyhook/interrupt-grouping/assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,13 @@ metadata:
kind: Skyhook
name: interrupt-grouping
data:
(length(@)): 2
(length(@)): 3
labels.json:
(contains(@, 'skyhook.nvidia.com/test-node')): true
(contains(@, 'skyhook.nvidia.com/status_interrupt-grouping')): true
annotations.json:
(contains(@, 'skyhook.nvidia.com/status_interrupt-grouping')): true
(contains(@, 'skyhook.nvidia.com/nodeState_interrupt-grouping')): true
packages.json:
(contains(@, '"agentVersion"')): true
(contains(@, '"dax"')): true
5 changes: 4 additions & 1 deletion k8s-tests/chainsaw/skyhook/interrupt/assert-b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -260,10 +260,13 @@ metadata:
kind: Skyhook
name: interrupt
data:
(length(@)): 2
(length(@)): 3
labels.json:
(contains(@, 'skyhook.nvidia.com/test-node')): true
(contains(@, 'skyhook.nvidia.com/status_interrupt')): true
annotations.json:
(contains(@, 'skyhook.nvidia.com/status_interrupt')): true
(contains(@, 'skyhook.nvidia.com/nodeState_interrupt')): true
packages.json:
(contains(@, '"agentVersion"')): true
(contains(@, '"dexter"')): true
5 changes: 4 additions & 1 deletion k8s-tests/chainsaw/skyhook/simple-skyhook/assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,13 @@ metadata:
kind: Skyhook
name: simple-skyhook
data:
(length(@)): 2
(length(@)): 3
labels.json:
(contains(@, 'skyhook.nvidia.com/test-node')): true
(contains(@, 'skyhook.nvidia.com/status_simple-skyhook')): true
annotations.json:
(contains(@, 'skyhook.nvidia.com/status_simple-skyhook')): true
(contains(@, 'skyhook.nvidia.com/nodeState_simple-skyhook')): true
packages.json:
(contains(@, '"agentVersion"')): true
(contains(@, '"dexter"')): true
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,14 @@ metadata:
kind: Skyhook
name: simple-update-skyhook
data:
(length(@)): 2
(length(@)): 3
labels.json:
(contains(@, 'skyhook.nvidia.com/test-node')): true
(contains(@, 'skyhook.nvidia.com/status_simple-update-skyhook')): true
annotations.json:
(contains(@, 'skyhook.nvidia.com/status_simple-update-skyhook')): true
(contains(@, 'skyhook.nvidia.com/nodeState_simple-update-skyhook')): true
packages.json:
(contains(@, '"agentVersion"')): true
(contains(@, '"baxter"')): true
(contains(@, '"2.3.1-test"')): true
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,14 @@ metadata:
kind: Skyhook
name: simple-update-skyhook
data:
(length(@)): 2
(length(@)): 3
labels.json:
(contains(@, 'skyhook.nvidia.com/test-node')): true
(contains(@, 'skyhook.nvidia.com/status_simple-update-skyhook')): true
annotations.json:
(contains(@, 'skyhook.nvidia.com/status_simple-update-skyhook')): true
(contains(@, 'skyhook.nvidia.com/nodeState_simple-update-skyhook')): true
packages.json:
(contains(@, '"agentVersion"')): true
(contains(@, '"dexter"')): true
(contains(@, '"1.2.3"')): true
51 changes: 48 additions & 3 deletions operator/internal/controller/skyhook_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,12 @@ func (o *SkyhookOperatorOptions) Validate() error {
return nil
}

// AgentVersion returns the tag portion of AgentImage.
//
// The tag is whatever follows the last ":" inside the final path segment of
// the image reference, so a registry port ("localhost:5000/agent:v1") is never
// mistaken for a tag. A digest suffix ("@sha256:...") is ignored. An empty
// string is returned when the reference carries no tag at all.
func (o *SkyhookOperatorOptions) AgentVersion() string {
	ref := o.AgentImage

	// Strip a digest suffix first; its "sha256:<hex>" colon is not a tag separator.
	if at := strings.Index(ref, "@"); at >= 0 {
		ref = ref[:at]
	}

	// A colon only introduces a tag when it appears after the last "/";
	// an earlier colon belongs to the registry host:port.
	colon := strings.LastIndex(ref, ":")
	if colon <= strings.LastIndex(ref, "/") {
		return ""
	}
	return ref[colon+1:]
}

func (o *SkyhookOperatorOptions) GetRuntimeRequiredTaint() corev1.Taint {
to_add, _, _ := taints.ParseTaints([]string{o.RuntimeRequiredTaint})
return to_add[0]
Expand Down Expand Up @@ -619,9 +625,9 @@ func (r *SkyhookReconciler) SaveNodesAndSkyhook(ctx context.Context, clusterStat
}
saved = true

err = r.UpsertNodeLabelsAnnotations(ctx, skyhook.GetSkyhook(), node.GetNode())
err = r.UpsertNodeLabelsAnnotationsPackages(ctx, skyhook.GetSkyhook(), node.GetNode())
if err != nil {
errs = append(errs, fmt.Errorf("error upserting labels and annotations config map for node [%s]: %w", node.GetNode().Name, err))
errs = append(errs, fmt.Errorf("error upserting labels, annotations, and packages config map for node [%s]: %w", node.GetNode().Name, err))
}

if node.IsComplete() {
Expand Down Expand Up @@ -792,7 +798,7 @@ func generateSafeName(maxLen int, nameParts ...string) string {
return strings.ToLower(fmt.Sprintf("%s-%s", name, uniqueStr))
}

func (r *SkyhookReconciler) UpsertNodeLabelsAnnotations(ctx context.Context, skyhook *wrapper.Skyhook, node *corev1.Node) error {
func (r *SkyhookReconciler) UpsertNodeLabelsAnnotationsPackages(ctx context.Context, skyhook *wrapper.Skyhook, node *corev1.Node) error {
// No work to do if there is no labels or annotations for node
if len(node.Labels) == 0 && len(node.Annotations) == 0 {
return nil
Expand All @@ -808,6 +814,13 @@ func (r *SkyhookReconciler) UpsertNodeLabelsAnnotations(ctx context.Context, sky
return fmt.Errorf("error converting labels into byte array: %w", err)
}

// marshal intermediary package metadata for the agent
metadata := NewSkyhookMetadata(r.opts, skyhook)
packages, err := metadata.Marshal()
if err != nil {
return fmt.Errorf("error converting packages into byte array: %w", err)
}

configMapName := generateSafeName(253, skyhook.Name, node.Name, "metadata")
newCM := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -824,6 +837,7 @@ func (r *SkyhookReconciler) UpsertNodeLabelsAnnotations(ctx context.Context, sky
Data: map[string]string{
"annotations.json": string(annotations),
"labels.json": string(labels),
"packages.json": string(packages),
},
}

Expand Down Expand Up @@ -1401,6 +1415,37 @@ func (r *SkyhookReconciler) ValidateNodeConfigmaps(ctx context.Context, skyhookN
}
}

// Ensure packages.json is present and up-to-date for expected configmaps
skyhookCR, err := r.dal.GetSkyhook(ctx, skyhookName)
if err != nil {
return update, fmt.Errorf("error getting skyhook for metadata validation: %w", err)
}
skyhookWrapper := wrapper.NewSkyhookWrapper(skyhookCR)
metadata := NewSkyhookMetadata(r.opts, skyhookWrapper)
expectedBytes, err := metadata.Marshal()
if err != nil {
return update, fmt.Errorf("error marshalling metadata for validation: %w", err)
}
expected := string(expectedBytes)

for i := range list.Items {
cm := &list.Items[i]
if _, ok := shouldExist[cm.Name]; !ok {
continue
}
if cm.Data == nil {
cm.Data = make(map[string]string)
}
if cm.Data["packages.json"] != expected {
cm.Data["packages.json"] = expected
if err := r.Update(ctx, cm); err != nil {
errs = append(errs, fmt.Errorf("error updating packages.json on config map [%s]: %w", cm.Name, err))
} else {
update = true
}
}
}

return update, utilerrors.NewAggregate(errs)
}

Expand Down
43 changes: 43 additions & 0 deletions operator/internal/controller/skyhook_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package controller

import (
"context"
"encoding/json"
"fmt"
"testing"
"time"
Expand Down Expand Up @@ -1100,6 +1101,48 @@ var _ = Describe("skyhook controller tests", func() {
Expect(result).To(MatchRegexp(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`), "configmap name should match kubernetes naming requirements")
}
})

It("should create metadata configmap with packages.json including agentVersion and packages", func() {
	// A Skyhook CR with a single package is enough to exercise the metadata payload.
	pkgs := v1alpha1.Packages{
		"pkg1": {
			PackageRef: v1alpha1.PackageRef{Name: "pkg1", Version: "1.0.0"},
			Image:      "ghcr.io/org/pkg1",
		},
	}
	wrapped := wrapper.NewSkyhookWrapper(&v1alpha1.Skyhook{
		ObjectMeta: metav1.ObjectMeta{
			Name: "skyhook-meta",
			UID:  "uid-1234",
		},
		Spec: v1alpha1.SkyhookSpec{Packages: pkgs},
	})

	// The node carries one label so the upsert does not take its
	// "no labels and no annotations" early return.
	target := &corev1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name:   "node-a",
			Labels: map[string]string{"a": "b"},
		},
	}

	// Write the config map through the suite's initialized reconciler.
	r := operator
	Expect(r.UpsertNodeLabelsAnnotationsPackages(ctx, wrapped, target)).To(Succeed())

	// Read the config map back under the name the reconciler derives for it.
	key := types.NamespacedName{
		Name:      generateSafeName(253, wrapped.Name, target.Name, "metadata"),
		Namespace: opts.Namespace,
	}
	var stored corev1.ConfigMap
	Expect(k8sClient.Get(ctx, key, &stored)).To(Succeed())

	// packages.json must exist and decode to the expected agent version and package set.
	Expect(stored.Data).To(HaveKey("packages.json"))
	type packagesDoc struct {
		AgentVersion string         `json:"agentVersion"`
		Packages     map[string]any `json:"packages"`
	}
	var decoded packagesDoc
	Expect(json.Unmarshal([]byte(stored.Data["packages.json"]), &decoded)).To(Succeed())
	Expect(decoded.AgentVersion).To(Equal(opts.AgentVersion()))
	Expect(decoded.Packages).To(HaveKey("pkg1"))
})
})

var _ = Describe("Resource Comparison", func() {
Expand Down
67 changes: 67 additions & 0 deletions operator/internal/controller/skyhook_metadata.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package controller

import (
"encoding/json"

"github.com/NVIDIA/skyhook/operator/api/v1alpha1"
"github.com/NVIDIA/skyhook/operator/internal/wrapper"
)

// PackageMetadata defines the intermediary contract for a single package that the agent can consume.
//
// NewSkyhookMetadata fills every field by copying the same-named field from
// the corresponding entry in the Skyhook spec's Packages map. Fields tagged
// omitempty are left out of the JSON document when they hold their zero value.
type PackageMetadata struct {
// Name is copied from the package entry's Name; serialized as "name".
Name string `json:"name"`
// Version is copied from the package entry's Version; serialized as "version".
Version string `json:"version"`
// Image is copied from the package entry's Image; serialized as "image".
Image string `json:"image"`
// AgentImageOverride is copied from the package entry; omitted from JSON when empty.
AgentImageOverride string `json:"agentImageOverride,omitempty"`
// Interrupt is the package entry's Interrupt pointer (shared, not deep-copied); omitted from JSON when nil.
Interrupt *v1alpha1.Interrupt `json:"interrupt,omitempty"`
// ConfigInterrupts is the package entry's ConfigInterrupts map (shared, not deep-copied); omitted from JSON when empty.
ConfigInterrupts map[string]v1alpha1.Interrupt `json:"configInterrupts,omitempty"`
}

// SkyhookMetadata defines the node metadata contract exposed to the agent.
//
// Its JSON encoding (see Marshal) is what the controller stores under the
// "packages.json" key of the per-node metadata config map.
type SkyhookMetadata struct {
// AgentVersion is set by NewSkyhookMetadata from the operator options' AgentVersion(); always present in the JSON.
AgentVersion string `json:"agentVersion"`
// Packages holds one PackageMetadata per package, keyed by the same keys as the Skyhook spec's Packages map.
Packages map[string]PackageMetadata `json:"packages"`
}

// NewSkyhookMetadata assembles the SkyhookMetadata document for a Skyhook.
//
// Each entry of s.Spec.Packages is projected onto a PackageMetadata stored
// under the same map key, and the agent version is taken from
// opts.AgentVersion(). Interrupt and ConfigInterrupts are carried over as-is
// (not deep-copied), so the result shares them with the spec.
func NewSkyhookMetadata(opts SkyhookOperatorOptions, s *wrapper.Skyhook) SkyhookMetadata {
	byKey := make(map[string]PackageMetadata)

	for key, pkg := range s.Spec.Packages {
		var entry PackageMetadata
		entry.Name = pkg.Name
		entry.Version = pkg.Version
		entry.Image = pkg.Image
		entry.AgentImageOverride = pkg.AgentImageOverride
		entry.Interrupt = pkg.Interrupt
		entry.ConfigInterrupts = pkg.ConfigInterrupts
		byKey[key] = entry
	}

	var result SkyhookMetadata
	result.AgentVersion = opts.AgentVersion()
	result.Packages = byKey
	return result
}

// Marshal encodes the metadata as JSON for storage in the node metadata
// configmap. encoding/json writes map keys in sorted order, so equal metadata
// always produces byte-identical output.
func (m SkyhookMetadata) Marshal() ([]byte, error) {
	encoded, err := json.Marshal(m)
	return encoded, err
}
Loading