feat(module): detect nodes with enabled /dev/kvm to schedule virt-handlers #1076

Merged · 9 commits · Jun 19, 2025
107 changes: 107 additions & 0 deletions images/hooks/cmd/migrate-virthandler-kvm-node-labels/main.go
@@ -0,0 +1,107 @@
/*
Copyright 2025 Flant JSC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// This hook prevents already running virt-handler pods from flapping: it pre-labels the nodes
// that currently run virt-handler with virtualization.deckhouse.io/kvm-enabled, while the
// NodeGroupConfiguration virt-detect-kvm.sh remains responsible for setting that label going forward.

package main

import (
"context"
"fmt"
"strings"

"github.com/deckhouse/module-sdk/pkg"
"github.com/deckhouse/module-sdk/pkg/app"
"github.com/deckhouse/module-sdk/pkg/registry"

"hooks/pkg/common"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"
)

const (
nodesSnapshot = "virthandler-nodes"
virtHandlerLabel = "kubevirt.internal.virtualization.deckhouse.io/schedulable"
virtHandlerLabelValue = "true"
kvmEnabledLabel = "virtualization.deckhouse.io/kvm-enabled"
kvmEnabledLabelValue = "true"
nodeJQFilter = ".metadata"
logMessageTemplate = "Active hypervisor node detected, setting %s label on node %s"
)

type NodeInfo struct {
Name string `json:"name"`
Labels map[string]string `json:"labels"`
}

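// kvmLabelPatch is a JSON Patch (RFC 6902) that adds the kvm-enabled label to a node.
// The "/" in the label key must be escaped per RFC 6901, so the resulting pointer is
// "/metadata/labels/virtualization.deckhouse.io~1kvm-enabled".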
var kvmLabelPatch = []map[string]string{
{"op": "add", "path": fmt.Sprintf("/metadata/labels/%s", jsonPatchEscape(kvmEnabledLabel)), "value": kvmEnabledLabelValue},
}

var _ = registry.RegisterFunc(config, handler)

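// config subscribes to Nodes that carry the virt-handler "schedulable" label and snapshots
// only their metadata. The hook runs before Helm (order 5) and is not re-executed on node
// events or snapshot synchronization.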
var config = &pkg.HookConfig{
OnBeforeHelm: &pkg.OrderedConfig{Order: 5},
Kubernetes: []pkg.KubernetesConfig{
{
Name: nodesSnapshot,
APIVersion: "v1",
Kind: "Node",
JqFilter: nodeJQFilter,
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
virtHandlerLabel: virtHandlerLabelValue,
},
},
ExecuteHookOnSynchronization: ptr.To(false),
ExecuteHookOnEvents: ptr.To(false),
},
},

Queue: fmt.Sprintf("modules/%s", common.MODULE_NAME),
}

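// handler adds the kvm-enabled label to every snapshot node that does not already have it;
// any existing value, including "false", is left untouched.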
func handler(_ context.Context, input *pkg.HookInput) error {
nodes := input.Snapshots.Get(nodesSnapshot)
if len(nodes) == 0 {
return nil
}

for _, node := range nodes {
nodeInfo := &NodeInfo{}
if err := node.UnmarshalTo(nodeInfo); err != nil {
input.Logger.Error(fmt.Sprintf("Failed to unmarshal node metadata %v", err))
continue
}

if _, ok := nodeInfo.Labels[kvmEnabledLabel]; !ok {
input.PatchCollector.PatchWithJSON(kvmLabelPatch, "v1", "Node", "", nodeInfo.Name)
input.Logger.Info(fmt.Sprintf(logMessageTemplate, kvmEnabledLabel, nodeInfo.Name))
}
}
return nil
}

// jsonPatchEscape escapes a string for use as a JSON Pointer token (RFC 6901):
// "~" becomes "~0" and "/" becomes "~1".
func jsonPatchEscape(s string) string {
s = strings.ReplaceAll(s, "~", "~0")
s = strings.ReplaceAll(s, "/", "~1")
return s
}

func main() {
app.Run()
}
150 changes: 150 additions & 0 deletions images/hooks/cmd/migrate-virthandler-kvm-node-labels/main_test.go
@@ -0,0 +1,150 @@
/*
Copyright 2025 Flant JSC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"bytes"
"context"
"fmt"
"testing"
"time"

"github.com/deckhouse/deckhouse/pkg/log"
"github.com/deckhouse/module-sdk/pkg"
"github.com/deckhouse/module-sdk/testing/mock"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

func createSnapshotMock(nodeInfo NodeInfo) pkg.Snapshot {
m := mock.NewSnapshotMock(GinkgoT())
m.UnmarshalToMock.Set(func(v any) error {
target, ok := v.(*NodeInfo)
if !ok {
return fmt.Errorf("expected *NodeInfo, got %T", v)
}
*target = nodeInfo
return nil
})
return m
}

func TestMigratevirtHandlerKVMLabels(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Migrate virtHandler KVM labels Suite")
}

var _ = Describe("Migrate virtHandler KVM labels", func() {
var (
snapshots *mock.SnapshotsMock
values *mock.PatchableValuesCollectorMock
patchCollector *mock.PatchCollectorMock
input *pkg.HookInput
buf *bytes.Buffer
)

BeforeEach(func() {
snapshots = mock.NewSnapshotsMock(GinkgoT())
values = mock.NewPatchableValuesCollectorMock(GinkgoT())
patchCollector = mock.NewPatchCollectorMock(GinkgoT())

buf = bytes.NewBuffer([]byte{})

input = &pkg.HookInput{
Values: values,
Snapshots: snapshots,
Logger: log.NewLogger(log.Options{
Level: log.LevelDebug.Level(),
Output: buf,
TimeFunc: func(_ time.Time) time.Time {
parsedTime, err := time.Parse(time.DateTime, "2006-01-02 15:04:05")
Expect(err).ShouldNot(HaveOccurred())
return parsedTime
},
}),
PatchCollector: patchCollector,
}
})

Context("Empty cluster", func() {
It("Hook must execute successfully", func() {
snapshots.GetMock.When(nodesSnapshot).Then(
[]pkg.Snapshot{},
)
err := handler(context.Background(), input)
Expect(err).ShouldNot(HaveOccurred())
})
})

Context("Four nodes but only two should be patched.", func() {
It("Hook must execute successfully", func() {
expectedNodes := map[string]struct{}{
"node1": struct{}{},
"node4": struct{}{},
}

snapshots.GetMock.When(nodesSnapshot).Then([]pkg.Snapshot{
// should be patched
createSnapshotMock(NodeInfo{
Name: "node1",
Labels: map[string]string{
"kubevirt.internal.virtualization.deckhouse.io/schedulable": "true",
},
}),
// should not be patched
createSnapshotMock(NodeInfo{
Name: "node2",
Labels: map[string]string{
"kubevirt.internal.virtualization.deckhouse.io/schedulable": "true",
"virtualization.deckhouse.io/kvm-enabled": "true",
},
}),
// should not be patched
createSnapshotMock(NodeInfo{
Name: "node3",
Labels: map[string]string{
"kubevirt.internal.virtualization.deckhouse.io/schedulable": "true",
"virtualization.deckhouse.io/kvm-enabled": "false",
},
}),
// should be patched
createSnapshotMock(NodeInfo{
Name: "node4",
Labels: map[string]string{
"kubevirt.internal.virtualization.deckhouse.io/schedulable": "true",
},
}),
})

patchCollector.PatchWithJSONMock.Set(func(patch any, apiVersion, kind, namespace, name string, opts ...pkg.PatchCollectorOption) {
p, ok := patch.([]map[string]string)
Expect(ok).To(BeTrue())
Expect(expectedNodes).To(HaveKey(name))
Expect(p).To(BeEquivalentTo(kvmLabelPatch))
delete(expectedNodes, name)
})

err := handler(context.Background(), input)
Expect(err).ShouldNot(HaveOccurred())

Expect(buf.String()).To(ContainSubstring(fmt.Sprintf(logMessageTemplate, kvmEnabledLabel, "node1")))
Expect(buf.String()).To(ContainSubstring(fmt.Sprintf(logMessageTemplate, kvmEnabledLabel, "node4")))

Expect(expectedNodes).To(HaveLen(0))
})
})
})
1 change: 1 addition & 0 deletions images/hooks/werf.inc.yaml
@@ -35,3 +35,4 @@ shell:
- go build -ldflags="-s -w" -o /hooks/generate-secret-for-dvcr ./cmd/generate-secret-for-dvcr
- go build -ldflags="-s -w" -o /hooks/discovery-clusterip-service-for-dvcr ./cmd/discovery-clusterip-service-for-dvcr
- go build -ldflags="-s -w" -o /hooks/discovery-workload-nodes ./cmd/discovery-workload-nodes
- go build -ldflags="-s -w" -o /hooks/migrate-virthandler-kvm-node-labels ./cmd/migrate-virthandler-kvm-node-labels
13 changes: 13 additions & 0 deletions templates/kubevirt/_kubevirt_helpers.tpl
@@ -20,3 +20,16 @@ spec:
{{- define "kubevirt.virthandler_nodeaffinity_strategic_patch_json" -}}
'{{ include "kubevirt.virthandler_nodeaffinity_strategic_patch" . | fromYaml | toJson }}'
{{- end }}

{{- define "kubevirt.virthandler_nodeseletor_strategic_patch" -}}
{{- $defaultLabels := dict "kubernetes.io/os" "linux" "virtualization.deckhouse.io/kvm-enabled" "true" -}}
spec:
template:
spec:
nodeSelector:
{{ $defaultLabels | toYaml | nindent 8 }}
{{- end -}}

{{- define "kubevirt.virthandler_nodeseletor_strategic_patch_json" -}}
'{{ include "kubevirt.virthandler_nodeseletor_strategic_patch" . | fromYaml | toJson }}'
{{- end }}
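{{/*
For reference, a sketch of roughly what the nodeSelector strategic patch above renders to,
assuming only the two default labels defined in $defaultLabels:
{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/os":"linux","virtualization.deckhouse.io/kvm-enabled":"true"}}}}}
*/}}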
7 changes: 6 additions & 1 deletion templates/kubevirt/kubevirt.yaml
@@ -86,11 +86,16 @@ spec:
resourceName: virt-handler
patch: '[{"op":"replace","path":"/spec/template/spec/tolerations","value":{{ $tolerationsAnyNode }}}]'
type: json
# Add node affinity for the lowest virtualization level
# Add node affinity for the lowest virtualization level for virt-handler pods
- resourceType: DaemonSet
resourceName: virt-handler
patch: {{ include "kubevirt.virthandler_nodeaffinity_strategic_patch_json" $dvpNestingLevel }}
type: strategic
# Run virt-handler pods on required nodes
- resourceType: DaemonSet
resourceName: virt-handler
patch: {{ include "kubevirt.virthandler_nodeseletor_strategic_patch_json" . }}
type: strategic
{{- if (include "helm_lib_ha_enabled" .) }}
# HA settings for deploy/virt-api.
- resourceType: Deployment
56 changes: 56 additions & 0 deletions templates/nodegroupconfiguration-detect-kvm.yaml
@@ -0,0 +1,56 @@
apiVersion: deckhouse.io/v1alpha1
kind: NodeGroupConfiguration
metadata:
name: virt-detect-kvm.sh
{{- include "helm_lib_module_labels" (list .) | nindent 2 }}
spec:
weight: 99
nodeGroups: ["*"]
bundles: ["*"]
content: |
# Copyright 2025 Flant JSC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

kvm_enabled_label='virtualization.deckhouse.io/kvm-enabled=true'
kvm_disabled_label='virtualization.deckhouse.io/kvm-enabled-'

label=$kvm_enabled_label

if ! grep -qE 'vmx|svm' /proc/cpuinfo; then
echo "CPU virtualization flags (vmx/svm) not found."
label=$kvm_disabled_label
fi

if [[ ! -c "/dev/kvm" ]]; then
echo "Device /dev/kvm not found."
label=$kvm_disabled_label
fi

kubeconfig="/etc/kubernetes/kubelet.conf"
if [ ! -f "$kubeconfig" ]; then
exit 0
fi

node=$(bb-d8-node-name)

max_attempts=3
attempt=0
until bb-kubectl --kubeconfig "$kubeconfig" label --overwrite=true node "$node" "$label"; do
attempt=$(( attempt + 1 ))
if [ "$attempt" -gt "$max_attempts" ]; then
bb-log-error "failed to label node $node with label $label after $max_attempts attempts"
exit 1
fi
echo "Waiting for label node $node with label $label (attempt $attempt of $max_attempts)..."
sleep 5
done
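# Example (hypothetical usage, not part of the detection logic): inspect the resulting
# label column with
#   kubectl get nodes -L virtualization.deckhouse.io/kvm-enabled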