Skip to content

Commit c3c74ea

Browse files
committed
chore: correctly handle the kata-enabled setting
1 parent c652dea commit c3c74ea

File tree

3 files changed

+23
-13
lines changed
  • distros/kubernetes/nvsentinel/charts/syslog-health-monitor/templates
  • health-monitors/syslog-health-monitor
  • labeler-module/pkg/labeler

3 files changed

+23
-13
lines changed

distros/kubernetes/nvsentinel/charts/syslog-health-monitor/templates/configmap.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ data:
3333
# 1. Kata detection happens at the deployment level via node labels
3434
# 2. Each DaemonSet variant (regular/kata) handles journal access differently:
3535
# - Regular: mounts /var/log (file-based logs)
36-
# - Kata: mounts /run/log/journal and /var/log/journal (systemd journal)
36+
# - Kata: mounts /run/systemd/journal and /var/log/journal (systemd journal)
3737
# 3. The kata variant accesses the systemd journal directly, so it can read
3838
# containerd logs without needing special tags or filters in the ConfigMap
3939
log_check_definitions.yaml: |

health-monitors/syslog-health-monitor/main.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727

2828
"github.com/nvidia/nvsentinel/commons/pkg/logger"
2929
"github.com/nvidia/nvsentinel/commons/pkg/server"
30+
"github.com/nvidia/nvsentinel/commons/pkg/stringutil"
3031
pb "github.com/nvidia/nvsentinel/data-models/pkg/protos"
3132
fd "github.com/nvidia/nvsentinel/health-monitors/syslog-health-monitor/pkg/syslog-monitor"
3233
"golang.org/x/sync/errgroup"
@@ -131,14 +132,27 @@ func run() error {
131132
return fmt.Errorf("no checks defined in the config file")
132133
}
133134

134-
// Add kata-specific journal tags if running in Kata mode
135-
if *kataEnabled == "true" {
136-
slog.Info("Kata mode enabled, adding containerd service filter to journal checks")
135+
// Handle kata-specific configuration
136+
if stringutil.IsTruthyValue(*kataEnabled) {
137+
slog.Info("Kata mode enabled, adding containerd service filter and removing SysLogsSXIDError check")
137138

139+
// Add containerd service filter to all checks for kata nodes
138140
for i := range config.Checks {
139-
// Add "-u containerd.service" tag to filter for containerd logs in systemd journal
140-
config.Checks[i].Tags = append(config.Checks[i].Tags, "-u", "containerd.service")
141+
if config.Checks[i].Tags == nil {
142+
config.Checks[i].Tags = []string{"-u", "containerd.service"}
143+
} else {
144+
config.Checks[i].Tags = append(config.Checks[i].Tags, "-u", "containerd.service")
145+
}
146+
}
147+
148+
// Remove SysLogsSXIDError check for kata nodes (not supported in kata environment)
149+
filteredChecks := make([]fd.CheckDefinition, 0, len(config.Checks))
150+
for _, check := range config.Checks {
151+
if check.Name != "SysLogsSXIDError" {
152+
filteredChecks = append(filteredChecks, check)
153+
}
141154
}
155+
config.Checks = filteredChecks
142156
}
143157

144158
slog.Info("Creating syslog monitor", "checksCount", len(config.Checks))

labeler-module/pkg/labeler/labeler.go

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,7 @@ func (l *Labeler) getKataLabelForNode(node *v1.Node) string {
309309

310310
// isKataEnabled checks if a node has Kata Containers enabled by examining node labels.
311311
// Checks the configured kata labels (either custom override or default) for truthy values.
312+
// Returns true if ANY of the configured labels has a truthy value (OR logic).
312313
// Truthy values are: "true", "enabled", "1", "yes" (case-insensitive).
313314
func isKataEnabled(node *v1.Node, kataLabels []string) bool {
314315
for _, label := range kataLabels {
@@ -475,13 +476,8 @@ func (l *Labeler) updateKataLabel(nodeName, expectedKataLabel string) error {
475476
return nil
476477
}
477478

478-
if expectedKataLabel == "" {
479-
delete(node.Labels, KataEnabledLabel)
480-
slog.Info("Removing Kata enabled label from node", "node", nodeName)
481-
} else {
482-
node.Labels[KataEnabledLabel] = expectedKataLabel
483-
slog.Info("Setting Kata enabled label on node", "node", nodeName, "kata", expectedKataLabel)
484-
}
479+
node.Labels[KataEnabledLabel] = expectedKataLabel
480+
slog.Info("Setting Kata enabled label on node", "node", nodeName, "kata", expectedKataLabel)
485481

486482
_, err = l.clientset.CoreV1().Nodes().Update(l.ctx, node, metav1.UpdateOptions{})
487483

0 commit comments

Comments
 (0)