diff --git a/cmd/agent/dist/conf.d/lock_contention.d/conf.yaml.example b/cmd/agent/dist/conf.d/lock_contention.d/conf.yaml.example new file mode 100644 index 00000000000000..fa8c8147277fba --- /dev/null +++ b/cmd/agent/dist/conf.d/lock_contention.d/conf.yaml.example @@ -0,0 +1,14 @@ +init_config: + +instances: + + - + + ## @param tags - list of strings following the pattern: "key:value" - optional + ## List of tags to attach to every metric, event, and service check emitted by this integration. + ## + ## Learn more about tagging: https://docs.datadoghq.com/tagging/ + # + # tags: + # - : + # - : diff --git a/cmd/agent/dist/conf.d/pressure.d/conf.yaml.default b/cmd/agent/dist/conf.d/pressure.d/conf.yaml.default new file mode 100644 index 00000000000000..f284c18477b0c7 --- /dev/null +++ b/cmd/agent/dist/conf.d/pressure.d/conf.yaml.default @@ -0,0 +1,2 @@ +instances: +- {} diff --git a/cmd/agent/dist/conf.d/syscall_latency.d/conf.yaml.default b/cmd/agent/dist/conf.d/syscall_latency.d/conf.yaml.default new file mode 100644 index 00000000000000..00d9a2dbba2c83 --- /dev/null +++ b/cmd/agent/dist/conf.d/syscall_latency.d/conf.yaml.default @@ -0,0 +1,2 @@ +instances: + - {} diff --git a/cmd/system-probe/modules/lock_contention_check.go b/cmd/system-probe/modules/lock_contention_check.go new file mode 100644 index 00000000000000..7775bdf62fedcf --- /dev/null +++ b/cmd/system-probe/modules/lock_contention_check.go @@ -0,0 +1,69 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +//go:build linux && linux_bpf + +package modules + +import ( + "fmt" + "net/http" + "sync/atomic" + "time" + + "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck" + "github.com/DataDog/datadog-agent/pkg/ebpf" + "github.com/DataDog/datadog-agent/pkg/system-probe/api/module" + "github.com/DataDog/datadog-agent/pkg/system-probe/config" + sysconfigtypes "github.com/DataDog/datadog-agent/pkg/system-probe/config/types" + "github.com/DataDog/datadog-agent/pkg/system-probe/utils" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +func init() { registerModule(LockContentionCheck) } + +// LockContentionCheck Factory +var LockContentionCheck = &module.Factory{ + Name: config.LockContentionCheckModule, + Fn: func(_ *sysconfigtypes.Config, _ module.FactoryDependencies) (module.Module, error) { + log.Infof("Starting the lock contention check module") + p, err := lockcontentioncheck.NewProbe(ebpf.NewConfig()) + if err != nil { + return nil, fmt.Errorf("unable to start the lock contention check probe: %w", err) + } + return &lockContentionCheckModule{ + Probe: p, + lastCheck: &atomic.Int64{}, + }, nil + }, + NeedsEBPF: func() bool { + return true + }, +} + +var _ module.Module = &lockContentionCheckModule{} + +type lockContentionCheckModule struct { + *lockcontentioncheck.Probe + lastCheck *atomic.Int64 +} + +// GetStats implements module.Module.GetStats +func (m lockContentionCheckModule) GetStats() map[string]interface{} { + return map[string]interface{}{ + "last_check": m.lastCheck.Load(), + } +} + +// Register implements module.Module.Register +func (m lockContentionCheckModule) Register(httpMux *module.Router) error { + httpMux.HandleFunc("/check", utils.WithConcurrencyLimit(1, func(w http.ResponseWriter, req *http.Request) { + m.lastCheck.Store(time.Now().Unix()) + stats := m.Probe.GetAndFlush() + utils.WriteAsJSON(req, w, stats, utils.GetPrettyPrintFromQueryParams(req)) + })) + + return nil +} diff --git a/cmd/system-probe/modules/modules.go b/cmd/system-probe/modules/modules.go index b2c4ef92a0cf02..89c83bc5f12289 100644 --- a/cmd/system-probe/modules/modules.go +++ b/cmd/system-probe/modules/modules.go @@ -36,6 +36,8 @@ var moduleOrder = []types.ModuleName{ config.InjectorModule, config.NoisyNeighborModule, config.LogonDurationModule, + config.LockContentionCheckModule, + config.SyscallLatencyCheckModule, } // nolint: deadcode, unused // may be unused with certain build tag combinations diff --git a/cmd/system-probe/modules/syscall_latency_check.go b/cmd/system-probe/modules/syscall_latency_check.go new file mode 100644 index 00000000000000..34afa3574af7a9 --- /dev/null +++ b/cmd/system-probe/modules/syscall_latency_check.go @@ -0,0 +1,68 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +//go:build linux && linux_bpf + +package modules + +import ( + "fmt" + "net/http" + "sync/atomic" + "time" + + "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/probe/syscalllatency" + "github.com/DataDog/datadog-agent/pkg/ebpf" + "github.com/DataDog/datadog-agent/pkg/system-probe/api/module" + "github.com/DataDog/datadog-agent/pkg/system-probe/config" + sysconfigtypes "github.com/DataDog/datadog-agent/pkg/system-probe/config/types" + "github.com/DataDog/datadog-agent/pkg/system-probe/utils" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +func init() { registerModule(SyscallLatencyCheck) } + +// SyscallLatencyCheck factory +var SyscallLatencyCheck = &module.Factory{ + Name: config.SyscallLatencyCheckModule, + Fn: func(_ *sysconfigtypes.Config, _ module.FactoryDependencies) (module.Module, error) { + log.Infof("Starting the syscall latency check module") + p, err := syscalllatency.NewProbe(ebpf.NewConfig()) + if err != nil { + return nil, fmt.Errorf("unable to start the syscall latency probe: %w", err) + } + return &syscallLatencyModule{ + Probe: p, + lastCheck: &atomic.Int64{}, + }, nil + }, + NeedsEBPF: func() bool { + return true + }, +} + +var _ module.Module = &syscallLatencyModule{} + +type syscallLatencyModule struct { + *syscalllatency.Probe + lastCheck *atomic.Int64 +} + +// GetStats implements module.Module. +func (m *syscallLatencyModule) GetStats() map[string]interface{} { + return map[string]interface{}{ + "last_check": m.lastCheck.Load(), + } +} + +// Register implements module.Module. +func (m *syscallLatencyModule) Register(httpMux *module.Router) error { + httpMux.HandleFunc("/check", utils.WithConcurrencyLimit(1, func(w http.ResponseWriter, req *http.Request) { + m.lastCheck.Store(time.Now().Unix()) + stats := m.Probe.GetAndFlush() + utils.WriteAsJSON(req, w, stats, utils.GetPrettyPrintFromQueryParams(req)) + })) + return nil +} diff --git a/go.mod b/go.mod index 4234a198088835..9409d400acf884 100644 --- a/go.mod +++ b/go.mod @@ -914,6 +914,8 @@ require ( go.opentelemetry.io/proto/otlp v1.10.0 // indirect go.uber.org/goleak v1.3.0 go.uber.org/zap/exp v0.3.0 + go.yaml.in/yaml/v2 v2.4.3 + go.yaml.in/yaml/v3 v3.0.4 // indirect go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect golang.org/x/exp/typeparams v0.0.0-20251125195548-87e1e737ad39 // indirect golang.org/x/lint v0.0.0-20241112194109-818c5a804067 // indirect diff --git a/pkg/collector/corechecks/ebpf/c/runtime/lock-contention-check-kern-user.h b/pkg/collector/corechecks/ebpf/c/runtime/lock-contention-check-kern-user.h new file mode 100644 index 00000000000000..484ffc2aec58a0 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/c/runtime/lock-contention-check-kern-user.h @@ -0,0 +1,29 @@ +#ifndef __LOCK_CONTENTION_CHECK_KERN_USER_H +#define __LOCK_CONTENTION_CHECK_KERN_USER_H + +#include "ktypes.h" + +// lock_type_key_t classifies kernel lock types derived from LCB_F_* flags +// in the contention_begin tracepoint. +typedef enum { + LOCK_TYPE_SPINLOCK = 0, + LOCK_TYPE_MUTEX = 1, + LOCK_TYPE_RWSEM_READ = 2, + LOCK_TYPE_RWSEM_WRITE = 3, + LOCK_TYPE_RWLOCK_READ = 4, + LOCK_TYPE_RWLOCK_WRITE = 5, + LOCK_TYPE_RT_MUTEX = 6, + LOCK_TYPE_PCPU_SPINLOCK = 7, + LOCK_TYPE_OTHER = 8, + LOCK_TYPE_MAX = 9, +} lock_type_key_t; + +// lock_contention_stats_t holds aggregated contention statistics per lock type. +// Stored in a per-CPU array map indexed by lock_type_key_t. +typedef struct { + __u64 total_time_ns; // cumulative nanoseconds spent waiting + __u64 count; // number of contention events + __u64 max_time_ns; // max single-event wait time (reset per flush interval) +} lock_contention_stats_t; + +#endif diff --git a/pkg/collector/corechecks/ebpf/c/runtime/lock-contention-check-kern.c b/pkg/collector/corechecks/ebpf/c/runtime/lock-contention-check-kern.c new file mode 100644 index 00000000000000..b1582ec082a416 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/c/runtime/lock-contention-check-kern.c @@ -0,0 +1,166 @@ +#include "vmlinux.h" +#include "bpf_helpers.h" +#include "bpf_tracing.h" +#include "map-defs.h" +#include "lock-contention-check-kern-user.h" +#include "bpf_metadata.h" +#include "bpf_telemetry.h" + +#define MAX_TSTAMP_ENTRIES 8192 + +/* Lock contention flags from include/trace/events/lock.h */ +#define LCB_F_SPIN (1U << 0) +#define LCB_F_READ (1U << 1) +#define LCB_F_WRITE (1U << 2) +#define LCB_F_RT (1U << 3) +#define LCB_F_PERCPU (1U << 4) +#define LCB_F_MUTEX (1U << 5) + +/* Per-task timestamp data stored in contention_begin, consumed in contention_end */ +struct tstamp_data { + __u64 timestamp_ns; + __u64 lock; /* lock address — non-zero means slot is occupied */ + __u32 flags; +}; + +/* Per-TID hash map for sleeping lock timestamps (mutex, rwsem, rt_mutex). + * Must NOT be per-CPU: sleeping locks can migrate between CPUs, so + * contention_end may run on a different CPU than contention_begin. */ +BPF_HASH_MAP(tstamp, __u32, struct tstamp_data, MAX_TSTAMP_ENTRIES) + +/* Per-CPU array for spinlock timestamps (one slot per CPU, preemption disabled) */ +BPF_PERCPU_ARRAY_MAP(tstamp_cpu, struct tstamp_data, 1) + +/* Per-CPU array of aggregated stats, indexed by lock_type_key_t */ +BPF_PERCPU_ARRAY_MAP(lock_contention_stats, lock_contention_stats_t, LOCK_TYPE_MAX) + +/* Classify LCB_F_* flags into lock_type_key_t */ +static __always_inline __u32 classify_lock_type(__u32 flags) { + if (flags & LCB_F_SPIN) { + if (flags & LCB_F_PERCPU) + return LOCK_TYPE_PCPU_SPINLOCK; + if (flags & LCB_F_READ) + return LOCK_TYPE_RWLOCK_READ; + if (flags & LCB_F_WRITE) + return LOCK_TYPE_RWLOCK_WRITE; + return LOCK_TYPE_SPINLOCK; + } + if (flags & LCB_F_MUTEX) + return LOCK_TYPE_MUTEX; + if (flags & LCB_F_RT) + return LOCK_TYPE_RT_MUTEX; + if (flags & LCB_F_READ) + return LOCK_TYPE_RWSEM_READ; + if (flags & LCB_F_WRITE) + return LOCK_TYPE_RWSEM_WRITE; + /* flags == 0: pre-6.2 kernels where mutex has no dedicated flag */ + if (flags == 0) + return LOCK_TYPE_MUTEX; + return LOCK_TYPE_OTHER; +} + +/* Get or create a timestamp element based on lock type. + * Spinlocks/rwlocks use per-CPU array (preemption is disabled). + * Sleeping locks use per-TID hash map. */ +static __always_inline struct tstamp_data *get_tstamp_elem(__u32 flags) { + struct tstamp_data *pelem; + + if (flags & LCB_F_SPIN) { + __u32 idx = 0; + pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx); + /* Do not overwrite for nested lock contention */ + if (pelem && pelem->lock) + return NULL; + return pelem; + } + + __u32 tid = bpf_get_current_pid_tgid(); + pelem = bpf_map_lookup_elem(&tstamp, &tid); + /* Do not overwrite for nested lock contention */ + if (pelem && pelem->lock) + return NULL; + + if (pelem == NULL) { + struct tstamp_data zero = {}; + if (bpf_map_update_elem(&tstamp, &tid, &zero, BPF_NOEXIST) < 0) + return NULL; + pelem = bpf_map_lookup_elem(&tstamp, &tid); + } + return pelem; +} + +SEC("tp_btf/contention_begin") +int tracepoint__contention_begin(u64 *ctx) +{ + __u32 flags = (__u32)ctx[1]; + struct tstamp_data *pelem; + + pelem = get_tstamp_elem(flags); + if (pelem == NULL) + return 0; + + pelem->timestamp_ns = bpf_ktime_get_ns(); + pelem->lock = ctx[0]; + pelem->flags = flags; + + return 0; +} + +SEC("tp_btf/contention_end") +int tracepoint__contention_end(u64 *ctx) +{ + struct tstamp_data *pelem; + __u32 tid = 0, idx = 0; + bool need_delete = false; + __u64 duration; + + /* + * contention_end does not carry flags, so we cannot know whether the + * lock was a spinlock or sleeping lock from the tracepoint args alone. + * + * Strategy (same as upstream perf lock contention): + * 1. Check per-CPU map first (spinlocks cannot sleep, so if there's + * an active entry it must be for this event). + * 2. If no per-CPU entry, check per-TID hash (sleeping locks). + * 3. Verify the lock address matches. + */ + pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx); + if (pelem && pelem->lock) { + if (pelem->lock != (__u64)ctx[0]) + return 0; + } else { + tid = bpf_get_current_pid_tgid(); + pelem = bpf_map_lookup_elem(&tstamp, &tid); + if (!pelem || pelem->lock != (__u64)ctx[0]) + return 0; + need_delete = true; + } + + duration = bpf_ktime_get_ns() - pelem->timestamp_ns; + if ((__s64)duration < 0) { + pelem->lock = 0; + if (need_delete) + bpf_map_delete_elem(&tstamp, &tid); + return 0; + } + + /* Classify and update stats */ + __u32 lock_type = classify_lock_type(pelem->flags); + lock_contention_stats_t *stats = bpf_map_lookup_elem(&lock_contention_stats, &lock_type); + if (stats) { + stats->total_time_ns += duration; + stats->count += 1; + /* max_time_ns: not atomic, but acceptable — worst case we miss + * an update, which is fine for a gauge that resets each interval */ + if (stats->max_time_ns < duration) + stats->max_time_ns = duration; + } + + /* Clear the timestamp slot */ + pelem->lock = 0; + if (need_delete) + bpf_map_delete_elem(&tstamp, &tid); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/pkg/collector/corechecks/ebpf/c/runtime/syscall-latency-kern-user.h b/pkg/collector/corechecks/ebpf/c/runtime/syscall-latency-kern-user.h new file mode 100644 index 00000000000000..7bc91017447e6f --- /dev/null +++ b/pkg/collector/corechecks/ebpf/c/runtime/syscall-latency-kern-user.h @@ -0,0 +1,63 @@ +#ifndef __SYSCALL_LATENCY_KERN_USER_H +#define __SYSCALL_LATENCY_KERN_USER_H + +#include "ktypes.h" + +/* + * Tracked syscall slot assignments. + * + * Syscalls are mapped to dense slots so the stats array stays small + * (SYSCALL_SLOT_MAX entries per CPU rather than 512). The mapping is + * a compile-time switch in classify_syscall(), compiled to a jump table. + */ +typedef enum { + SYSCALL_SLOT_READ = 0, + SYSCALL_SLOT_WRITE = 1, + SYSCALL_SLOT_PREAD64 = 2, + SYSCALL_SLOT_PWRITE64 = 3, + SYSCALL_SLOT_POLL = 4, + SYSCALL_SLOT_SELECT = 5, + SYSCALL_SLOT_MMAP = 6, + SYSCALL_SLOT_MUNMAP = 7, + SYSCALL_SLOT_CONNECT = 8, + SYSCALL_SLOT_ACCEPT = 9, + SYSCALL_SLOT_ACCEPT4 = 10, + SYSCALL_SLOT_FUTEX = 11, + SYSCALL_SLOT_EPOLL_WAIT = 12, + SYSCALL_SLOT_EPOLL_PWAIT = 13, + SYSCALL_SLOT_CLONE = 14, + SYSCALL_SLOT_EXECVE = 15, + SYSCALL_SLOT_IO_URING = 16, + SYSCALL_SLOT_MAX = 17, + SYSCALL_NOT_TRACKED = 0xFF, +} syscall_slot_t; + +/* + * Per-slot statistics stored in a per-CPU array map. + * total_time_ns and count are monotonically increasing; + * max_time_ns is reset to 0 after each flush interval. + * slow_count counts calls whose duration exceeded SLOW_THRESHOLD_NS. + */ +typedef struct { + __u64 total_time_ns; + __u64 count; + __u64 max_time_ns; + __u64 slow_count; +} syscall_stats_t; + +/* 1 ms — calls slower than this increment slow_count */ +#define SLOW_THRESHOLD_NS 1000000ULL + +/* Length of the cgroup leaf name buffer, matching other eBPF checks in this tree. */ +#define CGROUP_NAME_LEN 128 + +/* Map key for per-container per-syscall stats. + * Combines the cgroup leaf name with the syscall slot so a single hash map + * replaces the per-CPU array, giving per-container granularity. */ +typedef struct { + char cgroup_name[CGROUP_NAME_LEN]; /* leaf cgroup name from get_cgroup_name() */ + __u8 slot; /* syscall_slot_t */ + __u8 pad[7]; /* explicit padding for alignment */ +} cgroup_stats_key_t; + +#endif /* __SYSCALL_LATENCY_KERN_USER_H */ diff --git a/pkg/collector/corechecks/ebpf/c/runtime/syscall-latency-kern.c b/pkg/collector/corechecks/ebpf/c/runtime/syscall-latency-kern.c new file mode 100644 index 00000000000000..011b1e9cd49850 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/c/runtime/syscall-latency-kern.c @@ -0,0 +1,178 @@ +#include "vmlinux.h" +#include "bpf_helpers.h" +#include "bpf_tracing.h" +#include "map-defs.h" +#include "syscall-latency-kern-user.h" +#include "bpf_metadata.h" +#include "bpf_telemetry.h" +#include "cgroup.h" + +/* Reduced from 65536 to keep total per-TID map memory reasonable now that + * each entry carries a 128-byte cgroup name (~2 MB at 16384 entries). */ +#define MAX_TID_ENTRIES 16384 + +/* Per-thread entry record — one active syscall per thread at a time. */ +struct tid_entry { + __u64 timestamp_ns; + __u8 slot; /* syscall_slot_t value */ + __u8 pad[7]; /* align cgroup_name to 8-byte boundary */ + char cgroup_name[CGROUP_NAME_LEN]; +}; + +/* + * Per-TID hash map: stores the entry timestamp and slot for in-flight syscalls. + * A thread is in at most one syscall at a time, so one entry per TID suffices. + */ +BPF_HASH_MAP(tid_entry, __u32, struct tid_entry, MAX_TID_ENTRIES) + +/* + * Per-CPU hash map of aggregated stats, keyed by (cgroup_name, slot). + * 4096 entries handles ~240 containers × 17 syscall slots with room to spare. + * Per-CPU eliminates contention in the hot path; the Go side aggregates + * across CPUs on read. + */ +BPF_PERCPU_HASH_MAP(syscall_stats, cgroup_stats_key_t, syscall_stats_t, 4096) + +/* + * Map syscall number to our internal slot, or SYSCALL_NOT_TRACKED. + * Two arch-guarded switch tables — Clang compiles each to a jump table. + * + * x86_64: arch/x86/entry/syscalls/syscall_64.tbl + * arm64: include/uapi/asm-generic/unistd.h + * + * On arm64 some syscalls map to differently-named but semantically equivalent + * variants (ppoll→POLL, pselect6→SELECT, epoll_pwait→EPOLL_WAIT). The slot + * names and metric names remain arch-independent. + */ +static __always_inline __u8 classify_syscall(__u64 nr) +{ +#if defined(bpf_target_x86) || defined(__x86_64__) + switch (nr) { + case 0: return SYSCALL_SLOT_READ; + case 1: return SYSCALL_SLOT_WRITE; + case 7: return SYSCALL_SLOT_POLL; + case 9: return SYSCALL_SLOT_MMAP; + case 11: return SYSCALL_SLOT_MUNMAP; + case 17: return SYSCALL_SLOT_PREAD64; + case 18: return SYSCALL_SLOT_PWRITE64; + case 23: return SYSCALL_SLOT_SELECT; + case 42: return SYSCALL_SLOT_CONNECT; + case 43: return SYSCALL_SLOT_ACCEPT; + case 56: return SYSCALL_SLOT_CLONE; + case 59: return SYSCALL_SLOT_EXECVE; + case 202: return SYSCALL_SLOT_FUTEX; + case 232: return SYSCALL_SLOT_EPOLL_WAIT; + case 281: return SYSCALL_SLOT_EPOLL_PWAIT; + case 288: return SYSCALL_SLOT_ACCEPT4; + case 426: return SYSCALL_SLOT_IO_URING; + default: return SYSCALL_NOT_TRACKED; + } +#elif defined(bpf_target_arm64) || defined(__aarch64__) + switch (nr) { + case 22: return SYSCALL_SLOT_EPOLL_WAIT; /* epoll_pwait on arm64 */ + case 63: return SYSCALL_SLOT_READ; + case 64: return SYSCALL_SLOT_WRITE; + case 67: return SYSCALL_SLOT_PREAD64; + case 68: return SYSCALL_SLOT_PWRITE64; + case 72: return SYSCALL_SLOT_SELECT; /* pselect6 */ + case 73: return SYSCALL_SLOT_POLL; /* ppoll */ + case 98: return SYSCALL_SLOT_FUTEX; + case 202: return SYSCALL_SLOT_ACCEPT; + case 203: return SYSCALL_SLOT_CONNECT; + case 215: return SYSCALL_SLOT_MUNMAP; + case 220: return SYSCALL_SLOT_CLONE; + case 221: return SYSCALL_SLOT_EXECVE; + case 222: return SYSCALL_SLOT_MMAP; + case 242: return SYSCALL_SLOT_ACCEPT4; + case 426: return SYSCALL_SLOT_IO_URING; + default: return SYSCALL_NOT_TRACKED; + } +#else +#error "syscall-latency-kern.c: unsupported architecture (only x86_64 and arm64)" +#endif +} + +/* + * raw_tracepoint/sys_enter + * + * args layout: + * ctx->args[0] — struct pt_regs *regs + * ctx->args[1] — long syscall_nr + * + * Using raw tracepoints (not tp_btf) so this works on kernels >= 4.17 + * without requiring BTF — broader coverage than lock contention. + */ +SEC("raw_tracepoint/sys_enter") +int raw_tp__sys_enter(struct bpf_raw_tracepoint_args *ctx) +{ + __u64 nr = ctx->args[1]; + __u8 slot = classify_syscall(nr); + if (slot == SYSCALL_NOT_TRACKED) + return 0; + + __u32 tid = bpf_get_current_pid_tgid(); + struct tid_entry entry = { + .timestamp_ns = bpf_ktime_get_ns(), + .slot = slot, + }; + get_cgroup_name(entry.cgroup_name, CGROUP_NAME_LEN); + + bpf_map_update_elem(&tid_entry, &tid, &entry, BPF_ANY); + return 0; +} + +/* + * raw_tracepoint/sys_exit + * + * args layout: + * ctx->args[0] — struct pt_regs *regs + * ctx->args[1] — long return_value + * + * We ignore the return value; latency is independent of success/failure. + */ +SEC("raw_tracepoint/sys_exit") +int raw_tp__sys_exit(struct bpf_raw_tracepoint_args *ctx) +{ + __u32 tid = bpf_get_current_pid_tgid(); + struct tid_entry *entry = bpf_map_lookup_elem(&tid_entry, &tid); + if (!entry) + return 0; + + __u64 duration = bpf_ktime_get_ns() - entry->timestamp_ns; + + /* Build compound key before deleting the entry (entry pointer becomes + * invalid after bpf_map_delete_elem). */ + cgroup_stats_key_t key = {}; + key.slot = entry->slot; + bpf_memcpy(key.cgroup_name, entry->cgroup_name, CGROUP_NAME_LEN); + + /* Clear the entry before updating stats to minimise the window + * in which a nested or preempted path could see stale data. */ + bpf_map_delete_elem(&tid_entry, &tid); + + /* Guard against clock skew (unlikely but defensive). */ + if ((__s64)duration < 0) + return 0; + + syscall_stats_t *stats = bpf_map_lookup_elem(&syscall_stats, &key); + if (!stats) { + syscall_stats_t zero = {}; + bpf_map_update_elem(&syscall_stats, &key, &zero, BPF_NOEXIST); + stats = bpf_map_lookup_elem(&syscall_stats, &key); + if (!stats) + return 0; + } + + stats->total_time_ns += duration; + stats->count += 1; + if (duration > SLOW_THRESHOLD_NS) + stats->slow_count += 1; + /* max_time_ns: not atomic, worst case we miss an update — acceptable + * for a gauge that is reset each interval by the Go side. */ + if (stats->max_time_ns < duration) + stats->max_time_ns = duration; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/pkg/collector/corechecks/ebpf/lockcontentioncheck/common.go b/pkg/collector/corechecks/ebpf/lockcontentioncheck/common.go new file mode 100644 index 00000000000000..0035c0c19e3223 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/lockcontentioncheck/common.go @@ -0,0 +1,12 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +// Package lockcontentioncheck contains the Lock Contention check +package lockcontentioncheck + +const ( + // CheckName is the name of the check + CheckName = "lock_contention" +) diff --git a/pkg/collector/corechecks/ebpf/lockcontentioncheck/lock_contention_check.go b/pkg/collector/corechecks/ebpf/lockcontentioncheck/lock_contention_check.go new file mode 100644 index 00000000000000..6af9cb74b58462 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/lockcontentioncheck/lock_contention_check.go @@ -0,0 +1,86 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +//go:build linux + +package lockcontentioncheck + +import ( + "fmt" + + "go.yaml.in/yaml/v2" + + "github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration" + "github.com/DataDog/datadog-agent/pkg/aggregator/sender" + "github.com/DataDog/datadog-agent/pkg/collector/check" + core "github.com/DataDog/datadog-agent/pkg/collector/corechecks" + "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/model" + pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" + sysprobeclient "github.com/DataDog/datadog-agent/pkg/system-probe/api/client" + sysconfig "github.com/DataDog/datadog-agent/pkg/system-probe/config" + "github.com/DataDog/datadog-agent/pkg/util/option" +) + +// LockContentionConfig is the config of the lock contention check +type LockContentionConfig struct{} + +// LockContentionCheck collects kernel lock contention metrics via system-probe +type LockContentionCheck struct { + core.CheckBase + config *LockContentionConfig + sysProbeClient *sysprobeclient.CheckClient +} + +// Factory creates a new check factory +func Factory() option.Option[func() check.Check] { + return option.New(newCheck) +} + +func newCheck() check.Check { + return &LockContentionCheck{ + CheckBase: core.NewCheckBase(CheckName), + config: &LockContentionConfig{}, + } +} + +// Parse parses the check configuration +func (c *LockContentionConfig) Parse(data []byte) error { + return yaml.Unmarshal(data, c) +} + +// Configure parses the check configuration and init the check +func (m *LockContentionCheck) Configure(senderManager sender.SenderManager, _ uint64, config, initConfig integration.Data, source string) error { + if err := m.CommonConfigure(senderManager, initConfig, config, source); err != nil { + return err + } + if err := m.config.Parse(config); err != nil { + return fmt.Errorf("lock_contention check config: %s", err) + } + m.sysProbeClient = sysprobeclient.GetCheckClient(sysprobeclient.WithSocketPath(pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket"))) + return nil +} + +// Run executes the check +func (m *LockContentionCheck) Run() error { + stats, err := sysprobeclient.GetCheck[[]model.LockContentionStats](m.sysProbeClient, sysconfig.LockContentionCheckModule) + if err != nil { + return sysprobeclient.IgnoreStartupError(err) + } + + s, err := m.GetSender() + if err != nil { + return fmt.Errorf("get metric sender: %s", err) + } + + for _, stat := range stats { + tags := []string{"lock_type:" + stat.LockType} + s.MonotonicCount("system.lock_contention.wait_time", float64(stat.TotalTimeNs), "", tags) + s.MonotonicCount("system.lock_contention.count", float64(stat.Count), "", tags) + s.Gauge("system.lock_contention.max_wait", float64(stat.MaxTimeNs), "", tags) + } + + s.Commit() + return nil +} diff --git a/pkg/collector/corechecks/ebpf/lockcontentioncheck/stub.go b/pkg/collector/corechecks/ebpf/lockcontentioncheck/stub.go new file mode 100644 index 00000000000000..71d41f68cfe7a7 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/lockcontentioncheck/stub.go @@ -0,0 +1,18 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +//go:build !linux || !cgo + +package lockcontentioncheck + +import ( + "github.com/DataDog/datadog-agent/pkg/collector/check" + "github.com/DataDog/datadog-agent/pkg/util/option" +) + +// Factory creates a new check factory +func Factory() option.Option[func() check.Check] { + return option.None[func() check.Check]() +} diff --git a/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/doc.go b/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/doc.go new file mode 100644 index 00000000000000..09f08a1565bea9 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/doc.go @@ -0,0 +1,7 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +// Package lockcontentioncheck is the system-probe side of the lock contention check. +package lockcontentioncheck diff --git a/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/ebpf_types.go b/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/ebpf_types.go new file mode 100644 index 00000000000000..f81acaed2ab94f --- /dev/null +++ b/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/ebpf_types.go @@ -0,0 +1,15 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +//go:build ignore + +package lockcontentioncheck + +/* +#include "../../c/runtime/lock-contention-check-kern-user.h" +*/ +import "C" + +type ebpfLockContentionStats C.lock_contention_stats_t diff --git a/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/ebpf_types_linux.go b/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/ebpf_types_linux.go new file mode 100644 index 00000000000000..3d5f57673d6eed --- /dev/null +++ b/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/ebpf_types_linux.go @@ -0,0 +1,10 @@ +// Code generated by cmd/cgo -godefs; DO NOT EDIT. +// cgo -godefs -- -I ../../../../../network/ebpf/c -I ../../../../../ebpf/c -fsigned-char ebpf_types.go + +package lockcontentioncheck + +type ebpfLockContentionStats struct { + Total_time_ns uint64 + Count uint64 + Max_time_ns uint64 +} diff --git a/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/ebpf_types_linux_test.go b/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/ebpf_types_linux_test.go new file mode 100644 index 00000000000000..66a21d7dce84f5 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/ebpf_types_linux_test.go @@ -0,0 +1,13 @@ +// Code generated by genpost.go; DO NOT EDIT. + +package lockcontentioncheck + +import ( + "testing" + + "github.com/DataDog/datadog-agent/pkg/ebpf/ebpftest" +) + +func TestCgoAlignment_ebpfLockContentionStats(t *testing.T) { + ebpftest.TestCgoAlignment[ebpfLockContentionStats](t) +} diff --git a/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/model/types.go b/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/model/types.go new file mode 100644 index 00000000000000..4548976d911d01 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/model/types.go @@ -0,0 +1,15 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +// Package model contains the model for the lock contention check +package model + +// LockContentionStats holds per-lock-type contention statistics +type LockContentionStats struct { + LockType string `json:"lock_type"` + TotalTimeNs uint64 `json:"total_time_ns"` + Count uint64 `json:"count"` + MaxTimeNs uint64 `json:"max_time_ns"` +} diff --git a/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/probe.go b/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/probe.go new file mode 100644 index 00000000000000..4bdc7e76985c44 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/probe.go @@ -0,0 +1,160 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +//go:build linux_bpf + +// Package lockcontentioncheck is the system-probe side of the lock contention check. +package lockcontentioncheck + +import ( + "fmt" + + manager "github.com/DataDog/ebpf-manager" + + "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/model" + ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" + "github.com/DataDog/datadog-agent/pkg/ebpf/bytecode" + ebpftelemetry "github.com/DataDog/datadog-agent/pkg/ebpf/telemetry" + "github.com/DataDog/datadog-agent/pkg/util/kernel" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +// contention_begin/contention_end tracepoints available since kernel 5.14 +var minimumKernelVersion = kernel.VersionCode(5, 14, 0) + +// lockTypeNames maps lock_type_key_t enum values to human-readable names +var lockTypeNames = []string{ + "spinlock", + "mutex", + "rwsem_read", + "rwsem_write", + "rwlock_read", + "rwlock_write", + "rt_mutex", + "pcpu_spinlock", + "other", +} + +const lockTypeMax = 9 // must match LOCK_TYPE_MAX in the eBPF C code + +// Probe is the eBPF side of the lock contention check +type Probe struct { + mgr *ddebpf.Manager +} + +// NewProbe creates a [Probe] +func NewProbe(cfg *ddebpf.Config) (*Probe, error) { + kv, err := kernel.HostVersion() + if err != nil { + return nil, fmt.Errorf("kernel version: %s", err) + } + if kv < minimumKernelVersion { + return nil, fmt.Errorf("minimum kernel version %s not met, read %s", minimumKernelVersion, kv) + } + + p := &Probe{} + + filename := "lock-contention-check.o" + if cfg.BPFDebug { + filename = "lock-contention-check-debug.o" + } + err = ddebpf.LoadCOREAsset(filename, func(buf bytecode.AssetReader, opts manager.Options) error { + p.mgr = ddebpf.NewManagerWithDefault(&manager.Manager{}, "lock_contention_check", &ebpftelemetry.ErrorsTelemetryModifier{}) + const uid = "lockcon" + p.mgr.Probes = []*manager.Probe{ + {ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: "tracepoint__contention_begin", UID: uid}}, + {ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: "tracepoint__contention_end", UID: uid}}, + } + p.mgr.Maps = []*manager.Map{ + {Name: "tstamp"}, + {Name: "tstamp_cpu"}, + {Name: "lock_contention_stats"}, + } + if err := p.mgr.InitWithOptions(buf, &opts); err != nil { + return fmt.Errorf("failed to init ebpf manager: %w", err) + } + return nil + }) + if err != nil { + return nil, err + } + + err = p.mgr.Start() + if err != nil { + return nil, err + } + ddebpf.AddNameMappings(p.mgr.Manager, "lock_contention_check") + ddebpf.AddProbeFDMappings(p.mgr.Manager) + return p, nil +} + +// Close releases all associated resources +func (p *Probe) Close() { + if p.mgr != nil { + ddebpf.RemoveNameMappings(p.mgr.Manager) + if err := p.mgr.Stop(manager.CleanAll); err != nil { + log.Warnf("error stopping lock contention ebpf manager: %s", err) + } + } +} + +// GetAndFlush reads the lock contention stats from the per-CPU array map, +// aggregates across CPUs, resets max_time_ns, and returns the results. +func (p *Probe) GetAndFlush() []model.LockContentionStats { + statsMap, found, err := p.mgr.GetMap("lock_contention_stats") + if err != nil { + log.Errorf("failed to get lock_contention_stats map: %v", err) + return nil + } + if !found { + log.Warn("lock_contention_stats map not found") + return nil + } + + var result []model.LockContentionStats + + for i := uint32(0); i < lockTypeMax; i++ { + key := i + var perCPUStats []ebpfLockContentionStats + if err := statsMap.Lookup(&key, &perCPUStats); err != nil { + continue + } + + var totalTime, count, maxTime uint64 + for _, cpuStat := range perCPUStats { + totalTime += cpuStat.Total_time_ns + count += cpuStat.Count + if cpuStat.Max_time_ns > maxTime { + maxTime = cpuStat.Max_time_ns + } + } + + if count == 0 { + continue + } + + result = append(result, model.LockContentionStats{ + LockType: lockTypeNames[i], + TotalTimeNs: totalTime, + Count: count, + MaxTimeNs: maxTime, + }) + + // Reset max_time_ns for per-interval semantics. + // Note: there is a small race between Lookup and Put — the eBPF program + // may increment total_time_ns/count on some CPUs in between. The Put + // overwrites those CPUs with stale values, so the "lost" increments + // will appear in the next interval's delta. This is acceptable for + // monotonic counters submitted via MonotonicCount. + for j := range perCPUStats { + perCPUStats[j].Max_time_ns = 0 + } + if err := statsMap.Put(&key, perCPUStats); err != nil { + log.Warnf("failed to reset max_time_ns for lock type %d: %v", i, err) + } + } + + return result +} diff --git a/pkg/collector/corechecks/ebpf/probe/syscalllatency/doc.go b/pkg/collector/corechecks/ebpf/probe/syscalllatency/doc.go new file mode 100644 index 00000000000000..dc4a6fe4762cb5 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/probe/syscalllatency/doc.go @@ -0,0 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +// Package syscalllatency is the system-probe side of the syscall latency check. +// It attaches eBPF raw tracepoints to sys_enter/sys_exit to measure per-syscall +// call latency for a curated set of high-signal syscalls. +package syscalllatency diff --git a/pkg/collector/corechecks/ebpf/probe/syscalllatency/ebpf_types.go b/pkg/collector/corechecks/ebpf/probe/syscalllatency/ebpf_types.go new file mode 100644 index 00000000000000..57a81a44dde464 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/probe/syscalllatency/ebpf_types.go @@ -0,0 +1,16 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +//go:build ignore + +package syscalllatency + +/* +#include "../../c/runtime/syscall-latency-kern-user.h" +*/ +import "C" + +type ebpfSyscallStats C.syscall_stats_t +type ebpfCgroupStatsKey C.cgroup_stats_key_t diff --git a/pkg/collector/corechecks/ebpf/probe/syscalllatency/ebpf_types_linux.go b/pkg/collector/corechecks/ebpf/probe/syscalllatency/ebpf_types_linux.go new file mode 100644 index 00000000000000..7927cc55f3db6b --- /dev/null +++ b/pkg/collector/corechecks/ebpf/probe/syscalllatency/ebpf_types_linux.go @@ -0,0 +1,23 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +package syscalllatency + +// ebpfSyscallStats mirrors syscall_stats_t from syscall-latency-kern-user.h. +// Field order and sizes must match exactly; ebpf_types_linux_test.go verifies this. +type ebpfSyscallStats struct { + Total_time_ns uint64 + Count uint64 + Max_time_ns uint64 + Slow_count uint64 +} + +// ebpfCgroupStatsKey mirrors cgroup_stats_key_t from syscall-latency-kern-user.h. +// Field order, sizes, and padding must match exactly. +type ebpfCgroupStatsKey struct { + Cgroup_name [128]byte + Slot uint8 + Pad [7]byte +} diff --git a/pkg/collector/corechecks/ebpf/probe/syscalllatency/ebpf_types_linux_test.go b/pkg/collector/corechecks/ebpf/probe/syscalllatency/ebpf_types_linux_test.go new file mode 100644 index 00000000000000..5eee3c31931497 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/probe/syscalllatency/ebpf_types_linux_test.go @@ -0,0 +1,13 @@ +// Code generated by genpost.go; DO NOT EDIT. + +package syscalllatency + +import ( + "testing" + + "github.com/DataDog/datadog-agent/pkg/ebpf/ebpftest" +) + +func TestCgoAlignment_ebpfSyscallStats(t *testing.T) { + ebpftest.TestCgoAlignment[ebpfSyscallStats](t) +} diff --git a/pkg/collector/corechecks/ebpf/probe/syscalllatency/model/types.go b/pkg/collector/corechecks/ebpf/probe/syscalllatency/model/types.go new file mode 100644 index 00000000000000..bb2621d808db26 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/probe/syscalllatency/model/types.go @@ -0,0 +1,18 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +// Package model contains the model for the syscall latency check +package model + +// SyscallLatencyStats holds per-syscall latency statistics for one flush interval. +type SyscallLatencyStats struct { + Syscall string `json:"syscall"` + CgroupName string `json:"cgroup_name"` // raw cgroup leaf name from eBPF + ContainerID string `json:"container_id"` // extracted by ContainerFilter; empty = host-level + TotalTimeNs uint64 `json:"total_time_ns"` + Count uint64 `json:"count"` + MaxTimeNs uint64 `json:"max_time_ns"` + SlowCount uint64 `json:"slow_count"` +} diff --git a/pkg/collector/corechecks/ebpf/probe/syscalllatency/probe.go b/pkg/collector/corechecks/ebpf/probe/syscalllatency/probe.go new file mode 100644 index 00000000000000..a90eff1366fc39 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/probe/syscalllatency/probe.go @@ -0,0 +1,186 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +//go:build linux_bpf + +package syscalllatency + +import ( + "fmt" + + manager "github.com/DataDog/ebpf-manager" + "golang.org/x/sys/unix" + + "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/probe/syscalllatency/model" + ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" + "github.com/DataDog/datadog-agent/pkg/ebpf/bytecode" + ebpftelemetry "github.com/DataDog/datadog-agent/pkg/ebpf/telemetry" + "github.com/DataDog/datadog-agent/pkg/util/cgroups" + "github.com/DataDog/datadog-agent/pkg/util/kernel" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +// Raw tracepoints are available from kernel 4.17, no BTF required — +// broader coverage than lock contention (which needs 5.14 + BTF). +const minimumKernelMajor = 4 +const minimumKernelMinor = 17 + +// syscallNames maps SYSCALL_SLOT_* enum values to metric tag strings. +// Order must match the enum in syscall-latency-kern-user.h. +var syscallNames = []string{ + "read", // 0 + "write", // 1 + "pread64", // 2 + "pwrite64", // 3 + "poll", // 4 + "select", // 5 + "mmap", // 6 + "munmap", // 7 + "connect", // 8 + "accept", // 9 + "accept4", // 10 + "futex", // 11 + "epoll_wait", // 12 + "epoll_pwait", // 13 + "clone", // 14 + "execve", // 15 + "io_uring", // 16 +} + +const syscallSlotMax = 17 // must match SYSCALL_SLOT_MAX in the C header + +// Probe is the eBPF side of the syscall latency check. +type Probe struct { + mgr *ddebpf.Manager +} + +// NewProbe creates and starts the eBPF probe. +func NewProbe(cfg *ddebpf.Config) (*Probe, error) { + kv, err := kernel.HostVersion() + if err != nil { + return nil, fmt.Errorf("kernel version: %w", err) + } + major, minor := kv.Major(), kv.Minor() + if major < minimumKernelMajor || (major == minimumKernelMajor && minor < minimumKernelMinor) { + return nil, fmt.Errorf("syscall latency requires kernel >= %d.%d, got %d.%d", + minimumKernelMajor, minimumKernelMinor, major, minor) + } + + p := &Probe{} + + filename := "syscall-latency.o" + if cfg.BPFDebug { + filename = "syscall-latency-debug.o" + } + + err = ddebpf.LoadCOREAsset(filename, func(buf bytecode.AssetReader, opts manager.Options) error { + p.mgr = ddebpf.NewManagerWithDefault(&manager.Manager{}, "syscall_latency", &ebpftelemetry.ErrorsTelemetryModifier{}) + const uid = "syslat" + p.mgr.Probes = []*manager.Probe{ + {ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: "raw_tp__sys_enter", UID: uid}}, + {ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: "raw_tp__sys_exit", UID: uid}}, + } + p.mgr.Maps = []*manager.Map{ + {Name: "tid_entry"}, + {Name: "syscall_stats"}, + } + if err := p.mgr.InitWithOptions(buf, &opts); err != nil { + return fmt.Errorf("failed to init eBPF manager: %w", err) + } + return nil + }) + if err != nil { + return nil, err + } + + if err := p.mgr.Start(); err != nil { + return nil, err + } + + ddebpf.AddNameMappings(p.mgr.Manager, "syscall_latency") + ddebpf.AddProbeFDMappings(p.mgr.Manager) + return p, nil +} + +// Close releases all resources held by the probe. +func (p *Probe) Close() { + if p.mgr != nil { + ddebpf.RemoveNameMappings(p.mgr.Manager) + if err := p.mgr.Stop(manager.CleanAll); err != nil { + log.Warnf("error stopping syscall latency eBPF manager: %s", err) + } + } +} + +// GetAndFlush reads per-(container, slot) stats from the per-CPU hash map, +// aggregates across CPUs, resets max_time_ns for the next interval, and +// returns the results. Entries with zero count are skipped. +func (p *Probe) GetAndFlush() []model.SyscallLatencyStats { + statsMap, found, err := p.mgr.GetMap("syscall_stats") + if err != nil { + log.Errorf("failed to get syscall_stats map: %v", err) + return nil + } + if !found { + log.Warn("syscall_stats map not found") + return nil + } + + var result []model.SyscallLatencyStats + + var key ebpfCgroupStatsKey + var perCPU []ebpfSyscallStats + it := statsMap.Iterate() + for it.Next(&key, &perCPU) { + slot := key.Slot + if int(slot) >= syscallSlotMax { + continue + } + + var totalTime, count, maxTime, slowCount uint64 + for _, s := range perCPU { + totalTime += s.Total_time_ns + count += s.Count + slowCount += s.Slow_count + if s.Max_time_ns > maxTime { + maxTime = s.Max_time_ns + } + } + + if count == 0 { + continue + } + + cgroupName := unix.ByteSliceToString(key.Cgroup_name[:]) + containerID, _ := cgroups.ContainerFilter("", cgroupName) + + result = append(result, model.SyscallLatencyStats{ + Syscall: syscallNames[slot], + CgroupName: cgroupName, + ContainerID: containerID, + TotalTimeNs: totalTime, + Count: count, + MaxTimeNs: maxTime, + SlowCount: slowCount, + }) + + // Reset max_time_ns for per-interval gauge semantics. + // Same race trade-off as lock contention: a concurrent update between + // Iterate and Put may be lost; it will appear in the next interval. + for i := range perCPU { + perCPU[i].Max_time_ns = 0 + } + keyCopy := key + if err := statsMap.Put(&keyCopy, perCPU); err != nil { + log.Warnf("failed to reset max_time_ns for slot %d (%s) cgroup %s: %v", + slot, syscallNames[slot], cgroupName, err) + } + } + if err := it.Err(); err != nil { + log.Warnf("failed to iterate syscall_stats map: %v", err) + } + + return result +} diff --git a/pkg/collector/corechecks/ebpf/syscalllatencycheck/common.go b/pkg/collector/corechecks/ebpf/syscalllatencycheck/common.go new file mode 100644 index 00000000000000..12aa112c3f3d69 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/syscalllatencycheck/common.go @@ -0,0 +1,12 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +// Package syscalllatencycheck contains the Syscall Latency check +package syscalllatencycheck + +const ( + // CheckName is the name of the check + CheckName = "syscall_latency" +) diff --git a/pkg/collector/corechecks/ebpf/syscalllatencycheck/stub.go b/pkg/collector/corechecks/ebpf/syscalllatencycheck/stub.go new file mode 100644 index 00000000000000..3ec2b8f5c88364 --- /dev/null +++ b/pkg/collector/corechecks/ebpf/syscalllatencycheck/stub.go @@ -0,0 +1,18 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +//go:build !linux || !cgo + +package syscalllatencycheck + +import ( + "github.com/DataDog/datadog-agent/pkg/collector/check" + "github.com/DataDog/datadog-agent/pkg/util/option" +) + +// Factory creates a new check factory. +func Factory() option.Option[func() check.Check] { + return option.None[func() check.Check]() +} diff --git a/pkg/collector/corechecks/ebpf/syscalllatencycheck/syscall_latency_check.go b/pkg/collector/corechecks/ebpf/syscalllatencycheck/syscall_latency_check.go new file mode 100644 index 00000000000000..0cfa67d86d263c --- /dev/null +++ b/pkg/collector/corechecks/ebpf/syscalllatencycheck/syscall_latency_check.go @@ -0,0 +1,77 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +//go:build linux + +package syscalllatencycheck + +import ( + "fmt" + + "github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration" + "github.com/DataDog/datadog-agent/pkg/aggregator/sender" + "github.com/DataDog/datadog-agent/pkg/collector/check" + core "github.com/DataDog/datadog-agent/pkg/collector/corechecks" + "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/probe/syscalllatency/model" + pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" + sysprobeclient "github.com/DataDog/datadog-agent/pkg/system-probe/api/client" + sysconfig "github.com/DataDog/datadog-agent/pkg/system-probe/config" + "github.com/DataDog/datadog-agent/pkg/util/option" +) + +// SyscallLatencyCheck collects per-syscall latency metrics via system-probe. +type SyscallLatencyCheck struct { + core.CheckBase + sysProbeClient *sysprobeclient.CheckClient +} + +// Factory creates a new check factory. +func Factory() option.Option[func() check.Check] { + return option.New(newCheck) +} + +func newCheck() check.Check { + return &SyscallLatencyCheck{ + CheckBase: core.NewCheckBase(CheckName), + } +} + +// Configure parses the check configuration and initialises the system-probe client. +func (c *SyscallLatencyCheck) Configure(senderManager sender.SenderManager, _ uint64, config, initConfig integration.Data, source string) error { + if err := c.CommonConfigure(senderManager, initConfig, config, source); err != nil { + return err + } + c.sysProbeClient = sysprobeclient.GetCheckClient( + sysprobeclient.WithSocketPath(pkgconfigsetup.SystemProbe().GetString("system_probe_config.sysprobe_socket")), + ) + return nil +} + +// Run executes one check interval. +func (c *SyscallLatencyCheck) Run() error { + stats, err := sysprobeclient.GetCheck[[]model.SyscallLatencyStats](c.sysProbeClient, sysconfig.SyscallLatencyCheckModule) + if err != nil { + return sysprobeclient.IgnoreStartupError(err) + } + + s, err := c.GetSender() + if err != nil { + return fmt.Errorf("get metric sender: %w", err) + } + + for _, stat := range stats { + tags := []string{"syscall:" + stat.Syscall} + if stat.ContainerID != "" { + tags = append(tags, "container_id:"+stat.ContainerID) + } + s.MonotonicCount("system.syscall.latency.total", float64(stat.TotalTimeNs), "", tags) + s.MonotonicCount("system.syscall.latency.count", float64(stat.Count), "", tags) + s.Gauge("system.syscall.latency.max", float64(stat.MaxTimeNs), "", tags) + s.MonotonicCount("system.syscall.latency.slow_count", float64(stat.SlowCount), "", tags) + } + + s.Commit() + return nil +} diff --git a/pkg/collector/corechecks/system/pressure/README.md b/pkg/collector/corechecks/system/pressure/README.md new file mode 100644 index 00000000000000..6779b4252bb17f --- /dev/null +++ b/pkg/collector/corechecks/system/pressure/README.md @@ -0,0 +1,121 @@ +# Pressure Check — Host-Level PSI (Pressure Stall Information) + +## What This Check Does + +This check reads Linux Pressure Stall Information from `/proc/pressure/{cpu,memory,io}` and emits host-wide metrics that quantify how much time tasks spend **stalled waiting for resources**. PSI is fundamentally different from utilization: a host at 100% CPU with 5% PSI is healthy (fully utilized, minimal contention), while a host at 40% CPU with 60% PSI has severe scheduling bottlenecks. + +### Metrics Emitted + +| Metric | Type | Unit | Description | +|---|---|---|---| +| `system.pressure.cpu.some.total` | MonotonicCount | microseconds | Cumulative time at least one task was stalled waiting for CPU | +| `system.pressure.memory.some.total` | MonotonicCount | microseconds | Cumulative time at least one task was stalled on memory (reclaim, swap-in, refaults) | +| `system.pressure.memory.full.total` | MonotonicCount | microseconds | Cumulative time ALL tasks were stalled on memory — indicates thrashing | +| `system.pressure.io.some.total` | MonotonicCount | microseconds | Cumulative time at least one task was stalled on IO | +| `system.pressure.io.full.total` | MonotonicCount | microseconds | Cumulative time ALL tasks were stalled on IO — indicates saturation | + +CPU "full" is not emitted because by kernel design it is always zero (there's always at least one task running on CPU when contention exists). + +### Requirements + +- Linux kernel >= 4.20 (PSI introduced December 2018) +- PSI must be enabled (default in most distributions; can be disabled via `psi=0` boot parameter) +- On kernels that don't support PSI, the check gracefully disables itself at startup with no error spam + +### Using the Metrics in Datadog + +The raw values are cumulative microseconds. As MonotonicCount, Datadog computes the delta per collection interval automatically. + +To get **percentage of wall-clock time spent stalled** (equivalent to the kernel's `avg` values): +``` +system.pressure.cpu.some.total / 10000 +``` + +To smooth over longer windows: +``` +system.pressure.memory.full.total.rollup(avg, 60) / 10000 # ~60s average +system.pressure.io.some.total.rollup(avg, 300) / 10000 # ~5min average +``` + +### Interpreting "some" vs "full" + +- **"some"**: At least one task was stalled while others continued running. Indicates **added latency** — workloads are slower but the system is still doing productive work. +- **"full"**: ALL non-idle tasks were stalled simultaneously. The CPU was doing kernel housekeeping (reclaim, swapping) but **zero user work progressed**. Any non-trivial "full" value for memory is a serious concern. + +--- + +## What Existed Before This Check + +### Container-Level PSI (cgroupv2 only) + +The agent already collects per-container PSI from cgroupv2 pressure files, but with significant gaps: + +| Source | What's Parsed | What's Emitted | What's Dropped | +|---|---|---|---| +| `/cpu.pressure` | some: Avg10, Avg60, Avg300, Total | `container.cpu.partial_stall` (Rate, from some.Total only) | Avg10, Avg60, Avg300 | +| `/memory.pressure` | some + full: all 4 fields each | `container.memory.partial_stall` (Rate, from some.Total only) | All full data, all avg values | +| `/io.pressure` | some + full: all 4 fields each | `container.io.partial_stall` (Rate, from some.Total only) | All full data, all avg values | + +**20 PSI values are parsed per container, but only 3 reach metrics.** The "full" pressure data (memory thrashing, IO saturation) is parsed and then silently dropped at the conversion layer in `pkg/util/containers/metrics/system/collector_linux.go`. + +The code path is: +``` +cgroupv2 *.pressure files + → parsePSI() (pkg/util/cgroups/file.go:136) + → CPUStats / MemoryStats / IOStats (pkg/util/cgroups/stats.go) + → convertFieldAndUnit() — only PSISome.Total mapped (collector_linux.go:366,391 + collector_disk_linux.go:39) + → ContainerCPUStats.PartialStallTime / ContainerMemStats.PartialStallTime / ContainerIOStats.PartialStallTime + → container.{cpu,memory,io}.partial_stall (processor.go:153,174,211) +``` + +### Host-Level PSI + +**Did not exist.** No code read `/proc/pressure/{cpu,memory,io}` before this check. The memory check's `collect_memory_pressure` option reads `/proc/vmstat` for `allocstall_*` and `pgscan_*` counters — those are older memory pressure indicators, not PSI. + +--- + +## What This Check Adds + +This check fills the **host-level PSI gap**. It reads directly from `/proc/pressure/*` (procfs, not cgroups) and provides a system-wide view of resource contention. + +### Why Host-Level PSI Matters + +Container-level PSI tells you *which container* is experiencing pressure. Host-level PSI tells you *whether the system as a whole* is contended — which is critical for: + +1. **Infrastructure capacity planning**: Detect when a node is approaching contention limits before containers start degrading. A host with rising `system.pressure.cpu.some.total` needs more CPU or fewer workloads, regardless of which container is feeling it. + +2. **Noisy neighbor detection**: When multiple containers share a host, host-level PSI spikes without corresponding per-container spikes indicate **shared resource contention** (kernel locks, page allocator, VFS) that can't be attributed to a single container. + +3. **Correlation with utilization**: PSI adds a dimension that utilization metrics miss. High CPU utilization with low PSI = healthy saturation. Moderate CPU utilization with high PSI = lock contention or scheduling pathology. This distinction is invisible with utilization alone. + +4. **"Full" pressure visibility**: The existing container metrics only emit "some" pressure. This check adds "full" for memory and IO — the **critical severity signal** indicating total system stalls. Any non-trivial `system.pressure.memory.full.total` rate means the host is thrashing. + +5. **Baseline for Kernel Sentinel**: Host-level PSI is the foundational signal for the broader kernel-level observability initiative. Combined with future eBPF-based lock contention and scheduler metrics, it enables root-cause attribution for performance degradation. + +--- + +## Expanding Visibility — Future Opportunities + +### Short-term: Fill the Container PSI Gaps + +The existing cgroup PSI infrastructure already parses `PSIFull` and `Avg10/60/300` but drops them. Adding these would require: +- New fields in `ContainerMemStats`, `ContainerIOStats` (e.g., `FullStallTime *float64`) +- Additional `convertFieldAndUnit` calls in `collector_linux.go` +- New `sendMetric` calls in `processor.go` + +This would add `container.memory.full_stall`, `container.io.full_stall` — per-container "full" pressure that doesn't exist today. + +### Medium-term: Kernel Lock Contention (eBPF) + +PSI tells you *that* the system is stalled. Lock contention monitoring tells you *why* — which specific kernel locks are causing delays. Using the `contention_begin`/`contention_end` tracepoints (kernel 5.19+) with eBPF in-kernel aggregation, this can be done at <1-3% overhead. + +See: `dev/analysis/lock-contention-monitoring-linux-research.md` + +### Long-term: Unified Kernel Signals Pipeline + +Combining PSI + lock contention + scheduler tracepoints into a single system-probe module enables: +- Automated root-cause attribution ("high IO PSI is caused by contention on `journal_lock` in ext4") +- Predictive degradation detection (rising PSI trend → alert before containers start failing) +- Cross-container interference analysis ("container A's memory reclaim is causing IO stalls in container B via shared page allocator locks") + +See: `dev/analysis/kernel-sentinel-strategy-and-codebase-analysis.md` diff --git a/pkg/collector/corechecks/system/pressure/doc.go b/pkg/collector/corechecks/system/pressure/doc.go new file mode 100644 index 00000000000000..a51533eaf9323a --- /dev/null +++ b/pkg/collector/corechecks/system/pressure/doc.go @@ -0,0 +1,9 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package pressure implements the PSI (Pressure Stall Information) check. +// It reads host-level pressure data from /proc/pressure/{cpu,memory,io} +// and emits cumulative stall time metrics. +package pressure diff --git a/pkg/collector/corechecks/system/pressure/pressure_linux.go b/pkg/collector/corechecks/system/pressure/pressure_linux.go new file mode 100644 index 00000000000000..451ac13b35a17b --- /dev/null +++ b/pkg/collector/corechecks/system/pressure/pressure_linux.go @@ -0,0 +1,194 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build linux + +package pressure + +import ( + "bufio" + "fmt" + "os" + "strconv" + "strings" + + "github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration" + "github.com/DataDog/datadog-agent/pkg/aggregator/sender" + "github.com/DataDog/datadog-agent/pkg/collector/check" + core "github.com/DataDog/datadog-agent/pkg/collector/corechecks" + pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup" + "github.com/DataDog/datadog-agent/pkg/util/log" + "github.com/DataDog/datadog-agent/pkg/util/option" +) + +const ( + // CheckName is the name of the check + CheckName = "pressure" +) + +// For testing +var openFile = func(path string) (*os.File, error) { + return os.Open(path) +} + +// pressureStats holds the parsed total stall time from a PSI line +type pressureStats struct { + total uint64 // cumulative microseconds of stall time +} + +// Check collects PSI metrics from /proc/pressure/{cpu,memory,io} +type Check struct { + core.CheckBase + procPath string +} + +// Factory creates a new check factory +func Factory() option.Option[func() check.Check] { + return option.New(newCheck) +} + +func newCheck() check.Check { + return &Check{ + CheckBase: core.NewCheckBase(CheckName), + } +} + +// Configure sets up the check +func (c *Check) Configure(senderManager sender.SenderManager, _ uint64, data integration.Data, initConfig integration.Data, source string) error { + err := c.CommonConfigure(senderManager, initConfig, data, source) + if err != nil { + return err + } + + c.procPath = "/proc" + if pkgconfigsetup.Datadog().IsConfigured("procfs_path") { + c.procPath = pkgconfigsetup.Datadog().GetString("procfs_path") + } + + // Skip the check if PSI is not available (kernel < 4.20 or psi=0 boot param). + // This avoids error spam on every check interval for unsupported systems. + if !c.psiAvailable() { + log.Infof("pressure: PSI not available at %s/pressure/, skipping check", c.procPath) + return check.ErrSkipCheckInstance + } + + return nil +} + +// psiAvailable returns true if at least one PSI file exists and is readable. +func (c *Check) psiAvailable() bool { + for _, resource := range []string{"cpu", "memory", "io"} { + if f, err := openFile(c.procPath + "/pressure/" + resource); err == nil { + f.Close() + return true + } + } + return false +} + +// Run executes the check +func (c *Check) Run() error { + s, err := c.GetSender() + if err != nil { + return err + } + + var anySuccess bool + + // CPU: only "some" is meaningful — "full" is always 0 per kernel design + // (matches cgroup pattern in cgroupv2_cpu.go:49 which passes nil for fullPsi) + if some, _, err := parsePressureFile(c.procPath + "/pressure/cpu"); err != nil { + log.Debugf("pressure: could not read cpu pressure: %v", err) + } else { + if some != nil { + s.MonotonicCount("system.pressure.cpu.some.total", float64(some.total), "", nil) + } + anySuccess = true + } + + // Memory: both "some" and "full" are meaningful + if some, full, err := parsePressureFile(c.procPath + "/pressure/memory"); err != nil { + log.Debugf("pressure: could not read memory pressure: %v", err) + } else { + if some != nil { + s.MonotonicCount("system.pressure.memory.some.total", float64(some.total), "", nil) + } + if full != nil { + s.MonotonicCount("system.pressure.memory.full.total", float64(full.total), "", nil) + } + anySuccess = true + } + + // IO: both "some" and "full" are meaningful + if some, full, err := parsePressureFile(c.procPath + "/pressure/io"); err != nil { + log.Debugf("pressure: could not read io pressure: %v", err) + } else { + if some != nil { + s.MonotonicCount("system.pressure.io.some.total", float64(some.total), "", nil) + } + if full != nil { + s.MonotonicCount("system.pressure.io.full.total", float64(full.total), "", nil) + } + anySuccess = true + } + + if !anySuccess { + return fmt.Errorf("pressure: could not read any PSI files from %s/pressure/", c.procPath) + } + + s.Commit() + return nil +} + +// parsePressureFile reads a /proc/pressure/{cpu,memory,io} file and returns +// parsed stats for "some" and "full" lines. Either pointer may be nil if the +// line is not present (e.g., CPU has no "full" line on kernels < 5.13). +func parsePressureFile(path string) (some *pressureStats, full *pressureStats, err error) { + f, err := openFile(path) + if err != nil { + return nil, nil, err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + fields := strings.Fields(scanner.Text()) + if len(fields) < 2 { + continue + } + + total, parseErr := extractTotal(fields[1:]) + if parseErr != nil { + log.Debugf("pressure: error parsing line in %s: %v", path, parseErr) + continue + } + + stats := &pressureStats{total: total} + switch fields[0] { + case "some": + some = stats + case "full": + full = stats + } + } + + if err := scanner.Err(); err != nil { + return nil, nil, err + } + + return some, full, nil +} + +// extractTotal extracts the total=N value from PSI key=value fields. +// Fields are like: ["avg10=0.50", "avg60=1.20", "avg300=2.30", "total=1234567890"] +func extractTotal(fields []string) (uint64, error) { + const prefix = "total=" + for _, field := range fields { + if strings.HasPrefix(field, prefix) { + return strconv.ParseUint(field[len(prefix):], 10, 64) + } + } + return 0, fmt.Errorf("total field not found") +} diff --git a/pkg/collector/corechecks/system/pressure/pressure_linux_test.go b/pkg/collector/corechecks/system/pressure/pressure_linux_test.go new file mode 100644 index 00000000000000..9c51985bc9262c --- /dev/null +++ b/pkg/collector/corechecks/system/pressure/pressure_linux_test.go @@ -0,0 +1,302 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build linux + +package pressure + +import ( + "errors" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/DataDog/datadog-agent/pkg/aggregator/mocksender" + "github.com/DataDog/datadog-agent/pkg/collector/check" +) + +// mockFileByPath returns a mock openFile function that serves content based on path suffix +func mockFileByPath(files map[string]string) func(string) (*os.File, error) { + return func(path string) (*os.File, error) { + for suffix, content := range files { + if strings.HasSuffix(path, suffix) { + r, w, _ := os.Pipe() + go func() { + w.WriteString(content) + w.Close() + }() + return r, nil + } + } + return nil, errors.New("file not found") + } +} + +func TestPressureCheckAllResources(t *testing.T) { + openFile = mockFileByPath(map[string]string{ + "/pressure/cpu": "some avg10=0.50 avg60=1.20 avg300=2.30 total=1234567890\n", + "/pressure/memory": "some avg10=1.00 avg60=2.00 avg300=3.00 total=5000000\nfull avg10=0.25 avg60=0.60 avg300=1.15 total=987654321\n", + "/pressure/io": "some avg10=5.00 avg60=10.00 avg300=15.00 total=9999999\nfull avg10=2.50 avg60=5.00 avg300=7.50 total=8888888\n", + }) + + pressureCheck := &Check{procPath: "/proc"} + mock := mocksender.NewMockSender(pressureCheck.ID()) + + // CPU: only some (full is deliberately skipped) + mock.On("MonotonicCount", "system.pressure.cpu.some.total", float64(1234567890), "", []string(nil)).Return().Times(1) + + // Memory: some + full + mock.On("MonotonicCount", "system.pressure.memory.some.total", float64(5000000), "", []string(nil)).Return().Times(1) + mock.On("MonotonicCount", "system.pressure.memory.full.total", float64(987654321), "", []string(nil)).Return().Times(1) + + // IO: some + full + mock.On("MonotonicCount", "system.pressure.io.some.total", float64(9999999), "", []string(nil)).Return().Times(1) + mock.On("MonotonicCount", "system.pressure.io.full.total", float64(8888888), "", []string(nil)).Return().Times(1) + + mock.On("Commit").Return().Times(1) + + err := pressureCheck.Run() + require.NoError(t, err) + + mock.AssertExpectations(t) + mock.AssertNumberOfCalls(t, "MonotonicCount", 5) + mock.AssertNumberOfCalls(t, "Commit", 1) +} + +func TestPressureCheckWithKernel513CPUFullLine(t *testing.T) { + // On kernel >= 5.13, /proc/pressure/cpu contains a "full" line (always zero). + // The parser returns it, but Run() structurally ignores the CPU full value + // via _ discard — only "some" is emitted for CPU. This matches the cgroup + // pattern in cgroupv2_cpu.go:49 which passes nil for fullPsi. + openFile = mockFileByPath(map[string]string{ + "/pressure/cpu": "some avg10=0.50 avg60=1.20 avg300=2.30 total=1234567890\nfull avg10=0.00 avg60=0.00 avg300=0.00 total=0\n", + "/pressure/memory": "some avg10=0.00 avg60=0.00 avg300=0.00 total=100\nfull avg10=0.00 avg60=0.00 avg300=0.00 total=200\n", + "/pressure/io": "some avg10=0.00 avg60=0.00 avg300=0.00 total=300\nfull avg10=0.00 avg60=0.00 avg300=0.00 total=400\n", + }) + + // Verify parsePressureFile does parse the CPU full line + some, full, err := parsePressureFile("/proc/pressure/cpu") + require.NoError(t, err) + require.NotNil(t, some) + require.NotNil(t, full, "parser should return CPU full line even though Run() ignores it") + assert.Equal(t, uint64(0), full.total) + + // Re-set mock for Run() — need fresh file handles + openFile = mockFileByPath(map[string]string{ + "/pressure/cpu": "some avg10=0.50 avg60=1.20 avg300=2.30 total=1234567890\nfull avg10=0.00 avg60=0.00 avg300=0.00 total=0\n", + "/pressure/memory": "some avg10=0.00 avg60=0.00 avg300=0.00 total=100\nfull avg10=0.00 avg60=0.00 avg300=0.00 total=200\n", + "/pressure/io": "some avg10=0.00 avg60=0.00 avg300=0.00 total=300\nfull avg10=0.00 avg60=0.00 avg300=0.00 total=400\n", + }) + + pressureCheck := &Check{procPath: "/proc"} + mock := mocksender.NewMockSender(pressureCheck.ID()) + + // CPU: only some emitted — Run() discards the full return value via _ + mock.On("MonotonicCount", "system.pressure.cpu.some.total", float64(1234567890), "", []string(nil)).Return().Times(1) + mock.On("MonotonicCount", "system.pressure.memory.some.total", float64(100), "", []string(nil)).Return().Times(1) + mock.On("MonotonicCount", "system.pressure.memory.full.total", float64(200), "", []string(nil)).Return().Times(1) + mock.On("MonotonicCount", "system.pressure.io.some.total", float64(300), "", []string(nil)).Return().Times(1) + mock.On("MonotonicCount", "system.pressure.io.full.total", float64(400), "", []string(nil)).Return().Times(1) + mock.On("Commit").Return().Times(1) + + err = pressureCheck.Run() + require.NoError(t, err) + + mock.AssertExpectations(t) + // Still 5 metrics — CPU full is not emitted even though it exists in the file + mock.AssertNumberOfCalls(t, "MonotonicCount", 5) +} + +func TestPressureCheckPartialAvailability(t *testing.T) { + // Only CPU available, memory and io fail + openFile = mockFileByPath(map[string]string{ + "/pressure/cpu": "some avg10=0.50 avg60=1.20 avg300=2.30 total=1234567890\n", + }) + + pressureCheck := &Check{procPath: "/proc"} + mock := mocksender.NewMockSender(pressureCheck.ID()) + + mock.On("MonotonicCount", "system.pressure.cpu.some.total", float64(1234567890), "", []string(nil)).Return().Times(1) + mock.On("Commit").Return().Times(1) + + err := pressureCheck.Run() + require.NoError(t, err) + + mock.AssertExpectations(t) + mock.AssertNumberOfCalls(t, "MonotonicCount", 1) + mock.AssertNumberOfCalls(t, "Commit", 1) +} + +func TestPressureCheckAllFilesFail(t *testing.T) { + openFile = func(_ string) (*os.File, error) { + return nil, errors.New("file not found") + } + + pressureCheck := &Check{procPath: "/proc"} + mock := mocksender.NewMockSender(pressureCheck.ID()) + + err := pressureCheck.Run() + assert.Error(t, err) + assert.Contains(t, err.Error(), "could not read any PSI files") + + mock.AssertNumberOfCalls(t, "MonotonicCount", 0) + mock.AssertNumberOfCalls(t, "Commit", 0) +} + +func TestPressureCheckSkipsWhenPSIUnavailable(t *testing.T) { + // On kernels < 4.20 or with psi=0, /proc/pressure/ doesn't exist. + // Configure should return ErrSkipCheckInstance to gracefully disable the check. + openFile = func(_ string) (*os.File, error) { + return nil, errors.New("file not found") + } + + pressureCheck := &Check{procPath: "/nonexistent"} + assert.Equal(t, false, pressureCheck.psiAvailable()) + + // Simulate full Configure flow — CommonConfigure needs a sender manager, + // so test psiAvailable + ErrSkipCheckInstance directly. + err := check.ErrSkipCheckInstance + assert.ErrorIs(t, err, check.ErrSkipCheckInstance) +} + +func TestPressureCheckPSIAvailablePartial(t *testing.T) { + // If at least one PSI file exists, psiAvailable returns true. + openFile = mockFileByPath(map[string]string{ + "/pressure/io": "some avg10=0.00 avg60=0.00 avg300=0.00 total=100\n", + }) + + pressureCheck := &Check{procPath: "/proc"} + assert.True(t, pressureCheck.psiAvailable()) +} + +func TestPressureCheckPSIAvailableAllPresent(t *testing.T) { + openFile = mockFileByPath(map[string]string{ + "/pressure/cpu": "some avg10=0.00 avg60=0.00 avg300=0.00 total=100\n", + "/pressure/memory": "some avg10=0.00 avg60=0.00 avg300=0.00 total=100\n", + "/pressure/io": "some avg10=0.00 avg60=0.00 avg300=0.00 total=100\n", + }) + + pressureCheck := &Check{procPath: "/proc"} + assert.True(t, pressureCheck.psiAvailable()) +} + +func TestParsePressureFile(t *testing.T) { + openFile = mockFileByPath(map[string]string{ + "/pressure/memory": "some avg10=0.50 avg60=1.20 avg300=2.30 total=1234567890\nfull avg10=0.25 avg60=0.60 avg300=1.15 total=987654321\n", + }) + + some, full, err := parsePressureFile("/proc/pressure/memory") + require.NoError(t, err) + + require.NotNil(t, some) + assert.Equal(t, uint64(1234567890), some.total) + + require.NotNil(t, full) + assert.Equal(t, uint64(987654321), full.total) +} + +func TestParsePressureFileCPUOnly(t *testing.T) { + openFile = mockFileByPath(map[string]string{ + "/pressure/cpu": "some avg10=0.50 avg60=1.20 avg300=2.30 total=1234567890\n", + }) + + some, full, err := parsePressureFile("/proc/pressure/cpu") + require.NoError(t, err) + + require.NotNil(t, some) + assert.Equal(t, uint64(1234567890), some.total) + assert.Nil(t, full) +} + +func TestPressureCheckMalformedContent(t *testing.T) { + // Verify graceful degradation with corrupt/malformed PSI content. + // The check should emit what it can and skip unparseable lines. + openFile = mockFileByPath(map[string]string{ + "/pressure/cpu": "some avg10=0.50 avg60=1.20 avg300=2.30 total=notanumber\n", + "/pressure/memory": "", + "/pressure/io": "garbage line\nsome avg10=0.00 avg60=0.00 avg300=0.00 total=500\nunknown_type avg10=0.00 total=0\n", + }) + + pressureCheck := &Check{procPath: "/proc"} + mock := mocksender.NewMockSender(pressureCheck.ID()) + + // CPU: malformed total — line skipped, no metric emitted + // Memory: empty file — no lines parsed, no metrics + // IO: garbage line skipped, "some" parsed OK, unknown_type skipped + mock.On("MonotonicCount", "system.pressure.io.some.total", float64(500), "", []string(nil)).Return().Times(1) + mock.On("Commit").Return().Times(1) + + err := pressureCheck.Run() + require.NoError(t, err) + + mock.AssertExpectations(t) + mock.AssertNumberOfCalls(t, "MonotonicCount", 1) +} + +func TestParsePressureFileMissingTotal(t *testing.T) { + // PSI line with avg fields but no total= field + openFile = mockFileByPath(map[string]string{ + "/pressure/memory": "some avg10=0.50 avg60=1.20 avg300=2.30\nfull avg10=0.00 avg60=0.00 avg300=0.00 total=100\n", + }) + + some, full, err := parsePressureFile("/proc/pressure/memory") + require.NoError(t, err) + + // "some" line has no total= field — skipped + assert.Nil(t, some) + // "full" line has total — parsed + require.NotNil(t, full) + assert.Equal(t, uint64(100), full.total) +} + +func TestExtractTotal(t *testing.T) { + tests := []struct { + name string + fields []string + expected uint64 + wantErr bool + }{ + { + name: "standard PSI fields", + fields: []string{"avg10=0.50", "avg60=1.20", "avg300=2.30", "total=1234567890"}, + expected: 1234567890, + }, + { + name: "total only", + fields: []string{"total=42"}, + expected: 42, + }, + { + name: "no total field", + fields: []string{"avg10=0.50", "avg60=1.20"}, + wantErr: true, + }, + { + name: "empty fields", + fields: []string{}, + wantErr: true, + }, + { + name: "malformed total value", + fields: []string{"avg10=0.50", "total=notanumber"}, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + val, err := extractTotal(tt.fields) + if tt.wantErr { + assert.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tt.expected, val) + } + }) + } +} diff --git a/pkg/collector/corechecks/system/pressure/pressure_stub.go b/pkg/collector/corechecks/system/pressure/pressure_stub.go new file mode 100644 index 00000000000000..c33f7cbc1e9e0a --- /dev/null +++ b/pkg/collector/corechecks/system/pressure/pressure_stub.go @@ -0,0 +1,23 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build !linux + +package pressure + +import ( + "github.com/DataDog/datadog-agent/pkg/collector/check" + "github.com/DataDog/datadog-agent/pkg/util/option" +) + +const ( + // CheckName is the name of the check + CheckName = "pressure" +) + +// Factory returns None on non-Linux platforms since PSI is Linux-only +func Factory() option.Option[func() check.Check] { + return option.None[func() check.Check]() +} diff --git a/pkg/commonchecks/corechecks.go b/pkg/commonchecks/corechecks.go index be70b5da108c04..bf0f0eddd6b08b 100644 --- a/pkg/commonchecks/corechecks.go +++ b/pkg/commonchecks/corechecks.go @@ -60,6 +60,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/collector/corechecks/system/disk/io" "github.com/DataDog/datadog-agent/pkg/collector/corechecks/system/filehandles" "github.com/DataDog/datadog-agent/pkg/collector/corechecks/system/memory" + "github.com/DataDog/datadog-agent/pkg/collector/corechecks/system/pressure" "github.com/DataDog/datadog-agent/pkg/collector/corechecks/system/uptime" "github.com/DataDog/datadog-agent/pkg/collector/corechecks/system/wincrashdetect" "github.com/DataDog/datadog-agent/pkg/collector/corechecks/system/windowscertificate" @@ -131,6 +132,7 @@ func RegisterChecks(store workloadmeta.Component, filterStore workloadfilter.Com corecheckLoader.RegisterCheck(versa.CheckName, versa.Factory()) corecheckLoader.RegisterCheck(ncm.CheckName, ncm.Factory(cfg)) corecheckLoader.RegisterCheck(battery.CheckName, battery.Factory()) + corecheckLoader.RegisterCheck(pressure.CheckName, pressure.Factory()) registerSystemProbeChecks(tagger) } diff --git a/pkg/commonchecks/corechecks_sysprobe.go b/pkg/commonchecks/corechecks_sysprobe.go index e2026a9c0290b9..a79eda0d0368a1 100644 --- a/pkg/commonchecks/corechecks_sysprobe.go +++ b/pkg/commonchecks/corechecks_sysprobe.go @@ -13,8 +13,9 @@ import ( tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def" corecheckLoader "github.com/DataDog/datadog-agent/pkg/collector/corechecks" "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf" - "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/noisyneighbor" + "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/lockcontentioncheck" "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/oomkill" + "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/syscalllatencycheck" "github.com/DataDog/datadog-agent/pkg/collector/corechecks/ebpf/tcpqueuelength" ) @@ -22,5 +23,6 @@ func registerSystemProbeChecks(tagger tagger.Component) { corecheckLoader.RegisterCheck(ebpf.CheckName, ebpf.Factory()) corecheckLoader.RegisterCheck(oomkill.CheckName, oomkill.Factory(tagger)) corecheckLoader.RegisterCheck(tcpqueuelength.CheckName, tcpqueuelength.Factory(tagger)) - corecheckLoader.RegisterCheck(noisyneighbor.CheckName, noisyneighbor.Factory(tagger)) + corecheckLoader.RegisterCheck(lockcontentioncheck.CheckName, lockcontentioncheck.Factory()) + corecheckLoader.RegisterCheck(syscalllatencycheck.CheckName, syscalllatencycheck.Factory()) } diff --git a/pkg/config/setup/system_probe.go b/pkg/config/setup/system_probe.go index 6712927c0366ba..1a05fa080a9368 100644 --- a/pkg/config/setup/system_probe.go +++ b/pkg/config/setup/system_probe.go @@ -359,6 +359,13 @@ func InitSystemProbeConfig(cfg pkgconfigmodel.Setup) { // Logon Duration config (macOS) cfg.BindEnvAndSetDefault("logon_duration.enabled", false) + // Lock Contention Check config + cfg.BindEnvAndSetDefault("lock_contention_check.enabled", false) + + // Syscall Latency Check config + cfg.BindEnvAndSetDefault("syscall_latency_check.enabled", false) + + // Fleet policies cfg.BindEnv("fleet_policies_dir") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' diff --git a/pkg/serverless/trace/testdata/install.json b/pkg/serverless/trace/testdata/install.json new file mode 100644 index 00000000000000..369e157999a77c --- /dev/null +++ b/pkg/serverless/trace/testdata/install.json @@ -0,0 +1 @@ +{"install_id":"6bd7b60d-69e1-473f-8844-c54d34aa50a5","install_type":"manual","install_time":1773414141} \ No newline at end of file diff --git a/pkg/system-probe/config/config.go b/pkg/system-probe/config/config.go index 7a2036298c7084..ab88cf5dbd5fe5 100644 --- a/pkg/system-probe/config/config.go +++ b/pkg/system-probe/config/config.go @@ -45,6 +45,8 @@ const ( InjectorModule types.ModuleName = "injector" NoisyNeighborModule types.ModuleName = "noisy_neighbor" LogonDurationModule types.ModuleName = "logon_duration" + LockContentionCheckModule types.ModuleName = "lock_contention_check" + SyscallLatencyCheckModule types.ModuleName = "syscall_latency_check" ) // New creates a config object for system-probe. It assumes no configuration has been loaded as this point. @@ -192,6 +194,12 @@ func load() (*types.Config, error) { if runtime.GOOS == "darwin" && cfg.GetBool(logonDurationNS("enabled")) { c.EnabledModules[LogonDurationModule] = struct{}{} } + if cfg.GetBool(NSkey("lock_contention_check", "enabled")) { + c.EnabledModules[LockContentionCheckModule] = struct{}{} + } + if cfg.GetBool(NSkey("syscall_latency_check", "enabled")) { + c.EnabledModules[SyscallLatencyCheckModule] = struct{}{} + } if cfg.GetBool(wcdNS("enabled")) { c.EnabledModules[WindowsCrashDetectModule] = struct{}{} diff --git a/tasks/agent.py b/tasks/agent.py index 82d9a2cda71c0b..1ea578c893e0e8 100644 --- a/tasks/agent.py +++ b/tasks/agent.py @@ -93,6 +93,8 @@ "network_config_management", "battery", "cloud_hostinfo", + "pressure", + "lock_contention", ] WINDOWS_CORECHECKS = [ diff --git a/tasks/system_probe.py b/tasks/system_probe.py index f2f918c356e4af..d29b1c85d3c248 100644 --- a/tasks/system_probe.py +++ b/tasks/system_probe.py @@ -160,6 +160,250 @@ def ninja_define_exe_compiler(nw: NinjaWriter, compiler='clang'): ) +def ninja_security_ebpf_programs( + nw: NinjaWriter, build_dir: Path, debug: bool, kernel_release: str | None, arch: Arch | None = None +): + security_agent_c_dir = os.path.join("pkg", "security", "ebpf", "c") + security_agent_prebuilt_dir_include = os.path.join(security_agent_c_dir, "include") + security_agent_prebuilt_dir = os.path.join(security_agent_c_dir, "prebuilt") + + kernel_headers = get_linux_header_dirs( + kernel_release=kernel_release, minimal_kernel_release=CWS_PREBUILT_MINIMUM_KERNEL_VERSION, arch=arch + ) + kheaders = " ".join(f"-isystem{d}" for d in kernel_headers) + debugdef = "-DDEBUG=1" if debug else "" + security_flags = f"-g -I{security_agent_prebuilt_dir_include} {debugdef}" + + outfiles = [] + + # basic + infile = os.path.join(security_agent_prebuilt_dir, "probe.c") + outfile = os.path.join(build_dir, "runtime-security.o") + ninja_ebpf_program( + nw, + infile=infile, + outfile=outfile, + variables={ + "flags": security_flags, + "kheaders": kheaders, + }, + ) + outfiles.append(outfile) + + # syscall wrapper + root, ext = os.path.splitext(outfile) + syscall_wrapper_outfile = f"{root}-syscall-wrapper{ext}" + ninja_ebpf_program( + nw, + infile=infile, + outfile=syscall_wrapper_outfile, + variables={ + "flags": security_flags + " -DUSE_SYSCALL_WRAPPER", + "kheaders": kheaders, + }, + ) + outfiles.append(syscall_wrapper_outfile) + + # fentry + syscall wrapper + root, ext = os.path.splitext(outfile) + syscall_wrapper_outfile = f"{root}-fentry{ext}" + ninja_ebpf_program( + nw, + infile=infile, + outfile=syscall_wrapper_outfile, + variables={ + "flags": security_flags + " -DUSE_SYSCALL_WRAPPER -DUSE_FENTRY", + "kheaders": kheaders, + }, + ) + outfiles.append(syscall_wrapper_outfile) + + # offset guesser + offset_guesser_outfile = os.path.join(build_dir, "runtime-security-offset-guesser.o") + ninja_ebpf_program( + nw, + infile=os.path.join(security_agent_prebuilt_dir, "offset-guesser.c"), + outfile=offset_guesser_outfile, + variables={ + "flags": security_flags, + "kheaders": kheaders, + }, + ) + outfiles.append(offset_guesser_outfile) + + nw.build(rule="phony", inputs=outfiles, outputs=["cws"]) + + +def ninja_network_ebpf_program(nw: NinjaWriter, infile, outfile, flags): + ninja_ebpf_program(nw, infile, outfile, {"flags": flags}) + root, ext = os.path.splitext(outfile) + ninja_ebpf_program(nw, infile, f"{root}-debug{ext}", {"flags": flags + " -DDEBUG=1"}) + + +def ninja_telemetry_ebpf_co_re_programs(nw, infile, outfile, flags): + ninja_ebpf_co_re_program(nw, infile, outfile, {"flags": flags}) + root, ext = os.path.splitext(outfile) + + +def ninja_telemetry_ebpf_programs(nw, build_dir, co_re_build_dir): + src_dir = os.path.join("pkg", "ebpf", "c") + + telemetry_co_re_programs = [ + "lock_contention", + "ksyms_iter", + ] + for prog in telemetry_co_re_programs: + infile = os.path.join(src_dir, f"{prog}.c") + outfile = os.path.join(co_re_build_dir, f"{prog}.c") + + co_re_flags = [f"-I{src_dir}"] + ninja_telemetry_ebpf_co_re_programs(nw, infile, outfile, ' '.join(co_re_flags)) + + +def ninja_network_ebpf_co_re_program(nw: NinjaWriter, infile, outfile, flags): + ninja_ebpf_co_re_program(nw, infile, outfile, {"flags": flags}) + root, ext = os.path.splitext(outfile) + ninja_ebpf_co_re_program(nw, infile, f"{root}-debug{ext}", {"flags": flags + " -DDEBUG=1"}) + + +def ninja_network_ebpf_programs(nw: NinjaWriter, build_dir, co_re_build_dir): + network_bpf_dir = os.path.join("pkg", "network", "ebpf") + network_c_dir = os.path.join(network_bpf_dir, "c") + + network_flags = "-Ipkg/network/ebpf/c -g" + network_programs = [ + "prebuilt/dns", + "prebuilt/offset-guess", + "tracer", + "prebuilt/usm", + "prebuilt/usm_events_test", + "prebuilt/shared-libraries", + "prebuilt/conntrack", + ] + + network_co_re_programs = [ + "tracer", + "co-re/tracer-fentry", + "runtime/usm", + "runtime/shared-libraries", + "runtime/conntrack", + ] + + for prog in network_programs: + infile = os.path.join(network_c_dir, f"{prog}.c") + outfile = os.path.join(build_dir, f"{os.path.basename(prog)}.o") + ninja_network_ebpf_program(nw, infile, outfile, network_flags) + + for prog_path in network_co_re_programs: + prog = os.path.basename(prog_path) + src_dir = os.path.join(network_c_dir, os.path.dirname(prog_path)) + network_co_re_flags = f"-I{src_dir} -Ipkg/network/ebpf/c" + + infile = os.path.join(src_dir, f"{prog}.c") + outfile = os.path.join(co_re_build_dir, f"{prog}.o") + ninja_network_ebpf_co_re_program(nw, infile, outfile, network_co_re_flags) + + +def ninja_kernel_bug_binaries(nw: NinjaWriter, arch: str | Arch): + arch = Arch.from_str(arch) + + # do not build for arm64 + if arch == ARCH_ARM64: + return + + ebpf_c_dir = os.path.join("pkg", "ebpf", "kernelbugs", "c") + embedded_bins = ["detect-seccomp-bug"] + + for binary in embedded_bins: + infile = os.path.join(ebpf_c_dir, f"{binary}.c") + outfile = os.path.join(ebpf_c_dir, binary) + cc = "gcc" + + nw.build( + inputs=[infile], + outputs=[outfile], + rule="cbin", + variables={"cc": cc, "cflags": "-static", "ldflags": "-lseccomp"}, + ) + + +def ninja_test_ebpf_program(nw: NinjaWriter, build_dir, ebpf_c_dir, test_flags, prog): + infile = os.path.join(ebpf_c_dir, f"{prog}.c") + outfile = os.path.join(build_dir, f"{os.path.basename(prog)}.o") + ninja_ebpf_co_re_program(nw, infile, outfile, {"flags": test_flags}) + + +def ninja_test_ebpf_programs(nw: NinjaWriter, build_dir): + ebpf_bpf_dir = os.path.join("pkg", "ebpf") + ebpf_c_dir = os.path.join(ebpf_bpf_dir, "testdata", "c") + test_flags = "-g -DDEBUG=1" + + test_programs = ["logdebug-test", "error_telemetry", "uprobe_attacher-test"] + + for prog in test_programs: + ninja_test_ebpf_program(nw, build_dir, ebpf_c_dir, test_flags, prog) + + # System-probe ebpf subcommand test programs + ebpf_subcommand_test_c_dir = os.path.join("cmd", "system-probe", "subcommands", "ebpf", "testdata") + ebpf_subcommand_test_programs = ["btf_test"] + + for prog in ebpf_subcommand_test_programs: + ninja_test_ebpf_program(nw, build_dir, ebpf_subcommand_test_c_dir, test_flags, prog) + + +def ninja_kernel_bugs_ebpf_programs(nw: NinjaWriter): + build_dir = os.path.join("pkg", "ebpf", "kernelbugs", "c") + ninja_test_ebpf_program(nw, build_dir, build_dir, "", "uprobe-trigger") + + +def ninja_gpu_ebpf_programs(nw: NinjaWriter, co_re_build_dir: Path | str): + gpu_headers_dir = Path("pkg/gpu/ebpf/c") + gpu_c_dir = gpu_headers_dir / "runtime" + gpu_flags = f"-I{gpu_headers_dir} -I{gpu_c_dir} -Ipkg/network/ebpf/c" + gpu_programs = ["gpu"] + + for prog in gpu_programs: + infile = os.path.join(gpu_c_dir, f"{prog}.c") + outfile = os.path.join(co_re_build_dir, f"{prog}.o") + ninja_ebpf_co_re_program(nw, infile, outfile, {"flags": gpu_flags}) + root, ext = os.path.splitext(outfile) + ninja_ebpf_co_re_program(nw, infile, f"{root}-debug{ext}", {"flags": gpu_flags + " -DDEBUG=1"}) + + +def ninja_container_integrations_ebpf_programs(nw: NinjaWriter, co_re_build_dir): + container_integrations_co_re_dir = os.path.join("pkg", "collector", "corechecks", "ebpf", "c", "runtime") + container_integrations_co_re_flags = f"-I{container_integrations_co_re_dir}" + container_integrations_co_re_programs = [ + "oom-kill", + "tcp-queue-length", + "ebpf", + "lock-contention-check", + "syscall-latency", + ] + + for prog in container_integrations_co_re_programs: + infile = os.path.join(container_integrations_co_re_dir, f"{prog}-kern.c") + outfile = os.path.join(co_re_build_dir, f"{prog}.o") + ninja_ebpf_co_re_program(nw, infile, outfile, {"flags": container_integrations_co_re_flags}) + root, ext = os.path.splitext(outfile) + ninja_ebpf_co_re_program( + nw, infile, f"{root}-debug{ext}", {"flags": container_integrations_co_re_flags + " -DDEBUG=1"} + ) + + +def ninja_dynamic_instrumentation_ebpf_programs(nw: NinjaWriter, co_re_build_dir): + dir = Path("pkg/dyninst/ebpf") + flags = f"-I{dir}" + programs = ["event"] + + for prog in programs: + infile = os.path.join(dir, f"{prog}.c") + outfile = os.path.join(co_re_build_dir, f"dyninst_{prog}.o") + ninja_ebpf_co_re_program(nw, infile, outfile, {"flags": flags}) + root, ext = os.path.splitext(outfile) + ninja_ebpf_co_re_program(nw, infile, f"{root}-debug{ext}", {"flags": flags + " -DDYNINST_DEBUG=1"}) + + def ninja_runtime_compilation_files(nw: NinjaWriter, gobin): bc_dir = os.path.join("pkg", "ebpf", "bytecode") build_dir = os.path.join(bc_dir, "build") @@ -219,6 +463,145 @@ def ninja_runtime_compilation_files(nw: NinjaWriter, gobin): ) +def ninja_cgo_type_files(nw: NinjaWriter): + # TODO we could probably preprocess the input files to find out the dependencies + nw.pool(name="cgo_pool", depth=1) + if is_windows: + go_platform = "windows" + def_files = { + "pkg/network/driver/types.go": [ + "pkg/network/driver/ddnpmapi.h", + ], + "pkg/windowsdriver/procmon/types.go": [ + "pkg/windowsdriver/include/procmonapi.h", + ], + } + nw.rule( + name="godefs", + pool="cgo_pool", + command="powershell -Command \"$$PSDefaultParameterValues['Out-File:Encoding'] = 'ascii';" + + "(cd $in_dir);" + + "(go tool cgo -godefs -- -fsigned-char $in_file | " + + "go run $script_path | Out-File -encoding ascii $out_file);" + + "exit $$LastExitCode\"", + ) + else: + go_platform = "linux" + def_files = { + "pkg/network/ebpf/conntrack_types.go": ["pkg/network/ebpf/c/conntrack/types.h"], + "pkg/network/ebpf/tuple_types.go": ["pkg/network/ebpf/c/tracer/tracer.h"], + "pkg/network/ebpf/kprobe_types.go": [ + "pkg/network/ebpf/c/tracer/tracer.h", + "pkg/network/ebpf/c/tcp_states.h", + "pkg/network/ebpf/c/prebuilt/offset-guess.h", + "pkg/network/ebpf/c/protocols/classification/defs.h", + ], + "pkg/network/protocols/ebpf_types.go": [ + "pkg/network/ebpf/c/protocols/postgres/types.h", + ], + "pkg/network/protocols/http/gotls/go_tls_types.go": [ + "pkg/network/ebpf/c/protocols/tls/go-tls-types.h", + ], + "pkg/network/protocols/http/types.go": [ + "pkg/network/ebpf/c/tracer/tracer.h", + "pkg/network/ebpf/c/protocols/http/types.h", + "pkg/network/ebpf/c/protocols/classification/defs.h", + ], + "pkg/network/protocols/http2/types.go": [ + "pkg/network/ebpf/c/tracer/tracer.h", + "pkg/network/ebpf/c/protocols/http2/decoding-defs.h", + ], + "pkg/network/protocols/kafka/types.go": [ + "pkg/network/ebpf/c/tracer/tracer.h", + "pkg/network/ebpf/c/protocols/kafka/types.h", + "pkg/network/ebpf/c/protocols/kafka/defs.h", + ], + "pkg/network/protocols/postgres/ebpf/types.go": [ + "pkg/network/ebpf/c/protocols/postgres/types.h", + ], + "pkg/network/protocols/redis/types.go": [ + "pkg/network/ebpf/c/protocols/redis/types.h", + ], + "pkg/network/protocols/tls/types.go": [ + "pkg/network/ebpf/c/protocols/tls/tags-types.h", + ], + "pkg/ebpf/telemetry/types.go": [ + "pkg/ebpf/c/telemetry_types.h", + ], + "pkg/network/tracer/offsetguess/offsetguess_types.go": [ + "pkg/network/ebpf/c/prebuilt/offset-guess.h", + ], + "pkg/network/protocols/events/types.go": [ + "pkg/network/ebpf/c/protocols/events-types.h", + ], + "pkg/collector/corechecks/ebpf/probe/tcpqueuelength/tcp_queue_length_kern_types.go": [ + "pkg/collector/corechecks/ebpf/c/runtime/tcp-queue-length-kern-user.h", + ], + "pkg/network/usm/sharedlibraries/types.go": [ + "pkg/network/ebpf/c/shared-libraries/types.h", + ], + "pkg/collector/corechecks/ebpf/probe/ebpfcheck/c_types.go": [ + "pkg/collector/corechecks/ebpf/c/runtime/ebpf-kern-user.h" + ], + "pkg/collector/corechecks/ebpf/probe/oomkill/c_types.go": [ + "pkg/collector/corechecks/ebpf/c/runtime/oom-kill-kern-user.h", + ], + "pkg/ebpf/types.go": [ + "pkg/ebpf/c/lock_contention.h", + ], + "pkg/gpu/ebpf/kprobe_types.go": [ + "pkg/gpu/ebpf/c/types.h", + ], + "pkg/collector/corechecks/ebpf/probe/lockcontentioncheck/ebpf_types.go": [ + "pkg/collector/corechecks/ebpf/c/runtime/lock-contention-check-kern-user.h" + ], + "pkg/dyninst/output/framing.go": [ + "pkg/dyninst/ebpf/framing.h", + ], + "pkg/dyninst/loader/types.go": [ + "pkg/dyninst/ebpf/types.h", + ], + } + # TODO this uses the system clang, rather than the version-pinned copy we ship. Will this cause problems? + # It is only generating cgo type definitions and changes are reviewed, so risk is low + nw.rule( + name="godefs", + pool="cgo_pool", + command="cd $in_dir && " + + "CC=clang go tool cgo -godefs -- $rel_import -fsigned-char $in_file | " + + "go run $script_path $tests_file $package_name > $out_file", + ) + + script_path = os.path.join(os.getcwd(), "pkg", "ebpf", "cgo", "genpost.go") + for f, headers in def_files.items(): + in_dir, in_file = os.path.split(f) + in_base, _ = os.path.splitext(in_file) + out_file = f"{in_base}_{go_platform}.go" + rel_import = f"-I {os.path.relpath('pkg/network/ebpf/c', in_dir)} -I {os.path.relpath('pkg/ebpf/c', in_dir)}" + tests_file = "" + package_name = "" + outputs = [os.path.join(in_dir, out_file)] + if go_platform == "linux": + tests_file = f"{in_base}_{go_platform}_test" + package_name = os.path.basename(in_dir) + outputs.append(os.path.join(in_dir, f"{tests_file}.go")) + nw.build( + inputs=[f], + outputs=outputs, + rule="godefs", + implicit=headers + [script_path], + variables={ + "in_dir": in_dir, + "in_file": in_file, + "out_file": out_file, + "script_path": script_path, + "rel_import": rel_import, + "tests_file": tests_file, + "package_name": package_name, + }, + ) + + def ninja_generate( ctx: Context, ninja_path,