From 85defc197a8ad58e7b3f60d4143cdf0937587f5f Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Wed, 3 Sep 2025 19:02:03 +0800 Subject: [PATCH 1/2] libbpf-tools: Add cgroup_helpers to get cgroup info Some tools need to obtain cgroup information. For example, oomkill currently only supports tracking process information and cannot obtain cgroup information. It would be better if it could obtain memcg information. For ease of maintenance, a separate commit is kept just for adding cgroup_helpers. Added interfaces: cgroup_cgroupid_of_path() - Get cgroupid from cgroup absolute path get_cgroupid_path() - Get cgroup path from cgroupid Signed-off-by: Rong Tao --- libbpf-tools/Makefile | 1 + libbpf-tools/cgroup_helpers.c | 241 ++++++++++++++++++++++++++++++++++ libbpf-tools/cgroup_helpers.h | 19 +++ 3 files changed, 261 insertions(+) create mode 100644 libbpf-tools/cgroup_helpers.c create mode 100644 libbpf-tools/cgroup_helpers.h diff --git a/libbpf-tools/Makefile b/libbpf-tools/Makefile index a442da985a6e..573d1fff4b45 100644 --- a/libbpf-tools/Makefile +++ b/libbpf-tools/Makefile @@ -106,6 +106,7 @@ SIGSNOOP_ALIAS = killsnoop APP_ALIASES = $(FSDIST_ALIASES) $(FSSLOWER_ALIASES) ${SIGSNOOP_ALIAS} COMMON_OBJ = \ + $(OUTPUT)/cgroup_helpers.o \ $(OUTPUT)/trace_helpers.o \ $(OUTPUT)/syscall_helpers.o \ $(OUTPUT)/errno_helpers.o \ diff --git a/libbpf-tools/cgroup_helpers.c b/libbpf-tools/cgroup_helpers.c new file mode 100644 index 000000000000..ecb16cc94746 --- /dev/null +++ b/libbpf-tools/cgroup_helpers.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2025 Rong Tao */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cgroup_helpers.h" + + +/** + * Get cgroup mountpoints. + * + * @roots must be freed with cgroup_free_roots(); access entries as roots[idx]. + * + * On success, the number of root paths is returned; pass it as the nentries + * parameter of cgroup_free_roots().
On error, -errno returned.
+ */
+static int cgroup_get_roots(char ***roots)
+{
+	/*
+	 * A whitespace-delimited token can never be longer than the line it
+	 * was read from, so sizing mntpoint like line keeps the %s below
+	 * within bounds.
+	 */
+	char line[2048], mntpoint[sizeof(line)], fstype[64];
+	char **list = NULL, **tmp;
+	int n = 0, err;
+	FILE *fp;
+
+	/* Give the caller a well-defined value on every error path. */
+	*roots = NULL;
+
+	fp = fopen("/proc/mounts", "r");
+	if (!fp)
+		return -errno;
+
+	while (fgets(line, sizeof(line), fp)) {
+		/*
+		 * Line format: fsname mntpoint fstype mntopt dump pass.
+		 * Only the mount point and the type are needed; the other
+		 * fields are skipped instead of being copied into small
+		 * fixed buffers that long device names or mount options
+		 * (e.g. overlayfs) would overflow.
+		 */
+		if (sscanf(line, "%*s %s %63s", mntpoint, fstype) != 2)
+			continue;
+
+		/* Only need cgroup or cgroup2 */
+		if (strcmp(fstype, "cgroup") && strcmp(fstype, "cgroup2"))
+			continue;
+
+		/* Grow through a temporary so a failure does not leak list. */
+		tmp = realloc(list, (n + 1) * sizeof(*list));
+		if (!tmp) {
+			err = -ENOMEM;
+			goto err_free;
+		}
+		list = tmp;
+
+		list[n] = strdup(mntpoint);
+		if (!list[n]) {
+			err = -ENOMEM;
+			goto err_free;
+		}
+		n++;
+	}
+
+	fclose(fp);
+	*roots = list;
+
+	return n;
+
+err_free:
+	fclose(fp);
+	while (n > 0)
+		free(list[--n]);
+	free(list);
+	return err;
+}
+
+/**
+ * Used to release roots allocated by cgroup_get_roots().
+ *
+ * On success, zero returned. On error, -errno returned.
+ */
+static int cgroup_free_roots(char **roots, int nentries)
+{
+	int i;
+
+	if (!roots)
+		return -EINVAL;
+
+	for (i = 0; i < nentries; i++)
+		free(roots[i]);
+	free(roots);
+
+	return 0;
+}
+
+long cgroup_cgroupid_of_path(const char *cgroup_path)
+{
+	int err;
+	struct stat st;
+	/* The inode of the cgroup folder is the cgroupid */
+	err = stat(cgroup_path, &st);
+	return err ? -errno : st.st_ino;
+}
+
+typedef int (*match_fn)(const char *path, void *arg);
+
+/**
+ * Recursively traverse all directories under the known cgroup root for
+ * matching.
+ *
+ * When @match returns true, the match succeeds and the function returns
+ * without further searching.
+ *
+ * If the match is successful, 1 is returned. If the match fails, 0 is returned.
+ * If an error occurs during the search process, -errno is returned.
+ */
+static int find_cgroup_from_root_recur(const char *root, match_fn match,
+				       void *arg)
+{
+	int err = 0, n;
+	DIR *dir = NULL;
+	struct dirent *dirent;
+	char *path;
+	struct stat st;
+	size_t path_len;
+
+	assert(match && "match_fn is NULL");
+
+	if (!root)
+		return -EINVAL;
+
+	err = lstat(root, &st);
+	if (err)
+		return -errno;
+	if (!S_ISDIR(st.st_mode))
+		return -ENOTDIR;
+
+	/* Heap-allocated so deep hierarchies do not exhaust the stack. */
+	path = malloc(PATH_MAX);
+	if (!path)
+		return -ENOMEM;
+
+	/*
+	 * Keep a trailing slash after the directory name, convenient for
+	 * splicing subdirectories. A root that does not fit is an error
+	 * rather than a silently truncated path.
+	 */
+	n = snprintf(path, PATH_MAX, "%s/", root);
+	if (n < 0 || n >= PATH_MAX) {
+		err = -ENAMETOOLONG;
+		goto done;
+	}
+	path_len = n;
+
+	dir = opendir(path);
+	if (!dir) {
+		err = -errno;
+		goto done;
+	}
+
+	/**
+	 * Traverse all folders under the root directory, skipping the current
+	 * directory and the parent directory.
+	 */
+	while ((dirent = readdir(dir)) != NULL) {
+		if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))
+			continue;
+		/* Always NUL-terminated; skip entries that would not fit. */
+		n = snprintf(path + path_len, PATH_MAX - path_len, "%s",
+			     dirent->d_name);
+		if (n < 0 || (size_t)n >= PATH_MAX - path_len)
+			continue;
+		err = lstat(path, &st);
+		if (err)
+			continue;
+		if (!S_ISDIR(st.st_mode))
+			continue;
+#ifdef DEBUG
+		fprintf(stderr, "%s\n", path);
+#endif
+		if (match(path, arg)) {
+			/* Found */
+			err = 1;
+			goto done;
+		}
+
+		/**
+		 * Recursive search. Returning 1 means it was found, return
+		 * -errno means an error occurred, and returning 0 means it
+		 * was not found and should continue searching.
+		 */
+		err = find_cgroup_from_root_recur(path, match, arg);
+		if (err)
+			goto done;
+	}
+
+	/* Not found */
+	err = 0;
+done:
+	/* dir is still NULL when we bail out before/at opendir(). */
+	if (dir)
+		closedir(dir);
+	free(path);
+	return err;
+}
+
+struct match_cgroupid_arg {
+	long cgroupid;
+	char path[PATH_MAX];
+};
+
+/**
+ * As the @match parameter of the find_cgroup_from_root_recur() function,
+ * the cgroup path is found by cgroupid.
+ */
+static int match_cgroupid(const char *path, void *arg)
+{
+	long cgroupid;
+	struct match_cgroupid_arg *a = arg;
+
+	cgroupid = cgroup_cgroupid_of_path(path);
+#ifdef DEBUG
+	fprintf(stderr, "%ld:%ld %s\n", cgroupid, a->cgroupid, path);
+#endif
+	if (cgroupid == a->cgroupid) {
+		/* Never use the path as the format: it may contain '%'. */
+		snprintf(a->path, sizeof(a->path), "%s", path);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Resolve @cgroupid to a path by walking every mounted cgroup hierarchy.
+ *
+ * @buf is always left NUL-terminated (empty when nothing is found), so it is
+ * safe to print even if the caller ignores the return value. Returns 0 on
+ * success, -ENOENT if no cgroup matches, -ENAMETOOLONG if @buf is too small
+ * for the path, -EINVAL for an unusable @buf, or the -errno reported while
+ * reading the mount table.
+ */
+int get_cgroupid_path(long cgroupid, char *buf, size_t buf_len)
+{
+	char **roots = NULL;
+	int nroots, i, n, err = -ENOENT;
+	struct match_cgroupid_arg arg = {};
+
+	if (!buf || !buf_len)
+		return -EINVAL;
+	buf[0] = '\0';
+
+	arg.cgroupid = cgroupid;
+
+	nroots = cgroup_get_roots(&roots);
+	if (nroots < 0)
+		return nroots;
+
+	for (i = 0; i < nroots; i++) {
+#ifdef DEBUG
+		fprintf(stderr, "root --- %s\n", roots[i]);
+#endif
+		/*
+		 * The mount point is itself a cgroup (the root cgroup) and the
+		 * recursive walk only visits its children, so test it first.
+		 */
+		if (!match_cgroupid(roots[i], &arg) &&
+		    find_cgroup_from_root_recur(roots[i], match_cgroupid, &arg) != 1)
+			continue;
+
+		n = snprintf(buf, buf_len, "%s", arg.path);
+		err = (n < 0 || (size_t)n >= buf_len) ? -ENAMETOOLONG : 0;
+		break;
+	}
+
+	cgroup_free_roots(roots, nroots);
+
+	return err;
+}
diff --git a/libbpf-tools/cgroup_helpers.h b/libbpf-tools/cgroup_helpers.h
new file mode 100644
index 000000000000..c7e03bf8ad7f
--- /dev/null
+++ b/libbpf-tools/cgroup_helpers.h
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2025 Rong Tao */
+#pragma once
+#include <stddef.h>
+
+
+/**
+ * Get cgroup id from cgroup path.
+ *
+ * On success, the cgroupid returned. On error, -errno returned.
+ */
+long cgroup_cgroupid_of_path(const char *cgroup_path);
+
+/**
+ * Get cgroup path from cgroupid.
+ *
+ * On success, zero returned. On error, -errno returned.
+ */
+int get_cgroupid_path(long cgroupid, char *buf, size_t buf_len);
From 3ec6c727768d05b231e56111752e7b0dee4ad89e Mon Sep 17 00:00:00 2001
From: Rong Tao
Date: Wed, 3 Sep 2025 19:03:32 +0800
Subject: [PATCH 2/2] libbpf-tools: oomkill: support display memory cgroup

Using a simple test program (not shown) called OOM, we performed the
following two tests:

1. Allocating unlimited memory
2. Adding the process to a cgroup named oom-memcg and limiting its memory
   usage to 200MB.
When we do not print cgroup information, we can only see process information and cannot see the difference between memcg and non-memcg. $ sudo ./oomkill Tracing OOM kills... Ctrl-C to stop. 14:28:23 Triggered by PID 179201 ("oom"), OOM kill of PID 179201 ("oom"), 6114610 pages, loadavg: loadavg: 0.56 0.51 0.38 2/968 179204 14:28:42 Triggered by PID 179212 ("oom"), OOM kill of PID 179212 ("oom"), 51200 pages, loadavg: loadavg: 0.40 0.47 0.37 3/968 179212 The function implemented by this patch can clearly display cgroup information. $ sudo ./oomkill -c Tracing OOM kills... Ctrl-C to stop. 14:32:59 Triggered by PID 179879 ("oom"), CGROUP 8309 ("/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/session.slice/org.gnome.Shell@wayland.service"), OOM kill of PID 179879 ("oom"), 6114610 pages, loadavg: loadavg: 0.50 0.38 0.35 4/970 179879 14:33:14 Triggered by PID 179884 ("oom"), CGROUP 122547 ("/sys/fs/cgroup/oom-memcg"), MEMCG 122547 ("/sys/fs/cgroup/oom-memcg"), OOM kill of PID 179884 ("oom"), 51200 pages, loadavg: loadavg: 0.47 0.38 0.35 3/971 179884 Link: https://github.com/iovisor/bcc/pull/5384 Signed-off-by: Rong Tao --- libbpf-tools/oomkill.bpf.c | 16 ++++++++++++++++ libbpf-tools/oomkill.c | 33 ++++++++++++++++++++++++++------- libbpf-tools/oomkill.h | 2 ++ 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/libbpf-tools/oomkill.bpf.c b/libbpf-tools/oomkill.bpf.c index 4c722ffd5e17..b0e5c65afdcd 100644 --- a/libbpf-tools/oomkill.bpf.c +++ b/libbpf-tools/oomkill.bpf.c @@ -8,10 +8,16 @@ #include "compat.bpf.h" #include "oomkill.h" +/* linux:include/linux/memcontrol.h */ +struct mem_cgroup { + struct cgroup_subsys_state css; +}; + SEC("kprobe/oom_kill_process") int BPF_KPROBE(oom_kill_process, struct oom_control *oc, const char *message) { struct data_t *data; + struct mem_cgroup *memcg; data = reserve_buf(sizeof(*data)); if (!data) @@ -20,6 +26,16 @@ int BPF_KPROBE(oom_kill_process, struct oom_control *oc, const char *message) data->fpid = 
bpf_get_current_pid_tgid() >> 32; data->tpid = BPF_CORE_READ(oc, chosen, tgid); data->pages = BPF_CORE_READ(oc, totalpages); + data->cgroupid = bpf_get_current_cgroup_id(); + + /* Get the memory cgroup id */ + memcg = BPF_CORE_READ(oc, memcg); + if (memcg) { + struct cgroup *cgrp = BPF_CORE_READ(memcg, css.cgroup); + data->mem_cgroupid = BPF_CORE_READ(cgrp, kn, id); + } else + data->mem_cgroupid = 0; + bpf_get_current_comm(&data->fcomm, sizeof(data->fcomm)); bpf_probe_read_kernel(&data->tcomm, sizeof(data->tcomm), BPF_CORE_READ(oc, chosen, comm)); submit_buf(ctx, data, sizeof(*data)); diff --git a/libbpf-tools/oomkill.c b/libbpf-tools/oomkill.c index 1dd520d42f64..23679216b2e2 100644 --- a/libbpf-tools/oomkill.c +++ b/libbpf-tools/oomkill.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) // Copyright (c) 2022 Jingxiang Zeng // Copyright (c) 2022 Krisztian Fekete +// Copyright (c) 2025 Rong Tao // // Based on oomkill(8) from BCC by Brendan Gregg. // 13-Jan-2022 Jingxiang Zeng Created this. // 17-Oct-2022 Krisztian Fekete Edited this. +// 03-Aug-2025 Rong Tao Support display cgroup. 
#include #include #include @@ -20,13 +22,15 @@ #include "compat.h" #include "oomkill.h" #include "btf_helpers.h" +#include "cgroup_helpers.h" #include "trace_helpers.h" static volatile sig_atomic_t exiting = 0; static bool verbose = false; +static bool display_cgroup = false; -const char *argp_program_version = "oomkill 0.1"; +const char *argp_program_version = "oomkill 0.2"; const char *argp_program_bug_address = "https://github.com/iovisor/bcc/tree/master/libbpf-tools"; const char argp_program_doc[] = @@ -35,10 +39,12 @@ const char argp_program_doc[] = "USAGE: oomkill [-h]\n" "\n" "EXAMPLES:\n" -" oomkill # trace OOM kills\n"; +" oomkill # trace OOM kills\n" +" oomkill --cgroup # trace OOM kills with cgroup display\n"; static const struct argp_option opts[] = { { "verbose", 'v', NULL, 0, "Verbose debug output", 0 }, + { "cgroup", 'c', NULL, 0, "Display cgroup information", 0 }, { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help", 0 }, {}, }; @@ -46,6 +52,9 @@ static const struct argp_option opts[] = { static error_t parse_arg(int key, char *arg, struct argp_state *state) { switch (key) { + case 'c': + display_cgroup = true; + break; case 'v': verbose = true; break; @@ -60,18 +69,28 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) static int handle_event(void *ctx, void *data, size_t len) { - char loadavg[256]; + char loadavg[256], cgroup_path[PATH_MAX]; char ts[32]; struct data_t *e = data; str_timestamp("%H:%M:%S", ts, sizeof(ts)); + printf("%s Triggered by PID %d (\"%s\"),", ts, e->tpid, e->tcomm); + if (display_cgroup) { + get_cgroupid_path(e->cgroupid, cgroup_path, sizeof(cgroup_path)); + printf(" CGROUP %lld (\"%s\"),", e->cgroupid, cgroup_path); + if (e->mem_cgroupid) { + if (e->mem_cgroupid != e->cgroupid) + get_cgroupid_path(e->mem_cgroupid, cgroup_path, + sizeof(cgroup_path)); + printf(" MEMCG %lld (\"%s\"),", e->mem_cgroupid, cgroup_path); + } + } + printf(" OOM kill of PID %d (\"%s\"), %lld pages", e->tpid, e->tcomm, 
e->pages); if (str_loadavg(loadavg, sizeof(loadavg)) > 0) - printf("%s Triggered by PID %d (\"%s\"), OOM kill of PID %d (\"%s\"), %lld pages, loadavg: %s", - ts, e->fpid, e->fcomm, e->tpid, e->tcomm, e->pages, loadavg); + printf(", loadavg: %s\n", loadavg); else - printf("%s Triggered by PID %d (\"%s\"), OOM kill of PID %d (\"%s\"), %lld pages\n", - ts, e->fpid, e->fcomm, e->tpid, e->tcomm, e->pages); + printf("\n"); return 0; } diff --git a/libbpf-tools/oomkill.h b/libbpf-tools/oomkill.h index 086099d5ebc0..3a89a9e9a0fd 100644 --- a/libbpf-tools/oomkill.h +++ b/libbpf-tools/oomkill.h @@ -8,6 +8,8 @@ struct data_t { __u32 fpid; __u32 tpid; __u64 pages; + __u64 cgroupid; + __u64 mem_cgroupid; char fcomm[TASK_COMM_LEN]; char tcomm[TASK_COMM_LEN]; };