diff --git a/libbpf-tools/Makefile b/libbpf-tools/Makefile
index a442da985a6e..573d1fff4b45 100644
--- a/libbpf-tools/Makefile
+++ b/libbpf-tools/Makefile
@@ -106,6 +106,7 @@ SIGSNOOP_ALIAS = killsnoop
 APP_ALIASES = $(FSDIST_ALIASES) $(FSSLOWER_ALIASES) ${SIGSNOOP_ALIAS}
 
 COMMON_OBJ = \
+	$(OUTPUT)/cgroup_helpers.o \
 	$(OUTPUT)/trace_helpers.o \
 	$(OUTPUT)/syscall_helpers.o \
 	$(OUTPUT)/errno_helpers.o \
diff --git a/libbpf-tools/cgroup_helpers.c b/libbpf-tools/cgroup_helpers.c
new file mode 100644
index 000000000000..ecb16cc94746
--- /dev/null
+++ b/libbpf-tools/cgroup_helpers.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2025 Rong Tao */
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include "cgroup_helpers.h"
+
+/**
+ * Used to release roots allocated by cgroup_get_roots().
+ *
+ * @roots may be NULL, in which case there is nothing to release.
+ */
+static void cgroup_free_roots(char **roots, int nentries)
+{
+	int i;
+
+	if (!roots)
+		return;
+
+	for (i = 0; i < nentries; i++)
+		free(roots[i]);
+	free(roots);
+}
+
+/**
+ * Get cgroup mountpoints.
+ *
+ * On success, the number of mount points is returned and @roots points to a
+ * newly allocated array with that many entries (NULL if there are none),
+ * access with roots[idx] and release with cgroup_free_roots(). On error,
+ * -errno is returned and @roots is set to NULL.
+ */
+static int cgroup_get_roots(char ***roots)
+{
+	char line[2048], mntpoint[2048], fstype[64];
+	char **list = NULL, **tmp;
+	int n = 0, err = 0;
+	FILE *fp;
+
+	*roots = NULL;
+
+	fp = fopen("/proc/mounts", "r");
+	if (!fp)
+		return -errno;
+
+	while (fgets(line, sizeof(line), fp)) {
+		/*
+		 * Only the mount point and the filesystem type are needed.
+		 * Skip the device name and bound both conversions so that an
+		 * overlong field cannot overflow the buffers.
+		 */
+		if (sscanf(line, "%*s %2047s %63s", mntpoint, fstype) != 2)
+			continue;
+
+		/* Only need cgroup or cgroup2 */
+		if (strcmp(fstype, "cgroup") && strcmp(fstype, "cgroup2"))
+			continue;
+
+		/* Keep the old array reachable if realloc() fails. */
+		tmp = realloc(list, (n + 1) * sizeof(*list));
+		if (!tmp) {
+			err = -ENOMEM;
+			break;
+		}
+		list = tmp;
+
+		list[n] = strdup(mntpoint);
+		if (!list[n]) {
+			err = -ENOMEM;
+			break;
+		}
+		n++;
+	}
+
+	fclose(fp);
+
+	if (err) {
+		cgroup_free_roots(list, n);
+		return err;
+	}
+
+	*roots = list;
+	return n;
+}
+
+long cgroup_cgroupid_of_path(const char *cgroup_path)
+{
+	struct stat st;
+
+	/* The inode of the cgroup folder is the groupid */
+	if (stat(cgroup_path, &st))
+		return -errno;
+
+	return st.st_ino;
+}
+
+typedef int (*match_fn)(const char *path, void *arg);
+
+/**
+ * Recursively traverse all directories under the known cgroup root for
+ * matching.
+ *
+ * When @match returns true, the match succeeds and the function returns
+ * without further searching.
+ *
+ * If the match is successful, 1 is returned. If the match fails, 0 is returned.
+ * If an error occurs during the search process, -errno is returned.
+ */
+static int find_cgroup_from_root_recur(const char *root, match_fn match,
+				       void *arg)
+{
+	struct dirent *dirent;
+	size_t path_len;
+	struct stat st;
+	char *path;
+	DIR *dir;
+	int err, n;
+
+	assert(match && "match_fn is NULL");
+
+	if (!root)
+		return -EINVAL;
+
+	if (lstat(root, &st))
+		return -errno;
+	if (!S_ISDIR(st.st_mode))
+		return -ENOTDIR;
+
+	dir = opendir(root);
+	if (!dir)
+		return -errno;
+
+	path = malloc(PATH_MAX);
+	if (!path) {
+		err = -ENOMEM;
+		goto out_close;
+	}
+
+	/* The trailing slash makes splicing subdirectory names trivial. */
+	n = snprintf(path, PATH_MAX, "%s/", root);
+	if (n < 0 || n >= PATH_MAX) {
+		err = -ENAMETOOLONG;
+		goto out_free;
+	}
+	path_len = n;
+
+	/**
+	 * Traverse all folders under the root directory, skipping the current
+	 * directory and the previous directory.
+	 */
+	err = 0;
+	while ((dirent = readdir(dir)) != NULL) {
+		if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))
+			continue;
+
+		/* Skip entries whose full path does not fit in the buffer. */
+		n = snprintf(path + path_len, PATH_MAX - path_len, "%s",
+			     dirent->d_name);
+		if (n < 0 || (size_t)n >= PATH_MAX - path_len)
+			continue;
+
+		if (lstat(path, &st) || !S_ISDIR(st.st_mode))
+			continue;
+#ifdef DEBUG
+		fprintf(stderr, "%s\n", path);
+#endif
+		if (match(path, arg)) {
+			/* Found */
+			err = 1;
+			break;
+		}
+
+		/**
+		 * Recursive search. Returning 1 means it was found, return
+		 * -errno means an error occurred, and returning 0 means it
+		 * was not found and should continue searching.
+		 *
+		 * A directory removed while walking is not an error, it
+		 * simply cannot be the one being searched for any more.
+		 */
+		err = find_cgroup_from_root_recur(path, match, arg);
+		if (err == -ENOENT || err == -ENOTDIR)
+			err = 0;
+		if (err)
+			break;
+	}
+
+out_free:
+	free(path);
+out_close:
+	closedir(dir);
+	return err;
+}
+
+struct match_cgroupid_arg {
+	long cgroupid;
+	char path[PATH_MAX];
+};
+
+/**
+ * As the @match parameter of the find_cgroup_from_root_recur() function,
+ * the cgroup path is found by cgroupid.
+ */
+static int match_cgroupid(const char *path, void *arg)
+{
+	struct match_cgroupid_arg *a = arg;
+	long cgroupid;
+
+	cgroupid = cgroup_cgroupid_of_path(path);
+#ifdef DEBUG
+	fprintf(stderr, "%ld:%ld %s\n", cgroupid, a->cgroupid, path);
+#endif
+	if (cgroupid != a->cgroupid)
+		return 0;
+
+	/* @path is data, never a format string: it may contain '%'. */
+	snprintf(a->path, sizeof(a->path), "%s", path);
+	return 1;
+}
+
+int get_cgroupid_path(long cgroupid, char *buf, size_t buf_len)
+{
+	struct match_cgroupid_arg arg = { .cgroupid = cgroupid };
+	int nroots, i, n, err = -ENOENT;
+	char **roots = NULL;
+
+	if (!buf || !buf_len)
+		return -EINVAL;
+
+	/* Leave a valid string behind even when the lookup fails. */
+	buf[0] = '\0';
+
+	nroots = cgroup_get_roots(&roots);
+	if (nroots < 0)
+		return nroots;
+
+	for (i = 0; i < nroots; i++) {
+#ifdef DEBUG
+		fprintf(stderr, "root --- %s\n", roots[i]);
+#endif
+		/* The walk only tests subdirectories, so test the root first. */
+		if (!match_cgroupid(roots[i], &arg) &&
+		    find_cgroup_from_root_recur(roots[i], match_cgroupid,
+						&arg) != 1)
+			continue;
+
+		n = snprintf(buf, buf_len, "%s", arg.path);
+		err = (n < 0 || (size_t)n >= buf_len) ? -ENAMETOOLONG : 0;
+		break;
+	}
+
+	cgroup_free_roots(roots, nroots);
+
+	return err;
+}
diff --git a/libbpf-tools/cgroup_helpers.h b/libbpf-tools/cgroup_helpers.h
new file mode 100644
index 000000000000..c7e03bf8ad7f
--- /dev/null
+++ b/libbpf-tools/cgroup_helpers.h
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2025 Rong Tao */
+#pragma once
+#include <stddef.h>
+
+/**
+ * Get the cgroup id of the cgroup directory at @cgroup_path.
+ *
+ * On success, the cgroup id is returned. On error, -errno is returned.
+ */
+long cgroup_cgroupid_of_path(const char *cgroup_path);
+
+/**
+ * Look up the path of the cgroup whose id is @cgroupid and copy it into
+ * @buf (at most @buf_len bytes, always NUL-terminated).
+ *
+ * On success, zero is returned. On error, -errno is returned: -EINVAL for
+ * a NULL or zero-sized @buf, -ENOENT when no mounted cgroup hierarchy
+ * contains the id, -ENAMETOOLONG when the path does not fit in @buf.
+ */
+int get_cgroupid_path(long cgroupid, char *buf, size_t buf_len);
diff --git a/libbpf-tools/oomkill.bpf.c b/libbpf-tools/oomkill.bpf.c
index 4c722ffd5e17..b0e5c65afdcd 100644
--- a/libbpf-tools/oomkill.bpf.c
+++ b/libbpf-tools/oomkill.bpf.c
@@ -8,10 +8,16 @@
 #include "compat.bpf.h"
 #include "oomkill.h"
 
+/* linux:include/linux/memcontrol.h */
+struct mem_cgroup {
+	struct cgroup_subsys_state css;
+};
+
 SEC("kprobe/oom_kill_process")
 int BPF_KPROBE(oom_kill_process, struct oom_control *oc, const char *message)
 {
 	struct data_t *data;
+	struct mem_cgroup *memcg;
 
 	data = reserve_buf(sizeof(*data));
 	if (!data)
@@ -20,6 +26,14 @@ int BPF_KPROBE(oom_kill_process, struct oom_control *oc, const char *message)
 	data->fpid = bpf_get_current_pid_tgid() >> 32;
 	data->tpid = BPF_CORE_READ(oc, chosen, tgid);
 	data->pages = BPF_CORE_READ(oc, totalpages);
+	data->cgroupid = bpf_get_current_cgroup_id();
+
+	/* Get the memory cgroup id, 0 when oc->memcg is NULL */
+	data->mem_cgroupid = 0;
+	memcg = BPF_CORE_READ(oc, memcg);
+	if (memcg)
+		data->mem_cgroupid = BPF_CORE_READ(memcg, css.cgroup, kn, id);
+
 	bpf_get_current_comm(&data->fcomm, sizeof(data->fcomm));
 	bpf_probe_read_kernel(&data->tcomm, sizeof(data->tcomm), BPF_CORE_READ(oc, chosen, comm));
 	submit_buf(ctx, data, sizeof(*data));
diff --git a/libbpf-tools/oomkill.c b/libbpf-tools/oomkill.c
index 1dd520d42f64..23679216b2e2 100644
--- a/libbpf-tools/oomkill.c
+++ b/libbpf-tools/oomkill.c
@@ -1,10 +1,12 @@
 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 // Copyright (c) 2022 Jingxiang Zeng
 // Copyright (c) 2022 Krisztian Fekete
+// Copyright (c) 2025 Rong Tao
 //
 // Based on oomkill(8) from BCC by Brendan Gregg.
 // 13-Jan-2022 Jingxiang Zeng Created this.
 // 17-Oct-2022 Krisztian Fekete Edited this.
+// 03-Aug-2025 Rong Tao Support display cgroup.
 #include <argp.h>
 #include <errno.h>
 #include <signal.h>
@@ -20,13 +22,15 @@
 #include "compat.h"
 #include "oomkill.h"
 #include "btf_helpers.h"
+#include "cgroup_helpers.h"
 #include "trace_helpers.h"
 
 static volatile sig_atomic_t exiting = 0;
 
 static bool verbose = false;
+static bool display_cgroup = false;
 
-const char *argp_program_version = "oomkill 0.1";
+const char *argp_program_version = "oomkill 0.2";
 const char *argp_program_bug_address =
 	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
 const char argp_program_doc[] =
@@ -35,10 +39,12 @@ const char argp_program_doc[] =
 "USAGE: oomkill [-h]\n"
 "\n"
 "EXAMPLES:\n"
-" oomkill # trace OOM kills\n";
+" oomkill # trace OOM kills\n"
+" oomkill --cgroup # trace OOM kills with cgroup display\n";
 
 static const struct argp_option opts[] = {
 	{ "verbose", 'v', NULL, 0, "Verbose debug output", 0 },
+	{ "cgroup", 'c', NULL, 0, "Display cgroup information", 0 },
 	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help", 0 },
 	{},
 };
@@ -46,6 +52,9 @@ static const struct argp_option opts[] = {
 static error_t parse_arg(int key, char *arg, struct argp_state *state)
 {
 	switch (key) {
+	case 'c':
+		display_cgroup = true;
+		break;
 	case 'v':
 		verbose = true;
 		break;
@@ -60,18 +69,36 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 
 static int handle_event(void *ctx, void *data, size_t len)
 {
-	char loadavg[256];
+	char loadavg[256], cgroup_path[PATH_MAX];
 	char ts[32];
 	struct data_t *e = data;
 
 	str_timestamp("%H:%M:%S", ts, sizeof(ts));
+	printf("%s Triggered by PID %d (\"%s\"),", ts, e->fpid, e->fcomm);
+	if (display_cgroup) {
+		/* An unresolved cgroup id is shown with an empty path. */
+		if (get_cgroupid_path(e->cgroupid, cgroup_path,
+				      sizeof(cgroup_path)))
+			cgroup_path[0] = '\0';
+		printf(" CGROUP %llu (\"%s\"),", (unsigned long long)e->cgroupid,
+		       cgroup_path);
+		if (e->mem_cgroupid) {
+			/* Reuse the path resolved above when both ids are equal. */
+			if (e->mem_cgroupid != e->cgroupid &&
+			    get_cgroupid_path(e->mem_cgroupid, cgroup_path,
+					      sizeof(cgroup_path)))
+				cgroup_path[0] = '\0';
+			printf(" MEMCG %llu (\"%s\"),",
+			       (unsigned long long)e->mem_cgroupid, cgroup_path);
+		}
+	}
+	printf(" OOM kill of PID %d (\"%s\"), %lld pages", e->tpid, e->tcomm,
+	       e->pages);
 
 	if (str_loadavg(loadavg, sizeof(loadavg)) > 0)
-		printf("%s Triggered by PID %d (\"%s\"), OOM kill of PID %d (\"%s\"), %lld pages, loadavg: %s",
-			ts, e->fpid, e->fcomm, e->tpid, e->tcomm, e->pages, loadavg);
+		printf(", loadavg: %s\n", loadavg);
 	else
-		printf("%s Triggered by PID %d (\"%s\"), OOM kill of PID %d (\"%s\"), %lld pages\n",
-			ts, e->fpid, e->fcomm, e->tpid, e->tcomm, e->pages);
+		printf("\n");
 
 	return 0;
 }
diff --git a/libbpf-tools/oomkill.h b/libbpf-tools/oomkill.h
index 086099d5ebc0..3a89a9e9a0fd 100644
--- a/libbpf-tools/oomkill.h
+++ b/libbpf-tools/oomkill.h
@@ -8,6 +8,8 @@ struct data_t {
 	__u32 fpid;
 	__u32 tpid;
 	__u64 pages;
+	__u64 cgroupid;
+	__u64 mem_cgroupid;
 	char fcomm[TASK_COMM_LEN];
 	char tcomm[TASK_COMM_LEN];
 };