Skip to content

Commit 82eb71e

Browse files
committed
perf: Disallow mis-matched inherited group reads
jira VULN-4127 cve CVE-2023-5717 commit-author Peter Zijlstra <[email protected]> commit 32671e3 upstream-diff The mainline fix 32671e3 adds a new `group_generation' field to the `perf_event' struct. This breaks LTS 8.6 kABI. The new field was preserved, but moved to the end of the struct and wrapped in the `RH_KABI_EXTEND' macro. The kABI in this particular case is preserved, as the `perf_event' struct is always dynamically allocated through `perf_event_alloc()' and it's never used as an array. Because group consistency is non-atomic between parent (filedesc) and children (inherited) events, it is possible for PERF_FORMAT_GROUP read() to try and sum non-matching counter groups -- with non-sensical results. Add group_generation to distinguish the case where a parent group removes and adds an event and thus has the same number, but a different configuration of events as inherited groups. This became a problem when commit fa8c269 ("perf/core: Invert perf_read_group() loops") flipped the order of child_list and sibling_list. Previously it would iterate the group (sibling_list) first, and for each sibling traverse the child_list. In this order, only the group composition of the parent is relevant. By flipping the order the group composition of the child (inherited) events becomes an issue and the mis-match in group composition becomes evident. That said; even prior to this commit, while reading of a group that is not equally inherited was not broken, it still made no sense. (Ab)use ECHILD as error return to indicate issues with child process group composition. Fixes: fa8c269 ("perf/core: Invert perf_read_group() loops") Reported-by: Budimir Markovic <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Link: https://lkml.kernel.org/r/[email protected] (cherry picked from commit 32671e3) Signed-off-by: Marcin Wcisło <[email protected]>
1 parent a5f217e commit 82eb71e

File tree

2 files changed

+36
-6
lines changed

2 files changed

+36
-6
lines changed

include/linux/perf_event.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ struct perf_guest_info_callbacks {
5757
#include <linux/cgroup.h>
5858
#include <linux/refcount.h>
5959
#include <linux/security.h>
60+
#include <linux/rh_kabi.h>
6061
#include <asm/local.h>
6162

6263
struct perf_callchain_entry {
@@ -800,6 +801,8 @@ struct perf_event {
800801
RH_KABI_BROKEN_INSERT(void *security)
801802
#endif
802803
struct list_head sb_list;
804+
805+
RH_KABI_EXTEND(unsigned int group_generation)
803806
#endif /* CONFIG_PERF_EVENTS */
804807
};
805808

kernel/events/core.c

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2020,6 +2020,7 @@ static void perf_group_attach(struct perf_event *event)
20202020

20212021
list_add_tail(&event->sibling_list, &group_leader->sibling_list);
20222022
group_leader->nr_siblings++;
2023+
group_leader->group_generation++;
20232024

20242025
perf_event__header_size(group_leader);
20252026

@@ -2212,6 +2213,7 @@ static void perf_group_detach(struct perf_event *event)
22122213
if (leader != event) {
22132214
list_del_init(&event->sibling_list);
22142215
event->group_leader->nr_siblings--;
2216+
event->group_leader->group_generation++;
22152217
goto out;
22162218
}
22172219

@@ -5278,7 +5280,7 @@ static int __perf_read_group_add(struct perf_event *leader,
52785280
u64 read_format, u64 *values)
52795281
{
52805282
struct perf_event_context *ctx = leader->ctx;
5281-
struct perf_event *sub;
5283+
struct perf_event *sub, *parent;
52825284
unsigned long flags;
52835285
int n = 1; /* skip @nr */
52845286
int ret;
@@ -5288,6 +5290,33 @@ static int __perf_read_group_add(struct perf_event *leader,
52885290
return ret;
52895291

52905292
raw_spin_lock_irqsave(&ctx->lock, flags);
5293+
/*
5294+
* Verify the grouping between the parent and child (inherited)
5295+
* events is still in tact.
5296+
*
5297+
* Specifically:
5298+
* - leader->ctx->lock pins leader->sibling_list
5299+
* - parent->child_mutex pins parent->child_list
5300+
* - parent->ctx->mutex pins parent->sibling_list
5301+
*
5302+
* Because parent->ctx != leader->ctx (and child_list nests inside
5303+
* ctx->mutex), group destruction is not atomic between children, also
5304+
* see perf_event_release_kernel(). Additionally, parent can grow the
5305+
* group.
5306+
*
5307+
* Therefore it is possible to have parent and child groups in a
5308+
* different configuration and summing over such a beast makes no sense
5309+
* what so ever.
5310+
*
5311+
* Reject this.
5312+
*/
5313+
parent = leader->parent;
5314+
if (parent &&
5315+
(parent->group_generation != leader->group_generation ||
5316+
parent->nr_siblings != leader->nr_siblings)) {
5317+
ret = -ECHILD;
5318+
goto unlock;
5319+
}
52915320

52925321
/*
52935322
* Since we co-schedule groups, {enabled,running} times of siblings
@@ -5317,8 +5346,9 @@ static int __perf_read_group_add(struct perf_event *leader,
53175346
values[n++] = primary_event_id(sub);
53185347
}
53195348

5349+
unlock:
53205350
raw_spin_unlock_irqrestore(&ctx->lock, flags);
5321-
return 0;
5351+
return ret;
53225352
}
53235353

53245354
static int perf_read_group(struct perf_event *event,
@@ -5337,10 +5367,6 @@ static int perf_read_group(struct perf_event *event,
53375367

53385368
values[0] = 1 + leader->nr_siblings;
53395369

5340-
/*
5341-
* By locking the child_mutex of the leader we effectively
5342-
* lock the child list of all siblings.. XXX explain how.
5343-
*/
53445370
mutex_lock(&leader->child_mutex);
53455371

53465372
ret = __perf_read_group_add(leader, read_format, values);
@@ -12994,6 +13020,7 @@ static int inherit_group(struct perf_event *parent_event,
1299413020
!perf_get_aux_event(child_ctr, leader))
1299513021
return -EINVAL;
1299613022
}
13023+
leader->group_generation = parent_event->group_generation;
1299713024
return 0;
1299813025
}
1299913026

0 commit comments

Comments
 (0)