2015-05-21 11:20:58

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH 01/10] perf,x86: Fix event/group validation

Commit 43b4578071c0 ("perf/x86: Reduce stack usage of
x86_schedule_events()") violated the rule that 'fake' scheduling; as
used for event/group validation; should not change the event state.

This went mostly un-noticed because repeated calls of
x86_pmu::get_event_constraints() would give the same result. And
x86_pmu::put_event_constraints() would mostly not do anything.

Things could still go wrong esp. for shared_regs, because
cpuc->is_fake can return a different constraint and then the
put_event_constraint will not match up.

Commit e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption
bug workaround") made the situation much worse by actually setting the
event->hw.constraint value to NULL, so when validation and actual
scheduling interact we get NULL ptr derefs.

Fix it by removing the constraint pointer from the event and move it
back to an array, this time in cpuc instead of on the stack.

Reported-by: Vince Weaver <[email protected]>
Fixes: 43b4578071c0 ("perf/x86: Reduce stack usage of x86_schedule_events()")
Fixes: e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption bug workaround")
Cc: Andrew Hunter <[email protected]>
Cc: Maria Dimakopoulou <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 32 +++++++++++++-------------
arch/x86/kernel/cpu/perf_event.h | 9 ++++---
arch/x86/kernel/cpu/perf_event_intel.c | 15 +++---------
arch/x86/kernel/cpu/perf_event_intel_ds.c | 4 +--
arch/x86/kernel/cpu/perf_event_intel_uncore.c | 7 ++---
arch/x86/kernel/cpu/perf_event_intel_uncore.h | 1
6 files changed, 32 insertions(+), 36 deletions(-)

--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -620,7 +620,7 @@ struct sched_state {
struct perf_sched {
int max_weight;
int max_events;
- struct perf_event **events;
+ struct event_constraint **constraints;
struct sched_state state;
int saved_states;
struct sched_state saved[SCHED_STATES_MAX];
@@ -629,7 +629,7 @@ struct perf_sched {
/*
* Initialize interator that runs through all events and counters.
*/
-static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
int num, int wmin, int wmax)
{
int idx;
@@ -637,10 +637,10 @@ static void perf_sched_init(struct perf_
memset(sched, 0, sizeof(*sched));
sched->max_events = num;
sched->max_weight = wmax;
- sched->events = events;
+ sched->constraints = constraints;

for (idx = 0; idx < num; idx++) {
- if (events[idx]->hw.constraint->weight == wmin)
+ if (constraints[idx]->weight == wmin)
break;
}

@@ -687,7 +687,7 @@ static bool __perf_sched_find_counter(st
if (sched->state.event >= sched->max_events)
return false;

- c = sched->events[sched->state.event]->hw.constraint;
+ c = sched->constraints[sched->state.event];
/* Prefer fixed purpose counters */
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
idx = INTEL_PMC_IDX_FIXED;
@@ -745,7 +745,7 @@ static bool perf_sched_next_event(struct
if (sched->state.weight > sched->max_weight)
return false;
}
- c = sched->events[sched->state.event]->hw.constraint;
+ c = sched->constraints[sched->state.event];
} while (c->weight != sched->state.weight);

sched->state.counter = 0; /* start with first counter */
@@ -756,12 +756,12 @@ static bool perf_sched_next_event(struct
/*
* Assign a counter for each event.
*/
-int perf_assign_events(struct perf_event **events, int n,
+int perf_assign_events(struct event_constraint **constraints, int n,
int wmin, int wmax, int *assign)
{
struct perf_sched sched;

- perf_sched_init(&sched, events, n, wmin, wmax);
+ perf_sched_init(&sched, constraints, n, wmin, wmax);

do {
if (!perf_sched_find_counter(&sched))
@@ -788,9 +788,8 @@ int x86_schedule_events(struct cpu_hw_ev
x86_pmu.start_scheduling(cpuc);

for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
- hwc = &cpuc->event_list[i]->hw;
c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
- hwc->constraint = c;
+ cpuc->event_constraint[i] = c;

wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
@@ -801,7 +800,7 @@ int x86_schedule_events(struct cpu_hw_ev
*/
for (i = 0; i < n; i++) {
hwc = &cpuc->event_list[i]->hw;
- c = hwc->constraint;
+ c = cpuc->event_constraint[i];

/* never assigned */
if (hwc->idx == -1)
@@ -821,9 +820,10 @@ int x86_schedule_events(struct cpu_hw_ev
}

/* slow path */
- if (i != n)
- unsched = perf_assign_events(cpuc->event_list, n, wmin,
+ if (i != n) {
+ unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
wmax, assign);
+ }

/*
* In case of success (unsched = 0), mark events as committed,
@@ -840,7 +840,7 @@ int x86_schedule_events(struct cpu_hw_ev
e = cpuc->event_list[i];
e->hw.flags |= PERF_X86_EVENT_COMMITTED;
if (x86_pmu.commit_scheduling)
- x86_pmu.commit_scheduling(cpuc, e, assign[i]);
+ x86_pmu.commit_scheduling(cpuc, i, assign[i]);
}
}

@@ -1292,8 +1292,10 @@ static void x86_pmu_del(struct perf_even
x86_pmu.put_event_constraints(cpuc, event);

/* Delete the array entry. */
- while (++i < cpuc->n_events)
+ while (++i < cpuc->n_events) {
cpuc->event_list[i-1] = cpuc->event_list[i];
+ cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+ }
--cpuc->n_events;

perf_event_update_userpage(event);
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -172,7 +172,10 @@ struct cpu_hw_events {
added in the current transaction */
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
u64 tags[X86_PMC_IDX_MAX];
+
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+ struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
+

unsigned int group_flag;
int is_fake;
@@ -519,9 +522,7 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);

- void (*commit_scheduling)(struct cpu_hw_events *cpuc,
- struct perf_event *event,
- int cntr);
+ void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);

void (*start_scheduling)(struct cpu_hw_events *cpuc);

@@ -717,7 +718,7 @@ static inline void __x86_pmu_enable_even

void x86_pmu_enable_all(int added);

-int perf_assign_events(struct perf_event **events, int n,
+int perf_assign_events(struct event_constraint **constraints, int n,
int wmin, int wmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);

--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2106,7 +2106,7 @@ static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
- struct event_constraint *c1 = event->hw.constraint;
+ struct event_constraint *c1 = cpuc->event_constraint[idx];
struct event_constraint *c2;

/*
@@ -2188,8 +2188,6 @@ intel_put_shared_regs_event_constraints(
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
- struct event_constraint *c = event->hw.constraint;
-
intel_put_shared_regs_event_constraints(cpuc, event);

/*
@@ -2197,19 +2195,14 @@ static void intel_put_event_constraints(
* all events are subject to and must call the
* put_excl_constraints() routine
*/
- if (c && cpuc->excl_cntrs)
+ if (cpuc->excl_cntrs)
intel_put_excl_constraints(cpuc, event);
-
- /* cleanup dynamic constraint */
- if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
- event->hw.constraint = NULL;
}

-static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
- struct perf_event *event, int cntr)
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct event_constraint *c = event->hw.constraint;
+ struct event_constraint *c = cpuc->event_constraint[idx];
struct intel_excl_states *xlo, *xl;
int tid = cpuc->excl_thread_id;
int o_tid = 1 - tid;
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -706,9 +706,9 @@ void intel_pmu_pebs_disable(struct perf_

cpuc->pebs_enabled &= ~(1ULL << hwc->idx);

- if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
+ if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
- else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
+ else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled &= ~(1ULL << 63);

if (cpuc->enabled)
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -364,9 +364,8 @@ static int uncore_assign_events(struct i
bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
- hwc = &box->event_list[i]->hw;
c = uncore_get_event_constraint(box, box->event_list[i]);
- hwc->constraint = c;
+ box->event_constraint[i] = c;
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
}
@@ -374,7 +373,7 @@ static int uncore_assign_events(struct i
/* fastpath, try to reuse previous register */
for (i = 0; i < n; i++) {
hwc = &box->event_list[i]->hw;
- c = hwc->constraint;
+ c = box->event_constraint[i];

/* never assigned */
if (hwc->idx == -1)
@@ -394,7 +393,7 @@ static int uncore_assign_events(struct i
}
/* slow path */
if (i != n)
- ret = perf_assign_events(box->event_list, n,
+ ret = perf_assign_events(box->event_constraint, n,
wmin, wmax, assign);

if (!assign || ret) {
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -97,6 +97,7 @@ struct intel_uncore_box {
atomic_t refcnt;
struct perf_event *events[UNCORE_PMC_IDX_MAX];
struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
+ struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
u64 tags[UNCORE_PMC_IDX_MAX];
struct pci_dev *pci_dev;


2015-05-21 12:35:06

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

Peter,

On Thu, May 21, 2015 at 4:17 AM, Peter Zijlstra <[email protected]> wrote:
> Commit 43b4578071c0 ("perf/x86: Reduce stack usage of
> x86_schedule_events()") violated the rule that 'fake' scheduling; as
> used for event/group validation; should not change the event state.
>
> This went mostly un-noticed because repeated calls of
> x86_pmu::get_event_constraints() would give the same result. And
> x86_pmu::put_event_constraints() would mostly not do anything.
>
> Things could still go wrong esp. for shared_regs, because
> cpuc->is_fake can return a different constraint and then the
> put_event_constraint will not match up.
>
I don't follow this here. What do you mean by 'match up'?

> Commit e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption
> bug workaround") made the situation much worse by actually setting the
> event->hw.constraint value to NULL, so when validation and actual
> scheduling interact we get NULL ptr derefs.
>

But x86_schedule_events() does reset the hw.constraint for each invocation:

c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
hwc->constraint = c;

> Fix it by removing the constraint pointer from the event and move it
> back to an array, this time in cpuc instead of on the stack.
>
> Reported-by: Vince Weaver <[email protected]>
> Fixes: 43b4578071c0 ("perf/x86: Reduce stack usage of x86_schedule_events()")
> Fixes: e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption bug workaround")
> Cc: Andrew Hunter <[email protected]>
> Cc: Maria Dimakopoulou <[email protected]>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> arch/x86/kernel/cpu/perf_event.c | 32 +++++++++++++-------------
> arch/x86/kernel/cpu/perf_event.h | 9 ++++---
> arch/x86/kernel/cpu/perf_event_intel.c | 15 +++---------
> arch/x86/kernel/cpu/perf_event_intel_ds.c | 4 +--
> arch/x86/kernel/cpu/perf_event_intel_uncore.c | 7 ++---
> arch/x86/kernel/cpu/perf_event_intel_uncore.h | 1
> 6 files changed, 32 insertions(+), 36 deletions(-)
>
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -620,7 +620,7 @@ struct sched_state {
> struct perf_sched {
> int max_weight;
> int max_events;
> - struct perf_event **events;
> + struct event_constraint **constraints;
> struct sched_state state;
> int saved_states;
> struct sched_state saved[SCHED_STATES_MAX];
> @@ -629,7 +629,7 @@ struct perf_sched {
> /*
> * Initialize interator that runs through all events and counters.
> */
> -static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
> +static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
> int num, int wmin, int wmax)
> {
> int idx;
> @@ -637,10 +637,10 @@ static void perf_sched_init(struct perf_
> memset(sched, 0, sizeof(*sched));
> sched->max_events = num;
> sched->max_weight = wmax;
> - sched->events = events;
> + sched->constraints = constraints;
>
> for (idx = 0; idx < num; idx++) {
> - if (events[idx]->hw.constraint->weight == wmin)
> + if (constraints[idx]->weight == wmin)
> break;
> }
>
> @@ -687,7 +687,7 @@ static bool __perf_sched_find_counter(st
> if (sched->state.event >= sched->max_events)
> return false;
>
> - c = sched->events[sched->state.event]->hw.constraint;
> + c = sched->constraints[sched->state.event];
> /* Prefer fixed purpose counters */
> if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
> idx = INTEL_PMC_IDX_FIXED;
> @@ -745,7 +745,7 @@ static bool perf_sched_next_event(struct
> if (sched->state.weight > sched->max_weight)
> return false;
> }
> - c = sched->events[sched->state.event]->hw.constraint;
> + c = sched->constraints[sched->state.event];
> } while (c->weight != sched->state.weight);
>
> sched->state.counter = 0; /* start with first counter */
> @@ -756,12 +756,12 @@ static bool perf_sched_next_event(struct
> /*
> * Assign a counter for each event.
> */
> -int perf_assign_events(struct perf_event **events, int n,
> +int perf_assign_events(struct event_constraint **constraints, int n,
> int wmin, int wmax, int *assign)
> {
> struct perf_sched sched;
>
> - perf_sched_init(&sched, events, n, wmin, wmax);
> + perf_sched_init(&sched, constraints, n, wmin, wmax);
>
> do {
> if (!perf_sched_find_counter(&sched))
> @@ -788,9 +788,8 @@ int x86_schedule_events(struct cpu_hw_ev
> x86_pmu.start_scheduling(cpuc);
>
> for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
> - hwc = &cpuc->event_list[i]->hw;
> c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
> - hwc->constraint = c;
> + cpuc->event_constraint[i] = c;
>
> wmin = min(wmin, c->weight);
> wmax = max(wmax, c->weight);
> @@ -801,7 +800,7 @@ int x86_schedule_events(struct cpu_hw_ev
> */
> for (i = 0; i < n; i++) {
> hwc = &cpuc->event_list[i]->hw;
> - c = hwc->constraint;
> + c = cpuc->event_constraint[i];
>
> /* never assigned */
> if (hwc->idx == -1)
> @@ -821,9 +820,10 @@ int x86_schedule_events(struct cpu_hw_ev
> }
>
> /* slow path */
> - if (i != n)
> - unsched = perf_assign_events(cpuc->event_list, n, wmin,
> + if (i != n) {
> + unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
> wmax, assign);
> + }
>
> /*
> * In case of success (unsched = 0), mark events as committed,
> @@ -840,7 +840,7 @@ int x86_schedule_events(struct cpu_hw_ev
> e = cpuc->event_list[i];
> e->hw.flags |= PERF_X86_EVENT_COMMITTED;
> if (x86_pmu.commit_scheduling)
> - x86_pmu.commit_scheduling(cpuc, e, assign[i]);
> + x86_pmu.commit_scheduling(cpuc, i, assign[i]);
> }
> }
>
> @@ -1292,8 +1292,10 @@ static void x86_pmu_del(struct perf_even
> x86_pmu.put_event_constraints(cpuc, event);
>
> /* Delete the array entry. */
> - while (++i < cpuc->n_events)
> + while (++i < cpuc->n_events) {
> cpuc->event_list[i-1] = cpuc->event_list[i];
> + cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
> + }
> --cpuc->n_events;
>
> perf_event_update_userpage(event);
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -172,7 +172,10 @@ struct cpu_hw_events {
> added in the current transaction */
> int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
> u64 tags[X86_PMC_IDX_MAX];
> +
> struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
> + struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
> +
>
> unsigned int group_flag;
> int is_fake;
> @@ -519,9 +522,7 @@ struct x86_pmu {
> void (*put_event_constraints)(struct cpu_hw_events *cpuc,
> struct perf_event *event);
>
> - void (*commit_scheduling)(struct cpu_hw_events *cpuc,
> - struct perf_event *event,
> - int cntr);
> + void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
>
> void (*start_scheduling)(struct cpu_hw_events *cpuc);
>
> @@ -717,7 +718,7 @@ static inline void __x86_pmu_enable_even
>
> void x86_pmu_enable_all(int added);
>
> -int perf_assign_events(struct perf_event **events, int n,
> +int perf_assign_events(struct event_constraint **constraints, int n,
> int wmin, int wmax, int *assign);
> int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
>
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -2106,7 +2106,7 @@ static struct event_constraint *
> intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
> struct perf_event *event)
> {
> - struct event_constraint *c1 = event->hw.constraint;
> + struct event_constraint *c1 = cpuc->event_constraint[idx];
> struct event_constraint *c2;
>
> /*
> @@ -2188,8 +2188,6 @@ intel_put_shared_regs_event_constraints(
> static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
> struct perf_event *event)
> {
> - struct event_constraint *c = event->hw.constraint;
> -
> intel_put_shared_regs_event_constraints(cpuc, event);
>
> /*
> @@ -2197,19 +2195,14 @@ static void intel_put_event_constraints(
> * all events are subject to and must call the
> * put_excl_constraints() routine
> */
> - if (c && cpuc->excl_cntrs)
> + if (cpuc->excl_cntrs)
> intel_put_excl_constraints(cpuc, event);
> -
> - /* cleanup dynamic constraint */
> - if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
> - event->hw.constraint = NULL;
> }
>
> -static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
> - struct perf_event *event, int cntr)
> +static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
> {
> struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
> - struct event_constraint *c = event->hw.constraint;
> + struct event_constraint *c = cpuc->event_constraint[idx];
> struct intel_excl_states *xlo, *xl;
> int tid = cpuc->excl_thread_id;
> int o_tid = 1 - tid;
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -706,9 +706,9 @@ void intel_pmu_pebs_disable(struct perf_
>
> cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
>
> - if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
> + if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
> cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
> - else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
> + else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
> cpuc->pebs_enabled &= ~(1ULL << 63);
>
> if (cpuc->enabled)
> --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
> @@ -364,9 +364,8 @@ static int uncore_assign_events(struct i
> bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
>
> for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
> - hwc = &box->event_list[i]->hw;
> c = uncore_get_event_constraint(box, box->event_list[i]);
> - hwc->constraint = c;
> + box->event_constraint[i] = c;
> wmin = min(wmin, c->weight);
> wmax = max(wmax, c->weight);
> }
> @@ -374,7 +373,7 @@ static int uncore_assign_events(struct i
> /* fastpath, try to reuse previous register */
> for (i = 0; i < n; i++) {
> hwc = &box->event_list[i]->hw;
> - c = hwc->constraint;
> + c = box->event_constraint[i];
>
> /* never assigned */
> if (hwc->idx == -1)
> @@ -394,7 +393,7 @@ static int uncore_assign_events(struct i
> }
> /* slow path */
> if (i != n)
> - ret = perf_assign_events(box->event_list, n,
> + ret = perf_assign_events(box->event_constraint, n,
> wmin, wmax, assign);
>
> if (!assign || ret) {
> --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
> +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
> @@ -97,6 +97,7 @@ struct intel_uncore_box {
> atomic_t refcnt;
> struct perf_event *events[UNCORE_PMC_IDX_MAX];
> struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
> + struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
> unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
> u64 tags[UNCORE_PMC_IDX_MAX];
> struct pci_dev *pci_dev;
>
>

2015-05-21 12:56:25

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On Thu, May 21, 2015 at 05:35:02AM -0700, Stephane Eranian wrote:
> Peter,
>
> On Thu, May 21, 2015 at 4:17 AM, Peter Zijlstra <[email protected]> wrote:
> > Commit 43b4578071c0 ("perf/x86: Reduce stack usage of
> > x86_schedule_events()") violated the rule that 'fake' scheduling; as
> > used for event/group validation; should not change the event state.
> >
> > This went mostly un-noticed because repeated calls of
> > x86_pmu::get_event_constraints() would give the same result. And
> > x86_pmu::put_event_constraints() would mostly not do anything.
> >
> > Things could still go wrong esp. for shared_regs, because
> > cpuc->is_fake can return a different constraint and then the
> > put_event_constraint will not match up.
> >
> I don't follow this here. What do you mean by 'match up'?

Ah, I wrote that Changelog for a prior patch; which by writing the
changelog I found faulty.

But I then forgot to update the Changelog.

I was under the impression put_event_constraints() would actually take
the constraint as an argument, and with the below example, it would not
do put on the same it would get.

> > Commit e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption
> > bug workaround") made the situation much worse by actually setting the
> > event->hw.constraint value to NULL, so when validation and actual
> > scheduling interact we get NULL ptr derefs.
> >
>
> But x86_schedule_events() does reset the hw.constraint for each invocation:
>
> c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
> hwc->constraint = c;

Yes, so if you have:

validate_group()

hwc->constraint = c;

<context switch>

c = hwc->constraint;

The second c might not be the first.

2015-05-21 13:07:24

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On Thu, May 21, 2015 at 5:56 AM, Peter Zijlstra <[email protected]> wrote:
> On Thu, May 21, 2015 at 05:35:02AM -0700, Stephane Eranian wrote:
>> > Commit e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption
>> > bug workaround") made the situation much worse by actually setting the
>> > event->hw.constraint value to NULL, so when validation and actual
>> > scheduling interact we get NULL ptr derefs.
>> >
>>
>> But x86_schedule_events() does reset the hw.constraint for each invocation:
>>
>> c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
>> hwc->constraint = c;
>
> Yes, so if you have:
>
> validate_group()
>
> hwc->constraint = c;
>
Ok, you get that because validate_group() invokes x86_schedule_events() but
on the fake_cpuc. This operates on fake_cpuc->event_list[]->hwc.

> <context switch>
>
> c = hwc->constraint;
>
> The second c might not be the first.
And where does this assignment come from?
For actual scheduling, we are using the actual cpuc, not fake_cpuc.
Validate_group() does not modify global cpuc state. Or am I missing something?

2015-05-21 13:10:01

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On Thu, May 21, 2015 at 06:07:20AM -0700, Stephane Eranian wrote:
> On Thu, May 21, 2015 at 5:56 AM, Peter Zijlstra <[email protected]> wrote:
> > On Thu, May 21, 2015 at 05:35:02AM -0700, Stephane Eranian wrote:
> >> > Commit e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption
> >> > bug workaround") made the situation much worse by actually setting the
> >> > event->hw.constraint value to NULL, so when validation and actual
> >> > scheduling interact we get NULL ptr derefs.
> >> >
> >>
> >> But x86_schedule_events() does reset the hw.constraint for each invocation:
> >>
> >> c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
> >> hwc->constraint = c;
> >
> > Yes, so if you have:
> >
> > validate_group()
> >
> > hwc->constraint = c;
> >
> Ok, you get that because validate_group() invokes x86_schedule_events() but
> on the fake_cpuc. This operates on fake_cpuc->event_list[]->hwc.
>
> > <context switch>
> >
> > c = hwc->constraint;
> >
> > The second c might not be the first.
> And where does this assignment come from?

That's a read. The <context switch> can include a call to
x86_schedule_events().

> For actual scheduling, we are using the actual cpuc, not fake_cpuc.
> Validate_group() does not modify global cpuc state. Or am I missing
> something?

No, but x86_schedule_event() can modify event state, which is the fail.

2015-05-21 13:18:17

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On Thu, May 21, 2015 at 6:09 AM, Peter Zijlstra <[email protected]> wrote:
> On Thu, May 21, 2015 at 06:07:20AM -0700, Stephane Eranian wrote:
>> On Thu, May 21, 2015 at 5:56 AM, Peter Zijlstra <[email protected]> wrote:
>> > On Thu, May 21, 2015 at 05:35:02AM -0700, Stephane Eranian wrote:
>> >> > Commit e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption
>> >> > bug workaround") made the situation much worse by actually setting the
>> >> > event->hw.constraint value to NULL, so when validation and actual
>> >> > scheduling interact we get NULL ptr derefs.
>> >> >
>> >>
>> >> But x86_schedule_events() does reset the hw.constraint for each invocation:
>> >>
>> >> c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
>> >> hwc->constraint = c;
>> >
>> > Yes, so if you have:
>> >
>> > validate_group()
>> >
>> > hwc->constraint = c;
>> >
>> Ok, you get that because validate_group() invokes x86_schedule_events() but
>> on the fake_cpuc. This operates on fake_cpuc->event_list[]->hwc.
>>
>> > <context switch>
>> >
>> > c = hwc->constraint;
>> >
>> > The second c might not be the first.
>> And where does this assignment come from?
>
> That's a read. The <context switch> can include a call to
> x86_schedule_events().
Yes, but x86_schedule_events() never reads the constraint
without setting it again before.

>
>> For actual scheduling, we are using the actual cpuc, not fake_cpuc.
>> Validate_group() does not modify global cpuc state. Or am I missing
>> something?
>
> No, but x86_schedule_event() can modify event state, which is the fail.
>
Yes, it does modify the cpuc->event_list[]->hwc, because it is used as a
cache for *EACH* invocation of the function. It is irrelevant outside the
function.

>

2015-05-21 13:20:23

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On Thu, May 21, 2015 at 06:18:15AM -0700, Stephane Eranian wrote:
> Yes, it does modify the cpuc->event_list[]->hwc, because it is used as a
> cache for *EACH* invocation of the function. It is irrelevant outside the
> function.

Yes, but the problem is that they _nest_.

So you get to use the one from the nested call in the outer call.

2015-05-21 13:27:25

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On Thu, May 21, 2015 at 6:20 AM, Peter Zijlstra <[email protected]> wrote:
> On Thu, May 21, 2015 at 06:18:15AM -0700, Stephane Eranian wrote:
>> Yes, it does modify the cpuc->event_list[]->hwc, because it is used as a
>> cache for *EACH* invocation of the function. It is irrelevant outside the
>> function.
>
> Yes, but the problem is that they _nest_.
>
> So you get to use the one from the nested call in the outer call.

You mean x86_schedule_events() calls x86_schedule_events()?
Or are you talking about a preemption while executing x86_schedule_events()?

2015-05-21 13:29:32

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On Thu, 2015-05-21 at 06:27 -0700, Stephane Eranian wrote:
> Or are you talking about a preemption while executing x86_schedule_events()?

That.

And we can of course cure that by an earlier patch I sent; but I find it
a much simpler rule to just never allow modifying global state for
validation.

2015-05-21 13:37:07

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On Thu, May 21, 2015 at 6:29 AM, Peter Zijlstra <[email protected]> wrote:
> On Thu, 2015-05-21 at 06:27 -0700, Stephane Eranian wrote:
>> Or are you talking about a preemption while executing x86_schedule_events()?
>
> That.
>
> And we can of course cure that by an earlier patch I sent; but I find it
> a much simpler rule to just never allow modifying global state for
> validation.

I can see validation being preempted, but not the context switch code path.
Is that what you are talking about?

You are saying validate_group() is in the middle of x86_schedule_events()
using fake_cpuc, when it gets preempted. The context switch code when it loads
the new thread's PMU state calls x86_schedule_events() which modifies the
cpuc->event_list[]->hwc. But this is cpuc vs. fake_cpuc again. So yes, the calls
nest but they do not touch the same state. And when you eventually come back
to validate_group() you are back to using the fake_cpuc. So I am still not clear
on how the corruption can happen.

2015-05-21 14:04:13

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On Thu, 2015-05-21 at 06:36 -0700, Stephane Eranian wrote:
> On Thu, May 21, 2015 at 6:29 AM, Peter Zijlstra <[email protected]> wrote:
> > On Thu, 2015-05-21 at 06:27 -0700, Stephane Eranian wrote:
> >> Or are you talking about a preemption while executing x86_schedule_events()?
> >
> > That.
> >
> > And we can of course cure that by an earlier patch I send; but I find it
> > a much simpler rule to just never allow modifying global state for
> > validation.
>
> I can see validation being preempted, but not the context switch code path.
> Is that what you are talking about?
>
> You are saying validate_group() is in the middle of x86_schedule_events()
> using fake_cpuc, when it gets preempted. The context switch code when it loads
> the new thread's PMU state calls x86_schedule_events() which modifies the
> cpuc->event_list[]->hwc. But this is cpuc vs. fake_cpuc again. So yes, the calls
> nest but they do not touch the same state.

They both touch event->hw->constraint.

> And when you eventually come back
> to validate_group() you are back to using the fake_cpuc. So I am still not clear
> on how the corruption can happen.

validate_group()
x86_schedule_events()
event->hw.constraint = c; # store

<context switch>
perf_task_event_sched_in()
...
x86_schedule_events();
event->hw.constraint = c2; # store

...

put_event_constraints(event); # assume failure to schedule
intel_put_event_constraints()
event->hw.constraint = NULL;

<context switch end>

c = event->hw.constraint; # read -> NULL

if (!test_bit(hwc->idx, c->idxmsk)) # <- *BOOM* NULL deref


This in particular is possible when the event in question is a cpu-wide
event and group-leader, where the validate_group() tries to add an event
to the group.

2015-05-21 14:53:41

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On Thu, May 21, 2015 at 01:17:11PM +0200, Peter Zijlstra wrote:
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -706,9 +706,9 @@ void intel_pmu_pebs_disable(struct perf_
>
> cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
>
> - if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
> + if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
> cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
> - else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
> + else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
> cpuc->pebs_enabled &= ~(1ULL << 63);
>
> if (cpuc->enabled)

This btw looks like an independent bug.

2015-05-21 15:11:13

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On Thu, May 21, 2015 at 7:03 AM, Peter Zijlstra <[email protected]> wrote:
> On Thu, 2015-05-21 at 06:36 -0700, Stephane Eranian wrote:
>> On Thu, May 21, 2015 at 6:29 AM, Peter Zijlstra <[email protected]> wrote:
>> > On Thu, 2015-05-21 at 06:27 -0700, Stephane Eranian wrote:
>> >> Or are you talking about a preemption while executing x86_schedule_events()?
>> >
>> > That.
>> >
>> > And we can of course cure that by an earlier patch I send; but I find it
>> > a much simpler rule to just never allow modifying global state for
>> > validation.
>>
>> I can see validation being preempted, but not the context switch code path.
>> Is that what you are talking about?
>>
>> You are saying validate_group() is in the middle of x86_schedule_events()
>> using fake_cpuc, when it gets preempted. The context switch code when it loads
>> the new thread's PMU state calls x86_schedule_events() which modifies the
>> cpuc->event_list[]->hwc. But this is cpuc vs. fake_cpuc again. So yes, the calls
>> nest but they do not touch the same state.
>
> They both touch event->hw->constraint.
>
>> And when you eventually come back
>> to validate_group() you are back to using the fake_cpuc. So I am still not clear
>> on how the corruption can happen.
>
> validate_group()
> x86_schedule_events()
> event->hw.constraint = c; # store
>
> <context switch>
> perf_task_event_sched_in()
> ...
> x86_schedule_events();
> event->hw.constraint = c2; # store
>
> ...
>
> put_event_constraints(event); # assume failure to schedule
> intel_put_event_constraints()
> event->hw.constraint = NULL;
>
> <context switch end>
>
> c = event->hw.constraint; # read -> NULL
>
> if (!test_bit(hwc->idx, c->idxmsk)) # <- *BOOM* NULL deref
>
>
> This in particular is possible when the event in question is a cpu-wide
> event and group-leader, where the validate_group() tries to add an event
> to the group.
>
Ok, I think I get it now. It is not related to fake_cpuc vs. cpuc, it is related
to the fact that the constraint is cached in the event struct itself and that
one is shared between validate_group() and x86_schedule_events() because
cpu_hw_event->event_list[] is an array of pointers to events and not an array of
events.

2015-05-21 15:42:07

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On Thu, May 21, 2015 at 7:53 AM, Peter Zijlstra <[email protected]> wrote:
> On Thu, May 21, 2015 at 01:17:11PM +0200, Peter Zijlstra wrote:
>> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
>> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
>> @@ -706,9 +706,9 @@ void intel_pmu_pebs_disable(struct perf_
>>
>> cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
>>
>> - if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
>> + if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
>> cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
>> - else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
>> + else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
>> cpuc->pebs_enabled &= ~(1ULL << 63);
>>
>> if (cpuc->enabled)
>
> This btw look like an independent bug.

You mean, referring to event->hw.constraint?

2015-05-22 06:50:00

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation


* Stephane Eranian <[email protected]> wrote:

> On Thu, May 21, 2015 at 7:03 AM, Peter Zijlstra <[email protected]> wrote:
> > On Thu, 2015-05-21 at 06:36 -0700, Stephane Eranian wrote:
> >> On Thu, May 21, 2015 at 6:29 AM, Peter Zijlstra <[email protected]> wrote:
> >> > On Thu, 2015-05-21 at 06:27 -0700, Stephane Eranian wrote:
> >> >> Or are you talking about a preemption while executing x86_schedule_events()?
> >> >
> >> > That.
> >> >
> >> > And we can of course cure that by an earlier patch I send; but I find it
> >> > a much simpler rule to just never allow modifying global state for
> >> > validation.
> >>
> >> I can see validation being preempted, but not the context switch code path.
> >> Is that what you are talking about?
> >>
> >> You are saying validate_group() is in the middle of x86_schedule_events()
> >> using fake_cpuc, when it gets preempted. The context switch code when it loads
> >> the new thread's PMU state calls x86_schedule_events() which modifies the
> >> cpuc->event_list[]->hwc. But this is cpuc vs. fake_cpuc again. So yes, the calls
> >> nest but they do not touch the same state.
> >
> > They both touch event->hw->constraint.
> >
> >> And when you eventually come back
> >> to validate_group() you are back to using the fake_cpuc. So I am still not clear
> >> on how the corruption can happen.
> >
> > validate_group()
> > x86_schedule_events()
> > event->hw.constraint = c; # store
> >
> > <context switch>
> > perf_task_event_sched_in()
> > ...
> > x86_schedule_events();
> > event->hw.constraint = c2; # store
> >
> > ...
> >
> > put_event_constraints(event); # assume failure to schedule
> > intel_put_event_constraints()
> > event->hw.constraint = NULL;
> >
> > <context switch end>
> >
> > c = event->hw.constraint; # read -> NULL
> >
> > if (!test_bit(hwc->idx, c->idxmsk)) # <- *BOOM* NULL deref
> >
> >
> > This in particular is possible when the event in question is a cpu-wide
> > event and group-leader, where the validate_group() tries to add an event
> > to the group.
>
> Ok, I think I get it now. It is not related to fake_cpuc vs. cpuc,
> it is related to the fact that the constraint is cached in the event
> struct itself and that one is shared between validate_group() and
> x86_schedule_events() because cpu_hw_event->event_list[] is an array
> of pointers to events and not an array of events.

Btw., comments and the code structure should be greatly enhanced to
make all that very clear and hard to mess up.

A month ago perf became fuzzing-proof, and now that's down the drain
again...

Thanks,

Ingo

2015-05-22 09:26:57

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On Thu, May 21, 2015 at 11:49 PM, Ingo Molnar <[email protected]> wrote:
>
>
> * Stephane Eranian <[email protected]> wrote:
>
> > On Thu, May 21, 2015 at 7:03 AM, Peter Zijlstra <[email protected]> wrote:
> > > On Thu, 2015-05-21 at 06:36 -0700, Stephane Eranian wrote:
> > >> On Thu, May 21, 2015 at 6:29 AM, Peter Zijlstra <[email protected]> wrote:
> > >> > On Thu, 2015-05-21 at 06:27 -0700, Stephane Eranian wrote:
> > >> >> Or are you talking about a preemption while executing x86_schedule_events()?
> > >> >
> > >> > That.
> > >> >
> > >> > And we can of course cure that by an earlier patch I send; but I find it
> > >> > a much simpler rule to just never allow modifying global state for
> > >> > validation.
> > >>
> > >> I can see validation being preempted, but not the context switch code path.
> > >> Is that what you are talking about?
> > >>
> > >> You are saying validate_group() is in the middle of x86_schedule_events()
> > >> using fake_cpuc, when it gets preempted. The context switch code when it loads
> > >> the new thread's PMU state calls x86_schedule_events() which modifies the
> > >> cpuc->event_list[]->hwc. But this is cpuc vs. fake_cpuc again. So yes, the calls
> > >> nest but they do not touch the same state.
> > >
> > > They both touch event->hw->constraint.
> > >
> > >> And when you eventually come back
> > >> to validate_group() you are back to using the fake_cpuc. So I am still not clear
> > >> on how the corruption can happen.
> > >
> > > validate_group()
> > > x86_schedule_events()
> > > event->hw.constraint = c; # store
> > >
> > > <context switch>
> > > perf_task_event_sched_in()
> > > ...
> > > x86_schedule_events();
> > > event->hw.constraint = c2; # store
> > >
> > > ...
> > >
> > > put_event_constraints(event); # assume failure to schedule
> > > intel_put_event_constraints()
> > > event->hw.constraint = NULL;
> > >
> > > <context switch end>
> > >
> > > c = event->hw.constraint; # read -> NULL
> > >
> > > if (!test_bit(hwc->idx, c->idxmsk)) # <- *BOOM* NULL deref
> > >
> > >
> > > This in particular is possible when the event in question is a cpu-wide
> > > event and group-leader, where the validate_group() tries to add an event
> > > to the group.
> >
> > Ok, I think I get it now. It is not related to fake_cpuc vs. cpuc,
> > it is related to the fact that the constraint is cached in the event
> > struct itself and that one is shared between validate_group() and
> > x86_schedule_events() because cpu_hw_event->event_list[] is an array
> > of pointers to events and not an array of events.
>
> Btw., comments and the code structure should be greatly enhanced to
> make all that very clear and hard to mess up.
>
Peter and I will clean this up.

>
> A month ago perf became fuzzing-proof, and now that's down the drain
> again...
>
It will be fixed this week.

2015-05-22 09:46:29

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation


* Stephane Eranian <[email protected]> wrote:

> On Thu, May 21, 2015 at 11:49 PM, Ingo Molnar <[email protected]> wrote:
> >
> >
> > * Stephane Eranian <[email protected]> wrote:
> >
> > > On Thu, May 21, 2015 at 7:03 AM, Peter Zijlstra <[email protected]> wrote:
> > > > On Thu, 2015-05-21 at 06:36 -0700, Stephane Eranian wrote:
> > > >> On Thu, May 21, 2015 at 6:29 AM, Peter Zijlstra <[email protected]> wrote:
> > > >> > On Thu, 2015-05-21 at 06:27 -0700, Stephane Eranian wrote:
> > > >> >> Or are you talking about a preemption while executing x86_schedule_events()?
> > > >> >
> > > >> > That.
> > > >> >
> > > >> > And we can of course cure that by an earlier patch I send; but I find it
> > > >> > a much simpler rule to just never allow modifying global state for
> > > >> > validation.
> > > >>
> > > >> I can see validation being preempted, but not the context switch code path.
> > > >> Is that what you are talking about?
> > > >>
> > > >> You are saying validate_group() is in the middle of x86_schedule_events()
> > > >> using fake_cpuc, when it gets preempted. The context switch code when it loads
> > > >> the new thread's PMU state calls x86_schedule_events() which modifies the
> > > >> cpuc->event_list[]->hwc. But this is cpuc vs. fake_cpuc again. So yes, the calls
> > > >> nest but they do not touch the same state.
> > > >
> > > > They both touch event->hw->constraint.
> > > >
> > > >> And when you eventually come back
> > > >> to validate_group() you are back to using the fake_cpuc. So I am still not clear
> > > >> on how the corruption can happen.
> > > >
> > > > validate_group()
> > > > x86_schedule_events()
> > > > event->hw.constraint = c; # store
> > > >
> > > > <context switch>
> > > > perf_task_event_sched_in()
> > > > ...
> > > > x86_schedule_events();
> > > > event->hw.constraint = c2; # store
> > > >
> > > > ...
> > > >
> > > > put_event_constraints(event); # assume failure to schedule
> > > > intel_put_event_constraints()
> > > > event->hw.constraint = NULL;
> > > >
> > > > <context switch end>
> > > >
> > > > c = event->hw.constraint; # read -> NULL
> > > >
> > > > if (!test_bit(hwc->idx, c->idxmsk)) # <- *BOOM* NULL deref
> > > >
> > > >
> > > > This in particular is possible when the event in question is a cpu-wide
> > > > event and group-leader, where the validate_group() tries to add an event
> > > > to the group.
> > >
> > > Ok, I think I get it now. It is not related to fake_cpuc vs. cpuc,
> > > it is related to the fact that the constraint is cached in the event
> > > struct itself and that one is shared between validate_group() and
> > > x86_schedule_events() because cpu_hw_event->event_list[] is an array
> > > of pointers to events and not an array of events.
> >
> > Btw., comments and the code structure should be greatly enhanced
> > to make all that very clear and hard to mess up.
> >
> Peter and I will clean this up.

Great, thanks!

Ingo

2015-08-21 20:32:19

by Sasha Levin

[permalink] [raw]
Subject: Re: [PATCH 01/10] perf,x86: Fix event/group validation

On 05/21/2015 07:17 AM, Peter Zijlstra wrote:
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -2106,7 +2106,7 @@ static struct event_constraint *
> intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
> struct perf_event *event)
> {
> - struct event_constraint *c1 = event->hw.constraint;
> + struct event_constraint *c1 = cpuc->event_constraint[idx];
> struct event_constraint *c2;

Hey Peter,

I was chasing a memory corruption in this area and I think I found
a possible culprit:

After this patch, in the code above, we'd access "cpuc->event_constraint[idx]"
and read/change memory.

The problem is that a valid value for idx is also -1, which isn't checked
here, so we end up accessing and possibly corrupting memory that isn't ours.


Thanks,
Sasha