Commit 43b4578071c0 ("perf/x86: Reduce stack usage of
x86_schedule_events()") violated the rule that 'fake' scheduling, as
used for event/group validation, should not change the event state.

This went mostly unnoticed because repeated calls to
x86_pmu::get_event_constraints() would give the same result, and
x86_pmu::put_event_constraints() would mostly not do anything.

Commit e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption
bug workaround") made the situation much worse by actually setting
event->hw.constraint to NULL, so when validation and actual scheduling
interact we get NULL pointer dereferences.

Fix it by removing the constraint pointer from the event and moving it
back into an array, this time in cpuc instead of on the stack.
  validate_group()
    x86_schedule_events()
      event->hw.constraint = c; # store

      <context switch>
      perf_task_event_sched_in()
        ...
          x86_schedule_events();
            event->hw.constraint = c2; # store

            ...

            put_event_constraints(event); # assume failure to schedule
              intel_put_event_constraints()
                event->hw.constraint = NULL;
      <context switch end>

      c = event->hw.constraint; # read -> NULL

      if (!test_bit(hwc->idx, c->idxmsk)) # <- *BOOM* NULL deref
This is in particular possible when the event in question is a cpu-wide
event and group leader, and validate_group() tries to add an event to
the group.
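
For illustration only (not part of the patch): a minimal, simplified
sketch of the before/after storage; the array size and types below are
stand-ins for the real kernel definitions.

struct event_constraint;
struct perf_event;

struct hw_perf_event_old {
	int				idx;
	struct event_constraint	*constraint;	/* old: stored in the (shared) event */
};

struct cpu_hw_events_new {
	struct perf_event	*event_list[64];	/* X86_PMC_IDX_MAX simplified to 64 */
	struct event_constraint	*event_constraint[64];	/* new: stored in the cpuc doing the scheduling */
};

validate_group() schedules against a fake cpuc while the context-switch
path schedules against the per-CPU cpuc, so with the pointer stored
per-cpuc the two passes no longer clobber each other's state through
the shared perf_event.
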
Fixes: 43b4578071c0 ("perf/x86: Reduce stack usage of x86_schedule_events()")
Fixes: e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption bug workaround")
Cc: Andrew Hunter <[email protected]>
Cc: Maria Dimakopoulou <[email protected]>
Reported-by: Vince Weaver <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 33 ++++++++++++++------------
arch/x86/kernel/cpu/perf_event.h | 9 +++----
arch/x86/kernel/cpu/perf_event_intel.c | 15 +++--------
arch/x86/kernel/cpu/perf_event_intel_ds.c | 4 +--
arch/x86/kernel/cpu/perf_event_intel_uncore.c | 7 ++---
arch/x86/kernel/cpu/perf_event_intel_uncore.h | 1
6 files changed, 33 insertions(+), 36 deletions(-)
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -620,7 +620,7 @@ struct sched_state {
struct perf_sched {
int max_weight;
int max_events;
- struct perf_event **events;
+ struct event_constraint **constraints;
struct sched_state state;
int saved_states;
struct sched_state saved[SCHED_STATES_MAX];
@@ -629,7 +629,7 @@ struct perf_sched {
/*
* Initialize interator that runs through all events and counters.
*/
-static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
int num, int wmin, int wmax)
{
int idx;
@@ -637,10 +637,10 @@ static void perf_sched_init(struct perf_
memset(sched, 0, sizeof(*sched));
sched->max_events = num;
sched->max_weight = wmax;
- sched->events = events;
+ sched->constraints = constraints;
for (idx = 0; idx < num; idx++) {
- if (events[idx]->hw.constraint->weight == wmin)
+ if (constraints[idx]->weight == wmin)
break;
}
@@ -687,7 +687,7 @@ static bool __perf_sched_find_counter(st
if (sched->state.event >= sched->max_events)
return false;
- c = sched->events[sched->state.event]->hw.constraint;
+ c = sched->constraints[sched->state.event];
/* Prefer fixed purpose counters */
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
idx = INTEL_PMC_IDX_FIXED;
@@ -745,7 +745,7 @@ static bool perf_sched_next_event(struct
if (sched->state.weight > sched->max_weight)
return false;
}
- c = sched->events[sched->state.event]->hw.constraint;
+ c = sched->constraints[sched->state.event];
} while (c->weight != sched->state.weight);
sched->state.counter = 0; /* start with first counter */
@@ -756,12 +756,12 @@ static bool perf_sched_next_event(struct
/*
* Assign a counter for each event.
*/
-int perf_assign_events(struct perf_event **events, int n,
+int perf_assign_events(struct event_constraint **constraints, int n,
int wmin, int wmax, int *assign)
{
struct perf_sched sched;
- perf_sched_init(&sched, events, n, wmin, wmax);
+ perf_sched_init(&sched, constraints, n, wmin, wmax);
do {
if (!perf_sched_find_counter(&sched))
@@ -788,9 +788,9 @@ int x86_schedule_events(struct cpu_hw_ev
x86_pmu.start_scheduling(cpuc);
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
- hwc = &cpuc->event_list[i]->hw;
+ cpuc->event_constraint[i] = NULL;
c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
- hwc->constraint = c;
+ cpuc->event_constraint[i] = c;
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
@@ -801,7 +801,7 @@ int x86_schedule_events(struct cpu_hw_ev
*/
for (i = 0; i < n; i++) {
hwc = &cpuc->event_list[i]->hw;
- c = hwc->constraint;
+ c = cpuc->event_constraint[i];
/* never assigned */
if (hwc->idx == -1)
@@ -821,9 +821,10 @@ int x86_schedule_events(struct cpu_hw_ev
}
/* slow path */
- if (i != n)
- unsched = perf_assign_events(cpuc->event_list, n, wmin,
+ if (i != n) {
+ unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
wmax, assign);
+ }
/*
* In case of success (unsched = 0), mark events as committed,
@@ -840,7 +841,7 @@ int x86_schedule_events(struct cpu_hw_ev
e = cpuc->event_list[i];
e->hw.flags |= PERF_X86_EVENT_COMMITTED;
if (x86_pmu.commit_scheduling)
- x86_pmu.commit_scheduling(cpuc, e, assign[i]);
+ x86_pmu.commit_scheduling(cpuc, i, assign[i]);
}
}
@@ -1292,8 +1293,10 @@ static void x86_pmu_del(struct perf_even
x86_pmu.put_event_constraints(cpuc, event);
/* Delete the array entry. */
- while (++i < cpuc->n_events)
+ while (++i < cpuc->n_events) {
cpuc->event_list[i-1] = cpuc->event_list[i];
+ cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+ }
--cpuc->n_events;
perf_event_update_userpage(event);
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -172,7 +172,10 @@ struct cpu_hw_events {
added in the current transaction */
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
u64 tags[X86_PMC_IDX_MAX];
+
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+ struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
+
unsigned int group_flag;
int is_fake;
@@ -519,9 +522,7 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
- void (*commit_scheduling)(struct cpu_hw_events *cpuc,
- struct perf_event *event,
- int cntr);
+ void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
void (*start_scheduling)(struct cpu_hw_events *cpuc);
@@ -717,7 +718,7 @@ static inline void __x86_pmu_enable_even
void x86_pmu_enable_all(int added);
-int perf_assign_events(struct perf_event **events, int n,
+int perf_assign_events(struct event_constraint **constraints, int n,
int wmin, int wmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2106,7 +2106,7 @@ static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
- struct event_constraint *c1 = event->hw.constraint;
+ struct event_constraint *c1 = cpuc->event_constraint[idx];
struct event_constraint *c2;
/*
@@ -2188,8 +2188,6 @@ intel_put_shared_regs_event_constraints(
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
- struct event_constraint *c = event->hw.constraint;
-
intel_put_shared_regs_event_constraints(cpuc, event);
/*
@@ -2197,19 +2195,14 @@ static void intel_put_event_constraints(
* all events are subject to and must call the
* put_excl_constraints() routine
*/
- if (c && cpuc->excl_cntrs)
+ if (cpuc->excl_cntrs)
intel_put_excl_constraints(cpuc, event);
-
- /* cleanup dynamic constraint */
- if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
- event->hw.constraint = NULL;
}
-static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
- struct perf_event *event, int cntr)
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct event_constraint *c = event->hw.constraint;
+ struct event_constraint *c = cpuc->event_constraint[idx];
struct intel_excl_states *xlo, *xl;
int tid = cpuc->excl_thread_id;
int o_tid = 1 - tid;
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -706,9 +706,9 @@ void intel_pmu_pebs_disable(struct perf_
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
- if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
+ if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
- else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
+ else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled &= ~(1ULL << 63);
if (cpuc->enabled)
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -364,9 +364,8 @@ static int uncore_assign_events(struct i
bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
- hwc = &box->event_list[i]->hw;
c = uncore_get_event_constraint(box, box->event_list[i]);
- hwc->constraint = c;
+ box->event_constraint[i] = c;
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
}
@@ -374,7 +373,7 @@ static int uncore_assign_events(struct i
/* fastpath, try to reuse previous register */
for (i = 0; i < n; i++) {
hwc = &box->event_list[i]->hw;
- c = hwc->constraint;
+ c = box->event_constraint[i];
/* never assigned */
if (hwc->idx == -1)
@@ -394,7 +393,7 @@ static int uncore_assign_events(struct i
}
/* slow path */
if (i != n)
- ret = perf_assign_events(box->event_list, n,
+ ret = perf_assign_events(box->event_constraint, n,
wmin, wmax, assign);
if (!assign || ret) {
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -97,6 +97,7 @@ struct intel_uncore_box {
atomic_t refcnt;
struct perf_event *events[UNCORE_PMC_IDX_MAX];
struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
+ struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
u64 tags[UNCORE_PMC_IDX_MAX];
struct pci_dev *pci_dev;
On Fri, May 22, 2015 at 03:29:06PM +0200, Peter Zijlstra wrote:
> @@ -788,9 +788,9 @@ int x86_schedule_events(struct cpu_hw_ev
> x86_pmu.start_scheduling(cpuc);
>
> for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
> + cpuc->event_constraint[i] = NULL;
^^^ that is new, which is esp. important in light of the
intel_get_event_constraints() hunk below, which would happily continue
life with a garbage constraint.
> c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
> + cpuc->event_constraint[i] = c;
>
> wmin = min(wmin, c->weight);
> wmax = max(wmax, c->weight);
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -2106,7 +2106,7 @@ static struct event_constraint *
> intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
> struct perf_event *event)
> {
> - struct event_constraint *c1 = event->hw.constraint;
> + struct event_constraint *c1 = cpuc->event_constraint[idx];
> struct event_constraint *c2;
>
> /*
On Fri, May 22, 2015 at 6:40 AM, Peter Zijlstra <[email protected]> wrote:
> On Fri, May 22, 2015 at 03:29:06PM +0200, Peter Zijlstra wrote:
>> @@ -788,9 +788,9 @@ int x86_schedule_events(struct cpu_hw_ev
>> x86_pmu.start_scheduling(cpuc);
>>
>> for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
>> + cpuc->event_constraint[i] = NULL;
>
> ^^^ that is new, which is esp. important in light of the
> intel_get_event_constraints() hunk below, which would happily continue
> life with a garbage constraint.
>
You've moved the constraint list from event to cpuc. Yet, it is still
an array of pointers to constraints. So here you are saying that, in
the case where validate_group() is preempted and there is a context
switch, there is still a risk of overwriting the constraint? I don't
see how, because validate_group() is using a fake_cpuc. So yes, the
cpuc->event_constraint[] array is modified, but it is not the same as
the actual cpuc used by non-validate code. Or am I still missing
something?
When using dynamic constraints, we already have constraint storage in cpuc
(to avoid calling kmalloc() in ctxsw context). Thus, I am wondering if it would
not be easier to always use cpuc for constraint storage (no more pointers).
>> c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
>> + cpuc->event_constraint[i] = c;
>>
>> wmin = min(wmin, c->weight);
>> wmax = max(wmax, c->weight);
>
>
>> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
>> @@ -2106,7 +2106,7 @@ static struct event_constraint *
>> intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
>> struct perf_event *event)
>> {
>> - struct event_constraint *c1 = event->hw.constraint;
>> + struct event_constraint *c1 = cpuc->event_constraint[idx];
>> struct event_constraint *c2;
>>
>> /*
On Tue, May 26, 2015 at 02:24:38AM -0700, Stephane Eranian wrote:
> On Fri, May 22, 2015 at 6:40 AM, Peter Zijlstra <[email protected]> wrote:
> > On Fri, May 22, 2015 at 03:29:06PM +0200, Peter Zijlstra wrote:
> >> @@ -788,9 +788,9 @@ int x86_schedule_events(struct cpu_hw_ev
> >> x86_pmu.start_scheduling(cpuc);
> >>
> >> for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
> >> + cpuc->event_constraint[i] = NULL;
> >
> > ^^^ that is new, which is esp. important in light of the
> > intel_get_event_constraints() hunk below, which would happily continue
> > life with a garbage constraint.
> >
> You've moved the constraint list from event to cpuc. Yet, it is still
> an array of pointers
> to constraints. So here you are saying, that in the case validate_group() is
> preempted and there is a context switch, there is still a risk of
> overwriting the
> constraint? I don't see how because validate_group() is using a fake_cpuc.
> So yes, the cpuc->event_constraint[] array is modified but it is not the same
> as the actual cpuc used by non-validate code. Or am I still missing something?
>
> When using dynamic constraints, we already have constraint storage in cpuc
> (to avoid calling kmalloc() in ctxsw context). Thus, I am wondering if it would
> not be easier to always use cpuc for constraint storage (no more pointers).
No; the problem here is repeated use of the cpuc (the real one). Say one
scheduling run installs a constraint pointer for event i. Then event i
gets removed and another installed in the same spot.
Then the next scheduling run will pick up the old pointer in
intel_get_event_constraints() as a base for the new one.
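
A minimal standalone sketch of that stale-pointer scenario (not kernel
code; the helper and the two-slot setup are made up for illustration).
With the reset at the top of the scheduling loop left out, the second
run keeps using the constraint installed for the event that used to
occupy the slot:

#include <stdio.h>

struct event_constraint { int weight; };
struct perf_event { const char *name; };

#define N 2
static struct perf_event *event_list[N];
static struct event_constraint *event_constraint[N];

/*
 * Stand-in for intel_get_event_constraints(): if the slot already holds
 * a pointer, it is used as the base instead of the fresh constraint.
 */
static struct event_constraint *
get_constraints(int idx, struct event_constraint *fresh)
{
	if (event_constraint[idx])	/* possibly stale, from an earlier run */
		return event_constraint[idx];
	return fresh;
}

int main(void)
{
	struct perf_event e_old = { "old" }, e_new = { "new" };
	struct event_constraint c_old = { 1 }, c_new = { 2 };

	/* first scheduling run installs a constraint for slot 0 */
	event_list[0] = &e_old;
	event_constraint[0] = get_constraints(0, &c_old);

	/* e_old is removed; a different event lands in the same slot */
	event_list[0] = &e_new;

	/* second run: without the reset, slot 0 still holds c_old */
	/* event_constraint[0] = NULL;	<-- the reset added by the patch */
	event_constraint[0] = get_constraints(0, &c_new);

	printf("slot 0 weight: %d (2 with the reset, stale 1 without)\n",
	       event_constraint[0]->weight);
	return 0;
}
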
On Tue, May 26, 2015 at 3:12 AM, Peter Zijlstra <[email protected]> wrote:
> On Tue, May 26, 2015 at 02:24:38AM -0700, Stephane Eranian wrote:
>> On Fri, May 22, 2015 at 6:40 AM, Peter Zijlstra <[email protected]> wrote:
>> > On Fri, May 22, 2015 at 03:29:06PM +0200, Peter Zijlstra wrote:
>> >> @@ -788,9 +788,9 @@ int x86_schedule_events(struct cpu_hw_ev
>> >> x86_pmu.start_scheduling(cpuc);
>> >>
>> >> for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
>> >> + cpuc->event_constraint[i] = NULL;
>> >
>> > ^^^ that is new, which is esp. important in light of the
>> > intel_get_event_constraints() hunk below, which would happily continue
>> > life with a garbage constraint.
>> >
>> You've moved the constraint list from event to cpuc. Yet, it is still
>> an array of pointers
>> to constraints. So here you are saying, that in the case validate_group() is
>> preempted and there is a context switch, there is still a risk of
>> overwriting the
>> constraint? I don't see how because validate_group() is using a fake_cpuc.
>> So yes, the cpuc->event_constraint[] array is modified but it is not the same
>> as the actual cpuc used by non-validate code. Or am I still missing something?
>>
>> When using dynamic constraints, we already have constraint storage in cpuc
>> (to avoid calling kmalloc() in ctxsw context). Thus, I am wondering if it would
>> not be easier to always use cpuc for constraint storage (no more pointers).
>
> No; the problem here is repeated use of the cpuc (the real one). Say one
> scheduling run installs a constraint pointer for event i. Then event i
> gets removed and another installed in the same spot.
>
> Then the next scheduling run will pick up the old pointer in
> intel_get_event_constraints() as a base for the new one.
>
But where is the code that says: skip reinstalling the constraint
in intel_get_event_constraints() because there is already a (stale)
one? I don't see where that is.
On Tue, May 26, 2015 at 04:46:07AM -0700, Stephane Eranian wrote:
> On Tue, May 26, 2015 at 3:12 AM, Peter Zijlstra <[email protected]> wrote:
> > On Tue, May 26, 2015 at 02:24:38AM -0700, Stephane Eranian wrote:
> >> On Fri, May 22, 2015 at 6:40 AM, Peter Zijlstra <[email protected]> wrote:
> >> > On Fri, May 22, 2015 at 03:29:06PM +0200, Peter Zijlstra wrote:
> >> >> @@ -788,9 +788,9 @@ int x86_schedule_events(struct cpu_hw_ev
> >> >> x86_pmu.start_scheduling(cpuc);
> >> >>
> >> >> for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
> >> >> + cpuc->event_constraint[i] = NULL;
> >> >
> But where is the code that says: skip reinstalling the constraint
> in intel_get_event_constraints() because there is already a (stale)
> one? I don't see where that is.
IIRC the problem was that the copy from c2 into c1:
if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
c1->weight = c2->weight;
c2 = c1;
}
is incomplete. For instance, flags is not copied, and some code down the
line might check that and get wrong flags.
I'm not entirely sure I saw misbehaviour, but I figured I'd better close
that hole and rule out this is contributing to fail.
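
As a sketch only (simplified struct, not a proposed patch), a copy that
also carries the flags over would look roughly like this:

struct event_constraint {
	unsigned long	idxmsk[1];	/* simplified; the real bitmap spans X86_PMC_IDX_MAX bits */
	int		weight;
	unsigned int	flags;
};

static void copy_dyn_constraint(struct event_constraint *c1,
				const struct event_constraint *c2)
{
	/* what the existing code copies ... */
	c1->idxmsk[0] = c2->idxmsk[0];
	c1->weight    = c2->weight;
	/* ... and the part it leaves behind */
	c1->flags     = c2->flags;
}
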
On Tue, May 26, 2015 at 5:16 AM, Peter Zijlstra <[email protected]> wrote:
> On Tue, May 26, 2015 at 04:46:07AM -0700, Stephane Eranian wrote:
>> On Tue, May 26, 2015 at 3:12 AM, Peter Zijlstra <[email protected]> wrote:
>> > On Tue, May 26, 2015 at 02:24:38AM -0700, Stephane Eranian wrote:
>> >> On Fri, May 22, 2015 at 6:40 AM, Peter Zijlstra <[email protected]> wrote:
>> >> > On Fri, May 22, 2015 at 03:29:06PM +0200, Peter Zijlstra wrote:
>> >> >> @@ -788,9 +788,9 @@ int x86_schedule_events(struct cpu_hw_ev
>> >> >> x86_pmu.start_scheduling(cpuc);
>> >> >>
>> >> >> for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
>> >> >> + cpuc->event_constraint[i] = NULL;
>> >> >
>
>> But where is the code that says: skip reinstalling the constraint
>> in intel_get_event_constraints() because there is already a (stale)
>> one? I don't see where that is.
>
> IIRC the problem was that the copy from c2 into c1:
>
> if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
> bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
> c1->weight = c2->weight;
> c2 = c1;
> }
>
> is incomplete. For instance, flags is not copied, and some code down the
> line might check that and get wrong flags.
>
Ok, now I remember this code. It has to do with incremental scheduling.
Suppose events E1, E2, E3 where E1 is exclusive. The first call is for
scheduling E1. It gets to get_event_constraint(), which "allocates" a
dynamic constraint. The second call tries to schedule E1, E2. But the
second time around for E1 the dynamic constraint is already allocated,
so this code reuses the constraint storage and just updates the bitmask
and weight.

Now that the storage is not actually dynamic (kmalloc'd), but taken
from a fixed-size array in cpuc, I believe we can simplify this and
"re-allocate" the constraint for each incremental call to
intel_get_event_constraints(). Do you agree?
> I'm not entirely sure I saw misbehaviour, but I figured I'd better close
> that hole and rule out this is contributing to fail.
>
On Tue, May 26, 2015 at 05:25:59AM -0700, Stephane Eranian wrote:
> > IIRC the problem was that the copy from c2 into c1:
> >
> > if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
> > bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
> > c1->weight = c2->weight;
> > c2 = c1;
> > }
> >
> > is incomplete. For instance, flags is not copied, and some code down the
> > line might check that and get wrong flags.
> >
> Ok, now I remember this code. It has to do with incremental scheduling.
> Suppose E1, E2, E3 events where E1 is exclusive. The first call is
> for scheduling E1. It gets to get_event_constraint() which "allocates" a
> dynamic constraint. The second call tries to schedule E1, E2. But the
> second time for E1, you already have the dynamic constraint allocated, so
> this code is reusing the constraint storage and just updates the bitmask
> and weight.
>
> Now, that the storage is not actually dynamic (kmalloc'd), but taken from a
> fixed size array in cpuc, I believe we can simplify this and "re-allocate"
> the constraint for each incremental call to intel_get_event_constraints().
> Do you agree?
That would probably work, the whole incremental thing seems superfluous
to me.
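
A rough sketch of that simplification (illustrative names only, not the
actual kernel interfaces): the dynamic constraint is rebuilt from the
fixed per-cpuc slot on every call, so nothing from a previous
scheduling run survives.

struct event_constraint { unsigned long idxmsk[1]; int weight; int flags; };

struct cpu_hw_events_sketch {
	struct event_constraint	constraint_list[64];	/* fixed storage, one slot per event */
	struct event_constraint	*event_constraint[64];
};

static struct event_constraint *
get_dyn_constraint(struct cpu_hw_events_sketch *cpuc, int idx,
		   const struct event_constraint *base)
{
	struct event_constraint *c = &cpuc->constraint_list[idx];

	*c = *base;		/* start from the static constraint every time... */
	/* ...then apply the cross-HT exclusion mask etc. on top */
	cpuc->event_constraint[idx] = c;
	return c;
}
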
On Tue, May 26, 2015 at 6:22 AM, Peter Zijlstra <[email protected]> wrote:
> On Tue, May 26, 2015 at 05:25:59AM -0700, Stephane Eranian wrote:
>> > IIRC the problem was that the copy from c2 into c1:
>> >
>> > if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
>> > bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
>> > c1->weight = c2->weight;
>> > c2 = c1;
>> > }
>> >
>> > is incomplete. For instance, flags is not copied, and some code down the
>> > line might check that and get wrong flags.
>> >
>> Ok, now I remember this code. It has to do with incremental scheduling.
>> Suppose E1, E2, E3 events where E1 is exclusive. The first call is
>> for scheduling E1. It gets to get_event_constraint() which "allocates" a
>> dynamic constraint. The second call tries to schedule E1, E2. But the
>> second time for E1, you already have the dynamic constraint allocated, so
>> this code is reusing the constraint storage and just updates the bitmask
>> and weight.
>>
>> Now, that the storage is not actually dynamic (kmalloc'd), but taken from a
>> fixed size array in cpuc, I believe we can simplify this and "re-allocate"
>> the constraint for each incremental call to intel_get_event_constraints().
>> Do you agree?
>
> That would probably work, the whole incremental thing seems superfluous
> to me.
Well, you could pass the whole list (4) of events in one call and let
the arch level sort it out. It could do one pass and stop at the first
error to get a fixed bound like today. But it would need to deal with
groups as well... so not so easy, and why repeat the group processing
for each arch?
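
A purely hypothetical prototype for such an interface, invented here
for illustration and not an existing kernel API:

struct perf_event;
struct cpu_hw_events;

/*
 * Hypothetical: hand the whole candidate list to the arch code once,
 * let it do a single scheduling pass, and report how many events fit
 * (stopping at the first failure, as today).
 */
int x86_schedule_event_list(struct cpu_hw_events *cpuc,
			    struct perf_event **events, int n,
			    int *assign);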