2012-06-01 03:20:59

by Yan, Zheng

[permalink] [raw]
Subject: [PATCH] perf: Fix intel shared extra msr allocation

From: "Yan, Zheng" <[email protected]>

intel_shared_reg_get/put_constraints() can be indirectly called
by validate_group(). In that case, they should avoid modifying
the perf_event date structure because the event can be already
in active state. Otherwise the shared extra msr's reference
count will be left in inconsistent state.

Signed-off-by: Zheng Yan <[email protected]>
---
arch/x86/kernel/cpu/perf_event_intel.c | 31 +++++++++++++++++++++++--------
1 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..10840d0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,11 +1119,21 @@ intel_bts_constraints(struct perf_event *event)
return NULL;
}

-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static bool intel_try_alt_er(struct perf_event *event, int *idx,
+ int orig_idx, bool fake_cpuc)
{
- if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
+ if (!(x86_pmu.er_flags & ERF_HAS_RSP_1) || *idx != orig_idx)
return false;

+ /* don't modify the event structure if the cpuc is faked */
+ if (fake_cpuc) {
+ if (*idx == EXTRA_REG_RSP_0)
+ *idx = EXTRA_REG_RSP_1;
+ else if (*idx == EXTRA_REG_RSP_1)
+ *idx = EXTRA_REG_RSP_0;
+ return (*idx != orig_idx);
+ }
+
if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01bb;
@@ -1139,6 +1149,7 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
if (event->hw.extra_reg.idx == orig_idx)
return false;

+ *idx = event->hw.extra_reg.idx;
return true;
}

@@ -1155,16 +1166,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
struct hw_perf_event_extra *reg)
{
struct event_constraint *c = &emptyconstraint;
+ struct intel_shared_regs *shared_regs = cpuc->shared_regs;
struct er_account *era;
unsigned long flags;
int orig_idx = reg->idx;
+ int idx = orig_idx;

- /* already allocated shared msr */
- if (reg->alloc)
+ /* shared msr is already allocated and cpuc is not faked */
+ if (reg->alloc && shared_regs->core_id != -1)
return NULL; /* call x86_get_event_constraint() */

again:
- era = &cpuc->shared_regs->regs[reg->idx];
+ era = &shared_regs->regs[idx];
/*
* we use spin_lock_irqsave() to avoid lockdep issues when
* passing a fake cpuc
@@ -1181,14 +1194,16 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
atomic_inc(&era->ref);

/* no need to reallocate during incremental event scheduling */
- reg->alloc = 1;
+ if (shared_regs->core_id != -1)
+ reg->alloc = 1;

/*
* need to call x86_get_event_constraint()
* to check if associated event has constraints
*/
c = NULL;
- } else if (intel_try_alt_er(event, orig_idx)) {
+ } else if (intel_try_alt_er(event, &idx, orig_idx,
+ shared_regs->core_id == -1)) {
raw_spin_unlock_irqrestore(&era->lock, flags);
goto again;
}
@@ -1208,7 +1223,7 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
* allocated. Also takes care of event which do
* not use an extra shared reg
*/
- if (!reg->alloc)
+ if (!reg->alloc || cpuc->shared_regs->core_id == -1)
return;

era = &cpuc->shared_regs->regs[reg->idx];
--
1.7.7.6


2012-06-01 09:35:52

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Fri, Jun 1, 2012 at 5:20 AM, Yan, Zheng <[email protected]> wrote:
> From: "Yan, Zheng" <[email protected]>
>
> intel_shared_reg_get/put_constraints() can be indirectly called
> by validate_group(). In that case, they should avoid modifying
> the perf_event date structure because the event can be already
> in active state. Otherwise the shared extra msr's reference
> count will be left in inconsistent state.
>
I understand the problem but I am wondering if you actually saw
it in real life. The reason I am asking is because of the way
validate_group() collects the events and how they are added
to sibling_list. The new event is added at the tail. Thus it will
come last, and will get to __intel_shared_reg_get_constraints()
last, thus I am wondering if it can really modify the programming
on the existing events.

See more comments inline.

> Signed-off-by: Zheng Yan <[email protected]>
> ---
>  arch/x86/kernel/cpu/perf_event_intel.c |   31 +++++++++++++++++++++++--------
>  1 files changed, 23 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 166546e..10840d0 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1119,11 +1119,21 @@ intel_bts_constraints(struct perf_event *event)
>        return NULL;
>  }
>
> -static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
> +static bool intel_try_alt_er(struct perf_event *event, int *idx,
> +                            int orig_idx, bool fake_cpuc)
>  {
> -       if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
> +       if (!(x86_pmu.er_flags & ERF_HAS_RSP_1) || *idx != orig_idx)
>                return false;
>
> +       /* don't modify the event structure if the cpuc is faked */
> +       if (fake_cpuc) {
> +               if (*idx == EXTRA_REG_RSP_0)
> +                       *idx = EXTRA_REG_RSP_1;
> +               else if (*idx == EXTRA_REG_RSP_1)
> +                       *idx = EXTRA_REG_RSP_0;
> +               return (*idx != orig_idx);
> +       }
> +
I understand that.

>        if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                event->hw.config |= 0x01bb;
> @@ -1139,6 +1149,7 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
>        if (event->hw.extra_reg.idx == orig_idx)
>                return false;
>
> +       *idx = event->hw.extra_reg.idx;
>        return true;
>  }
>
> @@ -1155,16 +1166,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>                                   struct hw_perf_event_extra *reg)
>  {
>        struct event_constraint *c = &emptyconstraint;
> +       struct intel_shared_regs *shared_regs = cpuc->shared_regs;
>        struct er_account *era;
>        unsigned long flags;
>        int orig_idx = reg->idx;
> +       int idx = orig_idx;
>
> -       /* already allocated shared msr */
> -       if (reg->alloc)
> +       /* shared msr is already allocated and cpuc is not faked */
> +       if (reg->alloc && shared_regs->core_id != -1)
>                return NULL; /* call x86_get_event_constraint() */
>
I don't understand what you need this stuff. Shared_regs is faked as well.

>  again:
> -       era = &cpuc->shared_regs->regs[reg->idx];
> +       era = &shared_regs->regs[idx];
>        /*
>         * we use spin_lock_irqsave() to avoid lockdep issues when
>         * passing a fake cpuc
> @@ -1181,14 +1194,16 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>                atomic_inc(&era->ref);
>
>                /* no need to reallocate during incremental event scheduling */
> -               reg->alloc = 1;
> +               if (shared_regs->core_id != -1)
> +                       reg->alloc = 1;
>
>                /*
>                 * need to call x86_get_event_constraint()
>                 * to check if associated event has constraints
>                 */
>                c = NULL;
> -       } else if (intel_try_alt_er(event, orig_idx)) {
> +       } else if (intel_try_alt_er(event, &idx, orig_idx,
> +                                   shared_regs->core_id == -1)) {
>                raw_spin_unlock_irqrestore(&era->lock, flags);
>                goto again;
>        }
> @@ -1208,7 +1223,7 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
>         * allocated. Also takes care of event which do
>         * not use an extra shared reg
>         */
> -       if (!reg->alloc)
> +       if (!reg->alloc || cpuc->shared_regs->core_id == -1)
>                return;
>
>        era = &cpuc->shared_regs->regs[reg->idx];
> --
> 1.7.7.6
>

2012-06-01 14:11:13

by Yan, Zheng

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Fri, Jun 1, 2012 at 5:35 PM, Stephane Eranian <[email protected]> wrote:
> On Fri, Jun 1, 2012 at 5:20 AM, Yan, Zheng <[email protected]> wrote:
>> From: "Yan, Zheng" <[email protected]>
>>
>> intel_shared_reg_get/put_constraints() can be indirectly called
>> by validate_group(). In that case, they should avoid modifying
>> the perf_event date structure because the event can be already
>> in active state. Otherwise the shared extra msr's reference
>> count will be left in inconsistent state.
>>
> I understand the problem but I am wondering if you actually saw
> it in real life. The reason I am asking is because ?of the way
> validate_group() collects the events and how they are added
> to sibling_list. The new event is added at the tail. Thus it will
> come last, and will get to __intel_shared_reg_get_constraints()
> last, thus I am wondering if it can really modify the programming
> on the existing events.

The real problem is from __intel_shared_reg_put_constraints(). it set
reg->alloc to 0 and decreases fake_cpuc->shared_regs->regs[reg->idx]'s
reference count. Later when deleting the event, put_constraints() will find
reg->alloc is 0 and it won't decrease the shared msr's reference count.

Run 'perf stat --group -a -C 0 -e LLC-loads -e LLC-stores sleep 1" on
Nehalem can trigger the bug.

>
> See more comments inline.
>
>> Signed-off-by: Zheng Yan <[email protected]>
>> ---
>> ?arch/x86/kernel/cpu/perf_event_intel.c | ? 31 +++++++++++++++++++++++--------
>> ?1 files changed, 23 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
>> index 166546e..10840d0 100644
>> --- a/arch/x86/kernel/cpu/perf_event_intel.c
>> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
>> @@ -1119,11 +1119,21 @@ intel_bts_constraints(struct perf_event *event)
>> ? ? ? ?return NULL;
>> ?}
>>
>> -static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
>> +static bool intel_try_alt_er(struct perf_event *event, int *idx,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?int orig_idx, bool fake_cpuc)
>> ?{
>> - ? ? ? if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
>> + ? ? ? if (!(x86_pmu.er_flags & ERF_HAS_RSP_1) || *idx != orig_idx)
>> ? ? ? ? ? ? ? ?return false;
>>
>> + ? ? ? /* don't modify the event structure if the cpuc is faked */
>> + ? ? ? if (fake_cpuc) {
>> + ? ? ? ? ? ? ? if (*idx == EXTRA_REG_RSP_0)
>> + ? ? ? ? ? ? ? ? ? ? ? *idx = EXTRA_REG_RSP_1;
>> + ? ? ? ? ? ? ? else if (*idx == EXTRA_REG_RSP_1)
>> + ? ? ? ? ? ? ? ? ? ? ? *idx = EXTRA_REG_RSP_0;
>> + ? ? ? ? ? ? ? return (*idx != orig_idx);
>> + ? ? ? }
>> +
> I understand that.
>
>> ? ? ? ?if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
>> ? ? ? ? ? ? ? ?event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>> ? ? ? ? ? ? ? ?event->hw.config |= 0x01bb;
>> @@ -1139,6 +1149,7 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
>> ? ? ? ?if (event->hw.extra_reg.idx == orig_idx)
>> ? ? ? ? ? ? ? ?return false;
>>
>> + ? ? ? *idx = event->hw.extra_reg.idx;
>> ? ? ? ?return true;
>> ?}
>>
>> @@ -1155,16 +1166,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct hw_perf_event_extra *reg)
>> ?{
>> ? ? ? ?struct event_constraint *c = &emptyconstraint;
>> + ? ? ? struct intel_shared_regs *shared_regs = cpuc->shared_regs;
>> ? ? ? ?struct er_account *era;
>> ? ? ? ?unsigned long flags;
>> ? ? ? ?int orig_idx = reg->idx;
>> + ? ? ? int idx = orig_idx;
>>
>> - ? ? ? /* already allocated shared msr */
>> - ? ? ? if (reg->alloc)
>> + ? ? ? /* shared msr is already allocated and cpuc is not faked */
>> + ? ? ? if (reg->alloc && shared_regs->core_id != -1)
>> ? ? ? ? ? ? ? ?return NULL; /* call x86_get_event_constraint() */
>>
> I don't understand what you need this stuff. Shared_regs is faked as well.

The event can be in active state, we should avoid clearing reg->alloc.

Regards
Yan, Zheng

2012-06-04 13:12:52

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Fri, Jun 1, 2012 at 4:11 PM, Yan, Zheng <[email protected]> wrote:
> On Fri, Jun 1, 2012 at 5:35 PM, Stephane Eranian <[email protected]> wrote:
>> On Fri, Jun 1, 2012 at 5:20 AM, Yan, Zheng <[email protected]> wrote:
>>> From: "Yan, Zheng" <[email protected]>
>>>
>>> intel_shared_reg_get/put_constraints() can be indirectly called
>>> by validate_group(). In that case, they should avoid modifying
>>> the perf_event date structure because the event can be already
>>> in active state. Otherwise the shared extra msr's reference
>>> count will be left in inconsistent state.
>>>
>> I understand the problem but I am wondering if you actually saw
>> it in real life. The reason I am asking is because  of the way
>> validate_group() collects the events and how they are added
>> to sibling_list. The new event is added at the tail. Thus it will
>> come last, and will get to __intel_shared_reg_get_constraints()
>> last, thus I am wondering if it can really modify the programming
>> on the existing events.
>
> The real problem is from __intel_shared_reg_put_constraints(). it set
> reg->alloc to 0 and decreases fake_cpuc->shared_regs->regs[reg->idx]'s
> reference count. Later when deleting the event,  put_constraints() will find
> reg->alloc is 0 and it won't decrease the shared msr's reference count.
>
> Run 'perf stat --group -a -C 0 -e LLC-loads -e LLC-stores sleep 1" on
> Nehalem can trigger the bug.
>
And what do you see in this particular example?

I'd like to see the results via libpfm4 and /perf_examples/syst_count:
$ sudo ./syst_count -e offcore_response_0:dmnd_data_rd
-eoffcore_response_0:dmnd_rfo -p -d 10 -c 0

Should see 50% for each event group.

>>
>> See more comments inline.
>>
>>> Signed-off-by: Zheng Yan <[email protected]>
>>> ---
>>>  arch/x86/kernel/cpu/perf_event_intel.c |   31 +++++++++++++++++++++++--------
>>>  1 files changed, 23 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
>>> index 166546e..10840d0 100644
>>> --- a/arch/x86/kernel/cpu/perf_event_intel.c
>>> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
>>> @@ -1119,11 +1119,21 @@ intel_bts_constraints(struct perf_event *event)
>>>        return NULL;
>>>  }
>>>
>>> -static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
>>> +static bool intel_try_alt_er(struct perf_event *event, int *idx,
>>> +                            int orig_idx, bool fake_cpuc)
>>>  {
>>> -       if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
>>> +       if (!(x86_pmu.er_flags & ERF_HAS_RSP_1) || *idx != orig_idx)
>>>                return false;
>>>
>>> +       /* don't modify the event structure if the cpuc is faked */
>>> +       if (fake_cpuc) {
>>> +               if (*idx == EXTRA_REG_RSP_0)
>>> +                       *idx = EXTRA_REG_RSP_1;
>>> +               else if (*idx == EXTRA_REG_RSP_1)
>>> +                       *idx = EXTRA_REG_RSP_0;
>>> +               return (*idx != orig_idx);
>>> +       }
>>> +
>> I understand that.
>>
>>>        if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
>>>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>>>                event->hw.config |= 0x01bb;
>>> @@ -1139,6 +1149,7 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
>>>        if (event->hw.extra_reg.idx == orig_idx)
>>>                return false;
>>>
>>> +       *idx = event->hw.extra_reg.idx;
>>>        return true;
>>>  }
>>>
>>> @@ -1155,16 +1166,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>>>                                   struct hw_perf_event_extra *reg)
>>>  {
>>>        struct event_constraint *c = &emptyconstraint;
>>> +       struct intel_shared_regs *shared_regs = cpuc->shared_regs;
>>>        struct er_account *era;
>>>        unsigned long flags;
>>>        int orig_idx = reg->idx;
>>> +       int idx = orig_idx;
>>>
>>> -       /* already allocated shared msr */
>>> -       if (reg->alloc)
>>> +       /* shared msr is already allocated and cpuc is not faked */
>>> +       if (reg->alloc && shared_regs->core_id != -1)
>>>                return NULL; /* call x86_get_event_constraint() */
>>>
>> I don't understand what you need this stuff. Shared_regs is faked as well.
>
> The event can be in active state, we should avoid clearing reg->alloc.
>
> Regards
> Yan, Zheng

2012-06-05 02:18:17

by Yan, Zheng

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On 06/04/2012 09:12 PM, Stephane Eranian wrote:
> On Fri, Jun 1, 2012 at 4:11 PM, Yan, Zheng <[email protected]> wrote:
>> On Fri, Jun 1, 2012 at 5:35 PM, Stephane Eranian <[email protected]> wrote:
>>> On Fri, Jun 1, 2012 at 5:20 AM, Yan, Zheng <[email protected]> wrote:
>>>> From: "Yan, Zheng" <[email protected]>
>>>>
>>>> intel_shared_reg_get/put_constraints() can be indirectly called
>>>> by validate_group(). In that case, they should avoid modifying
>>>> the perf_event date structure because the event can be already
>>>> in active state. Otherwise the shared extra msr's reference
>>>> count will be left in inconsistent state.
>>>>
>>> I understand the problem but I am wondering if you actually saw
>>> it in real life. The reason I am asking is because of the way
>>> validate_group() collects the events and how they are added
>>> to sibling_list. The new event is added at the tail. Thus it will
>>> come last, and will get to __intel_shared_reg_get_constraints()
>>> last, thus I am wondering if it can really modify the programming
>>> on the existing events.
>>
>> The real problem is from __intel_shared_reg_put_constraints(). it set
>> reg->alloc to 0 and decreases fake_cpuc->shared_regs->regs[reg->idx]'s
>> reference count. Later when deleting the event, put_constraints() will find
>> reg->alloc is 0 and it won't decrease the shared msr's reference count.
>>
>> Run 'perf stat --group -a -C 0 -e LLC-loads -e LLC-stores sleep 1" on
>> Nehalem can trigger the bug.
>>
> And what do you see in this particular example?
The offcore_rsp msr's reference count are left in inconsistent state. It means
we can only use the msr for LLC-loads event after that. Any other events that
require programming the offcore_rsp msr will fail.

>
> I'd like to see the results via libpfm4 and /perf_examples/syst_count:
> $ sudo ./syst_count -e offcore_response_0:dmnd_data_rd
> -eoffcore_response_0:dmnd_rfo -p -d 10 -c 0
>
This does not use the event group feature. do you mean
./syst_count -e offcore_response_0:dmnd_data_rd,offcore_response_0:dmnd_rfo -p -d 10 -c 0.

After apply my patch, the above command fails on Nehalem. Because there is no offcore_rsp1.

Regards
Yan, Zheng

2012-06-05 10:14:43

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Fri, 2012-06-01 at 22:11 +0800, Yan, Zheng wrote:
>
> The real problem is from __intel_shared_reg_put_constraints(). it set
> reg->alloc to 0

ok

> and decreases fake_cpuc->shared_regs->regs[reg->idx]'s
> reference count.

So? That's fake state, who cares what its left in?

> Later when deleting the event, put_constraints() will find
> reg->alloc is 0 and it won't decrease the shared msr's reference count.

OK, so the only problem is us setting reg->alloc to 0?

2012-06-05 10:21:25

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Tue, Jun 5, 2012 at 12:14 PM, Peter Zijlstra <[email protected]> wrote:
> On Fri, 2012-06-01 at 22:11 +0800, Yan, Zheng wrote:
>>
>> The real problem is from __intel_shared_reg_put_constraints(). it set
>> reg->alloc to 0
>
> ok
>
>> and decreases fake_cpuc->shared_regs->regs[reg->idx]'s
>> reference count.
>
> So? That's fake state, who cares what its left in?
>
Right, that's what I am trying to elucidate. Who cares about it
beyond validate_group()? The fake_cpuc is reallocated for each
validate group. And this includes the fake shared_regs.

>> Later when deleting the event,  put_constraints() will find
>> reg->alloc is 0 and it won't decrease the shared msr's reference count.
>
> OK, so the only problem is us setting reg->alloc to 0?

I agree with the first part of the patch in intel_try_alt_er(), we
should not touch
the actual event struct. But I am still unclear about the reg->alloc part.

2012-06-05 10:28:04

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Tue, 2012-06-05 at 12:21 +0200, Stephane Eranian wrote:
> I agree with the first part of the patch in intel_try_alt_er(), we
> should not touch
> the actual event struct. But I am still unclear about the reg->alloc part.

reg->alloc is part of the actual event.

Thing is, the patch is horridly ugly.. while I agree that changing event
state isn't good, special casing all that code isn't good either.

I was looking at cloning the events for validate_group() as well, but so
far that's not turning out too pretty either.

2012-06-05 10:38:34

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Tue, Jun 5, 2012 at 12:27 PM, Peter Zijlstra <[email protected]> wrote:
> On Tue, 2012-06-05 at 12:21 +0200, Stephane Eranian wrote:
>> I agree with the first part of the patch in intel_try_alt_er(), we
>> should not touch
>> the actual event struct. But I am still unclear about the reg->alloc part.
>
> reg->alloc is part of the actual event.
>
Ok, I missed that (despite writing some of that code ;-<)

> Thing is, the patch is horridly ugly.. while I agree that changing event
> state isn't good, special casing all that code isn't good either.

Yeah, not pretty.

>
> I was looking at cloning the events for validate_group() as well, but so
> far that's not turning out too pretty either.

How about we add a field or flag to cpuc to tell it's fake, and then in
try_alt_er() and __intel_shared_reg_get_constraints() we avoid touching
live struct (like reg->alloc) if fake==1. I think he was trying to do the same
with the core_id == -1 test.

2012-06-05 12:07:15

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Tue, 2012-06-05 at 12:38 +0200, Stephane Eranian wrote:
> How about we add a field or flag to cpuc to tell it's fake, and then
> in
> try_alt_er() and __intel_shared_reg_get_constraints() we avoid
> touching
> live struct (like reg->alloc) if fake==1. I think he was trying to do
> the same with the core_id == -1 test.

We might have to do something like that, however I'm trying to figure
out when that reg->alloc test in __intel_shared_reg_get_contraints() is
useful.

If it is useful in event scheduling, we cannot just leave it out in
validate_group().

2012-06-05 12:40:11

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Tue, 2012-06-05 at 14:07 +0200, Peter Zijlstra wrote:
> On Tue, 2012-06-05 at 12:38 +0200, Stephane Eranian wrote:
> > How about we add a field or flag to cpuc to tell it's fake, and then
> > in
> > try_alt_er() and __intel_shared_reg_get_constraints() we avoid
> > touching
> > live struct (like reg->alloc) if fake==1. I think he was trying to do
> > the same with the core_id == -1 test.
>
> We might have to do something like that, however I'm trying to figure
> out when that reg->alloc test in __intel_shared_reg_get_contraints() is
> useful.
>
> If it is useful in event scheduling, we cannot just leave it out in
> validate_group().

OK, so x86_schedule_events() can call that multiple times on the same
event in case we keep adding events.. it needs the constraints of the
previous events as well, and in that case we skip the whole extra_reg
muck.

But for validate_group() we only do this once, so it should work. Nasty
though.


2012-06-05 12:51:55

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Tue, Jun 5, 2012 at 2:39 PM, Peter Zijlstra <[email protected]> wrote:
> On Tue, 2012-06-05 at 14:07 +0200, Peter Zijlstra wrote:
>> On Tue, 2012-06-05 at 12:38 +0200, Stephane Eranian wrote:
>> > How about we add a field or flag to cpuc to tell it's fake, and then
>> > in
>> > try_alt_er() and __intel_shared_reg_get_constraints() we avoid
>> > touching
>> > live struct (like reg->alloc) if fake==1. I think he was trying to do
>> > the same with the core_id == -1 test.
>>
>> We might have to do something like that, however I'm trying to figure
>> out when that reg->alloc test in __intel_shared_reg_get_contraints() is
>> useful.
>>
>> If it is useful in event scheduling, we cannot just leave it out in
>> validate_group().
>
> OK, so x86_schedule_events() can call that multiple times on the same
> event in case we keep adding events.. it needs the constraints of the
> previous events as well, and in that case we skip the whole extra_reg
> muck.

That reg->alloc is to avoid going through that shared MSR allocation
code again because we already have a valid assignment. It helps with
incremental scheduling of events (collect() -> schedule()).
>
> But for validate_group() we only do this once, so it should work. Nasty
> though.

True, only one pass. So I think if you say in:
__intel_shared_reg_put_constraints()

if (!cpuc->is_fake)
reg->alloc = 1;

That should do it given that:

__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
struct hw_perf_event_extra *reg)
{
struct er_account *era;
if (!reg->alloc)
return;

}

But then, if reg->alloc was already set to 1, you will have a problem
if you leave
it as is. So I think in __intel_shared_reg_put_constraints(), you still need:

__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
struct hw_perf_event_extra *reg)
{
if (!reg->alloc || cpuc->is_fake)
return;

Or something like that.

2012-06-05 13:05:03

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Tue, 2012-06-05 at 14:51 +0200, Stephane Eranian wrote:
> So I think if you say in:
> __intel_shared_reg_put_constraints()
>
> if (!cpuc->is_fake)
> reg->alloc = 1;
>
> That should do it given that:
>
> __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
> struct hw_perf_event_extra *reg)
> {
> struct er_account *era;
> if (!reg->alloc)
> return;
>
> }
>
> But then, if reg->alloc was already set to 1, you will have a problem
> if you leave
> it as is. So I think in __intel_shared_reg_put_constraints(), you still need:
>
> __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
> struct hw_perf_event_extra *reg)
> {
> if (!reg->alloc || cpuc->is_fake)
> return;
>
> Or something like that.

Right, and then something slightly less hideous for intel_try_alt_er().

How does something like this look?

---
arch/x86/kernel/cpu/perf_event_intel.c | 43 +++++++++++++++++++++-----------
1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..a3b7eb3 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
return NULL;
}

-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
{
if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
- return false;
+ return idx;

- if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
+ if (idx == EXTRA_REG_RSP_0)
+ return EXTRA_REG_RSP_1;
+
+ if (idx == EXTRA_REG_RSP_1)
+ return EXTRA_REG_RSP_0;
+
+ return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+ if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01bb;
event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
- } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+ } else if (idx == EXTRA_REG_RSP_1) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01b7;
event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
}
-
- if (event->hw.extra_reg.idx == orig_idx)
- return false;
-
- return true;
}

/*
@@ -1157,14 +1163,14 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
struct event_constraint *c = &emptyconstraint;
struct er_account *era;
unsigned long flags;
- int orig_idx = reg->idx;
+ int idx = reg->idx;

/* already allocated shared msr */
if (reg->alloc)
return NULL; /* call x86_get_event_constraint() */

again:
- era = &cpuc->shared_regs->regs[reg->idx];
+ era = &cpuc->shared_regs->regs[idx];
/*
* we use spin_lock_irqsave() to avoid lockdep issues when
* passing a fake cpuc
@@ -1181,16 +1187,23 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
atomic_inc(&era->ref);

/* no need to reallocate during incremental event scheduling */
- reg->alloc = 1;
+ if (!cpuc->is_fake) {
+ if (idx != reg->idx)
+ intel_fixup_er(event, idx);
+ reg->alloc = 1;
+ }

/*
* need to call x86_get_event_constraint()
* to check if associated event has constraints
*/
c = NULL;
- } else if (intel_try_alt_er(event, orig_idx)) {
- raw_spin_unlock_irqrestore(&era->lock, flags);
- goto again;
+ } else {
+ idx = intel_alt_er(idx);
+ if (idx != reg->idx) {
+ raw_spin_unlock_irqrestore(&era->lock, flags);
+ goto again;
+ }
}
raw_spin_unlock_irqrestore(&era->lock, flags);

2012-06-05 13:30:43

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH] perf, x86: Fix Intel shared extra MSR allocation

Subject: perf: Fix Intel shared extra MSR allocation

Zheng Yan reported that event group validation can wreck event state
when Intel extra_reg allocation changes event state.

Validation shouldn't change any persistent state. Cloning events in
validate_{event,group}() isn't really pretty either, so add a few
special cases to avoid modifying the event state.

The code is restructured to minimize the special case impact.

Reported-by: Zheng Yan <[email protected]>
Cc: Stephane Eranian <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 1 +
arch/x86/kernel/cpu/perf_event.h | 1 +
arch/x86/kernel/cpu/perf_event_intel.c | 74 +++++++++++++++++++++++---------
3 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6d..cb60838 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
if (!cpuc->shared_regs)
goto error;
}
+ cpuc->is_fake = 1;
return cpuc;
error:
free_fake_cpuc(cpuc);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf..83794d8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */

unsigned int group_flag;
+ int is_fake;

/*
* Intel DebugStore bits
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..6d00c42 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
return NULL;
}

-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
{
if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
- return false;
+ return idx;

- if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
+ if (idx == EXTRA_REG_RSP_0)
+ return EXTRA_REG_RSP_1;
+
+ if (idx == EXTRA_REG_RSP_1)
+ return EXTRA_REG_RSP_0;
+
+ return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+ if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01bb;
event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
- } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+ } else if (idx == EXTRA_REG_RSP_1) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01b7;
event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
}
-
- if (event->hw.extra_reg.idx == orig_idx)
- return false;
-
- return true;
}

/*
@@ -1157,14 +1163,14 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
struct event_constraint *c = &emptyconstraint;
struct er_account *era;
unsigned long flags;
- int orig_idx = reg->idx;
+ int idx = reg->idx;

/* already allocated shared msr */
if (reg->alloc)
return NULL; /* call x86_get_event_constraint() */

again:
- era = &cpuc->shared_regs->regs[reg->idx];
+ era = &cpuc->shared_regs->regs[idx];
/*
* we use spin_lock_irqsave() to avoid lockdep issues when
* passing a fake cpuc
@@ -1173,6 +1179,29 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,

if (!atomic_read(&era->ref) || era->config == reg->config) {

+ /*
+ * If its a fake cpuc -- as per validate_{group,event}() we
+ * shouldn't touch event state and we can avoid doing so
+ * since both will only call get_event_constraints() once
+ * on each event, this avoids the need for reg->alloc.
+ *
+ * Not doing the ER fixup will only result in era->reg being
+ * wrong, but since we won't actually try and program hardware
+ * this isn't a problem either.
+ */
+ if (!cpuc->is_fake) {
+ if (idx != reg->idx)
+ intel_fixup_er(event, idx);
+
+ /*
+ * x86_schedule_events() can call get_event_constraints()
+ * multiple times on events in the case of incremental
+ * scheduling(). reg->alloc ensures we only do the ER
+ * allocation once.
+ */
+ reg->alloc = 1;
+ }
+
/* lock in msr value */
era->config = reg->config;
era->reg = reg->reg;
@@ -1180,17 +1209,17 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
/* one more user */
atomic_inc(&era->ref);

- /* no need to reallocate during incremental event scheduling */
- reg->alloc = 1;
-
/*
* need to call x86_get_event_constraint()
* to check if associated event has constraints
*/
c = NULL;
- } else if (intel_try_alt_er(event, orig_idx)) {
- raw_spin_unlock_irqrestore(&era->lock, flags);
- goto again;
+ } else {
+ idx = intel_alt_er(idx);
+ if (idx != reg->idx) {
+ raw_spin_unlock_irqrestore(&era->lock, flags);
+ goto again;
+ }
}
raw_spin_unlock_irqrestore(&era->lock, flags);

@@ -1204,11 +1233,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
struct er_account *era;

/*
- * only put constraint if extra reg was actually
- * allocated. Also takes care of event which do
- * not use an extra shared reg
+ * Only put constraint if extra reg was actually allocated. Also takes
+ * care of event which do not use an extra shared reg.
+ *
+ * Also, if this is a fake cpuc we shouldn't touch any event state
+ * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+ * either since it'll be thrown out.
*/
- if (!reg->alloc)
+ if (!reg->alloc || cpuc->is_fake)
return;

era = &cpuc->shared_regs->regs[reg->idx];

2012-06-05 13:31:22

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

Looks nicer, still missing this little piece + is_fake definition and
setting is allocate_fakecpuc())

@@ -1210,7 +1210,7 @@ __intel_shared_reg_put_constraints(struct
cpu_hw_events *cpuc,
* allocated. Also takes care of event which do
* not use an extra shared reg
*/
- if (!reg->alloc)
+ if (!reg->alloc || cpuc->is_fake)
return;


On Tue, Jun 5, 2012 at 3:04 PM, Peter Zijlstra <[email protected]> wrote:
> On Tue, 2012-06-05 at 14:51 +0200, Stephane Eranian wrote:
>> So I think if you say in:
>> __intel_shared_reg_put_constraints()
>>
>>      if (!cpuc->is_fake)
>>            reg->alloc = 1;
>>
>> That should do it given that:
>>
>> __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
>>                                    struct hw_perf_event_extra *reg)
>> {
>>         struct er_account *era;
>>         if (!reg->alloc)
>>                 return;
>>
>> }
>>
>> But then, if reg->alloc was already set to 1, you will have a problem
>> if you leave
>> it as is. So I think in __intel_shared_reg_put_constraints(), you still need:
>>
>> __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
>>                                    struct hw_perf_event_extra *reg)
>> {
>>        if (!reg->alloc || cpuc->is_fake)
>>           return;
>>
>> Or something like that.
>
> Right, and then something slightly less hideous for intel_try_alt_er().
>
> How does something like this look?
>
> ---
>  arch/x86/kernel/cpu/perf_event_intel.c |   43 +++++++++++++++++++++-----------
>  1 file changed, 28 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 166546e..a3b7eb3 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
>        return NULL;
>  }
>
> -static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
> +static int intel_alt_er(int idx)
>  {
>        if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
> -               return false;
> +               return idx;
>
> -       if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
> +       if (idx == EXTRA_REG_RSP_0)
> +               return EXTRA_REG_RSP_1;
> +
> +       if (idx == EXTRA_REG_RSP_1)
> +               return EXTRA_REG_RSP_0;
> +
> +       return idx;
> +}
> +
> +static void intel_fixup_er(struct perf_event *event, int idx)
> +{
> +       if (idx == EXTRA_REG_RSP_0) {
>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                event->hw.config |= 0x01bb;
>                event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
>                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
> -       } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
> +       } else if (idx == EXTRA_REG_RSP_1) {
>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                event->hw.config |= 0x01b7;
>                event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
>                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
>        }
> -
> -       if (event->hw.extra_reg.idx == orig_idx)
> -               return false;
> -
> -       return true;
>  }
>
>  /*
> @@ -1157,14 +1163,14 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>        struct event_constraint *c = &emptyconstraint;
>        struct er_account *era;
>        unsigned long flags;
> -       int orig_idx = reg->idx;
> +       int idx = reg->idx;
>
>        /* already allocated shared msr */
>        if (reg->alloc)
>                return NULL; /* call x86_get_event_constraint() */
>
>  again:
> -       era = &cpuc->shared_regs->regs[reg->idx];
> +       era = &cpuc->shared_regs->regs[idx];
>        /*
>         * we use spin_lock_irqsave() to avoid lockdep issues when
>         * passing a fake cpuc
> @@ -1181,16 +1187,23 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>                atomic_inc(&era->ref);
>
>                /* no need to reallocate during incremental event scheduling */
> -               reg->alloc = 1;
> +               if (!cpuc->is_fake) {
> +                       if (idx != reg->idx)
> +                               intel_fixup_er(event, idx);
> +                       reg->alloc = 1;
> +               }
>
>                /*
>                 * need to call x86_get_event_constraint()
>                 * to check if associated event has constraints
>                 */
>                c = NULL;
> -       } else if (intel_try_alt_er(event, orig_idx)) {
> -               raw_spin_unlock_irqrestore(&era->lock, flags);
> -               goto again;
> +       } else {
> +               idx = intel_alt_er(idx);
> +               if (idx != reg->idx) {
> +                       raw_spin_unlock_irqrestore(&era->lock, flags);
> +                       goto again;
> +               }
>        }
>        raw_spin_unlock_irqrestore(&era->lock, flags);
>
>

2012-06-05 13:32:54

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Tue, 2012-06-05 at 15:31 +0200, Stephane Eranian wrote:
> Looks nicer, still missing this little piece + is_fake definition and
> setting is allocate_fakecpuc())
>
Yeah, just send out something more complete -- although only built
tested.

This was just seeing how the intel_try_alt_er() stuff could be done
saner.

2012-06-05 13:38:20

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Tue, Jun 5, 2012 at 3:32 PM, Peter Zijlstra <[email protected]> wrote:
> On Tue, 2012-06-05 at 15:31 +0200, Stephane Eranian wrote:
>> Looks nicer, still missing this little piece + is_fake definition and
>> setting is allocate_fakecpuc())
>>
> Yeah, just send out something more complete -- although only built
> tested.
>
> This was just seeing how the intel_try_alt_er() stuff could be done
> saner.

Will try it on my systems. Though it appears the patch has some
^M all over.
Thanks for looking into this today.

2012-06-05 13:47:28

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Tue, 2012-06-05 at 15:38 +0200, Stephane Eranian wrote:

> Will try it on my systems. Though it appears the patch has some
> ^M all over.

Yeah, stupid evolution can only do quoted-printable these days..
progress they call this :/

I can send you an awk script to unmangle it if you want.. but git can do
it too somehow.

2012-06-05 13:51:30

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On Tue, Jun 5, 2012 at 3:47 PM, Peter Zijlstra <[email protected]> wrote:
> On Tue, 2012-06-05 at 15:38 +0200, Stephane Eranian wrote:
>
>> Will try it on my systems. Though it appears the patch has some
>> ^M all over.
>
> Yeah, stupid evolution can only do quoted-printable these days..
> progress they call this :/
>
> I can send you an awk script to unmangle it if you want.. but git can do
> it too somehow.

Will try with git, otherwise can do it manually, patch is small enough.
Thanks.

2012-06-05 13:56:35

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation

On Tue, 2012-06-05 at 15:30 +0200, Peter Zijlstra wrote:

> @@ -1157,14 +1163,14 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
> struct event_constraint *c = &emptyconstraint;
> struct er_account *era;
> unsigned long flags;
> - int orig_idx = reg->idx;
> + int idx = reg->idx;
>
> /* already allocated shared msr */
> if (reg->alloc)
> return NULL; /* call x86_get_event_constraint() */

I'm afraid that needs to be:

/*
* reg->alloc can be set due to existing state, so for fake cpuc
* we need to ignore this, otherwise we might fail to allocate
* proper fake state for this extra reg constraint. Also see
* the comment below.
*/
if (reg->alloc && !cpuc->is_fake)
return NULL; /* call x86_get_event_constraints() */

>
> again:
> - era = &cpuc->shared_regs->regs[reg->idx];
> + era = &cpuc->shared_regs->regs[idx];
> /*
> * we use spin_lock_irqsave() to avoid lockdep issues when
> * passing a fake cpuc
> @@ -1173,6 +1179,29 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>
> if (!atomic_read(&era->ref) || era->config == reg->config) {
>
> + /*
> + * If its a fake cpuc -- as per validate_{group,event}() we
> + * shouldn't touch event state and we can avoid doing so
> + * since both will only call get_event_constraints() once
> + * on each event, this avoids the need for reg->alloc.
> + *
> + * Not doing the ER fixup will only result in era->reg being
> + * wrong, but since we won't actually try and program hardware
> + * this isn't a problem either.
> + */
> + if (!cpuc->is_fake) {
> + if (idx != reg->idx)
> + intel_fixup_er(event, idx);
> +
> + /*
> + * x86_schedule_events() can call get_event_constraints()
> + * multiple times on events in the case of incremental
> + * scheduling(). reg->alloc ensures we only do the ER
> + * allocation once.
> + */
> + reg->alloc = 1;
> + }
> +
> /* lock in msr value */
> era->config = reg->config;
> era->reg = reg->reg;

2012-06-05 21:26:21

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation

On Tue, Jun 5, 2012 at 3:56 PM, Peter Zijlstra <[email protected]> wrote:
> On Tue, 2012-06-05 at 15:30 +0200, Peter Zijlstra wrote:
>
>> @@ -1157,14 +1163,14 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>>       struct event_constraint *c = &emptyconstraint;
>>       struct er_account *era;
>>       unsigned long flags;
>> -     int orig_idx = reg->idx;
>> +     int idx = reg->idx;
>>
>>       /* already allocated shared msr */
>>       if (reg->alloc)
>>               return NULL; /* call x86_get_event_constraint() */
>
> I'm afraid that needs to be:
>
>        /*
>         * reg->alloc can be set due to existing state, so for fake cpuc
>         * we need to ignore this, otherwise we might fail to allocate
>         * proper fake state for this extra reg constraint. Also see
>         * the comment below.
>         */
>        if (reg->alloc && !cpuc->is_fake)
>                return NULL; /* call x86_get_event_constraints() */
>
>>
Yes.

>>  again:
>> -     era = &cpuc->shared_regs->regs[reg->idx];
>> +     era = &cpuc->shared_regs->regs[idx];
>>       /*
>>        * we use spin_lock_irqsave() to avoid lockdep issues when
>>        * passing a fake cpuc
>> @@ -1173,6 +1179,29 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>>
>>       if (!atomic_read(&era->ref) || era->config == reg->config) {
>>
>> +             /*
>> +              * If its a fake cpuc -- as per validate_{group,event}() we
>> +              * shouldn't touch event state and we can avoid doing so
>> +              * since both will only call get_event_constraints() once
>> +              * on each event, this avoids the need for reg->alloc.
>> +              *
>> +              * Not doing the ER fixup will only result in era->reg being
>> +              * wrong, but since we won't actually try and program hardware
>> +              * this isn't a problem either.
>> +              */
>> +             if (!cpuc->is_fake) {
>> +                     if (idx != reg->idx)
>> +                             intel_fixup_er(event, idx);
>> +
>> +                     /*
>> +                      * x86_schedule_events() can call get_event_constraints()
>> +                      * multiple times on events in the case of incremental
>> +                      * scheduling(). reg->alloc ensures we only do the ER
>> +                      * allocation once.
>> +                      */
>> +                     reg->alloc = 1;
>> +             }
>> +
>>               /* lock in msr value */
>>               era->config = reg->config;
>>               era->reg = reg->reg;
>
????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m???? ????????I?

2012-06-05 21:35:33

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation


Zheng Yan reported that event group validation can wreck event state
when Intel extra_reg allocation changes event state.

Validation shouldn't change any persistent state. Cloning events in
validate_{event,group}() isn't really pretty either, so add a few
special cases to avoid modifying the event state.

The code is restructured to minimize the special case impact.

Reported-by: Zheng Yan <[email protected]>
Cc: Stephane Eranian <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Acked-by: Stephane Eranian <[email protected]>
---

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6d..cb60838 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
if (!cpuc->shared_regs)
goto error;
}
+ cpuc->is_fake = 1;
return cpuc;
error:
free_fake_cpuc(cpuc);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf..83794d8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */

unsigned int group_flag;
+ int is_fake;

/*
* Intel DebugStore bits
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..76a2bd2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
return NULL;
}

-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
{
if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
- return false;
+ return idx;

- if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
+ if (idx == EXTRA_REG_RSP_0)
+ return EXTRA_REG_RSP_1;
+
+ if (idx == EXTRA_REG_RSP_1)
+ return EXTRA_REG_RSP_0;
+
+ return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+ if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01bb;
event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
- } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+ } else if (idx == EXTRA_REG_RSP_1) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01b7;
event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
}
-
- if (event->hw.extra_reg.idx == orig_idx)
- return false;
-
- return true;
}

/*
@@ -1157,14 +1163,19 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
struct event_constraint *c = &emptyconstraint;
struct er_account *era;
unsigned long flags;
- int orig_idx = reg->idx;
+ int idx = reg->idx;

- /* already allocated shared msr */
- if (reg->alloc)
- return NULL; /* call x86_get_event_constraint() */
+ /*
+ * reg->alloc can be set due to existing state, so for fake cpuc
+ * we need to ignore this, otherwise we might fail to allocate
+ * proper fake state for this extra reg constraint. Also see
+ * the comment below.
+ */
+ if (reg->alloc && !cpuc->is_fake)
+ return NULL; /* call x86_get_event_constraints() */

again:
- era = &cpuc->shared_regs->regs[reg->idx];
+ era = &cpuc->shared_regs->regs[idx];
/*
* we use spin_lock_irqsave() to avoid lockdep issues when
* passing a fake cpuc
@@ -1172,6 +1183,27 @@ again:
raw_spin_lock_irqsave(&era->lock, flags);

if (!atomic_read(&era->ref) || era->config == reg->config) {
+ /*
+ * If its a fake cpuc -- as per validate_{group,event}() we
+ * shouldn't touch event state and we can avoid doing so
+ * since both will only call get_event_constraints() once
+ * on each event, this avoids the need for reg->alloc.
+ *
+ * Not doing the ER fixup will only result in era->reg being
+ * wrong, but since we won't actually try and program hardware
+ * this isn't a problem either.
+ */
+ if (!cpuc->is_fake) {
+ if (idx != reg->idx)
+ intel_fixup_er(event, idx);
+ /*
+ * x86_schedule_events() calls get_event_constraints()
+ * multiple times on events in the case of incremental
+ * scheduling(). reg->alloc ensures we only do the ER
+ * allocation once.
+ */
+ reg->alloc = 1;
+ }

/* lock in msr value */
era->config = reg->config;
@@ -1180,18 +1212,19 @@ again:
/* one more user */
atomic_inc(&era->ref);

- /* no need to reallocate during incremental event scheduling */
- reg->alloc = 1;
-
/*
* need to call x86_get_event_constraint()
* to check if associated event has constraints
*/
c = NULL;
- } else if (intel_try_alt_er(event, orig_idx)) {
- raw_spin_unlock_irqrestore(&era->lock, flags);
- goto again;
+ } else {
+ idx = intel_alt_er(idx);
+ if (idx != reg->idx) {
+ raw_spin_unlock_irqrestore(&era->lock, flags);
+ goto again;
+ }
}
+
raw_spin_unlock_irqrestore(&era->lock, flags);

return c;
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
struct er_account *era;

/*
- * only put constraint if extra reg was actually
- * allocated. Also takes care of event which do
- * not use an extra shared reg
+ * Only put constraint if extra reg was actually allocated. Also takes
+ * care of event which do not use an extra shared reg.
+ *
+ * Also, if this is a fake cpuc we shouldn't touch any event state
+ * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+ * either since it'll be thrown out.
*/
- if (!reg->alloc)
+ if (!reg->alloc || cpuc->is_fake)
return;

era = &cpuc->shared_regs->regs[reg->idx];

2012-06-06 01:00:20

by Yan, Zheng

[permalink] [raw]
Subject: Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation

On 06/05/2012 09:56 PM, Peter Zijlstra wrote:
>> @@ -1157,14 +1163,14 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>> > struct event_constraint *c = &emptyconstraint;
>> > struct er_account *era;
>> > unsigned long flags;
>> > - int orig_idx = reg->idx;
>> > + int idx = reg->idx;
>> >
>> > /* already allocated shared msr */
>> > if (reg->alloc)
>> > return NULL; /* call x86_get_event_constraint() */
> I'm afraid that needs to be:
>
> /*
> * reg->alloc can be set due to existing state, so for fake cpuc
> * we need to ignore this, otherwise we might fail to allocate
> * proper fake state for this extra reg constraint. Also see
> * the comment below.
> */
> if (reg->alloc && !cpuc->is_fake)
> return NULL; /* call x86_get_event_constraints() */
>
Agree

Regards
Yan, Zheng


2012-06-06 10:12:43

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

There is something wrong with this patch, I instrumented the code
and I can see:
[ 1377.324575] 1. idx=1 reg_idx=1 ref=-1 config=0xff01 era->config=0xff01
^^^^^^
The test case on WSM (RSP0, RSP1):

$ perf stat -a -C13 -e
offcore_response_1:dmnd_data_rd,offcore_response_1:dmnd_data_rd sleep
100 &
$ perf stat -a -C1 -e offcore_response_1:dmnd_rfo sleep 1

I think this happens during scheduling of the events, i.e., during the
run and not on initial
programming. That could happen with cgroups, for instance.

On Tue, Jun 5, 2012 at 3:51 PM, Stephane Eranian <[email protected]> wrote:
> On Tue, Jun 5, 2012 at 3:47 PM, Peter Zijlstra <[email protected]> wrote:
>> On Tue, 2012-06-05 at 15:38 +0200, Stephane Eranian wrote:
>>
>>> Will try it on my systems. Though it appears the patch has some
>>> ^M all over.
>>
>> Yeah, stupid evolution can only do quoted-printable these days..
>> progress they call this :/
>>
>> I can send you an awk script to unmangle it if you want.. but git can do
>> it too somehow.
>
> Will try with git, otherwise can do it manually, patch is small enough.
> Thanks.

2012-06-06 10:35:17

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation

Ok, I found the problem. It was in intel_fixup_er().
Unlike in the original code, this routine must update
the event->extra_reg.idx to the idx parameter instead
of trying to swap out from it.

I will repost an updated version (v2).


On Tue, Jun 5, 2012 at 11:35 PM, Stephane Eranian <[email protected]> wrote:
>
> Zheng Yan reported that event group validation can wreck event state
> when Intel extra_reg allocation changes event state.
>
> Validation shouldn't change any persistent state. Cloning events in
> validate_{event,group}() isn't really pretty either, so add a few
> special cases to avoid modifying the event state.
>
> The code is restructured to minimize the special case impact.
>
> Reported-by: Zheng Yan <[email protected]>
> Cc: Stephane Eranian <[email protected]>
> Signed-off-by: Peter Zijlstra <[email protected]>
> Acked-by: Stephane Eranian <[email protected]>
> ---
>
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index e049d6d..cb60838 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
>                if (!cpuc->shared_regs)
>                        goto error;
>        }
> +       cpuc->is_fake = 1;
>        return cpuc;
>  error:
>        free_fake_cpuc(cpuc);
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index 6638aaf..83794d8 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -117,6 +117,7 @@ struct cpu_hw_events {
>        struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
>
>        unsigned int            group_flag;
> +       int                     is_fake;
>
>        /*
>         * Intel DebugStore bits
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 166546e..76a2bd2 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
>        return NULL;
>  }
>
> -static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
> +static int intel_alt_er(int idx)
>  {
>        if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
> -               return false;
> +               return idx;
>
> -       if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
> +       if (idx == EXTRA_REG_RSP_0)
> +               return EXTRA_REG_RSP_1;
> +
> +       if (idx == EXTRA_REG_RSP_1)
> +               return EXTRA_REG_RSP_0;
> +
> +       return idx;
> +}
> +
> +static void intel_fixup_er(struct perf_event *event, int idx)
> +{
> +       if (idx == EXTRA_REG_RSP_0) {
>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                event->hw.config |= 0x01bb;
>                event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
>                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
> -       } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
> +       } else if (idx == EXTRA_REG_RSP_1) {
>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                event->hw.config |= 0x01b7;
>                event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
>                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
>        }
> -
> -       if (event->hw.extra_reg.idx == orig_idx)
> -               return false;
> -
> -       return true;
>  }
>
>  /*
> @@ -1157,14 +1163,19 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
>        struct event_constraint *c = &emptyconstraint;
>        struct er_account *era;
>        unsigned long flags;
> -       int orig_idx = reg->idx;
> +       int idx = reg->idx;
>
> -       /* already allocated shared msr */
> -       if (reg->alloc)
> -               return NULL; /* call x86_get_event_constraint() */
> +       /*
> +        * reg->alloc can be set due to existing state, so for fake cpuc
> +        * we need to ignore this, otherwise we might fail to allocate
> +        * proper fake state for this extra reg constraint. Also see
> +        * the comment below.
> +       */
> +       if (reg->alloc && !cpuc->is_fake)
> +               return NULL; /* call x86_get_event_constraints() */
>
>  again:
> -       era = &cpuc->shared_regs->regs[reg->idx];
> +       era = &cpuc->shared_regs->regs[idx];
>        /*
>         * we use spin_lock_irqsave() to avoid lockdep issues when
>         * passing a fake cpuc
> @@ -1172,6 +1183,27 @@ again:
>        raw_spin_lock_irqsave(&era->lock, flags);
>
>        if (!atomic_read(&era->ref) || era->config == reg->config) {
> +               /*
> +                * If its a fake cpuc -- as per validate_{group,event}() we
> +                * shouldn't touch event state and we can avoid doing so
> +                * since both will only call get_event_constraints() once
> +                * on each event, this avoids the need for reg->alloc.
> +                *
> +                * Not doing the ER fixup will only result in era->reg being
> +                * wrong, but since we won't actually try and program hardware
> +                * this isn't a problem either.
> +                */
> +               if (!cpuc->is_fake) {
> +                       if (idx != reg->idx)
> +                               intel_fixup_er(event, idx);
> +                       /*
> +                        * x86_schedule_events() calls get_event_constraints()
> +                        * multiple times on events in the case of incremental
> +                        * scheduling(). reg->alloc ensures we only do the ER
> +                        * allocation once.
> +                        */
> +                       reg->alloc = 1;
> +               }
>
>                /* lock in msr value */
>                era->config = reg->config;
> @@ -1180,18 +1212,19 @@ again:
>                /* one more user */
>                atomic_inc(&era->ref);
>
> -               /* no need to reallocate during incremental event scheduling */
> -               reg->alloc = 1;
> -
>                /*
>                 * need to call x86_get_event_constraint()
>                 * to check if associated event has constraints
>                 */
>                c = NULL;
> -       } else if (intel_try_alt_er(event, orig_idx)) {
> -               raw_spin_unlock_irqrestore(&era->lock, flags);
> -               goto again;
> +       } else {
> +               idx = intel_alt_er(idx);
> +               if (idx != reg->idx) {
> +                       raw_spin_unlock_irqrestore(&era->lock, flags);
> +                       goto again;
> +               }
>        }
> +
>        raw_spin_unlock_irqrestore(&era->lock, flags);
>
>        return c;
> @@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
>        struct er_account *era;
>
>        /*
> -        * only put constraint if extra reg was actually
> -        * allocated. Also takes care of event which do
> -        * not use an extra shared reg
> +        * Only put constraint if extra reg was actually allocated. Also takes
> +        * care of event which do not use an extra shared reg.
> +        *
> +        * Also, if this is a fake cpuc we shouldn't touch any event state
> +        * (reg->alloc) and we don't care about leaving inconsistent cpuc state
> +        * either since it'll be thrown out.
>         */
> -       if (!reg->alloc)
> +       if (!reg->alloc || cpuc->is_fake)
>                return;
>
>        era = &cpuc->shared_regs->regs[reg->idx];

2012-06-06 10:37:06

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation

On Wed, 2012-06-06 at 12:35 +0200, Stephane Eranian wrote:
> Ok, I found the problem. It was in intel_fixup_er().
> Unlike in the original code, this routine must update
> the event->extra_reg.idx to the idx parameter instead
> of trying to swap out from it.

Ah indeed. Thanks!

2012-06-06 10:54:06

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation

On Wed, 2012-06-06 at 12:36 +0200, Peter Zijlstra wrote:
> On Wed, 2012-06-06 at 12:35 +0200, Stephane Eranian wrote:
> > Ok, I found the problem. It was in intel_fixup_er().
> > Unlike in the original code, this routine must update
> > the event->extra_reg.idx to the idx parameter instead
> > of trying to swap out from it.
>
> Ah indeed. Thanks!


static void intel_fixup_er(struct perf_event *event, int idx)
{
event->hw.extra_reg.idx = idx;

if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01b7;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
} else if (idx == EXTRA_REG_RSP_1) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01bb;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
}
}

Like that then?

2012-06-06 11:43:53

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation

On Wed, Jun 6, 2012 at 12:53 PM, Peter Zijlstra <[email protected]> wrote:
> On Wed, 2012-06-06 at 12:36 +0200, Peter Zijlstra wrote:
>> On Wed, 2012-06-06 at 12:35 +0200, Stephane Eranian wrote:
>> > Ok, I found the problem. It was in intel_fixup_er().
>> > Unlike in the original code, this routine must update
>> > the event->extra_reg.idx to the idx parameter instead
>> > of trying to swap out from it.
>>
>> Ah indeed. Thanks!
>
>
> static void intel_fixup_er(struct perf_event *event, int idx)
> {
>        event->hw.extra_reg.idx = idx;
>
>        if (idx == EXTRA_REG_RSP_0) {
>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                event->hw.config |= 0x01b7;
>                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
>        } else if (idx == EXTRA_REG_RSP_1) {
>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                event->hw.config |= 0x01bb;
>                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
>        }
> }
>
> Like that then?

Yes.

2012-06-06 11:57:37

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation

On Wed, Jun 6, 2012 at 12:53 PM, Peter Zijlstra <[email protected]> wrote:
> On Wed, 2012-06-06 at 12:36 +0200, Peter Zijlstra wrote:
>> On Wed, 2012-06-06 at 12:35 +0200, Stephane Eranian wrote:
>> > Ok, I found the problem. It was in intel_fixup_er().
>> > Unlike in the original code, this routine must update
>> > the event->extra_reg.idx to the idx parameter instead
>> > of trying to swap out from it.
>>
>> Ah indeed. Thanks!
>
>
> static void intel_fixup_er(struct perf_event *event, int idx)
> {
>        event->hw.extra_reg.idx = idx;
>
>        if (idx == EXTRA_REG_RSP_0) {
>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                event->hw.config |= 0x01b7;
>                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
>        } else if (idx == EXTRA_REG_RSP_1) {
>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                event->hw.config |= 0x01bb;
>                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
>        }
> }
>
> Like that then?


Are you going to repost the update patch, or shall I?

2012-06-06 12:06:33

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation

On Wed, 2012-06-06 at 13:57 +0200, Stephane Eranian wrote:

> Are you going to repost the update patch, or shall I?



---
Subject: perf, x86: Fix Intel shared extra MSR allocation
From: Peter Zijlstra <[email protected]>
Date: Tue, 05 Jun 2012 15:30:31 +0200

Zheng Yan reported that event group validation can wreck event state
when Intel extra_reg allocation changes event state.

Validation shouldn't change any persistent state. Cloning events in
validate_{event,group}() isn't really pretty either, so add a few
special cases to avoid modifying the event state.

The code is restructured to minimize the special case impact.

Reported-by: Zheng Yan <[email protected]>
Acked-by: Stephane Eranian <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins
---
arch/x86/kernel/cpu/perf_event.c | 1
arch/x86/kernel/cpu/perf_event.h | 1
arch/x86/kernel/cpu/perf_event_intel.c | 92 ++++++++++++++++++++++-----------
3 files changed, 66 insertions(+), 28 deletions(-)

--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fa
if (!cpuc->shared_regs)
goto error;
}
+ cpuc->is_fake = 1;
return cpuc;
error:
free_fake_cpuc(cpuc);
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */

unsigned int group_flag;
+ int is_fake;

/*
* Intel DebugStore bits
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event
return NULL;
}

-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
{
if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
- return false;
+ return idx;

- if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
- event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= 0x01bb;
- event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
- event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
- } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+ if (idx == EXTRA_REG_RSP_0)
+ return EXTRA_REG_RSP_1;
+
+ if (idx == EXTRA_REG_RSP_1)
+ return EXTRA_REG_RSP_0;
+
+ return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+ event->hw.extra_reg.idx = idx;
+
+ if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01b7;
- event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+ } else if (idx == EXTRA_REG_RSP_1) {
+ event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+ event->hw.config |= 0x01bb;
+ event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
}
-
- if (event->hw.extra_reg.idx == orig_idx)
- return false;
-
- return true;
}

/*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struc
struct event_constraint *c = &emptyconstraint;
struct er_account *era;
unsigned long flags;
- int orig_idx = reg->idx;
+ int idx = reg->idx;

- /* already allocated shared msr */
- if (reg->alloc)
+ /*
+ * reg->alloc can be set due to existing state, so for fake cpuc we
+ * need to ignore this, otherwise we might fail to allocate proper fake
+ * state for this extra reg constraint. Also see the comment below.
+ */
+ if (reg->alloc && !cpuc->is_fake)
return NULL; /* call x86_get_event_constraint() */

again:
- era = &cpuc->shared_regs->regs[reg->idx];
+ era = &cpuc->shared_regs->regs[idx];
/*
* we use spin_lock_irqsave() to avoid lockdep issues when
* passing a fake cpuc
@@ -1173,6 +1183,29 @@ __intel_shared_reg_get_constraints(struc

if (!atomic_read(&era->ref) || era->config == reg->config) {

+ /*
+ * If its a fake cpuc -- as per validate_{group,event}() we
+ * shouldn't touch event state and we can avoid doing so
+ * since both will only call get_event_constraints() once
+ * on each event, this avoids the need for reg->alloc.
+ *
+ * Not doing the ER fixup will only result in era->reg being
+ * wrong, but since we won't actually try and program hardware
+ * this isn't a problem either.
+ */
+ if (!cpuc->is_fake) {
+ if (idx != reg->idx)
+ intel_fixup_er(event, idx);
+
+ /*
+ * x86_schedule_events() can call get_event_constraints()
+ * multiple times on events in the case of incremental
+ * scheduling(). reg->alloc ensures we only do the ER
+ * allocation once.
+ */
+ reg->alloc = 1;
+ }
+
/* lock in msr value */
era->config = reg->config;
era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ __intel_shared_reg_get_constraints(struc
/* one more user */
atomic_inc(&era->ref);

- /* no need to reallocate during incremental event scheduling */
- reg->alloc = 1;
-
/*
* need to call x86_get_event_constraint()
* to check if associated event has constraints
*/
c = NULL;
- } else if (intel_try_alt_er(event, orig_idx)) {
- raw_spin_unlock_irqrestore(&era->lock, flags);
- goto again;
+ } else {
+ idx = intel_alt_er(idx);
+ if (idx != reg->idx) {
+ raw_spin_unlock_irqrestore(&era->lock, flags);
+ goto again;
+ }
}
raw_spin_unlock_irqrestore(&era->lock, flags);

@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struc
struct er_account *era;

/*
- * only put constraint if extra reg was actually
- * allocated. Also takes care of event which do
- * not use an extra shared reg
+ * Only put constraint if extra reg was actually allocated. Also takes
+ * care of event which do not use an extra shared reg.
+ *
+ * Also, if this is a fake cpuc we shouldn't touch any event state
+ * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+ * either since it'll be thrown out.
*/
- if (!reg->alloc)
+ if (!reg->alloc || cpuc->is_fake)
return;

era = &cpuc->shared_regs->regs[reg->idx];

2012-06-06 12:08:45

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf, x86: Fix Intel shared extra MSR allocation

Looks good.
thanks.

On Wed, Jun 6, 2012 at 2:06 PM, Peter Zijlstra <[email protected]> wrote:
> On Wed, 2012-06-06 at 13:57 +0200, Stephane Eranian wrote:
>
>> Are you going to repost the update patch, or shall I?
>
>
>
> ---
> Subject: perf, x86: Fix Intel shared extra MSR allocation
> From: Peter Zijlstra <[email protected]>
> Date: Tue, 05 Jun 2012 15:30:31 +0200
>
> Zheng Yan reported that event group validation can wreck event state
> when Intel extra_reg allocation changes event state.
>
> Validation shouldn't change any persistent state. Cloning events in
> validate_{event,group}() isn't really pretty either, so add a few
> special cases to avoid modifying the event state.
>
> The code is restructured to minimize the special case impact.
>
> Reported-by: Zheng Yan <[email protected]>
> Acked-by: Stephane Eranian <[email protected]>
> Signed-off-by: Peter Zijlstra <[email protected]>
> Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins
> ---
>  arch/x86/kernel/cpu/perf_event.c       |    1
>  arch/x86/kernel/cpu/perf_event.h       |    1
>  arch/x86/kernel/cpu/perf_event_intel.c |   92 ++++++++++++++++++++++-----------
>  3 files changed, 66 insertions(+), 28 deletions(-)
>
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fa
>                if (!cpuc->shared_regs)
>                        goto error;
>        }
> +       cpuc->is_fake = 1;
>        return cpuc;
>  error:
>        free_fake_cpuc(cpuc);
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -117,6 +117,7 @@ struct cpu_hw_events {
>        struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
>
>        unsigned int            group_flag;
> +       int                     is_fake;
>
>        /*
>         * Intel DebugStore bits
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event
>        return NULL;
>  }
>
> -static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
> +static int intel_alt_er(int idx)
>  {
>        if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
> -               return false;
> +               return idx;
>
> -       if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
> -               event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
> -               event->hw.config |= 0x01bb;
> -               event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
> -               event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
> -       } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
> +       if (idx == EXTRA_REG_RSP_0)
> +               return EXTRA_REG_RSP_1;
> +
> +       if (idx == EXTRA_REG_RSP_1)
> +               return EXTRA_REG_RSP_0;
> +
> +       return idx;
> +}
> +
> +static void intel_fixup_er(struct perf_event *event, int idx)
> +{
> +       event->hw.extra_reg.idx = idx;
> +
> +       if (idx == EXTRA_REG_RSP_0) {
>                event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
>                event->hw.config |= 0x01b7;
> -               event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
>                event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
> +       } else if (idx == EXTRA_REG_RSP_1) {
> +               event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
> +               event->hw.config |= 0x01bb;
> +               event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
>        }
> -
> -       if (event->hw.extra_reg.idx == orig_idx)
> -               return false;
> -
> -       return true;
>  }
>
>  /*
> @@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struc
>        struct event_constraint *c = &emptyconstraint;
>        struct er_account *era;
>        unsigned long flags;
> -       int orig_idx = reg->idx;
> +       int idx = reg->idx;
>
> -       /* already allocated shared msr */
> -       if (reg->alloc)
> +       /*
> +        * reg->alloc can be set due to existing state, so for fake cpuc we
> +        * need to ignore this, otherwise we might fail to allocate proper fake
> +        * state for this extra reg constraint. Also see the comment below.
> +        */
> +       if (reg->alloc && !cpuc->is_fake)
>                return NULL; /* call x86_get_event_constraint() */
>
>  again:
> -       era = &cpuc->shared_regs->regs[reg->idx];
> +       era = &cpuc->shared_regs->regs[idx];
>        /*
>         * we use spin_lock_irqsave() to avoid lockdep issues when
>         * passing a fake cpuc
> @@ -1173,6 +1183,29 @@ __intel_shared_reg_get_constraints(struc
>
>        if (!atomic_read(&era->ref) || era->config == reg->config) {
>
> +               /*
> +                * If its a fake cpuc -- as per validate_{group,event}() we
> +                * shouldn't touch event state and we can avoid doing so
> +                * since both will only call get_event_constraints() once
> +                * on each event, this avoids the need for reg->alloc.
> +                *
> +                * Not doing the ER fixup will only result in era->reg being
> +                * wrong, but since we won't actually try and program hardware
> +                * this isn't a problem either.
> +                */
> +               if (!cpuc->is_fake) {
> +                       if (idx != reg->idx)
> +                               intel_fixup_er(event, idx);
> +
> +                       /*
> +                        * x86_schedule_events() can call get_event_constraints()
> +                        * multiple times on events in the case of incremental
> +                        * scheduling(). reg->alloc ensures we only do the ER
> +                        * allocation once.
> +                        */
> +                       reg->alloc = 1;
> +               }
> +
>                /* lock in msr value */
>                era->config = reg->config;
>                era->reg = reg->reg;
> @@ -1180,17 +1213,17 @@ __intel_shared_reg_get_constraints(struc
>                /* one more user */
>                atomic_inc(&era->ref);
>
> -               /* no need to reallocate during incremental event scheduling */
> -               reg->alloc = 1;
> -
>                /*
>                 * need to call x86_get_event_constraint()
>                 * to check if associated event has constraints
>                 */
>                c = NULL;
> -       } else if (intel_try_alt_er(event, orig_idx)) {
> -               raw_spin_unlock_irqrestore(&era->lock, flags);
> -               goto again;
> +       } else {
> +               idx = intel_alt_er(idx);
> +               if (idx != reg->idx) {
> +                       raw_spin_unlock_irqrestore(&era->lock, flags);
> +                       goto again;
> +               }
>        }
>        raw_spin_unlock_irqrestore(&era->lock, flags);
>
> @@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struc
>        struct er_account *era;
>
>        /*
> -        * only put constraint if extra reg was actually
> -        * allocated. Also takes care of event which do
> -        * not use an extra shared reg
> +        * Only put constraint if extra reg was actually allocated. Also takes
> +        * care of event which do not use an extra shared reg.
> +        *
> +        * Also, if this is a fake cpuc we shouldn't touch any event state
> +        * (reg->alloc) and we don't care about leaving inconsistent cpuc state
> +        * either since it'll be thrown out.
>         */
> -       if (!reg->alloc)
> +       if (!reg->alloc || cpuc->is_fake)
>                return;
>
>        era = &cpuc->shared_regs->regs[reg->idx];
>
????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m???? ????????I?

2012-06-06 15:58:17

by Peter Zijlstra

[permalink] [raw]
Subject: [tip:perf/core] perf/x86: Fix Intel shared extra MSR allocation

Commit-ID: b430f7c4706aeba4270c7ab7744fc504b9315e1c
Gitweb: http://git.kernel.org/tip/b430f7c4706aeba4270c7ab7744fc504b9315e1c
Author: Peter Zijlstra <[email protected]>
AuthorDate: Tue, 5 Jun 2012 15:30:31 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 6 Jun 2012 16:59:44 +0200

perf/x86: Fix Intel shared extra MSR allocation

Zheng Yan reported that event group validation can wreck event state
when Intel extra_reg allocation changes event state.

Validation shouldn't change any persistent state. Cloning events in
validate_{event,group}() isn't really pretty either, so add a few
special cases to avoid modifying the event state.

The code is restructured to minimize the special case impact.

Reported-by: Zheng Yan <[email protected]>
Acked-by: Stephane Eranian <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 1 +
arch/x86/kernel/cpu/perf_event.h | 1 +
arch/x86/kernel/cpu/perf_event_intel.c | 92 ++++++++++++++++++++++----------
3 files changed, 66 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6d..cb60838 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
if (!cpuc->shared_regs)
goto error;
}
+ cpuc->is_fake = 1;
return cpuc;
error:
free_fake_cpuc(cpuc);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf..83794d8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */

unsigned int group_flag;
+ int is_fake;

/*
* Intel DebugStore bits
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..965baa2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
return NULL;
}

-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
{
if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
- return false;
+ return idx;

- if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
- event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= 0x01bb;
- event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
- event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
- } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+ if (idx == EXTRA_REG_RSP_0)
+ return EXTRA_REG_RSP_1;
+
+ if (idx == EXTRA_REG_RSP_1)
+ return EXTRA_REG_RSP_0;
+
+ return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+ event->hw.extra_reg.idx = idx;
+
+ if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01b7;
- event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+ } else if (idx == EXTRA_REG_RSP_1) {
+ event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+ event->hw.config |= 0x01bb;
+ event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
}
-
- if (event->hw.extra_reg.idx == orig_idx)
- return false;
-
- return true;
}

/*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
struct event_constraint *c = &emptyconstraint;
struct er_account *era;
unsigned long flags;
- int orig_idx = reg->idx;
+ int idx = reg->idx;

- /* already allocated shared msr */
- if (reg->alloc)
+ /*
+ * reg->alloc can be set due to existing state, so for fake cpuc we
+ * need to ignore this, otherwise we might fail to allocate proper fake
+ * state for this extra reg constraint. Also see the comment below.
+ */
+ if (reg->alloc && !cpuc->is_fake)
return NULL; /* call x86_get_event_constraint() */

again:
- era = &cpuc->shared_regs->regs[reg->idx];
+ era = &cpuc->shared_regs->regs[idx];
/*
* we use spin_lock_irqsave() to avoid lockdep issues when
* passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:

if (!atomic_read(&era->ref) || era->config == reg->config) {

+ /*
+ * If its a fake cpuc -- as per validate_{group,event}() we
+ * shouldn't touch event state and we can avoid doing so
+ * since both will only call get_event_constraints() once
+ * on each event, this avoids the need for reg->alloc.
+ *
+ * Not doing the ER fixup will only result in era->reg being
+ * wrong, but since we won't actually try and program hardware
+ * this isn't a problem either.
+ */
+ if (!cpuc->is_fake) {
+ if (idx != reg->idx)
+ intel_fixup_er(event, idx);
+
+ /*
+ * x86_schedule_events() can call get_event_constraints()
+ * multiple times on events in the case of incremental
+ * scheduling(). reg->alloc ensures we only do the ER
+ * allocation once.
+ */
+ reg->alloc = 1;
+ }
+
/* lock in msr value */
era->config = reg->config;
era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
/* one more user */
atomic_inc(&era->ref);

- /* no need to reallocate during incremental event scheduling */
- reg->alloc = 1;
-
/*
* need to call x86_get_event_constraint()
* to check if associated event has constraints
*/
c = NULL;
- } else if (intel_try_alt_er(event, orig_idx)) {
- raw_spin_unlock_irqrestore(&era->lock, flags);
- goto again;
+ } else {
+ idx = intel_alt_er(idx);
+ if (idx != reg->idx) {
+ raw_spin_unlock_irqrestore(&era->lock, flags);
+ goto again;
+ }
}
raw_spin_unlock_irqrestore(&era->lock, flags);

@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
struct er_account *era;

/*
- * only put constraint if extra reg was actually
- * allocated. Also takes care of event which do
- * not use an extra shared reg
+ * Only put constraint if extra reg was actually allocated. Also takes
+ * care of event which do not use an extra shared reg.
+ *
+ * Also, if this is a fake cpuc we shouldn't touch any event state
+ * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+ * either since it'll be thrown out.
*/
- if (!reg->alloc)
+ if (!reg->alloc || cpuc->is_fake)
return;

era = &cpuc->shared_regs->regs[reg->idx];

2012-06-06 16:12:08

by Peter Zijlstra

[permalink] [raw]
Subject: [tip:perf/core] perf/x86: Fix Intel shared extra MSR allocation

Commit-ID: 5a425294ee7d4ab5a374248e85838dfd450caf75
Gitweb: http://git.kernel.org/tip/5a425294ee7d4ab5a374248e85838dfd450caf75
Author: Peter Zijlstra <[email protected]>
AuthorDate: Tue, 5 Jun 2012 15:30:31 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 6 Jun 2012 17:22:26 +0200

perf/x86: Fix Intel shared extra MSR allocation

Zheng Yan reported that event group validation can wreck event state
when Intel extra_reg allocation changes event state.

Validation shouldn't change any persistent state. Cloning events in
validate_{event,group}() isn't really pretty either, so add a few
special cases to avoid modifying the event state.

The code is restructured to minimize the special case impact.

Reported-by: Zheng Yan <[email protected]>
Acked-by: Stephane Eranian <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 1 +
arch/x86/kernel/cpu/perf_event.h | 1 +
arch/x86/kernel/cpu/perf_event_intel.c | 92 ++++++++++++++++++++++----------
3 files changed, 66 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6d..cb60838 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
if (!cpuc->shared_regs)
goto error;
}
+ cpuc->is_fake = 1;
return cpuc;
error:
free_fake_cpuc(cpuc);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf..83794d8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */

unsigned int group_flag;
+ int is_fake;

/*
* Intel DebugStore bits
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546e..965baa2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
return NULL;
}

-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
{
if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
- return false;
+ return idx;

- if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
- event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= 0x01bb;
- event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
- event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
- } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+ if (idx == EXTRA_REG_RSP_0)
+ return EXTRA_REG_RSP_1;
+
+ if (idx == EXTRA_REG_RSP_1)
+ return EXTRA_REG_RSP_0;
+
+ return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+ event->hw.extra_reg.idx = idx;
+
+ if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01b7;
- event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+ } else if (idx == EXTRA_REG_RSP_1) {
+ event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+ event->hw.config |= 0x01bb;
+ event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
}
-
- if (event->hw.extra_reg.idx == orig_idx)
- return false;
-
- return true;
}

/*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
struct event_constraint *c = &emptyconstraint;
struct er_account *era;
unsigned long flags;
- int orig_idx = reg->idx;
+ int idx = reg->idx;

- /* already allocated shared msr */
- if (reg->alloc)
+ /*
+ * reg->alloc can be set due to existing state, so for fake cpuc we
+ * need to ignore this, otherwise we might fail to allocate proper fake
+ * state for this extra reg constraint. Also see the comment below.
+ */
+ if (reg->alloc && !cpuc->is_fake)
return NULL; /* call x86_get_event_constraint() */

again:
- era = &cpuc->shared_regs->regs[reg->idx];
+ era = &cpuc->shared_regs->regs[idx];
/*
* we use spin_lock_irqsave() to avoid lockdep issues when
* passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:

if (!atomic_read(&era->ref) || era->config == reg->config) {

+ /*
+ * If its a fake cpuc -- as per validate_{group,event}() we
+ * shouldn't touch event state and we can avoid doing so
+ * since both will only call get_event_constraints() once
+ * on each event, this avoids the need for reg->alloc.
+ *
+ * Not doing the ER fixup will only result in era->reg being
+ * wrong, but since we won't actually try and program hardware
+ * this isn't a problem either.
+ */
+ if (!cpuc->is_fake) {
+ if (idx != reg->idx)
+ intel_fixup_er(event, idx);
+
+ /*
+ * x86_schedule_events() can call get_event_constraints()
+ * multiple times on events in the case of incremental
+ * scheduling(). reg->alloc ensures we only do the ER
+ * allocation once.
+ */
+ reg->alloc = 1;
+ }
+
/* lock in msr value */
era->config = reg->config;
era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
/* one more user */
atomic_inc(&era->ref);

- /* no need to reallocate during incremental event scheduling */
- reg->alloc = 1;
-
/*
* need to call x86_get_event_constraint()
* to check if associated event has constraints
*/
c = NULL;
- } else if (intel_try_alt_er(event, orig_idx)) {
- raw_spin_unlock_irqrestore(&era->lock, flags);
- goto again;
+ } else {
+ idx = intel_alt_er(idx);
+ if (idx != reg->idx) {
+ raw_spin_unlock_irqrestore(&era->lock, flags);
+ goto again;
+ }
}
raw_spin_unlock_irqrestore(&era->lock, flags);

@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
struct er_account *era;

/*
- * only put constraint if extra reg was actually
- * allocated. Also takes care of event which do
- * not use an extra shared reg
+ * Only put constraint if extra reg was actually allocated. Also takes
+ * care of event which do not use an extra shared reg.
+ *
+ * Also, if this is a fake cpuc we shouldn't touch any event state
+ * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+ * either since it'll be thrown out.
*/
- if (!reg->alloc)
+ if (!reg->alloc || cpuc->is_fake)
return;

era = &cpuc->shared_regs->regs[reg->idx];

2012-06-07 01:25:38

by Yan, Zheng

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On 06/06/2012 06:12 PM, Stephane Eranian wrote:
> There is something wrong with this patch, I instrumented the code
> and I can see:
> [ 1377.324575] 1. idx=1 reg_idx=1 ref=-1 config=0xff01 era->config=0xff01
> ^^^^^^
> The test case on WSM (RSP0, RSP1):
>
> $ perf stat -a -C13 -e
> offcore_response_1:dmnd_data_rd,offcore_response_1:dmnd_data_rd sleep
> 100 &
> $ perf stat -a -C1 -e offcore_response_1:dmnd_rfo sleep 1
>
> I think this happens during scheduling of the events, i.e., during the
> run and not on initial
> programming. That could happen with cgroups, for instance.
>

Maybe we need adjust shared MSRs' reference counts in intel_fixup_er() ?

2012-06-07 04:01:33

by Yan, Zheng

[permalink] [raw]
Subject: Re: [PATCH] perf: Fix intel shared extra msr allocation

On 06/06/2012 06:12 PM, Stephane Eranian wrote:
> There is something wrong with this patch, I instrumented the code
> and I can see:
> [ 1377.324575] 1. idx=1 reg_idx=1 ref=-1 config=0xff01 era->config=0xff01
> ^^^^^^
> The test case on WSM (RSP0, RSP1):
>
> $ perf stat -a -C13 -e
> offcore_response_1:dmnd_data_rd,offcore_response_1:dmnd_data_rd sleep
> 100 &
> $ perf stat -a -C1 -e offcore_response_1:dmnd_rfo sleep 1
>
> I think this happens during scheduling of the events, i.e., during the
> run and not on initial
> programming. That could happen with cgroups, for instance.
>
The bug is in intel_fixup_er(), it should be:

static void intel_fixup_er(struct perf_event *event, int idx)
{
if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01b7;
event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
} else if (idx == EXTRA_REG_RSP_1) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01bb;
event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
}
}

Regards
Yan, Zheng