2015-05-19 00:00:50

by Thomas Gleixner

Subject: [patch 3/6] x86, perf, cqm: Remove pointless spinlock from state cache

struct intel_cqm_state is a strictly per-cpu cache of the rmid and the
usage counter. It can never be modified from a remote cpu.

The three functions which modify the content (start, stop and del; del
maps to stop) are called from the perf core with interrupts disabled,
which is sufficient protection for the per-cpu state values.

Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/kernel/cpu/perf_event_intel_cqm.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)

Index: linux/arch/x86/kernel/cpu/perf_event_intel_cqm.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ linux/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -17,11 +17,16 @@ static unsigned int cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 
 struct intel_cqm_state {
-	raw_spinlock_t		lock;
 	u32			rmid;
 	int			cnt;
 };
 
+/*
+ * The cached intel_cqm_state is strictly per cpu and can never be
+ * updated from a remote cpu. Both functions which modify the state
+ * (intel_cqm_event_start and intel_cqm_event_stop) are called with
+ * interrupts disabled, which is sufficient for the protection.
+ */
 static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
 
 /*
@@ -963,15 +968,12 @@ static void intel_cqm_event_start(struct
 {
 	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
 	u32 rmid = event->hw.cqm_rmid;
-	unsigned long flags;
 
 	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
 		return;
 
 	event->hw.cqm_state &= ~PERF_HES_STOPPED;
 
-	raw_spin_lock_irqsave(&state->lock, flags);
-
 	if (state->cnt++)
 		WARN_ON_ONCE(state->rmid != rmid);
 	else
@@ -984,21 +986,17 @@ static void intel_cqm_event_start(struct
 	 * Technology component.
 	 */
 	wrmsr(MSR_IA32_PQR_ASSOC, rmid, 0);
-
-	raw_spin_unlock_irqrestore(&state->lock, flags);
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
 	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
-	unsigned long flags;
 
 	if (event->hw.cqm_state & PERF_HES_STOPPED)
 		return;
 
 	event->hw.cqm_state |= PERF_HES_STOPPED;
 
-	raw_spin_lock_irqsave(&state->lock, flags);
 	intel_cqm_event_read(event);
 
 	if (!--state->cnt) {
@@ -1013,8 +1011,6 @@ static void intel_cqm_event_stop(struct
 	} else {
 		WARN_ON_ONCE(!state->rmid);
 	}
-
-	raw_spin_unlock_irqrestore(&state->lock, flags);
 }
 
 static int intel_cqm_event_add(struct perf_event *event, int mode)
@@ -1257,7 +1253,6 @@ static void intel_cqm_cpu_prepare(unsign
 	struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	raw_spin_lock_init(&state->lock);
 	state->rmid = 0;
 	state->cnt = 0;

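The invariant the patch relies on can be illustrated with a minimal sketch. The helper and its debug check below are hypothetical, not part of the driver; this_cpu_ptr(), irqs_disabled() and WARN_ON_ONCE() are the standard kernel primitives the real code builds on:

	/* Illustration only: per-cpu state needs no lock when every
	 * writer runs on its own cpu with interrupts disabled. */
	static void cqm_state_update_sketch(u32 rmid)
	{
		struct intel_cqm_state *state;

		/* The perf core calls start/stop/del with interrupts
		 * disabled, so nothing can preempt or interrupt us here. */
		WARN_ON_ONCE(!irqs_disabled());

		state = this_cpu_ptr(&cqm_state);	/* never written remotely */
		if (state->cnt++)
			WARN_ON_ONCE(state->rmid != rmid);
		else
			state->rmid = rmid;
	}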

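The wrmsr() in intel_cqm_event_start() is where the rmid actually reaches the hardware. A sketch of that write, assuming the SDM layout of IA32_PQR_ASSOC (RMID in bits 0-9, CLOS in the upper 32 bits); the mask macro is made up for illustration, the driver itself writes the rmid unmasked:

	#define PQR_RMID_MASK_SKETCH	0x3ff	/* assumed 10-bit RMID field */

	static inline void pqr_set_rmid_sketch(u32 rmid)
	{
		/* wrmsr(msr, lo, hi): the low half carries the RMID,
		 * the high half (the CLOS field) is left at zero. */
		wrmsr(MSR_IA32_PQR_ASSOC, rmid & PQR_RMID_MASK_SKETCH, 0);
	}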
2015-05-19 09:23:03

by Matt Fleming

Subject: Re: [patch 3/6] x86, perf, cqm: Remove pointless spinlock from state cache

On Tue, 19 May, at 12:00:53AM, Thomas Gleixner wrote:
> struct intel_cqm_state is a strictly per-cpu cache of the rmid and the
> usage counter. It can never be modified from a remote cpu.
>
> The three functions which modify the content (start, stop and del; del
> maps to stop) are called from the perf core with interrupts disabled,
> which is sufficient protection for the per-cpu state values.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> arch/x86/kernel/cpu/perf_event_intel_cqm.c | 17 ++++++-----------
> 1 file changed, 6 insertions(+), 11 deletions(-)

The state locking code was taken from Peter's original patch last year,
so it would be good for him to chime in that this is safe. It's probably
just that it was necessary in Peter's patches but after I refactored
bits I forgot to rip it out.

But yeah, from reading the code again the lock does look entirely
superfluous.

So unless Peter complains,

Acked-by: Matt Fleming <[email protected]>

--
Matt Fleming, Intel Open Source Technology Center

2015-05-19 10:51:31

by Peter Zijlstra

Subject: Re: [patch 3/6] x86, perf, cqm: Remove pointless spinlock from state cache

On Tue, May 19, 2015 at 10:13:18AM +0100, Matt Fleming wrote:
> On Tue, 19 May, at 12:00:53AM, Thomas Gleixner wrote:
> > struct intel_cqm_state is a strictly per-cpu cache of the rmid and the
> > usage counter. It can never be modified from a remote cpu.
> >
> > The three functions which modify the content (start, stop and del; del
> > maps to stop) are called from the perf core with interrupts disabled,
> > which is sufficient protection for the per-cpu state values.
> >
> > Signed-off-by: Thomas Gleixner <[email protected]>
> > ---
> > arch/x86/kernel/cpu/perf_event_intel_cqm.c | 17 ++++++-----------
> > 1 file changed, 6 insertions(+), 11 deletions(-)
>
> The state locking code was taken from Peter's original patch last year,
> so it would be good for him to chime in that this is safe. It's probably
> just that it was necessary in Peter's patches but after I refactored
> bits I forgot to rip it out.
>
> But yeah, from reading the code again the lock does look entirely
> superfluous.

I think that all stems from a point in time when it wasn't at all clear
to me what the hardware looked like, but what do I know, I can't even
remember last week.

All the patches looked good to me, so I already queued them.

I'll add your Ack on them.

2015-06-05 18:13:59

by Juvva, Kanaka D

Subject: RE: [patch 3/6] x86, perf, cqm: Remove pointless spinlock from state cache

Tested-by: Kanaka Juvva<[email protected]>

> -----Original Message-----
> From: Thomas Gleixner [mailto:[email protected]]
> Sent: Monday, May 18, 2015 5:01 PM
> To: LKML
> Cc: Peter Zijlstra; Vikas Shivappa; [email protected]; Fleming, Matt; Auld, Will;
> Juvva, Kanaka D
> Subject: [patch 3/6] x86, perf, cqm: Remove pointless spinlock from state cache
>
> struct intel_cqm_state is a strictly per-cpu cache of the rmid and the usage
> counter. It can never be modified from a remote cpu.
>
> The three functions which modify the content (start, stop and del; del maps to
> stop) are called from the perf core with interrupts disabled, which is sufficient
> protection for the per-cpu state values.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> arch/x86/kernel/cpu/perf_event_intel_cqm.c | 17 ++++++-----------
> 1 file changed, 6 insertions(+), 11 deletions(-)
>
> Index: linux/arch/x86/kernel/cpu/perf_event_intel_cqm.c
> ===================================================================
> --- linux.orig/arch/x86/kernel/cpu/perf_event_intel_cqm.c
> +++ linux/arch/x86/kernel/cpu/perf_event_intel_cqm.c
> @@ -17,11 +17,16 @@ static unsigned int cqm_max_rmid = -1;
>  static unsigned int cqm_l3_scale; /* supposedly cacheline size */
> 
>  struct intel_cqm_state {
> -	raw_spinlock_t		lock;
>  	u32			rmid;
>  	int			cnt;
>  };
> 
> +/*
> + * The cached intel_cqm_state is strictly per cpu and can never be
> + * updated from a remote cpu. Both functions which modify the state
> + * (intel_cqm_event_start and intel_cqm_event_stop) are called with
> + * interrupts disabled, which is sufficient for the protection.
> + */
>  static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
> 
>  /*
> @@ -963,15 +968,12 @@ static void intel_cqm_event_start(struct
>  {
>  	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
>  	u32 rmid = event->hw.cqm_rmid;
> -	unsigned long flags;
> 
>  	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
>  		return;
> 
>  	event->hw.cqm_state &= ~PERF_HES_STOPPED;
> 
> -	raw_spin_lock_irqsave(&state->lock, flags);
> -
>  	if (state->cnt++)
>  		WARN_ON_ONCE(state->rmid != rmid);
>  	else
> @@ -984,21 +986,17 @@ static void intel_cqm_event_start(struct
>  	 * Technology component.
>  	 */
>  	wrmsr(MSR_IA32_PQR_ASSOC, rmid, 0);
> -
> -	raw_spin_unlock_irqrestore(&state->lock, flags);
>  }
> 
>  static void intel_cqm_event_stop(struct perf_event *event, int mode)
>  {
>  	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
> -	unsigned long flags;
> 
>  	if (event->hw.cqm_state & PERF_HES_STOPPED)
>  		return;
> 
>  	event->hw.cqm_state |= PERF_HES_STOPPED;
> 
> -	raw_spin_lock_irqsave(&state->lock, flags);
>  	intel_cqm_event_read(event);
> 
>  	if (!--state->cnt) {
> @@ -1013,8 +1011,6 @@ static void intel_cqm_event_stop(struct
>  	} else {
>  		WARN_ON_ONCE(!state->rmid);
>  	}
> -
> -	raw_spin_unlock_irqrestore(&state->lock, flags);
>  }
> 
>  static int intel_cqm_event_add(struct perf_event *event, int mode)
> @@ -1257,7 +1253,6 @@ static void intel_cqm_cpu_prepare(unsign
>  	struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
>  	struct cpuinfo_x86 *c = &cpu_data(cpu);
> 
> -	raw_spin_lock_init(&state->lock);
>  	state->rmid = 0;
>  	state->cnt = 0;
> 
>