2017-07-26 19:39:12

by Vince Weaver

Subject: perf: bug in rdpmc/mmap accounting after exec

Hello

so one last bug found by the PAPI testsuite.

This one involves the rdpmc auto-disable on last unmap of an event
feature.

Failing test case:

fd=perf_event_open();
addr=mmap(fd);
exec() // without closing or unmapping the event
fd=perf_event_open();
addr=mmap(fd);
rdpmc() // GPFs due to rdpmc being disabled
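
Spelled out as a rough, self-contained C sketch (illustrative only: the
event choice, error handling and the self-exec trick here are
simplifications; the real test is the rdpmc_exec_papi one mentioned
below):

/* rdpmc-after-exec sketch: gcc -O2 repro.c -o repro && ./repro */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static void open_and_mmap(struct perf_event_mmap_page **pc)
{
        struct perf_event_attr attr;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_INSTRUCTIONS;
        attr.exclude_kernel = 1;

        /* note: no CLOEXEC, the fd deliberately survives the exec() */
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) { perror("perf_event_open"); exit(1); }

        *pc = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
        if (*pc == MAP_FAILED) { perror("mmap"); exit(1); }
}

int main(int argc, char **argv)
{
        struct perf_event_mmap_page *pc;

        open_and_mmap(&pc);

        if (argc < 2) {
                /* first pass: exec() ourselves, no munmap(), no close() */
                char *args[] = { argv[0], "post-exec", NULL };
                execv(argv[0], args);
                perror("execv");
                exit(1);
        }

        /*
         * Second pass, after the exec(): index is 1-based, 0 means rdpmc
         * is unusable for this event.  On a buggy kernel CR4.PCE is still
         * clear here and the rdpmc GPFs (the process dies with SIGSEGV).
         */
        if (pc->index)
                printf("count: %llu\n", (unsigned long long)
                       __builtin_ia32_rdpmc(pc->index - 1));
        else
                printf("rdpmc unavailable (index == 0)\n");

        return 0;
}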

I won't pretend to be able to follow the rdpmc disabling code, but if I
add some printks it looks like
current->mm->context.perf_rdpmc_allowed
isn't properly being reset on exec?

In fact, current->mm->context.perf_rdpmc_allowed goes negative which seems
like it shouldn't happen?

Anyway, a test case for this can be found in the perf_event_tests,
tests/rdpmc/rdpmc_exec_papi

Vince


2017-08-02 17:39:37

by Peter Zijlstra

Subject: Re: perf: bug in rdpmc/mmap accounting after exec

On Wed, Jul 26, 2017 at 03:39:01PM -0400, Vince Weaver wrote:
> Hello
>
> so one last bug found by the PAPI testsuite.
>
> This one involves the rdpmc auto-disable on last unmap of an event
> feature.
>
> Failing test case:
>
> fd=perf_event_open();
> addr=mmap(fd);
> exec() // without closing or unmapping the event
> fd=perf_event_open();
> addr=mmap(fd);
> rdpmc() // GPFs due to rdpmc being disabled
>
> I won't pretend to be able to follow the rdpmc disabling code, but if I
> add some printks it looks like
> current->mm->context.perf_rdpmc_allowed
> isn't properly being reset on exec?
>
> In fact, current->mm->context.perf_rdpmc_allowed goes negative which seems
> like it shouldn't happen?
>
> Anyway, a test case for this can be found in the perf_event_tests,
> tests/rdpmc/rdpmc_exec_papi

Good find that...

The below seems to fix that for me.

---
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8e3db8f642a7..3186f6e081c4 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2114,11 +2114,14 @@ static void refresh_pce(void *ignored)
 	load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
 }
 
-static void x86_pmu_event_mapped(struct perf_event *event)
+static void x86_pmu_event_mapped(struct perf_event *event, struct vm_area_struct *vma)
 {
+	struct mm_struct *mm;
+
 	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 		return;
 
+	mm = vma->vm_mm;
 	/*
 	 * This function relies on not being called concurrently in two
 	 * tasks in the same mm. Otherwise one task could observe
@@ -2129,22 +2132,21 @@ static void x86_pmu_event_mapped(struct perf_event *event)
 	 * For now, this can't happen because all callers hold mmap_sem
 	 * for write. If this changes, we'll need a different solution.
 	 */
-	lockdep_assert_held_exclusive(&current->mm->mmap_sem);
+	lockdep_assert_held_exclusive(&mm->mmap_sem);
 
-	if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
-		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+	if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
+		on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
 }
 
-static void x86_pmu_event_unmapped(struct perf_event *event)
+static void x86_pmu_event_unmapped(struct perf_event *event, struct vm_area_struct *vma)
 {
-	if (!current->mm)
-		return;
+	struct mm_struct *mm = vma->vm_mm;
 
 	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 		return;
 
-	if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
-		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+	if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
+		on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
 }
 
 static int x86_pmu_event_idx(struct perf_event *event)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a3b873fc59e4..271a32a7e3e1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -310,8 +310,8 @@ struct pmu {
 	 * Notification that the event was mapped or unmapped. Called
 	 * in the context of the mapping task.
 	 */
-	void (*event_mapped)		(struct perf_event *event); /*optional*/
-	void (*event_unmapped)		(struct perf_event *event); /*optional*/
+	void (*event_mapped)		(struct perf_event *event, struct vm_area_struct *vma); /*optional*/
+	void (*event_unmapped)		(struct perf_event *event, struct vm_area_struct *vma); /*optional*/
 
 	/*
 	 * Flags for ->add()/->del()/ ->start()/->stop(). There are
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 426c2ffba16d..f724348bba08 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5090,7 +5105,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 	atomic_inc(&event->rb->aux_mmap_count);
 
 	if (event->pmu->event_mapped)
-		event->pmu->event_mapped(event);
+		event->pmu->event_mapped(event, vma);
 }
 
 static void perf_pmu_output_stop(struct perf_event *event);
@@ -5113,7 +5128,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	unsigned long size = perf_data_size(rb);
 
 	if (event->pmu->event_unmapped)
-		event->pmu->event_unmapped(event);
+		event->pmu->event_unmapped(event, vma);
 
 	/*
 	 * rb->aux_mmap_count will always drop before rb->mmap_count and
@@ -5411,7 +5426,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_ops = &perf_mmap_vmops;
 
 	if (event->pmu->event_mapped)
-		event->pmu->event_mapped(event);
+		event->pmu->event_mapped(event, vma);
 
 	return ret;
 }

2017-08-03 18:04:22

by Vince Weaver

Subject: Re: perf: bug in rdpmc/mmap accounting after exec

On Wed, 2 Aug 2017, Peter Zijlstra wrote:

> On Wed, Jul 26, 2017 at 03:39:01PM -0400, Vince Weaver wrote:
> > In fact, current->mm->context.perf_rdpmc_allowed goes negative which seems
> > like it shouldn't happen?
>
> Good find that...
>
> The below seems to fix that for me.

Yes, I can confirm that with this patch all of the various PAPI and
perf_event_tests pass on my test system.

Thanks,

Vince

2017-08-04 00:37:45

by Andy Lutomirski

Subject: Re: perf: bug in rdpmc/mmap accounting after exec

On Wed, Aug 2, 2017 at 10:39 AM, Peter Zijlstra <[email protected]> wrote:
> On Wed, Jul 26, 2017 at 03:39:01PM -0400, Vince Weaver wrote:
>> Hello
>>
>> so one last bug found by the PAPI testsuite.
>>
>> This one involves the rdpmc auto-disable on last unmap of an event
>> feature.
>>
>> Failing test case:
>>
>> fd=perf_event_open();
>> addr=mmap(fd);
>> exec() // without closing or unmapping the event
>> fd=perf_event_open();
>> addr=mmap(fd);
>> rdpmc() // GPFs due to rdpmc being disabled
>>
>> I won't pretend to be able to follow the rdpmc disabling code, but if I
>> add some printks it looks like
>> current->mm->context.perf_rdpmc_allowed
>> isn't properly being reset on exec?
>>
>> In fact, current->mm->context.perf_rdpmc_allowed goes negative which seems
>> like it shouldn't happen?
>>
>> Anyway, a test case for this can be found in the perf_event_tests,
>> tests/rdpmc/rdpmc_exec_papi
>
> Good find that...
>
> The below seems to fix that for me.
>

...because execve plays funny games with non-current mms. Whoops.

Reviewed-by: Andy Lutomirski <[email protected]>

Can you send it upstream and tag it for stable?

2017-08-04 09:17:12

by Peter Zijlstra

Subject: Re: perf: bug in rdpmc/mmap accounting after exec

On Thu, Aug 03, 2017 at 05:37:22PM -0700, Andy Lutomirski wrote:
> Reviewed-by: Andy Lutomirski <[email protected]>

Thanks!

> Can you send it upstream and tag it for stable?

Yeah, I'll make it pretty, write a changelog and things like that :-)

2017-08-04 11:12:15

by Peter Zijlstra

Subject: Re: perf: bug in rdpmc/mmap accounting after exec

On Fri, Aug 04, 2017 at 11:17:02AM +0200, Peter Zijlstra wrote:
> Yeah, I'll make it pretty, write a changelog and things like that :-)

Slightly different from the earlier one in that I now pass @mm along
instead of @vma. Same thing otherwise.

---
Subject: perf,x86: Fix RDPMC vs mm_struct tracking
From: Peter Zijlstra <[email protected]>
Date: Wed, 2 Aug 2017 19:39:30 +0200

Vince reported:

> Failing test case:
>
> fd=perf_event_open();
> addr=mmap(fd);
> exec() // without closing or unmapping the event
> fd=perf_event_open();
> addr=mmap(fd);
> rdpmc() // GPFs due to rdpmc being disabled

The problem is of course that exec() plays tricks with what is
current->mm, only destroying the old mappings after having installed
the new mm.

Fix this confusion by passing along vma->vm_mm instead of relying on
current->mm.
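
Roughly, the call chain during exec() is (a simplified sketch, not
verbatim kernel code):

  execve()
    exec_mmap()                   // current->mm = new_mm, then:
      mmput(old_mm)
        exit_mmap(old_mm)         // old VMAs torn down only now
          perf_mmap_close(vma)    // vma->vm_mm == old_mm, but the
            ->event_unmapped()    // callback used current->mm and so
                                  // decremented the *new* mm's
                                  // perf_rdpmc_allowed count to -1

The next perf_event_open() + mmap() pair then only brings the new mm's
count back up to 0, the == 1 test in x86_pmu_event_mapped() never
fires, CR4.PCE stays clear, and the subsequent rdpmc faults; this also
matches the negative count Vince observed.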

Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: [email protected]
Reported-by: Vince Weaver <[email protected]>
Tested-by: Vince Weaver <[email protected]>
Reviewed-by: Andy Lutomirski <[email protected]>
Fixes: 1e0fb9ec679c ("perf: Add pmu callbacks to track event mapping and unmapping")
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
---
 arch/x86/events/core.c     | 16 +++++++---------
 include/linux/perf_event.h |  4 ++--
 kernel/events/core.c       |  6 +++---
 3 files changed, 12 insertions(+), 14 deletions(-)

--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2114,7 +2114,7 @@ static void refresh_pce(void *ignored)
 	load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
 }
 
-static void x86_pmu_event_mapped(struct perf_event *event)
+static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
 {
 	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 		return;
@@ -2129,22 +2129,20 @@ static void x86_pmu_event_mapped(struct
 	 * For now, this can't happen because all callers hold mmap_sem
 	 * for write. If this changes, we'll need a different solution.
 	 */
-	lockdep_assert_held_exclusive(&current->mm->mmap_sem);
+	lockdep_assert_held_exclusive(&mm->mmap_sem);
 
-	if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
-		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+	if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
+		on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
 }
 
-static void x86_pmu_event_unmapped(struct perf_event *event)
+static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
 {
-	if (!current->mm)
-		return;
 
 	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 		return;
 
-	if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
-		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+	if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
+		on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
 }
 
 static int x86_pmu_event_idx(struct perf_event *event)
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -310,8 +310,8 @@ struct pmu {
 	 * Notification that the event was mapped or unmapped. Called
 	 * in the context of the mapping task.
 	 */
-	void (*event_mapped)		(struct perf_event *event); /*optional*/
-	void (*event_unmapped)		(struct perf_event *event); /*optional*/
+	void (*event_mapped)		(struct perf_event *event, struct mm_struct *mm); /*optional*/
+	void (*event_unmapped)		(struct perf_event *event, struct mm_struct *mm); /*optional*/
 
 	/*
 	 * Flags for ->add()/->del()/ ->start()/->stop(). There are
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5090,7 +5090,7 @@ static void perf_mmap_open(struct vm_are
 	atomic_inc(&event->rb->aux_mmap_count);
 
 	if (event->pmu->event_mapped)
-		event->pmu->event_mapped(event);
+		event->pmu->event_mapped(event, vma->vm_mm);
 }
 
 static void perf_pmu_output_stop(struct perf_event *event);
@@ -5113,7 +5113,7 @@ static void perf_mmap_close(struct vm_ar
 	unsigned long size = perf_data_size(rb);
 
 	if (event->pmu->event_unmapped)
-		event->pmu->event_unmapped(event);
+		event->pmu->event_unmapped(event, vma->vm_mm);
 
 	/*
 	 * rb->aux_mmap_count will always drop before rb->mmap_count and
@@ -5411,7 +5411,7 @@ static int perf_mmap(struct file *file,
 	vma->vm_ops = &perf_mmap_vmops;
 
 	if (event->pmu->event_mapped)
-		event->pmu->event_mapped(event);
+		event->pmu->event_mapped(event, vma->vm_mm);
 
 	return ret;
 }

Subject: [tip:perf/core] perf/x86: Fix RDPMC vs. mm_struct tracking

Commit-ID: bfe334924ccd9f4a53f30240c03cf2f43f5b2df1
Gitweb: http://git.kernel.org/tip/bfe334924ccd9f4a53f30240c03cf2f43f5b2df1
Author: Peter Zijlstra <[email protected]>
AuthorDate: Wed, 2 Aug 2017 19:39:30 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Thu, 10 Aug 2017 12:01:08 +0200

perf/x86: Fix RDPMC vs. mm_struct tracking

Vince reported the following rdpmc() testcase failure:

> Failing test case:
>
> fd=perf_event_open();
> addr=mmap(fd);
> exec() // without closing or unmapping the event
> fd=perf_event_open();
> addr=mmap(fd);
> rdpmc() // GPFs due to rdpmc being disabled

The problem is of course that exec() plays tricks with what is
current->mm, only destroying the old mappings after having
installed the new mm.

Fix this confusion by passing along vma->vm_mm instead of relying on
current->mm.

Reported-by: Vince Weaver <[email protected]>
Tested-by: Vince Weaver <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Andy Lutomirski <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Fixes: 1e0fb9ec679c ("perf: Add pmu callbacks to track event mapping and unmapping")
Link: http://lkml.kernel.org/r/[email protected]
[ Minor cleanups. ]
Signed-off-by: Ingo Molnar <[email protected]>
---
 arch/x86/events/core.c     | 16 +++++++---------
 include/linux/perf_event.h |  4 ++--
 kernel/events/core.c       |  6 +++---
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8e3db8f..af12e29 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2114,7 +2114,7 @@ static void refresh_pce(void *ignored)
 	load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
 }
 
-static void x86_pmu_event_mapped(struct perf_event *event)
+static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
 {
 	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 		return;
@@ -2129,22 +2129,20 @@ static void x86_pmu_event_mapped(struct perf_event *event)
 	 * For now, this can't happen because all callers hold mmap_sem
 	 * for write. If this changes, we'll need a different solution.
 	 */
-	lockdep_assert_held_exclusive(&current->mm->mmap_sem);
+	lockdep_assert_held_exclusive(&mm->mmap_sem);
 
-	if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
-		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+	if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
+		on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
 }
 
-static void x86_pmu_event_unmapped(struct perf_event *event)
+static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
 {
-	if (!current->mm)
-		return;
 
 	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 		return;
 
-	if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
-		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+	if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
+		on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
 }
 
 static int x86_pmu_event_idx(struct perf_event *event)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a3b873f..b14095b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -310,8 +310,8 @@ struct pmu {
 	 * Notification that the event was mapped or unmapped. Called
 	 * in the context of the mapping task.
 	 */
-	void (*event_mapped)		(struct perf_event *event); /*optional*/
-	void (*event_unmapped)		(struct perf_event *event); /*optional*/
+	void (*event_mapped)		(struct perf_event *event, struct mm_struct *mm); /* optional */
+	void (*event_unmapped)		(struct perf_event *event, struct mm_struct *mm); /* optional */
 
 	/*
 	 * Flags for ->add()/->del()/ ->start()/->stop(). There are
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 426c2ff..a654b8a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5090,7 +5090,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 	atomic_inc(&event->rb->aux_mmap_count);
 
 	if (event->pmu->event_mapped)
-		event->pmu->event_mapped(event);
+		event->pmu->event_mapped(event, vma->vm_mm);
 }
 
 static void perf_pmu_output_stop(struct perf_event *event);
@@ -5113,7 +5113,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	unsigned long size = perf_data_size(rb);
 
 	if (event->pmu->event_unmapped)
-		event->pmu->event_unmapped(event);
+		event->pmu->event_unmapped(event, vma->vm_mm);
 
 	/*
 	 * rb->aux_mmap_count will always drop before rb->mmap_count and
@@ -5411,7 +5411,7 @@ aux_unlock:
 	vma->vm_ops = &perf_mmap_vmops;
 
 	if (event->pmu->event_mapped)
-		event->pmu->event_mapped(event);
+		event->pmu->event_mapped(event, vma->vm_mm);
 
 	return ret;
 }