Hello
so one last bug found by the PAPI testsuite.
This one involves the rdpmc auto-disable on last unmap of an event
feature.
Failing test case:
fd=perf_event_open();
addr=mmap(fd);
exec() // without closing or unmapping the event
fd=perf_event_open();
addr=mmap(fd);
rdpmc() // GPFs due to rdpmc being disabled
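Spelled out as actual code, a minimal self-exec'ing reproducer looks
something like this (just an illustration of the sequence above, with
an arbitrary event choice; not the actual PAPI test):

/* Minimal sketch, illustrative only: open+mmap an event, exec()
 * without cleaning up, open+mmap again, then try rdpmc. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static struct perf_event_mmap_page *open_and_mmap(void)
{
	struct perf_event_attr attr;
	void *addr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) { perror("perf_event_open"); exit(1); }

	addr = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) { perror("mmap"); exit(1); }

	return addr;
}

int main(int argc, char **argv)
{
	struct perf_event_mmap_page *pc = open_and_mmap();
	unsigned int lo, hi, idx;

	if (argc == 1) {
		/* First pass: exec ourselves, leaking fd and mapping. */
		execl("/proc/self/exe", argv[0], "child", (char *)NULL);
		perror("execl");
		exit(1);
	}

	/* Second pass, post-exec: rdpmc on the freshly mapped event. */
	idx = pc->index;
	if (idx) {
		asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (idx - 1));
		printf("rdpmc ok: %08x%08x\n", hi, lo);
	}
	return 0;
}

On an affected kernel the second pass should die with SIGSEGV at the
rdpmc, matching the GPF above.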
I won't pretend to be able to follow the rdpmc disabling code, but if I
add some printks it looks like
current->mm->context.perf_rdpmc_allowed
isn't properly being reset on exec?
In fact, current->mm->context.perf_rdpmc_allowed goes negative, which seems
like it shouldn't happen?
Anyway, a test case for this can be found in the perf_event_tests,
tests/rdpmc/rdpmc_exec_papi
Vince
On Wed, Jul 26, 2017 at 03:39:01PM -0400, Vince Weaver wrote:
> Hello
>
> so one last bug found by the PAPI testsuite.
>
> This one involves the rdpmc auto-disable on last unmap of an event
> feature.
>
> Failing test case:
>
> fd=perf_event_open();
> addr=mmap(fd);
> exec() // without closing or unmapping the event
> fd=perf_event_open();
> addr=mmap(fd);
> rdpmc() // GPFs due to rdpmc being disabled
>
> I won't pretend to be able to follow the rdpmc disabling code, but if I
> add some printks it looks like
> current->mm->context.perf_rdpmc_allowed
> isn't properly being reset on exec?
>
> In fact, current->mm->context.perf_rdpmc_allowed goes negative, which seems
> like it shouldn't happen?
>
> Anyway, a test case for this can be found in the perf_event_tests,
> tests/rdpmc/rdpmc_exec_papi
Good find that...
The below seems to fix that for me.
---
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8e3db8f642a7..3186f6e081c4 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2114,11 +2114,14 @@ static void refresh_pce(void *ignored)
load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
}
-static void x86_pmu_event_mapped(struct perf_event *event)
+static void x86_pmu_event_mapped(struct perf_event *event, struct vm_area_struct *vma)
{
+ struct mm_struct *mm;
+
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return;
+ mm = vma->vm_mm;
/*
* This function relies on not being called concurrently in two
* tasks in the same mm. Otherwise one task could observe
@@ -2129,22 +2132,21 @@ static void x86_pmu_event_mapped(struct perf_event *event)
* For now, this can't happen because all callers hold mmap_sem
* for write. If this changes, we'll need a different solution.
*/
- lockdep_assert_held_exclusive(&current->mm->mmap_sem);
+ lockdep_assert_held_exclusive(&mm->mmap_sem);
- if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
- on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+ if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
+ on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
}
-static void x86_pmu_event_unmapped(struct perf_event *event)
+static void x86_pmu_event_unmapped(struct perf_event *event, struct vm_area_struct *vma)
{
- if (!current->mm)
- return;
+ struct mm_struct *mm = vma->vm_mm;
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return;
- if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
- on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+ if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
+ on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
}
static int x86_pmu_event_idx(struct perf_event *event)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a3b873fc59e4..271a32a7e3e1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -310,8 +310,8 @@ struct pmu {
* Notification that the event was mapped or unmapped. Called
* in the context of the mapping task.
*/
- void (*event_mapped) (struct perf_event *event); /*optional*/
- void (*event_unmapped) (struct perf_event *event); /*optional*/
+ void (*event_mapped) (struct perf_event *event, struct vm_area_struct *vma); /*optional*/
+ void (*event_unmapped) (struct perf_event *event, struct vm_area_struct *vma); /*optional*/
/*
* Flags for ->add()/->del()/ ->start()/->stop(). There are
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 426c2ffba16d..f724348bba08 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5090,7 +5105,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
atomic_inc(&event->rb->aux_mmap_count);
if (event->pmu->event_mapped)
- event->pmu->event_mapped(event);
+ event->pmu->event_mapped(event, vma);
}
static void perf_pmu_output_stop(struct perf_event *event);
@@ -5113,7 +5128,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
unsigned long size = perf_data_size(rb);
if (event->pmu->event_unmapped)
- event->pmu->event_unmapped(event);
+ event->pmu->event_unmapped(event, vma);
/*
* rb->aux_mmap_count will always drop before rb->mmap_count and
@@ -5411,7 +5426,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_ops = &perf_mmap_vmops;
if (event->pmu->event_mapped)
- event->pmu->event_mapped(event);
+ event->pmu->event_mapped(event, vma);
return ret;
}
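(For anyone not familiar with the counting being fixed here:
perf_rdpmc_allowed is the usual enable-on-first-user /
disable-on-last-user refcount idiom. In isolation, as plain userspace
C rather than the kernel code above, the pattern looks like this:)

/* users == 0 means feature off; first mapper enables it, last
 * unmapper disables it. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int users;

static void feature_enable(void)  { puts("enable  (think CR4.PCE)"); }
static void feature_disable(void) { puts("disable (think CR4.PCE)"); }

static void map_one(void)
{
	if (atomic_fetch_add(&users, 1) == 0)	/* 0 -> 1: first user */
		feature_enable();
}

static void unmap_one(void)
{
	/* 1 -> 0: last user. Going below zero, as in this thread,
	 * means an unmap was charged to an mm that never mapped. */
	if (atomic_fetch_sub(&users, 1) == 1)
		feature_disable();
}

int main(void)
{
	map_one();
	map_one();
	unmap_one();
	unmap_one();
	return 0;
}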
On Wed, 2 Aug 2017, Peter Zijlstra wrote:
> On Wed, Jul 26, 2017 at 03:39:01PM -0400, Vince Weaver wrote:
> > In fact, current->mm->context.perf_rdpmc_allowed goes negative, which seems
> > like it shouldn't happen?
>
> Good find that...
>
> The below seems to fix that for me.
yes, I can confirm that with this patch all of the various PAPI and
perf_event_tests tests pass on my test system.
Thanks,
Vince
On Wed, Aug 2, 2017 at 10:39 AM, Peter Zijlstra <[email protected]> wrote:
> On Wed, Jul 26, 2017 at 03:39:01PM -0400, Vince Weaver wrote:
>> Hello
>>
>> so one last bug found by the PAPI testsuite.
>>
>> This one involves the rdpmc auto-disable on last unmap of an event
>> feature.
>>
>> Failing test case:
>>
>> fd=perf_event_open();
>> addr=mmap(fd);
>> exec() // without closing or unmapping the event
>> fd=perf_event_open();
>> addr=mmap(fd);
>> rdpmc() // GPFs due to rdpmc being disabled
>>
>> I won't pretend to be able to follow the rdpmc disabling code, but if I
>> add some printks it looks like
>> current->mm->context.perf_rdpmc_allowed
>> isn't properly being reset on exec?
>>
>> In fact, current->mm->context.perf_rdpmc_allowed goes negative, which seems
>> like it shouldn't happen?
>>
>> Anyway, a test case for this can be found in the perf_event_tests,
>> tests/rdpmc/rdpmc_exec_papi
>
> Good find that...
>
> The below seems to fix that for me.
>
...because execve plays funny games with non-current mms. Whoops.
Reviewed-by: Andy Lutomirski <[email protected]>
Can you send it upstream and tag it for stable?
On Thu, Aug 03, 2017 at 05:37:22PM -0700, Andy Lutomirski wrote:
> Reviewed-by: Andy Lutomirski <[email protected]>
Thanks!
> Can you send it upstream and tag it for stable?
Yeah, I'll make it pretty, write a changelog and things like that :-)
On Fri, Aug 04, 2017 at 11:17:02AM +0200, Peter Zijlstra wrote:
> Yeah, I'll make it pretty, write a changelog and things like that :-)
Slightly different from the earlier one in that I now pass @mm along
instead of @vma. Same thing otherwise.
---
Subject: perf,x86: Fix RDPMC vs mm_struct tracking
From: Peter Zijlstra <[email protected]>
Date: Wed, 2 Aug 2017 19:39:30 +0200
Vince reported:
> Failing test case:
>
> fd=perf_event_open();
> addr=mmap(fd);
> exec() // without closing or unmapping the event
> fd=perf_event_open();
> addr=mmap(fd);
> rdpmc() // GPFs due to rdpmc being disabled
The problem is of course that exec() plays tricks with what is
current->mm, only destroying the old mappings after having installed
the new mm.
Fix this confusion by passing along vma->vm_mm instead of relying on
current->mm.
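Schematically (simplified from memory of the fs/exec.c flow; not the
exact call chain):

  execve()
    exec_mmap(new_mm)          // current->mm = new_mm from here on
      mmput(old_mm)
        exit_mmap(old_mm)      // tears down the old VMAs
          perf_mmap_close(vma) // vma->vm_mm == old_mm, but current->mm
                               // == new_mm, so the decrement lands on
                               // the wrong mm's perf_rdpmc_allowed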
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: [email protected]
Reported-by: Vince Weaver <[email protected]>
Tested-by: Vince Weaver <[email protected]>
Reviewed-by: Andy Lutomirski <[email protected]>
Fixes: 1e0fb9ec679c ("perf: Add pmu callbacks to track event mapping and unmapping")
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
---
arch/x86/events/core.c | 16 +++++++---------
include/linux/perf_event.h | 4 ++--
kernel/events/core.c | 6 +++---
3 files changed, 12 insertions(+), 14 deletions(-)
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2114,7 +2114,7 @@ static void refresh_pce(void *ignored)
load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
}
-static void x86_pmu_event_mapped(struct perf_event *event)
+static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return;
@@ -2129,22 +2129,20 @@ static void x86_pmu_event_mapped(struct
* For now, this can't happen because all callers hold mmap_sem
* for write. If this changes, we'll need a different solution.
*/
- lockdep_assert_held_exclusive(&current->mm->mmap_sem);
+ lockdep_assert_held_exclusive(&mm->mmap_sem);
- if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
- on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+ if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
+ on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
}
-static void x86_pmu_event_unmapped(struct perf_event *event)
+static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
- if (!current->mm)
- return;
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return;
- if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
- on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+ if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
+ on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
}
static int x86_pmu_event_idx(struct perf_event *event)
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -310,8 +310,8 @@ struct pmu {
* Notification that the event was mapped or unmapped. Called
* in the context of the mapping task.
*/
- void (*event_mapped) (struct perf_event *event); /*optional*/
- void (*event_unmapped) (struct perf_event *event); /*optional*/
+ void (*event_mapped) (struct perf_event *event, struct mm_struct *mm); /*optional*/
+ void (*event_unmapped) (struct perf_event *event, struct mm_struct *mm); /*optional*/
/*
* Flags for ->add()/->del()/ ->start()/->stop(). There are
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5090,7 +5090,7 @@ static void perf_mmap_open(struct vm_are
atomic_inc(&event->rb->aux_mmap_count);
if (event->pmu->event_mapped)
- event->pmu->event_mapped(event);
+ event->pmu->event_mapped(event, vma->vm_mm);
}
static void perf_pmu_output_stop(struct perf_event *event);
@@ -5113,7 +5113,7 @@ static void perf_mmap_close(struct vm_ar
unsigned long size = perf_data_size(rb);
if (event->pmu->event_unmapped)
- event->pmu->event_unmapped(event);
+ event->pmu->event_unmapped(event, vma->vm_mm);
/*
* rb->aux_mmap_count will always drop before rb->mmap_count and
@@ -5411,7 +5411,7 @@ static int perf_mmap(struct file *file,
vma->vm_ops = &perf_mmap_vmops;
if (event->pmu->event_mapped)
- event->pmu->event_mapped(event);
+ event->pmu->event_mapped(event, vma->vm_mm);
return ret;
}
Commit-ID: bfe334924ccd9f4a53f30240c03cf2f43f5b2df1
Gitweb: http://git.kernel.org/tip/bfe334924ccd9f4a53f30240c03cf2f43f5b2df1
Author: Peter Zijlstra <[email protected]>
AuthorDate: Wed, 2 Aug 2017 19:39:30 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Thu, 10 Aug 2017 12:01:08 +0200
perf/x86: Fix RDPMC vs. mm_struct tracking
Vince reported the following rdpmc() testcase failure:
> Failing test case:
>
> fd=perf_event_open();
> addr=mmap(fd);
> exec() // without closing or unmapping the event
> fd=perf_event_open();
> addr=mmap(fd);
> rdpmc() // GPFs due to rdpmc being disabled
The problem is of course that exec() plays tricks with what is
current->mm, only destroying the old mappings after having
installed the new mm.
Fix this confusion by passing along vma->vm_mm instead of relying on
current->mm.
Reported-by: Vince Weaver <[email protected]>
Tested-by: Vince Weaver <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Andy Lutomirski <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Fixes: 1e0fb9ec679c ("perf: Add pmu callbacks to track event mapping and unmapping")
Link: http://lkml.kernel.org/r/[email protected]
[ Minor cleanups. ]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/events/core.c | 16 +++++++---------
include/linux/perf_event.h | 4 ++--
kernel/events/core.c | 6 +++---
3 files changed, 12 insertions(+), 14 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8e3db8f..af12e29 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2114,7 +2114,7 @@ static void refresh_pce(void *ignored)
load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
}
-static void x86_pmu_event_mapped(struct perf_event *event)
+static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return;
@@ -2129,22 +2129,20 @@ static void x86_pmu_event_mapped(struct perf_event *event)
* For now, this can't happen because all callers hold mmap_sem
* for write. If this changes, we'll need a different solution.
*/
- lockdep_assert_held_exclusive(&current->mm->mmap_sem);
+ lockdep_assert_held_exclusive(&mm->mmap_sem);
- if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
- on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+ if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
+ on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
}
-static void x86_pmu_event_unmapped(struct perf_event *event)
+static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
- if (!current->mm)
- return;
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return;
- if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
- on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+ if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
+ on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
}
static int x86_pmu_event_idx(struct perf_event *event)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a3b873f..b14095b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -310,8 +310,8 @@ struct pmu {
* Notification that the event was mapped or unmapped. Called
* in the context of the mapping task.
*/
- void (*event_mapped) (struct perf_event *event); /*optional*/
- void (*event_unmapped) (struct perf_event *event); /*optional*/
+ void (*event_mapped) (struct perf_event *event, struct mm_struct *mm); /* optional */
+ void (*event_unmapped) (struct perf_event *event, struct mm_struct *mm); /* optional */
/*
* Flags for ->add()/->del()/ ->start()/->stop(). There are
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 426c2ff..a654b8a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5090,7 +5090,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
atomic_inc(&event->rb->aux_mmap_count);
if (event->pmu->event_mapped)
- event->pmu->event_mapped(event);
+ event->pmu->event_mapped(event, vma->vm_mm);
}
static void perf_pmu_output_stop(struct perf_event *event);
@@ -5113,7 +5113,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
unsigned long size = perf_data_size(rb);
if (event->pmu->event_unmapped)
- event->pmu->event_unmapped(event);
+ event->pmu->event_unmapped(event, vma->vm_mm);
/*
* rb->aux_mmap_count will always drop before rb->mmap_count and
@@ -5411,7 +5411,7 @@ aux_unlock:
vma->vm_ops = &perf_mmap_vmops;
if (event->pmu->event_mapped)
- event->pmu->event_mapped(event);
+ event->pmu->event_mapped(event, vma->vm_mm);
return ret;
}