2023-12-16 07:28:45

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH] perf/x86: Fix out of range data

On x86 each cpu_hw_events maintains a table for counter assignment but
it missed to update one for the deleted event in x86_pmu_del(). This
can make perf_clear_dirty_counters() reset used counter if it's called
before event scheduling or enabling. Then it would return out of range
data which doesn't make sense.

The following code can reproduce the problem.

$ cat repro.c
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>

struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.disabled = 1,
};

void *worker(void *arg)
{
int cpu = (long)arg;
int fd1 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
int fd2 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
void *p;

do {
ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0);
ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);

ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
munmap(p, 4096);
ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
} while (1);

return NULL;
}

int main(void)
{
int i;
int n = sysconf(_SC_NPROCESSORS_ONLN);
pthread_t *th = calloc(n, sizeof(*th));

for (i = 0; i < n; i++)
pthread_create(&th[i], NULL, worker, (void *)(long)i);
for (i = 0; i < n; i++)
pthread_join(th[i], NULL);

free(th);
return 0;
}

And you can see the out of range data using perf stat like this.
Probably it'd be easier to see on a large machine.

$ gcc -o repro repro.c -pthread
$ ./repro &
$ sudo perf stat -A -I 1000 2>&1 | awk '{ if (length($3) > 15) print }'
1.001028462 CPU6 196,719,295,683,763 cycles # 194290.996 GHz (71.54%)
1.001028462 CPU3 396,077,485,787,730 branch-misses # 15804359784.80% of all branches (71.07%)
1.001028462 CPU17 197,608,350,727,877 branch-misses # 14594186554.56% of all branches (71.22%)
2.020064073 CPU4 198,372,472,612,140 cycles # 194681.113 GHz (70.95%)
2.020064073 CPU6 199,419,277,896,696 cycles # 195720.007 GHz (70.57%)
2.020064073 CPU20 198,147,174,025,639 cycles # 194474.654 GHz (71.03%)
2.020064073 CPU20 198,421,240,580,145 stalled-cycles-frontend # 100.14% frontend cycles idle (70.93%)
3.037443155 CPU4 197,382,689,923,416 cycles # 194043.065 GHz (71.30%)
3.037443155 CPU20 196,324,797,879,414 cycles # 193003.773 GHz (71.69%)
3.037443155 CPU5 197,679,956,608,205 stalled-cycles-backend # 1315606428.66% backend cycles idle (71.19%)
3.037443155 CPU5 198,571,860,474,851 instructions # 13215422.58 insn per cycle

It should move the contents in the cpuc->assign as well.

Fixes: 5471eea5d3bf ("perf/x86: Reset the dirty counter to prevent the leak for an RDPMC task")
Cc: Kan Liang <[email protected]>
Cc: [email protected]
Signed-off-by: Namhyung Kim <[email protected]>
---
arch/x86/events/core.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 09050641ce5d..5b0dd07b1ef1 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
while (++i < cpuc->n_events) {
cpuc->event_list[i-1] = cpuc->event_list[i];
cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+ cpuc->assign[i-1] = cpuc->assign[i];
}
cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
--
2.43.0.472.g3155946c3a-goog



2023-12-16 12:42:59

by Liang, Kan

[permalink] [raw]
Subject: Re: [PATCH] perf/x86: Fix out of range data



On 2023-12-16 2:28 a.m., Namhyung Kim wrote:
> On x86 each cpu_hw_events maintains a table for counter assignment but
> it missed to update one for the deleted event in x86_pmu_del(). This
> can make perf_clear_dirty_counters() reset used counter if it's called
> before event scheduling or enabling. Then it would return out of range
> data which doesn't make sense.
>
> The following code can reproduce the problem.
>
> $ cat repro.c
> #include <pthread.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <unistd.h>
> #include <linux/perf_event.h>
> #include <sys/ioctl.h>
> #include <sys/mman.h>
> #include <sys/syscall.h>
>
> struct perf_event_attr attr = {
> .type = PERF_TYPE_HARDWARE,
> .config = PERF_COUNT_HW_CPU_CYCLES,
> .disabled = 1,
> };
>
> void *worker(void *arg)
> {
> int cpu = (long)arg;
> int fd1 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
> int fd2 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
> void *p;
>
> do {
> ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
> p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0);
> ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
>
> ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
> munmap(p, 4096);
> ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
> } while (1);
>
> return NULL;
> }
>
> int main(void)
> {
> int i;
> int n = sysconf(_SC_NPROCESSORS_ONLN);
> pthread_t *th = calloc(n, sizeof(*th));
>
> for (i = 0; i < n; i++)
> pthread_create(&th[i], NULL, worker, (void *)(long)i);
> for (i = 0; i < n; i++)
> pthread_join(th[i], NULL);
>
> free(th);
> return 0;
> }
>
> And you can see the out of range data using perf stat like this.
> Probably it'd be easier to see on a large machine.
>
> $ gcc -o repro repro.c -pthread
> $ ./repro &
> $ sudo perf stat -A -I 1000 2>&1 | awk '{ if (length($3) > 15) print }'
> 1.001028462 CPU6 196,719,295,683,763 cycles # 194290.996 GHz (71.54%)
> 1.001028462 CPU3 396,077,485,787,730 branch-misses # 15804359784.80% of all branches (71.07%)
> 1.001028462 CPU17 197,608,350,727,877 branch-misses # 14594186554.56% of all branches (71.22%)
> 2.020064073 CPU4 198,372,472,612,140 cycles # 194681.113 GHz (70.95%)
> 2.020064073 CPU6 199,419,277,896,696 cycles # 195720.007 GHz (70.57%)
> 2.020064073 CPU20 198,147,174,025,639 cycles # 194474.654 GHz (71.03%)
> 2.020064073 CPU20 198,421,240,580,145 stalled-cycles-frontend # 100.14% frontend cycles idle (70.93%)
> 3.037443155 CPU4 197,382,689,923,416 cycles # 194043.065 GHz (71.30%)
> 3.037443155 CPU20 196,324,797,879,414 cycles # 193003.773 GHz (71.69%)
> 3.037443155 CPU5 197,679,956,608,205 stalled-cycles-backend # 1315606428.66% backend cycles idle (71.19%)
> 3.037443155 CPU5 198,571,860,474,851 instructions # 13215422.58 insn per cycle
>
> It should move the contents in the cpuc->assign as well.

Yes, the patch looks good to me.

Reviewed-by: Kan Liang <[email protected]>

Thanks,
Kan
>
> Fixes: 5471eea5d3bf ("perf/x86: Reset the dirty counter to prevent the leak for an RDPMC task")
> Cc: Kan Liang <[email protected]>
> Cc: [email protected]
> Signed-off-by: Namhyung Kim <[email protected]>
> ---
> arch/x86/events/core.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index 09050641ce5d..5b0dd07b1ef1 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
> while (++i < cpuc->n_events) {
> cpuc->event_list[i-1] = cpuc->event_list[i];
> cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
> + cpuc->assign[i-1] = cpuc->assign[i];
> }
> cpuc->event_constraint[i-1] = NULL;
> --cpuc->n_events;

2024-01-09 21:28:25

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH] perf/x86: Fix out of range data

Hello,

On Sat, Dec 16, 2023 at 4:42 AM Liang, Kan <[email protected]> wrote:
>
>
>
> On 2023-12-16 2:28 a.m., Namhyung Kim wrote:
> > On x86 each cpu_hw_events maintains a table for counter assignment but
> > it missed to update one for the deleted event in x86_pmu_del(). This
> > can make perf_clear_dirty_counters() reset used counter if it's called
> > before event scheduling or enabling. Then it would return out of range
> > data which doesn't make sense.
> >
> > The following code can reproduce the problem.
> >
> > $ cat repro.c
> > #include <pthread.h>
> > #include <stdio.h>
> > #include <stdlib.h>
> > #include <unistd.h>
> > #include <linux/perf_event.h>
> > #include <sys/ioctl.h>
> > #include <sys/mman.h>
> > #include <sys/syscall.h>
> >
> > struct perf_event_attr attr = {
> > .type = PERF_TYPE_HARDWARE,
> > .config = PERF_COUNT_HW_CPU_CYCLES,
> > .disabled = 1,
> > };
> >
> > void *worker(void *arg)
> > {
> > int cpu = (long)arg;
> > int fd1 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
> > int fd2 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
> > void *p;
> >
> > do {
> > ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
> > p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0);
> > ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
> >
> > ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
> > munmap(p, 4096);
> > ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
> > } while (1);
> >
> > return NULL;
> > }
> >
> > int main(void)
> > {
> > int i;
> > int n = sysconf(_SC_NPROCESSORS_ONLN);
> > pthread_t *th = calloc(n, sizeof(*th));
> >
> > for (i = 0; i < n; i++)
> > pthread_create(&th[i], NULL, worker, (void *)(long)i);
> > for (i = 0; i < n; i++)
> > pthread_join(th[i], NULL);
> >
> > free(th);
> > return 0;
> > }
> >
> > And you can see the out of range data using perf stat like this.
> > Probably it'd be easier to see on a large machine.
> >
> > $ gcc -o repro repro.c -pthread
> > $ ./repro &
> > $ sudo perf stat -A -I 1000 2>&1 | awk '{ if (length($3) > 15) print }'
> > 1.001028462 CPU6 196,719,295,683,763 cycles # 194290.996 GHz (71.54%)
> > 1.001028462 CPU3 396,077,485,787,730 branch-misses # 15804359784.80% of all branches (71.07%)
> > 1.001028462 CPU17 197,608,350,727,877 branch-misses # 14594186554.56% of all branches (71.22%)
> > 2.020064073 CPU4 198,372,472,612,140 cycles # 194681.113 GHz (70.95%)
> > 2.020064073 CPU6 199,419,277,896,696 cycles # 195720.007 GHz (70.57%)
> > 2.020064073 CPU20 198,147,174,025,639 cycles # 194474.654 GHz (71.03%)
> > 2.020064073 CPU20 198,421,240,580,145 stalled-cycles-frontend # 100.14% frontend cycles idle (70.93%)
> > 3.037443155 CPU4 197,382,689,923,416 cycles # 194043.065 GHz (71.30%)
> > 3.037443155 CPU20 196,324,797,879,414 cycles # 193003.773 GHz (71.69%)
> > 3.037443155 CPU5 197,679,956,608,205 stalled-cycles-backend # 1315606428.66% backend cycles idle (71.19%)
> > 3.037443155 CPU5 198,571,860,474,851 instructions # 13215422.58 insn per cycle
> >
> > It should move the contents in the cpuc->assign as well.
>
> Yes, the patch looks good to me.
>
> Reviewed-by: Kan Liang <[email protected]>

Thanks for your review, Kan.

Ingo, Peter, can you please pick this up?

Thanks,
Namhyung

2024-02-06 21:19:28

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH] perf/x86: Fix out of range data

Ping!

On Tue, Jan 9, 2024 at 1:28 PM Namhyung Kim <[email protected]> wrote:
>
> Hello,
>
> On Sat, Dec 16, 2023 at 4:42 AM Liang, Kan <[email protected]> wrote:
> >
> >
> >
> > On 2023-12-16 2:28 a.m., Namhyung Kim wrote:
> > > On x86 each cpu_hw_events maintains a table for counter assignment but
> > > it missed to update one for the deleted event in x86_pmu_del(). This
> > > can make perf_clear_dirty_counters() reset used counter if it's called
> > > before event scheduling or enabling. Then it would return out of range
> > > data which doesn't make sense.
> > >
> > > The following code can reproduce the problem.
> > >
> > > $ cat repro.c
> > > #include <pthread.h>
> > > #include <stdio.h>
> > > #include <stdlib.h>
> > > #include <unistd.h>
> > > #include <linux/perf_event.h>
> > > #include <sys/ioctl.h>
> > > #include <sys/mman.h>
> > > #include <sys/syscall.h>
> > >
> > > struct perf_event_attr attr = {
> > > .type = PERF_TYPE_HARDWARE,
> > > .config = PERF_COUNT_HW_CPU_CYCLES,
> > > .disabled = 1,
> > > };
> > >
> > > void *worker(void *arg)
> > > {
> > > int cpu = (long)arg;
> > > int fd1 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
> > > int fd2 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
> > > void *p;
> > >
> > > do {
> > > ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
> > > p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0);
> > > ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
> > >
> > > ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
> > > munmap(p, 4096);
> > > ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
> > > } while (1);
> > >
> > > return NULL;
> > > }
> > >
> > > int main(void)
> > > {
> > > int i;
> > > int n = sysconf(_SC_NPROCESSORS_ONLN);
> > > pthread_t *th = calloc(n, sizeof(*th));
> > >
> > > for (i = 0; i < n; i++)
> > > pthread_create(&th[i], NULL, worker, (void *)(long)i);
> > > for (i = 0; i < n; i++)
> > > pthread_join(th[i], NULL);
> > >
> > > free(th);
> > > return 0;
> > > }
> > >
> > > And you can see the out of range data using perf stat like this.
> > > Probably it'd be easier to see on a large machine.
> > >
> > > $ gcc -o repro repro.c -pthread
> > > $ ./repro &
> > > $ sudo perf stat -A -I 1000 2>&1 | awk '{ if (length($3) > 15) print }'
> > > 1.001028462 CPU6 196,719,295,683,763 cycles # 194290.996 GHz (71.54%)
> > > 1.001028462 CPU3 396,077,485,787,730 branch-misses # 15804359784.80% of all branches (71.07%)
> > > 1.001028462 CPU17 197,608,350,727,877 branch-misses # 14594186554.56% of all branches (71.22%)
> > > 2.020064073 CPU4 198,372,472,612,140 cycles # 194681.113 GHz (70.95%)
> > > 2.020064073 CPU6 199,419,277,896,696 cycles # 195720.007 GHz (70.57%)
> > > 2.020064073 CPU20 198,147,174,025,639 cycles # 194474.654 GHz (71.03%)
> > > 2.020064073 CPU20 198,421,240,580,145 stalled-cycles-frontend # 100.14% frontend cycles idle (70.93%)
> > > 3.037443155 CPU4 197,382,689,923,416 cycles # 194043.065 GHz (71.30%)
> > > 3.037443155 CPU20 196,324,797,879,414 cycles # 193003.773 GHz (71.69%)
> > > 3.037443155 CPU5 197,679,956,608,205 stalled-cycles-backend # 1315606428.66% backend cycles idle (71.19%)
> > > 3.037443155 CPU5 198,571,860,474,851 instructions # 13215422.58 insn per cycle
> > >
> > > It should move the contents in the cpuc->assign as well.
> >
> > Yes, the patch looks good to me.
> >
> > Reviewed-by: Kan Liang <[email protected]>
>
> Thanks for your review, Kan.
>
> Ingo, Peter, can you please pick this up?
>
> Thanks,
> Namhyung