2021-12-14 21:56:41

by Rob Herring

[permalink] [raw]
Subject: [PATCH v8] libperf: Add arm64 support to perf_mmap__read_self()

Add the arm64 variants for read_perf_counter() and read_timestamp().
Unfortunately the counter number is encoded into the instruction, so the
code is a bit verbose to enumerate all possible counters.

Signed-off-by: Rob Herring <[email protected]>
---
v8:
- Set attr.config1 to request user access on arm64
v7:
- Move enabling of libperf user read test for arm64 to this patch
---
tools/lib/perf/mmap.c | 98 +++++++++++++++++++++++++++++++
tools/lib/perf/tests/test-evsel.c | 5 +-
2 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index c89dfa5f67b3..7ee3eb9f5e27 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -13,6 +13,7 @@
#include <internal/lib.h>
#include <linux/kernel.h>
#include <linux/math64.h>
+#include <linux/stringify.h>
#include "internal.h"

void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
@@ -294,6 +295,103 @@ static u64 read_timestamp(void)

return low | ((u64)high) << 32;
}
+#elif defined(__aarch64__)
+#define read_sysreg(r) ({ \
+ u64 __val; \
+ asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
+ __val; \
+})
+
+static u64 read_pmccntr(void)
+{
+ return read_sysreg(pmccntr_el0);
+}
+
+#define PMEVCNTR_READ(idx) \
+ static u64 read_pmevcntr_##idx(void) { \
+ return read_sysreg(pmevcntr##idx##_el0); \
+ }
+
+PMEVCNTR_READ(0);
+PMEVCNTR_READ(1);
+PMEVCNTR_READ(2);
+PMEVCNTR_READ(3);
+PMEVCNTR_READ(4);
+PMEVCNTR_READ(5);
+PMEVCNTR_READ(6);
+PMEVCNTR_READ(7);
+PMEVCNTR_READ(8);
+PMEVCNTR_READ(9);
+PMEVCNTR_READ(10);
+PMEVCNTR_READ(11);
+PMEVCNTR_READ(12);
+PMEVCNTR_READ(13);
+PMEVCNTR_READ(14);
+PMEVCNTR_READ(15);
+PMEVCNTR_READ(16);
+PMEVCNTR_READ(17);
+PMEVCNTR_READ(18);
+PMEVCNTR_READ(19);
+PMEVCNTR_READ(20);
+PMEVCNTR_READ(21);
+PMEVCNTR_READ(22);
+PMEVCNTR_READ(23);
+PMEVCNTR_READ(24);
+PMEVCNTR_READ(25);
+PMEVCNTR_READ(26);
+PMEVCNTR_READ(27);
+PMEVCNTR_READ(28);
+PMEVCNTR_READ(29);
+PMEVCNTR_READ(30);
+
+/*
+ * Read PMEVCNTR<counter>_EL0 for 0-30, PMCCNTR_EL0 for 31, else return 0
+ */
+static u64 read_perf_counter(unsigned int counter)
+{
+ static u64 (* const read_f[])(void) = {
+ read_pmevcntr_0,
+ read_pmevcntr_1,
+ read_pmevcntr_2,
+ read_pmevcntr_3,
+ read_pmevcntr_4,
+ read_pmevcntr_5,
+ read_pmevcntr_6,
+ read_pmevcntr_7,
+ read_pmevcntr_8,
+ read_pmevcntr_9,
+ read_pmevcntr_10,
+ read_pmevcntr_11,
+ read_pmevcntr_12,
+ read_pmevcntr_13,
+ read_pmevcntr_14,
+ read_pmevcntr_15,
+ read_pmevcntr_16,
+ read_pmevcntr_17,
+ read_pmevcntr_18,
+ read_pmevcntr_19,
+ read_pmevcntr_20,
+ read_pmevcntr_21,
+ read_pmevcntr_22,
+ read_pmevcntr_23,
+ read_pmevcntr_24,
+ read_pmevcntr_25,
+ read_pmevcntr_26,
+ read_pmevcntr_27,
+ read_pmevcntr_28,
+ read_pmevcntr_29,
+ read_pmevcntr_30,
+ read_pmccntr
+ };
+
+ if (counter < ARRAY_SIZE(read_f))
+ return (read_f[counter])();
+
+ return 0;
+}
+
+static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }
+
#else
static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
static u64 read_timestamp(void) { return 0; }
diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
index 33ae9334861a..89be89afb24d 100644
--- a/tools/lib/perf/tests/test-evsel.c
+++ b/tools/lib/perf/tests/test-evsel.c
@@ -130,6 +130,9 @@ static int test_stat_user_read(int event)
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
.config = event,
+#ifdef __aarch64__
+ .config1 = 0x2, /* Request user access */
+#endif
};
int err, i;

@@ -150,7 +153,7 @@ static int test_stat_user_read(int event)
pc = perf_evsel__mmap_base(evsel, 0, 0);
__T("failed to get mmapped address", pc);

-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
__T("userspace counter access not supported", pc->cap_user_rdpmc);
__T("userspace counter access not enabled", pc->index);
__T("userspace counter width not set", pc->pmc_width >= 32);
--
2.32.0



2021-12-14 22:08:54

by Rob Herring

[permalink] [raw]
Subject: Re: [PATCH v8] libperf: Add arm64 support to perf_mmap__read_self()

On Tue, Dec 14, 2021 at 3:56 PM Rob Herring <[email protected]> wrote:
>
> Add the arm64 variants for read_perf_counter() and read_timestamp().
> Unfortunately the counter number is encoded into the instruction, so the
> code is a bit verbose to enumerate all possible counters.
>
> Signed-off-by: Rob Herring <[email protected]>
> ---
> v8:
> - Set attr.config1 to request user access on arm64

Hit send a bit too quick. The kernel side[1] has now been applied by
Will, so hopefully the libperf enabling can land in 5.17 too.

Rob

[1] https://lore.kernel.org/all/[email protected]/

2022-01-11 22:44:00

by Rob Herring

[permalink] [raw]
Subject: Re: [PATCH v8] libperf: Add arm64 support to perf_mmap__read_self()

On Tue, Dec 14, 2021 at 3:56 PM Rob Herring <[email protected]> wrote:
>
> Add the arm64 variants for read_perf_counter() and read_timestamp().
> Unfortunately the counter number is encoded into the instruction, so the
> code is a bit verbose to enumerate all possible counters.
>
> Signed-off-by: Rob Herring <[email protected]>
> ---
> v8:
> - Set attr.config1 to request user access on arm64
> v7:
> - Move enabling of libperf user read test for arm64 to this patch
> ---
> tools/lib/perf/mmap.c | 98 +++++++++++++++++++++++++++++++
> tools/lib/perf/tests/test-evsel.c | 5 +-
> 2 files changed, 102 insertions(+), 1 deletion(-)

Ping!

>
> diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
> index c89dfa5f67b3..7ee3eb9f5e27 100644
> --- a/tools/lib/perf/mmap.c
> +++ b/tools/lib/perf/mmap.c
> @@ -13,6 +13,7 @@
> #include <internal/lib.h>
> #include <linux/kernel.h>
> #include <linux/math64.h>
> +#include <linux/stringify.h>
> #include "internal.h"
>
> void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
> @@ -294,6 +295,103 @@ static u64 read_timestamp(void)
>
> return low | ((u64)high) << 32;
> }
> +#elif defined(__aarch64__)
> +#define read_sysreg(r) ({ \
> + u64 __val; \
> + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
> + __val; \
> +})
> +
> +static u64 read_pmccntr(void)
> +{
> + return read_sysreg(pmccntr_el0);
> +}
> +
> +#define PMEVCNTR_READ(idx) \
> + static u64 read_pmevcntr_##idx(void) { \
> + return read_sysreg(pmevcntr##idx##_el0); \
> + }
> +
> +PMEVCNTR_READ(0);
> +PMEVCNTR_READ(1);
> +PMEVCNTR_READ(2);
> +PMEVCNTR_READ(3);
> +PMEVCNTR_READ(4);
> +PMEVCNTR_READ(5);
> +PMEVCNTR_READ(6);
> +PMEVCNTR_READ(7);
> +PMEVCNTR_READ(8);
> +PMEVCNTR_READ(9);
> +PMEVCNTR_READ(10);
> +PMEVCNTR_READ(11);
> +PMEVCNTR_READ(12);
> +PMEVCNTR_READ(13);
> +PMEVCNTR_READ(14);
> +PMEVCNTR_READ(15);
> +PMEVCNTR_READ(16);
> +PMEVCNTR_READ(17);
> +PMEVCNTR_READ(18);
> +PMEVCNTR_READ(19);
> +PMEVCNTR_READ(20);
> +PMEVCNTR_READ(21);
> +PMEVCNTR_READ(22);
> +PMEVCNTR_READ(23);
> +PMEVCNTR_READ(24);
> +PMEVCNTR_READ(25);
> +PMEVCNTR_READ(26);
> +PMEVCNTR_READ(27);
> +PMEVCNTR_READ(28);
> +PMEVCNTR_READ(29);
> +PMEVCNTR_READ(30);
> +
> +/*
> + * Read PMEVCNTR<counter>_EL0 for 0-30, PMCCNTR_EL0 for 31, else return 0
> + */
> +static u64 read_perf_counter(unsigned int counter)
> +{
> + static u64 (* const read_f[])(void) = {
> + read_pmevcntr_0,
> + read_pmevcntr_1,
> + read_pmevcntr_2,
> + read_pmevcntr_3,
> + read_pmevcntr_4,
> + read_pmevcntr_5,
> + read_pmevcntr_6,
> + read_pmevcntr_7,
> + read_pmevcntr_8,
> + read_pmevcntr_9,
> + read_pmevcntr_10,
> + read_pmevcntr_11,
> + read_pmevcntr_12,
> + read_pmevcntr_13,
> + read_pmevcntr_14,
> + read_pmevcntr_15,
> + read_pmevcntr_16,
> + read_pmevcntr_17,
> + read_pmevcntr_18,
> + read_pmevcntr_19,
> + read_pmevcntr_20,
> + read_pmevcntr_21,
> + read_pmevcntr_22,
> + read_pmevcntr_23,
> + read_pmevcntr_24,
> + read_pmevcntr_25,
> + read_pmevcntr_26,
> + read_pmevcntr_27,
> + read_pmevcntr_28,
> + read_pmevcntr_29,
> + read_pmevcntr_30,
> + read_pmccntr
> + };
> +
> + if (counter < ARRAY_SIZE(read_f))
> + return (read_f[counter])();
> +
> + return 0;
> +}
> +
> +static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }
> +
> #else
> static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
> static u64 read_timestamp(void) { return 0; }
> diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
> index 33ae9334861a..89be89afb24d 100644
> --- a/tools/lib/perf/tests/test-evsel.c
> +++ b/tools/lib/perf/tests/test-evsel.c
> @@ -130,6 +130,9 @@ static int test_stat_user_read(int event)
> struct perf_event_attr attr = {
> .type = PERF_TYPE_HARDWARE,
> .config = event,
> +#ifdef __aarch64__
> + .config1 = 0x2, /* Request user access */
> +#endif
> };
> int err, i;
>
> @@ -150,7 +153,7 @@ static int test_stat_user_read(int event)
> pc = perf_evsel__mmap_base(evsel, 0, 0);
> __T("failed to get mmapped address", pc);
>
> -#if defined(__i386__) || defined(__x86_64__)
> +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
> __T("userspace counter access not supported", pc->cap_user_rdpmc);
> __T("userspace counter access not enabled", pc->index);
> __T("userspace counter width not set", pc->pmc_width >= 32);
> --
> 2.32.0
>

2022-01-12 18:00:56

by Masayoshi Mizuma

[permalink] [raw]
Subject: Re: [PATCH v8] libperf: Add arm64 support to perf_mmap__read_self()

On Tue, Dec 14, 2021 at 03:56:30PM -0600, Rob Herring wrote:
> Add the arm64 variants for read_perf_counter() and read_timestamp().
> Unfortunately the counter number is encoded into the instruction, so the
> code is a bit verbose to enumerate all possible counters.
>
> Signed-off-by: Rob Herring <[email protected]>

This patch works well on the aarch64 machine, thanks!
Please feel free to add:
Tested-by: Masayoshi Mizuma <[email protected]>

the log of make tests:
```
# echo 1 > /proc/sys/kernel/perf_user_access
# make tests V=1
make -f /src/linux/tools/build/Makefile.build dir=. obj=libperf
make -C /src/linux/tools/lib/api/ O= libapi.a
make -f /src/linux/tools/build/Makefile.build dir=./fd obj=libapi
make -f /src/linux/tools/build/Makefile.build dir=./fs obj=libapi
make -f /src/linux/tools/build/Makefile.build dir=. obj=tests
make -f /src/linux/tools/build/Makefile.build dir=./tests obj=tests
running static:
- running tests/test-cpumap.c...OK
- running tests/test-threadmap.c...OK
- running tests/test-evlist.c...OK
- running tests/test-evsel.c...
loop = 65536, count = 328191
loop = 131072, count = 655878
loop = 262144, count = 1311204
loop = 524288, count = 2640123
loop = 1048576, count = 5243358
loop = 65536, count = 1290820
loop = 131072, count = 2509179
loop = 262144, count = 5011308
loop = 524288, count = 9999221
loop = 1048576, count = 20136809
OK
running dynamic:
- running tests/test-cpumap.c...OK
- running tests/test-threadmap.c...OK
- running tests/test-evlist.c...OK
- running tests/test-evsel.c...
loop = 65536, count = 328219
loop = 131072, count = 655856
loop = 262144, count = 1311199
loop = 524288, count = 2633704
loop = 1048576, count = 5243402
loop = 65536, count = 1303460
loop = 131072, count = 2513614
loop = 262144, count = 5020097
loop = 524288, count = 10043687
loop = 1048576, count = 20101337
OK
#
```

- Masa

> ---
> v8:
> - Set attr.config1 to request user access on arm64
> v7:
> - Move enabling of libperf user read test for arm64 to this patch
> ---
> tools/lib/perf/mmap.c | 98 +++++++++++++++++++++++++++++++
> tools/lib/perf/tests/test-evsel.c | 5 +-
> 2 files changed, 102 insertions(+), 1 deletion(-)
>
> diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
> index c89dfa5f67b3..7ee3eb9f5e27 100644
> --- a/tools/lib/perf/mmap.c
> +++ b/tools/lib/perf/mmap.c
> @@ -13,6 +13,7 @@
> #include <internal/lib.h>
> #include <linux/kernel.h>
> #include <linux/math64.h>
> +#include <linux/stringify.h>
> #include "internal.h"
>
> void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
> @@ -294,6 +295,103 @@ static u64 read_timestamp(void)
>
> return low | ((u64)high) << 32;
> }
> +#elif defined(__aarch64__)
> +#define read_sysreg(r) ({ \
> + u64 __val; \
> + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
> + __val; \
> +})
> +
> +static u64 read_pmccntr(void)
> +{
> + return read_sysreg(pmccntr_el0);
> +}
> +
> +#define PMEVCNTR_READ(idx) \
> + static u64 read_pmevcntr_##idx(void) { \
> + return read_sysreg(pmevcntr##idx##_el0); \
> + }
> +
> +PMEVCNTR_READ(0);
> +PMEVCNTR_READ(1);
> +PMEVCNTR_READ(2);
> +PMEVCNTR_READ(3);
> +PMEVCNTR_READ(4);
> +PMEVCNTR_READ(5);
> +PMEVCNTR_READ(6);
> +PMEVCNTR_READ(7);
> +PMEVCNTR_READ(8);
> +PMEVCNTR_READ(9);
> +PMEVCNTR_READ(10);
> +PMEVCNTR_READ(11);
> +PMEVCNTR_READ(12);
> +PMEVCNTR_READ(13);
> +PMEVCNTR_READ(14);
> +PMEVCNTR_READ(15);
> +PMEVCNTR_READ(16);
> +PMEVCNTR_READ(17);
> +PMEVCNTR_READ(18);
> +PMEVCNTR_READ(19);
> +PMEVCNTR_READ(20);
> +PMEVCNTR_READ(21);
> +PMEVCNTR_READ(22);
> +PMEVCNTR_READ(23);
> +PMEVCNTR_READ(24);
> +PMEVCNTR_READ(25);
> +PMEVCNTR_READ(26);
> +PMEVCNTR_READ(27);
> +PMEVCNTR_READ(28);
> +PMEVCNTR_READ(29);
> +PMEVCNTR_READ(30);
> +
> +/*
> + * Read PMEVCNTR<counter>_EL0 for 0-30, PMCCNTR_EL0 for 31, else return 0
> + */
> +static u64 read_perf_counter(unsigned int counter)
> +{
> + static u64 (* const read_f[])(void) = {
> + read_pmevcntr_0,
> + read_pmevcntr_1,
> + read_pmevcntr_2,
> + read_pmevcntr_3,
> + read_pmevcntr_4,
> + read_pmevcntr_5,
> + read_pmevcntr_6,
> + read_pmevcntr_7,
> + read_pmevcntr_8,
> + read_pmevcntr_9,
> + read_pmevcntr_10,
> + read_pmevcntr_11,
> + read_pmevcntr_12,
> + read_pmevcntr_13,
> + read_pmevcntr_14,
> + read_pmevcntr_15,
> + read_pmevcntr_16,
> + read_pmevcntr_17,
> + read_pmevcntr_18,
> + read_pmevcntr_19,
> + read_pmevcntr_20,
> + read_pmevcntr_21,
> + read_pmevcntr_22,
> + read_pmevcntr_23,
> + read_pmevcntr_24,
> + read_pmevcntr_25,
> + read_pmevcntr_26,
> + read_pmevcntr_27,
> + read_pmevcntr_28,
> + read_pmevcntr_29,
> + read_pmevcntr_30,
> + read_pmccntr
> + };
> +
> + if (counter < ARRAY_SIZE(read_f))
> + return (read_f[counter])();
> +
> + return 0;
> +}
> +
> +static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }
> +
> #else
> static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
> static u64 read_timestamp(void) { return 0; }
> diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
> index 33ae9334861a..89be89afb24d 100644
> --- a/tools/lib/perf/tests/test-evsel.c
> +++ b/tools/lib/perf/tests/test-evsel.c
> @@ -130,6 +130,9 @@ static int test_stat_user_read(int event)
> struct perf_event_attr attr = {
> .type = PERF_TYPE_HARDWARE,
> .config = event,
> +#ifdef __aarch64__
> + .config1 = 0x2, /* Request user access */
> +#endif
> };
> int err, i;
>
> @@ -150,7 +153,7 @@ static int test_stat_user_read(int event)
> pc = perf_evsel__mmap_base(evsel, 0, 0);
> __T("failed to get mmapped address", pc);
>
> -#if defined(__i386__) || defined(__x86_64__)
> +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
> __T("userspace counter access not supported", pc->cap_user_rdpmc);
> __T("userspace counter access not enabled", pc->index);
> __T("userspace counter width not set", pc->pmc_width >= 32);
> --
> 2.32.0
>

2022-01-14 21:34:08

by Rob Herring

[permalink] [raw]
Subject: Re: [PATCH v8] libperf: Add arm64 support to perf_mmap__read_self()

On Fri, Jan 14, 2022 at 8:19 AM Ian Rogers <[email protected]> wrote:
>
> On Tue, Dec 14, 2021, 1:56 PM Rob Herring <[email protected]> wrote:
>>
>> Add the arm64 variants for read_perf_counter() and read_timestamp().
>> Unfortunately the counter number is encoded into the instruction, so the
>> code is a bit verbose to enumerate all possible counters.
>>
>> Signed-off-by: Rob Herring <[email protected]>
>> ---
>> v8:
>> - Set attr.config1 to request user access on arm64
>> v7:
>> - Move enabling of libperf user read test for arm64 to this patch
>> ---
>> tools/lib/perf/mmap.c | 98 +++++++++++++++++++++++++++++++
>> tools/lib/perf/tests/test-evsel.c | 5 +-
>> 2 files changed, 102 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
>> index c89dfa5f67b3..7ee3eb9f5e27 100644
>> --- a/tools/lib/perf/mmap.c
>> +++ b/tools/lib/perf/mmap.c
>> @@ -13,6 +13,7 @@
>> #include <internal/lib.h>
>> #include <linux/kernel.h>
>> #include <linux/math64.h>
>> +#include <linux/stringify.h>
>> #include "internal.h"
>>
>> void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
>> @@ -294,6 +295,103 @@ static u64 read_timestamp(void)
>>
>> return low | ((u64)high) << 32;
>> }
>> +#elif defined(__aarch64__)
>> +#define read_sysreg(r) ({ \
>> + u64 __val; \
>> + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
>> + __val; \
>> +})
>> +
>> +static u64 read_pmccntr(void)
>> +{
>> + return read_sysreg(pmccntr_el0);
>> +}
>> +
>> +#define PMEVCNTR_READ(idx) \
>> + static u64 read_pmevcntr_##idx(void) { \
>> + return read_sysreg(pmevcntr##idx##_el0); \
>> + }
>> +
>> +PMEVCNTR_READ(0);
>> +PMEVCNTR_READ(1);
>> +PMEVCNTR_READ(2);
>> +PMEVCNTR_READ(3);
>> +PMEVCNTR_READ(4);
>> +PMEVCNTR_READ(5);
>> +PMEVCNTR_READ(6);
>> +PMEVCNTR_READ(7);
>> +PMEVCNTR_READ(8);
>> +PMEVCNTR_READ(9);
>> +PMEVCNTR_READ(10);
>> +PMEVCNTR_READ(11);
>> +PMEVCNTR_READ(12);
>> +PMEVCNTR_READ(13);
>> +PMEVCNTR_READ(14);
>> +PMEVCNTR_READ(15);
>> +PMEVCNTR_READ(16);
>> +PMEVCNTR_READ(17);
>> +PMEVCNTR_READ(18);
>> +PMEVCNTR_READ(19);
>> +PMEVCNTR_READ(20);
>> +PMEVCNTR_READ(21);
>> +PMEVCNTR_READ(22);
>> +PMEVCNTR_READ(23);
>> +PMEVCNTR_READ(24);
>> +PMEVCNTR_READ(25);
>> +PMEVCNTR_READ(26);
>> +PMEVCNTR_READ(27);
>> +PMEVCNTR_READ(28);
>> +PMEVCNTR_READ(29);
>> +PMEVCNTR_READ(30);
>
>
> Nit: It looks strange that 31 is not here, ie 31 counters rather than 32.

From the Arm ARM:
D13.4.8
PMEVCNTR<n>_EL0, Performance Monitors Event Count Registers, n = 0 - 30

The 32nd counter is the cycle counter which is special and accessed in
a different register.

Rob