2020-09-30 09:37:07

by liwei (GF)

[permalink] [raw]
Subject: [PATCH v2] drivers/perf: Add support for ARMv8.3-SPE

Armv8.3 extends the SPE by adding:
- Alignment field in the Events packet, and filtering on this event
using PMSEVFR_EL1.
- Support for the Scalable Vector Extension (SVE).

The main additions for SVE are:
- Recording the vector length for SVE operations in the Operation Type
packet. It is not possible to filter on vector length.
- Incomplete predicate and empty predicate fields in the Events packet,
and filtering on these events using PMSEVFR_EL1.

Update the check of pmsevfr for empty/partial predicated SVE and
alignment event in SPE driver. For adaption by the version of SPE,
expose 'pmsver' as cap attribute to userspace.

Signed-off-by: Wei Li <[email protected]>
---
v1 -> v2:
- Rename 'pmuver' to 'pmsver', change it's type to 'u16' from 'int'.
(Suggested by Will and Leo.)
- Expose 'pmsver' as cap attribute through sysfs, instead of printing.
(Suggested by Will.)
---
arch/arm64/include/asm/sysreg.h | 4 +++-
drivers/perf/arm_spe_pmu.c | 18 ++++++++++++++++--
2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 554a7e8ecb07..f4f9c1fc6398 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -281,7 +281,6 @@
#define SYS_PMSFCR_EL1_ST_SHIFT 18

#define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5)
-#define SYS_PMSEVFR_EL1_RES0 0x0000ffff00ff0f55UL

#define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6)
#define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0
@@ -787,6 +786,9 @@
#define ID_AA64DFR0_PMUVER_8_5 0x6
#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf

+#define ID_AA64DFR0_PMSVER_8_2 0x1
+#define ID_AA64DFR0_PMSVER_8_3 0x2
+
#define ID_DFR0_PERFMON_SHIFT 24

#define ID_DFR0_PERFMON_8_1 0x4
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index cc00915ad6d1..52e7869f5621 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -54,7 +54,7 @@ struct arm_spe_pmu {
struct hlist_node hotplug_node;

int irq; /* PPI */
-
+ u16 pmsver;
u16 min_period;
u16 counter_sz;

@@ -80,6 +80,15 @@ struct arm_spe_pmu {
/* Keep track of our dynamic hotplug state */
static enum cpuhp_state arm_spe_pmu_online;

+static u64 sys_pmsevfr_el1_mask[] = {
+ [ID_AA64DFR0_PMSVER_8_2] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
+ GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
+ BIT_ULL(1),
+ [ID_AA64DFR0_PMSVER_8_3] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
+ GENMASK_ULL(18, 17) | GENMASK_ULL(15, 11) | BIT_ULL(7) |
+ BIT_ULL(5) | BIT_ULL(3) | BIT_ULL(1),
+};
+
enum arm_spe_pmu_buf_fault_action {
SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
SPE_PMU_BUF_FAULT_ACT_FATAL,
@@ -93,6 +102,7 @@ enum arm_spe_pmu_capabilities {
SPE_PMU_CAP_FEAT_MAX,
SPE_PMU_CAP_CNT_SZ = SPE_PMU_CAP_FEAT_MAX,
SPE_PMU_CAP_MIN_IVAL,
+ SPE_PMU_CAP_PMSVER,
};

static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = {
@@ -110,6 +120,8 @@ static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
return spe_pmu->counter_sz;
case SPE_PMU_CAP_MIN_IVAL:
return spe_pmu->min_period;
+ case SPE_PMU_CAP_PMSVER:
+ return spe_pmu->pmsver;
default:
WARN(1, "unknown cap %d\n", cap);
}
@@ -143,6 +155,7 @@ static struct attribute *arm_spe_pmu_cap_attr[] = {
SPE_CAP_EXT_ATTR_ENTRY(ernd, SPE_PMU_CAP_ERND),
SPE_CAP_EXT_ATTR_ENTRY(count_size, SPE_PMU_CAP_CNT_SZ),
SPE_CAP_EXT_ATTR_ENTRY(min_interval, SPE_PMU_CAP_MIN_IVAL),
+ SPE_CAP_EXT_ATTR_ENTRY(pmsver, SPE_PMU_CAP_PMSVER),
NULL,
};

@@ -670,7 +683,7 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
!cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
return -ENOENT;

- if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
+ if (arm_spe_event_to_pmsevfr(event) & ~sys_pmsevfr_el1_mask[spe_pmu->pmsver])
return -EOPNOTSUPP;

if (attr->exclude_idle)
@@ -937,6 +950,7 @@ static void __arm_spe_pmu_dev_probe(void *info)
fld, smp_processor_id());
return;
}
+ spe_pmu->pmsver = (u16)fld;

/* Read PMBIDR first to determine whether or not we have access */
reg = read_sysreg_s(SYS_PMBIDR_EL1);
--
2.17.1


2020-10-02 10:58:51

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v2] drivers/perf: Add support for ARMv8.3-SPE

On Wed, Sep 30, 2020 at 05:31:35PM +0800, Wei Li wrote:
> Armv8.3 extends the SPE by adding:
> - Alignment field in the Events packet, and filtering on this event
> using PMSEVFR_EL1.
> - Support for the Scalable Vector Extension (SVE).
>
> The main additions for SVE are:
> - Recording the vector length for SVE operations in the Operation Type
> packet. It is not possible to filter on vector length.
> - Incomplete predicate and empty predicate fields in the Events packet,
> and filtering on these events using PMSEVFR_EL1.
>
> Update the check of pmsevfr for empty/partial predicated SVE and
> alignment event in SPE driver. For adaption by the version of SPE,
> expose 'pmsver' as cap attribute to userspace.
>
> Signed-off-by: Wei Li <[email protected]>
> ---
> v1 -> v2:
> - Rename 'pmuver' to 'pmsver', change it's type to 'u16' from 'int'.
> (Suggested by Will and Leo.)
> - Expose 'pmsver' as cap attribute through sysfs, instead of printing.
> (Suggested by Will.)
> ---
> arch/arm64/include/asm/sysreg.h | 4 +++-
> drivers/perf/arm_spe_pmu.c | 18 ++++++++++++++++--
> 2 files changed, 19 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 554a7e8ecb07..f4f9c1fc6398 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -281,7 +281,6 @@
> #define SYS_PMSFCR_EL1_ST_SHIFT 18
>
> #define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5)
> -#define SYS_PMSEVFR_EL1_RES0 0x0000ffff00ff0f55UL
>
> #define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6)
> #define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0
> @@ -787,6 +786,9 @@
> #define ID_AA64DFR0_PMUVER_8_5 0x6
> #define ID_AA64DFR0_PMUVER_IMP_DEF 0xf
>
> +#define ID_AA64DFR0_PMSVER_8_2 0x1
> +#define ID_AA64DFR0_PMSVER_8_3 0x2
> +
> #define ID_DFR0_PERFMON_SHIFT 24
>
> #define ID_DFR0_PERFMON_8_1 0x4
> diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
> index cc00915ad6d1..52e7869f5621 100644
> --- a/drivers/perf/arm_spe_pmu.c
> +++ b/drivers/perf/arm_spe_pmu.c
> @@ -54,7 +54,7 @@ struct arm_spe_pmu {
> struct hlist_node hotplug_node;
>
> int irq; /* PPI */
> -
> + u16 pmsver;
> u16 min_period;
> u16 counter_sz;
>
> @@ -80,6 +80,15 @@ struct arm_spe_pmu {
> /* Keep track of our dynamic hotplug state */
> static enum cpuhp_state arm_spe_pmu_online;
>
> +static u64 sys_pmsevfr_el1_mask[] = {
> + [ID_AA64DFR0_PMSVER_8_2] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
> + GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
> + BIT_ULL(1),
> + [ID_AA64DFR0_PMSVER_8_3] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
> + GENMASK_ULL(18, 17) | GENMASK_ULL(15, 11) | BIT_ULL(7) |
> + BIT_ULL(5) | BIT_ULL(3) | BIT_ULL(1),
> +};

Ok, so I finally figured out what I don't like about this: it's the fact
that the RES0 mask only ever reduces, but we have no way of ensuring that
by construction with this approach. In other words, it's a bit brittle to
keep all of these things defined entirely separately from one another.

How about a small change so that we define things like:

#define SYS_PMSEVFR_EL1_RES0_8_2 SYS_PMSEVFR_EL1_RES0 &
~(...)

#define SYS_PMSEVFR_EL1_RES0_8_3 SYS_PMSEVFR_EL1_RES0_8_2 &
~(...)

where the '...' parts identify the bits that are no longer RES0 for that
version of the architecture?

What do you think?

Will

2020-11-19 15:03:04

by liwei (GF)

[permalink] [raw]
Subject: Re: [PATCH v2] drivers/perf: Add support for ARMv8.3-SPE

Hi Will,

On 2020/10/2 18:57, Will Deacon wrote:
> On Wed, Sep 30, 2020 at 05:31:35PM +0800, Wei Li wrote:
>> Armv8.3 extends the SPE by adding:
>> - Alignment field in the Events packet, and filtering on this event
>> using PMSEVFR_EL1.
>> - Support for the Scalable Vector Extension (SVE).
>>
>> The main additions for SVE are:
>> - Recording the vector length for SVE operations in the Operation Type
>> packet. It is not possible to filter on vector length.
>> - Incomplete predicate and empty predicate fields in the Events packet,
>> and filtering on these events using PMSEVFR_EL1.
>>
>> Update the check of pmsevfr for empty/partial predicated SVE and
>> alignment event in SPE driver. For adaption by the version of SPE,
>> expose 'pmsver' as cap attribute to userspace.
>>
>> Signed-off-by: Wei Li <[email protected]>
>> ---
>> v1 -> v2:
>> - Rename 'pmuver' to 'pmsver', change it's type to 'u16' from 'int'.
>> (Suggested by Will and Leo.)
>> - Expose 'pmsver' as cap attribute through sysfs, instead of printing.
>> (Suggested by Will.)
>> ---
>> arch/arm64/include/asm/sysreg.h | 4 +++-
>> drivers/perf/arm_spe_pmu.c | 18 ++++++++++++++++--
>> 2 files changed, 19 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>> index 554a7e8ecb07..f4f9c1fc6398 100644
>> --- a/arch/arm64/include/asm/sysreg.h
>> +++ b/arch/arm64/include/asm/sysreg.h
>> @@ -281,7 +281,6 @@
>> #define SYS_PMSFCR_EL1_ST_SHIFT 18
>>
>> #define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5)
>> -#define SYS_PMSEVFR_EL1_RES0 0x0000ffff00ff0f55UL
>>
>> #define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6)
>> #define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0
>> @@ -787,6 +786,9 @@
>> #define ID_AA64DFR0_PMUVER_8_5 0x6
>> #define ID_AA64DFR0_PMUVER_IMP_DEF 0xf
>>
>> +#define ID_AA64DFR0_PMSVER_8_2 0x1
>> +#define ID_AA64DFR0_PMSVER_8_3 0x2
>> +
>> #define ID_DFR0_PERFMON_SHIFT 24
>>
>> #define ID_DFR0_PERFMON_8_1 0x4
>> diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
>> index cc00915ad6d1..52e7869f5621 100644
>> --- a/drivers/perf/arm_spe_pmu.c
>> +++ b/drivers/perf/arm_spe_pmu.c
>> @@ -54,7 +54,7 @@ struct arm_spe_pmu {
>> struct hlist_node hotplug_node;
>>
>> int irq; /* PPI */
>> -
>> + u16 pmsver;
>> u16 min_period;
>> u16 counter_sz;
>>
>> @@ -80,6 +80,15 @@ struct arm_spe_pmu {
>> /* Keep track of our dynamic hotplug state */
>> static enum cpuhp_state arm_spe_pmu_online;
>>
>> +static u64 sys_pmsevfr_el1_mask[] = {
>> + [ID_AA64DFR0_PMSVER_8_2] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
>> + GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
>> + BIT_ULL(1),
>> + [ID_AA64DFR0_PMSVER_8_3] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
>> + GENMASK_ULL(18, 17) | GENMASK_ULL(15, 11) | BIT_ULL(7) |
>> + BIT_ULL(5) | BIT_ULL(3) | BIT_ULL(1),
>> +};
>
> Ok, so I finally figured out what I don't like about this: it's the fact
> that the RES0 mask only ever reduces, but we have no way of ensuring that
> by construction with this approach. In other words, it's a bit brittle to
> keep all of these things defined entirely separately from one another.
>
> How about a small change so that we define things like:
>
> #define SYS_PMSEVFR_EL1_RES0_8_2 SYS_PMSEVFR_EL1_RES0 &
> ~(...)
>
> #define SYS_PMSEVFR_EL1_RES0_8_3 SYS_PMSEVFR_EL1_RES0_8_2 &
> ~(...)
>
> where the '...' parts identify the bits that are no longer RES0 for that
> version of the architecture?
>

Sorry for the long delay.

These is also an array-index-out-of-bounds issue when accessing 'sys_pmsevfr_el1_mask', if
the pmsver read in the future is bigger than what the driver supports.

So how about change to:

static u64 arm_spe_pmsevfr_mask(u16 pmsver)
{
u64 mask = 0;

if (pmsver >= ID_AA64DFR0_PMSVER_8_3)
mask |= BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11);
if (pmsver >= ID_AA64DFR0_PMSVER_8_2)
mask |= GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
BIT_ULL(1);
return mask;
}

Thus, the driver can try its best to support, and the definition is clear enough to show
the difference between versions of SPE.

Or should i still define them as what you advised and add a check of pmsver to just serve
the versions what the driver support?

Thanks,
Wei

2020-11-25 14:17:20

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v2] drivers/perf: Add support for ARMv8.3-SPE

On Thu, Nov 19, 2020 at 10:57:47PM +0800, liwei (GF) wrote:
> On 2020/10/2 18:57, Will Deacon wrote:
> > On Wed, Sep 30, 2020 at 05:31:35PM +0800, Wei Li wrote:
> >> @@ -80,6 +80,15 @@ struct arm_spe_pmu {
> >> /* Keep track of our dynamic hotplug state */
> >> static enum cpuhp_state arm_spe_pmu_online;
> >>
> >> +static u64 sys_pmsevfr_el1_mask[] = {
> >> + [ID_AA64DFR0_PMSVER_8_2] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
> >> + GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
> >> + BIT_ULL(1),
> >> + [ID_AA64DFR0_PMSVER_8_3] = GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
> >> + GENMASK_ULL(18, 17) | GENMASK_ULL(15, 11) | BIT_ULL(7) |
> >> + BIT_ULL(5) | BIT_ULL(3) | BIT_ULL(1),
> >> +};
> >
> > Ok, so I finally figured out what I don't like about this: it's the fact
> > that the RES0 mask only ever reduces, but we have no way of ensuring that
> > by construction with this approach. In other words, it's a bit brittle to
> > keep all of these things defined entirely separately from one another.
> >
> > How about a small change so that we define things like:
> >
> > #define SYS_PMSEVFR_EL1_RES0_8_2 SYS_PMSEVFR_EL1_RES0 &
> > ~(...)
> >
> > #define SYS_PMSEVFR_EL1_RES0_8_3 SYS_PMSEVFR_EL1_RES0_8_2 &
> > ~(...)
> >
> > where the '...' parts identify the bits that are no longer RES0 for that
> > version of the architecture?
> >
>
> Sorry for the long delay.
>
> These is also an array-index-out-of-bounds issue when accessing 'sys_pmsevfr_el1_mask', if
> the pmsver read in the future is bigger than what the driver supports.
>
> So how about change to:
>
> static u64 arm_spe_pmsevfr_mask(u16 pmsver)
> {
> u64 mask = 0;
>
> if (pmsver >= ID_AA64DFR0_PMSVER_8_3)
> mask |= BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11);
> if (pmsver >= ID_AA64DFR0_PMSVER_8_2)
> mask |= GENMASK_ULL(63, 48) | GENMASK_ULL(31, 24) |
> GENMASK_ULL(15, 12) | BIT_ULL(7) | BIT_ULL(5) | BIT_ULL(3) |
> BIT_ULL(1);
> return mask;
> }
>
> Thus, the driver can try its best to support, and the definition is clear enough to show
> the difference between versions of SPE.
>
> Or should i still define them as what you advised and add a check of pmsver to just serve
> the versions what the driver support?

I think I'd prefer that, yes.

Will