FEAT_PMUv3_TH (Armv8.8) is a new feature that allows conditional
counting of PMU events depending on how much the event increments on
a single cycle. Two new config fields for perf_event_open have been
added, and a PMU cap file for reading the max_threshold. See the first
commit message and the docs in the second commit for more details.
The change has been validated on the Arm FVP model:
# Zero values, works as expected (as before).
$ perf stat -e dtlb_walk/threshold=0,threshold_control=0/ -- true
5962 dtlb_walk/threshold=0,threshold_control=0/
# Threshold >= 255 causes count to be 0 because dtlb_walk doesn't
# increase by more than 1 per cycle.
$ perf stat -e dtlb_walk/threshold=255,threshold_control=5/ -- true
0 dtlb_walk/threshold=255,threshold_control=5/
# Keeping comparison as >= but lowering the threshold to 1 makes the
# count return.
$ perf stat -e dtlb_walk/threshold=1,threshold_control=5/ -- true
6329 dtlb_walk/threshold=1,threshold_control=5/
James Clark (2):
arm64: perf: Add support for event counting threshold
Documentation: arm64: Document the PMU event counting threshold
feature
Documentation/arch/arm64/perf.rst | 58 ++++++++++++++++++++++++++++++
drivers/perf/arm_pmuv3.c | 59 ++++++++++++++++++++++++++++++-
include/linux/perf/arm_pmuv3.h | 7 +++-
3 files changed, 122 insertions(+), 2 deletions(-)
base-commit: 21b61fe48c2fc43d98ebb67a1f3832e0478fa523
--
2.34.1
FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on
events whose count meets a specified threshold condition. For example if
PMEVTYPERn.TC (Threshold Control) is set to 0b101 (Greater than or
equal, count), and the threshold is set to 2, then the PMU counter will
now only increment by 1 when an event would have previously incremented
the PMU counter by 2 or more on a single processor cycle.
Two new Perf event config fields, 'threshold' and 'threshold_control'
have been added for controlling the feature:
$ perf stat -e stall_slot/threshold=2,threshold_control=5/
A new capability for reading out the maximum supported threshold value
has also been added:
$ cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max
0x000000ff
If a threshold higher than threshold_max is provided, then no error is
generated but the threshold is clamped to the max value. If
FEAT_PMUv3_TH isn't implemented, then threshold_max reads zero, and
neither the 'threshold' nor 'threshold_control' parameters will be used.
The threshold is per PMU counter, and there are potentially different
threshold_max values per PMU type on heterogeneous systems.
Bits higher than 32 now need to be written into PMEVTYPER, so
armv8pmu_write_evtype() has to be updated to take a u64 value rather
than u32.
Signed-off-by: James Clark <[email protected]>
---
drivers/perf/arm_pmuv3.c | 59 +++++++++++++++++++++++++++++++++-
include/linux/perf/arm_pmuv3.h | 7 +++-
2 files changed, 64 insertions(+), 2 deletions(-)
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index e5a2ac4155f6..ad6574b2bdab 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -294,9 +294,16 @@ static const struct attribute_group armv8_pmuv3_events_attr_group = {
.is_visible = armv8pmu_event_attr_is_visible,
};
+#define TH_LO 2
+#define TH_HI 13
+#define TH_CNTL_LO 14
+#define TH_CNTL_HI 16
+
PMU_FORMAT_ATTR(event, "config:0-15");
PMU_FORMAT_ATTR(long, "config1:0");
PMU_FORMAT_ATTR(rdpmc, "config1:1");
+PMU_FORMAT_ATTR(threshold, "config1:" __stringify(TH_LO) "-" __stringify(TH_HI));
+PMU_FORMAT_ATTR(threshold_control, "config1:" __stringify(TH_CNTL_LO) "-" __stringify(TH_CNTL_HI));
static int sysctl_perf_user_access __read_mostly;
@@ -310,10 +317,22 @@ static inline bool armv8pmu_event_want_user_access(struct perf_event *event)
return event->attr.config1 & 0x2;
}
+static inline u32 armv8pmu_event_threshold(struct perf_event_attr *attr)
+{
+ return FIELD_GET(GENMASK(TH_HI, TH_LO), attr->config1);
+}
+
+static inline u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr)
+{
+ return FIELD_GET(GENMASK(TH_CNTL_HI, TH_CNTL_LO), attr->config1);
+}
+
static struct attribute *armv8_pmuv3_format_attrs[] = {
&format_attr_event.attr,
&format_attr_long.attr,
&format_attr_rdpmc.attr,
+ &format_attr_threshold.attr,
+ &format_attr_threshold_control.attr,
NULL,
};
@@ -365,10 +384,31 @@ static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR_RO(bus_width);
+static u32 threshold_max(struct arm_pmu *cpu_pmu)
+{
+ /*
+ * The largest value that can be written to PMEVTYPER<n>_EL0.TH is
+ * (2 ^ PMMIR.THWIDTH) - 1
+ */
+ return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1;
+}
+
+static ssize_t threshold_max_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+
+ return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
+}
+
+static DEVICE_ATTR_RO(threshold_max);
+
static struct attribute *armv8_pmuv3_caps_attrs[] = {
&dev_attr_slots.attr,
&dev_attr_bus_slots.attr,
&dev_attr_bus_width.attr,
+ &dev_attr_threshold_max.attr,
NULL,
};
@@ -552,7 +592,7 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value)
armv8pmu_write_hw_counter(event, value);
}
-static inline void armv8pmu_write_evtype(int idx, u32 val)
+static inline void armv8pmu_write_evtype(int idx, u64 val)
{
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
@@ -912,6 +952,10 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
struct perf_event_attr *attr)
{
unsigned long config_base = 0;
+ struct perf_event *perf_event = container_of(attr, struct perf_event,
+ attr);
+ struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
+ u32 th, th_max;
if (attr->exclude_idle)
return -EPERM;
@@ -943,6 +987,19 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
if (attr->exclude_user)
config_base |= ARMV8_PMU_EXCLUDE_EL0;
+ /*
+ * Insert event counting threshold (FEAT_PMUv3_TH) values. If
+ * FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will be
+ * 0 and no values will be written.
+ */
+ th_max = threshold_max(cpu_pmu);
+ if (th_max) {
+ th = min(armv8pmu_event_threshold(attr), th_max);
+ config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
+ config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
+ armv8pmu_event_threshold_control(attr));
+ }
+
/*
* Install the filter into config_base as this is used to
* construct the event type.
diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
index e3899bd77f5c..0e2a3c927150 100644
--- a/include/linux/perf/arm_pmuv3.h
+++ b/include/linux/perf/arm_pmuv3.h
@@ -228,7 +228,11 @@
/*
* PMXEVTYPER: Event selection reg
*/
-#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
+#define ARMV8_PMU_EVTYPE_TH GENMASK(43, 32)
+#define ARMV8_PMU_EVTYPE_TC GENMASK(63, 61)
+/* Mask for writable bits */
+#define ARMV8_PMU_EVTYPE_MASK (0xc800ffff | ARMV8_PMU_EVTYPE_TH | \
+ ARMV8_PMU_EVTYPE_TC)
#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
/*
@@ -254,6 +258,7 @@
#define ARMV8_PMU_BUS_SLOTS_MASK 0xff
#define ARMV8_PMU_BUS_WIDTH_SHIFT 16
#define ARMV8_PMU_BUS_WIDTH_MASK 0xf
+#define ARMV8_PMU_THWIDTH GENMASK(23, 20)
/*
* This code is really good
--
2.34.1
Hi James
On 19/09/2023 10:51, James Clark wrote:
> FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on
> events whose count meets a specified threshold condition. For example if
> PMEVTYPERn.TC (Threshold Control) is set to 0b101 (Greater than or
> equal, count), and the threshold is set to 2, then the PMU counter will
> now only increment by 1 when an event would have previously incremented
> the PMU counter by 2 or more on a single processor cycle.
>
> Two new Perf event config fields, 'threshold' and 'threshold_control'
> have been added for controlling the feature:
>
> $ perf stat -e stall_slot/threshold=2,threshold_control=5/
>
> A new capability for reading out the maximum supported threshold value
> has also been added:
>
> $ cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max
>
> 0x000000ff
>
> If a threshold higher than threshold_max is provided, then no error is
> generated but the threshold is clamped to the max value. If
> FEAT_PMUv3_TH isn't implemented, then threshold_max reads zero, and
> neither the 'threshold' nor 'threshold_control' parameters will be used.
>
> The threshold is per PMU counter, and there are potentially different
> threshold_max values per PMU type on heterogeneous systems.
>
> Bits higher than 32 now need to be written into PMEVTYPER, so
> armv8pmu_write_evtype() has to be updated to take a u64 value rather
> than u32.
Is this supported in the Aarch32 state ? If so, do we need to change
the arch specific register read/write "helpers" ?
>
> Signed-off-by: James Clark <[email protected]>
> ---
> drivers/perf/arm_pmuv3.c | 59 +++++++++++++++++++++++++++++++++-
> include/linux/perf/arm_pmuv3.h | 7 +++-
> 2 files changed, 64 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
> index e5a2ac4155f6..ad6574b2bdab 100644
> --- a/drivers/perf/arm_pmuv3.c
> +++ b/drivers/perf/arm_pmuv3.c
> @@ -294,9 +294,16 @@ static const struct attribute_group armv8_pmuv3_events_attr_group = {
> .is_visible = armv8pmu_event_attr_is_visible,
> };
>
> +#define TH_LO 2
> +#define TH_HI 13
> +#define TH_CNTL_LO 14
> +#define TH_CNTL_HI 16
> +
> PMU_FORMAT_ATTR(event, "config:0-15");
> PMU_FORMAT_ATTR(long, "config1:0");
> PMU_FORMAT_ATTR(rdpmc, "config1:1");
> +PMU_FORMAT_ATTR(threshold, "config1:" __stringify(TH_LO) "-" __stringify(TH_HI));
> +PMU_FORMAT_ATTR(threshold_control, "config1:" __stringify(TH_CNTL_LO) "-" __stringify(TH_CNTL_HI));
The perf core doesn't yet support adding aliases for
fields other than "events". May be we could add
some support for that in the future and use it
to provide meaningful aliases for the threshold
control.
Not sure if we can extrapolate threshold_control[0]
to be another config field => "counting_mode"
We would need to check with the architecture folks
to confirm that the meaning wouldn't change though.
>
> static int sysctl_perf_user_access __read_mostly;
>
> @@ -310,10 +317,22 @@ static inline bool armv8pmu_event_want_user_access(struct perf_event *event)
> return event->attr.config1 & 0x2;
> }
>
> +static inline u32 armv8pmu_event_threshold(struct perf_event_attr *attr)
> +{
> + return FIELD_GET(GENMASK(TH_HI, TH_LO), attr->config1);
> +}
> +
> +static inline u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr)
> +{
> + return FIELD_GET(GENMASK(TH_CNTL_HI, TH_CNTL_LO), attr->config1);
> +}
> +
> static struct attribute *armv8_pmuv3_format_attrs[] = {
> &format_attr_event.attr,
> &format_attr_long.attr,
> &format_attr_rdpmc.attr,
> + &format_attr_threshold.attr,
> + &format_attr_threshold_control.attr,
> NULL,
> };
>
> @@ -365,10 +384,31 @@ static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
>
> static DEVICE_ATTR_RO(bus_width);
>
> +static u32 threshold_max(struct arm_pmu *cpu_pmu)
> +{
> + /*
> + * The largest value that can be written to PMEVTYPER<n>_EL0.TH is
> + * (2 ^ PMMIR.THWIDTH) - 1
> + */
> + return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1;
> +}
> +
> +static ssize_t threshold_max_show(struct device *dev,
> + struct device_attribute *attr, char *page)
> +{
> + struct pmu *pmu = dev_get_drvdata(dev);
> + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
> +
> + return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
> +}
> +
> +static DEVICE_ATTR_RO(threshold_max);
> +
> static struct attribute *armv8_pmuv3_caps_attrs[] = {
> &dev_attr_slots.attr,
> &dev_attr_bus_slots.attr,
> &dev_attr_bus_width.attr,
> + &dev_attr_threshold_max.attr,
> NULL,
> };
>
> @@ -552,7 +592,7 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value)
> armv8pmu_write_hw_counter(event, value);
> }
>
> -static inline void armv8pmu_write_evtype(int idx, u32 val)
> +static inline void armv8pmu_write_evtype(int idx, u64 val)
> {
> u32 counter = ARMV8_IDX_TO_COUNTER(idx);
>
> @@ -912,6 +952,10 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
> struct perf_event_attr *attr)
> {
> unsigned long config_base = 0;
> + struct perf_event *perf_event = container_of(attr, struct perf_event,
> + attr);
> + struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
> + u32 th, th_max;
>
> if (attr->exclude_idle)
> return -EPERM;
> @@ -943,6 +987,19 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
> if (attr->exclude_user)
> config_base |= ARMV8_PMU_EXCLUDE_EL0;
>
> + /*
> + * Insert event counting threshold (FEAT_PMUv3_TH) values. If
> + * FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will be
> + * 0 and no values will be written.
> + */
> + th_max = threshold_max(cpu_pmu);
> + if (th_max) {
> + th = min(armv8pmu_event_threshold(attr), th_max);
> + config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
> + config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
> + armv8pmu_event_threshold_control(attr));
> + }
> +
> /*
> * Install the filter into config_base as this is used to
> * construct the event type.
> diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
> index e3899bd77f5c..0e2a3c927150 100644
> --- a/include/linux/perf/arm_pmuv3.h
> +++ b/include/linux/perf/arm_pmuv3.h
> @@ -228,7 +228,11 @@
> /*
> * PMXEVTYPER: Event selection reg
> */
> -#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
> +#define ARMV8_PMU_EVTYPE_TH GENMASK(43, 32)
> +#define ARMV8_PMU_EVTYPE_TC GENMASK(63, 61)
> +/* Mask for writable bits */
> +#define ARMV8_PMU_EVTYPE_MASK (0xc800ffff | ARMV8_PMU_EVTYPE_TH | \
> + ARMV8_PMU_EVTYPE_TC)
May need to be UL suffixed for safety ?
Suzuki
> #define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
>
> /*
> @@ -254,6 +258,7 @@
> #define ARMV8_PMU_BUS_SLOTS_MASK 0xff
> #define ARMV8_PMU_BUS_WIDTH_SHIFT 16
> #define ARMV8_PMU_BUS_WIDTH_MASK 0xf
> +#define ARMV8_PMU_THWIDTH GENMASK(23, 20)
>
> /*
> * This code is really good
On 09/10/2023 13:50, Suzuki K Poulose wrote:
> Hi James
>
> On 19/09/2023 10:51, James Clark wrote:
>> FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on
>> events whose count meets a specified threshold condition. For example if
>> PMEVTYPERn.TC (Threshold Control) is set to 0b101 (Greater than or
>> equal, count), and the threshold is set to 2, then the PMU counter will
>> now only increment by 1 when an event would have previously incremented
>> the PMU counter by 2 or more on a single processor cycle.
>>
>> Two new Perf event config fields, 'threshold' and 'threshold_control'
>> have been added for controlling the feature:
>>
>> $ perf stat -e stall_slot/threshold=2,threshold_control=5/
>>
>> A new capability for reading out the maximum supported threshold value
>> has also been added:
>>
>> $ cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max
>>
>> 0x000000ff
>>
>> If a threshold higher than threshold_max is provided, then no error is
>> generated but the threshold is clamped to the max value. If
>> FEAT_PMUv3_TH isn't implemented, then threshold_max reads zero, and
>> neither the 'threshold' nor 'threshold_control' parameters will be used.
>>
>> The threshold is per PMU counter, and there are potentially different
>> threshold_max values per PMU type on heterogeneous systems.
>>
>> Bits higher than 32 now need to be written into PMEVTYPER, so
>> armv8pmu_write_evtype() has to be updated to take a u64 value rather
>> than u32.
>
> Is this supported in the Aarch32 state ? If so, do we need to change
> the arch specific register read/write "helpers" ?
>
It's half supported. PMMIR.THWIDTH is readable and non-zero in aarch32,
but the threshold value can't be written because it's in the high part
of the event type register. Thresholding does affect aarch32 guests when
enabled from the host, but that doesn't really concern the code.
But yes you're right, it isn't building on aarch32 at the moment so I
need to do a V2. I'm going to make it so the max width is reported as 0
in sysfs for aarch32, and then don't ever try to write to the high part
of the event type (which was part of the compilation error anyway). No
change will be needed to the read/write helpers though, they already
handle 32/64 bits for the right platforms.
>>
>> Signed-off-by: James Clark <[email protected]>
>> ---
>> drivers/perf/arm_pmuv3.c | 59 +++++++++++++++++++++++++++++++++-
>> include/linux/perf/arm_pmuv3.h | 7 +++-
>> 2 files changed, 64 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
>> index e5a2ac4155f6..ad6574b2bdab 100644
>> --- a/drivers/perf/arm_pmuv3.c
>> +++ b/drivers/perf/arm_pmuv3.c
>> @@ -294,9 +294,16 @@ static const struct attribute_group
>> armv8_pmuv3_events_attr_group = {
>> .is_visible = armv8pmu_event_attr_is_visible,
>> };
>> +#define TH_LO 2
>> +#define TH_HI 13
>> +#define TH_CNTL_LO 14
>> +#define TH_CNTL_HI 16
>> +
>> PMU_FORMAT_ATTR(event, "config:0-15");
>> PMU_FORMAT_ATTR(long, "config1:0");
>> PMU_FORMAT_ATTR(rdpmc, "config1:1");
>> +PMU_FORMAT_ATTR(threshold, "config1:" __stringify(TH_LO) "-"
>> __stringify(TH_HI));
>> +PMU_FORMAT_ATTR(threshold_control, "config1:" __stringify(TH_CNTL_LO)
>> "-" __stringify(TH_CNTL_HI));
>
> The perf core doesn't yet support adding aliases for
> fields other than "events". May be we could add
> some support for that in the future and use it
> to provide meaningful aliases for the threshold
> control.
>
I think it could be useful, but I'm wondering about the use case. To
make use of this feature you would probably need to have the docs or
reference manual open anyway, and that gives you the names of the
control values. Any tool could trivially hold the list of names too.
But I suppose you could say the same about event names, and we go
through some effort to report those.
> Not sure if we can extrapolate threshold_control[0]
> to be another config field => "counting_mode"
> We would need to check with the architecture folks
> to confirm that the meaning wouldn't change though.
>
I think if was planned to be fixed in that way it would have already
been written that way? And I doubt they would agree to promise to not
re-use another value that didn't fit the pattern in the future. Then
we'd be in a big mess because we deviated from the reference manual and
it would be hard to re-map around whatever new value was introduced.
>> static int sysctl_perf_user_access __read_mostly;
>> @@ -310,10 +317,22 @@ static inline bool
>> armv8pmu_event_want_user_access(struct perf_event *event)
>> return event->attr.config1 & 0x2;
>> }
>> +static inline u32 armv8pmu_event_threshold(struct perf_event_attr
>> *attr)
>> +{
>> + return FIELD_GET(GENMASK(TH_HI, TH_LO), attr->config1);
>> +}
>> +
>> +static inline u8 armv8pmu_event_threshold_control(struct
>> perf_event_attr *attr)
>> +{
>> + return FIELD_GET(GENMASK(TH_CNTL_HI, TH_CNTL_LO), attr->config1);
>> +}
>> +
>> static struct attribute *armv8_pmuv3_format_attrs[] = {
>> &format_attr_event.attr,
>> &format_attr_long.attr,
>> &format_attr_rdpmc.attr,
>> + &format_attr_threshold.attr,
>> + &format_attr_threshold_control.attr,
>> NULL,
>> };
>> @@ -365,10 +384,31 @@ static ssize_t bus_width_show(struct device
>> *dev, struct device_attribute *attr,
>> static DEVICE_ATTR_RO(bus_width);
>> +static u32 threshold_max(struct arm_pmu *cpu_pmu)
>> +{
>> + /*
>> + * The largest value that can be written to PMEVTYPER<n>_EL0.TH is
>> + * (2 ^ PMMIR.THWIDTH) - 1
>> + */
>> + return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1;
>> +}
>> +
>> +static ssize_t threshold_max_show(struct device *dev,
>> + struct device_attribute *attr, char *page)
>> +{
>> + struct pmu *pmu = dev_get_drvdata(dev);
>> + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
>> +
>> + return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
>> +}
>> +
>> +static DEVICE_ATTR_RO(threshold_max);
>> +
>> static struct attribute *armv8_pmuv3_caps_attrs[] = {
>> &dev_attr_slots.attr,
>> &dev_attr_bus_slots.attr,
>> &dev_attr_bus_width.attr,
>> + &dev_attr_threshold_max.attr,
>> NULL,
>> };
>> @@ -552,7 +592,7 @@ static void armv8pmu_write_counter(struct
>> perf_event *event, u64 value)
>> armv8pmu_write_hw_counter(event, value);
>> }
>> -static inline void armv8pmu_write_evtype(int idx, u32 val)
>> +static inline void armv8pmu_write_evtype(int idx, u64 val)>> {
>> u32 counter = ARMV8_IDX_TO_COUNTER(idx);
>> @@ -912,6 +952,10 @@ static int armv8pmu_set_event_filter(struct
>> hw_perf_event *event,
>> struct perf_event_attr *attr)
>> {
>> unsigned long config_base = 0;
>> + struct perf_event *perf_event = container_of(attr, struct
>> perf_event,
>> + attr);
>> + struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
>> + u32 th, th_max;
>> if (attr->exclude_idle)
>> return -EPERM;
>> @@ -943,6 +987,19 @@ static int armv8pmu_set_event_filter(struct
>> hw_perf_event *event,
>> if (attr->exclude_user)
>> config_base |= ARMV8_PMU_EXCLUDE_EL0;
>> + /*
>> + * Insert event counting threshold (FEAT_PMUv3_TH) values. If
>> + * FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max)
>> will be
>> + * 0 and no values will be written.
>> + */
>> + th_max = threshold_max(cpu_pmu);
>> + if (th_max) {
>> + th = min(armv8pmu_event_threshold(attr), th_max);
>> + config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
>> + config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
>> + armv8pmu_event_threshold_control(attr));
>> + }
>> +
>> /*
>> * Install the filter into config_base as this is used to
>> * construct the event type.
>> diff --git a/include/linux/perf/arm_pmuv3.h
>> b/include/linux/perf/arm_pmuv3.h
>> index e3899bd77f5c..0e2a3c927150 100644
>> --- a/include/linux/perf/arm_pmuv3.h
>> +++ b/include/linux/perf/arm_pmuv3.h
>> @@ -228,7 +228,11 @@
>> /*
>> * PMXEVTYPER: Event selection reg
>> */
>> -#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable
>> bits */
>> +#define ARMV8_PMU_EVTYPE_TH GENMASK(43, 32)
>> +#define ARMV8_PMU_EVTYPE_TC GENMASK(63, 61)
>> +/* Mask for writable bits */
>> +#define ARMV8_PMU_EVTYPE_MASK (0xc800ffff | ARMV8_PMU_EVTYPE_TH | \
>> + ARMV8_PMU_EVTYPE_TC)
>
> May need to be UL suffixed for safety ?
>
I don't think it's strictly needed because GENMASK makes a UL so the
whole expression is auto promoted anyway. But I can add it for
completeness.
Also these will be split across the two asm/arm_pmuv3.h files in v2
because I need to keep the original mask for aarch32, so it will look a
little bit different.
Thanks
James
>
> Suzuki
>
>> #define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT
>> bits */
>> /*
>> @@ -254,6 +258,7 @@
>> #define ARMV8_PMU_BUS_SLOTS_MASK 0xff
>> #define ARMV8_PMU_BUS_WIDTH_SHIFT 16
>> #define ARMV8_PMU_BUS_WIDTH_MASK 0xf
>> +#define ARMV8_PMU_THWIDTH GENMASK(23, 20)
>> /*
>> * This code is really good
>
>
On 09/10/2023 17:30, James Clark wrote:
>
>
> On 09/10/2023 13:50, Suzuki K Poulose wrote:
>> Hi James
>>
>> On 19/09/2023 10:51, James Clark wrote:
>>> FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on
>>> events whose count meets a specified threshold condition. For example if
>>> PMEVTYPERn.TC (Threshold Control) is set to 0b101 (Greater than or
>>> equal, count), and the threshold is set to 2, then the PMU counter will
>>> now only increment by 1 when an event would have previously incremented
>>> the PMU counter by 2 or more on a single processor cycle.
>>>
>>> Two new Perf event config fields, 'threshold' and 'threshold_control'
>>> have been added for controlling the feature:
>>>
>>> $ perf stat -e stall_slot/threshold=2,threshold_control=5/
>>>
>>> A new capability for reading out the maximum supported threshold value
>>> has also been added:
>>>
>>> $ cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max
>>>
>>> 0x000000ff
>>>
>>> If a threshold higher than threshold_max is provided, then no error is
>>> generated but the threshold is clamped to the max value. If
>>> FEAT_PMUv3_TH isn't implemented, then threshold_max reads zero, and
>>> neither the 'threshold' nor 'threshold_control' parameters will be used.
>>>
>>> The threshold is per PMU counter, and there are potentially different
>>> threshold_max values per PMU type on heterogeneous systems.
>>>
>>> Bits higher than 32 now need to be written into PMEVTYPER, so
>>> armv8pmu_write_evtype() has to be updated to take a u64 value rather
>>> than u32.
>>
>> Is this supported in the Aarch32 state ? If so, do we need to change
>> the arch specific register read/write "helpers" ?
>>
>
> It's half supported. PMMIR.THWIDTH is readable and non-zero in aarch32,
> but the threshold value can't be written because it's in the high part
> of the event type register. Thresholding does affect aarch32 guests when
> enabled from the host, but that doesn't really concern the code.
>
> But yes you're right, it isn't building on aarch32 at the moment so I
> need to do a V2. I'm going to make it so the max width is reported as 0
> in sysfs for aarch32, and then don't ever try to write to the high part
> of the event type (which was part of the compilation error anyway). No
> change will be needed to the read/write helpers though, they already
> handle 32/64 bits for the right platforms.
>
>>>
>>> Signed-off-by: James Clark <[email protected]>
>>> ---
>>> drivers/perf/arm_pmuv3.c | 59 +++++++++++++++++++++++++++++++++-
>>> include/linux/perf/arm_pmuv3.h | 7 +++-
>>> 2 files changed, 64 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
>>> index e5a2ac4155f6..ad6574b2bdab 100644
>>> --- a/drivers/perf/arm_pmuv3.c
>>> +++ b/drivers/perf/arm_pmuv3.c
>>> @@ -294,9 +294,16 @@ static const struct attribute_group
>>> armv8_pmuv3_events_attr_group = {
>>> .is_visible = armv8pmu_event_attr_is_visible,
>>> };
>>> +#define TH_LO 2
>>> +#define TH_HI 13
>>> +#define TH_CNTL_LO 14
>>> +#define TH_CNTL_HI 16
>>> +
>>> PMU_FORMAT_ATTR(event, "config:0-15");
>>> PMU_FORMAT_ATTR(long, "config1:0");
>>> PMU_FORMAT_ATTR(rdpmc, "config1:1");
>>> +PMU_FORMAT_ATTR(threshold, "config1:" __stringify(TH_LO) "-"
>>> __stringify(TH_HI));
>>> +PMU_FORMAT_ATTR(threshold_control, "config1:" __stringify(TH_CNTL_LO)
>>> "-" __stringify(TH_CNTL_HI));
>>
>> The perf core doesn't yet support adding aliases for
>> fields other than "events". May be we could add
>> some support for that in the future and use it
>> to provide meaningful aliases for the threshold
>> control.
>>
>
> I think it could be useful, but I'm wondering about the use case. To
> make use of this feature you would probably need to have the docs or
> reference manual open anyway, and that gives you the names of the
> control values. Any tool could trivially hold the list of names too.
>
> But I suppose you could say the same about event names, and we go
> through some effort to report those.
>
>> Not sure if we can extrapolate threshold_control[0]
>> to be another config field => "counting_mode"
>> We would need to check with the architecture folks
>> to confirm that the meaning wouldn't change though.
>>
>
> I think if was planned to be fixed in that way it would have already
> been written that way? And I doubt they would agree to promise to not
> re-use another value that didn't fit the pattern in the future. Then
> we'd be in a big mess because we deviated from the reference manual and
> it would be hard to re-map around whatever new value was introduced.
Having looked at the spec again, it is indeed fixed. All 8 possible
values are defined for the TC and all of them follow the rule of
bit0 => 1 => counting mode
That kind of makes it easier for people to go one step closer to
use the field without having to refer to the Arm ARM all the time.
Suzuki
>
>>> static int sysctl_perf_user_access __read_mostly;
>>> @@ -310,10 +317,22 @@ static inline bool
>>> armv8pmu_event_want_user_access(struct perf_event *event)
>>> return event->attr.config1 & 0x2;
>>> }
>>> +static inline u32 armv8pmu_event_threshold(struct perf_event_attr
>>> *attr)
>>> +{
>>> + return FIELD_GET(GENMASK(TH_HI, TH_LO), attr->config1);
>>> +}
>>> +
>>> +static inline u8 armv8pmu_event_threshold_control(struct
>>> perf_event_attr *attr)
>>> +{
>>> + return FIELD_GET(GENMASK(TH_CNTL_HI, TH_CNTL_LO), attr->config1);
>>> +}
>>> +
>>> static struct attribute *armv8_pmuv3_format_attrs[] = {
>>> &format_attr_event.attr,
>>> &format_attr_long.attr,
>>> &format_attr_rdpmc.attr,
>>> + &format_attr_threshold.attr,
>>> + &format_attr_threshold_control.attr,
>>> NULL,
>>> };
>>> @@ -365,10 +384,31 @@ static ssize_t bus_width_show(struct device
>>> *dev, struct device_attribute *attr,
>>> static DEVICE_ATTR_RO(bus_width);
>>> +static u32 threshold_max(struct arm_pmu *cpu_pmu)
>>> +{
>>> + /*
>>> + * The largest value that can be written to PMEVTYPER<n>_EL0.TH is
>>> + * (2 ^ PMMIR.THWIDTH) - 1
>>> + */
>>> + return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1;
>>> +}
>>> +
>>> +static ssize_t threshold_max_show(struct device *dev,
>>> + struct device_attribute *attr, char *page)
>>> +{
>>> + struct pmu *pmu = dev_get_drvdata(dev);
>>> + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
>>> +
>>> + return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
>>> +}
>>> +
>>> +static DEVICE_ATTR_RO(threshold_max);
>>> +
>>> static struct attribute *armv8_pmuv3_caps_attrs[] = {
>>> &dev_attr_slots.attr,
>>> &dev_attr_bus_slots.attr,
>>> &dev_attr_bus_width.attr,
>>> + &dev_attr_threshold_max.attr,
>>> NULL,
>>> };
>>> @@ -552,7 +592,7 @@ static void armv8pmu_write_counter(struct
>>> perf_event *event, u64 value)
>>> armv8pmu_write_hw_counter(event, value);
>>> }
>>> -static inline void armv8pmu_write_evtype(int idx, u32 val)
>>> +static inline void armv8pmu_write_evtype(int idx, u64 val)>> {
>>> u32 counter = ARMV8_IDX_TO_COUNTER(idx);
>>> @@ -912,6 +952,10 @@ static int armv8pmu_set_event_filter(struct
>>> hw_perf_event *event,
>>> struct perf_event_attr *attr)
>>> {
>>> unsigned long config_base = 0;
>>> + struct perf_event *perf_event = container_of(attr, struct
>>> perf_event,
>>> + attr);
>>> + struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
>>> + u32 th, th_max;
>>> if (attr->exclude_idle)
>>> return -EPERM;
>>> @@ -943,6 +987,19 @@ static int armv8pmu_set_event_filter(struct
>>> hw_perf_event *event,
>>> if (attr->exclude_user)
>>> config_base |= ARMV8_PMU_EXCLUDE_EL0;
>>> + /*
>>> + * Insert event counting threshold (FEAT_PMUv3_TH) values. If
>>> + * FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max)
>>> will be
>>> + * 0 and no values will be written.
>>> + */
>>> + th_max = threshold_max(cpu_pmu);
>>> + if (th_max) {
>>> + th = min(armv8pmu_event_threshold(attr), th_max);
>>> + config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
>>> + config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
>>> + armv8pmu_event_threshold_control(attr));
>>> + }
>>> +
>>> /*
>>> * Install the filter into config_base as this is used to
>>> * construct the event type.
>>> diff --git a/include/linux/perf/arm_pmuv3.h
>>> b/include/linux/perf/arm_pmuv3.h
>>> index e3899bd77f5c..0e2a3c927150 100644
>>> --- a/include/linux/perf/arm_pmuv3.h
>>> +++ b/include/linux/perf/arm_pmuv3.h
>>> @@ -228,7 +228,11 @@
>>> /*
>>> * PMXEVTYPER: Event selection reg
>>> */
>>> -#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable
>>> bits */
>>> +#define ARMV8_PMU_EVTYPE_TH GENMASK(43, 32)
>>> +#define ARMV8_PMU_EVTYPE_TC GENMASK(63, 61)
>>> +/* Mask for writable bits */
>>> +#define ARMV8_PMU_EVTYPE_MASK (0xc800ffff | ARMV8_PMU_EVTYPE_TH | \
>>> + ARMV8_PMU_EVTYPE_TC)
>>
>> May need to be UL suffixed for safety ?
>>
>
> I don't think it's strictly needed because GENMASK makes a UL so the
> whole expression is auto promoted anyway. But I can add it for
> completeness.
>
> Also these will be split across the two asm/arm_pmuv3.h files in v2
> because I need to keep the original mask for aarch32, so it will look a
> little bit different.
>
> Thanks
> James
>
>>
>> Suzuki
>>
>>> #define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT
>>> bits */
>>> /*
>>> @@ -254,6 +258,7 @@
>>> #define ARMV8_PMU_BUS_SLOTS_MASK 0xff
>>> #define ARMV8_PMU_BUS_WIDTH_SHIFT 16
>>> #define ARMV8_PMU_BUS_WIDTH_MASK 0xf
>>> +#define ARMV8_PMU_THWIDTH GENMASK(23, 20)
>>> /*
>>> * This code is really good
>>
>>
On 10/10/2023 12:03, Suzuki K Poulose wrote:
> On 09/10/2023 17:30, James Clark wrote:
>>
>>
>> On 09/10/2023 13:50, Suzuki K Poulose wrote:
>>> Hi James
>>>
>>> On 19/09/2023 10:51, James Clark wrote:
>>>> FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on
>>>> events whose count meets a specified threshold condition. For
>>>> example if
>>>> PMEVTYPERn.TC (Threshold Control) is set to 0b101 (Greater than or
>>>> equal, count), and the threshold is set to 2, then the PMU counter will
>>>> now only increment by 1 when an event would have previously incremented
>>>> the PMU counter by 2 or more on a single processor cycle.
>>>>
>>>> Two new Perf event config fields, 'threshold' and 'threshold_control'
>>>> have been added for controlling the feature:
>>>>
>>>> $ perf stat -e stall_slot/threshold=2,threshold_control=5/
>>>>
>>>> A new capability for reading out the maximum supported threshold value
>>>> has also been added:
>>>>
>>>> $ cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max
>>>>
>>>> 0x000000ff
>>>>
>>>> If a threshold higher than threshold_max is provided, then no error is
>>>> generated but the threshold is clamped to the max value. If
>>>> FEAT_PMUv3_TH isn't implemented, then threshold_max reads zero, and
>>>> neither the 'threshold' nor 'threshold_control' parameters will be
>>>> used.
>>>>
>>>> The threshold is per PMU counter, and there are potentially different
>>>> threshold_max values per PMU type on heterogeneous systems.
>>>>
>>>> Bits higher than 32 now need to be written into PMEVTYPER, so
>>>> armv8pmu_write_evtype() has to be updated to take a u64 value rather
>>>> than u32.
>>>
>>> Is this supported in the Aarch32 state ? If so, do we need to change
>>> the arch specific register read/write "helpers" ?
>>>
>>
>> It's half supported. PMMIR.THWIDTH is readable and non-zero in aarch32,
>> but the threshold value can't be written because it's in the high part
>> of the event type register. Thresholding does affect aarch32 guests when
>> enabled from the host, but that doesn't really concern the code.
>>
>> But yes you're right, it isn't building on aarch32 at the moment so I
>> need to do a V2. I'm going to make it so the max width is reported as 0
>> in sysfs for aarch32, and then don't ever try to write to the high part
>> of the event type (which was part of the compilation error anyway). No
>> change will be needed to the read/write helpers though, they already
>> handle 32/64 bits for the right platforms.
>>
>>>>
>>>> Signed-off-by: James Clark <[email protected]>
>>>> ---
>>>> drivers/perf/arm_pmuv3.c | 59
>>>> +++++++++++++++++++++++++++++++++-
>>>> include/linux/perf/arm_pmuv3.h | 7 +++-
>>>> 2 files changed, 64 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
>>>> index e5a2ac4155f6..ad6574b2bdab 100644
>>>> --- a/drivers/perf/arm_pmuv3.c
>>>> +++ b/drivers/perf/arm_pmuv3.c
>>>> @@ -294,9 +294,16 @@ static const struct attribute_group
>>>> armv8_pmuv3_events_attr_group = {
>>>> .is_visible = armv8pmu_event_attr_is_visible,
>>>> };
>>>> +#define TH_LO 2
>>>> +#define TH_HI 13
>>>> +#define TH_CNTL_LO 14
>>>> +#define TH_CNTL_HI 16
>>>> +
>>>> PMU_FORMAT_ATTR(event, "config:0-15");
>>>> PMU_FORMAT_ATTR(long, "config1:0");
>>>> PMU_FORMAT_ATTR(rdpmc, "config1:1");
>>>> +PMU_FORMAT_ATTR(threshold, "config1:" __stringify(TH_LO) "-"
>>>> __stringify(TH_HI));
>>>> +PMU_FORMAT_ATTR(threshold_control, "config1:" __stringify(TH_CNTL_LO)
>>>> "-" __stringify(TH_CNTL_HI));
>>>
>>> The perf core doesn't yet support adding aliases for
>>> fields other than "events". May be we could add
>>> some support for that in the future and use it
>>> to provide meaningful aliases for the threshold
>>> control.
>>>
>>
>> I think it could be useful, but I'm wondering about the use case. To
>> make use of this feature you would probably need to have the docs or
>> reference manual open anyway, and that gives you the names of the
>> control values. Any tool could trivially hold the list of names too.
>>
>> But I suppose you could say the same about event names, and we go
>> through some effort to report those.
>>
>>> Not sure if we can extrapolate threshold_control[0]
>>> to be another config field => "counting_mode"
>>> We would need to check with the architecture folks
>>> to confirm that the meaning wouldn't change though.
>>>
>>
>> I think if was planned to be fixed in that way it would have already
>> been written that way? And I doubt they would agree to promise to not
>> re-use another value that didn't fit the pattern in the future. Then
>> we'd be in a big mess because we deviated from the reference manual and
>> it would be hard to re-map around whatever new value was introduced.
>
> Having looked at the spec again, it is indeed fixed. All 8 possible
> values are defined for the TC and all of them follow the rule of
> bit0 => 1 => counting mode
>
> That kind of makes it easier for people to go one step closer to
> use the field without having to refer to the Arm ARM all the time.
>
> Suzuki
>
Yep I agree. I thought there were empty values, but as the whole field
is filled already I can split it into two new attributes.
>
>
>>
>>>> static int sysctl_perf_user_access __read_mostly;
>>>> @@ -310,10 +317,22 @@ static inline bool
>>>> armv8pmu_event_want_user_access(struct perf_event *event)
>>>> return event->attr.config1 & 0x2;
>>>> }
>>>> +static inline u32 armv8pmu_event_threshold(struct perf_event_attr
>>>> *attr)
>>>> +{
>>>> + return FIELD_GET(GENMASK(TH_HI, TH_LO), attr->config1);
>>>> +}
>>>> +
>>>> +static inline u8 armv8pmu_event_threshold_control(struct
>>>> perf_event_attr *attr)
>>>> +{
>>>> + return FIELD_GET(GENMASK(TH_CNTL_HI, TH_CNTL_LO), attr->config1);
>>>> +}
>>>> +
>>>> static struct attribute *armv8_pmuv3_format_attrs[] = {
>>>> &format_attr_event.attr,
>>>> &format_attr_long.attr,
>>>> &format_attr_rdpmc.attr,
>>>> + &format_attr_threshold.attr,
>>>> + &format_attr_threshold_control.attr,
>>>> NULL,
>>>> };
>>>> @@ -365,10 +384,31 @@ static ssize_t bus_width_show(struct device
>>>> *dev, struct device_attribute *attr,
>>>> static DEVICE_ATTR_RO(bus_width);
>>>> +static u32 threshold_max(struct arm_pmu *cpu_pmu)
>>>> +{
>>>> + /*
>>>> + * The largest value that can be written to PMEVTYPER<n>_EL0.TH is
>>>> + * (2 ^ PMMIR.THWIDTH) - 1
>>>> + */
>>>> + return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir))
>>>> - 1;
>>>> +}
>>>> +
>>>> +static ssize_t threshold_max_show(struct device *dev,
>>>> + struct device_attribute *attr, char *page)
>>>> +{
>>>> + struct pmu *pmu = dev_get_drvdata(dev);
>>>> + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
>>>> +
>>>> + return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
>>>> +}
>>>> +
>>>> +static DEVICE_ATTR_RO(threshold_max);
>>>> +
>>>> static struct attribute *armv8_pmuv3_caps_attrs[] = {
>>>> &dev_attr_slots.attr,
>>>> &dev_attr_bus_slots.attr,
>>>> &dev_attr_bus_width.attr,
>>>> + &dev_attr_threshold_max.attr,
>>>> NULL,
>>>> };
>>>> @@ -552,7 +592,7 @@ static void armv8pmu_write_counter(struct
>>>> perf_event *event, u64 value)
>>>> armv8pmu_write_hw_counter(event, value);
>>>> }
>>>> -static inline void armv8pmu_write_evtype(int idx, u32 val)
>>>> +static inline void armv8pmu_write_evtype(int idx, u64 val)>> {
>>>> u32 counter = ARMV8_IDX_TO_COUNTER(idx);
>>>> @@ -912,6 +952,10 @@ static int armv8pmu_set_event_filter(struct
>>>> hw_perf_event *event,
>>>> struct perf_event_attr *attr)
>>>> {
>>>> unsigned long config_base = 0;
>>>> + struct perf_event *perf_event = container_of(attr, struct
>>>> perf_event,
>>>> + attr);
>>>> + struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
>>>> + u32 th, th_max;
>>>> if (attr->exclude_idle)
>>>> return -EPERM;
>>>> @@ -943,6 +987,19 @@ static int armv8pmu_set_event_filter(struct
>>>> hw_perf_event *event,
>>>> if (attr->exclude_user)
>>>> config_base |= ARMV8_PMU_EXCLUDE_EL0;
>>>> + /*
>>>> + * Insert event counting threshold (FEAT_PMUv3_TH) values. If
>>>> + * FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max)
>>>> will be
>>>> + * 0 and no values will be written.
>>>> + */
>>>> + th_max = threshold_max(cpu_pmu);
>>>> + if (th_max) {
>>>> + th = min(armv8pmu_event_threshold(attr), th_max);
>>>> + config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
>>>> + config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
>>>> + armv8pmu_event_threshold_control(attr));
>>>> + }
>>>> +
>>>> /*
>>>> * Install the filter into config_base as this is used to
>>>> * construct the event type.
>>>> diff --git a/include/linux/perf/arm_pmuv3.h
>>>> b/include/linux/perf/arm_pmuv3.h
>>>> index e3899bd77f5c..0e2a3c927150 100644
>>>> --- a/include/linux/perf/arm_pmuv3.h
>>>> +++ b/include/linux/perf/arm_pmuv3.h
>>>> @@ -228,7 +228,11 @@
>>>> /*
>>>> * PMXEVTYPER: Event selection reg
>>>> */
>>>> -#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable
>>>> bits */
>>>> +#define ARMV8_PMU_EVTYPE_TH GENMASK(43, 32)
>>>> +#define ARMV8_PMU_EVTYPE_TC GENMASK(63, 61)
>>>> +/* Mask for writable bits */
>>>> +#define ARMV8_PMU_EVTYPE_MASK (0xc800ffff | ARMV8_PMU_EVTYPE_TH | \
>>>> + ARMV8_PMU_EVTYPE_TC)
>>>
>>> May need to be UL suffixed for safety ?
>>>
>>
>> I don't think it's strictly needed because GENMASK makes a UL so the
>> whole expression is auto promoted anyway. But I can add it for
>> completeness.
>>
>> Also these will be split across the two asm/arm_pmuv3.h files in v2
>> because I need to keep the original mask for aarch32, so it will look a
>> little bit different.
>>
>> Thanks
>> James
>>
>>>
>>> Suzuki
>>>
>>>> #define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT
>>>> bits */
>>>> /*
>>>> @@ -254,6 +258,7 @@
>>>> #define ARMV8_PMU_BUS_SLOTS_MASK 0xff
>>>> #define ARMV8_PMU_BUS_WIDTH_SHIFT 16
>>>> #define ARMV8_PMU_BUS_WIDTH_MASK 0xf
>>>> +#define ARMV8_PMU_THWIDTH GENMASK(23, 20)
>>>> /*
>>>> * This code is really good
>>>
>>>
>