2023-10-10 10:41:52

by James Clark

[permalink] [raw]
Subject: [PATCH v2 0/3] arm64: perf: Add support for event counting threshold

Changes since v1:

* Fix build on aarch32 by disabling FEAT_PMUv3_TH and splitting event
type mask between the platforms
* Change armv8pmu_write_evtype() to take unsigned long instead of u64
so it isn't unnecessarily wide on aarch32
* Add UL suffix to aarch64 event type mask definition

----

FEAT_PMUv3_TH (Armv8.8) is a new feature that allows conditional
counting of PMU events depending on how much the event increments on
a single cycle. Two new config fields for perf_event_open have been
added, and a PMU cap file for reading the max_threshold. See the second
commit message and the docs in the last commit for more details.

The change has been validated on the Arm FVP model:

# Zero values, works as expected (as before).
$ perf stat -e dtlb_walk/threshold=0,threshold_control=0/ -- true

5962 dtlb_walk/threshold=0,threshold_control=0/

# Threshold >= 255 causes count to be 0 because dtlb_walk doesn't
# increase by more than 1 per cycle.
$ perf stat -e dtlb_walk/threshold=255,threshold_control=5/ -- true

0 dtlb_walk/threshold=255,threshold_control=5/

# Keeping comparison as >= but lowering the threshold to 1 makes the
# count return.
$ perf stat -e dtlb_walk/threshold=1,threshold_control=5/ -- true

6329 dtlb_walk/threshold=1,threshold_control=5/


James Clark (3):
arm: perf: Include threshold control fields valid in PMEVTYPER mask
arm64: perf: Add support for event counting threshold
Documentation: arm64: Document the PMU event counting threshold
feature

Documentation/arch/arm64/perf.rst | 58 ++++++++++++++++++++++++++
arch/arm/include/asm/arm_pmuv3.h | 3 ++
arch/arm64/include/asm/arm_pmuv3.h | 4 ++
arch/arm64/kvm/pmu-emul.c | 1 +
arch/arm64/kvm/sys_regs.c | 1 +
drivers/perf/arm_pmuv3.c | 67 +++++++++++++++++++++++++++++-
include/linux/perf/arm_pmuv3.h | 4 +-
7 files changed, 136 insertions(+), 2 deletions(-)


base-commit: 94f6f0550c625fab1f373bb86a6669b45e9748b3
--
2.34.1


2023-10-10 10:42:00

by James Clark

[permalink] [raw]
Subject: [PATCH v2 1/3] arm: perf: Include threshold control fields valid in PMEVTYPER mask

FEAT_PMUv3_TH (Armv8.8) adds two new fields to PMEVTYPER, so include
them in the mask. These aren't writable on 32 bit kernels as they are in
the high part of the register, so split the mask definition to the asm
files for each platform.

Now where the value is used in some parts of KVM, include the asm file.

Despite not being used on aarch32, TH and TC macros are added to the
shared header file, because they are used in arm_pmuv3.c which is
compiled for both platforms.

Signed-off-by: James Clark <[email protected]>
---
arch/arm/include/asm/arm_pmuv3.h | 3 +++
arch/arm64/include/asm/arm_pmuv3.h | 4 ++++
arch/arm64/kvm/pmu-emul.c | 1 +
arch/arm64/kvm/sys_regs.c | 1 +
include/linux/perf/arm_pmuv3.h | 3 ++-
5 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
index 72529f5e2bed..491310133d09 100644
--- a/arch/arm/include/asm/arm_pmuv3.h
+++ b/arch/arm/include/asm/arm_pmuv3.h
@@ -9,6 +9,9 @@
#include <asm/cp15.h>
#include <asm/cputype.h>

+/* Mask for writable bits */
+#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff
+
#define PMCCNTR __ACCESS_CP15_64(0, c9)

#define PMCR __ACCESS_CP15(c9, 0, c12, 0)
diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
index 18dc2fb3d7b7..4faf4f7385a5 100644
--- a/arch/arm64/include/asm/arm_pmuv3.h
+++ b/arch/arm64/include/asm/arm_pmuv3.h
@@ -11,6 +11,10 @@
#include <asm/cpufeature.h>
#include <asm/sysreg.h>

+/* Mask for writable bits */
+#define ARMV8_PMU_EVTYPE_MASK (0xc800ffffUL | ARMV8_PMU_EVTYPE_TH | \
+ ARMV8_PMU_EVTYPE_TC)
+
#define RETURN_READ_PMEVCNTRN(n) \
return read_sysreg(pmevcntr##n##_el0)
static inline unsigned long read_pmevcntrn(int n)
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 6b066e04dc5d..0666212c0c15 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -11,6 +11,7 @@
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
+#include <asm/arm_pmuv3.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index e92ec810d449..d0e11e684f07 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -17,6 +17,7 @@
#include <linux/printk.h>
#include <linux/uaccess.h>

+#include <asm/arm_pmuv3.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
#include <asm/debug-monitors.h>
diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
index e3899bd77f5c..ec3a01502e7c 100644
--- a/include/linux/perf/arm_pmuv3.h
+++ b/include/linux/perf/arm_pmuv3.h
@@ -228,7 +228,8 @@
/*
* PMXEVTYPER: Event selection reg
*/
-#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
+#define ARMV8_PMU_EVTYPE_TH GENMASK(43, 32)
+#define ARMV8_PMU_EVTYPE_TC GENMASK(63, 61)
#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */

/*
--
2.34.1

2023-10-10 10:42:27

by James Clark

[permalink] [raw]
Subject: [PATCH v2 2/3] arm64: perf: Add support for event counting threshold

FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on
events whose count meets a specified threshold condition. For example if
PMEVTYPERn.TC (Threshold Control) is set to 0b101 (Greater than or
equal, count), and the threshold is set to 2, then the PMU counter will
now only increment by 1 when an event would have previously incremented
the PMU counter by 2 or more on a single processor cycle.

Two new Perf event config fields, 'threshold' and 'threshold_control'
have been added for controlling the feature:

$ perf stat -e stall_slot/threshold=2,threshold_control=5/

A new capability for reading out the maximum supported threshold value
has also been added:

$ cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max

0x000000ff

If a threshold higher than threshold_max is provided, then no error is
generated but the threshold is clamped to the max value. If
FEAT_PMUv3_TH isn't implemented or a 32 bit kernel is running, then
threshold_max reads zero, and neither the 'threshold' nor
'threshold_control' parameters will be used.

The threshold is per PMU counter, and there are potentially different
threshold_max values per PMU type on heterogeneous systems.

Bits higher than 32 now need to be written into PMEVTYPER, so
armv8pmu_write_evtype() has to be updated to take an unsigned long value
rather than u32 which gives the correct behavior on both aarch32 and 64.

Signed-off-by: James Clark <[email protected]>
---
drivers/perf/arm_pmuv3.c | 67 +++++++++++++++++++++++++++++++++-
include/linux/perf/arm_pmuv3.h | 1 +
2 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 8fcaa26f0f8a..6d669b16a2bc 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -15,6 +15,7 @@
#include <clocksource/arm_arch_timer.h>

#include <linux/acpi.h>
+#include <linux/bitfield.h>
#include <linux/clocksource.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
@@ -294,9 +295,16 @@ static const struct attribute_group armv8_pmuv3_events_attr_group = {
.is_visible = armv8pmu_event_attr_is_visible,
};

+#define TH_LO 2
+#define TH_HI 13
+#define TH_CNTL_LO 14
+#define TH_CNTL_HI 16
+
PMU_FORMAT_ATTR(event, "config:0-15");
PMU_FORMAT_ATTR(long, "config1:0");
PMU_FORMAT_ATTR(rdpmc, "config1:1");
+PMU_FORMAT_ATTR(threshold, "config1:" __stringify(TH_LO) "-" __stringify(TH_HI));
+PMU_FORMAT_ATTR(threshold_control, "config1:" __stringify(TH_CNTL_LO) "-" __stringify(TH_CNTL_HI));

static int sysctl_perf_user_access __read_mostly;

@@ -310,10 +318,22 @@ static inline bool armv8pmu_event_want_user_access(struct perf_event *event)
return event->attr.config1 & 0x2;
}

+static inline u32 armv8pmu_event_threshold(struct perf_event_attr *attr)
+{
+ return FIELD_GET(GENMASK(TH_HI, TH_LO), attr->config1);
+}
+
+static inline u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr)
+{
+ return FIELD_GET(GENMASK(TH_CNTL_HI, TH_CNTL_LO), attr->config1);
+}
+
static struct attribute *armv8_pmuv3_format_attrs[] = {
&format_attr_event.attr,
&format_attr_long.attr,
&format_attr_rdpmc.attr,
+ &format_attr_threshold.attr,
+ &format_attr_threshold_control.attr,
NULL,
};

@@ -365,10 +385,38 @@ static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,

static DEVICE_ATTR_RO(bus_width);

+static u32 threshold_max(struct arm_pmu *cpu_pmu)
+{
+ /*
+ * PMMIR.WIDTH is readable and non-zero on aarch32, but it would be
+ * impossible to write the threshold in the upper 32 bits of PMEVTYPER.
+ */
+ if (IS_ENABLED(CONFIG_ARM))
+ return 0;
+
+ /*
+ * The largest value that can be written to PMEVTYPER<n>_EL0.TH is
+ * (2 ^ PMMIR.THWIDTH) - 1.
+ */
+ return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1;
+}
+
+static ssize_t threshold_max_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+
+ return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
+}
+
+static DEVICE_ATTR_RO(threshold_max);
+
static struct attribute *armv8_pmuv3_caps_attrs[] = {
&dev_attr_slots.attr,
&dev_attr_bus_slots.attr,
&dev_attr_bus_width.attr,
+ &dev_attr_threshold_max.attr,
NULL,
};

@@ -552,7 +600,7 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value)
armv8pmu_write_hw_counter(event, value);
}

-static inline void armv8pmu_write_evtype(int idx, u32 val)
+static inline void armv8pmu_write_evtype(int idx, unsigned long val)
{
u32 counter = ARMV8_IDX_TO_COUNTER(idx);

@@ -914,6 +962,10 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
struct perf_event_attr *attr)
{
unsigned long config_base = 0;
+ struct perf_event *perf_event = container_of(attr, struct perf_event,
+ attr);
+ struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
+ u32 th, th_max;

if (attr->exclude_idle)
return -EPERM;
@@ -945,6 +997,19 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
if (attr->exclude_user)
config_base |= ARMV8_PMU_EXCLUDE_EL0;

+ /*
+ * Insert event counting threshold (FEAT_PMUv3_TH) values. If
+ * FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will be
+ * 0 and no values will be written.
+ */
+ th_max = threshold_max(cpu_pmu);
+ if (IS_ENABLED(CONFIG_ARM64) && th_max) {
+ th = min(armv8pmu_event_threshold(attr), th_max);
+ config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
+ config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
+ armv8pmu_event_threshold_control(attr));
+ }
+
/*
* Install the filter into config_base as this is used to
* construct the event type.
diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
index ec3a01502e7c..753f8dbd9d10 100644
--- a/include/linux/perf/arm_pmuv3.h
+++ b/include/linux/perf/arm_pmuv3.h
@@ -255,6 +255,7 @@
#define ARMV8_PMU_BUS_SLOTS_MASK 0xff
#define ARMV8_PMU_BUS_WIDTH_SHIFT 16
#define ARMV8_PMU_BUS_WIDTH_MASK 0xf
+#define ARMV8_PMU_THWIDTH GENMASK(23, 20)

/*
* This code is really good
--
2.34.1

2023-10-10 10:42:30

by James Clark

[permalink] [raw]
Subject: [PATCH v2 3/3] Documentation: arm64: Document the PMU event counting threshold feature

Add documentation for the two new Perf event open parameters and
the threshold_max capability file.

Signed-off-by: James Clark <[email protected]>
---
Documentation/arch/arm64/perf.rst | 58 +++++++++++++++++++++++++++++++
1 file changed, 58 insertions(+)

diff --git a/Documentation/arch/arm64/perf.rst b/Documentation/arch/arm64/perf.rst
index 1f87b57c2332..122a12607f37 100644
--- a/Documentation/arch/arm64/perf.rst
+++ b/Documentation/arch/arm64/perf.rst
@@ -164,3 +164,61 @@ and should be used to mask the upper bits as needed.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/arch/arm64/tests/user-events.c
.. _tools/lib/perf/tests/test-evsel.c:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/perf/tests/test-evsel.c
+
+Event Counting Threshold
+==========================================
+
+Overview
+--------
+
+FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on
+events whose count meets a specified threshold condition. For example if
+threshold_control is set to 5 ('Greater than or equal, count'), and
+the threshold is set to 2, then the PMU counter will now only increment
+by 1 when an event would have previously incremented the PMU counter by
+2 or more on a single processor cycle.
+
+To increment by the value of the event instead of 1, use the non 'count'
+comparisons, in this case 4 ('Greater than or equal'). Each comparison
+has a count and non count version, where the 'count' version always
+increments the PMU counter by 1 instead of the value of the event.
+
+How-to
+------
+
+The threshold and threshold control values can be provided per event:
+
+.. code-block:: sh
+
+ perf stat -e stall_slot/threshold=2,threshold_control=5/ \
+ -e dtlb_walk/threshold=10,threshold_control=4/
+
+And the following control values are supported:
+
+.. code-block::
+
+ 0: Not-equal
+ 1: Not-equal, count
+ 2: Equals
+ 3: Equals, count
+ 4: Greater-than-or-equal
+ 5: Greater-than-or-equal, count
+ 6: Less-than
+ 7: Less-than, count
+
+The maximum supported threshold value can be read from the caps of each
+PMU, for example:
+
+.. code-block:: sh
+
+ cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max
+
+ 0x000000ff
+
+If a value higher than this is given, then it will be silently clamped
+to the maximum. The highest possible maximum is 4095, as the config
+field for threshold is limited to 12 bits, and the Perf tool will refuse
+to parse higher values.
+
+If the PMU doesn't support FEAT_PMUv3_TH, then threshold_max will read
+0, and both threshold and threshold_control will be silently ignored.
--
2.34.1

2023-10-10 12:53:40

by Suzuki K Poulose

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] arm: perf: Include threshold control fields valid in PMEVTYPER mask

On 10/10/2023 11:40, James Clark wrote:
> FEAT_PMUv3_TH (Armv8.8) adds two new fields to PMEVTYPER, so include
> them in the mask. These aren't writable on 32 bit kernels as they are in
> the high part of the register, so split the mask definition to the asm
> files for each platform.
>
> Now where the value is used in some parts of KVM, include the asm file.
>
> Despite not being used on aarch32, TH and TC macros are added to the
> shared header file, because they are used in arm_pmuv3.c which is
> compiled for both platforms.
>
> Signed-off-by: James Clark <[email protected]>
> ---
> arch/arm/include/asm/arm_pmuv3.h | 3 +++
> arch/arm64/include/asm/arm_pmuv3.h | 4 ++++
> arch/arm64/kvm/pmu-emul.c | 1 +
> arch/arm64/kvm/sys_regs.c | 1 +
> include/linux/perf/arm_pmuv3.h | 3 ++-
> 5 files changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
> index 72529f5e2bed..491310133d09 100644
> --- a/arch/arm/include/asm/arm_pmuv3.h
> +++ b/arch/arm/include/asm/arm_pmuv3.h
> @@ -9,6 +9,9 @@
> #include <asm/cp15.h>
> #include <asm/cputype.h>
>
> +/* Mask for writable bits */
> +#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff
> +
> #define PMCCNTR __ACCESS_CP15_64(0, c9)
>
> #define PMCR __ACCESS_CP15(c9, 0, c12, 0)
> diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
> index 18dc2fb3d7b7..4faf4f7385a5 100644
> --- a/arch/arm64/include/asm/arm_pmuv3.h
> +++ b/arch/arm64/include/asm/arm_pmuv3.h
> @@ -11,6 +11,10 @@
> #include <asm/cpufeature.h>
> #include <asm/sysreg.h>
>
> +/* Mask for writable bits */
> +#define ARMV8_PMU_EVTYPE_MASK (0xc800ffffUL | ARMV8_PMU_EVTYPE_TH | \
> + ARMV8_PMU_EVTYPE_TC)
> +
> #define RETURN_READ_PMEVCNTRN(n) \
> return read_sysreg(pmevcntr##n##_el0)
> static inline unsigned long read_pmevcntrn(int n)
> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> index 6b066e04dc5d..0666212c0c15 100644
> --- a/arch/arm64/kvm/pmu-emul.c
> +++ b/arch/arm64/kvm/pmu-emul.c
> @@ -11,6 +11,7 @@
> #include <linux/perf_event.h>
> #include <linux/perf/arm_pmu.h>
> #include <linux/uaccess.h>
> +#include <asm/arm_pmuv3.h>
> #include <asm/kvm_emulate.h>
> #include <kvm/arm_pmu.h>
> #include <kvm/arm_vgic.h>

You may want to mention in the commit description that there is no
impact on the KVM emulating the Guest PMU with this change, as it
ignores the fields in the upper half for setting up the attributes
for the backing event.

Suzuki

2023-10-10 15:01:26

by Suzuki K Poulose

[permalink] [raw]
Subject: Re: [PATCH v2 2/3] arm64: perf: Add support for event counting threshold

On 10/10/2023 11:40, James Clark wrote:
> FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on
> events whose count meets a specified threshold condition. For example if
> PMEVTYPERn.TC (Threshold Control) is set to 0b101 (Greater than or
> equal, count), and the threshold is set to 2, then the PMU counter will
> now only increment by 1 when an event would have previously incremented
> the PMU counter by 2 or more on a single processor cycle.
>
> Two new Perf event config fields, 'threshold' and 'threshold_control'
> have been added for controlling the feature:
>
> $ perf stat -e stall_slot/threshold=2,threshold_control=5/
>
> A new capability for reading out the maximum supported threshold value
> has also been added:
>
> $ cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max
>
> 0x000000ff
>
> If a threshold higher than threshold_max is provided, then no error is
> generated but the threshold is clamped to the max value. If
> FEAT_PMUv3_TH isn't implemented or a 32 bit kernel is running, then
> threshold_max reads zero, and neither the 'threshold' nor
> 'threshold_control' parameters will be used.
>
> The threshold is per PMU counter, and there are potentially different
> threshold_max values per PMU type on heterogeneous systems.
>
> Bits higher than 32 now need to be written into PMEVTYPER, so
> armv8pmu_write_evtype() has to be updated to take an unsigned long value
> rather than u32 which gives the correct behavior on both aarch32 and 64.
>
> Signed-off-by: James Clark <[email protected]>
> ---
> drivers/perf/arm_pmuv3.c | 67 +++++++++++++++++++++++++++++++++-
> include/linux/perf/arm_pmuv3.h | 1 +
> 2 files changed, 67 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
> index 8fcaa26f0f8a..6d669b16a2bc 100644
> --- a/drivers/perf/arm_pmuv3.c
> +++ b/drivers/perf/arm_pmuv3.c
> @@ -15,6 +15,7 @@
> #include <clocksource/arm_arch_timer.h>
>
> #include <linux/acpi.h>
> +#include <linux/bitfield.h>
> #include <linux/clocksource.h>
> #include <linux/of.h>
> #include <linux/perf/arm_pmu.h>
> @@ -294,9 +295,16 @@ static const struct attribute_group armv8_pmuv3_events_attr_group = {
> .is_visible = armv8pmu_event_attr_is_visible,
> };
>
> +#define TH_LO 2
> +#define TH_HI 13
> +#define TH_CNTL_LO 14
> +#define TH_CNTL_HI 16
> +
> PMU_FORMAT_ATTR(event, "config:0-15");
> PMU_FORMAT_ATTR(long, "config1:0");
> PMU_FORMAT_ATTR(rdpmc, "config1:1");
> +PMU_FORMAT_ATTR(threshold, "config1:" __stringify(TH_LO) "-" __stringify(TH_HI));
> +PMU_FORMAT_ATTR(threshold_control, "config1:" __stringify(TH_CNTL_LO) "-" __stringify(TH_CNTL_HI));


>
> static int sysctl_perf_user_access __read_mostly;
>
> @@ -310,10 +318,22 @@ static inline bool armv8pmu_event_want_user_access(struct perf_event *event)
> return event->attr.config1 & 0x2;
> }
>
> +static inline u32 armv8pmu_event_threshold(struct perf_event_attr *attr)
> +{
> + return FIELD_GET(GENMASK(TH_HI, TH_LO), attr->config1);
> +}
> +
> +static inline u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr)
> +{
> + return FIELD_GET(GENMASK(TH_CNTL_HI, TH_CNTL_LO), attr->config1);
> +}
> +
> static struct attribute *armv8_pmuv3_format_attrs[] = {
> &format_attr_event.attr,
> &format_attr_long.attr,
> &format_attr_rdpmc.attr,

> + &format_attr_threshold.attr,
> + &format_attr_threshold_control.attr,

Given this is not supported for !CONFIG_ARM64, does it make sense to
remove them for that case, given we already take care of this in
the code using IS_ENABLED(CONFIG_ARM64) ? The nice part is that
the arm32 doesn't see something that is never usable. I understand
the maxthreshold=0 already gives out the hint.

Rest looks fine to me.

> NULL,
> };
>
> @@ -365,10 +385,38 @@ static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
>
> static DEVICE_ATTR_RO(bus_width);
>
> +static u32 threshold_max(struct arm_pmu *cpu_pmu)
> +{
> + /*
> + * PMMIR.WIDTH is readable and non-zero on aarch32, but it would be
> + * impossible to write the threshold in the upper 32 bits of PMEVTYPER.
> + */
> + if (IS_ENABLED(CONFIG_ARM))
> + return 0;
> +
> + /*
> + * The largest value that can be written to PMEVTYPER<n>_EL0.TH is
> + * (2 ^ PMMIR.THWIDTH) - 1.
> + */
> + return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1;
> +}
> +
> +static ssize_t threshold_max_show(struct device *dev,
> + struct device_attribute *attr, char *page)
> +{
> + struct pmu *pmu = dev_get_drvdata(dev);
> + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
> +
> + return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
> +}
> +
> +static DEVICE_ATTR_RO(threshold_max);
> +
> static struct attribute *armv8_pmuv3_caps_attrs[] = {
> &dev_attr_slots.attr,
> &dev_attr_bus_slots.attr,
> &dev_attr_bus_width.attr,
> + &dev_attr_threshold_max.attr,

Similarly here and could avoid the IS_ENABLED(CONFIG_ARM) above.

Suzuki


> NULL,
> };
>
> @@ -552,7 +600,7 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value)
> armv8pmu_write_hw_counter(event, value);
> }
>
> -static inline void armv8pmu_write_evtype(int idx, u32 val)
> +static inline void armv8pmu_write_evtype(int idx, unsigned long val)
> {
> u32 counter = ARMV8_IDX_TO_COUNTER(idx);
>
> @@ -914,6 +962,10 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
> struct perf_event_attr *attr)
> {
> unsigned long config_base = 0;
> + struct perf_event *perf_event = container_of(attr, struct perf_event,
> + attr);
> + struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
> + u32 th, th_max;
>
> if (attr->exclude_idle)
> return -EPERM;
> @@ -945,6 +997,19 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
> if (attr->exclude_user)
> config_base |= ARMV8_PMU_EXCLUDE_EL0;
>
> + /*
> + * Insert event counting threshold (FEAT_PMUv3_TH) values. If
> + * FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will be
> + * 0 and no values will be written.
> + */
> + th_max = threshold_max(cpu_pmu);
> + if (IS_ENABLED(CONFIG_ARM64) && th_max) {
> + th = min(armv8pmu_event_threshold(attr), th_max);
> + config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
> + config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
> + armv8pmu_event_threshold_control(attr));
> + }
> +
> /*
> * Install the filter into config_base as this is used to
> * construct the event type.
> diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
> index ec3a01502e7c..753f8dbd9d10 100644
> --- a/include/linux/perf/arm_pmuv3.h
> +++ b/include/linux/perf/arm_pmuv3.h
> @@ -255,6 +255,7 @@
> #define ARMV8_PMU_BUS_SLOTS_MASK 0xff
> #define ARMV8_PMU_BUS_WIDTH_SHIFT 16
> #define ARMV8_PMU_BUS_WIDTH_MASK 0xf
> +#define ARMV8_PMU_THWIDTH GENMASK(23, 20)
>
> /*
> * This code is really good

2023-10-10 16:09:19

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v2 2/3] arm64: perf: Add support for event counting threshold



On 10/10/2023 16:00, Suzuki K Poulose wrote:
> On 10/10/2023 11:40, James Clark wrote:
>> FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on
>> events whose count meets a specified threshold condition. For example if
>> PMEVTYPERn.TC (Threshold Control) is set to 0b101 (Greater than or
>> equal, count), and the threshold is set to 2, then the PMU counter will
>> now only increment by 1 when an event would have previously incremented
>> the PMU counter by 2 or more on a single processor cycle.
>>
>> Two new Perf event config fields, 'threshold' and 'threshold_control'
>> have been added for controlling the feature:
>>
>>    $ perf stat -e stall_slot/threshold=2,threshold_control=5/
>>
>> A new capability for reading out the maximum supported threshold value
>> has also been added:
>>
>>    $ cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max
>>
>>    0x000000ff
>>
>> If a threshold higher than threshold_max is provided, then no error is
>> generated but the threshold is clamped to the max value. If
>> FEAT_PMUv3_TH isn't implemented or a 32 bit kernel is running, then
>> threshold_max reads zero, and neither the 'threshold' nor
>> 'threshold_control' parameters will be used.
>>
>> The threshold is per PMU counter, and there are potentially different
>> threshold_max values per PMU type on heterogeneous systems.
>>
>> Bits higher than 32 now need to be written into PMEVTYPER, so
>> armv8pmu_write_evtype() has to be updated to take an unsigned long value
>> rather than u32 which gives the correct behavior on both aarch32 and 64.
>>
>> Signed-off-by: James Clark <[email protected]>
>> ---
>>   drivers/perf/arm_pmuv3.c       | 67 +++++++++++++++++++++++++++++++++-
>>   include/linux/perf/arm_pmuv3.h |  1 +
>>   2 files changed, 67 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
>> index 8fcaa26f0f8a..6d669b16a2bc 100644
>> --- a/drivers/perf/arm_pmuv3.c
>> +++ b/drivers/perf/arm_pmuv3.c
>> @@ -15,6 +15,7 @@
>>   #include <clocksource/arm_arch_timer.h>
>>     #include <linux/acpi.h>
>> +#include <linux/bitfield.h>
>>   #include <linux/clocksource.h>
>>   #include <linux/of.h>
>>   #include <linux/perf/arm_pmu.h>
>> @@ -294,9 +295,16 @@ static const struct attribute_group
>> armv8_pmuv3_events_attr_group = {
>>       .is_visible = armv8pmu_event_attr_is_visible,
>>   };
>>   +#define TH_LO    2
>> +#define TH_HI    13
>> +#define TH_CNTL_LO    14
>> +#define TH_CNTL_HI    16
>> +
>>   PMU_FORMAT_ATTR(event, "config:0-15");
>>   PMU_FORMAT_ATTR(long, "config1:0");
>>   PMU_FORMAT_ATTR(rdpmc, "config1:1");
>> +PMU_FORMAT_ATTR(threshold, "config1:" __stringify(TH_LO) "-"
>> __stringify(TH_HI));
>> +PMU_FORMAT_ATTR(threshold_control, "config1:" __stringify(TH_CNTL_LO)
>> "-" __stringify(TH_CNTL_HI));
>
>
>>     static int sysctl_perf_user_access __read_mostly;
>>   @@ -310,10 +318,22 @@ static inline bool
>> armv8pmu_event_want_user_access(struct perf_event *event)
>>       return event->attr.config1 & 0x2;
>>   }
>>   +static inline u32 armv8pmu_event_threshold(struct perf_event_attr
>> *attr)
>> +{
>> +    return FIELD_GET(GENMASK(TH_HI, TH_LO), attr->config1);
>> +}
>> +
>> +static inline u8 armv8pmu_event_threshold_control(struct
>> perf_event_attr *attr)
>> +{
>> +    return FIELD_GET(GENMASK(TH_CNTL_HI, TH_CNTL_LO), attr->config1);
>> +}
>> +
>>   static struct attribute *armv8_pmuv3_format_attrs[] = {
>>       &format_attr_event.attr,
>>       &format_attr_long.attr,
>>       &format_attr_rdpmc.attr,
>
>> +    &format_attr_threshold.attr,
>> +    &format_attr_threshold_control.attr,
>
> Given this is not supported for !CONFIG_ARM64, does it make sense to
> remove them for that case, given we already take care of this in
> the code using IS_ENABLED(CONFIG_ARM64) ? The nice part is that
> the arm32 doesn't see something that is never usable. I understand
> the maxthreshold=0 already gives out the hint.

I thought about it, but wouldn't it just move the IS_ENABLED somewhere
else? I think it's messier on the kernel side to conditionally show or
hide these files.

From the userspace side, they already have to handle both cases of the
missing file (old kernels) and a zero value. So I can't really see a
difference that makes it worthwhile to change.

Unless there is some kind of precedent to hiding sysfs files for
unavailable features? I mean maybe in that case you would also say to
hide the files if max width was 0 even on aarch64? If we don't want to
show things that aren't usable?

One other note that may not be obvious: the IS_ENABLED(CONFIG_ARM64) is
only needed to make the build succeed by not shifting things above 32
bits. Because the compiler couldn't see that threshold_max() always
returns 0 on aarch32. If it could then only one IS_ENABLED(CONFIG_ARM)
would be needed in threshold_max(). Although that's not really related
to showing or hiding the files.

>
> Rest looks fine to me.
>
>>       NULL,
>>   };
>>   @@ -365,10 +385,38 @@ static ssize_t bus_width_show(struct device
>> *dev, struct device_attribute *attr,
>>     static DEVICE_ATTR_RO(bus_width);
>>   +static u32 threshold_max(struct arm_pmu *cpu_pmu)
>> +{
>> +    /*
>> +     * PMMIR.WIDTH is readable and non-zero on aarch32, but it would be
>> +     * impossible to write the threshold in the upper 32 bits of
>> PMEVTYPER.
>> +     */
>> +    if (IS_ENABLED(CONFIG_ARM))
>> +        return 0;
>> +
>> +    /*
>> +     * The largest value that can be written to PMEVTYPER<n>_EL0.TH is
>> +     * (2 ^ PMMIR.THWIDTH) - 1.
>> +     */
>> +    return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1;
>> +}
>> +
>> +static ssize_t threshold_max_show(struct device *dev,
>> +                  struct device_attribute *attr, char *page)
>> +{
>> +    struct pmu *pmu = dev_get_drvdata(dev);
>> +    struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
>> +
>> +    return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
>> +}
>> +
>> +static DEVICE_ATTR_RO(threshold_max);
>> +
>>   static struct attribute *armv8_pmuv3_caps_attrs[] = {
>>       &dev_attr_slots.attr,
>>       &dev_attr_bus_slots.attr,
>>       &dev_attr_bus_width.attr,
>> +    &dev_attr_threshold_max.attr,
>
> Similarly here and could avoid the IS_ENABLED(CONFIG_ARM) above.

I don't think it can be avoided entirely, just moved somewhere else.

>
> Suzuki
>
>
>>       NULL,
>>   };
>>   @@ -552,7 +600,7 @@ static void armv8pmu_write_counter(struct
>> perf_event *event, u64 value)
>>           armv8pmu_write_hw_counter(event, value);
>>   }
>>   -static inline void armv8pmu_write_evtype(int idx, u32 val)
>> +static inline void armv8pmu_write_evtype(int idx, unsigned long val)
>>   {
>>       u32 counter = ARMV8_IDX_TO_COUNTER(idx);
>>   @@ -914,6 +962,10 @@ static int armv8pmu_set_event_filter(struct
>> hw_perf_event *event,
>>                        struct perf_event_attr *attr)
>>   {
>>       unsigned long config_base = 0;
>> +    struct perf_event *perf_event = container_of(attr, struct
>> perf_event,
>> +                             attr);
>> +    struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
>> +    u32 th, th_max;
>>         if (attr->exclude_idle)
>>           return -EPERM;
>> @@ -945,6 +997,19 @@ static int armv8pmu_set_event_filter(struct
>> hw_perf_event *event,
>>       if (attr->exclude_user)
>>           config_base |= ARMV8_PMU_EXCLUDE_EL0;
>>   +    /*
>> +     * Insert event counting threshold (FEAT_PMUv3_TH) values. If
>> +     * FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max)
>> will be
>> +     * 0 and no values will be written.
>> +     */
>> +    th_max = threshold_max(cpu_pmu);
>> +    if (IS_ENABLED(CONFIG_ARM64) && th_max) {
>> +        th = min(armv8pmu_event_threshold(attr), th_max);
>> +        config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
>> +        config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
>> +                      armv8pmu_event_threshold_control(attr));
>> +    }
>> +
>>       /*
>>        * Install the filter into config_base as this is used to
>>        * construct the event type.
>> diff --git a/include/linux/perf/arm_pmuv3.h
>> b/include/linux/perf/arm_pmuv3.h
>> index ec3a01502e7c..753f8dbd9d10 100644
>> --- a/include/linux/perf/arm_pmuv3.h
>> +++ b/include/linux/perf/arm_pmuv3.h
>> @@ -255,6 +255,7 @@
>>   #define ARMV8_PMU_BUS_SLOTS_MASK 0xff
>>   #define ARMV8_PMU_BUS_WIDTH_SHIFT 16
>>   #define ARMV8_PMU_BUS_WIDTH_MASK 0xf
>> +#define ARMV8_PMU_THWIDTH GENMASK(23, 20)
>>     /*
>>    * This code is really good
>