2022-05-19 12:45:21

by Sandipan Das

[permalink] [raw]
Subject: [PATCH 0/5] perf/x86/amd: Add PerfMonV2 DF enhancements

Add support for using AMD Performance Monitoring Version 2
(PerfMonV2) features for Data Fabric (DF) events on Zen 4
processors. Utilize new CPUID leaf to detect PerfMonV2 and
determine the number of available PMCs. Also introduce the
new event encoding format and RDPMC mappings for accessing
additional DF counters.

For example, without these changes, larger metric groups that
require more than four counters cannot be counted, even on a
Zen 4 platform that supports 16 DF counters. Only partial
counting is possible, by using "--metric-no-group".

$ sudo perf stat -M nps1_die_to_dram true

Before:

Performance counter stats for 'system wide':

<not counted> dram_channel_data_controller_4 (0.00%)
<not counted> dram_channel_data_controller_1 (0.00%)
<not counted> dram_channel_data_controller_6 (0.00%)
<not counted> dram_channel_data_controller_3 (0.00%)
<not counted> dram_channel_data_controller_0 (0.00%)
<not counted> dram_channel_data_controller_5 (0.00%)
<not counted> dram_channel_data_controller_2 (0.00%)
<not counted> dram_channel_data_controller_7 (0.00%)
886817 ns duration_time

0.000886817 seconds time elapsed

After:

Performance counter stats for 'system wide':

0 dram_channel_data_controller_4 # 0.2 MiB nps1_die_to_dram
0 dram_channel_data_controller_1
0 dram_channel_data_controller_6
2838 dram_channel_data_controller_3
0 dram_channel_data_controller_0
0 dram_channel_data_controller_5
0 dram_channel_data_controller_2
0 dram_channel_data_controller_7
896438 ns duration_time

0.000896438 seconds time elapsed

Sandipan Das (5):
perf/x86/amd/uncore: Use dynamic events array
perf/x86/amd/uncore: Use attr_update for format attributes
perf/x86/amd/uncore: Detect available DF counters
perf/x86/amd/uncore: Add PerfMonV2 DF event format
perf/x86/amd/uncore: Add PerfMonV2 RDPMC assignments

arch/x86/events/amd/uncore.c | 146 ++++++++++++++++++++++++------
arch/x86/include/asm/perf_event.h | 16 ++++
2 files changed, 136 insertions(+), 26 deletions(-)

--
2.34.1



2022-05-19 14:03:02

by Sandipan Das

[permalink] [raw]
Subject: [PATCH 4/5] perf/x86/amd/uncore: Add PerfMonV2 DF event format

If AMD Performance Monitoring Version 2 (PerfMonV2) is
supported, use bits 0-7, 32-37 as EventSelect and bits
8-15, 24-27 as UnitMask for Data Fabric (DF) events.

Signed-off-by: Sandipan Das <[email protected]>
---
arch/x86/events/amd/uncore.c | 24 +++++++++++++++++-------
arch/x86/include/asm/perf_event.h | 13 +++++++++++++
2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 233dd405dd06..ff4238eff087 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -209,10 +209,14 @@ static int amd_uncore_event_init(struct perf_event *event)
{
struct amd_uncore *uncore;
struct hw_perf_event *hwc = &event->hw;
+ u64 event_mask = AMD64_RAW_EVENT_MASK_NB;

if (event->attr.type != event->pmu->type)
return -ENOENT;

+ if (pmu_version >= 2 && is_nb_event(event))
+ event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB;
+
/*
* NB and Last level cache counters (MSRs) are shared across all cores
* that share the same NB / Last level cache. On family 16h and below,
@@ -221,7 +225,7 @@ static int amd_uncore_event_init(struct perf_event *event)
* out. So we do not support sampling and per-thread events via
* CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
*/
- hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
+ hwc->config = event->attr.config & event_mask;
hwc->idx = -1;

if (event->cpu < 0)
@@ -300,8 +304,10 @@ static struct device_attribute format_attr_##_var = \

DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */
+DEFINE_UNCORE_FORMAT_ATTR(event14v2, event, "config:0-7,32-37"); /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(umask8, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(umask12, umask, "config:8-15,24-27"); /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51"); /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63"); /* F17h L3 */
@@ -313,14 +319,14 @@ DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */
/* Common DF and NB attributes */
static struct attribute *amd_uncore_df_format_attr[] = {
&format_attr_event12.attr, /* event */
- &format_attr_umask.attr, /* umask */
+ &format_attr_umask8.attr, /* umask */
NULL,
};

/* Common L2 and L3 attributes */
static struct attribute *amd_uncore_l3_format_attr[] = {
&format_attr_event12.attr, /* event */
- &format_attr_umask.attr, /* umask */
+ &format_attr_umask8.attr, /* umask */
NULL, /* threadmask */
NULL,
};
@@ -659,8 +665,12 @@ static int __init amd_uncore_init(void)
}

if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
- if (boot_cpu_data.x86 >= 0x17)
+ if (pmu_version >= 2) {
+ *df_attr++ = &format_attr_event14v2.attr;
+ *df_attr++ = &format_attr_umask12.attr;
+ } else if (boot_cpu_data.x86 >= 0x17) {
*df_attr = &format_attr_event14.attr;
+ }

amd_uncore_nb = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_nb) {
@@ -686,11 +696,11 @@ static int __init amd_uncore_init(void)
if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
if (boot_cpu_data.x86 >= 0x19) {
*l3_attr++ = &format_attr_event8.attr;
- *l3_attr++ = &format_attr_umask.attr;
+ *l3_attr++ = &format_attr_umask8.attr;
*l3_attr++ = &format_attr_threadmask2.attr;
} else if (boot_cpu_data.x86 >= 0x17) {
*l3_attr++ = &format_attr_event8.attr;
- *l3_attr++ = &format_attr_umask.attr;
+ *l3_attr++ = &format_attr_umask8.attr;
*l3_attr++ = &format_attr_threadmask8.attr;
}

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index af157aa74f4e..34348ae41cdb 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -89,6 +89,19 @@
#define AMD64_RAW_EVENT_MASK_NB \
(AMD64_EVENTSEL_EVENT | \
ARCH_PERFMON_EVENTSEL_UMASK)
+
+#define AMD64_PERFMON_V2_EVENTSEL_EVENT_NB \
+ (AMD64_EVENTSEL_EVENT | \
+ GENMASK_ULL(37, 36))
+
+#define AMD64_PERFMON_V2_EVENTSEL_UMASK_NB \
+ (ARCH_PERFMON_EVENTSEL_UMASK | \
+ GENMASK_ULL(27, 24))
+
+#define AMD64_PERFMON_V2_RAW_EVENT_MASK_NB \
+ (AMD64_PERFMON_V2_EVENTSEL_EVENT_NB | \
+ AMD64_PERFMON_V2_EVENTSEL_UMASK_NB)
+
#define AMD64_NUM_COUNTERS 4
#define AMD64_NUM_COUNTERS_CORE 6
#define AMD64_NUM_COUNTERS_NB 4
--
2.34.1


2022-05-19 14:25:13

by Sandipan Das

[permalink] [raw]
Subject: [PATCH 5/5] perf/x86/amd/uncore: Add PerfMonV2 RDPMC assignments

The current RDPMC assignment scheme maps four DF PMCs and
six L3 PMCs from index 6 to 15.

If AMD Performance Monitoring Version 2 (PerfMonV2) is
supported, there may be additional DF counters available
which are mapped starting from index 16 i.e. just after
the L3 counters. Update the RDPMC assignments accordingly.

Signed-off-by: Sandipan Das <[email protected]>
---
arch/x86/events/amd/uncore.c | 10 ++++++++++
1 file changed, 10 insertions(+)

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index ff4238eff087..d568afc705d2 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -158,6 +158,16 @@ static int amd_uncore_add(struct perf_event *event, int flags)
hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

+ /*
+ * The first four DF counters are accessible via RDPMC index 6 to 9
+ * followed by the L3 counters from index 10 to 15. For processors
+ * with more than four DF counters, the DF RDPMC assignments become
+ * discontiguous as the additional counters are accessible starting
+ * from index 16.
+ */
+ if (is_nb_event(event) && hwc->idx >= NUM_COUNTERS_NB)
+ hwc->event_base_rdpmc += NUM_COUNTERS_L3;
+
if (flags & PERF_EF_START)
amd_uncore_start(event, PERF_EF_RELOAD);

--
2.34.1


2022-05-19 15:34:19

by Sandipan Das

[permalink] [raw]
Subject: [PATCH 3/5] perf/x86/amd/uncore: Detect available DF counters

If AMD Performance Monitoring Version 2 (PerfMonV2) is
supported, use CPUID leaf 0x80000022 EBX to detect the
number of Data Fabric (DF) PMCs. This offers more
flexibility if the counts change in later processor
families.

Signed-off-by: Sandipan Das <[email protected]>
---
arch/x86/events/amd/uncore.c | 10 ++++++++++
arch/x86/include/asm/perf_event.h | 3 +++
2 files changed, 13 insertions(+)

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index c3e218dccf6e..233dd405dd06 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -30,6 +30,7 @@
#undef pr_fmt
#define pr_fmt(fmt) "amd_uncore: " fmt

+static int pmu_version;
static int num_counters_llc;
static int num_counters_nb;
static bool l3_mask;
@@ -629,6 +630,7 @@ static int __init amd_uncore_init(void)
{
struct attribute **df_attr = amd_uncore_df_format_attr;
struct attribute **l3_attr = amd_uncore_l3_format_attr;
+ union cpuid_0x80000022_ebx ebx;
int ret = -ENODEV;

if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
@@ -638,6 +640,9 @@ static int __init amd_uncore_init(void)
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
return -ENODEV;

+ if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
+ pmu_version = 2;
+
num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L2;
if (boot_cpu_data.x86 >= 0x17) {
@@ -666,6 +671,11 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_nb;

+ if (pmu_version >= 2) {
+ ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
+ num_counters_nb = ebx.split.num_df_pmc;
+ }
+
pr_info("%d %s %s counters detected\n", num_counters_nb,
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
amd_nb_pmu.name);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 409725e86f42..af157aa74f4e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -194,6 +194,9 @@ union cpuid_0x80000022_ebx {
struct {
/* Number of Core Performance Counters */
unsigned int num_core_pmc:4;
+ unsigned int reserved:6;
+ /* Number of Data Fabric Counters */
+ unsigned int num_df_pmc:6;
} split;
unsigned int full;
};
--
2.34.1


2022-05-20 21:39:35

by Sandipan Das

[permalink] [raw]
Subject: [PATCH 2/5] perf/x86/amd/uncore: Use attr_update for format attributes

Use the update_attrs attribute group introduced by commit
f3a3a8257e5a ("perf/core: Add attr_groups_update into struct
pmu") and the is_visible() callback to populate the
family-specific attributes for uncore events.

The changes apply to attributes that are unique to families
such as slicemask for Family 17h and coreid for Family 19h.
The addition of common attributes such as event and umask,
whose formats change across families, remains unchanged.

Signed-off-by: Sandipan Das <[email protected]>
---
arch/x86/events/amd/uncore.c | 68 ++++++++++++++++++++++++++++--------
1 file changed, 54 insertions(+), 14 deletions(-)

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 8dfcf93711ab..c3e218dccf6e 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -246,6 +246,19 @@ static int amd_uncore_event_init(struct perf_event *event)
return 0;
}

+static umode_t
+amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
+ attr->mode : 0;
+}
+
+static umode_t
+amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
+}
+
static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -296,20 +309,33 @@ DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3
DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */

+/* Common DF and NB attributes */
static struct attribute *amd_uncore_df_format_attr[] = {
- &format_attr_event12.attr, /* event14 if F17h+ */
- &format_attr_umask.attr,
+ &format_attr_event12.attr, /* event */
+ &format_attr_umask.attr, /* umask */
NULL,
};

+/* Common L2 and L3 attributes */
static struct attribute *amd_uncore_l3_format_attr[] = {
- &format_attr_event12.attr, /* event8 if F17h+ */
- &format_attr_umask.attr,
- NULL, /* slicemask if F17h, coreid if F19h */
- NULL, /* threadmask8 if F17h, enallslices if F19h */
- NULL, /* enallcores if F19h */
- NULL, /* sliceid if F19h */
- NULL, /* threadmask2 if F19h */
+ &format_attr_event12.attr, /* event */
+ &format_attr_umask.attr, /* umask */
+ NULL, /* threadmask */
+ NULL,
+};
+
+/* F17h unique L3 attributes */
+static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
+ &format_attr_slicemask.attr, /* slicemask */
+ NULL,
+};
+
+/* F19h unique L3 attributes */
+static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
+ &format_attr_coreid.attr, /* coreid */
+ &format_attr_enallslices.attr, /* enallslices */
+ &format_attr_enallcores.attr, /* enallcores */
+ &format_attr_sliceid.attr, /* sliceid */
NULL,
};

@@ -323,6 +349,18 @@ static struct attribute_group amd_uncore_l3_format_group = {
.attrs = amd_uncore_l3_format_attr,
};

+static struct attribute_group amd_f17h_uncore_l3_format_group = {
+ .name = "format",
+ .attrs = amd_f17h_uncore_l3_format_attr,
+ .is_visible = amd_f17h_uncore_is_visible,
+};
+
+static struct attribute_group amd_f19h_uncore_l3_format_group = {
+ .name = "format",
+ .attrs = amd_f19h_uncore_l3_format_attr,
+ .is_visible = amd_f19h_uncore_is_visible,
+};
+
static const struct attribute_group *amd_uncore_df_attr_groups[] = {
&amd_uncore_attr_group,
&amd_uncore_df_format_group,
@@ -335,6 +373,12 @@ static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
NULL,
};

+static const struct attribute_group *amd_uncore_l3_attr_update[] = {
+ &amd_f17h_uncore_l3_format_group,
+ &amd_f19h_uncore_l3_format_group,
+ NULL,
+};
+
static struct pmu amd_nb_pmu = {
.task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_df_attr_groups,
@@ -352,6 +396,7 @@ static struct pmu amd_nb_pmu = {
static struct pmu amd_llc_pmu = {
.task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_l3_attr_groups,
+ .attr_update = amd_uncore_l3_attr_update,
.name = "amd_l2",
.event_init = amd_uncore_event_init,
.add = amd_uncore_add,
@@ -632,15 +677,10 @@ static int __init amd_uncore_init(void)
if (boot_cpu_data.x86 >= 0x19) {
*l3_attr++ = &format_attr_event8.attr;
*l3_attr++ = &format_attr_umask.attr;
- *l3_attr++ = &format_attr_coreid.attr;
- *l3_attr++ = &format_attr_enallslices.attr;
- *l3_attr++ = &format_attr_enallcores.attr;
- *l3_attr++ = &format_attr_sliceid.attr;
*l3_attr++ = &format_attr_threadmask2.attr;
} else if (boot_cpu_data.x86 >= 0x17) {
*l3_attr++ = &format_attr_event8.attr;
*l3_attr++ = &format_attr_umask.attr;
- *l3_attr++ = &format_attr_slicemask.attr;
*l3_attr++ = &format_attr_threadmask8.attr;
}

--
2.34.1