2023-06-13 10:15:17

by Ravi Bangoria

[permalink] [raw]
Subject: [PATCH 4/4] perf mem amd: Scan all PMUs instead of just core ones

Scanning only core PMUs is not sufficient on AMD since perf mem on
AMD uses IBS OP PMU, which is independent of core PMU.

Signed-off-by: Ravi Bangoria <[email protected]>
---
tools/perf/arch/x86/util/mem-events.c | 5 +++++
tools/perf/util/mem-events.c | 16 ++++++++++++----
tools/perf/util/mem-events.h | 1 +
3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
index a8a782bcb121..43af872e89a6 100644
--- a/tools/perf/arch/x86/util/mem-events.c
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -91,3 +91,8 @@ char *perf_mem_events__name(int i, char *pmu_name)

return (char *)e->name;
}
+
+bool perf_mem_events__via_core_pmus(void)
+{
+ return !x86__is_amd_cpu();
+}
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index be15aadb6b14..0c04f883d634 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -109,6 +109,14 @@ static bool perf_mem_event__supported(const char *mnt, char *sysfs_name)
return !stat(path, &st);
}

+bool __weak perf_mem_events__via_core_pmus(void)
+{
+ return true;
+}
+
+#define perf_mem_scan_next_pmu(pmu) \
+ (perf_mem_events__via_core_pmus() ? perf_pmus__scan_core(pmu) : perf_pmus__scan(pmu))
+
int perf_mem_events__init(void)
{
const char *mnt = sysfs__mount();
@@ -130,7 +138,7 @@ int perf_mem_events__init(void)
if (!e->tag)
continue;

- while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ while ((pmu = perf_mem_scan_next_pmu(pmu)) != NULL) {
scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name);
e->supported |= perf_mem_event__supported(mnt, sysfs_name);
}
@@ -165,7 +173,7 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
char sysfs_name[100];
struct perf_pmu *pmu = NULL;

- while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ while ((pmu = perf_mem_scan_next_pmu(pmu)) != NULL) {
scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
pmu->name);
if (!perf_mem_event__supported(mnt, sysfs_name)) {
@@ -188,7 +196,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
if (!e->record)
continue;

- if (perf_pmus__num_core_pmus() == 1) {
+ if (perf_pmus__num_mem_pmus() == 1) {
if (!e->supported) {
pr_err("failed: event '%s' not supported\n",
perf_mem_events__name(j, NULL));
@@ -203,7 +211,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
return -1;
}

- while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ while ((pmu = perf_mem_scan_next_pmu(pmu)) != NULL) {
rec_argv[i++] = "-e";
s = perf_mem_events__name(j, pmu->name);
if (s) {
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 12372309d60e..d650eb311113 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -36,6 +36,7 @@ enum {
extern unsigned int perf_mem_events__loads_ldlat;

int perf_mem_events__parse(const char *str);
+bool perf_mem_events__via_core_pmus(void);
int perf_mem_events__init(void);

char *perf_mem_events__name(int i, char *pmu_name);
--
2.40.1



2023-06-13 15:50:25

by Ian Rogers

[permalink] [raw]
Subject: Re: [PATCH 4/4] perf mem amd: Scan all PMUs instead of just core ones

On Tue, Jun 13, 2023 at 2:56 AM Ravi Bangoria <[email protected]> wrote:
>
> Scanning only core PMUs is not sufficient on AMD since perf mem on
> AMD uses IBS OP PMU, which is independent of core PMU.
>
> Signed-off-by: Ravi Bangoria <[email protected]>
> ---
> tools/perf/arch/x86/util/mem-events.c | 5 +++++
> tools/perf/util/mem-events.c | 16 ++++++++++++----
> tools/perf/util/mem-events.h | 1 +
> 3 files changed, 18 insertions(+), 4 deletions(-)
>
> diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
> index a8a782bcb121..43af872e89a6 100644
> --- a/tools/perf/arch/x86/util/mem-events.c
> +++ b/tools/perf/arch/x86/util/mem-events.c
> @@ -91,3 +91,8 @@ char *perf_mem_events__name(int i, char *pmu_name)
>
> return (char *)e->name;
> }
> +
> +bool perf_mem_events__via_core_pmus(void)
> +{
> + return !x86__is_amd_cpu();
> +}
> diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
> index be15aadb6b14..0c04f883d634 100644
> --- a/tools/perf/util/mem-events.c
> +++ b/tools/perf/util/mem-events.c
> @@ -109,6 +109,14 @@ static bool perf_mem_event__supported(const char *mnt, char *sysfs_name)
> return !stat(path, &st);
> }
>
> +bool __weak perf_mem_events__via_core_pmus(void)
> +{
> + return true;
> +}
> +
> +#define perf_mem_scan_next_pmu(pmu) \
> + (perf_mem_events__via_core_pmus() ? perf_pmus__scan_core(pmu) : perf_pmus__scan(pmu))
> +
> int perf_mem_events__init(void)
> {
> const char *mnt = sysfs__mount();
> @@ -130,7 +138,7 @@ int perf_mem_events__init(void)
> if (!e->tag)
> continue;
>
> - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
> + while ((pmu = perf_mem_scan_next_pmu(pmu)) != NULL) {
> scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name);
> e->supported |= perf_mem_event__supported(mnt, sysfs_name);
> }
> @@ -165,7 +173,7 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
> char sysfs_name[100];
> struct perf_pmu *pmu = NULL;
>
> - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
> + while ((pmu = perf_mem_scan_next_pmu(pmu)) != NULL) {

It was my mistake to optimize this. I think we can just go back to:
perf_pmus__scan(pmu)
which would remove a lot of the weak/macros etc. here. We can add a
comment explaining why this is scan and not scan_core (because of AMD). I plan
to further reduce the overhead of PMUs, so I'm not worried about losing
the small performance win from this.

Thanks,
Ian

> scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
> pmu->name);
> if (!perf_mem_event__supported(mnt, sysfs_name)) {
> @@ -188,7 +196,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
> if (!e->record)
> continue;
>
> - if (perf_pmus__num_core_pmus() == 1) {
> + if (perf_pmus__num_mem_pmus() == 1) {
> if (!e->supported) {
> pr_err("failed: event '%s' not supported\n",
> perf_mem_events__name(j, NULL));
> @@ -203,7 +211,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
> return -1;
> }
>
> - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
> + while ((pmu = perf_mem_scan_next_pmu(pmu)) != NULL) {
> rec_argv[i++] = "-e";
> s = perf_mem_events__name(j, pmu->name);
> if (s) {
> diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
> index 12372309d60e..d650eb311113 100644
> --- a/tools/perf/util/mem-events.h
> +++ b/tools/perf/util/mem-events.h
> @@ -36,6 +36,7 @@ enum {
> extern unsigned int perf_mem_events__loads_ldlat;
>
> int perf_mem_events__parse(const char *str);
> +bool perf_mem_events__via_core_pmus(void);
> int perf_mem_events__init(void);
>
> char *perf_mem_events__name(int i, char *pmu_name);
> --
> 2.40.1
>

2023-06-14 04:43:22

by Ravi Bangoria

[permalink] [raw]
Subject: Re: [PATCH 4/4] perf mem amd: Scan all PMUs instead of just core ones

>> @@ -165,7 +173,7 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
>> char sysfs_name[100];
>> struct perf_pmu *pmu = NULL;
>>
>> - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
>> + while ((pmu = perf_mem_scan_next_pmu(pmu)) != NULL) {
>
> It was my mistake to optimize this,

Not really. I mean, there was already a bug which just got exacerbated.

> I think we can just go back to:
> perf_pmus__scan(pmu)
> which would remove a lot of the weak/macros etc. here. We can have a
> comment as to why this is scan not scan_core, because of AMD. I plan
> to further improve overhead of PMUs so I'm not worried about losing
> the small performance win from this.

Sure. Let me do that.

Thanks,
Ravi