2023-09-25 16:19:12

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v1] perf pmus: Make PMU alias name loading lazy

PMU alias names were computed when the first perf_pmu is created,
scanning all PMUs in event sources for a file called alias that
generally doesn't exist. Switch to trying to load the file when all
PMU related files are loaded in lookup. This would cause a PMU name
lookup of an alias name to fail if no PMUs were loaded, so in that
case all PMUs are loaded and the find repeated. The overhead is
similar but in the (very) general case not all PMUs are scanned for
the alias file.

As the overhead occurs once per invocation it doesn't show in perf
bench internals pmu-scan. On a tigerlake machine, the number of openat
system calls for an event of cpu/cycles/ with perf stat reduces from
94 to 69 (ie 25 fewer openat calls).

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/x86/util/pmu.c | 139 ---------------------------------
tools/perf/util/pmu.c | 39 ++++-----
tools/perf/util/pmu.h | 2 -
tools/perf/util/pmus.c | 10 +++
4 files changed, 31 insertions(+), 159 deletions(-)

diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index f428cffb0378..8b53ca468a50 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -17,15 +17,6 @@
#include "../../../util/pmus.h"
#include "env.h"

-struct pmu_alias {
- char *name;
- char *alias;
- struct list_head list;
-};
-
-static LIST_HEAD(pmu_alias_name_list);
-static bool cached_list;
-
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
{
#ifdef HAVE_AUXTRACE_SUPPORT
@@ -41,136 +32,6 @@ struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __mayb
return NULL;
}

-static void pmu_alias__delete(struct pmu_alias *pmu_alias)
-{
- if (!pmu_alias)
- return;
-
- zfree(&pmu_alias->name);
- zfree(&pmu_alias->alias);
- free(pmu_alias);
-}
-
-static struct pmu_alias *pmu_alias__new(char *name, char *alias)
-{
- struct pmu_alias *pmu_alias = zalloc(sizeof(*pmu_alias));
-
- if (pmu_alias) {
- pmu_alias->name = strdup(name);
- if (!pmu_alias->name)
- goto out_delete;
-
- pmu_alias->alias = strdup(alias);
- if (!pmu_alias->alias)
- goto out_delete;
- }
- return pmu_alias;
-
-out_delete:
- pmu_alias__delete(pmu_alias);
- return NULL;
-}
-
-static int setup_pmu_alias_list(void)
-{
- int fd, dirfd;
- DIR *dir;
- struct dirent *dent;
- struct pmu_alias *pmu_alias;
- char buf[MAX_PMU_NAME_LEN];
- FILE *file;
- int ret = -ENOMEM;
-
- dirfd = perf_pmu__event_source_devices_fd();
- if (dirfd < 0)
- return -1;
-
- dir = fdopendir(dirfd);
- if (!dir)
- return -errno;
-
- while ((dent = readdir(dir))) {
- if (!strcmp(dent->d_name, ".") ||
- !strcmp(dent->d_name, ".."))
- continue;
-
- fd = perf_pmu__pathname_fd(dirfd, dent->d_name, "alias", O_RDONLY);
- if (fd < 0)
- continue;
-
- file = fdopen(fd, "r");
- if (!file)
- continue;
-
- if (!fgets(buf, sizeof(buf), file)) {
- fclose(file);
- continue;
- }
-
- fclose(file);
-
- /* Remove the last '\n' */
- buf[strlen(buf) - 1] = 0;
-
- pmu_alias = pmu_alias__new(dent->d_name, buf);
- if (!pmu_alias)
- goto close_dir;
-
- list_add_tail(&pmu_alias->list, &pmu_alias_name_list);
- }
-
- ret = 0;
-
-close_dir:
- closedir(dir);
- return ret;
-}
-
-static const char *__pmu_find_real_name(const char *name)
-{
- struct pmu_alias *pmu_alias;
-
- list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) {
- if (!strcmp(name, pmu_alias->alias))
- return pmu_alias->name;
- }
-
- return name;
-}
-
-const char *pmu_find_real_name(const char *name)
-{
- if (cached_list)
- return __pmu_find_real_name(name);
-
- setup_pmu_alias_list();
- cached_list = true;
-
- return __pmu_find_real_name(name);
-}
-
-static const char *__pmu_find_alias_name(const char *name)
-{
- struct pmu_alias *pmu_alias;
-
- list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) {
- if (!strcmp(name, pmu_alias->name))
- return pmu_alias->alias;
- }
- return NULL;
-}
-
-const char *pmu_find_alias_name(const char *name)
-{
- if (cached_list)
- return __pmu_find_alias_name(name);
-
- setup_pmu_alias_list();
- cached_list = true;
-
- return __pmu_find_alias_name(name);
-}
-
int perf_pmus__num_mem_pmus(void)
{
/* AMD uses IBS OP pmu and not a core PMU for perf mem/c2c */
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 0d81c059c91c..0f5c6ed257a8 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -937,16 +937,27 @@ perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
return NULL;
}

-const char * __weak
-pmu_find_real_name(const char *name)
+static char *pmu_find_alias_name(struct perf_pmu *pmu, int dirfd)
{
- return name;
-}
+ FILE *file = perf_pmu__open_file_at(pmu, dirfd, "alias");
+ char *line = NULL;
+ size_t line_len = 0;
+ ssize_t ret;

-const char * __weak
-pmu_find_alias_name(const char *name __maybe_unused)
-{
- return NULL;
+ if (!file)
+ return NULL;
+
+ ret = getline(&line, &line_len, file);
+ if (ret < 0) {
+ fclose(file);
+ return NULL;
+ }
+ /* Remove trailing newline. */
+ if (ret > 0 && line[ret - 1] == '\n')
+ line[--ret] = '\0';
+
+ fclose(file);
+ return line;
}

static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
@@ -957,12 +968,10 @@ static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
return max_precise;
}

-struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name)
+struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name)
{
struct perf_pmu *pmu;
__u32 type;
- const char *name = pmu_find_real_name(lookup_name);
- const char *alias_name;

pmu = zalloc(sizeof(*pmu));
if (!pmu)
@@ -995,18 +1004,12 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char
pmu->is_core = is_pmu_core(name);
pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core);

- alias_name = pmu_find_alias_name(name);
- if (alias_name) {
- pmu->alias_name = strdup(alias_name);
- if (!pmu->alias_name)
- goto err;
- }
-
pmu->type = type;
pmu->is_uncore = pmu_is_uncore(dirfd, name);
if (pmu->is_uncore)
pmu->id = pmu_id(name);
pmu->max_precise = pmu_max_precise(dirfd, pmu);
+ pmu->alias_name = pmu_find_alias_name(pmu, dirfd);
pmu->events_table = perf_pmu__find_events_table(pmu);
pmu_add_sys_aliases(pmu);
list_add_tail(&pmu->list, pmus);
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 04b317b17d66..bc807729a7cd 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -251,8 +251,6 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);

int perf_pmu__match(const char *pattern, const char *name, const char *tok);

-const char *pmu_find_real_name(const char *name);
-const char *pmu_find_alias_name(const char *name);
double perf_pmu__cpu_slots_per_cycle(void);
int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size);
int perf_pmu__pathname_scnprintf(char *buf, size_t size,
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
index 64e798e68a2d..ce4931461741 100644
--- a/tools/perf/util/pmus.c
+++ b/tools/perf/util/pmus.c
@@ -37,6 +37,8 @@ static LIST_HEAD(other_pmus);
static bool read_sysfs_core_pmus;
static bool read_sysfs_all_pmus;

+static void pmu_read_sysfs(bool core_only);
+
int pmu_name_len_no_suffix(const char *str, unsigned long *num)
{
int orig_len, len;
@@ -124,6 +126,14 @@ struct perf_pmu *perf_pmus__find(const char *name)
pmu = perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name);
close(dirfd);

+ if (!pmu) {
+ /*
+ * Looking up an inidividual PMU failed. This may mean name is
+ * an alias, so read the PMUs from sysfs and try to find again.
+ */
+ pmu_read_sysfs(core_pmu);
+ pmu = pmu_find(name);
+ }
return pmu;
}

--
2.42.0.515.g380fc7ccd1-goog


2023-09-28 05:12:16

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH v1] perf pmus: Make PMU alias name loading lazy

Hi Ian,

On Sun, Sep 24, 2023 at 11:24 PM Ian Rogers <[email protected]> wrote:
>
> PMU alias names were computed when the first perf_pmu is created,
> scanning all PMUs in event sources for a file called alias that
> generally doesn't exist. Switch to trying to load the file when all
> PMU related files are loaded in lookup. This would cause a PMU name
> lookup of an alias name to fail if no PMUs were loaded, so in that
> case all PMUs are loaded and the find repeated. The overhead is
> similar but in the (very) general case not all PMUs are scanned for
> the alias file.
>
> As the overhead occurs once per invocation it doesn't show in perf
> bench internals pmu-scan. On a tigerlake machine, the number of openat
> system calls for an event of cpu/cycles/ with perf stat reduces from
> 94 to 69 (ie 25 fewer openat calls).

I think the pmu-scan bench could show the difference as it
calls perf_pmu__destroy() in the loop body. So every call to
perf_pmu__scan() should start from nothing, right?

>
> Signed-off-by: Ian Rogers <[email protected]>

Maybe we can load event aliases and formats lazily later.
Anyway, it looks good to me.

Acked-by: Namhyung Kim <[email protected]>

Thanks,
Namhyung


> ---
> tools/perf/arch/x86/util/pmu.c | 139 ---------------------------------
> tools/perf/util/pmu.c | 39 ++++-----
> tools/perf/util/pmu.h | 2 -
> tools/perf/util/pmus.c | 10 +++
> 4 files changed, 31 insertions(+), 159 deletions(-)
>
> diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> index f428cffb0378..8b53ca468a50 100644
> --- a/tools/perf/arch/x86/util/pmu.c
> +++ b/tools/perf/arch/x86/util/pmu.c
> @@ -17,15 +17,6 @@
> #include "../../../util/pmus.h"
> #include "env.h"
>
> -struct pmu_alias {
> - char *name;
> - char *alias;
> - struct list_head list;
> -};
> -
> -static LIST_HEAD(pmu_alias_name_list);
> -static bool cached_list;
> -
> struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
> {
> #ifdef HAVE_AUXTRACE_SUPPORT
> @@ -41,136 +32,6 @@ struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __mayb
> return NULL;
> }
>
> -static void pmu_alias__delete(struct pmu_alias *pmu_alias)
> -{
> - if (!pmu_alias)
> - return;
> -
> - zfree(&pmu_alias->name);
> - zfree(&pmu_alias->alias);
> - free(pmu_alias);
> -}
> -
> -static struct pmu_alias *pmu_alias__new(char *name, char *alias)
> -{
> - struct pmu_alias *pmu_alias = zalloc(sizeof(*pmu_alias));
> -
> - if (pmu_alias) {
> - pmu_alias->name = strdup(name);
> - if (!pmu_alias->name)
> - goto out_delete;
> -
> - pmu_alias->alias = strdup(alias);
> - if (!pmu_alias->alias)
> - goto out_delete;
> - }
> - return pmu_alias;
> -
> -out_delete:
> - pmu_alias__delete(pmu_alias);
> - return NULL;
> -}
> -
> -static int setup_pmu_alias_list(void)
> -{
> - int fd, dirfd;
> - DIR *dir;
> - struct dirent *dent;
> - struct pmu_alias *pmu_alias;
> - char buf[MAX_PMU_NAME_LEN];
> - FILE *file;
> - int ret = -ENOMEM;
> -
> - dirfd = perf_pmu__event_source_devices_fd();
> - if (dirfd < 0)
> - return -1;
> -
> - dir = fdopendir(dirfd);
> - if (!dir)
> - return -errno;
> -
> - while ((dent = readdir(dir))) {
> - if (!strcmp(dent->d_name, ".") ||
> - !strcmp(dent->d_name, ".."))
> - continue;
> -
> - fd = perf_pmu__pathname_fd(dirfd, dent->d_name, "alias", O_RDONLY);
> - if (fd < 0)
> - continue;
> -
> - file = fdopen(fd, "r");
> - if (!file)
> - continue;
> -
> - if (!fgets(buf, sizeof(buf), file)) {
> - fclose(file);
> - continue;
> - }
> -
> - fclose(file);
> -
> - /* Remove the last '\n' */
> - buf[strlen(buf) - 1] = 0;
> -
> - pmu_alias = pmu_alias__new(dent->d_name, buf);
> - if (!pmu_alias)
> - goto close_dir;
> -
> - list_add_tail(&pmu_alias->list, &pmu_alias_name_list);
> - }
> -
> - ret = 0;
> -
> -close_dir:
> - closedir(dir);
> - return ret;
> -}
> -
> -static const char *__pmu_find_real_name(const char *name)
> -{
> - struct pmu_alias *pmu_alias;
> -
> - list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) {
> - if (!strcmp(name, pmu_alias->alias))
> - return pmu_alias->name;
> - }
> -
> - return name;
> -}
> -
> -const char *pmu_find_real_name(const char *name)
> -{
> - if (cached_list)
> - return __pmu_find_real_name(name);
> -
> - setup_pmu_alias_list();
> - cached_list = true;
> -
> - return __pmu_find_real_name(name);
> -}
> -
> -static const char *__pmu_find_alias_name(const char *name)
> -{
> - struct pmu_alias *pmu_alias;
> -
> - list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) {
> - if (!strcmp(name, pmu_alias->name))
> - return pmu_alias->alias;
> - }
> - return NULL;
> -}
> -
> -const char *pmu_find_alias_name(const char *name)
> -{
> - if (cached_list)
> - return __pmu_find_alias_name(name);
> -
> - setup_pmu_alias_list();
> - cached_list = true;
> -
> - return __pmu_find_alias_name(name);
> -}
> -
> int perf_pmus__num_mem_pmus(void)
> {
> /* AMD uses IBS OP pmu and not a core PMU for perf mem/c2c */
> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> index 0d81c059c91c..0f5c6ed257a8 100644
> --- a/tools/perf/util/pmu.c
> +++ b/tools/perf/util/pmu.c
> @@ -937,16 +937,27 @@ perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
> return NULL;
> }
>
> -const char * __weak
> -pmu_find_real_name(const char *name)
> +static char *pmu_find_alias_name(struct perf_pmu *pmu, int dirfd)
> {
> - return name;
> -}
> + FILE *file = perf_pmu__open_file_at(pmu, dirfd, "alias");
> + char *line = NULL;
> + size_t line_len = 0;
> + ssize_t ret;
>
> -const char * __weak
> -pmu_find_alias_name(const char *name __maybe_unused)
> -{
> - return NULL;
> + if (!file)
> + return NULL;
> +
> + ret = getline(&line, &line_len, file);
> + if (ret < 0) {
> + fclose(file);
> + return NULL;
> + }
> + /* Remove trailing newline. */
> + if (ret > 0 && line[ret - 1] == '\n')
> + line[--ret] = '\0';
> +
> + fclose(file);
> + return line;
> }
>
> static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
> @@ -957,12 +968,10 @@ static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
> return max_precise;
> }
>
> -struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name)
> +struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name)
> {
> struct perf_pmu *pmu;
> __u32 type;
> - const char *name = pmu_find_real_name(lookup_name);
> - const char *alias_name;
>
> pmu = zalloc(sizeof(*pmu));
> if (!pmu)
> @@ -995,18 +1004,12 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char
> pmu->is_core = is_pmu_core(name);
> pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core);
>
> - alias_name = pmu_find_alias_name(name);
> - if (alias_name) {
> - pmu->alias_name = strdup(alias_name);
> - if (!pmu->alias_name)
> - goto err;
> - }
> -
> pmu->type = type;
> pmu->is_uncore = pmu_is_uncore(dirfd, name);
> if (pmu->is_uncore)
> pmu->id = pmu_id(name);
> pmu->max_precise = pmu_max_precise(dirfd, pmu);
> + pmu->alias_name = pmu_find_alias_name(pmu, dirfd);
> pmu->events_table = perf_pmu__find_events_table(pmu);
> pmu_add_sys_aliases(pmu);
> list_add_tail(&pmu->list, pmus);
> diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
> index 04b317b17d66..bc807729a7cd 100644
> --- a/tools/perf/util/pmu.h
> +++ b/tools/perf/util/pmu.h
> @@ -251,8 +251,6 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
>
> int perf_pmu__match(const char *pattern, const char *name, const char *tok);
>
> -const char *pmu_find_real_name(const char *name);
> -const char *pmu_find_alias_name(const char *name);
> double perf_pmu__cpu_slots_per_cycle(void);
> int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size);
> int perf_pmu__pathname_scnprintf(char *buf, size_t size,
> diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
> index 64e798e68a2d..ce4931461741 100644
> --- a/tools/perf/util/pmus.c
> +++ b/tools/perf/util/pmus.c
> @@ -37,6 +37,8 @@ static LIST_HEAD(other_pmus);
> static bool read_sysfs_core_pmus;
> static bool read_sysfs_all_pmus;
>
> +static void pmu_read_sysfs(bool core_only);
> +
> int pmu_name_len_no_suffix(const char *str, unsigned long *num)
> {
> int orig_len, len;
> @@ -124,6 +126,14 @@ struct perf_pmu *perf_pmus__find(const char *name)
> pmu = perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name);
> close(dirfd);
>
> + if (!pmu) {
> + /*
> + * Looking up an inidividual PMU failed. This may mean name is
> + * an alias, so read the PMUs from sysfs and try to find again.
> + */
> + pmu_read_sysfs(core_pmu);
> + pmu = pmu_find(name);
> + }
> return pmu;
> }
>
> --
> 2.42.0.515.g380fc7ccd1-goog
>

2023-09-28 06:23:21

by Ian Rogers

[permalink] [raw]
Subject: Re: [PATCH v1] perf pmus: Make PMU alias name loading lazy

On Wed, Sep 27, 2023 at 10:00 PM Namhyung Kim <[email protected]> wrote:
>
> Hi Ian,
>
> On Sun, Sep 24, 2023 at 11:24 PM Ian Rogers <[email protected]> wrote:
> >
> > PMU alias names were computed when the first perf_pmu is created,
> > scanning all PMUs in event sources for a file called alias that
> > generally doesn't exist. Switch to trying to load the file when all
> > PMU related files are loaded in lookup. This would cause a PMU name
> > lookup of an alias name to fail if no PMUs were loaded, so in that
> > case all PMUs are loaded and the find repeated. The overhead is
> > similar but in the (very) general case not all PMUs are scanned for
> > the alias file.
> >
> > As the overhead occurs once per invocation it doesn't show in perf
> > bench internals pmu-scan. On a tigerlake machine, the number of openat
> > system calls for an event of cpu/cycles/ with perf stat reduces from
> > 94 to 69 (ie 25 fewer openat calls).
>
> I think the pmu-scan bench could show the difference as it
> calls perf_pmu__destroy() in the loop body. So every call to
> perf_pmu__scan() should start from nothing, right?

The PMU alias name list was funny. It is/was maintained in the x86
specific PMU code and the destroy didn't clear the list. This change
adds an openat to loading a PMU for the alias, so pmu-scan shows a
very small slow down. However, in the more normal cases we're reducing
the number of openats by 25%.

Thanks,
Ian

> >
> > Signed-off-by: Ian Rogers <[email protected]>
>
> Maybe we can load event aliases and formats lazily later.
> Anyway, it looks good to me.
>
> Acked-by: Namhyung Kim <[email protected]>
>
> Thanks,
> Namhyung
>
>
> > ---
> > tools/perf/arch/x86/util/pmu.c | 139 ---------------------------------
> > tools/perf/util/pmu.c | 39 ++++-----
> > tools/perf/util/pmu.h | 2 -
> > tools/perf/util/pmus.c | 10 +++
> > 4 files changed, 31 insertions(+), 159 deletions(-)
> >
> > diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> > index f428cffb0378..8b53ca468a50 100644
> > --- a/tools/perf/arch/x86/util/pmu.c
> > +++ b/tools/perf/arch/x86/util/pmu.c
> > @@ -17,15 +17,6 @@
> > #include "../../../util/pmus.h"
> > #include "env.h"
> >
> > -struct pmu_alias {
> > - char *name;
> > - char *alias;
> > - struct list_head list;
> > -};
> > -
> > -static LIST_HEAD(pmu_alias_name_list);
> > -static bool cached_list;
> > -
> > struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
> > {
> > #ifdef HAVE_AUXTRACE_SUPPORT
> > @@ -41,136 +32,6 @@ struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __mayb
> > return NULL;
> > }
> >
> > -static void pmu_alias__delete(struct pmu_alias *pmu_alias)
> > -{
> > - if (!pmu_alias)
> > - return;
> > -
> > - zfree(&pmu_alias->name);
> > - zfree(&pmu_alias->alias);
> > - free(pmu_alias);
> > -}
> > -
> > -static struct pmu_alias *pmu_alias__new(char *name, char *alias)
> > -{
> > - struct pmu_alias *pmu_alias = zalloc(sizeof(*pmu_alias));
> > -
> > - if (pmu_alias) {
> > - pmu_alias->name = strdup(name);
> > - if (!pmu_alias->name)
> > - goto out_delete;
> > -
> > - pmu_alias->alias = strdup(alias);
> > - if (!pmu_alias->alias)
> > - goto out_delete;
> > - }
> > - return pmu_alias;
> > -
> > -out_delete:
> > - pmu_alias__delete(pmu_alias);
> > - return NULL;
> > -}
> > -
> > -static int setup_pmu_alias_list(void)
> > -{
> > - int fd, dirfd;
> > - DIR *dir;
> > - struct dirent *dent;
> > - struct pmu_alias *pmu_alias;
> > - char buf[MAX_PMU_NAME_LEN];
> > - FILE *file;
> > - int ret = -ENOMEM;
> > -
> > - dirfd = perf_pmu__event_source_devices_fd();
> > - if (dirfd < 0)
> > - return -1;
> > -
> > - dir = fdopendir(dirfd);
> > - if (!dir)
> > - return -errno;
> > -
> > - while ((dent = readdir(dir))) {
> > - if (!strcmp(dent->d_name, ".") ||
> > - !strcmp(dent->d_name, ".."))
> > - continue;
> > -
> > - fd = perf_pmu__pathname_fd(dirfd, dent->d_name, "alias", O_RDONLY);
> > - if (fd < 0)
> > - continue;
> > -
> > - file = fdopen(fd, "r");
> > - if (!file)
> > - continue;
> > -
> > - if (!fgets(buf, sizeof(buf), file)) {
> > - fclose(file);
> > - continue;
> > - }
> > -
> > - fclose(file);
> > -
> > - /* Remove the last '\n' */
> > - buf[strlen(buf) - 1] = 0;
> > -
> > - pmu_alias = pmu_alias__new(dent->d_name, buf);
> > - if (!pmu_alias)
> > - goto close_dir;
> > -
> > - list_add_tail(&pmu_alias->list, &pmu_alias_name_list);
> > - }
> > -
> > - ret = 0;
> > -
> > -close_dir:
> > - closedir(dir);
> > - return ret;
> > -}
> > -
> > -static const char *__pmu_find_real_name(const char *name)
> > -{
> > - struct pmu_alias *pmu_alias;
> > -
> > - list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) {
> > - if (!strcmp(name, pmu_alias->alias))
> > - return pmu_alias->name;
> > - }
> > -
> > - return name;
> > -}
> > -
> > -const char *pmu_find_real_name(const char *name)
> > -{
> > - if (cached_list)
> > - return __pmu_find_real_name(name);
> > -
> > - setup_pmu_alias_list();
> > - cached_list = true;
> > -
> > - return __pmu_find_real_name(name);
> > -}
> > -
> > -static const char *__pmu_find_alias_name(const char *name)
> > -{
> > - struct pmu_alias *pmu_alias;
> > -
> > - list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) {
> > - if (!strcmp(name, pmu_alias->name))
> > - return pmu_alias->alias;
> > - }
> > - return NULL;
> > -}
> > -
> > -const char *pmu_find_alias_name(const char *name)
> > -{
> > - if (cached_list)
> > - return __pmu_find_alias_name(name);
> > -
> > - setup_pmu_alias_list();
> > - cached_list = true;
> > -
> > - return __pmu_find_alias_name(name);
> > -}
> > -
> > int perf_pmus__num_mem_pmus(void)
> > {
> > /* AMD uses IBS OP pmu and not a core PMU for perf mem/c2c */
> > diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> > index 0d81c059c91c..0f5c6ed257a8 100644
> > --- a/tools/perf/util/pmu.c
> > +++ b/tools/perf/util/pmu.c
> > @@ -937,16 +937,27 @@ perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
> > return NULL;
> > }
> >
> > -const char * __weak
> > -pmu_find_real_name(const char *name)
> > +static char *pmu_find_alias_name(struct perf_pmu *pmu, int dirfd)
> > {
> > - return name;
> > -}
> > + FILE *file = perf_pmu__open_file_at(pmu, dirfd, "alias");
> > + char *line = NULL;
> > + size_t line_len = 0;
> > + ssize_t ret;
> >
> > -const char * __weak
> > -pmu_find_alias_name(const char *name __maybe_unused)
> > -{
> > - return NULL;
> > + if (!file)
> > + return NULL;
> > +
> > + ret = getline(&line, &line_len, file);
> > + if (ret < 0) {
> > + fclose(file);
> > + return NULL;
> > + }
> > + /* Remove trailing newline. */
> > + if (ret > 0 && line[ret - 1] == '\n')
> > + line[--ret] = '\0';
> > +
> > + fclose(file);
> > + return line;
> > }
> >
> > static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
> > @@ -957,12 +968,10 @@ static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
> > return max_precise;
> > }
> >
> > -struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name)
> > +struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name)
> > {
> > struct perf_pmu *pmu;
> > __u32 type;
> > - const char *name = pmu_find_real_name(lookup_name);
> > - const char *alias_name;
> >
> > pmu = zalloc(sizeof(*pmu));
> > if (!pmu)
> > @@ -995,18 +1004,12 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char
> > pmu->is_core = is_pmu_core(name);
> > pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core);
> >
> > - alias_name = pmu_find_alias_name(name);
> > - if (alias_name) {
> > - pmu->alias_name = strdup(alias_name);
> > - if (!pmu->alias_name)
> > - goto err;
> > - }
> > -
> > pmu->type = type;
> > pmu->is_uncore = pmu_is_uncore(dirfd, name);
> > if (pmu->is_uncore)
> > pmu->id = pmu_id(name);
> > pmu->max_precise = pmu_max_precise(dirfd, pmu);
> > + pmu->alias_name = pmu_find_alias_name(pmu, dirfd);
> > pmu->events_table = perf_pmu__find_events_table(pmu);
> > pmu_add_sys_aliases(pmu);
> > list_add_tail(&pmu->list, pmus);
> > diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
> > index 04b317b17d66..bc807729a7cd 100644
> > --- a/tools/perf/util/pmu.h
> > +++ b/tools/perf/util/pmu.h
> > @@ -251,8 +251,6 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
> >
> > int perf_pmu__match(const char *pattern, const char *name, const char *tok);
> >
> > -const char *pmu_find_real_name(const char *name);
> > -const char *pmu_find_alias_name(const char *name);
> > double perf_pmu__cpu_slots_per_cycle(void);
> > int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size);
> > int perf_pmu__pathname_scnprintf(char *buf, size_t size,
> > diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
> > index 64e798e68a2d..ce4931461741 100644
> > --- a/tools/perf/util/pmus.c
> > +++ b/tools/perf/util/pmus.c
> > @@ -37,6 +37,8 @@ static LIST_HEAD(other_pmus);
> > static bool read_sysfs_core_pmus;
> > static bool read_sysfs_all_pmus;
> >
> > +static void pmu_read_sysfs(bool core_only);
> > +
> > int pmu_name_len_no_suffix(const char *str, unsigned long *num)
> > {
> > int orig_len, len;
> > @@ -124,6 +126,14 @@ struct perf_pmu *perf_pmus__find(const char *name)
> > pmu = perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name);
> > close(dirfd);
> >
> > + if (!pmu) {
> > + /*
> > + * Looking up an inidividual PMU failed. This may mean name is
> > + * an alias, so read the PMUs from sysfs and try to find again.
> > + */
> > + pmu_read_sysfs(core_pmu);
> > + pmu = pmu_find(name);
> > + }
> > return pmu;
> > }
> >
> > --
> > 2.42.0.515.g380fc7ccd1-goog
> >

2023-10-03 04:11:15

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH v1] perf pmus: Make PMU alias name loading lazy

On Wed, Sep 27, 2023 at 10:19 PM Ian Rogers <[email protected]> wrote:
>
> On Wed, Sep 27, 2023 at 10:00 PM Namhyung Kim <[email protected]> wrote:
> >
> > Hi Ian,
> >
> > On Sun, Sep 24, 2023 at 11:24 PM Ian Rogers <[email protected]> wrote:
> > >
> > > PMU alias names were computed when the first perf_pmu is created,
> > > scanning all PMUs in event sources for a file called alias that
> > > generally doesn't exist. Switch to trying to load the file when all
> > > PMU related files are loaded in lookup. This would cause a PMU name
> > > lookup of an alias name to fail if no PMUs were loaded, so in that
> > > case all PMUs are loaded and the find repeated. The overhead is
> > > similar but in the (very) general case not all PMUs are scanned for
> > > the alias file.
> > >
> > > As the overhead occurs once per invocation it doesn't show in perf
> > > bench internals pmu-scan. On a tigerlake machine, the number of openat
> > > system calls for an event of cpu/cycles/ with perf stat reduces from
> > > 94 to 69 (ie 25 fewer openat calls).
> >
> > I think the pmu-scan bench could show the difference as it
> > calls perf_pmu__destroy() in the loop body. So every call to
> > perf_pmu__scan() should start from nothing, right?
>
> The PMU alias name list was funny. It is/was maintained in the x86
> specific PMU code and the destroy didn't clear the list. This change
> adds an openat to loading a PMU for the alias, so pmu-scan shows a
> very small slow down. However, in the more normal cases we're reducing
> the number of openats by 25%.

I think that's ok. Applied to perf-tools-next, thanks!