Performance improvements to pmu scanning by holding onto the
event/metric tables for a cpuid (avoid regular expression comparisons)
and by lazily computing the default perf_event_attr for a PMU.
Before
% Running 'internals/pmu-scan' benchmark:
Computing performance of sysfs PMU event scan for 100 times
Average core PMU scanning took: 251.990 usec (+- 4.009 usec)
Average PMU scanning took: 3222.460 usec (+- 211.234 usec)
% Running 'internals/pmu-scan' benchmark:
Computing performance of sysfs PMU event scan for 100 times
Average core PMU scanning took: 260.120 usec (+- 7.905 usec)
Average PMU scanning took: 3228.995 usec (+- 211.196 usec)
% Running 'internals/pmu-scan' benchmark:
Computing performance of sysfs PMU event scan for 100 times
Average core PMU scanning took: 252.310 usec (+- 3.980 usec)
Average PMU scanning took: 3220.675 usec (+- 210.844 usec)
After:
% Running 'internals/pmu-scan' benchmark:
Computing performance of sysfs PMU event scan for 100 times
Average core PMU scanning took: 28.530 usec (+- 0.602 usec)
Average PMU scanning took: 275.725 usec (+- 18.253 usec)
% Running 'internals/pmu-scan' benchmark:
Computing performance of sysfs PMU event scan for 100 times
Average core PMU scanning took: 28.720 usec (+- 0.446 usec)
Average PMU scanning took: 271.015 usec (+- 18.762 usec)
% Running 'internals/pmu-scan' benchmark:
Computing performance of sysfs PMU event scan for 100 times
Average core PMU scanning took: 31.040 usec (+- 0.612 usec)
Average PMU scanning took: 267.340 usec (+- 17.209 usec)
Measuring the pmu-scan benchmark on a Tigerlake laptop: core PMU
scanning is reduced to 11.5% of the previous execution time, all PMU
scanning is reduced to 8.4% of the previous execution time. There is a
4.3% reduction in openat system calls.
Ian Rogers (7):
perf pmu: Rename perf_pmu__get_default_config to perf_pmu__arch_init
perf intel-pt: Move PMU initialization from default config code
perf arm-spe: Move PMU initialization from default config code
perf pmu: Const-ify file APIs
perf pmu: Const-ify perf_pmu__config_terms
perf pmu-events: Remember the events and metrics table
perf pmu: Lazily compute default config
tools/perf/arch/arm/util/cs-etm.c | 13 ++------
tools/perf/arch/arm/util/pmu.c | 10 +++---
tools/perf/arch/arm64/util/arm-spe.c | 48 +++++++++++++---------------
tools/perf/arch/s390/util/pmu.c | 3 +-
tools/perf/arch/x86/util/intel-pt.c | 27 +++++++---------
tools/perf/arch/x86/util/pmu.c | 6 ++--
tools/perf/pmu-events/jevents.py | 48 ++++++++++++++++------------
tools/perf/util/arm-spe.h | 4 ++-
tools/perf/util/cs-etm.h | 2 +-
tools/perf/util/intel-pt.h | 3 +-
tools/perf/util/parse-events.c | 12 +++----
tools/perf/util/pmu.c | 39 +++++++++++-----------
tools/perf/util/pmu.h | 18 ++++++-----
tools/perf/util/python.c | 2 +-
14 files changed, 117 insertions(+), 118 deletions(-)
--
2.42.0.609.gbb76f46606-goog
Avoid setting PMU values in intel_pt_pmu_default_config, move to
perf_pmu__arch_init.
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/x86/util/intel-pt.c | 2 --
tools/perf/arch/x86/util/pmu.c | 1 +
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index b923bca939d9..6d6cd8f9133c 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -267,8 +267,6 @@ intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
attr->config = intel_pt_default_config(intel_pt_pmu);
- intel_pt_pmu->selectable = true;
-
return attr;
}
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 811e2377d2d5..949b3e2c67bd 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -22,6 +22,7 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
pmu->auxtrace = true;
+ pmu->selectable = true;
pmu->default_config = intel_pt_pmu_default_config(pmu);
}
if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) {
--
2.42.0.609.gbb76f46606-goog
Assign default_config as part of the
init. perf_pmu__get_default_config was doing more than just getting
the default config and so this is intended to better align with the
code.
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/arm/util/pmu.c | 8 +++-----
tools/perf/arch/s390/util/pmu.c | 3 +--
tools/perf/arch/x86/util/pmu.c | 5 ++---
tools/perf/util/pmu.c | 14 +++++++-------
tools/perf/util/pmu.h | 2 +-
5 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
index a9623b128ece..d55d2b15f2e6 100644
--- a/tools/perf/arch/arm/util/pmu.c
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -14,22 +14,20 @@
#include "../../../util/pmu.h"
#include "../../../util/cs-etm.h"
-struct perf_event_attr
-*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
{
#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
/* add ETM default config here */
pmu->selectable = true;
- return cs_etm_get_default_config(pmu);
+ pmu->default_config = cs_etm_get_default_config(pmu);
#if defined(__aarch64__)
} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
- return arm_spe_pmu_default_config(pmu);
+ pmu->default_config = arm_spe_pmu_default_config(pmu);
} else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
pmu->selectable = true;
#endif
}
#endif
- return NULL;
}
diff --git a/tools/perf/arch/s390/util/pmu.c b/tools/perf/arch/s390/util/pmu.c
index 11f03f32e3fd..886c30e001fa 100644
--- a/tools/perf/arch/s390/util/pmu.c
+++ b/tools/perf/arch/s390/util/pmu.c
@@ -13,11 +13,10 @@
#define S390_PMUPAI_EXT "pai_ext"
#define S390_PMUCPUM_CF "cpum_cf"
-struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu)
+void perf_pmu__arch_init(struct perf_pmu *pmu)
{
if (!strcmp(pmu->name, S390_PMUPAI_CRYPTO) ||
!strcmp(pmu->name, S390_PMUPAI_EXT) ||
!strcmp(pmu->name, S390_PMUCPUM_CF))
pmu->selectable = true;
- return NULL;
}
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 8b53ca468a50..811e2377d2d5 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -17,19 +17,18 @@
#include "../../../util/pmus.h"
#include "env.h"
-struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
{
#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
pmu->auxtrace = true;
- return intel_pt_pmu_default_config(pmu);
+ pmu->default_config = intel_pt_pmu_default_config(pmu);
}
if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) {
pmu->auxtrace = true;
pmu->selectable = true;
}
#endif
- return NULL;
}
int perf_pmus__num_mem_pmus(void)
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 6b1b7f8f00fa..6e95b3d2c2e3 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -954,12 +954,6 @@ void pmu_add_sys_aliases(struct perf_pmu *pmu)
pmu_for_each_sys_event(pmu_add_sys_aliases_iter_fn, pmu);
}
-struct perf_event_attr * __weak
-perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
-{
- return NULL;
-}
-
static char *pmu_find_alias_name(struct perf_pmu *pmu, int dirfd)
{
FILE *file = perf_pmu__open_file_at(pmu, dirfd, "alias");
@@ -991,6 +985,12 @@ static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
return max_precise;
}
+
+void __weak
+perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
+{
+}
+
struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name)
{
struct perf_pmu *pmu;
@@ -1037,7 +1037,7 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char
pmu_add_sys_aliases(pmu);
list_add_tail(&pmu->list, pmus);
- pmu->default_config = perf_pmu__get_default_config(pmu);
+ perf_pmu__arch_init(pmu);
return pmu;
err:
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 85190d058852..588c64e38d6b 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -233,7 +233,7 @@ bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name);
int perf_pmu__test(void);
-struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
+void perf_pmu__arch_init(struct perf_pmu *pmu);
void pmu_add_cpu_aliases_table(struct perf_pmu *pmu,
const struct pmu_events_table *table);
--
2.42.0.609.gbb76f46606-goog
File APIs don't alter the struct pmu so allow const ones to be passed.
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/util/pmu.c | 12 ++++++------
tools/perf/util/pmu.h | 11 ++++++-----
tools/perf/util/python.c | 2 +-
3 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 6e95b3d2c2e3..e11901c923d7 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -575,7 +575,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
return 0;
}
-static inline bool pmu_alias_info_file(char *name)
+static inline bool pmu_alias_info_file(const char *name)
{
size_t len;
@@ -1771,7 +1771,7 @@ bool perf_pmu__is_software(const struct perf_pmu *pmu)
return !strcmp(pmu->name, "kprobe") || !strcmp(pmu->name, "uprobe");
}
-FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
+FILE *perf_pmu__open_file(const struct perf_pmu *pmu, const char *name)
{
char path[PATH_MAX];
@@ -1782,7 +1782,7 @@ FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
return fopen(path, "r");
}
-FILE *perf_pmu__open_file_at(struct perf_pmu *pmu, int dirfd, const char *name)
+FILE *perf_pmu__open_file_at(const struct perf_pmu *pmu, int dirfd, const char *name)
{
int fd;
@@ -1793,7 +1793,7 @@ FILE *perf_pmu__open_file_at(struct perf_pmu *pmu, int dirfd, const char *name)
return fdopen(fd, "r");
}
-int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
+int perf_pmu__scan_file(const struct perf_pmu *pmu, const char *name, const char *fmt,
...)
{
va_list args;
@@ -1810,7 +1810,7 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
return ret;
}
-int perf_pmu__scan_file_at(struct perf_pmu *pmu, int dirfd, const char *name,
+int perf_pmu__scan_file_at(const struct perf_pmu *pmu, int dirfd, const char *name,
const char *fmt, ...)
{
va_list args;
@@ -1827,7 +1827,7 @@ int perf_pmu__scan_file_at(struct perf_pmu *pmu, int dirfd, const char *name,
return ret;
}
-bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name)
+bool perf_pmu__file_exists(const struct perf_pmu *pmu, const char *name)
{
char path[PATH_MAX];
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 588c64e38d6b..24af7297b522 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -222,14 +222,15 @@ bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name);
*/
bool perf_pmu__is_software(const struct perf_pmu *pmu);
-FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name);
-FILE *perf_pmu__open_file_at(struct perf_pmu *pmu, int dirfd, const char *name);
+FILE *perf_pmu__open_file(const struct perf_pmu *pmu, const char *name);
+FILE *perf_pmu__open_file_at(const struct perf_pmu *pmu, int dirfd, const char *name);
-int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4);
-int perf_pmu__scan_file_at(struct perf_pmu *pmu, int dirfd, const char *name,
+int perf_pmu__scan_file(const struct perf_pmu *pmu, const char *name, const char *fmt, ...)
+ __scanf(3, 4);
+int perf_pmu__scan_file_at(const struct perf_pmu *pmu, int dirfd, const char *name,
const char *fmt, ...) __scanf(4, 5);
-bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name);
+bool perf_pmu__file_exists(const struct perf_pmu *pmu, const char *name);
int perf_pmu__test(void);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index c29f5f0bb552..8761f51b5c7c 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -98,7 +98,7 @@ struct perf_pmu *evsel__find_pmu(const struct evsel *evsel __maybe_unused)
return NULL;
}
-int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...)
+int perf_pmu__scan_file(const struct perf_pmu *pmu, const char *name, const char *fmt, ...)
{
return EOF;
}
--
2.42.0.609.gbb76f46606-goog
strcmp_cpuid_str performs regular expression comparisons. Avoid
repeated computation of the table by remembering the table in a
static.
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/pmu-events/jevents.py | 48 +++++++++++++++++++-------------
1 file changed, 28 insertions(+), 20 deletions(-)
diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index fd009752b427..8d8d5088c53c 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -978,28 +978,32 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
{
- const struct pmu_events_table *table = NULL;
- char *cpuid = perf_pmu__getcpuid(pmu);
+ static const struct pmu_events_table *table;
size_t i;
- /* on some platforms which uses cpus map, cpuid can be NULL for
- * PMUs other than CORE PMUs.
- */
- if (!cpuid)
- return NULL;
-
- i = 0;
- for (;;) {
- const struct pmu_events_map *map = &pmu_events_map[i++];
- if (!map->arch)
- break;
-
- if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
- table = &map->event_table;
- break;
+ if (!table) {
+ char *cpuid = perf_pmu__getcpuid(pmu);
+
+ /*
+ * On some platforms which uses cpus map, cpuid can be NULL for
+ * PMUs other than CORE PMUs.
+ */
+ if (!cpuid)
+ return NULL;
+
+ i = 0;
+ for (;;) {
+ const struct pmu_events_map *map = &pmu_events_map[i++];
+ if (!map->arch)
+ break;
+
+ if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
+ table = &map->event_table;
+ break;
+ }
}
+ free(cpuid);
}
- free(cpuid);
if (!pmu)
return table;
@@ -1015,13 +1019,17 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
{
- const struct pmu_metrics_table *table = NULL;
- char *cpuid = perf_pmu__getcpuid(pmu);
+ static const struct pmu_metrics_table *table;
+ char *cpuid;
int i;
+ if (table)
+ return table;
+
/* on some platforms which uses cpus map, cpuid can be NULL for
* PMUs other than CORE PMUs.
*/
+ cpuid = perf_pmu__getcpuid(pmu);
if (!cpuid)
return NULL;
--
2.42.0.609.gbb76f46606-goog
Avoid setting PMU values in arm_spe_pmu_default_config, move to
perf_pmu__arch_init.
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/arm/util/pmu.c | 2 ++
tools/perf/arch/arm64/util/arm-spe.c | 3 ---
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
index d55d2b15f2e6..f25f68f84a94 100644
--- a/tools/perf/arch/arm/util/pmu.c
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -23,6 +23,8 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
pmu->default_config = cs_etm_get_default_config(pmu);
#if defined(__aarch64__)
} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
+ pmu->selectable = true;
+ pmu->is_uncore = false;
pmu->default_config = arm_spe_pmu_default_config(pmu);
} else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
pmu->selectable = true;
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 9cc3d6dcb849..08a76734ccd2 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -516,8 +516,5 @@ struct perf_event_attr
attr->sample_period = 4096;
}
- arm_spe_pmu->selectable = true;
- arm_spe_pmu->is_uncore = false;
-
return attr;
}
--
2.42.0.609.gbb76f46606-goog
Add const to related APIs, this is so they can be used to default
initialize a perf_event_attr from a const pmu.
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/util/pmu.c | 10 +++++-----
tools/perf/util/pmu.h | 2 +-
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index e11901c923d7..eb17f00bd0d2 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -156,7 +156,7 @@ static void __perf_pmu_format__load(struct perf_pmu_format *format, FILE *file)
format->loaded = true;
}
-static void perf_pmu_format__load(struct perf_pmu *pmu, struct perf_pmu_format *format)
+static void perf_pmu_format__load(const struct perf_pmu *pmu, struct perf_pmu_format *format)
{
char path[PATH_MAX];
FILE *file = NULL;
@@ -1132,7 +1132,7 @@ void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel,
}
static struct perf_pmu_format *
-pmu_find_format(struct list_head *formats, const char *name)
+pmu_find_format(const struct list_head *formats, const char *name)
{
struct perf_pmu_format *format;
@@ -1230,7 +1230,7 @@ static int pmu_resolve_param_term(struct parse_events_term *term,
return -1;
}
-static char *pmu_formats_string(struct list_head *formats)
+static char *pmu_formats_string(const struct list_head *formats)
{
struct perf_pmu_format *format;
char *str = NULL;
@@ -1256,7 +1256,7 @@ static char *pmu_formats_string(struct list_head *formats)
* Setup one of config[12] attr members based on the
* user input data - term parameter.
*/
-static int pmu_config_term(struct perf_pmu *pmu,
+static int pmu_config_term(const struct perf_pmu *pmu,
struct perf_event_attr *attr,
struct parse_events_term *term,
struct parse_events_terms *head_terms,
@@ -1379,7 +1379,7 @@ static int pmu_config_term(struct perf_pmu *pmu,
return 0;
}
-int perf_pmu__config_terms(struct perf_pmu *pmu,
+int perf_pmu__config_terms(const struct perf_pmu *pmu,
struct perf_event_attr *attr,
struct parse_events_terms *terms,
bool zero, struct parse_events_error *err)
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 24af7297b522..5a05131aa4ce 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -193,7 +193,7 @@ void pmu_add_sys_aliases(struct perf_pmu *pmu);
int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
struct parse_events_terms *head_terms,
struct parse_events_error *error);
-int perf_pmu__config_terms(struct perf_pmu *pmu,
+int perf_pmu__config_terms(const struct perf_pmu *pmu,
struct perf_event_attr *attr,
struct parse_events_terms *terms,
bool zero, struct parse_events_error *error);
--
2.42.0.609.gbb76f46606-goog
The default config is computed during creation of the PMU and may do
things like scanning sysfs, when the PMU may just be used as part of
scanning. Change default_config to perf_event_attr_init_default, a
callback that is used when a default config needs initializing. This
avoids holding onto the memory for a perf_event_attr and copying.
On a tigerlake laptop running the pmu-scan benchmark:
Before:
Running 'internals/pmu-scan' benchmark:
Computing performance of sysfs PMU event scan for 100 times
Average core PMU scanning took: 28.780 usec (+- 0.503 usec)
Average PMU scanning took: 283.480 usec (+- 18.471 usec)
Number of openat syscalls: 30,227
After:
Running 'internals/pmu-scan' benchmark:
Computing performance of sysfs PMU event scan for 100 times
Average core PMU scanning took: 27.880 usec (+- 0.169 usec)
Average PMU scanning took: 245.260 usec (+- 15.758 usec)
Number of openat syscalls: 28,914
Over 3 runs it is a nearly 12% reduction in execution time and a 4.3%
of openat calls.
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/arm/util/cs-etm.c | 13 ++------
tools/perf/arch/arm/util/pmu.c | 4 +--
tools/perf/arch/arm64/util/arm-spe.c | 45 ++++++++++++++--------------
tools/perf/arch/x86/util/intel-pt.c | 25 ++++++++--------
tools/perf/arch/x86/util/pmu.c | 2 +-
tools/perf/util/arm-spe.h | 4 ++-
tools/perf/util/cs-etm.h | 2 +-
tools/perf/util/intel-pt.h | 3 +-
tools/perf/util/parse-events.c | 12 ++++----
tools/perf/util/pmu.c | 3 +-
tools/perf/util/pmu.h | 3 +-
11 files changed, 56 insertions(+), 60 deletions(-)
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index b8d6a953fd74..16bba74f048b 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -917,16 +917,9 @@ struct auxtrace_record *cs_etm_record_init(int *err)
* (CFG_CHG and evsel__set_config_if_unset()). If no default is set then user
* changes aren't tracked.
*/
-struct perf_event_attr *
-cs_etm_get_default_config(struct perf_pmu *pmu __maybe_unused)
+void
+cs_etm_get_default_config(const struct perf_pmu *pmu __maybe_unused,
+ struct perf_event_attr *attr)
{
- struct perf_event_attr *attr;
-
- attr = zalloc(sizeof(struct perf_event_attr));
- if (!attr)
- return NULL;
-
attr->sample_period = 1;
-
- return attr;
}
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
index f25f68f84a94..7f3af3b97f3b 100644
--- a/tools/perf/arch/arm/util/pmu.c
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -20,12 +20,12 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
/* add ETM default config here */
pmu->selectable = true;
- pmu->default_config = cs_etm_get_default_config(pmu);
+ pmu->perf_event_attr_init_default = cs_etm_get_default_config;
#if defined(__aarch64__)
} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
pmu->selectable = true;
pmu->is_uncore = false;
- pmu->default_config = arm_spe_pmu_default_config(pmu);
+ pmu->perf_event_attr_init_default = arm_spe_pmu_default_config;
} else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
pmu->selectable = true;
#endif
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 08a76734ccd2..e3acc739bd00 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -113,6 +113,25 @@ arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts,
}
}
+static __u64 arm_spe_pmu__sample_period(const struct perf_pmu *arm_spe_pmu)
+{
+ static __u64 sample_period;
+
+ if (sample_period)
+ return sample_period;
+
+ /*
+ * If kernel driver doesn't advertise a minimum,
+ * use max allowable by PMSIDR_EL1.INTERVAL
+ */
+ if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
+ &sample_period) != 1) {
+ pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
+ sample_period = 4096;
+ }
+ return sample_period;
+}
+
static int arm_spe_recording_options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts)
@@ -136,7 +155,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
return -EINVAL;
}
evsel->core.attr.freq = 0;
- evsel->core.attr.sample_period = arm_spe_pmu->default_config->sample_period;
+ evsel->core.attr.sample_period = arm_spe_pmu__sample_period(arm_spe_pmu);
evsel->needs_auxtrace_mmap = true;
arm_spe_evsel = evsel;
opts->full_auxtrace = true;
@@ -495,26 +514,8 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
return &sper->itr;
}
-struct perf_event_attr
-*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu)
+void
+arm_spe_pmu_default_config(const struct perf_pmu *arm_spe_pmu, struct perf_event_attr *attr)
{
- struct perf_event_attr *attr;
-
- attr = zalloc(sizeof(struct perf_event_attr));
- if (!attr) {
- pr_err("arm_spe default config cannot allocate a perf_event_attr\n");
- return NULL;
- }
-
- /*
- * If kernel driver doesn't advertise a minimum,
- * use max allowable by PMSIDR_EL1.INTERVAL
- */
- if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
- &attr->sample_period) != 1) {
- pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
- attr->sample_period = 4096;
- }
-
- return attr;
+ attr->sample_period = arm_spe_pmu__sample_period(arm_spe_pmu);
}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 6d6cd8f9133c..fa0c718b9e72 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -60,7 +60,7 @@ struct intel_pt_recording {
size_t priv_size;
};
-static int intel_pt_parse_terms_with_default(struct perf_pmu *pmu,
+static int intel_pt_parse_terms_with_default(const struct perf_pmu *pmu,
const char *str,
u64 *config)
{
@@ -84,7 +84,7 @@ static int intel_pt_parse_terms_with_default(struct perf_pmu *pmu,
return err;
}
-static int intel_pt_parse_terms(struct perf_pmu *pmu, const char *str, u64 *config)
+static int intel_pt_parse_terms(const struct perf_pmu *pmu, const char *str, u64 *config)
{
*config = 0;
return intel_pt_parse_terms_with_default(pmu, str, config);
@@ -177,7 +177,7 @@ static int intel_pt_pick_bit(int bits, int target)
return pick;
}
-static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
+static u64 intel_pt_default_config(const struct perf_pmu *intel_pt_pmu)
{
char buf[256];
int mtc, mtc_periods = 0, mtc_period;
@@ -256,18 +256,17 @@ static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
return 0;
}
-struct perf_event_attr *
-intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
+void intel_pt_pmu_default_config(const struct perf_pmu *intel_pt_pmu,
+ struct perf_event_attr *attr)
{
- struct perf_event_attr *attr;
+ static u64 config;
+ static bool initialized;
- attr = zalloc(sizeof(struct perf_event_attr));
- if (!attr)
- return NULL;
-
- attr->config = intel_pt_default_config(intel_pt_pmu);
-
- return attr;
+ if (!initialized) {
+ config = intel_pt_default_config(intel_pt_pmu);
+ initialized = true;
+ }
+ attr->config = config;
}
static const char *intel_pt_find_filter(struct evlist *evlist,
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 949b3e2c67bd..469555ae9b3c 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -23,7 +23,7 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
pmu->auxtrace = true;
pmu->selectable = true;
- pmu->default_config = intel_pt_pmu_default_config(pmu);
+ pmu->perf_event_attr_init_default = intel_pt_pmu_default_config;
}
if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) {
pmu->auxtrace = true;
diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h
index 98d3235781c3..4f4900c18f3e 100644
--- a/tools/perf/util/arm-spe.h
+++ b/tools/perf/util/arm-spe.h
@@ -27,5 +27,7 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
int arm_spe_process_auxtrace_info(union perf_event *event,
struct perf_session *session);
-struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu);
+void arm_spe_pmu_default_config(const struct perf_pmu *arm_spe_pmu,
+ struct perf_event_attr *attr);
+
#endif
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 7cca37887917..4696267a32f0 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -242,7 +242,7 @@ struct cs_etm_packet_queue {
int cs_etm__process_auxtrace_info(union perf_event *event,
struct perf_session *session);
-struct perf_event_attr *cs_etm_get_default_config(struct perf_pmu *pmu);
+void cs_etm_get_default_config(const struct perf_pmu *pmu, struct perf_event_attr *attr);
enum cs_etm_pid_fmt {
CS_ETM_PIDFMT_NONE,
diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h
index c7d6068e3a6b..18fd0be52e6c 100644
--- a/tools/perf/util/intel-pt.h
+++ b/tools/perf/util/intel-pt.h
@@ -42,6 +42,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err);
int intel_pt_process_auxtrace_info(union perf_event *event,
struct perf_session *session);
-struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);
+void intel_pt_pmu_default_config(const struct perf_pmu *intel_pt_pmu,
+ struct perf_event_attr *attr);
#endif
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index c56e07bd7dd6..ea5579510b97 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1418,11 +1418,10 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
}
fix_raw(&parsed_terms, pmu);
- if (pmu->default_config) {
- memcpy(&attr, pmu->default_config, sizeof(struct perf_event_attr));
- } else {
- memset(&attr, 0, sizeof(attr));
- }
+ memset(&attr, 0, sizeof(attr));
+ if (pmu->perf_event_attr_init_default)
+ pmu->perf_event_attr_init_default(pmu, &attr);
+
attr.type = pmu->type;
if (list_empty(&parsed_terms.terms)) {
@@ -1466,7 +1465,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
* When using default config, record which bits of attr->config were
* changed by the user.
*/
- if (pmu->default_config && get_config_chgs(pmu, &parsed_terms, &config_terms)) {
+ if (pmu->perf_event_attr_init_default &&
+ get_config_chgs(pmu, &parsed_terms, &config_terms)) {
parse_events_terms__exit(&parsed_terms);
return -ENOMEM;
}
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index eb17f00bd0d2..a87371eb750e 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1403,7 +1403,7 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
struct parse_events_terms *head_terms,
struct parse_events_error *err)
{
- bool zero = !!pmu->default_config;
+ bool zero = !!pmu->perf_event_attr_init_default;
return perf_pmu__config_terms(pmu, attr, head_terms, zero, err);
}
@@ -2065,7 +2065,6 @@ void perf_pmu__delete(struct perf_pmu *pmu)
perf_cpu_map__put(pmu->cpus);
- zfree(&pmu->default_config);
zfree(&pmu->name);
zfree(&pmu->alias_name);
zfree(&pmu->id);
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 5a05131aa4ce..1e05cbb4b4f9 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -95,7 +95,8 @@ struct perf_pmu {
* @default_config: Optional default perf_event_attr determined in
* architecture specific code.
*/
- struct perf_event_attr *default_config;
+ void (*perf_event_attr_init_default)(const struct perf_pmu *pmu,
+ struct perf_event_attr *attr);
/**
* @cpus: Empty or the contents of either of:
* <sysfs>/bus/event_source/devices/<name>/cpumask.
--
2.42.0.609.gbb76f46606-goog
Hello,
On 2023/10/7 10:13, Ian Rogers wrote:
> strcmp_cpuid_str performs regular expression comparisons. Avoid
> repeated computation of the table by remembering the table in a
> static.
>
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/perf/pmu-events/jevents.py | 48 +++++++++++++++++++-------------
> 1 file changed, 28 insertions(+), 20 deletions(-)
>
> diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
> index fd009752b427..8d8d5088c53c 100755
> --- a/tools/perf/pmu-events/jevents.py
> +++ b/tools/perf/pmu-events/jevents.py
> @@ -978,28 +978,32 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
>
> const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
> {
> - const struct pmu_events_table *table = NULL;
> - char *cpuid = perf_pmu__getcpuid(pmu);
> + static const struct pmu_events_table *table;
> size_t i;
>
> - /* on some platforms which uses cpus map, cpuid can be NULL for
> - * PMUs other than CORE PMUs.
> - */
> - if (!cpuid)
> - return NULL;
> -
> - i = 0;
> - for (;;) {
> - const struct pmu_events_map *map = &pmu_events_map[i++];
> - if (!map->arch)
> - break;
> -
> - if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
> - table = &map->event_table;
> - break;
> + if (!table) {
If there is no matched table in pmu_events_map,
perf_pmu__find_events_table() will enter this branch for repeated search
each time.
Or do we need to use another variable to indicate whether the search has
been performed?
Thanks,
Yang
On Sat, Oct 7, 2023 at 8:39 PM Yang Jihong <[email protected]> wrote:
>
> Hello,
>
> On 2023/10/7 10:13, Ian Rogers wrote:
> > strcmp_cpuid_str performs regular expression comparisons. Avoid
> > repeated computation of the table by remembering the table in a
> > static.
> >
> > Signed-off-by: Ian Rogers <[email protected]>
> > ---
> > tools/perf/pmu-events/jevents.py | 48 +++++++++++++++++++-------------
> > 1 file changed, 28 insertions(+), 20 deletions(-)
> >
> > diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
> > index fd009752b427..8d8d5088c53c 100755
> > --- a/tools/perf/pmu-events/jevents.py
> > +++ b/tools/perf/pmu-events/jevents.py
> > @@ -978,28 +978,32 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
> >
> > const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
> > {
> > - const struct pmu_events_table *table = NULL;
> > - char *cpuid = perf_pmu__getcpuid(pmu);
> > + static const struct pmu_events_table *table;
> > size_t i;
> >
> > - /* on some platforms which uses cpus map, cpuid can be NULL for
> > - * PMUs other than CORE PMUs.
> > - */
> > - if (!cpuid)
> > - return NULL;
> > -
> > - i = 0;
> > - for (;;) {
> > - const struct pmu_events_map *map = &pmu_events_map[i++];
> > - if (!map->arch)
> > - break;
> > -
> > - if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
> > - table = &map->event_table;
> > - break;
> > + if (!table) {
> If there is no matched table in pmu_events_map,
> perf_pmu__find_events_table() will enter this branch for repeated search
> each time.
> Or do we need to use another variable to indicate whether the search has
> been performed?
Agreed, the behavior will match the existing behavior. Longer term I
want to remove this code. Do you have a scenario we should optimize
for here?
Thanks,
Ian
> Thanks,
> Yang
Hello,
On 2023/10/8 13:49, Ian Rogers wrote:
> On Sat, Oct 7, 2023 at 8:39 PM Yang Jihong <[email protected]> wrote:
>>
>> Hello,
>>
>> On 2023/10/7 10:13, Ian Rogers wrote:
>>> strcmp_cpuid_str performs regular expression comparisons. Avoid
>>> repeated computation of the table by remembering the table in a
>>> static.
>>>
>>> Signed-off-by: Ian Rogers <[email protected]>
>>> ---
>>> tools/perf/pmu-events/jevents.py | 48 +++++++++++++++++++-------------
>>> 1 file changed, 28 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
>>> index fd009752b427..8d8d5088c53c 100755
>>> --- a/tools/perf/pmu-events/jevents.py
>>> +++ b/tools/perf/pmu-events/jevents.py
>>> @@ -978,28 +978,32 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
>>>
>>> const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
>>> {
>>> - const struct pmu_events_table *table = NULL;
>>> - char *cpuid = perf_pmu__getcpuid(pmu);
>>> + static const struct pmu_events_table *table;
>>> size_t i;
>>>
>>> - /* on some platforms which uses cpus map, cpuid can be NULL for
>>> - * PMUs other than CORE PMUs.
>>> - */
>>> - if (!cpuid)
>>> - return NULL;
>>> -
>>> - i = 0;
>>> - for (;;) {
>>> - const struct pmu_events_map *map = &pmu_events_map[i++];
>>> - if (!map->arch)
>>> - break;
>>> -
>>> - if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
>>> - table = &map->event_table;
>>> - break;
>>> + if (!table) {
>> If there is no matched table in pmu_events_map,
>> perf_pmu__find_events_table() will enter this branch for repeated search
>> each time.
>> Or do we need to use another variable to indicate whether the search has
>> been performed?
>
> Agreed, the behavior will match the existing behavior. Longer term I
> want to remove this code. Do you have a scenario we should optimize
> for here?
>
Yes, the CPU of the environment I'm using is "AuthenticAMD-15-6B-1" (not
in the pmu_events_map).
As a result, the search is repeated every time.
(If `perf record true` is executed once, the search is repeated for 6
times.)
This commit avoids repeated lookups to improve performance,
so if it's feasible, is it best to consider improving performance in
this case as well?
Thanks,
Yang
On 7/10/23 05:13, Ian Rogers wrote:
> Avoid setting PMU values in intel_pt_pmu_default_config, move to
> perf_pmu__arch_init.
>
> Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: Adrian Hunter <[email protected]>
> ---
> tools/perf/arch/x86/util/intel-pt.c | 2 --
> tools/perf/arch/x86/util/pmu.c | 1 +
> 2 files changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
> index b923bca939d9..6d6cd8f9133c 100644
> --- a/tools/perf/arch/x86/util/intel-pt.c
> +++ b/tools/perf/arch/x86/util/intel-pt.c
> @@ -267,8 +267,6 @@ intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
>
> attr->config = intel_pt_default_config(intel_pt_pmu);
>
> - intel_pt_pmu->selectable = true;
> -
> return attr;
> }
>
> diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> index 811e2377d2d5..949b3e2c67bd 100644
> --- a/tools/perf/arch/x86/util/pmu.c
> +++ b/tools/perf/arch/x86/util/pmu.c
> @@ -22,6 +22,7 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
> #ifdef HAVE_AUXTRACE_SUPPORT
> if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
> pmu->auxtrace = true;
> + pmu->selectable = true;
> pmu->default_config = intel_pt_pmu_default_config(pmu);
> }
> if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) {
On 7/10/23 05:13, Ian Rogers wrote:
> Assign default_config as part of the
> init. perf_pmu__get_default_config was doing more than just getting
> the default config and so this is intended to better align with the
> code.
>
> Signed-off-by: Ian Rogers <[email protected]>
One cosmetic comment otherwise:
Reviewed-by: Adrian Hunter <[email protected]>
> ---
> tools/perf/arch/arm/util/pmu.c | 8 +++-----
> tools/perf/arch/s390/util/pmu.c | 3 +--
> tools/perf/arch/x86/util/pmu.c | 5 ++---
> tools/perf/util/pmu.c | 14 +++++++-------
> tools/perf/util/pmu.h | 2 +-
> 5 files changed, 14 insertions(+), 18 deletions(-)
>
> diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
> index a9623b128ece..d55d2b15f2e6 100644
> --- a/tools/perf/arch/arm/util/pmu.c
> +++ b/tools/perf/arch/arm/util/pmu.c
> @@ -14,22 +14,20 @@
> #include "../../../util/pmu.h"
> #include "../../../util/cs-etm.h"
>
> -struct perf_event_attr
> -*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
> +void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
> {
> #ifdef HAVE_AUXTRACE_SUPPORT
> if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
> /* add ETM default config here */
> pmu->selectable = true;
> - return cs_etm_get_default_config(pmu);
> + pmu->default_config = cs_etm_get_default_config(pmu);
> #if defined(__aarch64__)
> } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
> - return arm_spe_pmu_default_config(pmu);
> + pmu->default_config = arm_spe_pmu_default_config(pmu);
> } else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
> pmu->selectable = true;
> #endif
> }
>
> #endif
> - return NULL;
> }
> diff --git a/tools/perf/arch/s390/util/pmu.c b/tools/perf/arch/s390/util/pmu.c
> index 11f03f32e3fd..886c30e001fa 100644
> --- a/tools/perf/arch/s390/util/pmu.c
> +++ b/tools/perf/arch/s390/util/pmu.c
> @@ -13,11 +13,10 @@
> #define S390_PMUPAI_EXT "pai_ext"
> #define S390_PMUCPUM_CF "cpum_cf"
>
> -struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu)
> +void perf_pmu__arch_init(struct perf_pmu *pmu)
> {
> if (!strcmp(pmu->name, S390_PMUPAI_CRYPTO) ||
> !strcmp(pmu->name, S390_PMUPAI_EXT) ||
> !strcmp(pmu->name, S390_PMUCPUM_CF))
> pmu->selectable = true;
> - return NULL;
> }
> diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> index 8b53ca468a50..811e2377d2d5 100644
> --- a/tools/perf/arch/x86/util/pmu.c
> +++ b/tools/perf/arch/x86/util/pmu.c
> @@ -17,19 +17,18 @@
> #include "../../../util/pmus.h"
> #include "env.h"
>
> -struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
> +void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
> {
> #ifdef HAVE_AUXTRACE_SUPPORT
> if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
> pmu->auxtrace = true;
> - return intel_pt_pmu_default_config(pmu);
> + pmu->default_config = intel_pt_pmu_default_config(pmu);
> }
> if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) {
> pmu->auxtrace = true;
> pmu->selectable = true;
> }
> #endif
> - return NULL;
> }
>
> int perf_pmus__num_mem_pmus(void)
> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> index 6b1b7f8f00fa..6e95b3d2c2e3 100644
> --- a/tools/perf/util/pmu.c
> +++ b/tools/perf/util/pmu.c
> @@ -954,12 +954,6 @@ void pmu_add_sys_aliases(struct perf_pmu *pmu)
> pmu_for_each_sys_event(pmu_add_sys_aliases_iter_fn, pmu);
> }
>
> -struct perf_event_attr * __weak
> -perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
> -{
> - return NULL;
> -}
> -
> static char *pmu_find_alias_name(struct perf_pmu *pmu, int dirfd)
> {
> FILE *file = perf_pmu__open_file_at(pmu, dirfd, "alias");
> @@ -991,6 +985,12 @@ static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
> return max_precise;
> }
>
> +
Double blank line
> +void __weak
> +perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
> +{
> +}
> +
> struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name)
> {
> struct perf_pmu *pmu;
> @@ -1037,7 +1037,7 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char
> pmu_add_sys_aliases(pmu);
> list_add_tail(&pmu->list, pmus);
>
> - pmu->default_config = perf_pmu__get_default_config(pmu);
> + perf_pmu__arch_init(pmu);
>
> return pmu;
> err:
> diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
> index 85190d058852..588c64e38d6b 100644
> --- a/tools/perf/util/pmu.h
> +++ b/tools/perf/util/pmu.h
> @@ -233,7 +233,7 @@ bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name);
>
> int perf_pmu__test(void);
>
> -struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
> +void perf_pmu__arch_init(struct perf_pmu *pmu);
> void pmu_add_cpu_aliases_table(struct perf_pmu *pmu,
> const struct pmu_events_table *table);
>
On 7/10/23 05:13, Ian Rogers wrote:
> Add const to related APIs, this is so they can be used to default
> initialize a perf_event_attr from a const pmu.
>
> Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: Adrian Hunter <[email protected]>
> ---
> tools/perf/util/pmu.c | 10 +++++-----
> tools/perf/util/pmu.h | 2 +-
> 2 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> index e11901c923d7..eb17f00bd0d2 100644
> --- a/tools/perf/util/pmu.c
> +++ b/tools/perf/util/pmu.c
> @@ -156,7 +156,7 @@ static void __perf_pmu_format__load(struct perf_pmu_format *format, FILE *file)
> format->loaded = true;
> }
>
> -static void perf_pmu_format__load(struct perf_pmu *pmu, struct perf_pmu_format *format)
> +static void perf_pmu_format__load(const struct perf_pmu *pmu, struct perf_pmu_format *format)
> {
> char path[PATH_MAX];
> FILE *file = NULL;
> @@ -1132,7 +1132,7 @@ void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel,
> }
>
> static struct perf_pmu_format *
> -pmu_find_format(struct list_head *formats, const char *name)
> +pmu_find_format(const struct list_head *formats, const char *name)
> {
> struct perf_pmu_format *format;
>
> @@ -1230,7 +1230,7 @@ static int pmu_resolve_param_term(struct parse_events_term *term,
> return -1;
> }
>
> -static char *pmu_formats_string(struct list_head *formats)
> +static char *pmu_formats_string(const struct list_head *formats)
> {
> struct perf_pmu_format *format;
> char *str = NULL;
> @@ -1256,7 +1256,7 @@ static char *pmu_formats_string(struct list_head *formats)
> * Setup one of config[12] attr members based on the
> * user input data - term parameter.
> */
> -static int pmu_config_term(struct perf_pmu *pmu,
> +static int pmu_config_term(const struct perf_pmu *pmu,
> struct perf_event_attr *attr,
> struct parse_events_term *term,
> struct parse_events_terms *head_terms,
> @@ -1379,7 +1379,7 @@ static int pmu_config_term(struct perf_pmu *pmu,
> return 0;
> }
>
> -int perf_pmu__config_terms(struct perf_pmu *pmu,
> +int perf_pmu__config_terms(const struct perf_pmu *pmu,
> struct perf_event_attr *attr,
> struct parse_events_terms *terms,
> bool zero, struct parse_events_error *err)
> diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
> index 24af7297b522..5a05131aa4ce 100644
> --- a/tools/perf/util/pmu.h
> +++ b/tools/perf/util/pmu.h
> @@ -193,7 +193,7 @@ void pmu_add_sys_aliases(struct perf_pmu *pmu);
> int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
> struct parse_events_terms *head_terms,
> struct parse_events_error *error);
> -int perf_pmu__config_terms(struct perf_pmu *pmu,
> +int perf_pmu__config_terms(const struct perf_pmu *pmu,
> struct perf_event_attr *attr,
> struct parse_events_terms *terms,
> bool zero, struct parse_events_error *error);
On 7/10/23 05:13, Ian Rogers wrote:
> File APIs don't alter the struct pmu so allow const ones to be passed.
>
> Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: Adrian Hunter <[email protected]>
> ---
> tools/perf/util/pmu.c | 12 ++++++------
> tools/perf/util/pmu.h | 11 ++++++-----
> tools/perf/util/python.c | 2 +-
> 3 files changed, 13 insertions(+), 12 deletions(-)
>
> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> index 6e95b3d2c2e3..e11901c923d7 100644
> --- a/tools/perf/util/pmu.c
> +++ b/tools/perf/util/pmu.c
> @@ -575,7 +575,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
> return 0;
> }
>
> -static inline bool pmu_alias_info_file(char *name)
> +static inline bool pmu_alias_info_file(const char *name)
> {
> size_t len;
>
> @@ -1771,7 +1771,7 @@ bool perf_pmu__is_software(const struct perf_pmu *pmu)
> return !strcmp(pmu->name, "kprobe") || !strcmp(pmu->name, "uprobe");
> }
>
> -FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
> +FILE *perf_pmu__open_file(const struct perf_pmu *pmu, const char *name)
> {
> char path[PATH_MAX];
>
> @@ -1782,7 +1782,7 @@ FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
> return fopen(path, "r");
> }
>
> -FILE *perf_pmu__open_file_at(struct perf_pmu *pmu, int dirfd, const char *name)
> +FILE *perf_pmu__open_file_at(const struct perf_pmu *pmu, int dirfd, const char *name)
> {
> int fd;
>
> @@ -1793,7 +1793,7 @@ FILE *perf_pmu__open_file_at(struct perf_pmu *pmu, int dirfd, const char *name)
> return fdopen(fd, "r");
> }
>
> -int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
> +int perf_pmu__scan_file(const struct perf_pmu *pmu, const char *name, const char *fmt,
> ...)
> {
> va_list args;
> @@ -1810,7 +1810,7 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
> return ret;
> }
>
> -int perf_pmu__scan_file_at(struct perf_pmu *pmu, int dirfd, const char *name,
> +int perf_pmu__scan_file_at(const struct perf_pmu *pmu, int dirfd, const char *name,
> const char *fmt, ...)
> {
> va_list args;
> @@ -1827,7 +1827,7 @@ int perf_pmu__scan_file_at(struct perf_pmu *pmu, int dirfd, const char *name,
> return ret;
> }
>
> -bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name)
> +bool perf_pmu__file_exists(const struct perf_pmu *pmu, const char *name)
> {
> char path[PATH_MAX];
>
> diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
> index 588c64e38d6b..24af7297b522 100644
> --- a/tools/perf/util/pmu.h
> +++ b/tools/perf/util/pmu.h
> @@ -222,14 +222,15 @@ bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name);
> */
> bool perf_pmu__is_software(const struct perf_pmu *pmu);
>
> -FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name);
> -FILE *perf_pmu__open_file_at(struct perf_pmu *pmu, int dirfd, const char *name);
> +FILE *perf_pmu__open_file(const struct perf_pmu *pmu, const char *name);
> +FILE *perf_pmu__open_file_at(const struct perf_pmu *pmu, int dirfd, const char *name);
>
> -int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4);
> -int perf_pmu__scan_file_at(struct perf_pmu *pmu, int dirfd, const char *name,
> +int perf_pmu__scan_file(const struct perf_pmu *pmu, const char *name, const char *fmt, ...)
> + __scanf(3, 4);
> +int perf_pmu__scan_file_at(const struct perf_pmu *pmu, int dirfd, const char *name,
> const char *fmt, ...) __scanf(4, 5);
>
> -bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name);
> +bool perf_pmu__file_exists(const struct perf_pmu *pmu, const char *name);
>
> int perf_pmu__test(void);
>
> diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
> index c29f5f0bb552..8761f51b5c7c 100644
> --- a/tools/perf/util/python.c
> +++ b/tools/perf/util/python.c
> @@ -98,7 +98,7 @@ struct perf_pmu *evsel__find_pmu(const struct evsel *evsel __maybe_unused)
> return NULL;
> }
>
> -int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...)
> +int perf_pmu__scan_file(const struct perf_pmu *pmu, const char *name, const char *fmt, ...)
> {
> return EOF;
> }
On 7/10/23 05:13, Ian Rogers wrote:
> strcmp_cpuid_str performs regular expression comparisons. Avoid
> repeated computation of the table by remembering the table in a
> static.
>
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/perf/pmu-events/jevents.py | 48 +++++++++++++++++++-------------
> 1 file changed, 28 insertions(+), 20 deletions(-)
>
> diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
> index fd009752b427..8d8d5088c53c 100755
> --- a/tools/perf/pmu-events/jevents.py
> +++ b/tools/perf/pmu-events/jevents.py
> @@ -978,28 +978,32 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
>
> const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
> {
> - const struct pmu_events_table *table = NULL;
> - char *cpuid = perf_pmu__getcpuid(pmu);
> + static const struct pmu_events_table *table;
> size_t i;
>
> - /* on some platforms which uses cpus map, cpuid can be NULL for
> - * PMUs other than CORE PMUs.
> - */
> - if (!cpuid)
> - return NULL;
> -
> - i = 0;
> - for (;;) {
> - const struct pmu_events_map *map = &pmu_events_map[i++];
> - if (!map->arch)
> - break;
> -
> - if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
> - table = &map->event_table;
> - break;
> + if (!table) {
> + char *cpuid = perf_pmu__getcpuid(pmu);
Seems to assume the function is never called with a pmu
that would give a different result for perf_pmu__getcpuid(pmu)
> +
> + /*
> + * On some platforms which uses cpus map, cpuid can be NULL for
> + * PMUs other than CORE PMUs.
> + */
> + if (!cpuid)
> + return NULL;
> +
> + i = 0;
> + for (;;) {
> + const struct pmu_events_map *map = &pmu_events_map[i++];
> + if (!map->arch)
> + break;
> +
> + if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
> + table = &map->event_table;
> + break;
> + }
> }
> + free(cpuid);
> }
> - free(cpuid);
> if (!pmu)
> return table;
>
> @@ -1015,13 +1019,17 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
>
> const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
> {
> - const struct pmu_metrics_table *table = NULL;
> - char *cpuid = perf_pmu__getcpuid(pmu);
> + static const struct pmu_metrics_table *table;
> + char *cpuid;
> int i;
>
> + if (table)
> + return table;
Ditto
> +
> /* on some platforms which uses cpus map, cpuid can be NULL for
> * PMUs other than CORE PMUs.
> */
> + cpuid = perf_pmu__getcpuid(pmu);
> if (!cpuid)
> return NULL;
>
On 7/10/23 05:13, Ian Rogers wrote:
> Avoid setting PMU values in arm_spe_pmu_default_config, move to
> perf_pmu__arch_init.
>
> Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: Adrian Hunter <[email protected]>
> ---
> tools/perf/arch/arm/util/pmu.c | 2 ++
> tools/perf/arch/arm64/util/arm-spe.c | 3 ---
> 2 files changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
> index d55d2b15f2e6..f25f68f84a94 100644
> --- a/tools/perf/arch/arm/util/pmu.c
> +++ b/tools/perf/arch/arm/util/pmu.c
> @@ -23,6 +23,8 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
> pmu->default_config = cs_etm_get_default_config(pmu);
> #if defined(__aarch64__)
> } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
> + pmu->selectable = true;
> + pmu->is_uncore = false;
> pmu->default_config = arm_spe_pmu_default_config(pmu);
> } else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
> pmu->selectable = true;
> diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
> index 9cc3d6dcb849..08a76734ccd2 100644
> --- a/tools/perf/arch/arm64/util/arm-spe.c
> +++ b/tools/perf/arch/arm64/util/arm-spe.c
> @@ -516,8 +516,5 @@ struct perf_event_attr
> attr->sample_period = 4096;
> }
>
> - arm_spe_pmu->selectable = true;
> - arm_spe_pmu->is_uncore = false;
> -
> return attr;
> }
On 7/10/23 05:13, Ian Rogers wrote:
> The default config is computed during creation of the PMU and may do
> things like scanning sysfs, when the PMU may just be used as part of
> scanning. Change default_config to perf_event_attr_init_default, a
> callback that is used when a default config needs initializing. This
> avoids holding onto the memory for a perf_event_attr and copying.
>
> On a tigerlake laptop running the pmu-scan benchmark:
>
> Before:
> Running 'internals/pmu-scan' benchmark:
> Computing performance of sysfs PMU event scan for 100 times
> Average core PMU scanning took: 28.780 usec (+- 0.503 usec)
> Average PMU scanning took: 283.480 usec (+- 18.471 usec)
> Number of openat syscalls: 30,227
>
> After:
> Running 'internals/pmu-scan' benchmark:
> Computing performance of sysfs PMU event scan for 100 times
> Average core PMU scanning took: 27.880 usec (+- 0.169 usec)
> Average PMU scanning took: 245.260 usec (+- 15.758 usec)
> Number of openat syscalls: 28,914
>
> Over 3 runs it is a nearly 12% reduction in execution time and a 4.3%
> of openat calls.
>
> Signed-off-by: Ian Rogers <[email protected]>
One minor comment below, otherwise:
Reviewed-by: Adrian Hunter <[email protected]>
> ---
> tools/perf/arch/arm/util/cs-etm.c | 13 ++------
> tools/perf/arch/arm/util/pmu.c | 4 +--
> tools/perf/arch/arm64/util/arm-spe.c | 45 ++++++++++++++--------------
> tools/perf/arch/x86/util/intel-pt.c | 25 ++++++++--------
> tools/perf/arch/x86/util/pmu.c | 2 +-
> tools/perf/util/arm-spe.h | 4 ++-
> tools/perf/util/cs-etm.h | 2 +-
> tools/perf/util/intel-pt.h | 3 +-
> tools/perf/util/parse-events.c | 12 ++++----
> tools/perf/util/pmu.c | 3 +-
> tools/perf/util/pmu.h | 3 +-
> 11 files changed, 56 insertions(+), 60 deletions(-)
>
> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> index b8d6a953fd74..16bba74f048b 100644
> --- a/tools/perf/arch/arm/util/cs-etm.c
> +++ b/tools/perf/arch/arm/util/cs-etm.c
> @@ -917,16 +917,9 @@ struct auxtrace_record *cs_etm_record_init(int *err)
> * (CFG_CHG and evsel__set_config_if_unset()). If no default is set then user
> * changes aren't tracked.
> */
> -struct perf_event_attr *
> -cs_etm_get_default_config(struct perf_pmu *pmu __maybe_unused)
> +void
> +cs_etm_get_default_config(const struct perf_pmu *pmu __maybe_unused,
> + struct perf_event_attr *attr)
> {
> - struct perf_event_attr *attr;
> -
> - attr = zalloc(sizeof(struct perf_event_attr));
> - if (!attr)
> - return NULL;
> -
> attr->sample_period = 1;
> -
> - return attr;
> }
> diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
> index f25f68f84a94..7f3af3b97f3b 100644
> --- a/tools/perf/arch/arm/util/pmu.c
> +++ b/tools/perf/arch/arm/util/pmu.c
> @@ -20,12 +20,12 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
> if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
> /* add ETM default config here */
> pmu->selectable = true;
> - pmu->default_config = cs_etm_get_default_config(pmu);
> + pmu->perf_event_attr_init_default = cs_etm_get_default_config;
> #if defined(__aarch64__)
> } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
> pmu->selectable = true;
> pmu->is_uncore = false;
> - pmu->default_config = arm_spe_pmu_default_config(pmu);
> + pmu->perf_event_attr_init_default = arm_spe_pmu_default_config;
> } else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
> pmu->selectable = true;
> #endif
> diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
> index 08a76734ccd2..e3acc739bd00 100644
> --- a/tools/perf/arch/arm64/util/arm-spe.c
> +++ b/tools/perf/arch/arm64/util/arm-spe.c
> @@ -113,6 +113,25 @@ arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts,
> }
> }
>
> +static __u64 arm_spe_pmu__sample_period(const struct perf_pmu *arm_spe_pmu)
> +{
> + static __u64 sample_period;
> +
> + if (sample_period)
> + return sample_period;
> +
> + /*
> + * If kernel driver doesn't advertise a minimum,
> + * use max allowable by PMSIDR_EL1.INTERVAL
> + */
> + if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
> + &sample_period) != 1) {
> + pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
> + sample_period = 4096;
> + }
> + return sample_period;
> +}
> +
> static int arm_spe_recording_options(struct auxtrace_record *itr,
> struct evlist *evlist,
> struct record_opts *opts)
> @@ -136,7 +155,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
> return -EINVAL;
> }
> evsel->core.attr.freq = 0;
> - evsel->core.attr.sample_period = arm_spe_pmu->default_config->sample_period;
> + evsel->core.attr.sample_period = arm_spe_pmu__sample_period(arm_spe_pmu);
> evsel->needs_auxtrace_mmap = true;
> arm_spe_evsel = evsel;
> opts->full_auxtrace = true;
> @@ -495,26 +514,8 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
> return &sper->itr;
> }
>
> -struct perf_event_attr
> -*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu)
> +void
> +arm_spe_pmu_default_config(const struct perf_pmu *arm_spe_pmu, struct perf_event_attr *attr)
> {
> - struct perf_event_attr *attr;
> -
> - attr = zalloc(sizeof(struct perf_event_attr));
> - if (!attr) {
> - pr_err("arm_spe default config cannot allocate a perf_event_attr\n");
> - return NULL;
> - }
> -
> - /*
> - * If kernel driver doesn't advertise a minimum,
> - * use max allowable by PMSIDR_EL1.INTERVAL
> - */
> - if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
> - &attr->sample_period) != 1) {
> - pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
> - attr->sample_period = 4096;
> - }
> -
> - return attr;
> + attr->sample_period = arm_spe_pmu__sample_period(arm_spe_pmu);
> }
> diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
> index 6d6cd8f9133c..fa0c718b9e72 100644
> --- a/tools/perf/arch/x86/util/intel-pt.c
> +++ b/tools/perf/arch/x86/util/intel-pt.c
> @@ -60,7 +60,7 @@ struct intel_pt_recording {
> size_t priv_size;
> };
>
> -static int intel_pt_parse_terms_with_default(struct perf_pmu *pmu,
> +static int intel_pt_parse_terms_with_default(const struct perf_pmu *pmu,
> const char *str,
> u64 *config)
> {
> @@ -84,7 +84,7 @@ static int intel_pt_parse_terms_with_default(struct perf_pmu *pmu,
> return err;
> }
>
> -static int intel_pt_parse_terms(struct perf_pmu *pmu, const char *str, u64 *config)
> +static int intel_pt_parse_terms(const struct perf_pmu *pmu, const char *str, u64 *config)
> {
> *config = 0;
> return intel_pt_parse_terms_with_default(pmu, str, config);
> @@ -177,7 +177,7 @@ static int intel_pt_pick_bit(int bits, int target)
> return pick;
> }
>
> -static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
> +static u64 intel_pt_default_config(const struct perf_pmu *intel_pt_pmu)
> {
> char buf[256];
> int mtc, mtc_periods = 0, mtc_period;
> @@ -256,18 +256,17 @@ static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
> return 0;
> }
>
> -struct perf_event_attr *
> -intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
> +void intel_pt_pmu_default_config(const struct perf_pmu *intel_pt_pmu,
> + struct perf_event_attr *attr)
> {
> - struct perf_event_attr *attr;
> + static u64 config;
> + static bool initialized;
>
> - attr = zalloc(sizeof(struct perf_event_attr));
> - if (!attr)
> - return NULL;
> -
> - attr->config = intel_pt_default_config(intel_pt_pmu);
> -
> - return attr;
> + if (!initialized) {
> + config = intel_pt_default_config(intel_pt_pmu);
> + initialized = true;
> + }
> + attr->config = config;
> }
>
> static const char *intel_pt_find_filter(struct evlist *evlist,
> diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> index 949b3e2c67bd..469555ae9b3c 100644
> --- a/tools/perf/arch/x86/util/pmu.c
> +++ b/tools/perf/arch/x86/util/pmu.c
> @@ -23,7 +23,7 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
> if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
> pmu->auxtrace = true;
> pmu->selectable = true;
> - pmu->default_config = intel_pt_pmu_default_config(pmu);
> + pmu->perf_event_attr_init_default = intel_pt_pmu_default_config;
> }
> if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) {
> pmu->auxtrace = true;
> diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h
> index 98d3235781c3..4f4900c18f3e 100644
> --- a/tools/perf/util/arm-spe.h
> +++ b/tools/perf/util/arm-spe.h
> @@ -27,5 +27,7 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
> int arm_spe_process_auxtrace_info(union perf_event *event,
> struct perf_session *session);
>
> -struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu);
> +void arm_spe_pmu_default_config(const struct perf_pmu *arm_spe_pmu,
> + struct perf_event_attr *attr);
> +
> #endif
> diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
> index 7cca37887917..4696267a32f0 100644
> --- a/tools/perf/util/cs-etm.h
> +++ b/tools/perf/util/cs-etm.h
> @@ -242,7 +242,7 @@ struct cs_etm_packet_queue {
>
> int cs_etm__process_auxtrace_info(union perf_event *event,
> struct perf_session *session);
> -struct perf_event_attr *cs_etm_get_default_config(struct perf_pmu *pmu);
> +void cs_etm_get_default_config(const struct perf_pmu *pmu, struct perf_event_attr *attr);
>
> enum cs_etm_pid_fmt {
> CS_ETM_PIDFMT_NONE,
> diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h
> index c7d6068e3a6b..18fd0be52e6c 100644
> --- a/tools/perf/util/intel-pt.h
> +++ b/tools/perf/util/intel-pt.h
> @@ -42,6 +42,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err);
> int intel_pt_process_auxtrace_info(union perf_event *event,
> struct perf_session *session);
>
> -struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);
> +void intel_pt_pmu_default_config(const struct perf_pmu *intel_pt_pmu,
> + struct perf_event_attr *attr);
>
> #endif
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> index c56e07bd7dd6..ea5579510b97 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -1418,11 +1418,10 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
> }
> fix_raw(&parsed_terms, pmu);
>
> - if (pmu->default_config) {
> - memcpy(&attr, pmu->default_config, sizeof(struct perf_event_attr));
> - } else {
> - memset(&attr, 0, sizeof(attr));
> - }
> + memset(&attr, 0, sizeof(attr));
> + if (pmu->perf_event_attr_init_default)
> + pmu->perf_event_attr_init_default(pmu, &attr);
> +
> attr.type = pmu->type;
>
> if (list_empty(&parsed_terms.terms)) {
> @@ -1466,7 +1465,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
> * When using default config, record which bits of attr->config were
> * changed by the user.
> */
> - if (pmu->default_config && get_config_chgs(pmu, &parsed_terms, &config_terms)) {
> + if (pmu->perf_event_attr_init_default &&
> + get_config_chgs(pmu, &parsed_terms, &config_terms)) {
> parse_events_terms__exit(&parsed_terms);
> return -ENOMEM;
> }
> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> index eb17f00bd0d2..a87371eb750e 100644
> --- a/tools/perf/util/pmu.c
> +++ b/tools/perf/util/pmu.c
> @@ -1403,7 +1403,7 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
> struct parse_events_terms *head_terms,
> struct parse_events_error *err)
> {
> - bool zero = !!pmu->default_config;
> + bool zero = !!pmu->perf_event_attr_init_default;
>
> return perf_pmu__config_terms(pmu, attr, head_terms, zero, err);
> }
> @@ -2065,7 +2065,6 @@ void perf_pmu__delete(struct perf_pmu *pmu)
>
> perf_cpu_map__put(pmu->cpus);
>
> - zfree(&pmu->default_config);
> zfree(&pmu->name);
> zfree(&pmu->alias_name);
> zfree(&pmu->id);
> diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
> index 5a05131aa4ce..1e05cbb4b4f9 100644
> --- a/tools/perf/util/pmu.h
> +++ b/tools/perf/util/pmu.h
> @@ -95,7 +95,8 @@ struct perf_pmu {
> * @default_config: Optional default perf_event_attr determined in
> * architecture specific code.
> */
> - struct perf_event_attr *default_config;
> + void (*perf_event_attr_init_default)(const struct perf_pmu *pmu,
> + struct perf_event_attr *attr);
Comment needs updating
> /**
> * @cpus: Empty or the contents of either of:
> * <sysfs>/bus/event_source/devices/<name>/cpumask.
On Thu, Oct 12, 2023 at 4:52 AM Adrian Hunter <[email protected]> wrote:
>
> On 7/10/23 05:13, Ian Rogers wrote:
> > Assign default_config as part of the
> > init. perf_pmu__get_default_config was doing more than just getting
> > the default config and so this is intended to better align with the
> > code.
> >
> > Signed-off-by: Ian Rogers <[email protected]>
>
> One cosmetic comment otherwise:
>
> Reviewed-by: Adrian Hunter <[email protected]>
>
> > ---
> > tools/perf/arch/arm/util/pmu.c | 8 +++-----
> > tools/perf/arch/s390/util/pmu.c | 3 +--
> > tools/perf/arch/x86/util/pmu.c | 5 ++---
> > tools/perf/util/pmu.c | 14 +++++++-------
> > tools/perf/util/pmu.h | 2 +-
> > 5 files changed, 14 insertions(+), 18 deletions(-)
> >
> > diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
> > index a9623b128ece..d55d2b15f2e6 100644
> > --- a/tools/perf/arch/arm/util/pmu.c
> > +++ b/tools/perf/arch/arm/util/pmu.c
> > @@ -14,22 +14,20 @@
> > #include "../../../util/pmu.h"
> > #include "../../../util/cs-etm.h"
> >
> > -struct perf_event_attr
> > -*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
> > +void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
> > {
> > #ifdef HAVE_AUXTRACE_SUPPORT
> > if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
> > /* add ETM default config here */
> > pmu->selectable = true;
> > - return cs_etm_get_default_config(pmu);
> > + pmu->default_config = cs_etm_get_default_config(pmu);
> > #if defined(__aarch64__)
> > } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
> > - return arm_spe_pmu_default_config(pmu);
> > + pmu->default_config = arm_spe_pmu_default_config(pmu);
> > } else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
> > pmu->selectable = true;
> > #endif
> > }
> >
> > #endif
> > - return NULL;
> > }
> > diff --git a/tools/perf/arch/s390/util/pmu.c b/tools/perf/arch/s390/util/pmu.c
> > index 11f03f32e3fd..886c30e001fa 100644
> > --- a/tools/perf/arch/s390/util/pmu.c
> > +++ b/tools/perf/arch/s390/util/pmu.c
> > @@ -13,11 +13,10 @@
> > #define S390_PMUPAI_EXT "pai_ext"
> > #define S390_PMUCPUM_CF "cpum_cf"
> >
> > -struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu)
> > +void perf_pmu__arch_init(struct perf_pmu *pmu)
> > {
> > if (!strcmp(pmu->name, S390_PMUPAI_CRYPTO) ||
> > !strcmp(pmu->name, S390_PMUPAI_EXT) ||
> > !strcmp(pmu->name, S390_PMUCPUM_CF))
> > pmu->selectable = true;
> > - return NULL;
> > }
> > diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> > index 8b53ca468a50..811e2377d2d5 100644
> > --- a/tools/perf/arch/x86/util/pmu.c
> > +++ b/tools/perf/arch/x86/util/pmu.c
> > @@ -17,19 +17,18 @@
> > #include "../../../util/pmus.h"
> > #include "env.h"
> >
> > -struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
> > +void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
> > {
> > #ifdef HAVE_AUXTRACE_SUPPORT
> > if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
> > pmu->auxtrace = true;
> > - return intel_pt_pmu_default_config(pmu);
> > + pmu->default_config = intel_pt_pmu_default_config(pmu);
> > }
> > if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) {
> > pmu->auxtrace = true;
> > pmu->selectable = true;
> > }
> > #endif
> > - return NULL;
> > }
> >
> > int perf_pmus__num_mem_pmus(void)
> > diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> > index 6b1b7f8f00fa..6e95b3d2c2e3 100644
> > --- a/tools/perf/util/pmu.c
> > +++ b/tools/perf/util/pmu.c
> > @@ -954,12 +954,6 @@ void pmu_add_sys_aliases(struct perf_pmu *pmu)
> > pmu_for_each_sys_event(pmu_add_sys_aliases_iter_fn, pmu);
> > }
> >
> > -struct perf_event_attr * __weak
> > -perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
> > -{
> > - return NULL;
> > -}
> > -
> > static char *pmu_find_alias_name(struct perf_pmu *pmu, int dirfd)
> > {
> > FILE *file = perf_pmu__open_file_at(pmu, dirfd, "alias");
> > @@ -991,6 +985,12 @@ static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
> > return max_precise;
> > }
> >
> > +
>
> Double blank line
Thanks, will fix in v2.
Ian
> > +void __weak
> > +perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
> > +{
> > +}
> > +
> > struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name)
> > {
> > struct perf_pmu *pmu;
> > @@ -1037,7 +1037,7 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char
> > pmu_add_sys_aliases(pmu);
> > list_add_tail(&pmu->list, pmus);
> >
> > - pmu->default_config = perf_pmu__get_default_config(pmu);
> > + perf_pmu__arch_init(pmu);
> >
> > return pmu;
> > err:
> > diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
> > index 85190d058852..588c64e38d6b 100644
> > --- a/tools/perf/util/pmu.h
> > +++ b/tools/perf/util/pmu.h
> > @@ -233,7 +233,7 @@ bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name);
> >
> > int perf_pmu__test(void);
> >
> > -struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
> > +void perf_pmu__arch_init(struct perf_pmu *pmu);
> > void pmu_add_cpu_aliases_table(struct perf_pmu *pmu,
> > const struct pmu_events_table *table);
> >
>