Set of fixes related to the comments here [1]. Mainly cleanups,
additional tests and refactoring since adding the new strcmp_cpuid_str()
metric expression.
I added the string replace function to the perf utils
rather than tools/lib/string.c because it didn't seem
easy to add tests for tools/lib.
[1]: https://lore.kernel.org/linux-arm-kernel/CAP-5=fVnUx0BnJC7X1rrm42OD7Bk=ZsHWNwAZMBYyB7yWhBfhQ@mail.gmail.com/
[2]: https://lore.kernel.org/linux-perf-users/[email protected]/
---
Changes since v2:
* Drop patches that were already applied (which makes the cover letter
mostly redundant)
* Avoid generating the compiler warning reported here [2]
Changes since v1:
* s -> haystack
* find -> needle
James Clark (3):
perf pmu: Move pmu__find_core_pmu() to pmus.c
perf pmus: Simplify perf_pmus__find_core_pmu()
perf pmu: Remove unused function
tools/perf/arch/arm64/util/pmu.c | 20 ++++++++------------
tools/perf/tests/expr.c | 2 +-
tools/perf/util/expr.c | 2 +-
tools/perf/util/pmu.c | 22 ----------------------
tools/perf/util/pmu.h | 3 +--
tools/perf/util/pmus.c | 8 +++++++-
6 files changed, 18 insertions(+), 39 deletions(-)
--
2.34.1
Currently the while loop always either exits on the first iteration with
a core PMU, or exits with NULL on heterogeneous systems or when not all
CPUs are online.
Both of the latter behaviors are undesirable for platforms other than
Arm so simplify it to always return the first core PMU, or NULL if none
exist.
This behavior was depended on by the Arm version of
pmu_metrics_table__find(), so the logic has been moved there instead.
Suggested-by: Ian Rogers <[email protected]>
Reviewed-by: Ian Rogers <[email protected]>
Signed-off-by: James Clark <[email protected]>
---
tools/perf/arch/arm64/util/pmu.c | 8 +++++++-
tools/perf/util/pmus.c | 14 +-------------
2 files changed, 8 insertions(+), 14 deletions(-)
diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
index 3d9330feebd2..3099f5f448ba 100644
--- a/tools/perf/arch/arm64/util/pmu.c
+++ b/tools/perf/arch/arm64/util/pmu.c
@@ -10,8 +10,14 @@
const struct pmu_metrics_table *pmu_metrics_table__find(void)
{
- struct perf_pmu *pmu = perf_pmus__find_core_pmu();
+ struct perf_pmu *pmu;
+
+ /* Metrics aren't currently supported on heterogeneous Arm systems */
+ if (perf_pmus__num_core_pmus() > 1)
+ return NULL;
+ /* Doesn't matter which one here because they'll all be the same */
+ pmu = perf_pmus__find_core_pmu();
if (pmu)
return perf_pmu__find_metrics_table(pmu);
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
index cec869cbe163..64e798e68a2d 100644
--- a/tools/perf/util/pmus.c
+++ b/tools/perf/util/pmus.c
@@ -596,17 +596,5 @@ struct perf_pmu *evsel__find_pmu(const struct evsel *evsel)
struct perf_pmu *perf_pmus__find_core_pmu(void)
{
- struct perf_pmu *pmu = NULL;
-
- while ((pmu = perf_pmus__scan_core(pmu))) {
- /*
- * The cpumap should cover all CPUs. Otherwise, some CPUs may
- * not support some events or have different event IDs.
- */
- if (RC_CHK_ACCESS(pmu->cpus)->nr != cpu__max_cpu().cpu)
- return NULL;
-
- return pmu;
- }
- return NULL;
+ return perf_pmus__scan_core(NULL);
}
--
2.34.1
pmu_events_table__find() is no longer used so remove it and its Arm
specific version.
Reviewed-by: Ian Rogers <[email protected]>
Signed-off-by: James Clark <[email protected]>
---
tools/perf/arch/arm64/util/pmu.c | 10 ----------
tools/perf/util/pmu.c | 5 -----
tools/perf/util/pmu.h | 1 -
3 files changed, 16 deletions(-)
diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
index 3099f5f448ba..2a4eab2d160e 100644
--- a/tools/perf/arch/arm64/util/pmu.c
+++ b/tools/perf/arch/arm64/util/pmu.c
@@ -24,16 +24,6 @@ const struct pmu_metrics_table *pmu_metrics_table__find(void)
return NULL;
}
-const struct pmu_events_table *pmu_events_table__find(void)
-{
- struct perf_pmu *pmu = perf_pmus__find_core_pmu();
-
- if (pmu)
- return perf_pmu__find_events_table(pmu);
-
- return NULL;
-}
-
double perf_pmu__cpu_slots_per_cycle(void)
{
char path[PATH_MAX];
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index f50a5636633f..0d81c059c91c 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -776,11 +776,6 @@ char *perf_pmu__getcpuid(struct perf_pmu *pmu)
return cpuid;
}
-__weak const struct pmu_events_table *pmu_events_table__find(void)
-{
- return perf_pmu__find_events_table(NULL);
-}
-
__weak const struct pmu_metrics_table *pmu_metrics_table__find(void)
{
return perf_pmu__find_metrics_table(NULL);
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index d7b46085642d..04b317b17d66 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -238,7 +238,6 @@ void pmu_add_cpu_aliases_table(struct perf_pmu *pmu,
const struct pmu_events_table *table);
char *perf_pmu__getcpuid(struct perf_pmu *pmu);
-const struct pmu_events_table *pmu_events_table__find(void);
const struct pmu_metrics_table *pmu_metrics_table__find(void);
int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
--
2.34.1
On 13/09/2023 16:33, James Clark wrote:
> pmu_events_table__find() is no longer used so remove it and its Arm
> specific version.
>
> Reviewed-by: Ian Rogers <[email protected]>
> Signed-off-by: James Clark <[email protected]>
> ---
You really should put the name of the function in the subject. Apart
from that:
Reviewed-by: John Garry <[email protected]>
> tools/perf/arch/arm64/util/pmu.c | 10 ----------
> tools/perf/util/pmu.c | 5 -----
> tools/perf/util/pmu.h | 1 -
> 3 files changed, 16 deletions(-)
>
> diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
> index 3099f5f448ba..2a4eab2d160e 100644
> --- a/tools/perf/arch/arm64/util/pmu.c
> +++ b/tools/perf/arch/arm64/util/pmu.c
> @@ -24,16 +24,6 @@ const struct pmu_metrics_table *pmu_metrics_table__find(void)
> return NULL;
> }
>
> -const struct pmu_events_table *pmu_events_table__find(void)
> -{
> - struct perf_pmu *pmu = perf_pmus__find_core_pmu();
> -
> - if (pmu)
> - return perf_pmu__find_events_table(pmu);
> -
> - return NULL;
> -}
> -
> double perf_pmu__cpu_slots_per_cycle(void)
> {
> char path[PATH_MAX];
> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> index f50a5636633f..0d81c059c91c 100644
> --- a/tools/perf/util/pmu.c
> +++ b/tools/perf/util/pmu.c
> @@ -776,11 +776,6 @@ char *perf_pmu__getcpuid(struct perf_pmu *pmu)
> return cpuid;
> }
>
> -__weak const struct pmu_events_table *pmu_events_table__find(void)
> -{
> - return perf_pmu__find_events_table(NULL);
> -}
> -
> __weak const struct pmu_metrics_table *pmu_metrics_table__find(void)
> {
> return perf_pmu__find_metrics_table(NULL);
> diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
> index d7b46085642d..04b317b17d66 100644
> --- a/tools/perf/util/pmu.h
> +++ b/tools/perf/util/pmu.h
> @@ -238,7 +238,6 @@ void pmu_add_cpu_aliases_table(struct perf_pmu *pmu,
> const struct pmu_events_table *table);
>
> char *perf_pmu__getcpuid(struct perf_pmu *pmu);
> -const struct pmu_events_table *pmu_events_table__find(void);
> const struct pmu_metrics_table *pmu_metrics_table__find(void);
>
> int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
On 13/09/2023 16:33, James Clark wrote:
> Currently the while loop always either exits on the first iteration with
> a core PMU, or exits with NULL on heterogeneous systems or when not all
> CPUs are online.
>
> Both of the latter behaviors are undesirable for platforms other than
> Arm so simplify it to always return the first core PMU, or NULL if none
> exist.
>
> This behavior was depended on by the Arm version of
> pmu_metrics_table__find(), so the logic has been moved there instead.
>
> Suggested-by: Ian Rogers <[email protected]>
> Reviewed-by: Ian Rogers <[email protected]>
> Signed-off-by: James Clark <[email protected]>
Reviewed-by: John Garry <[email protected]>
On 14/09/2023 07:41, John Garry wrote:
> On 13/09/2023 16:33, James Clark wrote:
>> pmu_events_table__find() is no longer used so remove it and its Arm
>> specific version.
>>
>> Reviewed-by: Ian Rogers <[email protected]>
>> Signed-off-by: James Clark <[email protected]>
>> ---
>
> You really should put the name of the function in the subject. Apart
> from that:
>
> Reviewed-by: John Garry <[email protected]>
>
Noted, will do next time. Thanks for the reviews.
>> tools/perf/arch/arm64/util/pmu.c | 10 ----------
>> tools/perf/util/pmu.c | 5 -----
>> tools/perf/util/pmu.h | 1 -
>> 3 files changed, 16 deletions(-)
>>
>> diff --git a/tools/perf/arch/arm64/util/pmu.c
>> b/tools/perf/arch/arm64/util/pmu.c
>> index 3099f5f448ba..2a4eab2d160e 100644
>> --- a/tools/perf/arch/arm64/util/pmu.c
>> +++ b/tools/perf/arch/arm64/util/pmu.c
>> @@ -24,16 +24,6 @@ const struct pmu_metrics_table
>> *pmu_metrics_table__find(void)
>> return NULL;
>> }
>> -const struct pmu_events_table *pmu_events_table__find(void)
>> -{
>> - struct perf_pmu *pmu = perf_pmus__find_core_pmu();
>> -
>> - if (pmu)
>> - return perf_pmu__find_events_table(pmu);
>> -
>> - return NULL;
>> -}
>> -
>> double perf_pmu__cpu_slots_per_cycle(void)
>> {
>> char path[PATH_MAX];
>> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
>> index f50a5636633f..0d81c059c91c 100644
>> --- a/tools/perf/util/pmu.c
>> +++ b/tools/perf/util/pmu.c
>> @@ -776,11 +776,6 @@ char *perf_pmu__getcpuid(struct perf_pmu *pmu)
>> return cpuid;
>> }
>> -__weak const struct pmu_events_table *pmu_events_table__find(void)
>> -{
>> - return perf_pmu__find_events_table(NULL);
>> -}
>> -
>> __weak const struct pmu_metrics_table *pmu_metrics_table__find(void)
>> {
>> return perf_pmu__find_metrics_table(NULL);
>> diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
>> index d7b46085642d..04b317b17d66 100644
>> --- a/tools/perf/util/pmu.h
>> +++ b/tools/perf/util/pmu.h
>> @@ -238,7 +238,6 @@ void pmu_add_cpu_aliases_table(struct perf_pmu *pmu,
>> const struct pmu_events_table *table);
>> char *perf_pmu__getcpuid(struct perf_pmu *pmu);
>> -const struct pmu_events_table *pmu_events_table__find(void);
>> const struct pmu_metrics_table *pmu_metrics_table__find(void);
>> int perf_pmu__convert_scale(const char *scale, char **end, double
>> *sval);
>
On 13/09/2023 16:33, James Clark wrote:
> Currently the while loop always either exits on the first iteration with
> a core PMU, or exits with NULL on heterogeneous systems or when not all
> CPUs are online.
>
> Both of the latter behaviors are undesirable for platforms other than
> Arm so simplify it to always return the first core PMU, or NULL if none
> exist.
>
> This behavior was depended on by the Arm version of
> pmu_metrics_table__find(), so the logic has been moved there instead.
>
> Suggested-by: Ian Rogers <[email protected]>
> Reviewed-by: Ian Rogers <[email protected]>
> Signed-off-by: James Clark <[email protected]>
Turns out the "Simple expression parser" test is failing on
heterogeneous arm systems without this patch. I didn't realise there was
a dependency and should have put the commits the other way round. I will
leave the error message here in case someone bumps into it, but no fix
is required apart from applying the remaining patches in this set:
$ perf test expr -v
4: Simple expression parser :
--- start ---
test child forked, pid 4902
Using CPUID 0x00000000410fd070
FAILED tests/expr.c:83 get_cpuid
test child finished with -1
---- end ----
Simple expression parser: FAILED!
> ---
> tools/perf/arch/arm64/util/pmu.c | 8 +++++++-
> tools/perf/util/pmus.c | 14 +-------------
> 2 files changed, 8 insertions(+), 14 deletions(-)
>
> diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
> index 3d9330feebd2..3099f5f448ba 100644
> --- a/tools/perf/arch/arm64/util/pmu.c
> +++ b/tools/perf/arch/arm64/util/pmu.c
> @@ -10,8 +10,14 @@
>
> const struct pmu_metrics_table *pmu_metrics_table__find(void)
> {
> - struct perf_pmu *pmu = perf_pmus__find_core_pmu();
> + struct perf_pmu *pmu;
> +
> + /* Metrics aren't currently supported on heterogeneous Arm systems */
> + if (perf_pmus__num_core_pmus() > 1)
> + return NULL;
>
> + /* Doesn't matter which one here because they'll all be the same */
> + pmu = perf_pmus__find_core_pmu();
> if (pmu)
> return perf_pmu__find_metrics_table(pmu);
>
> diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
> index cec869cbe163..64e798e68a2d 100644
> --- a/tools/perf/util/pmus.c
> +++ b/tools/perf/util/pmus.c
> @@ -596,17 +596,5 @@ struct perf_pmu *evsel__find_pmu(const struct evsel *evsel)
>
> struct perf_pmu *perf_pmus__find_core_pmu(void)
> {
> - struct perf_pmu *pmu = NULL;
> -
> - while ((pmu = perf_pmus__scan_core(pmu))) {
> - /*
> - * The cpumap should cover all CPUs. Otherwise, some CPUs may
> - * not support some events or have different event IDs.
> - */
> - if (RC_CHK_ACCESS(pmu->cpus)->nr != cpu__max_cpu().cpu)
> - return NULL;
> -
> - return pmu;
> - }
> - return NULL;
> + return perf_pmus__scan_core(NULL);
> }
Hello,
On Wed, Sep 13, 2023 at 8:34 AM James Clark <[email protected]> wrote:
>
> Set of fixes related to the comments here [1]. Mainly cleanups,
> additional tests and refactoring since adding the new strcmp_cpuid_str()
> metric expression.
>
> I added the string replace function to the perf utils
> rather than tools/lib/string.c because it didn't seem
> easy to add tests for tools/lib.
>
> [1]: https://lore.kernel.org/linux-arm-kernel/CAP-5=fVnUx0BnJC7X1rrm42OD7Bk=ZsHWNwAZMBYyB7yWhBfhQ@mail.gmail.com/
> [2]: https://lore.kernel.org/linux-perf-users/[email protected]/
>
> ---
> Changes since v2:
> * Drop patches that were already applied (which makes the cover letter
> mostly redundant)
> * Avoid generating the compiler warning reported here [2]
>
> Changes since v1:
> * s -> haystack
> * find -> needle
>
> James Clark (3):
> perf pmu: Move pmu__find_core_pmu() to pmus.c
> perf pmus: Simplify perf_pmus__find_core_pmu()
> perf pmu: Remove unused function
Applied to perf-tools-next, thanks!