v1 -> v2:
- Optimize code in patch 1 as Mathieu advised.
- Fix memleak in patch 2.
- Detail the commit info to explain the reason.
This patch set fixes a perf record failure that occurs when an arm_spe_x
event is mixed with other events in a specific order.
Wei Li (2):
perf tools: Fix record failure when mixed with ARM SPE event
perf tools: ARM SPE code cleanup
tools/perf/arch/arm/util/auxtrace.c | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
--
2.17.1
When recording with cache-misses and an arm_spe_x event, I found that
perf record just fails without showing any error info if I put cache-misses
after the 'arm_spe_x' event.
[root@localhost 0620]# perf record -e cache-misses -e \
arm_spe_0/ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,\
jitter=1,store_filter=1,min_latency=0/ sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.067 MB perf.data ]
[root@localhost 0620]# perf record -e \
arm_spe_0/ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,\
store_filter=1,min_latency=0/ -e cache-misses sleep 1
[root@localhost 0620]#
The current code only works if the only event to be traced is an
'arm_spe_x', or if it is the last event to be specified. Otherwise the
last event type will be checked against every arm_spe_pmus[i]->type,
none will match, and an out-of-bounds index 'i' will be used to select
the PMU passed to arm_spe_recording_init().
We don't currently support multiple concurrent arm_spe_x events; that
is checked in arm_spe_recording_options(), which will show the relevant
info. So stop searching once an 'arm_spe_pmu' has been found, and record
that first match, to fix this issue here.
Fixes: ffd3d18c20b8d ("perf tools: Add ARM Statistical Profiling Extensions (SPE) support")
Signed-off-by: Wei Li <[email protected]>
---
tools/perf/arch/arm/util/auxtrace.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index 0a6e75b8777a..28a5d0c18b1d 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -56,7 +56,7 @@ struct auxtrace_record
struct perf_pmu *cs_etm_pmu;
struct evsel *evsel;
bool found_etm = false;
- bool found_spe = false;
+ struct perf_pmu *found_spe = NULL;
static struct perf_pmu **arm_spe_pmus = NULL;
static int nr_spes = 0;
int i = 0;
@@ -74,12 +74,12 @@ struct auxtrace_record
evsel->core.attr.type == cs_etm_pmu->type)
found_etm = true;
- if (!nr_spes)
+ if (!nr_spes || found_spe)
continue;
for (i = 0; i < nr_spes; i++) {
if (evsel->core.attr.type == arm_spe_pmus[i]->type) {
- found_spe = true;
+ found_spe = arm_spe_pmus[i];
break;
}
}
@@ -96,7 +96,7 @@ struct auxtrace_record
#if defined(__aarch64__)
if (found_spe)
- return arm_spe_recording_init(err, arm_spe_pmus[i]);
+ return arm_spe_recording_init(err, found_spe);
#endif
/*
--
2.17.1
On Fri, Jul 24, 2020 at 03:11:10PM +0800, Wei Li wrote:
> When recording with cache-misses and arm_spe_x event, i found that
> it will just fail without showing any error info if i put cache-misses
> after 'arm_spe_x' event.
>
> [root@localhost 0620]# perf record -e cache-misses -e \
> arm_spe_0/ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,\
> jitter=1,store_filter=1,min_latency=0/ sleep 1
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.067 MB perf.data ]
> [root@localhost 0620]# perf record -e \
> arm_spe_0/ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,\
> store_filter=1,min_latency=0/ -e cache-misses sleep 1
> [root@localhost 0620]#
>
> The current code can only work if the only event to be traced is an
> 'arm_spe_x', or if it is the last event to be specified. Otherwise the
> last event type will be checked against all the arm_spe_pmus[i]->types,
> none will match and an out of bound 'i' index will be used in
> arm_spe_recording_init().
>
> We don't support concurrent multiple arm_spe_x events currently, that
> is checked in arm_spe_recording_options(), and it will show the relevant
> info. So add the check and record of the first found 'arm_spe_pmu' to
> fix this issue here.
>
> Fixes: ffd3d18c20b8d ("perf tools: Add ARM Statistical Profiling Extensions (SPE) support")
> Signed-off-by: Wei Li <[email protected]>
Thanks for the patch, Wei. I have tested this series on Arm64 D06
platform:
Tested-by: Leo Yan <[email protected]>
I'd like to wait for Mathieu's ACK.
Thanks,
Leo
> ---
> tools/perf/arch/arm/util/auxtrace.c | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
> index 0a6e75b8777a..28a5d0c18b1d 100644
> --- a/tools/perf/arch/arm/util/auxtrace.c
> +++ b/tools/perf/arch/arm/util/auxtrace.c
> @@ -56,7 +56,7 @@ struct auxtrace_record
> struct perf_pmu *cs_etm_pmu;
> struct evsel *evsel;
> bool found_etm = false;
> - bool found_spe = false;
> + struct perf_pmu *found_spe = NULL;
> static struct perf_pmu **arm_spe_pmus = NULL;
> static int nr_spes = 0;
> int i = 0;
> @@ -74,12 +74,12 @@ struct auxtrace_record
> evsel->core.attr.type == cs_etm_pmu->type)
> found_etm = true;
>
> - if (!nr_spes)
> + if (!nr_spes || found_spe)
> continue;
>
> for (i = 0; i < nr_spes; i++) {
> if (evsel->core.attr.type == arm_spe_pmus[i]->type) {
> - found_spe = true;
> + found_spe = arm_spe_pmus[i];
> break;
> }
> }
> @@ -96,7 +96,7 @@ struct auxtrace_record
>
> #if defined(__aarch64__)
> if (found_spe)
> - return arm_spe_recording_init(err, arm_spe_pmus[i]);
> + return arm_spe_recording_init(err, found_spe);
> #endif
>
> /*
> --
> 2.17.1
>
On Fri, Jul 24, 2020 at 03:11:10PM +0800, Wei Li wrote:
> When recording with cache-misses and arm_spe_x event, i found that
> it will just fail without showing any error info if i put cache-misses
> after 'arm_spe_x' event.
>
> [root@localhost 0620]# perf record -e cache-misses -e \
> arm_spe_0/ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,\
> jitter=1,store_filter=1,min_latency=0/ sleep 1
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.067 MB perf.data ]
> [root@localhost 0620]# perf record -e \
> arm_spe_0/ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,\
> store_filter=1,min_latency=0/ -e cache-misses sleep 1
> [root@localhost 0620]#
>
> The current code can only work if the only event to be traced is an
> 'arm_spe_x', or if it is the last event to be specified. Otherwise the
> last event type will be checked against all the arm_spe_pmus[i]->types,
> none will match and an out of bound 'i' index will be used in
> arm_spe_recording_init().
>
> We don't support concurrent multiple arm_spe_x events currently, that
> is checked in arm_spe_recording_options(), and it will show the relevant
> info. So add the check and record of the first found 'arm_spe_pmu' to
> fix this issue here.
>
> Fixes: ffd3d18c20b8d ("perf tools: Add ARM Statistical Profiling Extensions (SPE) support")
Usually SHA1s are 12 characters long rather than 13. Depending on what Arnaldo
wants to do you may have to resend.
> Signed-off-by: Wei Li <[email protected]>
Reviewed-by: Mathieu Poirier <[email protected]>
> ---
> tools/perf/arch/arm/util/auxtrace.c | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
> index 0a6e75b8777a..28a5d0c18b1d 100644
> --- a/tools/perf/arch/arm/util/auxtrace.c
> +++ b/tools/perf/arch/arm/util/auxtrace.c
> @@ -56,7 +56,7 @@ struct auxtrace_record
> struct perf_pmu *cs_etm_pmu;
> struct evsel *evsel;
> bool found_etm = false;
> - bool found_spe = false;
> + struct perf_pmu *found_spe = NULL;
> static struct perf_pmu **arm_spe_pmus = NULL;
> static int nr_spes = 0;
> int i = 0;
> @@ -74,12 +74,12 @@ struct auxtrace_record
> evsel->core.attr.type == cs_etm_pmu->type)
> found_etm = true;
>
> - if (!nr_spes)
> + if (!nr_spes || found_spe)
> continue;
>
> for (i = 0; i < nr_spes; i++) {
> if (evsel->core.attr.type == arm_spe_pmus[i]->type) {
> - found_spe = true;
> + found_spe = arm_spe_pmus[i];
> break;
> }
> }
> @@ -96,7 +96,7 @@ struct auxtrace_record
>
> #if defined(__aarch64__)
> if (found_spe)
> - return arm_spe_recording_init(err, arm_spe_pmus[i]);
> + return arm_spe_recording_init(err, found_spe);
> #endif
>
> /*
> --
> 2.17.1
>