In per-cpu mode there are multiple aux buffers and each one has a
fixed sink, so the hw ID mappings only need to be emitted once
for each buffer, even with the new per-sink trace ID pools.
But in per-thread mode there is only a single buffer which can be
written to from any sink with now potentially overlapping trace IDs, so
hw ID mappings need to be re-emitted every time the sink changes.
This will require a change in Perf to track this so it knows which
decode tree to use for each segment of the buffer. In theory it's also
possible to look at the CPU ID on the AUX records, but this is more
consistent with the existing system, and allows for correct decode using
either mechanism.
Signed-off-by: James Clark <[email protected]>
---
drivers/hwtracing/coresight/coresight-etm-perf.c | 14 ++++++++++++++
drivers/hwtracing/coresight/coresight-etm-perf.h | 2 ++
2 files changed, 16 insertions(+)
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 17cafa1a7f18..b6f505b50e67 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -499,6 +499,20 @@ static void etm_event_start(struct perf_event *event, int flags)
&sink->perf_sink_id_map))
goto fail_disable_path;
+ /*
+ * In per-cpu mode there are multiple aux buffers and each one has a
+ * fixed sink, so the hw ID mappings only need to be emitted once
+ * for each buffer.
+ *
+ * But in per-thread mode there is only a single buffer which can be
+ * written to from any sink with potentially overlapping trace IDs, so
+ * hw ID mappings need to be re-emitted every time the sink changes.
+ */
+ if (event->cpu == -1 && event_data->last_sink_hwid != sink) {
+ cpumask_clear(&event_data->aux_hwid_done);
+ event_data->last_sink_hwid = sink;
+ }
+
/*
* output cpu / trace ID in perf record, once for the lifetime
* of the event.
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h
index 744531158d6b..bd4553b2a1ec 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.h
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.h
@@ -52,6 +52,7 @@ struct etm_filters {
* @snk_config: The sink configuration.
* @cfg_hash: The hash id of any coresight config selected.
* @path: An array of path, each slot for one CPU.
+ * @last_sink_hwid: Last sink that a hwid was emitted for.
*/
struct etm_event_data {
struct work_struct work;
@@ -60,6 +61,7 @@ struct etm_event_data {
void *snk_config;
u32 cfg_hash;
struct list_head * __percpu *path;
+ struct coresight_device *last_sink_hwid;
};
int etm_perf_symlink(struct coresight_device *csdev, bool link);
--
2.34.1
On 04/06/2024 15:30, James Clark wrote:
> In per-cpu mode there are multiple aux buffers and each one has a
> fixed sink, so the hw ID mappings which only need to be emitted once
> for each buffer, even with the new per-sink trace ID pools.
>
> But in per-thread mode there is only a single buffer which can be
> written to from any sink with now potentially overlapping trace IDs, so
> hw ID mappings need to be re-emitted every time the sink changes.
>
> This will require a change in Perf to track this so it knows which
> decode tree to use for each segment of the buffer. In theory it's also
> possible to look at the CPU ID on the AUX records, but this is more
> consistent with the existing system, and allows for correct decode using
> either mechanism.
>
> Signed-off-by: James Clark <[email protected]>
> ---
> drivers/hwtracing/coresight/coresight-etm-perf.c | 14 ++++++++++++++
> drivers/hwtracing/coresight/coresight-etm-perf.h | 2 ++
> 2 files changed, 16 insertions(+)
>
> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
> index 17cafa1a7f18..b6f505b50e67 100644
> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
> @@ -499,6 +499,20 @@ static void etm_event_start(struct perf_event *event, int flags)
> &sink->perf_sink_id_map))
> goto fail_disable_path;
>
> + /*
> + * In per-cpu mode there are multiple aux buffers and each one has a
> + * fixed sink, so the hw ID mappings which only need to be emitted once
> + * for each buffer.
> + *
> + * But in per-thread mode there is only a single buffer which can be
> + * written to from any sink with potentially overlapping trace IDs, so
> + * hw ID mappings need to be re-emitted every time the sink changes.
> + */
> + if (event->cpu == -1 && event_data->last_sink_hwid != sink) {
> + cpumask_clear(&event_data->aux_hwid_done);
> + event_data->last_sink_hwid = sink;
> + }
I am wondering if we really need this patch, as we have the sink ID in
the HWID already? We would emit the packet for each CPU only once and
that wouldn't change the HWID?
Suzuki
> +
> /*
> * output cpu / trace ID in perf record, once for the lifetime
> * of the event.
> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h
> index 744531158d6b..bd4553b2a1ec 100644
> --- a/drivers/hwtracing/coresight/coresight-etm-perf.h
> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.h
> @@ -52,6 +52,7 @@ struct etm_filters {
> * @snk_config: The sink configuration.
> * @cfg_hash: The hash id of any coresight config selected.
> * @path: An array of path, each slot for one CPU.
> + * @last_sink_hwid: Last sink that a hwid was emitted for.
> */
> struct etm_event_data {
> struct work_struct work;
> @@ -60,6 +61,7 @@ struct etm_event_data {
> void *snk_config;
> u32 cfg_hash;
> struct list_head * __percpu *path;
> + struct coresight_device *last_sink_hwid;
> };
>
> int etm_perf_symlink(struct coresight_device *csdev, bool link);
On 10/06/2024 11:29, Suzuki K Poulose wrote:
> On 04/06/2024 15:30, James Clark wrote:
>> In per-cpu mode there are multiple aux buffers and each one has a
>> fixed sink, so the hw ID mappings which only need to be emitted once
>> for each buffer, even with the new per-sink trace ID pools.
>>
>> But in per-thread mode there is only a single buffer which can be
>> written to from any sink with now potentially overlapping trace IDs, so
>> hw ID mappings need to be re-emitted every time the sink changes.
>>
>> This will require a change in Perf to track this so it knows which
>> decode tree to use for each segment of the buffer. In theory it's also
>> possible to look at the CPU ID on the AUX records, but this is more
>> consistent with the existing system, and allows for correct decode using
>> either mechanism.
>>
>> Signed-off-by: James Clark <[email protected]>
>> ---
>> drivers/hwtracing/coresight/coresight-etm-perf.c | 14 ++++++++++++++
>> drivers/hwtracing/coresight/coresight-etm-perf.h | 2 ++
>> 2 files changed, 16 insertions(+)
>>
>> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c
>> b/drivers/hwtracing/coresight/coresight-etm-perf.c
>> index 17cafa1a7f18..b6f505b50e67 100644
>> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
>> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
>> @@ -499,6 +499,20 @@ static void etm_event_start(struct perf_event
>> *event, int flags)
>> &sink->perf_sink_id_map))
>> goto fail_disable_path;
>> + /*
>> + * In per-cpu mode there are multiple aux buffers and each one has a
>> + * fixed sink, so the hw ID mappings which only need to be
>> emitted once
>> + * for each buffer.
>> + *
>> + * But in per-thread mode there is only a single buffer which can be
>> + * written to from any sink with potentially overlapping trace
>> IDs, so
>> + * hw ID mappings need to be re-emitted every time the sink changes.
>> + */
>> + if (event->cpu == -1 && event_data->last_sink_hwid != sink) {
>> + cpumask_clear(&event_data->aux_hwid_done);
>> + event_data->last_sink_hwid = sink;
>> + }
>
> I am wondering if we really need this patch, as we have the sinkid in
> the HWID already ? We would emit the packet for each CPU only once and
> that wouldn't change the HWID ?
>
> Suzuki
>
>
It would be needed for per-thread mode if we didn't have the CPU sample
bit set on AUX records, because otherwise you wouldn't know when the
process had moved to a new sink with new mappings. But I suppose this is
redundant information now that the CPU bit is set on AUX records, so I
can remove this.
I was thinking it might be nice that, if a tool _only_ wanted to look at
HWIDs, it could do the decode correctly with just that. If we remove this
then tools will always have to set the CPU sample bit, but that's pretty
much required anyway and Perf was already doing it.
James