When tracing etm data of multiple threads on multiple cpus through
perf interface, each cpu has a unique etr_perf_buffer while sharing
the same etr device. There is no guarantee that the last cpu starts
etm tracing also stops last. This makes perf_data check fail.
Fix it by checking etr_buf instead of etr_perf_buffer.
Fixes: 3147da92a8a8 ("coresight: tmc-etr: Allocate and free ETR memory buffers for CPU-wide scenarios")
Signed-off-by: Yabin Cui <[email protected]>
---
v1 -> v2: rename perf_data to perf_buf. Add fixes tag.
---
drivers/hwtracing/coresight/coresight-tmc-etr.c | 6 +++---
drivers/hwtracing/coresight/coresight-tmc.h | 6 +++---
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 17006705287a..90d1548ad268 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -1484,7 +1484,7 @@ tmc_update_etr_buffer(struct coresight_device *csdev,
goto out;
}
- if (WARN_ON(drvdata->perf_data != etr_perf)) {
+ if (WARN_ON(drvdata->perf_buf != etr_buf)) {
lost = true;
spin_unlock_irqrestore(&drvdata->spinlock, flags);
goto out;
@@ -1497,7 +1497,7 @@ tmc_update_etr_buffer(struct coresight_device *csdev,
CS_LOCK(drvdata->base);
/* Reset perf specific data */
- drvdata->perf_data = NULL;
+ drvdata->perf_buf = NULL;
spin_unlock_irqrestore(&drvdata->spinlock, flags);
size = etr_buf->len;
@@ -1556,7 +1556,7 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
}
etr_perf->head = PERF_IDX2OFF(handle->head, etr_perf);
- drvdata->perf_data = etr_perf;
+ drvdata->perf_buf = etr_perf->etr_buf;
/*
* No HW configuration is needed if the sink is already in
diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
index 1ed50411cc3c..f9a0c95e9ba2 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.h
+++ b/drivers/hwtracing/coresight/coresight-tmc.h
@@ -178,8 +178,8 @@ struct etr_buf {
* device configuration register (DEVID)
* @idr: Holds etr_bufs allocated for this ETR.
* @idr_mutex: Access serialisation for idr.
- * @perf_data: PERF buffer for ETR.
- * @sysfs_data: SYSFS buffer for ETR.
+ * @sysfs_buf: SYSFS buffer for ETR.
+ * @perf_buf: PERF buffer for ETR.
*/
struct tmc_drvdata {
void __iomem *base;
@@ -202,7 +202,7 @@ struct tmc_drvdata {
struct idr idr;
struct mutex idr_mutex;
struct etr_buf *sysfs_buf;
- void *perf_data;
+ struct etr_buf *perf_buf;
};
struct etr_buf_operations {
--
2.23.0.rc1.153.gdeed80330f-goog
Hi Yabin,
On Mon, 12 Aug 2019 at 13:03, Yabin Cui <[email protected]> wrote:
>
> When tracing etm data of multiple threads on multiple cpus through
> perf interface, each cpu has a unique etr_perf_buffer while sharing
> the same etr device. There is no guarantee that the last cpu starts
> etm tracing also stops last. This makes perf_data check fail.
Did you actually see the check fail or is this a theoretical thing?
I'm really perplex here has I have tested this scenario many times
without issues.
>
> Fix it by checking etr_buf instead of etr_perf_buffer.
>
> Fixes: 3147da92a8a8 ("coresight: tmc-etr: Allocate and free ETR memory buffers for CPU-wide scenarios")
> Signed-off-by: Yabin Cui <[email protected]>
> ---
>
> v1 -> v2: rename perf_data to perf_buf. Add fixes tag.
>
> ---
> drivers/hwtracing/coresight/coresight-tmc-etr.c | 6 +++---
> drivers/hwtracing/coresight/coresight-tmc.h | 6 +++---
> 2 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> index 17006705287a..90d1548ad268 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
> +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> @@ -1484,7 +1484,7 @@ tmc_update_etr_buffer(struct coresight_device *csdev,
> goto out;
> }
>
> - if (WARN_ON(drvdata->perf_data != etr_perf)) {
> + if (WARN_ON(drvdata->perf_buf != etr_buf)) {
In CPU wide scenarios each perf event (one per CPU) is associated with
an event_data during the setup process. The event_data is the
etr_perf holding a reference to the perf ring buffer for that specific
event along with the etr_buf, regardless of who created the latter.
From there, when the event is installed on a CPU, the csdev for that
CPU is given a reference to the event_data of that event[1]. Before
going further notice how there is a per CPU csdev and event handle to
keep track of event specifics[2]. As such both (per CPU) csdev and
event handle carry the exact same reference to the etr_perf.
When an event is stopped the exact same per CPU references are
taken[3] and later fed to sink->update_buffer(). If
sink->update_buffer() is called on an event (again one per CPU)
function etm_event_start() must have been called and
(drvdata->perf_data == etf_perf) needs to hold.
If that is not the case, an event from a completely different trace
session has been installed on that CPU and that can't happen thanks to
drvdata->id.
As such if you get a condition where (drvdata->perf_buf != etr_buf),
one of two things has happened:
1) Something went seriously wrong and the WARN_ON() did its job.
2) A corner case is manifesting in your test environment that I
haven't provisioned for. If that is the case we need to figure out
exactly what is happening before considering a fix.
Thanks,
Mathieu
[1]. https://elixir.bootlin.com/linux/v5.3-rc4/source/drivers/hwtracing/coresight/coresight-tmc-etr.c#L1559
[2]. https://elixir.bootlin.com/linux/v5.3-rc4/source/drivers/hwtracing/coresight/coresight-etm-perf.c#L298
[3]. https://elixir.bootlin.com/linux/v5.3-rc4/source/drivers/hwtracing/coresight/coresight-etm-perf.c#L348
> lost = true;
> spin_unlock_irqrestore(&drvdata->spinlock, flags);
> goto out;
> @@ -1497,7 +1497,7 @@ tmc_update_etr_buffer(struct coresight_device *csdev,
>
> CS_LOCK(drvdata->base);
> /* Reset perf specific data */
> - drvdata->perf_data = NULL;
> + drvdata->perf_buf = NULL;
> spin_unlock_irqrestore(&drvdata->spinlock, flags);
>
> size = etr_buf->len;
> @@ -1556,7 +1556,7 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
> }
>
> etr_perf->head = PERF_IDX2OFF(handle->head, etr_perf);
> - drvdata->perf_data = etr_perf;
> + drvdata->perf_buf = etr_perf->etr_buf;
>
> /*
> * No HW configuration is needed if the sink is already in
> diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
> index 1ed50411cc3c..f9a0c95e9ba2 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc.h
> +++ b/drivers/hwtracing/coresight/coresight-tmc.h
> @@ -178,8 +178,8 @@ struct etr_buf {
> * device configuration register (DEVID)
> * @idr: Holds etr_bufs allocated for this ETR.
> * @idr_mutex: Access serialisation for idr.
> - * @perf_data: PERF buffer for ETR.
> - * @sysfs_data: SYSFS buffer for ETR.
> + * @sysfs_buf: SYSFS buffer for ETR.
> + * @perf_buf: PERF buffer for ETR.
> */
> struct tmc_drvdata {
> void __iomem *base;
> @@ -202,7 +202,7 @@ struct tmc_drvdata {
> struct idr idr;
> struct mutex idr_mutex;
> struct etr_buf *sysfs_buf;
> - void *perf_data;
> + struct etr_buf *perf_buf;
> };
>
> struct etr_buf_operations {
> --
> 2.23.0.rc1.153.gdeed80330f-goog
>
> Did you actually see the check fail or is this a theoretical thing?
> I'm really perplex here has I have tested this scenario many times
> without issues.
>
I have seen this warning in dmesg output, that's how I find the problem.
> In CPU wide scenarios each perf event (one per CPU) is associated with
> an event_data during the setup process. The event_data is the
> etr_perf holding a reference to the perf ring buffer for that specific
> event along with the etr_buf, regardless of who created the latter.
Agree.
> From there, when the event is installed on a CPU, the csdev for that
> CPU is given a reference to the event_data of that event[1]. Before
> going further notice how there is a per CPU csdev and event handle to
> keep track of event specifics[2]. As such both (per CPU) csdev and
> event handle carry the exact same reference to the etr_perf.
>
On my test device (Pixel 3), there is an ETM device on each cpu, but only
one ETR device for the whole device. So there is only one instance of etr
csdev in the kernel. If multiple cpus are scheduling on etm perf events at
the same time, all of them are trying to set their event_data to the same
etr csdev. And different perf events have different event_data. A warning
situation is as below:
cpu 0
schedule on event A (set etr csdev->perf_data to event_a.etr_perf)
cpu 1
schedule on event B (set etr csdev->perf_data to event_b.etr_perf)
cpu 1
schedule off event B (update buffer, does nothing since csdev->refcnt != 1)
cpu 0
schedule off event A (update buffer, but etr csdev->perf_data check fail)
On Wed, 14 Aug 2019 at 17:51, Yabin Cui <[email protected]> wrote:
>
> > Did you actually see the check fail or is this a theoretical thing?
> > I'm really perplex here has I have tested this scenario many times
> > without issues.
> >
> I have seen this warning in dmesg output, that's how I find the problem.
>
> > In CPU wide scenarios each perf event (one per CPU) is associated with
> > an event_data during the setup process. The event_data is the
> > etr_perf holding a reference to the perf ring buffer for that specific
> > event along with the etr_buf, regardless of who created the latter.
>
> Agree.
>
> > From there, when the event is installed on a CPU, the csdev for that
> > CPU is given a reference to the event_data of that event[1]. Before
> > going further notice how there is a per CPU csdev and event handle to
> > keep track of event specifics[2]. As such both (per CPU) csdev and
> > event handle carry the exact same reference to the etr_perf.
> >
> On my test device (Pixel 3), there is an ETM device on each cpu, but only
> one ETR device for the whole device. So there is only one instance of etr
> csdev in the kernel. If multiple cpus are scheduling on etm perf events at
> the same time, all of them are trying to set their event_data to the same
> etr csdev. And different perf events have different event_data. A warning
> situation is as below:
>
> cpu 0
> schedule on event A (set etr csdev->perf_data to event_a.etr_perf)
>
> cpu 1
> schedule on event B (set etr csdev->perf_data to event_b.etr_perf)
>
You are 100% right and looking at it again this morning it just jumped
at me. I simply can't understand how it did not manifest itself
during all the hammering I did on it.
Please see details in my other (and upcoming) email.
Thanks,
Mathieu
> cpu 1
> schedule off event B (update buffer, does nothing since csdev->refcnt != 1)
>
> cpu 0
> schedule off event A (update buffer, but etr csdev->perf_data check fail)
On Mon, 12 Aug 2019 at 13:03, Yabin Cui <[email protected]> wrote:
>
> When tracing etm data of multiple threads on multiple cpus through
> perf interface, each cpu has a unique etr_perf_buffer while sharing
> the same etr device. There is no guarantee that the last cpu starts
> etm tracing also stops last. This makes perf_data check fail.
>
> Fix it by checking etr_buf instead of etr_perf_buffer.
>
> Fixes: 3147da92a8a8 ("coresight: tmc-etr: Allocate and free ETR memory buffers for CPU-wide scenarios")
> Signed-off-by: Yabin Cui <[email protected]>
> ---
>
> v1 -> v2: rename perf_data to perf_buf. Add fixes tag.
>
> ---
> drivers/hwtracing/coresight/coresight-tmc-etr.c | 6 +++---
> drivers/hwtracing/coresight/coresight-tmc.h | 6 +++---
> 2 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> index 17006705287a..90d1548ad268 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
> +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> @@ -1484,7 +1484,7 @@ tmc_update_etr_buffer(struct coresight_device *csdev,
> goto out;
> }
>
> - if (WARN_ON(drvdata->perf_data != etr_perf)) {
> + if (WARN_ON(drvdata->perf_buf != etr_buf)) {
> lost = true;
> spin_unlock_irqrestore(&drvdata->spinlock, flags);
> goto out;
> @@ -1497,7 +1497,7 @@ tmc_update_etr_buffer(struct coresight_device *csdev,
>
> CS_LOCK(drvdata->base);
> /* Reset perf specific data */
> - drvdata->perf_data = NULL;
> + drvdata->perf_buf = NULL;
> spin_unlock_irqrestore(&drvdata->spinlock, flags);
>
> size = etr_buf->len;
> @@ -1556,7 +1556,7 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
> }
>
> etr_perf->head = PERF_IDX2OFF(handle->head, etr_perf);
> - drvdata->perf_data = etr_perf;
> + drvdata->perf_buf = etr_perf->etr_buf;
Ok for the fix. Looking a things again I don't see a need to do the
assignment for each event - this needs to be done only when the device
is assocated with a monitored process. Please move it here [1].
Thanks,
Mathieu
[1]. https://elixir.bootlin.com/linux/v5.3-rc4/source/drivers/hwtracing/coresight/coresight-tmc-etr.c#L1572
>
> /*
> * No HW configuration is needed if the sink is already in
> diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
> index 1ed50411cc3c..f9a0c95e9ba2 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc.h
> +++ b/drivers/hwtracing/coresight/coresight-tmc.h
> @@ -178,8 +178,8 @@ struct etr_buf {
> * device configuration register (DEVID)
> * @idr: Holds etr_bufs allocated for this ETR.
> * @idr_mutex: Access serialisation for idr.
> - * @perf_data: PERF buffer for ETR.
> - * @sysfs_data: SYSFS buffer for ETR.
> + * @sysfs_buf: SYSFS buffer for ETR.
> + * @perf_buf: PERF buffer for ETR.
> */
> struct tmc_drvdata {
> void __iomem *base;
> @@ -202,7 +202,7 @@ struct tmc_drvdata {
> struct idr idr;
> struct mutex idr_mutex;
> struct etr_buf *sysfs_buf;
> - void *perf_data;
> + struct etr_buf *perf_buf;
> };
>
> struct etr_buf_operations {
> --
> 2.23.0.rc1.153.gdeed80330f-goog
>