ARM Neoverse-N2 (#2139208) and Cortex-A710 (#2119858) suffer from
an erratum which, when triggered, might cause the TRBE to overwrite
the trace data already collected in FILL mode, in the event of a WRAP.
i.e., the TRBE doesn't stop writing the data; instead it wraps to the base
and could write up to 3 cache lines' worth of trace. Thus, this could
corrupt the trace at the "BASE" pointer.
The workaround is to program the write pointer 256 bytes from the
base, such that if the erratum is triggered, it doesn't overwrite
the trace data that was captured. This skipped region could be
padded with ignore packets at the end of the session, so that
the decoder sees a continuous buffer with some padding at the
beginning. The trace data written at the base is considered
lost as the limit could have been in the middle of the perf
ring buffer, and jumping to the "base" is not acceptable.
We set the flags already to indicate that some amount of trace
was lost during the FILL event IRQ. So this is fine.
One important change with the workaround is that we program
TRBBASER_EL1 to the current page where we are allowed to write.
Otherwise, it could overwrite a region that may be consumed
by perf. Towards this, we always make sure that
"handle->head", and thus trbe_write, is PAGE_SIZE aligned,
so that we can set the BASE to the page base and move the
TRBPTR to the 256-byte offset.
Cc: Mike Leach <[email protected]>
Cc: Mathieu Poirier <[email protected]>
Cc: Anshuman Khandual <[email protected]>
Cc: Leo Yan <[email protected]>
Signed-off-by: Suzuki K Poulose <[email protected]>
---
Change since v1:
- Updated comment with ASCII art
- Add _BYTES suffix for the space to skip for the work around.
---
drivers/hwtracing/coresight/coresight-trbe.c | 144 +++++++++++++++++--
1 file changed, 132 insertions(+), 12 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
index f569010c672b..983dd5039e52 100644
--- a/drivers/hwtracing/coresight/coresight-trbe.c
+++ b/drivers/hwtracing/coresight/coresight-trbe.c
@@ -16,6 +16,7 @@
#define pr_fmt(fmt) DRVNAME ": " fmt
#include <asm/barrier.h>
+#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include "coresight-self-hosted-trace.h"
@@ -84,9 +85,17 @@ struct trbe_buf {
* per TRBE instance, we keep track of the list of errata that
* affects the given instance of the TRBE.
*/
-#define TRBE_ERRATA_MAX 0
+#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE 0
+#define TRBE_ERRATA_MAX 1
+
+/*
+ * Safe limit for the number of bytes that may be overwritten
+ * when the erratum is triggered.
+ */
+#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES 256
static unsigned long trbe_errata_cpucaps[TRBE_ERRATA_MAX] = {
+ [TRBE_WORKAROUND_OVERWRITE_FILL_MODE] = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
};
/*
@@ -519,10 +528,13 @@ static void trbe_enable_hw(struct trbe_buf *buf)
set_trbe_limit_pointer_enabled(buf->trbe_limit);
}
-static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
+static enum trbe_fault_action trbe_get_fault_act(struct perf_output_handle *handle,
+ u64 trbsr)
{
int ec = get_trbe_ec(trbsr);
int bsc = get_trbe_bsc(trbsr);
+ struct trbe_buf *buf = etm_perf_sink_config(handle);
+ struct trbe_cpudata *cpudata = buf->cpudata;
WARN_ON(is_trbe_running(trbsr));
if (is_trbe_trg(trbsr) || is_trbe_abort(trbsr))
@@ -531,10 +543,16 @@ static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
if ((ec == TRBE_EC_STAGE1_ABORT) || (ec == TRBE_EC_STAGE2_ABORT))
return TRBE_FAULT_ACT_FATAL;
- if (is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) {
- if (get_trbe_write_pointer() == get_trbe_base_pointer())
- return TRBE_FAULT_ACT_WRAP;
- }
+ /*
+ * If the trbe is affected by TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
+ * it might write data after a WRAP event in the fill mode.
+ * Thus the check TRBPTR == TRBBASER will not be honored.
+ */
+ if ((is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) &&
+ (trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) ||
+ get_trbe_write_pointer() == get_trbe_base_pointer()))
+ return TRBE_FAULT_ACT_WRAP;
+
return TRBE_FAULT_ACT_SPURIOUS;
}
@@ -544,6 +562,8 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
{
u64 write;
u64 start_off, end_off;
+ u64 size;
+ u64 overwrite_skip = TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
/*
* If the TRBE has wrapped around the write pointer has
@@ -559,7 +579,18 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
if (WARN_ON_ONCE(end_off < start_off))
return 0;
- return (end_off - start_off);
+
+ size = end_off - start_off;
+ /*
+ * If the TRBE is affected by the following erratum, we must fill
+ * the space we skipped with IGNORE packets. And we are always
+ * guaranteed to have at least a PAGE_SIZE space in the buffer.
+ */
+ if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) &&
+ !WARN_ON(size < overwrite_skip))
+ __trbe_pad_buf(buf, start_off, overwrite_skip);
+
+ return size;
}
static void *arm_trbe_alloc_buffer(struct coresight_device *csdev,
@@ -678,7 +709,7 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
clr_trbe_irq();
isb();
- act = trbe_get_fault_act(status);
+ act = trbe_get_fault_act(handle, status);
/*
* If this was not due to a WRAP event, we have some
* errors and as such buffer is empty.
@@ -702,21 +733,95 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
return size;
}
+
+static int trbe_apply_work_around_before_enable(struct trbe_buf *buf)
+{
+ /*
+ * TRBE_WORKAROUND_OVERWRITE_FILL_MODE causes the TRBE to overwrite a few cache
+ * line size from the "TRBBASER_EL1" in the event of a "FILL".
+ * Thus, we could lose some amount of the trace at the base.
+ *
+ * Before Fix:
+ *
+ * normal-BASE head normal-PTR tail normal-LIMIT
+ * | \/ /
+ * -------------------------------------------------------------
+ * | | |xyzdefghij..|... tuvw| |
+ * -------------------------------------------------------------
+ * / | \
+ * After Fix-> TRBBASER TRBPTR TRBLIMITR.LIMIT
+ *
+ * In the normal course of action, we would set the TRBBASER to the
+ * beginning of the ring-buffer (normal-BASE). But with the erratum,
+ * the TRBE could overwrite the contents at the "normal-BASE", after
+ * hitting the "normal-LIMIT", since it doesn't stop as expected. And
+ * this is wrong. So we must always make sure that the TRBBASER is
+ * within the region [head, head+size].
+ *
+ * Also, we would set the TRBPTR to head (after adjusting for
+ * alignment) at normal-PTR. This would mean that the last few bytes
+ * of the trace (say, "xyz") might overwrite the first few bytes of
+ * trace written ("abc"). More importantly they will appear in what
+ * userspace sees as the beginning of the trace, which is wrong. We may
+ * not always have space to move the latest trace "xyz" to the correct
+ * order as it must appear beyond the LIMIT (i.e., [head..head+size]).
+ * Thus it is easier to ignore those bytes than to complicate the
+ * driver to move it, assuming that the erratum was triggered and doing
+ * additional checks to see if there is indeed allowed space at
+ * TRBLIMITR.LIMIT.
+ *
+ * To summarize, with the work around:
+ *
+ * - We always align the offset for the next session to PAGE_SIZE
+ * (This is to ensure we can program the TRBBASER to this offset
+ * within the region [head...head+size]).
+ *
+ * - At TRBE enable:
+ * - Set the TRBBASER to the page aligned offset of the current
+ * proposed write offset. (which is guaranteed to be aligned
+ * as above)
+ * - Move the TRBPTR to skip first 256bytes (that might be
+ * overwritten with the erratum). This ensures that the trace
+ * generated in the session is not re-written.
+ *
+ * - At trace collection:
+ * - Pad the 256bytes skipped above again with IGNORE packets.
+ */
+ if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE)) {
+ if (WARN_ON(!IS_ALIGNED(buf->trbe_write, PAGE_SIZE)))
+ return -EINVAL;
+ buf->trbe_hw_base = buf->trbe_write;
+ buf->trbe_write += TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
+ }
+
+ return 0;
+}
+
static int __arm_trbe_enable(struct trbe_buf *buf,
struct perf_output_handle *handle)
{
+ int ret = 0;
+
perf_aux_output_flag(handle, PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
buf->trbe_limit = compute_trbe_buffer_limit(handle);
buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
if (buf->trbe_limit == buf->trbe_base) {
- trbe_stop_and_truncate_event(handle);
- return -ENOSPC;
+ ret = -ENOSPC;
+ goto err;
}
/* Set the base of the TRBE to the buffer base */
buf->trbe_hw_base = buf->trbe_base;
+
+ ret = trbe_apply_work_around_before_enable(buf);
+ if (ret)
+ goto err;
+
*this_cpu_ptr(buf->cpudata->drvdata->handle) = handle;
trbe_enable_hw(buf);
return 0;
+err:
+ trbe_stop_and_truncate_event(handle);
+ return ret;
}
static int arm_trbe_enable(struct coresight_device *csdev, u32 mode, void *data)
@@ -860,7 +965,7 @@ static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
if (!is_perf_trbe(handle))
return IRQ_NONE;
- act = trbe_get_fault_act(status);
+ act = trbe_get_fault_act(handle, status);
switch (act) {
case TRBE_FAULT_ACT_WRAP:
truncated = !!trbe_handle_overflow(handle);
@@ -1000,7 +1105,22 @@ static void arm_trbe_probe_cpu(void *info)
}
trbe_check_errata(cpudata);
- cpudata->trbe_align = cpudata->trbe_hw_align;
+ /*
+ * If the TRBE is affected by erratum TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
+ * we must always program the TRBPTR_EL1, 256 bytes from a page
+ * boundary, with TRBBASER_EL1 set to the page, to prevent
+ * TRBE over-writing 256 bytes at TRBBASER_EL1 on FILL event.
+ *
+ * Thus make sure we always align our write pointer to a PAGE_SIZE,
+ * which also guarantees that we have at least a PAGE_SIZE space in
+ * the buffer (TRBLIMITR is PAGE aligned) and thus we can skip
+ * the required bytes at the base.
+ */
+ if (trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE))
+ cpudata->trbe_align = PAGE_SIZE;
+ else
+ cpudata->trbe_align = cpudata->trbe_hw_align;
+
cpudata->trbe_flag = get_trbe_flag_update(trbidr);
cpudata->cpu = cpu;
cpudata->drvdata = drvdata;
--
2.24.1
On 9/21/21 7:11 PM, Suzuki K Poulose wrote:
> ARM Neoverse-N2 (#2139208) and Cortex-A710(##2119858) suffers from
> an erratum, which when triggered, might cause the TRBE to overwrite
> the trace data already collected in FILL mode, in the event of a WRAP.
> i.e, the TRBE doesn't stop writing the data, instead wraps to the base
> and could write upto 3 cache line size worth trace. Thus, this could
> corrupt the trace at the "BASE" pointer.
>
> The workaround is to program the write pointer 256bytes from the
Is 3 cache lines = 256 bytes on all implementations which might have TRBE?
Or should these skid bytes be derived from the platform cache line size
instead?
> base, such that if the erratum is triggered, it doesn't overwrite
> the trace data that was captured. This skipped region could be
> padded with ignore packets at the end of the session, so that
> the decoder sees a continuous buffer with some padding at the
> beginning. The trace data written at the base is considered
> lost as the limit could have been in the middle of the perf
> ring buffer, and jumping to the "base" is not acceptable.
> We set the flags already to indicate that some amount of trace
> was lost during the FILL event IRQ. So this is fine.
Via PERF_AUX_FLAG_TRUNCATED ? Should be specified here to be clear.
>
> One important change with the work around is, we program the
> TRBBASER_EL1 to current page where we are allowed to write.
> Otherwise, it could overwrite a region that may be consumed
> by the perf. Towards this, we always make sure that the
> "handle->head" and thus the trbe_write is PAGE_SIZE aligned,
> so that we can set the BASE to the PAGE base and move the
> TRBPTR to the 256bytes offset.
>
> Cc: Mike Leach <[email protected]>
> Cc: Mathieu Poirier <[email protected]>
> Cc: Anshuman Khandual <[email protected]>
> Cc: Leo Yan <[email protected]>
> Signed-off-by: Suzuki K Poulose <[email protected]>
> ---
> Change since v1:
> - Updated comment with ASCII art
> - Add _BYTES suffix for the space to skip for the work around.
> ---
> drivers/hwtracing/coresight/coresight-trbe.c | 144 +++++++++++++++++--
> 1 file changed, 132 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
> index f569010c672b..983dd5039e52 100644
> --- a/drivers/hwtracing/coresight/coresight-trbe.c
> +++ b/drivers/hwtracing/coresight/coresight-trbe.c
> @@ -16,6 +16,7 @@
> #define pr_fmt(fmt) DRVNAME ": " fmt
>
> #include <asm/barrier.h>
> +#include <asm/cpufeature.h>
> #include <asm/cputype.h>
>
> #include "coresight-self-hosted-trace.h"
> @@ -84,9 +85,17 @@ struct trbe_buf {
> * per TRBE instance, we keep track of the list of errata that
> * affects the given instance of the TRBE.
> */
> -#define TRBE_ERRATA_MAX 0
> +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE 0
> +#define TRBE_ERRATA_MAX 1
> +
> +/*
> + * Safe limit for the number of bytes that may be overwritten
> + * when the erratum is triggered.
> + */
> +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES 256
As mentioned earlier, does it depend on the platform cache line size?
Otherwise, if the number of skip bytes is platform independent, that
should be mentioned here in a comment.
>
> static unsigned long trbe_errata_cpucaps[TRBE_ERRATA_MAX] = {
> + [TRBE_WORKAROUND_OVERWRITE_FILL_MODE] = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
> };
>
> /*
> @@ -519,10 +528,13 @@ static void trbe_enable_hw(struct trbe_buf *buf)
> set_trbe_limit_pointer_enabled(buf->trbe_limit);
> }
>
> -static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
> +static enum trbe_fault_action trbe_get_fault_act(struct perf_output_handle *handle,
> + u64 trbsr)
> {
> int ec = get_trbe_ec(trbsr);
> int bsc = get_trbe_bsc(trbsr);
> + struct trbe_buf *buf = etm_perf_sink_config(handle);
> + struct trbe_cpudata *cpudata = buf->cpudata;
Passing down the perf handle to derive trbe_cpudata seems to be right.
>
> WARN_ON(is_trbe_running(trbsr));
> if (is_trbe_trg(trbsr) || is_trbe_abort(trbsr))
> @@ -531,10 +543,16 @@ static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
> if ((ec == TRBE_EC_STAGE1_ABORT) || (ec == TRBE_EC_STAGE2_ABORT))
> return TRBE_FAULT_ACT_FATAL;
>
> - if (is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) {
> - if (get_trbe_write_pointer() == get_trbe_base_pointer())
> - return TRBE_FAULT_ACT_WRAP;
> - }
> + /*
> + * If the trbe is affected by TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
> + * it might write data after a WRAP event in the fill mode.
> + * Thus the check TRBPTR == TRBBASER will not be honored.
> + */
Needs a bit of formatting/alignment cleanup.
> + if ((is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) &&
> + (trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) ||
> + get_trbe_write_pointer() == get_trbe_base_pointer()))
> + return TRBE_FAULT_ACT_WRAP;
> +
Right, a TRBE without the erratum should continue to have the write
pointer == base pointer check. Could all TRBE errata checks like
the following be shortened (without the workaround index) for
better readability? This is not very important, though.
trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE)
> return TRBE_FAULT_ACT_SPURIOUS;
> }
>
> @@ -544,6 +562,8 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
> {
> u64 write;
> u64 start_off, end_off;
> + u64 size;
> + u64 overwrite_skip = TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
>
> /*
> * If the TRBE has wrapped around the write pointer has
> @@ -559,7 +579,18 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
>
> if (WARN_ON_ONCE(end_off < start_off))
> return 0;
> - return (end_off - start_off);
> +
> + size = end_off - start_off;
> + /*
> + * If the TRBE is affected by the following erratum, we must fill
> + * the space we skipped with IGNORE packets. And we are always
> + * guaranteed to have at least a PAGE_SIZE space in the buffer.
> + */
> + if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) &&
> + !WARN_ON(size < overwrite_skip))
> + __trbe_pad_buf(buf, start_off, overwrite_skip);
> +
> + return size;
> }
>
> static void *arm_trbe_alloc_buffer(struct coresight_device *csdev,
> @@ -678,7 +709,7 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
> clr_trbe_irq();
> isb();
>
> - act = trbe_get_fault_act(status);
> + act = trbe_get_fault_act(handle, status);
> /*
> * If this was not due to a WRAP event, we have some
> * errors and as such buffer is empty.
> @@ -702,21 +733,95 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
> return size;
> }
>
> +
> +static int trbe_apply_work_around_before_enable(struct trbe_buf *buf)
> +{
> + /*
> + * TRBE_WORKAROUND_OVERWRITE_FILL_MODE causes the TRBE to overwrite a few cache
few cache lines = 3 cache lines ?
> + * line size from the "TRBBASER_EL1" in the event of a "FILL".
> + * Thus, we could loose some amount of the trace at the base.
> + *
> + * Before Fix:
> + *
> + * normal-BASE head normal-PTR tail normal-LIMIT
> + * | \/ /
> + * -------------------------------------------------------------
> + * | | |xyzdefghij..|... tuvw| |
> + * -------------------------------------------------------------
> + * / | \
> + * After Fix-> TRBBASER TRBPTR TRBLIMITR.LIMIT
> + *
> + * In the normal course of action, we would set the TRBBASER to the
> + * beginning of the ring-buffer (normal-BASE). But with the erratum,
> + * the TRBE could overwrite the contents at the "normal-BASE", after
> + * hitting the "normal-LIMIT", since it doesn't stop as expected. And
> + * this is wrong. So we must always make sure that the TRBBASER is
> + * within the region [head, head+size].
> + *
> + * Also, we would set the TRBPTR to head (after adjusting for
> + * alignment) at normal-PTR. This would mean that the last few bytes
> + * of the trace (say, "xyz") might overwrite the first few bytes of
> + * trace written ("abc"). More importantly they will appear in what\
> + * userspace sees as the beginning of the trace, which is wrong. We may
> + * not always have space to move the latest trace "xyz" to the correct
> + * order as it must appear beyond the LIMIT. (i.e, [head..head+size].
> + * Thus it is easier to ignore those bytes than to complicate the
> + * driver to move it, assuming that the erratum was triggered and doing
> + * additional checks to see if there is indeed allowed space at
> + * TRBLIMITR.LIMIT.
> + *
> + * To summarize, with the work around:
> + *
> + * - We always align the offset for the next session to PAGE_SIZE
> + * (This is to ensure we can program the TRBBASER to this offset
> + * within the region [head...head+size]).
> + *
> + * - At TRBE enable:
> + * - Set the TRBBASER to the page aligned offset of the current
> + * proposed write offset. (which is guaranteed to be aligned
> + * as above)
> + * - Move the TRBPTR to skip first 256bytes (that might be
> + * overwritten with the erratum). This ensures that the trace
> + * generated in the session is not re-written.
> + *
> + * - At trace collection:
> + * - Pad the 256bytes skipped above again with IGNORE packets.
> + */
> + if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE)) {
> + if (WARN_ON(!IS_ALIGNED(buf->trbe_write, PAGE_SIZE)))
> + return -EINVAL;
> + buf->trbe_hw_base = buf->trbe_write;
> + buf->trbe_write += TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
> + }
> +
> + return 0;
> +}
> +
> static int __arm_trbe_enable(struct trbe_buf *buf,
> struct perf_output_handle *handle)
> {
> + int ret = 0;
> +
> perf_aux_output_flag(handle, PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
> buf->trbe_limit = compute_trbe_buffer_limit(handle);
> buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
> if (buf->trbe_limit == buf->trbe_base) {
> - trbe_stop_and_truncate_event(handle);
> - return -ENOSPC;
> + ret = -ENOSPC;
> + goto err;
> }
> /* Set the base of the TRBE to the buffer base */
> buf->trbe_hw_base = buf->trbe_base;
> +
> + ret = trbe_apply_work_around_before_enable(buf);
> + if (ret)
> + goto err;
> +
> *this_cpu_ptr(buf->cpudata->drvdata->handle) = handle;
> trbe_enable_hw(buf);
> return 0;
> +err:
> + trbe_stop_and_truncate_event(handle);
> + return ret;
> }
>
> static int arm_trbe_enable(struct coresight_device *csdev, u32 mode, void *data)
> @@ -860,7 +965,7 @@ static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
> if (!is_perf_trbe(handle))
> return IRQ_NONE;
>
> - act = trbe_get_fault_act(status);
> + act = trbe_get_fault_act(handle, status);
> switch (act) {
> case TRBE_FAULT_ACT_WRAP:
> truncated = !!trbe_handle_overflow(handle);
> @@ -1000,7 +1105,22 @@ static void arm_trbe_probe_cpu(void *info)
> }
>
> trbe_check_errata(cpudata);
> - cpudata->trbe_align = cpudata->trbe_hw_align;
> + /*
> + * If the TRBE is affected by erratum TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
> + * we must always program the TBRPTR_EL1, 256bytes from a page
> + * boundary, with TRBBASER_EL1 set to the page, to prevent
> + * TRBE over-writing 256bytes at TRBBASER_EL1 on FILL event.
> + *
> + * Thus make sure we always align our write pointer to a PAGE_SIZE,
> + * which also guarantees that we have at least a PAGE_SIZE space in
> + * the buffer (TRBLIMITR is PAGE aligned) and thus we can skip
> + * the required bytes at the base.
> + */
> + if (trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE))
> + cpudata->trbe_align = PAGE_SIZE;
> + else
> + cpudata->trbe_align = cpudata->trbe_hw_align;
> +
But, like trbe_apply_work_around_before_enable(), the trbe_align assignment
should also be wrapped inside a new helper, which should contain these
comments and the conditional block, because it makes sense to have errata
workarounds in the leaf-level helper functions rather than in the TRBE core
operations.
> cpudata->trbe_flag = get_trbe_flag_update(trbidr);
> cpudata->cpu = cpu;
> cpudata->drvdata = drvdata;
>
On 23/09/2021 07:13, Anshuman Khandual wrote:
>
>
> On 9/21/21 7:11 PM, Suzuki K Poulose wrote:
>> ARM Neoverse-N2 (#2139208) and Cortex-A710(##2119858) suffers from
>> an erratum, which when triggered, might cause the TRBE to overwrite
>> the trace data already collected in FILL mode, in the event of a WRAP.
>> i.e, the TRBE doesn't stop writing the data, instead wraps to the base
>> and could write upto 3 cache line size worth trace. Thus, this could
>> corrupt the trace at the "BASE" pointer.
>>
>> The workaround is to program the write pointer 256bytes from the
>
> 3 cache lines = 256 bytes on all implementation which might have TRBE ?
> OR this skid bytes should be derived from the platform cache line size
> instead.
256 bytes is the safeguard value, aligned up to a power of 2;
it is not exactly 3 cache lines. Ideally, if another CPU with a larger
cache line size is affected by the erratum, then yes, we must do that.
But for now this is sufficient.
>
>> base, such that if the erratum is triggered, it doesn't overwrite
>> the trace data that was captured. This skipped region could be
>> padded with ignore packets at the end of the session, so that
>> the decoder sees a continuous buffer with some padding at the
>> beginning. The trace data written at the base is considered
>> lost as the limit could have been in the middle of the perf
>> ring buffer, and jumping to the "base" is not acceptable.
>> We set the flags already to indicate that some amount of trace
>> was lost during the FILL event IRQ. So this is fine.
>
> Via PERF_AUX_FLAG_TRUNCATED ? Should be specified here to be clear.
Please note that setting the flag is not a side effect of the
work around. And as such I don't think this needs to be mentioned
here. e.g, we changed this to COLLISION recently for WRAP events.
It makes sense to keep the details to the driver.
>
>>
>> One important change with the work around is, we program the
>> TRBBASER_EL1 to current page where we are allowed to write.
>> Otherwise, it could overwrite a region that may be consumed
>> by the perf. Towards this, we always make sure that the
>> "handle->head" and thus the trbe_write is PAGE_SIZE aligned,
>> so that we can set the BASE to the PAGE base and move the
>> TRBPTR to the 256bytes offset.
>>
>> Cc: Mike Leach <[email protected]>
>> Cc: Mathieu Poirier <[email protected]>
>> Cc: Anshuman Khandual <[email protected]>
>> Cc: Leo Yan <[email protected]>
>> Signed-off-by: Suzuki K Poulose <[email protected]>
>> ---
>> Change since v1:
>> - Updated comment with ASCII art
>> - Add _BYTES suffix for the space to skip for the work around.
>> ---
>> drivers/hwtracing/coresight/coresight-trbe.c | 144 +++++++++++++++++--
>> 1 file changed, 132 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
>> index f569010c672b..983dd5039e52 100644
>> --- a/drivers/hwtracing/coresight/coresight-trbe.c
>> +++ b/drivers/hwtracing/coresight/coresight-trbe.c
>> @@ -16,6 +16,7 @@
>> #define pr_fmt(fmt) DRVNAME ": " fmt
>>
>> #include <asm/barrier.h>
>> +#include <asm/cpufeature.h>
>> #include <asm/cputype.h>
>>
>> #include "coresight-self-hosted-trace.h"
>> @@ -84,9 +85,17 @@ struct trbe_buf {
>> * per TRBE instance, we keep track of the list of errata that
>> * affects the given instance of the TRBE.
>> */
>> -#define TRBE_ERRATA_MAX 0
>> +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE 0
>> +#define TRBE_ERRATA_MAX 1
>> +
>> +/*
>> + * Safe limit for the number of bytes that may be overwritten
>> + * when the erratum is triggered.
>> + */
>> +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES 256
>
> As mentioned earlier, does it depend on the platform cache line size ?
> Otherwise if the skip bytes is something platform independent, should
> be mentioned here in a comment.
I could add in a comment.
>
>>
>> static unsigned long trbe_errata_cpucaps[TRBE_ERRATA_MAX] = {
>> + [TRBE_WORKAROUND_OVERWRITE_FILL_MODE] = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
>> };
>>
>> /*
>> @@ -519,10 +528,13 @@ static void trbe_enable_hw(struct trbe_buf *buf)
>> set_trbe_limit_pointer_enabled(buf->trbe_limit);
>> }
>>
>> -static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
>> +static enum trbe_fault_action trbe_get_fault_act(struct perf_output_handle *handle,
>> + u64 trbsr)
>> {
>> int ec = get_trbe_ec(trbsr);
>> int bsc = get_trbe_bsc(trbsr);
>> + struct trbe_buf *buf = etm_perf_sink_config(handle);
>> + struct trbe_cpudata *cpudata = buf->cpudata;
>
> Passing down the perf handle to derive trbe_cpudata seems to be right.
>
>>
>> WARN_ON(is_trbe_running(trbsr));
>> if (is_trbe_trg(trbsr) || is_trbe_abort(trbsr))
>> @@ -531,10 +543,16 @@ static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
>> if ((ec == TRBE_EC_STAGE1_ABORT) || (ec == TRBE_EC_STAGE2_ABORT))
>> return TRBE_FAULT_ACT_FATAL;
>>
>> - if (is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) {
>> - if (get_trbe_write_pointer() == get_trbe_base_pointer())
>> - return TRBE_FAULT_ACT_WRAP;
>> - }
>> + /*
>> + * If the trbe is affected by TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
>> + * it might write data after a WRAP event in the fill mode.
>> + * Thus the check TRBPTR == TRBBASER will not be honored.
>> + */
>
> Needs bit formatting/alignment cleanup.
>
>> + if ((is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) &&
>> + (trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) ||
>> + get_trbe_write_pointer() == get_trbe_base_pointer()))
>> + return TRBE_FAULT_ACT_WRAP;
>> +
>
> Right, TRBE without the errata should continue to have the write
> pointer = base pointer check. Could all TRBE errata checks like
> the following be shortened (without the workaround index) for
> better readability ? But not something very important.
>
> trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE)
Do you mean something like :
trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) ->
trbe_may_overwrite_in_fill_mode(cpudata) ?
>
>
>> return TRBE_FAULT_ACT_SPURIOUS;
>> }
>>
>> @@ -544,6 +562,8 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
>> {
>> u64 write;
>> u64 start_off, end_off;
>> + u64 size;
>> + u64 overwrite_skip = TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
>>
>> /*
>> * If the TRBE has wrapped around the write pointer has
>> @@ -559,7 +579,18 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
>>
>> if (WARN_ON_ONCE(end_off < start_off))
>> return 0;
>> - return (end_off - start_off);
>> +
>> + size = end_off - start_off;
>> + /*
>> + * If the TRBE is affected by the following erratum, we must fill
>> + * the space we skipped with IGNORE packets. And we are always
>> + * guaranteed to have at least a PAGE_SIZE space in the buffer.
>> + */
>> + if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) &&
>> + !WARN_ON(size < overwrite_skip))
>> + __trbe_pad_buf(buf, start_off, overwrite_skip);
>> +
>> + return size;
>> }
>>
>> static void *arm_trbe_alloc_buffer(struct coresight_device *csdev,
>> @@ -678,7 +709,7 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
>> clr_trbe_irq();
>> isb();
>>
>> - act = trbe_get_fault_act(status);
>> + act = trbe_get_fault_act(handle, status);
>> /*
>> * If this was not due to a WRAP event, we have some
>> * errors and as such buffer is empty.
>> @@ -702,21 +733,95 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
>> return size;
>> }
>>
>> +
>> +static int trbe_apply_work_around_before_enable(struct trbe_buf *buf)
>> +{
>> + /*
>> + * TRBE_WORKAROUND_OVERWRITE_FILL_MODE causes the TRBE to overwrite a few cache
>
> few cache lines = 3 cache lines ?
Yes, up to 3.
>
>> + * line size from the "TRBBASER_EL1" in the event of a "FILL".
>> + * Thus, we could loose some amount of the trace at the base.
>> + *
>> + * Before Fix:
>> + *
>> + * normal-BASE head normal-PTR tail normal-LIMIT
>> + * | \/ /
>> + * -------------------------------------------------------------
>> + * | | |xyzdefghij..|... tuvw| |
>> + * -------------------------------------------------------------
>> + * / | \
>> + * After Fix-> TRBBASER TRBPTR TRBLIMITR.LIMIT
>> + *
>> + * In the normal course of action, we would set the TRBBASER to the
>> + * beginning of the ring-buffer (normal-BASE). But with the erratum,
>> + * the TRBE could overwrite the contents at the "normal-BASE", after
>> + * hitting the "normal-LIMIT", since it doesn't stop as expected. And
>> + * this is wrong. So we must always make sure that the TRBBASER is
>> + * within the region [head, head+size].
>> + *
>> + * Also, we would set the TRBPTR to head (after adjusting for
>> + * alignment) at normal-PTR. This would mean that the last few bytes
>> + * of the trace (say, "xyz") might overwrite the first few bytes of
>> + * trace written ("abc"). More importantly they will appear in what\
>> + * userspace sees as the beginning of the trace, which is wrong. We may
>> + * not always have space to move the latest trace "xyz" to the correct
>> + * order as it must appear beyond the LIMIT. (i.e, [head..head+size].
>> + * Thus it is easier to ignore those bytes than to complicate the
>> + * driver to move it, assuming that the erratum was triggered and doing
>> + * additional checks to see if there is indeed allowed space at
>> + * TRBLIMITR.LIMIT.
>> + *
>> + * To summarize, with the work around:
>> + *
>> + * - We always align the offset for the next session to PAGE_SIZE
>> + * (This is to ensure we can program the TRBBASER to this offset
>> + * within the region [head...head+size]).
>> + *
>> + * - At TRBE enable:
>> + * - Set the TRBBASER to the page aligned offset of the current
>> + * proposed write offset. (which is guaranteed to be aligned
>> + * as above)
>> + * - Move the TRBPTR to skip first 256bytes (that might be
>> + * overwritten with the erratum). This ensures that the trace
>> + * generated in the session is not re-written.
>> + *
>> + * - At trace collection:
>> + * - Pad the 256bytes skipped above again with IGNORE packets.
>> + */
>> + if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE)) {
>> + if (WARN_ON(!IS_ALIGNED(buf->trbe_write, PAGE_SIZE)))
>> + return -EINVAL;
>> + buf->trbe_hw_base = buf->trbe_write;
>> + buf->trbe_write += TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static int __arm_trbe_enable(struct trbe_buf *buf,
>> struct perf_output_handle *handle)
>> {
>> + int ret = 0;
>> +
>> perf_aux_output_flag(handle, PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
>> buf->trbe_limit = compute_trbe_buffer_limit(handle);
>> buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
>> if (buf->trbe_limit == buf->trbe_base) {
>> - trbe_stop_and_truncate_event(handle);
>> - return -ENOSPC;
>> + ret = -ENOSPC;
>> + goto err;
>> }
>> /* Set the base of the TRBE to the buffer base */
>> buf->trbe_hw_base = buf->trbe_base;
>> +
>> + ret = trbe_apply_work_around_before_enable(buf);
>> + if (ret)
>> + goto err;
>> +
>> *this_cpu_ptr(buf->cpudata->drvdata->handle) = handle;
>> trbe_enable_hw(buf);
>> return 0;
>> +err:
>> + trbe_stop_and_truncate_event(handle);
>> + return ret;
>> }
>>
>> static int arm_trbe_enable(struct coresight_device *csdev, u32 mode, void *data)
>> @@ -860,7 +965,7 @@ static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
>> if (!is_perf_trbe(handle))
>> return IRQ_NONE;
>>
>> - act = trbe_get_fault_act(status);
>> + act = trbe_get_fault_act(handle, status);
>> switch (act) {
>> case TRBE_FAULT_ACT_WRAP:
>> truncated = !!trbe_handle_overflow(handle);
>> @@ -1000,7 +1105,22 @@ static void arm_trbe_probe_cpu(void *info)
>> }
>>
>> trbe_check_errata(cpudata);
>> - cpudata->trbe_align = cpudata->trbe_hw_align;
>> + /*
>> + * If the TRBE is affected by erratum TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
>> + * we must always program the TRBPTR_EL1, 256bytes from a page
>> + * boundary, with TRBBASER_EL1 set to the page, to prevent
>> + * TRBE over-writing 256bytes at TRBBASER_EL1 on FILL event.
>> + *
>> + * Thus make sure we always align our write pointer to a PAGE_SIZE,
>> + * which also guarantees that we have at least a PAGE_SIZE space in
>> + * the buffer (TRBLIMITR is PAGE aligned) and thus we can skip
>> + * the required bytes at the base.
>> + */
>> + if (trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE))
>> + cpudata->trbe_align = PAGE_SIZE;
>> + else
>> + cpudata->trbe_align = cpudata->trbe_hw_align;
>> +
>
> But like trbe_apply_work_around_before_enable(), trbe_align assignment
> should also be wrapped inside a new helper which should contain these
> comments and conditional block. Because it makes sense to have errata
> work arounds in the leaf level helper functions, rather than TRBE core
> operations.
That would imply we re-initialize the trbe_align in the new helper after
setting the value here for all other unaffected TRBEs. I would rather
leave it as it is, until we have more work arounds that touch this area.
This is a one-off piece of code called per TRBE instance.
Suzuki
On 9/28/21 4:10 PM, Suzuki K Poulose wrote:
> On 23/09/2021 07:13, Anshuman Khandual wrote:
>>
>>
>> On 9/21/21 7:11 PM, Suzuki K Poulose wrote:
>>> ARM Neoverse-N2 (#2139208) and Cortex-A710(##2119858) suffers from
>>> an erratum, which when triggered, might cause the TRBE to overwrite
>>> the trace data already collected in FILL mode, in the event of a WRAP.
>>> i.e, the TRBE doesn't stop writing the data, instead wraps to the base
>>> and could write upto 3 cache line size worth trace. Thus, this could
>>> corrupt the trace at the "BASE" pointer.
>>>
>>> The workaround is to program the write pointer 256bytes from the
>>
>> 3 cache lines = 256 bytes on all implementation which might have TRBE ?
>> OR this skid bytes should be derived from the platform cache line size
>> instead.
>
> 256bytes is the aligned (to the power of 2) value for the safe guard.
> Not 3 cache lines. Ideally, if there is another CPU that has larger
> cache line size, affected by the erratum, yes, we must do that.
> But for now this is sufficient.
Okay.
>
>>
>>> base, such that if the erratum is triggered, it doesn't overwrite
>>> the trace data that was captured. This skipped region could be
>>> padded with ignore packets at the end of the session, so that
>>> the decoder sees a continuous buffer with some padding at the
>>> beginning. The trace data written at the base is considered
>>> lost as the limit could have been in the middle of the perf
>>> ring buffer, and jumping to the "base" is not acceptable.
>>> We set the flags already to indicate that some amount of trace
>>> was lost during the FILL event IRQ. So this is fine.
>>
>> Via PERF_AUX_FLAG_TRUNCATED ? Should be specified here to be clear.
>
> Please note that setting the flag is not a side effect of the
> work around. And as such I don't think this needs to be mentioned
> here. e.g, we changed this to COLLISION recently for WRAP events.
> It makes sense to keep the details to the driver.
Okay.
>
>>
>>>
>
>>> One important change with the work around is, we program the
>>> TRBBASER_EL1 to current page where we are allowed to write.
>>> Otherwise, it could overwrite a region that may be consumed
>>> by the perf. Towards this, we always make sure that the
>>> "handle->head" and thus the trbe_write is PAGE_SIZE aligned,
>>> so that we can set the BASE to the PAGE base and move the
>>> TRBPTR to the 256bytes offset.
>>>
>>> Cc: Mike Leach <[email protected]>
>>> Cc: Mathieu Poirier <[email protected]>
>>> Cc: Anshuman Khandual <[email protected]>
>>> Cc: Leo Yan <[email protected]>
>>> Signed-off-by: Suzuki K Poulose <[email protected]>
>>> ---
>>> Change since v1:
>>> - Updated comment with ASCII art
>>> - Add _BYTES suffix for the space to skip for the work around.
>>> ---
>>> drivers/hwtracing/coresight/coresight-trbe.c | 144 +++++++++++++++++--
>>> 1 file changed, 132 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
>>> index f569010c672b..983dd5039e52 100644
>>> --- a/drivers/hwtracing/coresight/coresight-trbe.c
>>> +++ b/drivers/hwtracing/coresight/coresight-trbe.c
>>> @@ -16,6 +16,7 @@
>>> #define pr_fmt(fmt) DRVNAME ": " fmt
>>> #include <asm/barrier.h>
>>> +#include <asm/cpufeature.h>
>>> #include <asm/cputype.h>
>>> #include "coresight-self-hosted-trace.h"
>>> @@ -84,9 +85,17 @@ struct trbe_buf {
>>> * per TRBE instance, we keep track of the list of errata that
>>> * affects the given instance of the TRBE.
>>> */
>>> -#define TRBE_ERRATA_MAX 0
>>> +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE 0
>>> +#define TRBE_ERRATA_MAX 1
>>> +
>>> +/*
>>> + * Safe limit for the number of bytes that may be overwritten
>>> + * when the erratum is triggered.
>>> + */
>>> +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES 256
>>
>> As mentioned earlier, does it depend on the platform cache line size ?
>> Otherwise if the skip bytes is something platform independent, should
>> be mentioned here in a comment.
>
> I could add in a comment.
>
>>
>>> static unsigned long trbe_errata_cpucaps[TRBE_ERRATA_MAX] = {
>>> + [TRBE_WORKAROUND_OVERWRITE_FILL_MODE] = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
>>> };
>>> /*
>>> @@ -519,10 +528,13 @@ static void trbe_enable_hw(struct trbe_buf *buf)
>>> set_trbe_limit_pointer_enabled(buf->trbe_limit);
>>> }
>>> -static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
>>> +static enum trbe_fault_action trbe_get_fault_act(struct perf_output_handle *handle,
>>> + u64 trbsr)
>>> {
>>> int ec = get_trbe_ec(trbsr);
>>> int bsc = get_trbe_bsc(trbsr);
>>> + struct trbe_buf *buf = etm_perf_sink_config(handle);
>>> + struct trbe_cpudata *cpudata = buf->cpudata;
>>
>> Passing down the perf handle to derive trbe_cpudata seems to be right.
>>
>>> WARN_ON(is_trbe_running(trbsr));
>>> if (is_trbe_trg(trbsr) || is_trbe_abort(trbsr))
>>> @@ -531,10 +543,16 @@ static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
>>> if ((ec == TRBE_EC_STAGE1_ABORT) || (ec == TRBE_EC_STAGE2_ABORT))
>>> return TRBE_FAULT_ACT_FATAL;
>>> - if (is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) {
>>> - if (get_trbe_write_pointer() == get_trbe_base_pointer())
>>> - return TRBE_FAULT_ACT_WRAP;
>>> - }
>>> + /*
>>> + * If the trbe is affected by TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
>>> + * it might write data after a WRAP event in the fill mode.
>>> + * Thus the check TRBPTR == TRBBASER will not be honored.
>>> + */
>>
>> Needs a bit of formatting/alignment cleanup.
>>
>>> + if ((is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) &&
>>> + (trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) ||
>>> + get_trbe_write_pointer() == get_trbe_base_pointer()))
>>> + return TRBE_FAULT_ACT_WRAP;
>>> +
>>
>> Right, TRBE without the errata should continue to have the write
>> pointer = base pointer check. Could all TRBE errata checks like
>> the following be shortened (without the workaround index) for
>> better readability ? But not something very important.
>>
>> trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE)
>
> Do you mean something like :
>
> trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) ->
> trbe_may_overwrite_in_fill_mode(cpudata) ?
Right, something similar which absorbs the work around index in
its name itself.
>
>
>>
>>
>>> return TRBE_FAULT_ACT_SPURIOUS;
>>> }
>>> @@ -544,6 +562,8 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
>>> {
>>> u64 write;
>>> u64 start_off, end_off;
>>> + u64 size;
>>> + u64 overwrite_skip = TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
>>> /*
>>> * If the TRBE has wrapped around the write pointer has
>>> @@ -559,7 +579,18 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
>>> if (WARN_ON_ONCE(end_off < start_off))
>>> return 0;
>>> - return (end_off - start_off);
>>> +
>>> + size = end_off - start_off;
>>> + /*
>>> + * If the TRBE is affected by the following erratum, we must fill
>>> + * the space we skipped with IGNORE packets. And we are always
>>> + * guaranteed to have at least a PAGE_SIZE space in the buffer.
>>> + */
>>> + if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) &&
>>> + !WARN_ON(size < overwrite_skip))
>>> + __trbe_pad_buf(buf, start_off, overwrite_skip);
>>> +
>>> + return size;
>>> }
>>> static void *arm_trbe_alloc_buffer(struct coresight_device *csdev,
>>> @@ -678,7 +709,7 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
>>> clr_trbe_irq();
>>> isb();
>>> - act = trbe_get_fault_act(status);
>>> + act = trbe_get_fault_act(handle, status);
>>> /*
>>> * If this was not due to a WRAP event, we have some
>>> * errors and as such buffer is empty.
>>> @@ -702,21 +733,95 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
>>> return size;
>>> }
>>> +
>>> +static int trbe_apply_work_around_before_enable(struct trbe_buf *buf)
>>> +{
>>> + /*
>>> + * TRBE_WORKAROUND_OVERWRITE_FILL_MODE causes the TRBE to overwrite a few cache
>>
>> few cache lines = 3 cache lines ?
>
> Yes, upto 3.
>
>>
>>> + * line size from the "TRBBASER_EL1" in the event of a "FILL".
>>> + * Thus, we could lose some amount of the trace at the base.
>>> + *
>>> + * Before Fix:
>>> + *
>>> + * normal-BASE head normal-PTR tail normal-LIMIT
>>> + * | \/ /
>>> + * -------------------------------------------------------------
>>> + * | | |xyzdefghij..|... tuvw| |
>>> + * -------------------------------------------------------------
>>> + * / | \
>>> + * After Fix-> TRBBASER TRBPTR TRBLIMITR.LIMIT
>>> + *
>>> + * In the normal course of action, we would set the TRBBASER to the
>>> + * beginning of the ring-buffer (normal-BASE). But with the erratum,
>>> + * the TRBE could overwrite the contents at the "normal-BASE", after
>>> + * hitting the "normal-LIMIT", since it doesn't stop as expected. And
>>> + * this is wrong. So we must always make sure that the TRBBASER is
>>> + * within the region [head, head+size].
>>> + *
>>> + * Also, we would set the TRBPTR to head (after adjusting for
>>> + * alignment) at normal-PTR. This would mean that the last few bytes
>>> + * of the trace (say, "xyz") might overwrite the first few bytes of
>>> + * trace written ("abc"). More importantly they will appear in what
>>> + * userspace sees as the beginning of the trace, which is wrong. We may
>>> + * not always have space to move the latest trace "xyz" to the correct
>>> + * order as it must appear beyond the LIMIT (i.e., [head..head+size]).
>>> + * Thus it is easier to ignore those bytes than to complicate the
>>> + * driver to move it, assuming that the erratum was triggered and doing
>>> + * additional checks to see if there is indeed allowed space at
>>> + * TRBLIMITR.LIMIT.
>>> + *
>>> + * To summarize, with the work around:
>>> + *
>>> + * - We always align the offset for the next session to PAGE_SIZE
>>> + * (This is to ensure we can program the TRBBASER to this offset
>>> + * within the region [head...head+size]).
>>> + *
>>> + * - At TRBE enable:
>>> + * - Set the TRBBASER to the page aligned offset of the current
>>> + * proposed write offset. (which is guaranteed to be aligned
>>> + * as above)
>>> + * - Move the TRBPTR to skip first 256bytes (that might be
>>> + * overwritten with the erratum). This ensures that the trace
>>> + * generated in the session is not re-written.
>>> + *
>>> + * - At trace collection:
>>> + * - Pad the 256bytes skipped above again with IGNORE packets.
>>> + */
>>> + if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE)) {
>>> + if (WARN_ON(!IS_ALIGNED(buf->trbe_write, PAGE_SIZE)))
>>> + return -EINVAL;
>>> + buf->trbe_hw_base = buf->trbe_write;
>>> + buf->trbe_write += TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
>>> + }
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static int __arm_trbe_enable(struct trbe_buf *buf,
>>> struct perf_output_handle *handle)
>>> {
>>> + int ret = 0;
>>> +
>>> perf_aux_output_flag(handle, PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
>>> buf->trbe_limit = compute_trbe_buffer_limit(handle);
>>> buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
>>> if (buf->trbe_limit == buf->trbe_base) {
>>> - trbe_stop_and_truncate_event(handle);
>>> - return -ENOSPC;
>>> + ret = -ENOSPC;
>>> + goto err;
>>> }
>>> /* Set the base of the TRBE to the buffer base */
>>> buf->trbe_hw_base = buf->trbe_base;
>>> +
>>> + ret = trbe_apply_work_around_before_enable(buf);
>>> + if (ret)
>>> + goto err;
>>> +
>>> *this_cpu_ptr(buf->cpudata->drvdata->handle) = handle;
>>> trbe_enable_hw(buf);
>>> return 0;
>>> +err:
>>> + trbe_stop_and_truncate_event(handle);
>>> + return ret;
>>> }
>>> static int arm_trbe_enable(struct coresight_device *csdev, u32 mode, void *data)
>>> @@ -860,7 +965,7 @@ static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
>>> if (!is_perf_trbe(handle))
>>> return IRQ_NONE;
>>> - act = trbe_get_fault_act(status);
>>> + act = trbe_get_fault_act(handle, status);
>>> switch (act) {
>>> case TRBE_FAULT_ACT_WRAP:
>>> truncated = !!trbe_handle_overflow(handle);
>>> @@ -1000,7 +1105,22 @@ static void arm_trbe_probe_cpu(void *info)
>>> }
>>> trbe_check_errata(cpudata);
>>> - cpudata->trbe_align = cpudata->trbe_hw_align;
>>> + /*
>>> + * If the TRBE is affected by erratum TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
>>> + * we must always program the TRBPTR_EL1, 256bytes from a page
>>> + * boundary, with TRBBASER_EL1 set to the page, to prevent
>>> + * TRBE over-writing 256bytes at TRBBASER_EL1 on FILL event.
>>> + *
>>> + * Thus make sure we always align our write pointer to a PAGE_SIZE,
>>> + * which also guarantees that we have at least a PAGE_SIZE space in
>>> + * the buffer (TRBLIMITR is PAGE aligned) and thus we can skip
>>> + * the required bytes at the base.
>>> + */
>>> + if (trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE))
>>> + cpudata->trbe_align = PAGE_SIZE;
>>> + else
>>> + cpudata->trbe_align = cpudata->trbe_hw_align;
>>> +
>>
>> But like trbe_apply_work_around_before_enable(), trbe_align assignment
>> should also be wrapped inside a new helper which should contain these
>> comments and conditional block. Because it makes sense to have errata
>> work arounds in the leaf level helper functions, rather than TRBE core
>> operations.
>
> That would imply we re-initialize the trbe_align in the new helper after
> setting the value here for all other unaffected TRBEs. I would rather
> leave it as it is, until we have more work arounds that touch this area.
> This is a one-off piece of code called per TRBE instance.
Okay.
On Tue, Sep 21, 2021 at 02:41:13PM +0100, Suzuki K Poulose wrote:
> ARM Neoverse-N2 (#2139208) and Cortex-A710(##2119858) suffers from
> an erratum, which when triggered, might cause the TRBE to overwrite
> the trace data already collected in FILL mode, in the event of a WRAP.
> i.e, the TRBE doesn't stop writing the data, instead wraps to the base
> and could write upto 3 cache line size worth trace. Thus, this could
> corrupt the trace at the "BASE" pointer.
>
> The workaround is to program the write pointer 256bytes from the
> base, such that if the erratum is triggered, it doesn't overwrite
> the trace data that was captured. This skipped region could be
> padded with ignore packets at the end of the session, so that
> the decoder sees a continuous buffer with some padding at the
> beginning. The trace data written at the base is considered
> lost as the limit could have been in the middle of the perf
> ring buffer, and jumping to the "base" is not acceptable.
> We set the flags already to indicate that some amount of trace
> was lost during the FILL event IRQ. So this is fine.
>
> One important change with the work around is, we program the
> TRBBASER_EL1 to current page where we are allowed to write.
> Otherwise, it could overwrite a region that may be consumed
> by the perf. Towards this, we always make sure that the
> "handle->head" and thus the trbe_write is PAGE_SIZE aligned,
> so that we can set the BASE to the PAGE base and move the
> TRBPTR to the 256bytes offset.
>
> Cc: Mike Leach <[email protected]>
> Cc: Mathieu Poirier <[email protected]>
> Cc: Anshuman Khandual <[email protected]>
> Cc: Leo Yan <[email protected]>
> Signed-off-by: Suzuki K Poulose <[email protected]>
> ---
> Change since v1:
> - Updated comment with ASCII art
> - Add _BYTES suffix for the space to skip for the work around.
> ---
> drivers/hwtracing/coresight/coresight-trbe.c | 144 +++++++++++++++++--
> 1 file changed, 132 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
> index f569010c672b..983dd5039e52 100644
> --- a/drivers/hwtracing/coresight/coresight-trbe.c
> +++ b/drivers/hwtracing/coresight/coresight-trbe.c
> @@ -16,6 +16,7 @@
> #define pr_fmt(fmt) DRVNAME ": " fmt
>
> #include <asm/barrier.h>
> +#include <asm/cpufeature.h>
> #include <asm/cputype.h>
>
> #include "coresight-self-hosted-trace.h"
> @@ -84,9 +85,17 @@ struct trbe_buf {
> * per TRBE instance, we keep track of the list of errata that
> * affects the given instance of the TRBE.
> */
> -#define TRBE_ERRATA_MAX 0
> +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE 0
> +#define TRBE_ERRATA_MAX 1
> +
> +/*
> + * Safe limit for the number of bytes that may be overwritten
> + * when the erratum is triggered.
> + */
> +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES 256
>
> static unsigned long trbe_errata_cpucaps[TRBE_ERRATA_MAX] = {
> + [TRBE_WORKAROUND_OVERWRITE_FILL_MODE] = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
> };
>
> /*
> @@ -519,10 +528,13 @@ static void trbe_enable_hw(struct trbe_buf *buf)
> set_trbe_limit_pointer_enabled(buf->trbe_limit);
> }
>
> -static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
> +static enum trbe_fault_action trbe_get_fault_act(struct perf_output_handle *handle,
> + u64 trbsr)
> {
> int ec = get_trbe_ec(trbsr);
> int bsc = get_trbe_bsc(trbsr);
> + struct trbe_buf *buf = etm_perf_sink_config(handle);
> + struct trbe_cpudata *cpudata = buf->cpudata;
>
> WARN_ON(is_trbe_running(trbsr));
> if (is_trbe_trg(trbsr) || is_trbe_abort(trbsr))
> @@ -531,10 +543,16 @@ static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
> if ((ec == TRBE_EC_STAGE1_ABORT) || (ec == TRBE_EC_STAGE2_ABORT))
> return TRBE_FAULT_ACT_FATAL;
>
> - if (is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) {
> - if (get_trbe_write_pointer() == get_trbe_base_pointer())
> - return TRBE_FAULT_ACT_WRAP;
> - }
> + /*
> + * If the trbe is affected by TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
> + * it might write data after a WRAP event in the fill mode.
> + * Thus the check TRBPTR == TRBBASER will not be honored.
> + */
> + if ((is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) &&
> + (trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) ||
> + get_trbe_write_pointer() == get_trbe_base_pointer()))
> + return TRBE_FAULT_ACT_WRAP;
> +
I'm very perplexed by the trbe_has_erratum() infrastructure... Since this is a
TRBE the code will always run on the CPU it is associated with, and if
I'm correct here we could call this_cpu_has_cap() directly with the same
outcome. I doubt that all drivers using the cpucaps subsystem carry a shadow
structure to keep the same information.
I have to stop here for today. Although small in size this patchset demands a
fair amount of involvement - I will continue next week but I may not go through
the whole thing for this revision.
Thanks,
Mathieu
> return TRBE_FAULT_ACT_SPURIOUS;
> }
>
> @@ -544,6 +562,8 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
> {
> u64 write;
> u64 start_off, end_off;
> + u64 size;
> + u64 overwrite_skip = TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
>
> /*
> * If the TRBE has wrapped around the write pointer has
> @@ -559,7 +579,18 @@ static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
>
> if (WARN_ON_ONCE(end_off < start_off))
> return 0;
> - return (end_off - start_off);
> +
> + size = end_off - start_off;
> + /*
> + * If the TRBE is affected by the following erratum, we must fill
> + * the space we skipped with IGNORE packets. And we are always
> + * guaranteed to have at least a PAGE_SIZE space in the buffer.
> + */
> + if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) &&
> + !WARN_ON(size < overwrite_skip))
> + __trbe_pad_buf(buf, start_off, overwrite_skip);
> +
> + return size;
> }
>
> static void *arm_trbe_alloc_buffer(struct coresight_device *csdev,
> @@ -678,7 +709,7 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
> clr_trbe_irq();
> isb();
>
> - act = trbe_get_fault_act(status);
> + act = trbe_get_fault_act(handle, status);
> /*
> * If this was not due to a WRAP event, we have some
> * errors and as such buffer is empty.
> @@ -702,21 +733,95 @@ static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
> return size;
> }
>
> +
> +static int trbe_apply_work_around_before_enable(struct trbe_buf *buf)
> +{
> + /*
> + * TRBE_WORKAROUND_OVERWRITE_FILL_MODE causes the TRBE to overwrite a few cache
> + * line size from the "TRBBASER_EL1" in the event of a "FILL".
> + * Thus, we could lose some amount of the trace at the base.
> + *
> + * Before Fix:
> + *
> + * normal-BASE head normal-PTR tail normal-LIMIT
> + * | \/ /
> + * -------------------------------------------------------------
> + * | | |xyzdefghij..|... tuvw| |
> + * -------------------------------------------------------------
> + * / | \
> + * After Fix-> TRBBASER TRBPTR TRBLIMITR.LIMIT
> + *
> + * In the normal course of action, we would set the TRBBASER to the
> + * beginning of the ring-buffer (normal-BASE). But with the erratum,
> + * the TRBE could overwrite the contents at the "normal-BASE", after
> + * hitting the "normal-LIMIT", since it doesn't stop as expected. And
> + * this is wrong. So we must always make sure that the TRBBASER is
> + * within the region [head, head+size].
> + *
> + * Also, we would set the TRBPTR to head (after adjusting for
> + * alignment) at normal-PTR. This would mean that the last few bytes
> + * of the trace (say, "xyz") might overwrite the first few bytes of
> + * trace written ("abc"). More importantly they will appear in what
> + * userspace sees as the beginning of the trace, which is wrong. We may
> + * not always have space to move the latest trace "xyz" to the correct
> + * order as it must appear beyond the LIMIT (i.e., [head..head+size]).
> + * Thus it is easier to ignore those bytes than to complicate the
> + * driver to move it, assuming that the erratum was triggered and doing
> + * additional checks to see if there is indeed allowed space at
> + * TRBLIMITR.LIMIT.
> + *
> + * To summarize, with the work around:
> + *
> + * - We always align the offset for the next session to PAGE_SIZE
> + * (This is to ensure we can program the TRBBASER to this offset
> + * within the region [head...head+size]).
> + *
> + * - At TRBE enable:
> + * - Set the TRBBASER to the page aligned offset of the current
> + * proposed write offset. (which is guaranteed to be aligned
> + * as above)
> + * - Move the TRBPTR to skip first 256bytes (that might be
> + * overwritten with the erratum). This ensures that the trace
> + * generated in the session is not re-written.
> + *
> + * - At trace collection:
> + * - Pad the 256bytes skipped above again with IGNORE packets.
> + */
> + if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE)) {
> + if (WARN_ON(!IS_ALIGNED(buf->trbe_write, PAGE_SIZE)))
> + return -EINVAL;
> + buf->trbe_hw_base = buf->trbe_write;
> + buf->trbe_write += TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
> + }
> +
> + return 0;
> +}
> +
> static int __arm_trbe_enable(struct trbe_buf *buf,
> struct perf_output_handle *handle)
> {
> + int ret = 0;
> +
> perf_aux_output_flag(handle, PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
> buf->trbe_limit = compute_trbe_buffer_limit(handle);
> buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
> if (buf->trbe_limit == buf->trbe_base) {
> - trbe_stop_and_truncate_event(handle);
> - return -ENOSPC;
> + ret = -ENOSPC;
> + goto err;
> }
> /* Set the base of the TRBE to the buffer base */
> buf->trbe_hw_base = buf->trbe_base;
> +
> + ret = trbe_apply_work_around_before_enable(buf);
> + if (ret)
> + goto err;
> +
> *this_cpu_ptr(buf->cpudata->drvdata->handle) = handle;
> trbe_enable_hw(buf);
> return 0;
> +err:
> + trbe_stop_and_truncate_event(handle);
> + return ret;
> }
>
> static int arm_trbe_enable(struct coresight_device *csdev, u32 mode, void *data)
> @@ -860,7 +965,7 @@ static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
> if (!is_perf_trbe(handle))
> return IRQ_NONE;
>
> - act = trbe_get_fault_act(status);
> + act = trbe_get_fault_act(handle, status);
> switch (act) {
> case TRBE_FAULT_ACT_WRAP:
> truncated = !!trbe_handle_overflow(handle);
> @@ -1000,7 +1105,22 @@ static void arm_trbe_probe_cpu(void *info)
> }
>
> trbe_check_errata(cpudata);
> - cpudata->trbe_align = cpudata->trbe_hw_align;
> + /*
> + * If the TRBE is affected by erratum TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
> + * we must always program the TRBPTR_EL1, 256bytes from a page
> + * boundary, with TRBBASER_EL1 set to the page, to prevent
> + * TRBE over-writing 256bytes at TRBBASER_EL1 on FILL event.
> + *
> + * Thus make sure we always align our write pointer to a PAGE_SIZE,
> + * which also guarantees that we have at least a PAGE_SIZE space in
> + * the buffer (TRBLIMITR is PAGE aligned) and thus we can skip
> + * the required bytes at the base.
> + */
> + if (trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE))
> + cpudata->trbe_align = PAGE_SIZE;
> + else
> + cpudata->trbe_align = cpudata->trbe_hw_align;
> +
> cpudata->trbe_flag = get_trbe_flag_update(trbidr);
> cpudata->cpu = cpu;
> cpudata->drvdata = drvdata;
> --
> 2.24.1
>
Hi Mathieu
On 01/10/2021 18:15, Mathieu Poirier wrote:
> On Tue, Sep 21, 2021 at 02:41:13PM +0100, Suzuki K Poulose wrote:
>> ARM Neoverse-N2 (#2139208) and Cortex-A710(##2119858) suffers from
>> an erratum, which when triggered, might cause the TRBE to overwrite
>> the trace data already collected in FILL mode, in the event of a WRAP.
>> i.e, the TRBE doesn't stop writing the data, instead wraps to the base
>> and could write upto 3 cache line size worth trace. Thus, this could
>> corrupt the trace at the "BASE" pointer.
>>
>> The workaround is to program the write pointer 256bytes from the
>> base, such that if the erratum is triggered, it doesn't overwrite
>> the trace data that was captured. This skipped region could be
>> padded with ignore packets at the end of the session, so that
>> the decoder sees a continuous buffer with some padding at the
>> beginning. The trace data written at the base is considered
>> lost as the limit could have been in the middle of the perf
>> ring buffer, and jumping to the "base" is not acceptable.
>> We set the flags already to indicate that some amount of trace
>> was lost during the FILL event IRQ. So this is fine.
>>
>> One important change with the work around is, we program the
>> TRBBASER_EL1 to current page where we are allowed to write.
>> Otherwise, it could overwrite a region that may be consumed
>> by the perf. Towards this, we always make sure that the
>> "handle->head" and thus the trbe_write is PAGE_SIZE aligned,
>> so that we can set the BASE to the PAGE base and move the
>> TRBPTR to the 256bytes offset.
>>
>> Cc: Mike Leach <[email protected]>
>> Cc: Mathieu Poirier <[email protected]>
>> Cc: Anshuman Khandual <[email protected]>
>> Cc: Leo Yan <[email protected]>
>> Signed-off-by: Suzuki K Poulose <[email protected]>
>> ---
>> Change since v1:
>> - Updated comment with ASCII art
>> - Add _BYTES suffix for the space to skip for the work around.
>> ---
>> drivers/hwtracing/coresight/coresight-trbe.c | 144 +++++++++++++++++--
>> 1 file changed, 132 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
>> index f569010c672b..983dd5039e52 100644
>> --- a/drivers/hwtracing/coresight/coresight-trbe.c
>> +++ b/drivers/hwtracing/coresight/coresight-trbe.c
>> @@ -16,6 +16,7 @@
>> #define pr_fmt(fmt) DRVNAME ": " fmt
>>
>> #include <asm/barrier.h>
>> +#include <asm/cpufeature.h>
>> #include <asm/cputype.h>
>>
>> #include "coresight-self-hosted-trace.h"
>> @@ -84,9 +85,17 @@ struct trbe_buf {
>> * per TRBE instance, we keep track of the list of errata that
>> * affects the given instance of the TRBE.
>> */
>> -#define TRBE_ERRATA_MAX 0
>> +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE 0
>> +#define TRBE_ERRATA_MAX 1
>> +
>> +/*
>> + * Safe limit for the number of bytes that may be overwritten
>> + * when the erratum is triggered.
>> + */
>> +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES 256
>>
>> static unsigned long trbe_errata_cpucaps[TRBE_ERRATA_MAX] = {
>> + [TRBE_WORKAROUND_OVERWRITE_FILL_MODE] = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
>> };
>>
>> /*
>> @@ -519,10 +528,13 @@ static void trbe_enable_hw(struct trbe_buf *buf)
>> set_trbe_limit_pointer_enabled(buf->trbe_limit);
>> }
>>
>> -static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
>> +static enum trbe_fault_action trbe_get_fault_act(struct perf_output_handle *handle,
>> + u64 trbsr)
>> {
>> int ec = get_trbe_ec(trbsr);
>> int bsc = get_trbe_bsc(trbsr);
>> + struct trbe_buf *buf = etm_perf_sink_config(handle);
>> + struct trbe_cpudata *cpudata = buf->cpudata;
>>
>> WARN_ON(is_trbe_running(trbsr));
>> if (is_trbe_trg(trbsr) || is_trbe_abort(trbsr))
>> @@ -531,10 +543,16 @@ static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
>> if ((ec == TRBE_EC_STAGE1_ABORT) || (ec == TRBE_EC_STAGE2_ABORT))
>> return TRBE_FAULT_ACT_FATAL;
>>
>> - if (is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) {
>> - if (get_trbe_write_pointer() == get_trbe_base_pointer())
>> - return TRBE_FAULT_ACT_WRAP;
>> - }
>> + /*
>> + * If the trbe is affected by TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
>> + * it might write data after a WRAP event in the fill mode.
>> + * Thus the check TRBPTR == TRBBASER will not be honored.
>> + */
>> + if ((is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) &&
>> + (trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) ||
>> + get_trbe_write_pointer() == get_trbe_base_pointer()))
>> + return TRBE_FAULT_ACT_WRAP;
>> +
>
> I'm very perplexed by the trbe_has_erratum() infrastructure... Since this is a
> TRBE the code will always run on the CPU it is associated with, and if
> I'm correct here we could call this_cpu_has_cap() directly with the same
> outcome. I doubt that all drivers using the cpucaps subsystem carry a shadow
> structure to keep the same information.
Very valid question. Of course, we can use the this_cpu_has_cap()
helper. Unlike the cpus_have_*_cap() - which gives you the system
wide status of the erratum - the cpucap doesn't keep a cache of which
CPUs are affected by a given erratum. Thus this_cpu_has_cap() would
involve running the detection on the current CPU every time we call it.
i.e., scanning the MIDR of the CPU through the list of affected MIDRs
for the given erratum. This is a bit of overhead.
Given that we already have CPU specific information in trbe_cpudata, I
chose to cache the affected errata locally. This gives us quick access
to the erratum for individual TRBE instances. Of course this list is
initialised at TRBE probe and thus avoids us having to do the costly
check, each time we need it. I could make this clear in the patch
which introduces the framework.
Thanks for the review
Suzuki
> Thanks,
> Mathieu
Good morning,
On Mon, Oct 04, 2021 at 09:46:07AM +0100, Suzuki K Poulose wrote:
> Hi Mathieu
>
> On 01/10/2021 18:15, Mathieu Poirier wrote:
> > On Tue, Sep 21, 2021 at 02:41:13PM +0100, Suzuki K Poulose wrote:
> > > ARM Neoverse-N2 (#2139208) and Cortex-A710 (#2119858) suffer from
> > > an erratum, which when triggered, might cause the TRBE to overwrite
> > > the trace data already collected in FILL mode, in the event of a WRAP.
> > > i.e., the TRBE doesn't stop writing the data, instead wraps to the base
> > > and could write up to 3 cache lines' worth of trace. Thus, this could
> > > corrupt the trace at the "BASE" pointer.
> > >
> > > The workaround is to program the write pointer 256bytes from the
> > > base, such that if the erratum is triggered, it doesn't overwrite
> > > the trace data that was captured. This skipped region could be
> > > padded with ignore packets at the end of the session, so that
> > > the decoder sees a continuous buffer with some padding at the
> > > beginning. The trace data written at the base is considered
> > > lost as the limit could have been in the middle of the perf
> > > ring buffer, and jumping to the "base" is not acceptable.
> > > We set the flags already to indicate that some amount of trace
> > > was lost during the FILL event IRQ. So this is fine.
> > >
> > > One important change with the work around is, we program the
> > > TRBBASER_EL1 to current page where we are allowed to write.
> > > Otherwise, it could overwrite a region that may be consumed
> > > by the perf. Towards this, we always make sure that the
> > > "handle->head" and thus the trbe_write is PAGE_SIZE aligned,
> > > so that we can set the BASE to the PAGE base and move the
> > > TRBPTR to the 256bytes offset.
> > >
> > > Cc: Mike Leach <[email protected]>
> > > Cc: Mathieu Poirier <[email protected]>
> > > Cc: Anshuman Khandual <[email protected]>
> > > Cc: Leo Yan <[email protected]>
> > > Signed-off-by: Suzuki K Poulose <[email protected]>
> > > ---
> > > Change since v1:
> > > - Updated comment with ASCII art
> > > - Add _BYTES suffix for the space to skip for the work around.
> > > ---
> > > drivers/hwtracing/coresight/coresight-trbe.c | 144 +++++++++++++++++--
> > > 1 file changed, 132 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
> > > index f569010c672b..983dd5039e52 100644
> > > --- a/drivers/hwtracing/coresight/coresight-trbe.c
> > > +++ b/drivers/hwtracing/coresight/coresight-trbe.c
> > > @@ -16,6 +16,7 @@
> > > #define pr_fmt(fmt) DRVNAME ": " fmt
> > > #include <asm/barrier.h>
> > > +#include <asm/cpufeature.h>
> > > #include <asm/cputype.h>
> > > #include "coresight-self-hosted-trace.h"
> > > @@ -84,9 +85,17 @@ struct trbe_buf {
> > > * per TRBE instance, we keep track of the list of errata that
> > > * affects the given instance of the TRBE.
> > > */
> > > -#define TRBE_ERRATA_MAX 0
> > > +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE 0
> > > +#define TRBE_ERRATA_MAX 1
> > > +
> > > +/*
> > > + * Safe limit for the number of bytes that may be overwritten
> > > + * when the erratum is triggered.
> > > + */
> > > +#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES 256
> > > static unsigned long trbe_errata_cpucaps[TRBE_ERRATA_MAX] = {
> > > + [TRBE_WORKAROUND_OVERWRITE_FILL_MODE] = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
> > > };
> > > /*
> > > @@ -519,10 +528,13 @@ static void trbe_enable_hw(struct trbe_buf *buf)
> > > set_trbe_limit_pointer_enabled(buf->trbe_limit);
> > > }
> > > -static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
> > > +static enum trbe_fault_action trbe_get_fault_act(struct perf_output_handle *handle,
> > > + u64 trbsr)
> > > {
> > > int ec = get_trbe_ec(trbsr);
> > > int bsc = get_trbe_bsc(trbsr);
> > > + struct trbe_buf *buf = etm_perf_sink_config(handle);
> > > + struct trbe_cpudata *cpudata = buf->cpudata;
> > > WARN_ON(is_trbe_running(trbsr));
> > > if (is_trbe_trg(trbsr) || is_trbe_abort(trbsr))
> > > @@ -531,10 +543,16 @@ static enum trbe_fault_action trbe_get_fault_act(u64 trbsr)
> > > if ((ec == TRBE_EC_STAGE1_ABORT) || (ec == TRBE_EC_STAGE2_ABORT))
> > > return TRBE_FAULT_ACT_FATAL;
> > > - if (is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) {
> > > - if (get_trbe_write_pointer() == get_trbe_base_pointer())
> > > - return TRBE_FAULT_ACT_WRAP;
> > > - }
> > > + /*
> > > + * If the trbe is affected by TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
> > > + * it might write data after a WRAP event in the fill mode.
> > > + * Thus the check TRBPTR == TRBBASER will not be honored.
> > > + */
> > > + if ((is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) &&
> > > + (trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) ||
> > > + get_trbe_write_pointer() == get_trbe_base_pointer()))
> > > + return TRBE_FAULT_ACT_WRAP;
> > > +
> >
> > I'm very perplexed by the trbe_has_erratum() infrastructure... Since this is a
> > TRBE the code will always run on the CPU it is associated with, and if
> > I'm correct here we could call this_cpu_has_cap() directly with the same
> > outcome. I doubt that all drivers using the cpucaps subsystem carry a shadow
> > structure to keep the same information.
>
> Very valid question. Of course, we can use the this_cpu_has_cap()
> helper. Unlike the cpus_have_*_cap() - which gives you the system
> wide status of the erratum - the cpucap doesn't keep a cache of which
> CPUs are affected by a given erratum. Thus this_cpu_has_cap() would
> involve running the detection on the current CPU every time we call it.
> i.e., scanning the MIDR of the CPU through the list of affected MIDRs
> for the given erratum. This is a bit of overhead.
I've looked around in the kernel for other places where this_cpu_has_cap() is
used. In most instances it is part of some initialisation code where actions are
taken based on the return value of the function. In our case we need to call this
regularly so yes, I agree with your design.
>
> Given that we already have CPU specific information in trbe_cpudata, I
> chose to cache the affected errata locally. This gives us quick access
> to the erratum for individual TRBE instances. Of course this list is
> initialised at TRBE probe and thus avoids us having to do the costly
> check, each time we need it. I could make this clear in the patch
> which introduces the framework.
Yes please.
Thanks,
Mathieu
>
>
> Thanks for the review
>
> Suzuki
>
> > Thanks,
> > Mathieu