From: Robert Richter <[email protected]>
This patch series implements workarounds for HW errata in Cavium's
ThunderX GICV3.
The first one is a gicv3 code update I sent a while ago and is a
prerequisit for patch #4. Patch #2 adds generic code to parse the hw
revision provided by an IIDR register value and runs specific code if
hw matches. This is used to implement the actual errata fixes in patch
#3 (gicv3) and #4 (gicv3-its).
Robert Richter (4):
irqchip, gicv3-its: Read typer register outside the loop
irqchip, gicv3: Add HW revision detection and configuration
irqchip, gicv3: Implement Cavium ThunderX erratum 23154
irqchip, gicv3-its: Implement Cavium ThunderX errata 22375, 24313
drivers/irqchip/irq-gic-common.c | 11 +++++++++
drivers/irqchip/irq-gic-common.h | 9 +++++++
drivers/irqchip/irq-gic-v3-its.c | 51 ++++++++++++++++++++++++++++++++++++----
drivers/irqchip/irq-gic-v3.c | 51 +++++++++++++++++++++++++++++++++++++++-
4 files changed, 116 insertions(+), 6 deletions(-)
--
2.1.1
From: Robert Richter <[email protected]>
No need to read the typer register in the loop. Values do not change.
Signed-off-by: Robert Richter <[email protected]>
---
drivers/irqchip/irq-gic-v3-its.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 1b7e155869f6..c5ce21277920 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -803,6 +803,8 @@ static int its_alloc_tables(struct its_node *its)
int psz = SZ_64K;
u64 shr = GITS_BASER_InnerShareable;
u64 cache = GITS_BASER_WaWb;
+ u64 typer = readq_relaxed(its->base + GITS_TYPER);
+ u32 ids = GITS_TYPER_DEVBITS(typer);
for (i = 0; i < GITS_BASER_NR_REGS; i++) {
u64 val = readq_relaxed(its->base + GITS_BASER + i * 8);
@@ -825,9 +827,6 @@ static int its_alloc_tables(struct its_node *its)
* For other tables, only allocate a single page.
*/
if (type == GITS_BASER_TYPE_DEVICE) {
- u64 typer = readq_relaxed(its->base + GITS_TYPER);
- u32 ids = GITS_TYPER_DEVBITS(typer);
-
/*
* 'order' was initialized earlier to the default page
* granule of the the ITS. We can't have an allocation
--
2.1.1
From: Robert Richter <[email protected]>
Some GIC revisions require an individual configuration to esp. add
workarounds for HW bugs. This patch implements generic code to parse
the hw revision provided by an IIDR register value and runs specific
code if hw matches. There are functions that read the IIDR registers
for GICV3 and ITS (GICD_IIDR/GITS_IIDR) and then go through a list of
init functions to be called for specific versions.
The patch is needed to implement workarounds for HW errata in Cavium's
ThunderX GICV3.
Signed-off-by: Robert Richter <[email protected]>
---
drivers/irqchip/irq-gic-common.c | 11 +++++++++++
drivers/irqchip/irq-gic-common.h | 9 +++++++++
drivers/irqchip/irq-gic-v3-its.c | 15 +++++++++++++++
drivers/irqchip/irq-gic-v3.c | 14 ++++++++++++++
4 files changed, 49 insertions(+)
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index ad96ebb0c7ab..27ad9bef760f 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -21,6 +21,17 @@
#include "irq-gic-common.h"
+void gic_check_capabilities(u32 iidr, const struct gic_capabilities *cap,
+ void *data)
+{
+ for (; cap->desc; cap++) {
+ if ((iidr & cap->mask) != cap->id)
+ continue;
+ cap->init(data);
+ pr_info("%s\n", cap->desc);
+ }
+}
+
int gic_configure_irq(unsigned int irq, unsigned int type,
void __iomem *base, void (*sync_access)(void))
{
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index 35a9884778bd..90d55b97c0df 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -20,10 +20,19 @@
#include <linux/of.h>
#include <linux/irqdomain.h>
+struct gic_capabilities {
+ const char *desc;
+ void (*init)(void *data);
+ u32 id;
+ u32 mask;
+};
+
int gic_configure_irq(unsigned int irq, unsigned int type,
void __iomem *base, void (*sync_access)(void));
void gic_dist_config(void __iomem *base, int gic_irqs,
void (*sync_access)(void));
void gic_cpu_config(void __iomem *base, void (*sync_access)(void));
+void gic_check_capabilities(u32 iidr, const struct gic_capabilities *cap,
+ void *data);
#endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index c5ce21277920..de2fc82bba83 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -36,6 +36,7 @@
#include <asm/cputype.h>
#include <asm/exception.h>
+#include "irq-gic-common.h"
#include "irqchip.h"
#define ITS_FLAGS_CMDQ_NEEDS_FLUSHING (1 << 0)
@@ -1381,6 +1382,18 @@ static int its_force_quiescent(void __iomem *base)
}
}
+static const struct gic_capabilities its_errata[] = {
+ {
+ }
+};
+
+static void its_check_capabilities(struct its_node *its)
+{
+ u32 iidr = readl_relaxed(its->base + GITS_IIDR);
+
+ gic_check_capabilities(iidr, its_errata, its);
+}
+
static int its_probe(struct device_node *node, struct irq_domain *parent)
{
struct resource res;
@@ -1439,6 +1452,8 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
}
its->cmd_write = its->cmd_base;
+ its_check_capabilities(its);
+
err = its_alloc_tables(its);
if (err)
goto out_free_cmd;
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 49875adb6b44..f4bafc69cc18 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -765,6 +765,18 @@ static const struct irq_domain_ops gic_irq_domain_ops = {
.free = gic_irq_domain_free,
};
+static const struct gic_capabilities gicv3_errata[] = {
+ {
+ }
+};
+
+static void gicv3_check_capabilities(void)
+{
+ u32 iidr = readl_relaxed(gic_data.dist_base + GICD_IIDR);
+
+ gic_check_capabilities(iidr, gicv3_errata, NULL);
+}
+
static int __init gic_of_init(struct device_node *node, struct device_node *parent)
{
void __iomem *dist_base;
@@ -824,6 +836,8 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
gic_data.nr_redist_regions = nr_redist_regions;
gic_data.redist_stride = redist_stride;
+ gicv3_check_capabilities();
+
/*
* Find out how many interrupts are supported.
* The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI)
--
2.1.1
From: Robert Richter <[email protected]>
This patch implements Cavium ThunderX erratum 23154.
The gicv3 of ThunderX requires a modified version for reading the IAR
status to ensure data synchronization. Since this is in the fast-path
and called with each interrupt, runtime patching is used using jump
label patching for smallest overhead (no-op). This is the same
technique as used for tracepoints.
Signed-off-by: Robert Richter <[email protected]>
---
drivers/irqchip/irq-gic-v3.c | 37 ++++++++++++++++++++++++++++++++++++-
1 file changed, 36 insertions(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index f4bafc69cc18..57bb69686ec6 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -107,14 +107,38 @@ static void gic_redist_wait_for_rwp(void)
}
/* Low level accessors */
-static u64 __maybe_unused gic_read_iar(void)
+static u64 gic_read_iar_common(void)
+{
+ u64 irqstat;
+
+ asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
+ return irqstat;
+}
+
+/* Cavium ThunderX erratum 23154 */
+static u64 gic_read_iar_cavium_thunderx(void)
{
u64 irqstat;
+ asm volatile("nop;nop;nop;nop;");
+ asm volatile("nop;nop;nop;nop;");
asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
+ asm volatile("nop;nop;nop;nop;");
+ mb();
+
return irqstat;
}
+struct static_key is_cavium_thunderx = STATIC_KEY_INIT_FALSE;
+
+static u64 __maybe_unused gic_read_iar(void)
+{
+ if (static_key_false(&is_cavium_thunderx))
+ return gic_read_iar_common();
+ else
+ return gic_read_iar_cavium_thunderx();
+}
+
static void __maybe_unused gic_write_pmr(u64 val)
{
asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val));
@@ -765,8 +789,19 @@ static const struct irq_domain_ops gic_irq_domain_ops = {
.free = gic_irq_domain_free,
};
+static void gicv3_enable_cavium_thunderx(void *data)
+{
+ static_key_slow_inc(&is_cavium_thunderx);
+}
+
static const struct gic_capabilities gicv3_errata[] = {
{
+ .desc = "GIC: Cavium erratum 23154",
+ .id = 0xa100034c, /* ThunderX pass 1.x */
+ .mask = 0xffff0fff,
+ .init = gicv3_enable_cavium_thunderx,
+ },
+ {
}
};
--
2.1.1
From: Robert Richter <[email protected]>
This implements two gicv3-its errata for ThunderX. Both with small
impact affecting only ITS table allocation.
erratum 22375: only alloc 8MB table size
erratum 24313: ignore memory access type
The fixes are in ITS initialization and basically ignore memory access
type and table size provided by the TYPER and BASER registers.
Signed-off-by: Robert Richter <[email protected]>
---
drivers/irqchip/irq-gic-v3-its.c | 35 +++++++++++++++++++++++++++++++----
1 file changed, 31 insertions(+), 4 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index de2fc82bba83..049ef761fbc3 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -39,7 +39,8 @@
#include "irq-gic-common.h"
#include "irqchip.h"
-#define ITS_FLAGS_CMDQ_NEEDS_FLUSHING (1 << 0)
+#define ITS_FLAGS_CMDQ_NEEDS_FLUSHING (1ULL << 0)
+#define ITS_FLAGS_CAVIUM_THUNDERX (1ULL << 1)
#define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING (1 << 0)
@@ -803,9 +804,22 @@ static int its_alloc_tables(struct its_node *its)
int i;
int psz = SZ_64K;
u64 shr = GITS_BASER_InnerShareable;
- u64 cache = GITS_BASER_WaWb;
- u64 typer = readq_relaxed(its->base + GITS_TYPER);
- u32 ids = GITS_TYPER_DEVBITS(typer);
+ u64 cache;
+ u64 typer;
+ u32 ids;
+
+ if (its->flags & ITS_FLAGS_CAVIUM_THUNDERX) {
+ /*
+ * erratum 22375: only alloc 8MB table size
+ * erratum 24313: ignore memory access type
+ */
+ cache = 0;
+ ids = 0x13; /* 20 bits, 8MB */
+ } else {
+ cache = GITS_BASER_WaWb;
+ typer = readq_relaxed(its->base + GITS_TYPER);
+ ids = GITS_TYPER_DEVBITS(typer);
+ }
for (i = 0; i < GITS_BASER_NR_REGS; i++) {
u64 val = readq_relaxed(its->base + GITS_BASER + i * 8);
@@ -1382,8 +1396,21 @@ static int its_force_quiescent(void __iomem *base)
}
}
+static void its_enable_cavium_thunderx(void *data)
+{
+ struct its_node *its = data;
+
+ its->flags |= ITS_FLAGS_CAVIUM_THUNDERX;
+}
+
static const struct gic_capabilities its_errata[] = {
{
+ .desc = "ITS: Cavium errata 22375, 24313",
+ .id = 0xa100034c, /* ThunderX pass 1.x */
+ .mask = 0xffff0fff,
+ .init = its_enable_cavium_thunderx,
+ },
+ {
}
};
--
2.1.1
Hi Robert,
On 30/06/15 15:14, Robert Richter wrote:
> From: Robert Richter <[email protected]>
>
> This patch implements Cavium ThunderX erratum 23154.
>
> The gicv3 of ThunderX requires a modified version for reading the IAR
> status to ensure data synchronization. Since this is in the fast-path
> and called with each interrupt, runtime patching is used using jump
> label patching for smallest overhead (no-op). This is the same
> technique as used for tracepoints.
>
> Signed-off-by: Robert Richter <[email protected]>
> ---
> drivers/irqchip/irq-gic-v3.c | 37 ++++++++++++++++++++++++++++++++++++-
> 1 file changed, 36 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
> index f4bafc69cc18..57bb69686ec6 100644
> --- a/drivers/irqchip/irq-gic-v3.c
> +++ b/drivers/irqchip/irq-gic-v3.c
> @@ -107,14 +107,38 @@ static void gic_redist_wait_for_rwp(void)
> }
>
> /* Low level accessors */
> -static u64 __maybe_unused gic_read_iar(void)
> +static u64 gic_read_iar_common(void)
> +{
> + u64 irqstat;
> +
> + asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
> + return irqstat;
> +}
> +
> +/* Cavium ThunderX erratum 23154 */
> +static u64 gic_read_iar_cavium_thunderx(void)
> {
> u64 irqstat;
>
> + asm volatile("nop;nop;nop;nop;");
> + asm volatile("nop;nop;nop;nop;");
> asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
> + asm volatile("nop;nop;nop;nop;");
> + mb();
> +
> return irqstat;
> }
>
> +struct static_key is_cavium_thunderx = STATIC_KEY_INIT_FALSE;
> +
> +static u64 __maybe_unused gic_read_iar(void)
> +{
> + if (static_key_false(&is_cavium_thunderx))
> + return gic_read_iar_common();
> + else
> + return gic_read_iar_cavium_thunderx();
> +}
> +
> static void __maybe_unused gic_write_pmr(u64 val)
> {
> asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val));
> @@ -765,8 +789,19 @@ static const struct irq_domain_ops gic_irq_domain_ops = {
> .free = gic_irq_domain_free,
> };
>
> +static void gicv3_enable_cavium_thunderx(void *data)
> +{
> + static_key_slow_inc(&is_cavium_thunderx);
> +}
> +
> static const struct gic_capabilities gicv3_errata[] = {
> {
> + .desc = "GIC: Cavium erratum 23154",
> + .id = 0xa100034c, /* ThunderX pass 1.x */
> + .mask = 0xffff0fff,
> + .init = gicv3_enable_cavium_thunderx,
> + },
> + {
> }
> };
>
>
How does this work when running a guest? Does the virtualized access
suffer from the same erratum? If that's the case, we need a better
workaround...
Thanks,
M.
--
Jazz is not dead. It just smells funny...
On Tue, Jun 30, 2015 at 04:14:02PM +0200, Robert Richter wrote:
> +static u64 gic_read_iar_cavium_thunderx(void)
> {
> u64 irqstat;
>
> + asm volatile("nop;nop;nop;nop;");
> + asm volatile("nop;nop;nop;nop;");
> asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
> + asm volatile("nop;nop;nop;nop;");
> + mb();
NAK. Please read the GCC manual for proper use of asm(). Even with
"volatile" there, it doesn't stop GCC from inserting instructions
between these.
If you need an instruction sequence to be consecutive, then it _must_
be one single asm().
There should also be a comment explaining why that code is necessary
here. Please think about the future when you've forgotten why those
nops are there. The kernel is a long term project, and it's important
that people understand why things are coded the way they are so that
the kernel can be properly maintained into the future.
--
FTTC broadband for 0.8mile line: currently at 10.5Mbps down 400kbps up
according to speedtest.net.
On 30/06/15 15:14, Robert Richter wrote:
> From: Robert Richter <[email protected]>
>
> No need to read the typer register in the loop. Values do not change.
>
> Signed-off-by: Robert Richter <[email protected]>
> ---
> drivers/irqchip/irq-gic-v3-its.c | 5 ++---
> 1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
> index 1b7e155869f6..c5ce21277920 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -803,6 +803,8 @@ static int its_alloc_tables(struct its_node *its)
> int psz = SZ_64K;
> u64 shr = GITS_BASER_InnerShareable;
> u64 cache = GITS_BASER_WaWb;
> + u64 typer = readq_relaxed(its->base + GITS_TYPER);
> + u32 ids = GITS_TYPER_DEVBITS(typer);
>
> for (i = 0; i < GITS_BASER_NR_REGS; i++) {
> u64 val = readq_relaxed(its->base + GITS_BASER + i * 8);
> @@ -825,9 +827,6 @@ static int its_alloc_tables(struct its_node *its)
> * For other tables, only allocate a single page.
> */
> if (type == GITS_BASER_TYPE_DEVICE) {
> - u64 typer = readq_relaxed(its->base + GITS_TYPER);
> - u32 ids = GITS_TYPER_DEVBITS(typer);
> -
> /*
> * 'order' was initialized earlier to the default page
> * granule of the the ITS. We can't have an allocation
>
The TYPER value certainly don't change, but there is also only one
device table per ITS instance, so we only read it once. What do we gain
here?
Thanks,
M.
--
Jazz is not dead. It just smells funny...
Russell,
thanks for your comments.
On 06.07.15 11:48:12, Russell King - ARM Linux wrote:
> On Tue, Jun 30, 2015 at 04:14:02PM +0200, Robert Richter wrote:
> > +static u64 gic_read_iar_cavium_thunderx(void)
> > {
> > u64 irqstat;
> >
> > + asm volatile("nop;nop;nop;nop;");
> > + asm volatile("nop;nop;nop;nop;");
> > asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
> > + asm volatile("nop;nop;nop;nop;");
> > + mb();
>
> NAK. Please read the GCC manual for proper use of asm(). Even with
> "volatile" there, it doesn't stop GCC from inserting instructions
> between these.
>
> If you need an instruction sequence to be consecutive, then it _must_
> be one single asm().
I will update this section.
> There should also be a comment explaining why that code is necessary
> here. Please think about the future when you've forgotten why those
> nops are there. The kernel is a long term project, and it's important
> that people understand why things are coded the way they are so that
> the kernel can be properly maintained into the future.
I will add a comment to the code with a brief description. Would the
current text from the patch description including the erratum number
be sufficient for that?
Thanks,
-Robert
Marc,
On 06.07.15 11:43:02, Marc Zyngier wrote:
> On 30/06/15 15:14, Robert Richter wrote:
> > static const struct gic_capabilities gicv3_errata[] = {
> > {
> > + .desc = "GIC: Cavium erratum 23154",
> > + .id = 0xa100034c, /* ThunderX pass 1.x */
> > + .mask = 0xffff0fff,
> > + .init = gicv3_enable_cavium_thunderx,
> > + },
> > + {
> > }
> > };
> >
> >
>
> How does this work when running a guest? Does the virtualized access
> suffer from the same erratum? If that's the case, we need a better
> workaround...
We need to apply the workaround also for guests. So you are right,
evaluating GICD_IIDR does not enable the workaround then as the
register is emulated with ARM as implementer.
We considering MIDR_EL1 as a version check for this errata now. This
should be the host's cpuid when running as a guest, right?
Thanks,
-Robert
On 08/07/15 11:28, Robert Richter wrote:
> Marc,
>
> On 06.07.15 11:43:02, Marc Zyngier wrote:
>> On 30/06/15 15:14, Robert Richter wrote:
>>> static const struct gic_capabilities gicv3_errata[] = {
>>> {
>>> + .desc = "GIC: Cavium erratum 23154",
>>> + .id = 0xa100034c, /* ThunderX pass 1.x */
>>> + .mask = 0xffff0fff,
>>> + .init = gicv3_enable_cavium_thunderx,
>>> + },
>>> + {
>>> }
>>> };
>>>
>>>
>>
>> How does this work when running a guest? Does the virtualized access
>> suffer from the same erratum? If that's the case, we need a better
>> workaround...
>
> We need to apply the workaround also for guests. So you are right,
> evaluating GICD_IIDR does not enable the workaround then as the
> register is emulated with ARM as implementer.
>
> We considering MIDR_EL1 as a version check for this errata now. This
> should be the host's cpuid when running as a guest, right?
Yes, that should work, as we don't repaint MIDR_EL1 *yet*. But it also
means that we're going to have a hard time emulating another CPU (such
as A57) on top of ThunderX. Probably not a big deal at the moment...
M.
--
Jazz is not dead. It just smells funny...