On big-endian systems readl/writel may perform an unwanted endian swap,
breaking generic-chip.c. Let the platform code opt to use the __raw_
variants by selecting RAW_IRQ_ACCESSORS.
This is required in order for bcm3384 to use GENERIC_IRQ_CHIP. Several
existing irqchip drivers also use the __raw_ accessors directly, so it
is a reasonably common requirement.
Signed-off-by: Kevin Cernekee <[email protected]>
---
drivers/irqchip/Kconfig | 3 +++
include/linux/irq.h | 13 +++++++++++++
2 files changed, 16 insertions(+)
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index b21f12f..6f0e80b 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -2,6 +2,9 @@ config IRQCHIP
def_bool y
depends on OF_IRQ
+config RAW_IRQ_ACCESSORS
+ bool
+
config ARM_GIC
bool
select IRQ_DOMAIN
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 03f48d9..ed1ea8a 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -639,6 +639,17 @@ void arch_teardown_hwirq(unsigned int irq);
void irq_init_desc(unsigned int irq);
#endif
+#ifdef CONFIG_RAW_IRQ_ACCESSORS
+
+#ifndef irq_reg_writel
+# define irq_reg_writel(val, addr) __raw_writel(val, addr)
+#endif
+#ifndef irq_reg_readl
+# define irq_reg_readl(addr) __raw_readl(addr)
+#endif
+
+#else
+
#ifndef irq_reg_writel
# define irq_reg_writel(val, addr) writel(val, addr)
#endif
@@ -646,6 +657,8 @@ void irq_init_desc(unsigned int irq);
# define irq_reg_readl(addr) readl(addr)
#endif
+#endif
+
/**
* struct irq_chip_regs - register offsets for struct irq_gci
* @enable: Enable register offset to reg_base
--
2.1.1
This change was just made on bcm7120-l2, so let's keep things consistent
between the two drivers.
Signed-off-by: Kevin Cernekee <[email protected]>
---
drivers/irqchip/irq-brcmstb-l2.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
index c9bdf20..8b82b86 100644
--- a/drivers/irqchip/irq-brcmstb-l2.c
+++ b/drivers/irqchip/irq-brcmstb-l2.c
@@ -58,8 +58,8 @@ static void brcmstb_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
chained_irq_enter(chip, desc);
- status = __raw_readl(b->base + CPU_STATUS) &
- ~(__raw_readl(b->base + CPU_MASK_STATUS));
+ status = irq_reg_readl(b->base + CPU_STATUS) &
+ ~(irq_reg_readl(b->base + CPU_MASK_STATUS));
if (status == 0) {
raw_spin_lock(&desc->lock);
@@ -71,7 +71,7 @@ static void brcmstb_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
do {
irq = ffs(status) - 1;
/* ack at our level */
- __raw_writel(1 << irq, b->base + CPU_CLEAR);
+ irq_reg_writel(1 << irq, b->base + CPU_CLEAR);
status &= ~(1 << irq);
generic_handle_irq(irq_find_mapping(b->domain, irq));
} while (status);
@@ -86,12 +86,12 @@ static void brcmstb_l2_intc_suspend(struct irq_data *d)
irq_gc_lock(gc);
/* Save the current mask */
- b->saved_mask = __raw_readl(b->base + CPU_MASK_STATUS);
+ b->saved_mask = irq_reg_readl(b->base + CPU_MASK_STATUS);
if (b->can_wake) {
/* Program the wakeup mask */
- __raw_writel(~gc->wake_active, b->base + CPU_MASK_SET);
- __raw_writel(gc->wake_active, b->base + CPU_MASK_CLEAR);
+ irq_reg_writel(~gc->wake_active, b->base + CPU_MASK_SET);
+ irq_reg_writel(gc->wake_active, b->base + CPU_MASK_CLEAR);
}
irq_gc_unlock(gc);
}
@@ -103,11 +103,11 @@ static void brcmstb_l2_intc_resume(struct irq_data *d)
irq_gc_lock(gc);
/* Clear unmasked non-wakeup interrupts */
- __raw_writel(~b->saved_mask & ~gc->wake_active, b->base + CPU_CLEAR);
+ irq_reg_writel(~b->saved_mask & ~gc->wake_active, b->base + CPU_CLEAR);
/* Restore the saved mask */
- __raw_writel(b->saved_mask, b->base + CPU_MASK_SET);
- __raw_writel(~b->saved_mask, b->base + CPU_MASK_CLEAR);
+ irq_reg_writel(b->saved_mask, b->base + CPU_MASK_SET);
+ irq_reg_writel(~b->saved_mask, b->base + CPU_MASK_CLEAR);
irq_gc_unlock(gc);
}
@@ -132,8 +132,8 @@ int __init brcmstb_l2_intc_of_init(struct device_node *np,
}
/* Disable all interrupts by default */
- __raw_writel(0xffffffff, data->base + CPU_MASK_SET);
- __raw_writel(0xffffffff, data->base + CPU_CLEAR);
+ irq_reg_writel(0xffffffff, data->base + CPU_MASK_SET);
+ irq_reg_writel(0xffffffff, data->base + CPU_CLEAR);
data->parent_irq = irq_of_parse_and_map(np, 0);
if (data->parent_irq < 0) {
--
2.1.1
Most implementations of the bcm7120-l2 controller only have a single
32-bit enable word + 32-bit status word. But some instances have added
more enable/status pairs in order to support 64+ IRQs (which are all
ORed into one parent IRQ input). Make the following changes to allow
the driver to support this:
- Extend DT bindings so that multiple words can be specified for the
reg property, various masks, etc.
- Add loops to the probe/handle functions to deal with each word
separately
- Allocate 1 generic-chip for every 32 IRQs, so we can still use the
clr/set helper functions
- Update the documentation
Signed-off-by: Kevin Cernekee <[email protected]>
---
.../interrupt-controller/brcm,bcm7120-l2-intc.txt | 26 ++--
drivers/irqchip/irq-bcm7120-l2.c | 146 ++++++++++++++-------
2 files changed, 114 insertions(+), 58 deletions(-)
diff --git a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7120-l2-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7120-l2-intc.txt
index ff812a8..bae1f21 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7120-l2-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7120-l2-intc.txt
@@ -13,7 +13,12 @@ Such an interrupt controller has the following hardware design:
or if they will output an interrupt signal at this 2nd level interrupt
controller, in particular for UARTs
-- not all 32-bits within the interrupt controller actually map to an interrupt
+- typically has one 32-bit enable word and one 32-bit status word, but on
+ some hardware may have more than one enable/status pair
+
+- no atomic set/clear operations
+
+- not all bits within the interrupt controller actually map to an interrupt
The typical hardware layout for this controller is represented below:
@@ -48,7 +53,9 @@ The typical hardware layout for this controller is represented below:
Required properties:
- compatible: should be "brcm,bcm7120-l2-intc"
-- reg: specifies the base physical address and size of the registers
+- reg: specifies the base physical address and size of the registers;
+ multiple pairs may be specified, with the first pair handling IRQ offsets
+ 0..31 and the second pair handling 32..63
- interrupt-controller: identifies the node as an interrupt controller
- #interrupt-cells: specifies the number of cells needed to encode an interrupt
source, should be 1.
@@ -59,18 +66,21 @@ Required properties:
- brcm,int-map-mask: 32-bits bit mask describing how many and which interrupts
are wired to this 2nd level interrupt controller, and how they match their
respective interrupt parents. Should match exactly the number of interrupts
- specified in the 'interrupts' property.
+ specified in the 'interrupts' property, multiplied by the number of
+ enable/status register pairs implemented by this controller. For
+ multiple parent IRQs with multiple enable/status words, this looks like:
+ <irq0_w0 irq0_w1 irq1_w0 irq1_w1 ...>
Optional properties:
- brcm,irq-can-wake: if present, this means the L2 controller can be used as a
wakeup source for system suspend/resume.
-- brcm,int-fwd-mask: if present, a 32-bits bit mask to configure for the
- interrupts which have a mux gate, typically UARTs. Setting these bits will
- make their respective interrupts outputs bypass this 2nd level interrupt
- controller completely, it completely transparent for the interrupt controller
- parent
+- brcm,int-fwd-mask: if present, a bit mask to configure the interrupts which
+ have a mux gate, typically UARTs. Setting these bits will make their
+ respective interrupt outputs bypass this 2nd level interrupt controller
+ completely; it is completely transparent for the interrupt controller
+ parent. This should have one 32-bit word per enable/status pair.
Example:
diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
index c76c9ad..734fece 100644
--- a/drivers/irqchip/irq-bcm7120-l2.c
+++ b/drivers/irqchip/irq-bcm7120-l2.c
@@ -23,6 +23,7 @@
#include <linux/io.h>
#include <linux/irqdomain.h>
#include <linux/reboot.h>
+#include <linux/bitops.h>
#include <linux/irqchip/chained_irq.h>
#include "irqchip.h"
@@ -31,28 +32,43 @@
#define IRQEN 0x00
#define IRQSTAT 0x04
+#define MAX_WORDS 4
+#define IRQS_PER_WORD 32
+
struct bcm7120_l2_intc_data {
- void __iomem *base;
+ unsigned int n_words;
+ void __iomem *base[MAX_WORDS];
struct irq_domain *domain;
bool can_wake;
- u32 irq_fwd_mask;
- u32 irq_map_mask;
+ u32 irq_fwd_mask[MAX_WORDS];
+ u32 irq_map_mask[MAX_WORDS];
};
static void bcm7120_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
{
struct bcm7120_l2_intc_data *b = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
- u32 status;
+ unsigned int idx;
chained_irq_enter(chip, desc);
- status = irq_reg_readl(b->base + IRQSTAT);
- do {
- irq = ffs(status) - 1;
- status &= ~(1 << irq);
- generic_handle_irq(irq_find_mapping(b->domain, irq));
- } while (status);
+ for (idx = 0; idx < b->n_words; idx++) {
+ int base = idx * IRQS_PER_WORD;
+ struct irq_chip_generic *gc =
+ irq_get_domain_generic_chip(b->domain, base);
+ unsigned long pending;
+ int hwirq;
+
+ irq_gc_lock(gc);
+ pending = irq_reg_readl(b->base[idx] + IRQSTAT) &
+ gc->mask_cache;
+ irq_gc_unlock(gc);
+
+ for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) {
+ generic_handle_irq(irq_find_mapping(b->domain,
+ base + hwirq));
+ }
+ }
chained_irq_exit(chip, desc);
}
@@ -65,7 +81,7 @@ static void bcm7120_l2_intc_suspend(struct irq_data *d)
irq_gc_lock(gc);
if (b->can_wake) {
irq_reg_writel(gc->mask_cache | gc->wake_active,
- b->base + IRQEN);
+ gc->reg_base + IRQEN);
}
irq_gc_unlock(gc);
}
@@ -76,7 +92,7 @@ static void bcm7120_l2_intc_resume(struct irq_data *d)
/* Restore the saved mask */
irq_gc_lock(gc);
- irq_reg_writel(gc->mask_cache, b->base + IRQEN);
+ irq_reg_writel(gc->mask_cache, gc->reg_base + IRQEN);
irq_gc_unlock(gc);
}
@@ -85,6 +101,7 @@ static int bcm7120_l2_intc_init_one(struct device_node *dn,
int irq, const __be32 *map_mask)
{
int parent_irq;
+ unsigned int idx;
parent_irq = irq_of_parse_and_map(dn, irq);
if (parent_irq < 0) {
@@ -92,7 +109,12 @@ static int bcm7120_l2_intc_init_one(struct device_node *dn,
return parent_irq;
}
- data->irq_map_mask |= be32_to_cpup(map_mask + irq);
+ /* For multiple parent IRQs with multiple words, this looks like:
+ * <irq0_w0 irq0_w1 irq1_w0 irq1_w1 ...>
+ */
+ for (idx = 0; idx < data->n_words; idx++)
+ data->irq_map_mask[idx] |=
+ be32_to_cpup(map_mask + irq * data->n_words + idx);
irq_set_handler_data(parent_irq, data);
irq_set_chained_handler(parent_irq, bcm7120_l2_intc_irq_handle);
@@ -109,26 +131,41 @@ int __init bcm7120_l2_intc_of_init(struct device_node *dn,
struct irq_chip_type *ct;
const __be32 *map_mask;
int num_parent_irqs;
- int ret = 0, len, irq;
+ int ret = 0, len;
+ unsigned int idx, irq;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
- data->base = of_iomap(dn, 0);
- if (!data->base) {
+ for (idx = 0; idx < MAX_WORDS; idx++) {
+ data->base[idx] = of_iomap(dn, idx);
+ if (!data->base[idx])
+ break;
+ data->n_words = idx + 1;
+ }
+ if (!data->n_words) {
pr_err("failed to remap intc L2 registers\n");
ret = -ENOMEM;
- goto out_free;
+ goto out_unmap;
}
- if (of_property_read_u32(dn, "brcm,int-fwd-mask", &data->irq_fwd_mask))
- data->irq_fwd_mask = 0;
-
- /* Enable all interrupt specified in the interrupt forward mask and have
- * the other disabled
+ /* Enable all interrupts specified in the interrupt forward mask;
+ * disable all others. If the property doesn't exist (-EINVAL),
+ * assume all zeroes.
*/
- irq_reg_writel(data->irq_fwd_mask, data->base + IRQEN);
+ ret = of_property_read_u32_array(dn, "brcm,int-fwd-mask",
+ data->irq_fwd_mask, data->n_words);
+ if (ret == 0 || ret == -EINVAL) {
+ for (idx = 0; idx < data->n_words; idx++)
+ irq_reg_writel(data->irq_fwd_mask[idx],
+ data->base[idx] + IRQEN);
+ } else {
+ /* property exists but has the wrong number of words */
+ pr_err("invalid int-fwd-mask property\n");
+ ret = -EINVAL;
+ goto out_unmap;
+ }
num_parent_irqs = of_irq_count(dn);
if (num_parent_irqs <= 0) {
@@ -138,7 +175,8 @@ int __init bcm7120_l2_intc_of_init(struct device_node *dn,
}
map_mask = of_get_property(dn, "brcm,int-map-mask", &len);
- if (!map_mask || (len != (sizeof(*map_mask) * num_parent_irqs))) {
+ if (!map_mask ||
+ (len != (sizeof(*map_mask) * num_parent_irqs * data->n_words))) {
pr_err("invalid brcm,int-map-mask property\n");
ret = -EINVAL;
goto out_unmap;
@@ -150,14 +188,14 @@ int __init bcm7120_l2_intc_of_init(struct device_node *dn,
goto out_unmap;
}
- data->domain = irq_domain_add_linear(dn, 32,
- &irq_generic_chip_ops, NULL);
+ data->domain = irq_domain_add_linear(dn, IRQS_PER_WORD * data->n_words,
+ &irq_generic_chip_ops, NULL);
if (!data->domain) {
ret = -ENOMEM;
goto out_unmap;
}
- ret = irq_alloc_domain_generic_chips(data->domain, 32, 1,
+ ret = irq_alloc_domain_generic_chips(data->domain, IRQS_PER_WORD, 1,
dn->full_name, handle_level_irq, clr, 0,
IRQ_GC_INIT_MASK_CACHE);
if (ret) {
@@ -165,39 +203,47 @@ int __init bcm7120_l2_intc_of_init(struct device_node *dn,
goto out_free_domain;
}
- gc = irq_get_domain_generic_chip(data->domain, 0);
- gc->unused = 0xffffffff & ~data->irq_map_mask;
- gc->reg_base = data->base;
- gc->private = data;
- ct = gc->chip_types;
-
- ct->regs.mask = IRQEN;
- ct->chip.irq_mask = irq_gc_mask_clr_bit;
- ct->chip.irq_unmask = irq_gc_mask_set_bit;
- ct->chip.irq_ack = irq_gc_noop;
- ct->chip.irq_suspend = bcm7120_l2_intc_suspend;
- ct->chip.irq_resume = bcm7120_l2_intc_resume;
-
- if (of_property_read_bool(dn, "brcm,irq-can-wake")) {
+ if (of_property_read_bool(dn, "brcm,irq-can-wake"))
data->can_wake = true;
- /* This IRQ chip can wake the system, set all relevant child
- * interupts in wake_enabled mask
- */
- gc->wake_enabled = 0xffffffff;
- gc->wake_enabled &= ~gc->unused;
- ct->chip.irq_set_wake = irq_gc_set_wake;
+
+ for (idx = 0; idx < data->n_words; idx++) {
+ irq = idx * IRQS_PER_WORD;
+ gc = irq_get_domain_generic_chip(data->domain, irq);
+
+ gc->unused = 0xffffffff & ~data->irq_map_mask[idx];
+ gc->reg_base = data->base[idx];
+ gc->private = data;
+ ct = gc->chip_types;
+
+ ct->regs.mask = IRQEN;
+ ct->chip.irq_mask = irq_gc_mask_clr_bit;
+ ct->chip.irq_unmask = irq_gc_mask_set_bit;
+ ct->chip.irq_ack = irq_gc_noop;
+ ct->chip.irq_suspend = bcm7120_l2_intc_suspend;
+ ct->chip.irq_resume = bcm7120_l2_intc_resume;
+
+ if (data->can_wake) {
+ /* This IRQ chip can wake the system, set all
+ * relevant child interupts in wake_enabled mask
+ */
+ gc->wake_enabled = 0xffffffff;
+ gc->wake_enabled &= ~gc->unused;
+ ct->chip.irq_set_wake = irq_gc_set_wake;
+ }
}
pr_info("registered BCM7120 L2 intc (mem: 0x%p, parent IRQ(s): %d)\n",
- data->base, num_parent_irqs);
+ data->base[0], num_parent_irqs);
return 0;
out_free_domain:
irq_domain_remove(data->domain);
out_unmap:
- iounmap(data->base);
-out_free:
+ for (idx = 0; idx < MAX_WORDS; idx++) {
+ if (data->base[idx])
+ iounmap(data->base[idx]);
+ }
kfree(data);
return ret;
}
--
2.1.1
Some chips, such as BCM6328, only require the former driver. Some
BCM7xxx STB configurations only require the latter driver. Treat them
as two separate entities, and update the mach-bcm dependencies to
reflect the change.
Signed-off-by: Kevin Cernekee <[email protected]>
---
arch/arm/mach-bcm/Kconfig | 1 +
drivers/irqchip/Kconfig | 5 +++++
drivers/irqchip/Makefile | 4 ++--
drivers/irqchip/irq-bcm7120-l2.c | 2 +-
4 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index 2abad74..bf47eb0 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -125,6 +125,7 @@ config ARCH_BRCMSTB
select HAVE_ARM_ARCH_TIMER
select BRCMSTB_GISB_ARB
select BRCMSTB_L2_IRQ
+ select BCM7120_L2_IRQ
help
Say Y if you intend to run the kernel on a Broadcom ARM-based STB
chipset.
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 6a03c65..2d52b07 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -51,6 +51,11 @@ config ATMEL_AIC5_IRQ
select MULTI_IRQ_HANDLER
select SPARSE_IRQ
+config BCM7120_L2_IRQ
+ bool
+ select GENERIC_IRQ_CHIP
+ select IRQ_DOMAIN
+
config BRCMSTB_L2_IRQ
bool
select GENERIC_IRQ_CHIP
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 173bb5f..f0909d0 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -35,6 +35,6 @@ obj-$(CONFIG_TB10X_IRQC) += irq-tb10x.o
obj-$(CONFIG_XTENSA) += irq-xtensa-pic.o
obj-$(CONFIG_XTENSA_MX) += irq-xtensa-mx.o
obj-$(CONFIG_IRQ_CROSSBAR) += irq-crossbar.o
-obj-$(CONFIG_BRCMSTB_L2_IRQ) += irq-brcmstb-l2.o \
- irq-bcm7120-l2.o
+obj-$(CONFIG_BCM7120_L2_IRQ) += irq-bcm7120-l2.o
+obj-$(CONFIG_BRCMSTB_L2_IRQ) += irq-brcmstb-l2.o
obj-$(CONFIG_KEYSTONE_IRQ) += irq-keystone.o
diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
index 734fece..91065b9 100644
--- a/drivers/irqchip/irq-bcm7120-l2.c
+++ b/drivers/irqchip/irq-bcm7120-l2.c
@@ -247,5 +247,5 @@ out_unmap:
kfree(data);
return ret;
}
-IRQCHIP_DECLARE(brcmstb_l2_intc, "brcm,bcm7120-l2-intc",
+IRQCHIP_DECLARE(bcm7120_l2_intc, "brcm,bcm7120-l2-intc",
bcm7120_l2_intc_of_init);
--
2.1.1
The cached value already incorporates irq_fwd_mask, and was saved the
last time an IRQ was enabled/disabled.
Signed-off-by: Kevin Cernekee <[email protected]>
---
drivers/irqchip/irq-bcm7120-l2.c | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
index e9331f8..c76c9ad 100644
--- a/drivers/irqchip/irq-bcm7120-l2.c
+++ b/drivers/irqchip/irq-bcm7120-l2.c
@@ -37,7 +37,6 @@ struct bcm7120_l2_intc_data {
bool can_wake;
u32 irq_fwd_mask;
u32 irq_map_mask;
- u32 saved_mask;
};
static void bcm7120_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
@@ -62,14 +61,11 @@ static void bcm7120_l2_intc_suspend(struct irq_data *d)
{
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
struct bcm7120_l2_intc_data *b = gc->private;
- u32 reg;
irq_gc_lock(gc);
- /* Save the current mask and the interrupt forward mask */
- b->saved_mask = irq_reg_readl(b->base + IRQEN) | b->irq_fwd_mask;
if (b->can_wake) {
- reg = b->saved_mask | gc->wake_active;
- irq_reg_writel(reg, b->base + IRQEN);
+ irq_reg_writel(gc->mask_cache | gc->wake_active,
+ b->base + IRQEN);
}
irq_gc_unlock(gc);
}
@@ -77,11 +73,10 @@ static void bcm7120_l2_intc_suspend(struct irq_data *d)
static void bcm7120_l2_intc_resume(struct irq_data *d)
{
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
- struct bcm7120_l2_intc_data *b = gc->private;
/* Restore the saved mask */
irq_gc_lock(gc);
- irq_reg_writel(b->saved_mask, b->base + IRQEN);
+ irq_reg_writel(gc->mask_cache, b->base + IRQEN);
irq_gc_unlock(gc);
}
--
2.1.1
This mask should have been 0xffff_ffff, not 0x0fff_ffff.
The change should not have an effect on current users (STB) because bits
31:27 are unused.
Signed-off-by: Kevin Cernekee <[email protected]>
---
drivers/irqchip/irq-bcm7120-l2.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
index f041992..e9331f8 100644
--- a/drivers/irqchip/irq-bcm7120-l2.c
+++ b/drivers/irqchip/irq-bcm7120-l2.c
@@ -171,7 +171,7 @@ int __init bcm7120_l2_intc_of_init(struct device_node *dn,
}
gc = irq_get_domain_generic_chip(data->domain, 0);
- gc->unused = 0xfffffff & ~data->irq_map_mask;
+ gc->unused = 0xffffffff & ~data->irq_map_mask;
gc->reg_base = data->base;
gc->private = data;
ct = gc->chip_types;
--
2.1.1
This can compile for MIPS (or anything else) now.
Signed-off-by: Kevin Cernekee <[email protected]>
---
drivers/irqchip/Kconfig | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 6f0e80b..6a03c65 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -53,7 +53,6 @@ config ATMEL_AIC5_IRQ
config BRCMSTB_L2_IRQ
bool
- depends on ARM
select GENERIC_IRQ_CHIP
select IRQ_DOMAIN
--
2.1.1
This check may be prone to race conditions, e.g.
1) Some external event (e.g. GPIO level) causes an IRQ to become pending
2) Peripheral asserts the L2 IRQ
3) CPU takes an interrupt
4) The event from #1 goes away
5) bcm7120_l2_intc_irq_handle() reads back a 0 status
Unlike the hardware supported by brcmstb-l2, the bcm7120-l2 controller
does not latch the IRQ status. Bits can change if the inputs to the
controller change. Also, do_bad_IRQ() is an ARM-specific macro.
So let's just nuke it.
Signed-off-by: Kevin Cernekee <[email protected]>
---
drivers/irqchip/irq-bcm7120-l2.c | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
index b9f4fb8..49d8f3d 100644
--- a/drivers/irqchip/irq-bcm7120-l2.c
+++ b/drivers/irqchip/irq-bcm7120-l2.c
@@ -27,8 +27,6 @@
#include "irqchip.h"
-#include <asm/mach/irq.h>
-
/* Register offset in the L2 interrupt controller */
#define IRQEN 0x00
#define IRQSTAT 0x04
@@ -51,19 +49,12 @@ static void bcm7120_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
chained_irq_enter(chip, desc);
status = __raw_readl(b->base + IRQSTAT);
-
- if (status == 0) {
- do_bad_IRQ(irq, desc);
- goto out;
- }
-
do {
irq = ffs(status) - 1;
status &= ~(1 << irq);
generic_handle_irq(irq_find_mapping(b->domain, irq));
} while (status);
-out:
chained_irq_exit(chip, desc);
}
--
2.1.1
This keeps things consistent between the "core" bcm7120-l2 driver and the
helpers in generic-chip.c.
Signed-off-by: Kevin Cernekee <[email protected]>
---
drivers/irqchip/irq-bcm7120-l2.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
index 6472b71..f041992 100644
--- a/drivers/irqchip/irq-bcm7120-l2.c
+++ b/drivers/irqchip/irq-bcm7120-l2.c
@@ -48,7 +48,7 @@ static void bcm7120_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
chained_irq_enter(chip, desc);
- status = __raw_readl(b->base + IRQSTAT);
+ status = irq_reg_readl(b->base + IRQSTAT);
do {
irq = ffs(status) - 1;
status &= ~(1 << irq);
@@ -66,10 +66,10 @@ static void bcm7120_l2_intc_suspend(struct irq_data *d)
irq_gc_lock(gc);
/* Save the current mask and the interrupt forward mask */
- b->saved_mask = __raw_readl(b->base + IRQEN) | b->irq_fwd_mask;
+ b->saved_mask = irq_reg_readl(b->base + IRQEN) | b->irq_fwd_mask;
if (b->can_wake) {
reg = b->saved_mask | gc->wake_active;
- __raw_writel(reg, b->base + IRQEN);
+ irq_reg_writel(reg, b->base + IRQEN);
}
irq_gc_unlock(gc);
}
@@ -81,7 +81,7 @@ static void bcm7120_l2_intc_resume(struct irq_data *d)
/* Restore the saved mask */
irq_gc_lock(gc);
- __raw_writel(b->saved_mask, b->base + IRQEN);
+ irq_reg_writel(b->saved_mask, b->base + IRQEN);
irq_gc_unlock(gc);
}
@@ -133,7 +133,7 @@ int __init bcm7120_l2_intc_of_init(struct device_node *dn,
/* Enable all interrupt specified in the interrupt forward mask and have
* the other disabled
*/
- __raw_writel(data->irq_fwd_mask, data->base + IRQEN);
+ irq_reg_writel(data->irq_fwd_mask, data->base + IRQEN);
num_parent_irqs = of_irq_count(dn);
if (num_parent_irqs <= 0) {
--
2.1.1
A couple of accesses to IRQEN (base+0x00) just used "base" directly, so
they would break if IRQEN ever became nonzero. Make sure that all
reads/writes specify the register offset constant.
Signed-off-by: Kevin Cernekee <[email protected]>
---
drivers/irqchip/irq-bcm7120-l2.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
index 49d8f3d..6472b71 100644
--- a/drivers/irqchip/irq-bcm7120-l2.c
+++ b/drivers/irqchip/irq-bcm7120-l2.c
@@ -66,10 +66,10 @@ static void bcm7120_l2_intc_suspend(struct irq_data *d)
irq_gc_lock(gc);
/* Save the current mask and the interrupt forward mask */
- b->saved_mask = __raw_readl(b->base) | b->irq_fwd_mask;
+ b->saved_mask = __raw_readl(b->base + IRQEN) | b->irq_fwd_mask;
if (b->can_wake) {
reg = b->saved_mask | gc->wake_active;
- __raw_writel(reg, b->base);
+ __raw_writel(reg, b->base + IRQEN);
}
irq_gc_unlock(gc);
}
@@ -81,7 +81,7 @@ static void bcm7120_l2_intc_resume(struct irq_data *d)
/* Restore the saved mask */
irq_gc_lock(gc);
- __raw_writel(b->saved_mask, b->base);
+ __raw_writel(b->saved_mask, b->base + IRQEN);
irq_gc_unlock(gc);
}
--
2.1.1
The irq-brcmstb-l2 driver has a single dependency on the ARM code, the
do_bad_IRQ macro. Expand this macro in-place so that the driver can be
built on non-ARM platforms.
Signed-off-by: Kevin Cernekee <[email protected]>
---
drivers/irqchip/irq-brcmstb-l2.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
index c15c840..c9bdf20 100644
--- a/drivers/irqchip/irq-brcmstb-l2.c
+++ b/drivers/irqchip/irq-brcmstb-l2.c
@@ -19,6 +19,7 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/platform_device.h>
+#include <linux/spinlock.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/of_address.h>
@@ -30,8 +31,6 @@
#include <linux/irqchip.h>
#include <linux/irqchip/chained_irq.h>
-#include <asm/mach/irq.h>
-
#include "irqchip.h"
/* Register offsets in the L2 interrupt controller */
@@ -63,7 +62,9 @@ static void brcmstb_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
~(__raw_readl(b->base + CPU_MASK_STATUS));
if (status == 0) {
- do_bad_IRQ(irq, desc);
+ raw_spin_lock(&desc->lock);
+ handle_bad_irq(irq, desc);
+ raw_spin_unlock(&desc->lock);
goto out;
}
--
2.1.1
On Tuesday 28 October 2014 20:58:49 Kevin Cernekee wrote:
> The irq-brcmstb-l2 driver has a single dependency on the ARM code, the
> do_bad_IRQ macro. Expand this macro in-place so that the driver can be
> built on non-ARM platforms.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
>
Acked-by: Arnd Bergmann <[email protected]>
On Tuesday 28 October 2014 20:58:48 Kevin Cernekee wrote:
>
> +#ifdef CONFIG_RAW_IRQ_ACCESSORS
> +
> +#ifndef irq_reg_writel
> +# define irq_reg_writel(val, addr) __raw_writel(val, addr)
> +#endif
> +#ifndef irq_reg_readl
> +# define irq_reg_readl(addr) __raw_readl(addr)
> +#endif
> +
> +#else
> +
No, this is just wrong: registers almost always have a fixed endianess
indenpent of CPU endianess, so if you use __raw_writel, it will be
broken on one or the other.
If you have a machine that uses big-endian registers in the interrupt
controller, you need to find a way to use the correct accessors
(e.g. iowrite32be) and use them independent of what endianess the CPU
is running.
As this code is being used on all sorts of platforms, you can't assume
that they all use the same endianess, which makes it rather tricky.
As the first step, you can probably introduce a new Kconfig symbol
GENERIC_IRQ_CHIP_BE, and then make that mutually exclusive with the
existing users that all use little-endian registers:
#if defined(CONFIG_GENERIC_IRQ_CHIP) && !defined(CONFIG_GENERIC_IRQ_CHIP_BE)
#define irq_reg_writel(val, addr) writel(val, addr)
#else if defined(CONFIG_GENERIC_IRQ_CHIP_BE) && !defined(CONFIG_GENERIC_IRQ_CHIP)
#define irq_reg_writel(val, addr) iowrite32be(val, addr)
#else
/* provoke a compile error when this is used */
#define irq_reg_writel(val, addr) irq_reg_writel_unknown_endian(val, addr)
#endif
and
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -1,5 +1,6 @@
obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o
+obj-$(CONFIG_GENERIC_IRQ_CHIP_BE) += generic-chip.o
obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o
Note that you might also have a case where you have more than
one generic irqchip driver built into the kernel, which require
different endianess. We can't really support that case without
changing the generic-chip implementation.
Arnd
On Tuesday 28 October 2014 20:58:51 Kevin Cernekee wrote:
> This can compile for MIPS (or anything else) now.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
>
It's a silent symbol, so the dependency is obviously not needed,
Acked-by: Arnd Bergmann <[email protected]>
On Tuesday 28 October 2014 20:58:52 Kevin Cernekee wrote:
>
> irq_gc_lock(gc);
> /* Save the current mask and the interrupt forward mask */
> - b->saved_mask = __raw_readl(b->base) | b->irq_fwd_mask;
> + b->saved_mask = __raw_readl(b->base + IRQEN) | b->irq_fwd_mask;
> if (b->can_wake) {
> reg = b->saved_mask | gc->wake_active;
> - __raw_writel(reg, b->base);
> + __raw_writel(reg, b->base + IRQEN);
> }
> irq_gc_unlock(gc);
> }
> @@ -81,7 +81,7 @@ static void bcm7120_l2_intc_resume(struct irq_data *d)
>
> /* Restore the saved mask */
> irq_gc_lock(gc);
> - __raw_writel(b->saved_mask, b->base);
> + __raw_writel(b->saved_mask, b->base + IRQEN);
> irq_gc_unlock(gc);
>
You should really change this one too, to use fixed-endian accessors.
__raw_writel can't safely be used in drivers, and it will break
big-endian kernels running on ARM BRCMSTB.
Arnd
On Tuesday 28 October 2014 20:58:53 Kevin Cernekee wrote:
> This keeps things consistent between the "core" bcm7120-l2 driver and the
> helpers in generic-chip.c.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
> ---
>
Ah, you did. Nevermind my comment to patch 5 then ;-)
Acked-by: Arnd Bergmann <[email protected]>
On Tuesday 28 October 2014 20:58:54 Kevin Cernekee wrote:
> This change was just made on bcm7120-l2, so let's keep things consistent
> between the two drivers.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
>
Acked-by: Arnd Bergmann <[email protected]>
On Tuesday 28 October 2014 20:58:55 Kevin Cernekee wrote:
> This mask should have been 0xffff_ffff, not 0x0fff_ffff.
>
> The change should not have an effect on current users (STB) because bits
> 31:27 are unused.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
>
Acked-by: Arnd Bergmann <[email protected]>
On Tuesday 28 October 2014 20:58:57 Kevin Cernekee wrote:
> Most implementations of the bcm7120-l2 controller only have a single
> 32-bit enable word + 32-bit status word. But some instances have added
> more enable/status pairs in order to support 64+ IRQs (which are all
> ORed into one parent IRQ input). Make the following changes to allow
> the driver to support this:
>
> - Extend DT bindings so that multiple words can be specified for the
> reg property, various masks, etc.
>
> - Add loops to the probe/handle functions to deal with each word
> separately
>
> - Allocate 1 generic-chip for every 32 IRQs, so we can still use the
> clr/set helper functions
>
> - Update the documentation
>
> Signed-off-by: Kevin Cernekee <[email protected]>
You should probably specify a 'big-endian' DT property for the driver
to check. If you have both LE and BE versions of this device, we
must make sure that we use the correct accessors.
As long as we don't need to build a kernel that supports both (if
I understand you correctly, the ARM SoCs use a LE instance of this
device, while the MIPS SoCs use a BE version) you can still decide
at compile-time which one you want, but please add the runtime check
now, so if we ever get a new combination we can handle it at runtime
with a more complex driver implementation.
If I read your code right, you have decided to use one IRQ domain
per register set, rather than one domain for all of them. I don't
know which of the two ways is better here, but it would be good if
you could explain in the patch description why you did it like this.
Arnd
On Tuesday 28 October 2014 20:58:58 Kevin Cernekee wrote:
> Some chips, such as BCM6328, only require the former driver. Some
> BCM7xxx STB configurations only require the latter driver. Treat them
> as two separate entities, and update the mach-bcm dependencies to
> reflect the change.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
Acked-by: Arnd Bergmann <[email protected]>
Arnd
On Wednesday 29 October 2014 08:46:00 Arnd Bergmann wrote:
> On Tuesday 28 October 2014 20:58:52 Kevin Cernekee wrote:
> >
> > irq_gc_lock(gc);
> > /* Save the current mask and the interrupt forward mask */
> > - b->saved_mask = __raw_readl(b->base) | b->irq_fwd_mask;
> > + b->saved_mask = __raw_readl(b->base + IRQEN) | b->irq_fwd_mask;
> > if (b->can_wake) {
> > reg = b->saved_mask | gc->wake_active;
> > - __raw_writel(reg, b->base);
> > + __raw_writel(reg, b->base + IRQEN);
> > }
> > irq_gc_unlock(gc);
> > }
> > @@ -81,7 +81,7 @@ static void bcm7120_l2_intc_resume(struct irq_data *d)
> >
> > /* Restore the saved mask */
> > irq_gc_lock(gc);
> > - __raw_writel(b->saved_mask, b->base);
> > + __raw_writel(b->saved_mask, b->base + IRQEN);
> > irq_gc_unlock(gc);
> >
>
> You should really change this one too, to use fixed-endian accessors.
> __raw_writel can't safely be used in drivers, and it will break
> big-endian kernels running on ARM BRCMSTB.
As you already fix this in patch 6, please disregard my comment above,
your patch looks ok.
Acked-by: Arnd Bergmann <[email protected]>
On Tue, 28 Oct 2014, Kevin Cernekee wrote:
> On big-endian systems readl/writel may perform an unwanted endian swap,
> breaking generic-chip.c. Let the platform code opt to use the __raw_
> variants by selecting RAW_IRQ_ACCESSORS.
>
> This is required in order for bcm3384 to use GENERIC_IRQ_CHIP. Several
> existing irqchip drivers also use the __raw_ accessors directly, so it
> is a reasonably common requirement.
How does that work with multi arch kernels?
Thanks,
tglx
> Signed-off-by: Kevin Cernekee <[email protected]>
> ---
> drivers/irqchip/Kconfig | 3 +++
> include/linux/irq.h | 13 +++++++++++++
> 2 files changed, 16 insertions(+)
>
> diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
> index b21f12f..6f0e80b 100644
> --- a/drivers/irqchip/Kconfig
> +++ b/drivers/irqchip/Kconfig
> @@ -2,6 +2,9 @@ config IRQCHIP
> def_bool y
> depends on OF_IRQ
>
> +config RAW_IRQ_ACCESSORS
> + bool
> +
> config ARM_GIC
> bool
> select IRQ_DOMAIN
> diff --git a/include/linux/irq.h b/include/linux/irq.h
> index 03f48d9..ed1ea8a 100644
> --- a/include/linux/irq.h
> +++ b/include/linux/irq.h
> @@ -639,6 +639,17 @@ void arch_teardown_hwirq(unsigned int irq);
> void irq_init_desc(unsigned int irq);
> #endif
>
> +#ifdef CONFIG_RAW_IRQ_ACCESSORS
> +
> +#ifndef irq_reg_writel
> +# define irq_reg_writel(val, addr) __raw_writel(val, addr)
> +#endif
> +#ifndef irq_reg_readl
> +# define irq_reg_readl(addr) __raw_readl(addr)
> +#endif
> +
> +#else
> +
> #ifndef irq_reg_writel
> # define irq_reg_writel(val, addr) writel(val, addr)
> #endif
> @@ -646,6 +657,8 @@ void irq_init_desc(unsigned int irq);
> # define irq_reg_readl(addr) readl(addr)
> #endif
>
> +#endif
> +
> /**
> * struct irq_chip_regs - register offsets for struct irq_gci
> * @enable: Enable register offset to reg_base
> --
> 2.1.1
>
>
On 10/28/2014 08:58 PM, Kevin Cernekee wrote:
> This check may be prone to race conditions, e.g.
>
> 1) Some external event (e.g. GPIO level) causes an IRQ to become pending
> 2) Peripheral asserts the L2 IRQ
> 3) CPU takes an interrupt
> 4) The event from #1 goes away
> 5) bcm7120_l2_intc_irq_handle() reads back a 0 status
>
> Unlike the hardware supported by brcmstb-l2, the bcm7120-l2 controller
> does not latch the IRQ status. Bits can change if the inputs to the
> controller change. Also, do_bad_IRQ() is an ARM-specific macro.
>
> So let's just nuke it.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
Acked-by: Florian Fainelli <[email protected]>
> ---
> drivers/irqchip/irq-bcm7120-l2.c | 9 ---------
> 1 file changed, 9 deletions(-)
>
> diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
> index b9f4fb8..49d8f3d 100644
> --- a/drivers/irqchip/irq-bcm7120-l2.c
> +++ b/drivers/irqchip/irq-bcm7120-l2.c
> @@ -27,8 +27,6 @@
>
> #include "irqchip.h"
>
> -#include <asm/mach/irq.h>
> -
> /* Register offset in the L2 interrupt controller */
> #define IRQEN 0x00
> #define IRQSTAT 0x04
> @@ -51,19 +49,12 @@ static void bcm7120_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
> chained_irq_enter(chip, desc);
>
> status = __raw_readl(b->base + IRQSTAT);
> -
> - if (status == 0) {
> - do_bad_IRQ(irq, desc);
> - goto out;
> - }
> -
> do {
> irq = ffs(status) - 1;
> status &= ~(1 << irq);
> generic_handle_irq(irq_find_mapping(b->domain, irq));
> } while (status);
>
> -out:
> chained_irq_exit(chip, desc);
> }
>
>
On 10/28/2014 08:58 PM, Kevin Cernekee wrote:
> The irq-brcmstb-l2 driver has a single dependency on the ARM code, the
> do_bad_IRQ macro. Expand this macro in-place so that the driver can be
> built on non-ARM platforms.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
Acked-by: Florian Fainelli <[email protected]>
> ---
> drivers/irqchip/irq-brcmstb-l2.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
> index c15c840..c9bdf20 100644
> --- a/drivers/irqchip/irq-brcmstb-l2.c
> +++ b/drivers/irqchip/irq-brcmstb-l2.c
> @@ -19,6 +19,7 @@
> #include <linux/slab.h>
> #include <linux/module.h>
> #include <linux/platform_device.h>
> +#include <linux/spinlock.h>
> #include <linux/of.h>
> #include <linux/of_irq.h>
> #include <linux/of_address.h>
> @@ -30,8 +31,6 @@
> #include <linux/irqchip.h>
> #include <linux/irqchip/chained_irq.h>
>
> -#include <asm/mach/irq.h>
> -
> #include "irqchip.h"
>
> /* Register offsets in the L2 interrupt controller */
> @@ -63,7 +62,9 @@ static void brcmstb_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
> ~(__raw_readl(b->base + CPU_MASK_STATUS));
>
> if (status == 0) {
> - do_bad_IRQ(irq, desc);
> + raw_spin_lock(&desc->lock);
> + handle_bad_irq(irq, desc);
> + raw_spin_unlock(&desc->lock);
> goto out;
> }
>
>
On 10/28/2014 08:58 PM, Kevin Cernekee wrote:
> A couple of accesses to IRQEN (base+0x00) just used "base" directly, so
> they would break if IRQEN ever became nonzero. Make sure that all
> reads/writes specify the register offset constant.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
Acked-by: Florian Fainelli <[email protected]>
> ---
> drivers/irqchip/irq-bcm7120-l2.c | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
> index 49d8f3d..6472b71 100644
> --- a/drivers/irqchip/irq-bcm7120-l2.c
> +++ b/drivers/irqchip/irq-bcm7120-l2.c
> @@ -66,10 +66,10 @@ static void bcm7120_l2_intc_suspend(struct irq_data *d)
>
> irq_gc_lock(gc);
> /* Save the current mask and the interrupt forward mask */
> - b->saved_mask = __raw_readl(b->base) | b->irq_fwd_mask;
> + b->saved_mask = __raw_readl(b->base + IRQEN) | b->irq_fwd_mask;
> if (b->can_wake) {
> reg = b->saved_mask | gc->wake_active;
> - __raw_writel(reg, b->base);
> + __raw_writel(reg, b->base + IRQEN);
> }
> irq_gc_unlock(gc);
> }
> @@ -81,7 +81,7 @@ static void bcm7120_l2_intc_resume(struct irq_data *d)
>
> /* Restore the saved mask */
> irq_gc_lock(gc);
> - __raw_writel(b->saved_mask, b->base);
> + __raw_writel(b->saved_mask, b->base + IRQEN);
> irq_gc_unlock(gc);
> }
>
>
On 10/28/2014 08:58 PM, Kevin Cernekee wrote:
> This keeps things consistent between the "core" bcm7120-l2 driver and the
> helpers in generic-chip.c.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
Acked-by: Florian Fainelli <[email protected]>
> ---
> drivers/irqchip/irq-bcm7120-l2.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
> index 6472b71..f041992 100644
> --- a/drivers/irqchip/irq-bcm7120-l2.c
> +++ b/drivers/irqchip/irq-bcm7120-l2.c
> @@ -48,7 +48,7 @@ static void bcm7120_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
>
> chained_irq_enter(chip, desc);
>
> - status = __raw_readl(b->base + IRQSTAT);
> + status = irq_reg_readl(b->base + IRQSTAT);
> do {
> irq = ffs(status) - 1;
> status &= ~(1 << irq);
> @@ -66,10 +66,10 @@ static void bcm7120_l2_intc_suspend(struct irq_data *d)
>
> irq_gc_lock(gc);
> /* Save the current mask and the interrupt forward mask */
> - b->saved_mask = __raw_readl(b->base + IRQEN) | b->irq_fwd_mask;
> + b->saved_mask = irq_reg_readl(b->base + IRQEN) | b->irq_fwd_mask;
> if (b->can_wake) {
> reg = b->saved_mask | gc->wake_active;
> - __raw_writel(reg, b->base + IRQEN);
> + irq_reg_writel(reg, b->base + IRQEN);
> }
> irq_gc_unlock(gc);
> }
> @@ -81,7 +81,7 @@ static void bcm7120_l2_intc_resume(struct irq_data *d)
>
> /* Restore the saved mask */
> irq_gc_lock(gc);
> - __raw_writel(b->saved_mask, b->base + IRQEN);
> + irq_reg_writel(b->saved_mask, b->base + IRQEN);
> irq_gc_unlock(gc);
> }
>
> @@ -133,7 +133,7 @@ int __init bcm7120_l2_intc_of_init(struct device_node *dn,
> /* Enable all interrupt specified in the interrupt forward mask and have
> * the other disabled
> */
> - __raw_writel(data->irq_fwd_mask, data->base + IRQEN);
> + irq_reg_writel(data->irq_fwd_mask, data->base + IRQEN);
>
> num_parent_irqs = of_irq_count(dn);
> if (num_parent_irqs <= 0) {
>
On 10/28/2014 08:58 PM, Kevin Cernekee wrote:
> This change was just made on bcm7120-l2, so let's keep things consistent
> between the two drivers.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
Acked-by: Florian Fainelli <[email protected]>
> ---
> drivers/irqchip/irq-brcmstb-l2.c | 22 +++++++++++-----------
> 1 file changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
> index c9bdf20..8b82b86 100644
> --- a/drivers/irqchip/irq-brcmstb-l2.c
> +++ b/drivers/irqchip/irq-brcmstb-l2.c
> @@ -58,8 +58,8 @@ static void brcmstb_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
>
> chained_irq_enter(chip, desc);
>
> - status = __raw_readl(b->base + CPU_STATUS) &
> - ~(__raw_readl(b->base + CPU_MASK_STATUS));
> + status = irq_reg_readl(b->base + CPU_STATUS) &
> + ~(irq_reg_readl(b->base + CPU_MASK_STATUS));
>
> if (status == 0) {
> raw_spin_lock(&desc->lock);
> @@ -71,7 +71,7 @@ static void brcmstb_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
> do {
> irq = ffs(status) - 1;
> /* ack at our level */
> - __raw_writel(1 << irq, b->base + CPU_CLEAR);
> + irq_reg_writel(1 << irq, b->base + CPU_CLEAR);
> status &= ~(1 << irq);
> generic_handle_irq(irq_find_mapping(b->domain, irq));
> } while (status);
> @@ -86,12 +86,12 @@ static void brcmstb_l2_intc_suspend(struct irq_data *d)
>
> irq_gc_lock(gc);
> /* Save the current mask */
> - b->saved_mask = __raw_readl(b->base + CPU_MASK_STATUS);
> + b->saved_mask = irq_reg_readl(b->base + CPU_MASK_STATUS);
>
> if (b->can_wake) {
> /* Program the wakeup mask */
> - __raw_writel(~gc->wake_active, b->base + CPU_MASK_SET);
> - __raw_writel(gc->wake_active, b->base + CPU_MASK_CLEAR);
> + irq_reg_writel(~gc->wake_active, b->base + CPU_MASK_SET);
> + irq_reg_writel(gc->wake_active, b->base + CPU_MASK_CLEAR);
> }
> irq_gc_unlock(gc);
> }
> @@ -103,11 +103,11 @@ static void brcmstb_l2_intc_resume(struct irq_data *d)
>
> irq_gc_lock(gc);
> /* Clear unmasked non-wakeup interrupts */
> - __raw_writel(~b->saved_mask & ~gc->wake_active, b->base + CPU_CLEAR);
> + irq_reg_writel(~b->saved_mask & ~gc->wake_active, b->base + CPU_CLEAR);
>
> /* Restore the saved mask */
> - __raw_writel(b->saved_mask, b->base + CPU_MASK_SET);
> - __raw_writel(~b->saved_mask, b->base + CPU_MASK_CLEAR);
> + irq_reg_writel(b->saved_mask, b->base + CPU_MASK_SET);
> + irq_reg_writel(~b->saved_mask, b->base + CPU_MASK_CLEAR);
> irq_gc_unlock(gc);
> }
>
> @@ -132,8 +132,8 @@ int __init brcmstb_l2_intc_of_init(struct device_node *np,
> }
>
> /* Disable all interrupts by default */
> - __raw_writel(0xffffffff, data->base + CPU_MASK_SET);
> - __raw_writel(0xffffffff, data->base + CPU_CLEAR);
> + irq_reg_writel(0xffffffff, data->base + CPU_MASK_SET);
> + irq_reg_writel(0xffffffff, data->base + CPU_CLEAR);
>
> data->parent_irq = irq_of_parse_and_map(np, 0);
> if (data->parent_irq < 0) {
>
On 10/28/2014 08:58 PM, Kevin Cernekee wrote:
> This mask should have been 0xffff_ffff, not 0x0fff_ffff.
>
> The change should not have an effect on current users (STB) because bits
> 31:27 are unused.
Nice catch!
>
> Signed-off-by: Kevin Cernekee <[email protected]>
Acked-by: Florian Fainelli <[email protected]>
> ---
> drivers/irqchip/irq-bcm7120-l2.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
> index f041992..e9331f8 100644
> --- a/drivers/irqchip/irq-bcm7120-l2.c
> +++ b/drivers/irqchip/irq-bcm7120-l2.c
> @@ -171,7 +171,7 @@ int __init bcm7120_l2_intc_of_init(struct device_node *dn,
> }
>
> gc = irq_get_domain_generic_chip(data->domain, 0);
> - gc->unused = 0xfffffff & ~data->irq_map_mask;
> + gc->unused = 0xffffffff & ~data->irq_map_mask;
> gc->reg_base = data->base;
> gc->private = data;
> ct = gc->chip_types;
>
On 10/28/2014 08:58 PM, Kevin Cernekee wrote:
> The cached value already incorporates irq_fwd_mask, and was saved the
> last time an IRQ was enabled/disabled.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
Acked-by: Florian Fainelli <[email protected]>
> ---
> drivers/irqchip/irq-bcm7120-l2.c | 11 +++--------
> 1 file changed, 3 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
> index e9331f8..c76c9ad 100644
> --- a/drivers/irqchip/irq-bcm7120-l2.c
> +++ b/drivers/irqchip/irq-bcm7120-l2.c
> @@ -37,7 +37,6 @@ struct bcm7120_l2_intc_data {
> bool can_wake;
> u32 irq_fwd_mask;
> u32 irq_map_mask;
> - u32 saved_mask;
> };
>
> static void bcm7120_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc)
> @@ -62,14 +61,11 @@ static void bcm7120_l2_intc_suspend(struct irq_data *d)
> {
> struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
> struct bcm7120_l2_intc_data *b = gc->private;
> - u32 reg;
>
> irq_gc_lock(gc);
> - /* Save the current mask and the interrupt forward mask */
> - b->saved_mask = irq_reg_readl(b->base + IRQEN) | b->irq_fwd_mask;
> if (b->can_wake) {
> - reg = b->saved_mask | gc->wake_active;
> - irq_reg_writel(reg, b->base + IRQEN);
> + irq_reg_writel(gc->mask_cache | gc->wake_active,
> + b->base + IRQEN);
> }
> irq_gc_unlock(gc);
> }
> @@ -77,11 +73,10 @@ static void bcm7120_l2_intc_suspend(struct irq_data *d)
> static void bcm7120_l2_intc_resume(struct irq_data *d)
> {
> struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
> - struct bcm7120_l2_intc_data *b = gc->private;
>
> /* Restore the saved mask */
> irq_gc_lock(gc);
> - irq_reg_writel(b->saved_mask, b->base + IRQEN);
> + irq_reg_writel(gc->mask_cache, b->base + IRQEN);
> irq_gc_unlock(gc);
> }
>
>
On 10/28/2014 08:58 PM, Kevin Cernekee wrote:
> This can compile for MIPS (or anything else) now.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
Acked-by: Florian Fainelli <[email protected]>
> ---
> drivers/irqchip/Kconfig | 1 -
> 1 file changed, 1 deletion(-)
>
> diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
> index 6f0e80b..6a03c65 100644
> --- a/drivers/irqchip/Kconfig
> +++ b/drivers/irqchip/Kconfig
> @@ -53,7 +53,6 @@ config ATMEL_AIC5_IRQ
>
> config BRCMSTB_L2_IRQ
> bool
> - depends on ARM
> select GENERIC_IRQ_CHIP
> select IRQ_DOMAIN
>
>
On 10/28/2014 08:58 PM, Kevin Cernekee wrote:
> Some chips, such as BCM6328, only require the former driver. Some
> BCM7xxx STB configurations only require the latter driver. Treat them
> as two separate entities, and update the mach-bcm dependencies to
> reflect the change.
>
> Signed-off-by: Kevin Cernekee <[email protected]>
Acked-by: Florian Fainelli <[email protected]>
> ---
> arch/arm/mach-bcm/Kconfig | 1 +
> drivers/irqchip/Kconfig | 5 +++++
> drivers/irqchip/Makefile | 4 ++--
> drivers/irqchip/irq-bcm7120-l2.c | 2 +-
> 4 files changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
> index 2abad74..bf47eb0 100644
> --- a/arch/arm/mach-bcm/Kconfig
> +++ b/arch/arm/mach-bcm/Kconfig
> @@ -125,6 +125,7 @@ config ARCH_BRCMSTB
> select HAVE_ARM_ARCH_TIMER
> select BRCMSTB_GISB_ARB
> select BRCMSTB_L2_IRQ
> + select BCM7120_L2_IRQ
> help
> Say Y if you intend to run the kernel on a Broadcom ARM-based STB
> chipset.
> diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
> index 6a03c65..2d52b07 100644
> --- a/drivers/irqchip/Kconfig
> +++ b/drivers/irqchip/Kconfig
> @@ -51,6 +51,11 @@ config ATMEL_AIC5_IRQ
> select MULTI_IRQ_HANDLER
> select SPARSE_IRQ
>
> +config BCM7120_L2_IRQ
> + bool
> + select GENERIC_IRQ_CHIP
> + select IRQ_DOMAIN
> +
> config BRCMSTB_L2_IRQ
> bool
> select GENERIC_IRQ_CHIP
> diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
> index 173bb5f..f0909d0 100644
> --- a/drivers/irqchip/Makefile
> +++ b/drivers/irqchip/Makefile
> @@ -35,6 +35,6 @@ obj-$(CONFIG_TB10X_IRQC) += irq-tb10x.o
> obj-$(CONFIG_XTENSA) += irq-xtensa-pic.o
> obj-$(CONFIG_XTENSA_MX) += irq-xtensa-mx.o
> obj-$(CONFIG_IRQ_CROSSBAR) += irq-crossbar.o
> -obj-$(CONFIG_BRCMSTB_L2_IRQ) += irq-brcmstb-l2.o \
> - irq-bcm7120-l2.o
> +obj-$(CONFIG_BCM7120_L2_IRQ) += irq-bcm7120-l2.o
> +obj-$(CONFIG_BRCMSTB_L2_IRQ) += irq-brcmstb-l2.o
> obj-$(CONFIG_KEYSTONE_IRQ) += irq-keystone.o
> diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
> index 734fece..91065b9 100644
> --- a/drivers/irqchip/irq-bcm7120-l2.c
> +++ b/drivers/irqchip/irq-bcm7120-l2.c
> @@ -247,5 +247,5 @@ out_unmap:
> kfree(data);
> return ret;
> }
> -IRQCHIP_DECLARE(brcmstb_l2_intc, "brcm,bcm7120-l2-intc",
> +IRQCHIP_DECLARE(bcm7120_l2_intc, "brcm,bcm7120-l2-intc",
> bcm7120_l2_intc_of_init);
>
On 10/29/2014 12:43 AM, Arnd Bergmann wrote:
> On Tuesday 28 October 2014 20:58:48 Kevin Cernekee wrote:
>>
>> +#ifdef CONFIG_RAW_IRQ_ACCESSORS
>> +
>> +#ifndef irq_reg_writel
>> +# define irq_reg_writel(val, addr) __raw_writel(val, addr)
>> +#endif
>> +#ifndef irq_reg_readl
>> +# define irq_reg_readl(addr) __raw_readl(addr)
>> +#endif
>> +
>> +#else
>> +
>
> No, this is just wrong: registers almost always have a fixed endianess
> indenpent of CPU endianess, so if you use __raw_writel, it will be
> broken on one or the other.
Our brcmstb platforms had an endian strap settings for MIPS-based
platforms, and for most peripherals this would be just completely
transparent, as the HW always will do the internal swapping, such that
host CPU does read/writes in its native endianess regardless of the
actual strap settings.
AFAICT bcm3384, a MIPS-based Cable Modem platform has only one endianess
setting: BE, and the HW only supports that specific endianess.
>
> If you have a machine that uses big-endian registers in the interrupt
> controller, you need to find a way to use the correct accessors
> (e.g. iowrite32be) and use them independent of what endianess the CPU
> is running.
>
> As this code is being used on all sorts of platforms, you can't assume
> that they all use the same endianess, which makes it rather tricky.
I think the more general problem with the use of readl_*() I/O accessors
is that they just happen to work fine on most platforms out there: ARM
Little-endian, because this nicely matches the endianess expected by the
HW and that does not enforce an audit of whether your actual peripheral
expects little-endian writes to be done.
The other problem is that readl() on ARM implies a barrier, which is not
necesarily true/necessary for some other platforms such as some MIPS
processors.
>
> As the first step, you can probably introduce a new Kconfig symbol
> GENERIC_IRQ_CHIP_BE, and then make that mutually exclusive with the
> existing users that all use little-endian registers:
>
> #if defined(CONFIG_GENERIC_IRQ_CHIP) && !defined(CONFIG_GENERIC_IRQ_CHIP_BE)
> #define irq_reg_writel(val, addr) writel(val, addr)
> #else if defined(CONFIG_GENERIC_IRQ_CHIP_BE) && !defined(CONFIG_GENERIC_IRQ_CHIP)
> #define irq_reg_writel(val, addr) iowrite32be(val, addr)
> #else
> /* provoke a compile error when this is used */
> #define irq_reg_writel(val, addr) irq_reg_writel_unknown_endian(val, addr)
> #endif
>
> and
>
> --- a/kernel/irq/Makefile
> +++ b/kernel/irq/Makefile
> @@ -1,5 +1,6 @@
>
> obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o
> +obj-$(CONFIG_GENERIC_IRQ_CHIP_BE) += generic-chip.o
> obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o
> obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
> obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o
>
> Note that you might also have a case where you have more than
> one generic irqchip driver built into the kernel, which require
> different endianess. We can't really support that case without
> changing the generic-chip implementation.
>
> Arnd
>
On Wed, Oct 29, 2014 at 12:43 AM, Arnd Bergmann <[email protected]> wrote:
> On Tuesday 28 October 2014 20:58:48 Kevin Cernekee wrote:
>>
>> +#ifdef CONFIG_RAW_IRQ_ACCESSORS
>> +
>> +#ifndef irq_reg_writel
>> +# define irq_reg_writel(val, addr) __raw_writel(val, addr)
>> +#endif
>> +#ifndef irq_reg_readl
>> +# define irq_reg_readl(addr) __raw_readl(addr)
>> +#endif
>> +
>> +#else
>> +
>
> No, this is just wrong: registers almost always have a fixed endianess
> indenpent of CPU endianess, so if you use __raw_writel, it will be
> broken on one or the other.
>
> If you have a machine that uses big-endian registers in the interrupt
> controller, you need to find a way to use the correct accessors
> (e.g. iowrite32be) and use them independent of what endianess the CPU
> is running.
>
> As this code is being used on all sorts of platforms, you can't assume
> that they all use the same endianess, which makes it rather tricky.
>
> As the first step, you can probably introduce a new Kconfig symbol
> GENERIC_IRQ_CHIP_BE, and then make that mutually exclusive with the
> existing users that all use little-endian registers:
>
> #if defined(CONFIG_GENERIC_IRQ_CHIP) && !defined(CONFIG_GENERIC_IRQ_CHIP_BE)
> #define irq_reg_writel(val, addr) writel(val, addr)
> #else if defined(CONFIG_GENERIC_IRQ_CHIP_BE) && !defined(CONFIG_GENERIC_IRQ_CHIP)
> #define irq_reg_writel(val, addr) iowrite32be(val, addr)
> #else
> /* provoke a compile error when this is used */
> #define irq_reg_writel(val, addr) irq_reg_writel_unknown_endian(val, addr)
> #endif
Thanks for the quick feedback, guys. Let me try to fill in a little
more background information.
The irqchip drivers in question can be used on a variety of different SoCs:
BCM7xxx STB chip with ARM host (always LE)
BCM7xxx STB chip with MIPS host (user-selectable LE or BE via jumper)
BCM33xx cable chip with MIPS host (always BE)
BCM33xx cable chip with ARM host (always LE)
BCM63xx[x] DSL chip with MIPS host (always BE)
BCM63xx[x] DSL chip with ARM host (always LE, I think)
BCM68xx PON chip with MIPS host (always BE)
The host CPU is connected to the peripheral/register interface using a
32-bit wide data bus. A simple 32-bit store originating from the host
CPU, targeted to an onchip SoC peripheral, will never need endian
swapping. i.e. this code works equally well on all supported systems
regardless of endianness:
volatile u32 *foo = (void *)MY_REG_VA;
*foo = 0x12345678;
8-bit and 16-bit accesses may be another story, but only appear in a
few very special peripherals.
The external PCI/PCIe address space, by contrast, is always mapped
"bytewise," such that a 32-bit word's LSB is at offset 0 and the MSB
is at offset 3. On the BE platforms, readl/writel/readw/writew will
implement an extra endian swap, allowing stock PCI device drivers such
as bnx2 or e1000e to work without modification. (The other option is
to byteswap the data but use address swizzling for 8/16 bit accesses,
that isn't enabled by default.)
The problem we see here is that irq_reg_{readl,writel} use
readl/writel to access a non-PCI peripheral, thus adding an unwanted
endian swap. And I can't avoid using the irq_reg_* accessors unless I
skip using GENERIC_IRQ_CHIP.
So, a few possible solutions include:
1) Implement your CONFIG_GENERIC_IRQ_CHIP_BE suggestion. This could
probably be made to work, but I would need to define
CONFIG_GENERIC_IRQ_CHIP / CONFIG_GENERIC_IRQ_CHIP_BE conditionally
based on whether the build was LE or BE. It would be nicer if the
driver didn't have to think about endianness because we know all of
these register accesses are always "native."
2) Offer a common way for irqchips to force GENERIC_IRQ_CHIP to use
the __raw_ variants. Since there are already other irqchip drivers
using __raw_*, this seems like it might be useful to others someday.
3) Stuff my __raw_ definitions into the mach-specific <irq.h>.
4) Don't use GENERIC_IRQ_CHIP at all; just reimplement the helpers
locally using __raw_* macros.
> registers almost always have a fixed endianess
> indenpent of CPU endianess
Going back to this statement - in my own personal experience, SoCs are
usually designed such that extra swaps are NOT necessary when
accessing onchip peripherals. Although I've seen a few cases where
1-2 peripherals, often third party IP cores, needed special treatment.
FWIW, several of the BCM7xxx peripherals default to "native" mode (no
swap for either LE/BE), but can be optionally reconfigured as LE in
order to preserve compatibility with the standard AHCI/SDHCI/...
drivers that use the PCI accessors.
Not sure how easy it is to figure out which other SoCs do require the
swap, as we'd need to exclude both PCI drivers and LE hosts whose
drivers just used plain readl. But a quick look around the drivers/
tree shows quite a few users of the __raw_ accessors:
$ git grep -l __raw_readl drivers | wc -l
228
By contrast, for BE-only registers:
$ git grep -lE "(ioread32be)|(readl_be)" drivers/ | wc -l
42
The latter list seems to include a lot of FPGAs. Maybe it costs them
too many gates/LEs to support both endian orderings.
Thomas:
> How does that work with multi arch kernels?
I am assuming this question refers to e.g. CONFIG_ARCH_MULTIPLATFORM
If GENERIC_IRQ_CHIP is being used, the current implementation of
generic-chip.c will have to pick one global definition of
irq_reg_{readl,writel} for all supported SoCs.
One possibility is that individual irqchip drivers requiring special
accessors can pass in their own function pointers, similar to this:
struct sdhci_ops {
#ifdef CONFIG_MMC_SDHCI_IO_ACCESSORS
u32 (*read_l)(struct sdhci_host *host, int reg);
u16 (*read_w)(struct sdhci_host *host, int reg);
u8 (*read_b)(struct sdhci_host *host, int reg);
void (*write_l)(struct sdhci_host *host, u32 val, int reg);
void (*write_w)(struct sdhci_host *host, u16 val, int reg);
void (*write_b)(struct sdhci_host *host, u8 val, int reg);
#endif
and fall back to readl/writel if none are supplied. It would be nice
if this provided common definitions for the __raw_ and maybe the BE
variants too.
Or, we could add IRQ_GC_NATIVE_IO and/or IRQ_GC_BE_IO to enum irq_gc_flags.
Would either of these choices satisfy everyone's goals?
On Wed, 29 Oct 2014, Kevin Cernekee wrote:
> Thomas:
> > How does that work with multi arch kernels?
>
> I am assuming this question refers to e.g. CONFIG_ARCH_MULTIPLATFORM
>
> If GENERIC_IRQ_CHIP is being used, the current implementation of
> generic-chip.c will have to pick one global definition of
> irq_reg_{readl,writel} for all supported SoCs.
>
> One possibility is that individual irqchip drivers requiring special
> accessors can pass in their own function pointers, similar to this:
>
> struct sdhci_ops {
> #ifdef CONFIG_MMC_SDHCI_IO_ACCESSORS
> u32 (*read_l)(struct sdhci_host *host, int reg);
> u16 (*read_w)(struct sdhci_host *host, int reg);
> u8 (*read_b)(struct sdhci_host *host, int reg);
> void (*write_l)(struct sdhci_host *host, u32 val, int reg);
> void (*write_w)(struct sdhci_host *host, u16 val, int reg);
> void (*write_b)(struct sdhci_host *host, u8 val, int reg);
> #endif
>
> and fall back to readl/writel if none are supplied. It would be nice
> if this provided common definitions for the __raw_ and maybe the BE
> variants too.
>
> Or, we could add IRQ_GC_NATIVE_IO and/or IRQ_GC_BE_IO to enum irq_gc_flags.
I definitely prefer to have these options in the generic chip
implementation so we avoid that driver writers duplicate code in
creative and wrong ways.
Thanks,
tglx
On Wednesday 29 October 2014 11:48:39 Kevin Cernekee wrote:
> On Wed, Oct 29, 2014 at 12:43 AM, Arnd Bergmann <[email protected]> wrote:
> > On Tuesday 28 October 2014 20:58:48 Kevin Cernekee wrote:
> >>
> >> +#ifdef CONFIG_RAW_IRQ_ACCESSORS
> >> +
> >> +#ifndef irq_reg_writel
> >> +# define irq_reg_writel(val, addr) __raw_writel(val, addr)
> >> +#endif
> >> +#ifndef irq_reg_readl
> >> +# define irq_reg_readl(addr) __raw_readl(addr)
> >> +#endif
> >> +
> >> +#else
> >> +
> >
> > No, this is just wrong: registers almost always have a fixed endianess
> > indenpent of CPU endianess, so if you use __raw_writel, it will be
> > broken on one or the other.
> >
> > If you have a machine that uses big-endian registers in the interrupt
> > controller, you need to find a way to use the correct accessors
> > (e.g. iowrite32be) and use them independent of what endianess the CPU
> > is running.
> >
> > As this code is being used on all sorts of platforms, you can't assume
> > that they all use the same endianess, which makes it rather tricky.
> >
> > As the first step, you can probably introduce a new Kconfig symbol
> > GENERIC_IRQ_CHIP_BE, and then make that mutually exclusive with the
> > existing users that all use little-endian registers:
> >
> > #if defined(CONFIG_GENERIC_IRQ_CHIP) && !defined(CONFIG_GENERIC_IRQ_CHIP_BE)
> > #define irq_reg_writel(val, addr) writel(val, addr)
> > #else if defined(CONFIG_GENERIC_IRQ_CHIP_BE) && !defined(CONFIG_GENERIC_IRQ_CHIP)
> > #define irq_reg_writel(val, addr) iowrite32be(val, addr)
> > #else
> > /* provoke a compile error when this is used */
> > #define irq_reg_writel(val, addr) irq_reg_writel_unknown_endian(val, addr)
> > #endif
>
> Thanks for the quick feedback, guys. Let me try to fill in a little
> more background information.
>
> The irqchip drivers in question can be used on a variety of different SoCs:
>
> BCM7xxx STB chip with ARM host (always LE)
> BCM7xxx STB chip with MIPS host (user-selectable LE or BE via jumper)
> BCM33xx cable chip with MIPS host (always BE)
> BCM33xx cable chip with ARM host (always LE)
> BCM63xx[x] DSL chip with MIPS host (always BE)
> BCM63xx[x] DSL chip with ARM host (always LE, I think)
> BCM68xx PON chip with MIPS host (always BE)
>
> The host CPU is connected to the peripheral/register interface using a
> 32-bit wide data bus. A simple 32-bit store originating from the host
> CPU, targeted to an onchip SoC peripheral, will never need endian
> swapping. i.e. this code works equally well on all supported systems
> regardless of endianness:
>
> volatile u32 *foo = (void *)MY_REG_VA;
> *foo = 0x12345678;
>
> 8-bit and 16-bit accesses may be another story, but only appear in a
> few very special peripherals.
Sorry, but this makes no sense. If you run a little-endian kernel
on one of the MIPS systems that you marked as "always BE", or a
big-endian kernel on the systems that are marked "always LE",
then you have to byte swap.
Same for the BCM7xxx MIPS chip if the jumper sets the strapping
pin the opposite way from the running kernel, although I can see
the argument that you would hope nobody does that.
On the other hand, if you give hardware designers two ways to do
something, of course they will do both eventually, so I'm sure
someone has already done it (probably not supported using upstream
Linux)
> The problem we see here is that irq_reg_{readl,writel} use
> readl/writel to access a non-PCI peripheral, thus adding an unwanted
> endian swap. And I can't avoid using the irq_reg_* accessors unless I
> skip using GENERIC_IRQ_CHIP.
That is probably the best way forward.
> So, a few possible solutions include:
>
> 1) Implement your CONFIG_GENERIC_IRQ_CHIP_BE suggestion. This could
> probably be made to work, but I would need to define
> CONFIG_GENERIC_IRQ_CHIP / CONFIG_GENERIC_IRQ_CHIP_BE conditionally
> based on whether the build was LE or BE. It would be nicer if the
> driver didn't have to think about endianness because we know all of
> these register accesses are always "native."
If you want to support "native" you have three endianess settings
to support in your driver, not one, and a run-time selection
that looks at the "big-endian" property in DT as well as the
endianess that the kernel was built for.
> 2) Offer a common way for irqchips to force GENERIC_IRQ_CHIP to use
> the __raw_ variants. Since there are already other irqchip drivers
> using __raw_*, this seems like it might be useful to others someday.
The drivers using __raw_ today (exynos, mxs, s3c24xx) are all broken
and should be fixed. This is all old code that was written without
taking endianess into consideration and breaks if you try to run a
big-endian kernel.
> 3) Stuff my __raw_ definitions into the mach-specific <irq.h>.
That would still break the use of secondary interrupt controllers
using generic irqchip.
> 4) Don't use GENERIC_IRQ_CHIP at all; just reimplement the helpers
> locally using __raw_* macros.
This would break the ARM machines, unless you make it depend on
the CPU architecture. You should still check for the "big-endian"
property to see if the strapping pin was actually set the way
you expect it.
> > registers almost always have a fixed endianess
> > indenpent of CPU endianess
>
> Going back to this statement - in my own personal experience, SoCs are
> usually designed such that extra swaps are NOT necessary when
> accessing onchip peripherals. Although I've seen a few cases where
> 1-2 peripherals, often third party IP cores, needed special treatment.
In my experience, the opposite is true: hardware designers will put
anything in the SoCs that they happen to need, and disregard the
specifications for endianess if they exist. If you are lucky, each
part gets used in only one form, but some people are crazy enough
to put the byte swaps into hardware and make life miserable for us.
> FWIW, several of the BCM7xxx peripherals default to "native" mode (no
> swap for either LE/BE), but can be optionally reconfigured as LE in
> order to preserve compatibility with the standard AHCI/SDHCI/...
> drivers that use the PCI accessors.
The reconfigurability is definitely the worst part.
> Not sure how easy it is to figure out which other SoCs do require the
> swap, as we'd need to exclude both PCI drivers and LE hosts whose
> drivers just used plain readl. But a quick look around the drivers/
> tree shows quite a few users of the __raw_ accessors:
>
> $ git grep -l __raw_readl drivers | wc -l
> 228
Most of these are bugs. About half of them are for Samsung SoCs,
and this is after we've spent a lot of time changing other drivers
for the same chips to use readl() in order to make them work with
big-endian kernels.
> By contrast, for BE-only registers:
>
> $ git grep -lE "(ioread32be)|(readl_be)" drivers/ | wc -l
> 42
Most big-endian drivers come from powerpc, which uses in_be32:
git grep in_be32 | wc -l
950
These drivers typically work just as well on little-endian kernels.
> The latter list seems to include a lot of FPGAs. Maybe it costs them
> too many gates/LEs to support both endian orderings.
The FPGA developers are priviledged because they can fix their hardware
when they get it wrong ;-)
> Or, we could add IRQ_GC_NATIVE_IO and/or IRQ_GC_BE_IO to enum irq_gc_flags.
>
> Would either of these choices satisfy everyone's goals?
This is what I meant with doing extra work in the case where we want to
support both in the same kernel. We would only enable the runtime
logic if both GENERIC_IRQ_CHIP and GENERIC_IRQ_CHIP_BE are set, and
leave it up to the platform to select the right one. For MIPS BCM7xxx,
you could use
config BCM7xxx
select GENERIC_IRQ_CHIP if CPU_LITTLE_ENDIAN
select GENERIC_IRQ_CHIP_BE if CPU_BIG_ENDIAN
so you would default to the hardwired big-endian accessor unless
some other drivers selects GENERIC_IRQ_CHIP.
Arnd
On Wednesday 29 October 2014 10:36:25 Florian Fainelli wrote:
> On 10/29/2014 12:43 AM, Arnd Bergmann wrote:
> > On Tuesday 28 October 2014 20:58:48 Kevin Cernekee wrote:
> >>
> >> +#ifdef CONFIG_RAW_IRQ_ACCESSORS
> >> +
> >> +#ifndef irq_reg_writel
> >> +# define irq_reg_writel(val, addr) __raw_writel(val, addr)
> >> +#endif
> >> +#ifndef irq_reg_readl
> >> +# define irq_reg_readl(addr) __raw_readl(addr)
> >> +#endif
> >> +
> >> +#else
> >> +
> >
> > No, this is just wrong: registers almost always have a fixed endianess
> > indenpent of CPU endianess, so if you use __raw_writel, it will be
> > broken on one or the other.
>
> Our brcmstb platforms had an endian strap settings for MIPS-based
> platforms, and for most peripherals this would be just completely
> transparent, as the HW always will do the internal swapping, such that
> host CPU does read/writes in its native endianess regardless of the
> actual strap settings.
That's irrelevant, it just makes matters worse because then you
might run into all combinations of big-endian and little-endian
kernels vs MMIO registers.
> AFAICT bcm3384, a MIPS-based Cable Modem platform has only one endianess
> setting: BE, and the HW only supports that specific endianess.
But they might have a secondary interrupt controller on a PCI card that
uses the same driver in the little-endian mode.
> > If you have a machine that uses big-endian registers in the interrupt
> > controller, you need to find a way to use the correct accessors
> > (e.g. iowrite32be) and use them independent of what endianess the CPU
> > is running.
> >
> > As this code is being used on all sorts of platforms, you can't assume
> > that they all use the same endianess, which makes it rather tricky.
>
> I think the more general problem with the use of readl_*() I/O accessors
> is that they just happen to work fine on most platforms out there: ARM
> Little-endian, because this nicely matches the endianess expected by the
> HW and that does not enforce an audit of whether your actual peripheral
> expects little-endian writes to be done.
Most peripherals have registers that are designed for PCI compatibility,
and PCI mandates little-endian registers. This has very little to do with
the architecture.
We also have a lot of peripherals that have big-endian registers, e.g
for things that are shared between ARM and PowerPC.
The only hardware that is causing problems is the kind where the hardware
developer tried to be helpful by making it possible to change endianess,
and this is what causes endless nightmares for kernel developers.
I think the easiest solution here is to make this irqchip not use
the irq-generic logic, because it clearly is not generic.
> The other problem is that readl() on ARM implies a barrier, which is not
> necesarily true/necessary for some other platforms such as some MIPS
> processors.
readl has to provide the semantics that PCI devices expect on x86.
On x86, it implies a barrier, so everything else has to do the same.
If you know that a driver does not need barriers, you can use
readl_relaxed(). It doesn't currently work on all architectures, but
it works on MIPS and I have a patch series from Will Deacon that I want
to push to make it work everywhere.
Arnd
On Wed, Oct 29, 2014 at 12:14 PM, Arnd Bergmann <[email protected]> wrote:
>> The host CPU is connected to the peripheral/register interface using a
>> 32-bit wide data bus. A simple 32-bit store originating from the host
>> CPU, targeted to an onchip SoC peripheral, will never need endian
>> swapping. i.e. this code works equally well on all supported systems
>> regardless of endianness:
>>
>> volatile u32 *foo = (void *)MY_REG_VA;
>> *foo = 0x12345678;
>>
>> 8-bit and 16-bit accesses may be another story, but only appear in a
>> few very special peripherals.
>
> Sorry, but this makes no sense. If you run a little-endian kernel
> on one of the MIPS systems that you marked as "always BE", or a
> big-endian kernel on the systems that are marked "always LE",
> then you have to byte swap.
If I ran an LE MIPS kernel on a BE system, it would hang on boot. I
know this through experience. ;-)
Setting aside the problem that the instruction words, pointers, and
bitfields in the image are all in the wrong byte order, there are many
other endian-specific assumptions baked into the executable.
Does this actually work on other architectures like ARM? I still see
compile-time checks for CONFIG_CPU_ENDIAN* in a couple of places under
arch/arm.
>> FWIW, several of the BCM7xxx peripherals default to "native" mode (no
>> swap for either LE/BE), but can be optionally reconfigured as LE in
>> order to preserve compatibility with the standard AHCI/SDHCI/...
>> drivers that use the PCI accessors.
>
> The reconfigurability is definitely the worst part.
That depends on who is doing the reconfiguring...
If the kernel has to support both options, then it's definitely a
hassle (and probably quite intrusive for some drivers).
In our case we just enable swapping unconditionally and then use the
stock driver code, with no changes to the I/O accessors.
>> Or, we could add IRQ_GC_NATIVE_IO and/or IRQ_GC_BE_IO to enum irq_gc_flags.
>>
>> Would either of these choices satisfy everyone's goals?
>
> This is what I meant with doing extra work in the case where we want to
> support both in the same kernel. We would only enable the runtime
> logic if both GENERIC_IRQ_CHIP and GENERIC_IRQ_CHIP_BE are set, and
> leave it up to the platform to select the right one. For MIPS BCM7xxx,
> you could use
>
> config BCM7xxx
> select GENERIC_IRQ_CHIP if CPU_LITTLE_ENDIAN
> select GENERIC_IRQ_CHIP_BE if CPU_BIG_ENDIAN
>
> so you would default to the hardwired big-endian accessor unless
> some other drivers selects GENERIC_IRQ_CHIP.
generic-chip.c already has a fair amount of indirection, with pointers
to saved masks, user-specified register offsets, and such. Is there a
concern that introducing, say, a pair of readl/writel function
pointers, would cause an unacceptable performance drop?
Backing up a little bit, do we have a consensus that defining
irq_reg_{readl,writel} at compile time from include/linux/irq.h is a
bad idea for multiplatform images, and it should be removed one way or
another?
On Wednesday 29 October 2014 13:09:47 Kevin Cernekee wrote:
> On Wed, Oct 29, 2014 at 12:14 PM, Arnd Bergmann <[email protected]> wrote:
> >> The host CPU is connected to the peripheral/register interface using a
> >> 32-bit wide data bus. A simple 32-bit store originating from the host
> >> CPU, targeted to an onchip SoC peripheral, will never need endian
> >> swapping. i.e. this code works equally well on all supported systems
> >> regardless of endianness:
> >>
> >> volatile u32 *foo = (void *)MY_REG_VA;
> >> *foo = 0x12345678;
> >>
> >> 8-bit and 16-bit accesses may be another story, but only appear in a
> >> few very special peripherals.
> >
> > Sorry, but this makes no sense. If you run a little-endian kernel
> > on one of the MIPS systems that you marked as "always BE", or a
> > big-endian kernel on the systems that are marked "always LE",
> > then you have to byte swap.
>
> If I ran an LE MIPS kernel on a BE system, it would hang on boot. I
> know this through experience.
What is a "BE system" then? Is the CPU core not capable of running
code either way?
> Setting aside the problem that the instruction words, pointers, and
> bitfields in the image are all in the wrong byte order, there are many
> other endian-specific assumptions baked into the executable.
Most of those are handled by the compiler. Bitfields are of course
a problem when they are accessed through DMA, but I would assume
that this is still a problem with the hardware byteswap hack that
the Broadcom SoCs have.
Of course, anything that uses __raw_readl on an MMIO register is
broken if you try to do this, which was my point the whole time.
> Does this actually work on other architectures like ARM? I still see
> compile-time checks for CONFIG_CPU_ENDIAN* in a couple of places under
> arch/arm.
Yes, it should work on any architecture that supports both modes. It
definitely works on all ARM cores I know, and on most PowerPC cores.
I always assumed that MIPS was bi-endian as well, but according to
what you say I guess it is not.
ARM and PowerPC can actually switch endianess in the kernel, and this
is what they do in the first instruction when you run a different
endianess from what the boot loader runs as it calls into the kernel.
The ARM boot protocol requires entering the kernel in little-endian
mode, while I think on PowerPC the boot loader is supposed to detect
the format of the kernel binary and pick the right mode before calling
it.
> >> Or, we could add IRQ_GC_NATIVE_IO and/or IRQ_GC_BE_IO to enum irq_gc_flags.
> >>
> >> Would either of these choices satisfy everyone's goals?
> >
> > This is what I meant with doing extra work in the case where we want to
> > support both in the same kernel. We would only enable the runtime
> > logic if both GENERIC_IRQ_CHIP and GENERIC_IRQ_CHIP_BE are set, and
> > leave it up to the platform to select the right one. For MIPS BCM7xxx,
> > you could use
> >
> > config BCM7xxx
> > select GENERIC_IRQ_CHIP if CPU_LITTLE_ENDIAN
> > select GENERIC_IRQ_CHIP_BE if CPU_BIG_ENDIAN
> >
> > so you would default to the hardwired big-endian accessor unless
> > some other drivers selects GENERIC_IRQ_CHIP.
>
> generic-chip.c already has a fair amount of indirection, with pointers
> to saved masks, user-specified register offsets, and such. Is there a
> concern that introducing, say, a pair of readl/writel function
> pointers, would cause an unacceptable performance drop?
I don't know. Thomas' reply suggests that it isn't. Doing byteswap
in software at a register access is usually free in terms of CPU
cycles, but an indirect function call can be noticeable if we do
that a lot.
> Backing up a little bit, do we have a consensus that defining
> irq_reg_{readl,writel} at compile time from include/linux/irq.h is a
> bad idea for multiplatform images, and it should be removed one way or
> another?
If we can prove at compile-time that all users of irq_reg_{readl,writel}
are the same, then I think it's ok to hardcode it, but of course if
any driver we build needs the opposite of the others, or needs CPU-endian
access, then it definitely can't work.
Arnd
On Wed, 29 Oct 2014, Arnd Bergmann wrote:
> On Wednesday 29 October 2014 13:09:47 Kevin Cernekee wrote:
> > generic-chip.c already has a fair amount of indirection, with pointers
> > to saved masks, user-specified register offsets, and such. Is there a
> > concern that introducing, say, a pair of readl/writel function
> > pointers, would cause an unacceptable performance drop?
>
> I don't know. Thomas' reply suggests that it isn't. Doing byteswap
> in software at a register access is usually free in terms of CPU
> cycles, but an indirect function call can be noticeable if we do
> that a lot.
I did not say that it is free. I merily said that I prefer to have
this solved at the core level rather than at the driver level. So you
have several options to do so:
1) Indirections
2) Different functions for the different access modes
3) Alternatives
#1 Is the simplest solution, but imposes the overhead of an indirect
function call for something trivial
#2 The most efficient and flexible way if you have to provide
different access modes for different drivers. But it comes with the
price of increasing the text foot print.
#3 Smart and efficient, but requires that on a particular system all
drivers use the same access mode.
Thanks,
tglx
On Wednesday 29 October 2014 22:31:06 Thomas Gleixner wrote:
> On Wed, 29 Oct 2014, Arnd Bergmann wrote:
> > On Wednesday 29 October 2014 13:09:47 Kevin Cernekee wrote:
> > > generic-chip.c already has a fair amount of indirection, with pointers
> > > to saved masks, user-specified register offsets, and such. Is there a
> > > concern that introducing, say, a pair of readl/writel function
> > > pointers, would cause an unacceptable performance drop?
> >
> > I don't know. Thomas' reply suggests that it isn't. Doing byteswap
> > in software at a register access is usually free in terms of CPU
> > cycles, but an indirect function call can be noticeable if we do
> > that a lot.
>
> I did not say that it is free. I merily said that I prefer to have
> this solved at the core level rather than at the driver level.
Yes, I understood that.
> So you have several options to do so:
>
> 1) Indirections
>
> 2) Different functions for the different access modes
>
> 3) Alternatives
>
> #1 Is the simplest solution, but imposes the overhead of an indirect
> function call for something trivial
>
> #2 The most efficient and flexible way if you have to provide
> different access modes for different drivers. But it comes with the
> price of increasing the text foot print.
>
> #3 Smart and efficient, but requires that on a particular system all
> drivers use the same access mode.
Right. The option that I was explaining earlier basically combines #1 and
#3: For all kernels on which we know the endianess of all generic-irqchip
users at compile time, we hardcode that, and we use indirections of
some sort for the cases where we build a kernel that needs both.
Arnd
On Wed, 29 Oct 2014, Arnd Bergmann wrote:
> Right. The option that I was explaining earlier basically combines #1 and
> #3: For all kernels on which we know the endianess of all generic-irqchip
> users at compile time, we hardcode that, and we use indirections of
> some sort for the cases where we build a kernel that needs both.
Works for me.
Thanks,
tglx
On Wed, Oct 29, 2014 at 2:13 PM, Arnd Bergmann <[email protected]> wrote:
> On Wednesday 29 October 2014 13:09:47 Kevin Cernekee wrote:
>> On Wed, Oct 29, 2014 at 12:14 PM, Arnd Bergmann <[email protected]> wrote:
>> >> The host CPU is connected to the peripheral/register interface using a
>> >> 32-bit wide data bus. A simple 32-bit store originating from the host
>> >> CPU, targeted to an onchip SoC peripheral, will never need endian
>> >> swapping. i.e. this code works equally well on all supported systems
>> >> regardless of endianness:
>> >>
>> >> volatile u32 *foo = (void *)MY_REG_VA;
>> >> *foo = 0x12345678;
>> >>
>> >> 8-bit and 16-bit accesses may be another story, but only appear in a
>> >> few very special peripherals.
>> >
>> > Sorry, but this makes no sense. If you run a little-endian kernel
>> > on one of the MIPS systems that you marked as "always BE", or a
>> > big-endian kernel on the systems that are marked "always LE",
>> > then you have to byte swap.
>>
>> If I ran an LE MIPS kernel on a BE system, it would hang on boot. I
>> know this through experience.
>
> What is a "BE system" then? Is the CPU core not capable of running
> code either way?
On the MIPS BCM7xxx chips, LE/BE support was a design requirement. So:
- The chips include a strap pin for LE/BE so it can be configured
through board jumpers. This is the only supported method of switching
endianness.
- Endianness interactions and performance concerns have been analyzed
for all peripherals, buses, and data flows.
- As Florian mentioned earlier, the LE/BE strap preconfigures several
hardware blocks at boot time, e.g. telling the SPI controller how to
arrange the incoming data such that the MSB of each instruction word
read from flash shows up in the right place.
- The entire software stack (and even the cross toolchain) needs to
be compiled for either LE or BE.
So in this context a "BE system" is a BCM7xxx MIPS chip strapped for
BE, or one of the BCM33xx/BCM63xx/BCM68xx MIPS chips that is hardwired
and verified for BE only.
>> Does this actually work on other architectures like ARM? I still see
>> compile-time checks for CONFIG_CPU_ENDIAN* in a couple of places under
>> arch/arm.
>
> Yes, it should work on any architecture that supports both modes. It
> definitely works on all ARM cores I know, and on most PowerPC cores.
> I always assumed that MIPS was bi-endian as well, but according to
> what you say I guess it is not.
>
> ARM and PowerPC can actually switch endianess in the kernel, and this
> is what they do in the first instruction when you run a different
> endianess from what the boot loader runs as it calls into the kernel.
> The ARM boot protocol requires entering the kernel in little-endian
> mode, while I think on PowerPC the boot loader is supposed to detect
> the format of the kernel binary and pick the right mode before calling
> it.
Is it the intention to allow runtime endian switching on any
ARM/PowerPC platform (even the Samsung products you mentioned)? Or
only on the boards that were designed to operate this way?
Our problem becomes much simpler if we assume that the majority of
systems have a fixed endianness, and only a few special cases need to
accommodate the different kernel/register endianness permutations
you've listed.
On Wed, Oct 29, 2014 at 12:53 AM, Arnd Bergmann <[email protected]> wrote:
> On Tuesday 28 October 2014 20:58:57 Kevin Cernekee wrote:
>> Most implementations of the bcm7120-l2 controller only have a single
>> 32-bit enable word + 32-bit status word. But some instances have added
>> more enable/status pairs in order to support 64+ IRQs (which are all
>> ORed into one parent IRQ input). Make the following changes to allow
>> the driver to support this:
>>
>> - Extend DT bindings so that multiple words can be specified for the
>> reg property, various masks, etc.
>>
>> - Add loops to the probe/handle functions to deal with each word
>> separately
>>
>> - Allocate 1 generic-chip for every 32 IRQs, so we can still use the
>> clr/set helper functions
>>
>> - Update the documentation
>>
>> Signed-off-by: Kevin Cernekee <[email protected]>
>
> You should probably specify a 'big-endian' DT property for the driver
> to check. If you have both LE and BE versions of this device, we
> must make sure that we use the correct accessors.
>
> As long as we don't need to build a kernel that supports both (if
> I understand you correctly, the ARM SoCs use a LE instance of this
> device, while the MIPS SoCs use a BE version) you can still decide
> at compile-time which one you want, but please add the runtime check
> now, so if we ever get a new combination we can handle it at runtime
> with a more complex driver implementation.
Under discussion in the other thread...
> If I read your code right, you have decided to use one IRQ domain
> per register set, rather than one domain for all of them. I don't
> know which of the two ways is better here, but it would be good if
> you could explain in the patch description why you did it like this.
This uses one domain per bcm7120-l2 DT node. If the DT node defines
multiple enable/status pairs (i.e. >=64 IRQs) then the driver will
create a single IRQ domain with 2+ generic chips.
Multiple generic chips are required because the generic-chip code can
only handle one enable/status register pair per instance.
On Wednesday 29 October 2014 16:22:31 Kevin Cernekee wrote:
>
> This uses one domain per bcm7120-l2 DT node. If the DT node defines
> multiple enable/status pairs (i.e. >=64 IRQs) then the driver will
> create a single IRQ domain with 2+ generic chips.
>
> Multiple generic chips are required because the generic-chip code can
> only handle one enable/status register pair per instance.
>
Makes sense. Just make sure you have that explanation in the patch
description.
Arnd
On Wednesday 29 October 2014 16:05:21 Kevin Cernekee wrote:
> On Wed, Oct 29, 2014 at 2:13 PM, Arnd Bergmann <[email protected]> wrote:
> > On Wednesday 29 October 2014 13:09:47 Kevin Cernekee wrote:
> >> On Wed, Oct 29, 2014 at 12:14 PM, Arnd Bergmann <[email protected]> wrote:
> >> >> The host CPU is connected to the peripheral/register interface using a
> >> >> 32-bit wide data bus. A simple 32-bit store originating from the host
> >> >> CPU, targeted to an onchip SoC peripheral, will never need endian
> >> >> swapping. i.e. this code works equally well on all supported systems
> >> >> regardless of endianness:
> >> >>
> >> >> volatile u32 *foo = (void *)MY_REG_VA;
> >> >> *foo = 0x12345678;
> >> >>
> >> >> 8-bit and 16-bit accesses may be another story, but only appear in a
> >> >> few very special peripherals.
> >> >
> >> > Sorry, but this makes no sense. If you run a little-endian kernel
> >> > on one of the MIPS systems that you marked as "always BE", or a
> >> > big-endian kernel on the systems that are marked "always LE",
> >> > then you have to byte swap.
> >>
> >> If I ran an LE MIPS kernel on a BE system, it would hang on boot. I
> >> know this through experience.
> >
> > What is a "BE system" then? Is the CPU core not capable of running
> > code either way?
>
> On the MIPS BCM7xxx chips, LE/BE support was a design requirement. So:
>
> - The chips include a strap pin for LE/BE so it can be configured
> through board jumpers. This is the only supported method of switching
> endianness.
>
> - Endianness interactions and performance concerns have been analyzed
> for all peripherals, buses, and data flows.
>
> - As Florian mentioned earlier, the LE/BE strap preconfigures several
> hardware blocks at boot time, e.g. telling the SPI controller how to
> arrange the incoming data such that the MSB of each instruction word
> read from flash shows up in the right place.
>
> - The entire software stack (and even the cross toolchain) needs to
> be compiled for either LE or BE.
>
> So in this context a "BE system" is a BCM7xxx MIPS chip strapped for
> BE, or one of the BCM33xx/BCM63xx/BCM68xx MIPS chips that is hardwired
> and verified for BE only.
Ah, I think I understand what you mean now. So this strapping is done
for legacy operating systems that are not endian-aware and hardwired
to one or the other.
In Linux, we don't care about that, we have the source and we can
just make it run on any hardware we care about. If you port a kernel
to such a platform, the best strategy is to ignore what the SoC
vendor tried to do for the other OSs and set the chip into "never
translate" in hardware so you can handle it correctly in the kernel.
Presumably you want to keep the boot loader, so unfortunately
that can mean having to override the setting in early kernel code
before you touch any hardware. The nasty part is when the hardware
designers put a byteswap logic in front of the flash, because then
you have to create an image that stores the bootloader in opposite
endianess from the kernel, but I'd assume that's still better than
the hacks from the vendor BSP.
You have multiple problems if you rely on the byteswaps being
done in hardware:
- You can't build a kernel that runs on all SoCs, not even all
systems using the same SoC when that strapping pin gives you
two incompatible versions
- Any MMIO access to device memory storing byte streams (network
packets, audio, block, ...) will be swapped the same way that
the registers do, which means you now have to do the expensive
byte swaps (memcpy_fromio) in software instead of the cheap ones
(writel)
- If the hardware swap was implemented wrong, all the addresses
for 8 or 16 bit MMIO registers are wrong too and you have to
fix them up in software, which is much worse than swapping the
contents.
- It's impossible to share device drivers with saner hardware
platforms that let the CPU access MMIO registers in whichever
way the device expects it.
> >> Does this actually work on other architectures like ARM? I still see
> >> compile-time checks for CONFIG_CPU_ENDIAN* in a couple of places under
> >> arch/arm.
> >
> > Yes, it should work on any architecture that supports both modes. It
> > definitely works on all ARM cores I know, and on most PowerPC cores.
> > I always assumed that MIPS was bi-endian as well, but according to
> > what you say I guess it is not.
> >
> > ARM and PowerPC can actually switch endianess in the kernel, and this
> > is what they do in the first instruction when you run a different
> > endianess from what the boot loader runs as it calls into the kernel.
> > The ARM boot protocol requires entering the kernel in little-endian
> > mode, while I think on PowerPC the boot loader is supposed to detect
> > the format of the kernel binary and pick the right mode before calling
> > it.
>
> Is it the intention to allow runtime endian switching on any
> ARM/PowerPC platform (even the Samsung products you mentioned)? Or
> only on the boards that were designed to operate this way?
Any sane SoC will come without byteswapping on the buses, so that's
trivial to handle. You just have to build kernel and userspace in the
same endianess and have to ensure that all drivers use the correct
accessors that match what the hardware does.
The Samsung platforms get it wrong because they tried to optimize
out the barriers implied by writel, before we had writel_relaxed.
When nobody made a mistake like that, you can run a kernel of either
endianess on any hardware.
> Our problem becomes much simpler if we assume that the majority of
> systems have a fixed endianness, and only a few special cases need to
> accommodate the different kernel/register endianness permutations
> you've listed.
Good point. It seems that there is currently no support for BCM7xxx
in upstream Linux, and that is the only one that has the crazy
strapping pin, so I guess you could avoid a lot of the problems by
changing the MIPS code to assume BE registers, and if anybody wants
to submit BCM7xxx MIPS support to mainline, they have to make sure
it's in the right mode.
Arnd
On Thu, Oct 30, 2014 at 2:58 AM, Arnd Bergmann <[email protected]> wrote:
>> On the MIPS BCM7xxx chips, LE/BE support was a design requirement. So:
>>
>> - The chips include a strap pin for LE/BE so it can be configured
>> through board jumpers. This is the only supported method of switching
>> endianness.
>>
>> - Endianness interactions and performance concerns have been analyzed
>> for all peripherals, buses, and data flows.
>>
>> - As Florian mentioned earlier, the LE/BE strap preconfigures several
>> hardware blocks at boot time, e.g. telling the SPI controller how to
>> arrange the incoming data such that the MSB of each instruction word
>> read from flash shows up in the right place.
>>
>> - The entire software stack (and even the cross toolchain) needs to
>> be compiled for either LE or BE.
>>
>> So in this context a "BE system" is a BCM7xxx MIPS chip strapped for
>> BE, or one of the BCM33xx/BCM63xx/BCM68xx MIPS chips that is hardwired
>> and verified for BE only.
>
> Ah, I think I understand what you mean now. So this strapping is done
> for legacy operating systems that are not endian-aware and hardwired
> to one or the other.
Hmm, maybe, but this wasn't done for legacy reasons. The system was
designed to run in either endianness, with the understanding that all
software would be built for either LE or BE and that the hardware
would be strapped for one or the other.
On dev boards this is a jumper on the board, but on production boards
it is often immutable.
> In Linux, we don't care about that, we have the source and we can
> just make it run on any hardware we care about. If you port a kernel
> to such a platform, the best strategy is to ignore what the SoC
> vendor tried to do for the other OSs and set the chip into "never
> translate" in hardware so you can handle it correctly in the kernel.
Right, the intention was to remain source-compatible between LE/BE,
but not binary-compatible.
Either the __raw_ accessors, or dynamically choosing between
readl/ioread32be based on CONFIG_CPU_BIG_ENDIAN (or DT properties, if
absolutely necessary), would work. We've usually chosen the __raw_
path as it simplifies the code; all we want is a simple "load word"
instruction with no swapping on either LE or BE.
The registers themselves are automatically accessed in native endian
order regardless of the CPU's configuration, due to the way the bus
interfaces were designed.
> Presumably you want to keep the boot loader, so unfortunately
> that can mean having to override the setting in early kernel code
> before you touch any hardware. The nasty part is when the hardware
> designers put a byteswap logic in front of the flash, because then
> you have to create an image that stores the bootloader in opposite
> endianess from the kernel, but I'd assume that's still better than
> the hacks from the vendor BSP.
FWIW, these platforms have 4 different flash controllers, and each one
has its own unique byteswap hardware.
Everything works OK today, but changing the endianness at runtime
could open a big can of worms.
> You have multiple problems if you rely on the byteswaps being
> done in hardware:
>
> - You can't build a kernel that runs on all SoCs, not even all
> systems using the same SoC when that strapping pin gives you
> two incompatible versions
Correct. It was never a requirement to use a single image for both LE and BE.
> - Any MMIO access to device memory storing byte streams (network
> packets, audio, block, ...) will be swapped the same way that
> the registers do, which means you now have to do the expensive
> byte swaps (memcpy_fromio) in software instead of the cheap ones
> (writel)
The various endianness settings also affect our CPU's "view" of DRAM.
All current BCM7xxx SoCs have extra logic to make sure that packet
data, disk sectors from SATA, and other "bulky" transfers all arrive
in a suitable byte order.
> - If the hardware swap was implemented wrong, all the addresses
> for 8 or 16 bit MMIO registers are wrong too and you have to
> fix them up in software, which is much worse than swapping the
> contents.
We have this mode available on some of the peripherals, but chose not to use it.
One situation where it can prove useful: for PCIe enable the HW
byteswap, so readl() can be implemented as a straight 32-bit load with
no swap. The lesser-used 8-bit and 16-bit accessors would then
implement address swizzling. Other memory-mapped SoC peripherals that
Linux wants to treat as PCIe devices (accessing via readl/writel) can
then be configured for no HW byteswap.
> - It's impossible to share device drivers with saner hardware
> platforms that let the CPU access MMIO registers in whichever
> way the device expects it.
That depends somewhat on whether we're talking about binary-level
compatibility, or source-level compatibility. For the latter case, we
can always redefine readl() to match the hardware at compile time. On
MIPS this can be done through CONFIG_SWAP_IO_SPACE.
>> Is it the intention to allow runtime endian switching on any
>> ARM/PowerPC platform (even the Samsung products you mentioned)? Or
>> only on the boards that were designed to operate this way?
>
> Any sane SoC will come without byteswapping on the buses, so that's
> trivial to handle. You just have to build kernel and userspace in the
> same endianess and have to ensure that all drivers use the correct
> accessors that match what the hardware does.
>
> The Samsung platforms get it wrong because they tried to optimize
> out the barriers implied by writel, before we had writel_relaxed.
> When nobody made a mistake like that, you can run a kernel of either
> endianess on any hardware.
Hmm, OK. So it sounds like Samsung used the __raw_ macros to avoid
the barriers, but inadvertently nuked the endian swap. bcm7120-l2
used the __raw_ macros to avoid the endian swap, and barriers were a
"don't care."
>> Our problem becomes much simpler if we assume that the majority of
>> systems have a fixed endianness, and only a few special cases need to
>> accommodate the different kernel/register endianness permutations
>> you've listed.
>
> Good point. It seems that there is currently no support for BCM7xxx
> in upstream Linux, and that is the only one that has the crazy
> strapping pin, so I guess you could avoid a lot of the problems by
> changing the MIPS code to assume BE registers, and if anybody wants
> to submit BCM7xxx MIPS support to mainline, they have to make sure
> it's in the right mode.
One catch is that almost all BCM7xxx MIPS systems are actually LE, and
BE gets less test coverage. Some boards cannot even be configured for
BE. BE has mostly been kept around to accommodate a few customers
with legacy code, not out of a burning desire to support both modes of
operation...
On Thursday 30 October 2014 12:03:38 Kevin Cernekee wrote:
> On Thu, Oct 30, 2014 at 2:58 AM, Arnd Bergmann <[email protected]> wrote:
> >> On the MIPS BCM7xxx chips, LE/BE support was a design requirement. So:
> >>
> >> - The chips include a strap pin for LE/BE so it can be configured
> >> through board jumpers. This is the only supported method of switching
> >> endianness.
> >>
> >> - Endianness interactions and performance concerns have been analyzed
> >> for all peripherals, buses, and data flows.
> >>
> >> - As Florian mentioned earlier, the LE/BE strap preconfigures several
> >> hardware blocks at boot time, e.g. telling the SPI controller how to
> >> arrange the incoming data such that the MSB of each instruction word
> >> read from flash shows up in the right place.
> >>
> >> - The entire software stack (and even the cross toolchain) needs to
> >> be compiled for either LE or BE.
> >>
> >> So in this context a "BE system" is a BCM7xxx MIPS chip strapped for
> >> BE, or one of the BCM33xx/BCM63xx/BCM68xx MIPS chips that is hardwired
> >> and verified for BE only.
> >
> > Ah, I think I understand what you mean now. So this strapping is done
> > for legacy operating systems that are not endian-aware and hardwired
> > to one or the other.
>
> Hmm, maybe, but this wasn't done for legacy reasons. The system was
> designed to run in either endianness, with the understanding that all
> software would be built for either LE or BE and that the hardware
> would be strapped for one or the other.
>
> On dev boards this is a jumper on the board, but on production boards
> it is often immutable.
But only legacy OSs would need the jumper or the pin. Any modern OS
like Linux should just work in either endianess independent of how
the registers are done.
> > In Linux, we don't care about that, we have the source and we can
> > just make it run on any hardware we care about. If you port a kernel
> > to such a platform, the best strategy is to ignore what the SoC
> > vendor tried to do for the other OSs and set the chip into "never
> > translate" in hardware so you can handle it correctly in the kernel.
>
> Right, the intention was to remain source-compatible between LE/BE,
> but not binary-compatible.
Well, I guess they failed on the "source-compatible" part ;-)
> Either the __raw_ accessors, or dynamically choosing between
> readl/ioread32be based on CONFIG_CPU_BIG_ENDIAN (or DT properties, if
> absolutely necessary), would work.
No, this is just wrong. Don't ever assume that the endianess of the
kernel has anything to do with how the hardware is built. You can
make it a separate Kconfig option, and you can make it default
to CPU_BIG_ENDIAN, but the two things are fundamentally different,
whatever the hardware designers try to tell you.
> > You have multiple problems if you rely on the byteswaps being
> > done in hardware:
> >
> > - You can't build a kernel that runs on all SoCs, not even all
> > systems using the same SoC when that strapping pin gives you
> > two incompatible versions
>
> Correct. It was never a requirement to use a single image for both LE and BE.
I didn't mean one kernel image that runs in both BE and LE mode, that
would be crazy. What I mean is one image that can run on the SoC
in either strapping mode but with the CPU endianess set the way that
matches how the kernel is built.
> > - Any MMIO access to device memory storing byte streams (network
> > packets, audio, block, ...) will be swapped the same way that
> > the registers do, which means you now have to do the expensive
> > byte swaps (memcpy_fromio) in software instead of the cheap ones
> > (writel)
>
> The various endianness settings also affect our CPU's "view" of DRAM.
> All current BCM7xxx SoCs have extra logic to make sure that packet
> data, disk sectors from SATA, and other "bulky" transfers all arrive
> in a suitable byte order.
Wow, that seems like a lot of hardware effort to gain basically
nothing. If they managed to get this right, at least it won't
make it harder to support the hardware properly.
So the byte stream data is never swapped, or always swapped an even
number of times, regardless of the strapping pin, right?
> > - If the hardware swap was implemented wrong, all the addresses
> > for 8 or 16 bit MMIO registers are wrong too and you have to
> > fix them up in software, which is much worse than swapping the
> > contents.
>
> We have this mode available on some of the peripherals, but chose not to use it.
>
> One situation where it can prove useful: for PCIe enable the HW
> byteswap, so readl() can be implemented as a straight 32-bit load with
> no swap. The lesser-used 8-bit and 16-bit accessors would then
> implement address swizzling. Other memory-mapped SoC peripherals that
> Linux wants to treat as PCIe devices (accessing via readl/writel) can
> then be configured for no HW byteswap.
This is the part where it gets really crazy and the only sane way to
deal with it is to turn off the entire swizzling in hardware.
> > - It's impossible to share device drivers with saner hardware
> > platforms that let the CPU access MMIO registers in whichever
> > way the device expects it.
>
> That depends somewhat on whether we're talking about binary-level
> compatibility, or source-level compatibility. For the latter case, we
> can always redefine readl() to match the hardware at compile time. On
> MIPS this can be done through CONFIG_SWAP_IO_SPACE.
That option seems to be incompatible with running one kernel across
multiple SoC families, if each of them does it differently.
The comment in arch/mips/include/asm/mach-generic/mangle-port.h
suggests that it was originally meant only for PIO access, not
for MMIO, but asm/io.h uses it for both.
> >> Our problem becomes much simpler if we assume that the majority of
> >> systems have a fixed endianness, and only a few special cases need to
> >> accommodate the different kernel/register endianness permutations
> >> you've listed.
> >
> > Good point. It seems that there is currently no support for BCM7xxx
> > in upstream Linux, and that is the only one that has the crazy
> > strapping pin, so I guess you could avoid a lot of the problems by
> > changing the MIPS code to assume BE registers, and if anybody wants
> > to submit BCM7xxx MIPS support to mainline, they have to make sure
> > it's in the right mode.
>
> One catch is that almost all BCM7xxx MIPS systems are actually LE, and
> BE gets less test coverage. Some boards cannot even be configured for
> BE. BE has mostly been kept around to accommodate a few customers
> with legacy code, not out of a burning desire to support both modes of
> operation...
If all the boards can be configured for LE, then you can just make this
mode required when upstreaming the kernel port, independent of how the
kernel runs.
Arnd
On Thu, Oct 30, 2014 at 12:52 PM, Arnd Bergmann <[email protected]> wrote:
>> > Ah, I think I understand what you mean now. So this strapping is done
>> > for legacy operating systems that are not endian-aware and hardwired
>> > to one or the other.
>>
>> Hmm, maybe, but this wasn't done for legacy reasons. The system was
>> designed to run in either endianness, with the understanding that all
>> software would be built for either LE or BE and that the hardware
>> would be strapped for one or the other.
>>
>> On dev boards this is a jumper on the board, but on production boards
>> it is often immutable.
>
> But only legacy OSs would need the jumper or the pin. Any modern OS
> like Linux should just work in either endianess independent of how
> the registers are done.
I did some checking; AFAICT our hardware does not support the type of
runtime endian switching that you describe.
MIPS CP0 register 12 bit 25 (Reverse Endian) "reverses the memory
endian selection when operating in user mode. Kernel or debug mode
references are not affected by the state of this bit."
MIPS CP0 register 16 bit 15 (Big Endian) "Indicates the endian mode in
which the processor is running. Set via SI_Endian input pin." This
bit is read-only.
Based on the datasheets, the RE bit may be supported in BMIPS4380, but
is not supported in BMIPS5000 or MIPS 34K. In any event, it won't
have an impact on kernel I/O accesses.
>> > In Linux, we don't care about that, we have the source and we can
>> > just make it run on any hardware we care about. If you port a kernel
>> > to such a platform, the best strategy is to ignore what the SoC
>> > vendor tried to do for the other OSs and set the chip into "never
>> > translate" in hardware so you can handle it correctly in the kernel.
>>
>> Right, the intention was to remain source-compatible between LE/BE,
>> but not binary-compatible.
>
> Well, I guess they failed on the "source-compatible" part ;-)
We have kernel trees and bootloaders that build LE/BE images from the
same source tree, just by changing CONFIG_CPU_*_ENDIAN. They use the
__raw_ macros or equivalent when talking to peripherals.
>> Either the __raw_ accessors, or dynamically choosing between
>> readl/ioread32be based on CONFIG_CPU_BIG_ENDIAN (or DT properties, if
>> absolutely necessary), would work.
>
> No, this is just wrong. Don't ever assume that the endianess of the
> kernel has anything to do with how the hardware is built. You can
> make it a separate Kconfig option, and you can make it default
> to CPU_BIG_ENDIAN, but the two things are fundamentally different,
> whatever the hardware designers try to tell you.
But we have a situation where kernel endianness == hardware
endianness, so can't we make simplifying assumptions?
>> > - Any MMIO access to device memory storing byte streams (network
>> > packets, audio, block, ...) will be swapped the same way that
>> > the registers do, which means you now have to do the expensive
>> > byte swaps (memcpy_fromio) in software instead of the cheap ones
>> > (writel)
>>
>> The various endianness settings also affect our CPU's "view" of DRAM.
>> All current BCM7xxx SoCs have extra logic to make sure that packet
>> data, disk sectors from SATA, and other "bulky" transfers all arrive
>> in a suitable byte order.
>
> Wow, that seems like a lot of hardware effort to gain basically
> nothing. If they managed to get this right, at least it won't
> make it harder to support the hardware properly.
The hardware was built to guarantee that a byte access to skb->data[0]
reads back byte 0 of the packet, not byte 3, regardless of whether
we're in LE or BE mode.
It takes a surprising amount of effort to make sure this is done
consistently and correctly across dozens of onchip peripherals.
>> That depends somewhat on whether we're talking about binary-level
>> compatibility, or source-level compatibility. For the latter case, we
>> can always redefine readl() to match the hardware at compile time. On
>> MIPS this can be done through CONFIG_SWAP_IO_SPACE.
>
> That option seems to be incompatible with running one kernel across
> multiple SoC families, if each of them does it differently.
Agreed. We have many such challenges on MIPS. In the end, it is
possible that not every MIPS SoC will "fit the mold."
>> One catch is that almost all BCM7xxx MIPS systems are actually LE, and
>> BE gets less test coverage. Some boards cannot even be configured for
>> BE. BE has mostly been kept around to accommodate a few customers
>> with legacy code, not out of a burning desire to support both modes of
>> operation...
>
> If all the boards can be configured for LE, then you can just make this
> mode required when upstreaming the kernel port, independent of how the
> kernel runs.
I was hoping to eventually come up with a multiplatform BE BMIPS image
that boots on 3384, 6328, and 7346... (even though the common case for
7xxx is LE, it makes for a nice demo)
On Thursday 30 October 2014 13:54:06 Kevin Cernekee wrote:
> On Thu, Oct 30, 2014 at 12:52 PM, Arnd Bergmann <[email protected]> wrote:
> >> > Ah, I think I understand what you mean now. So this strapping is done
> >> > for legacy operating systems that are not endian-aware and hardwired
> >> > to one or the other.
> >>
> >> Hmm, maybe, but this wasn't done for legacy reasons. The system was
> >> designed to run in either endianness, with the understanding that all
> >> software would be built for either LE or BE and that the hardware
> >> would be strapped for one or the other.
> >>
> >> On dev boards this is a jumper on the board, but on production boards
> >> it is often immutable.
> >
> > But only legacy OSs would need the jumper or the pin. Any modern OS
> > like Linux should just work in either endianess independent of how
> > the registers are done.
>
> I did some checking; AFAICT our hardware does not support the type of
> runtime endian switching that you describe.
>
> MIPS CP0 register 12 bit 25 (Reverse Endian) "reverses the memory
> endian selection when operating in user mode. Kernel or debug mode
> references are not affected by the state of this bit."
>
> MIPS CP0 register 16 bit 15 (Big Endian) "Indicates the endian mode in
> which the processor is running. Set via SI_Endian input pin." This
> bit is read-only.
>
> Based on the datasheets, the RE bit may be supported in BMIPS4380, but
> is not supported in BMIPS5000 or MIPS 34K. In any event, it won't
> have an impact on kernel I/O accesses.
I see. Most importantly, changing the RE bit would be unsupportable by
Linux, because that would break our system call ABI: you can't have
kernel and user space running at different endianess.
Thanks for looking that up, that explains a lot!
> >> > In Linux, we don't care about that, we have the source and we can
> >> > just make it run on any hardware we care about. If you port a kernel
> >> > to such a platform, the best strategy is to ignore what the SoC
> >> > vendor tried to do for the other OSs and set the chip into "never
> >> > translate" in hardware so you can handle it correctly in the kernel.
> >>
> >> Right, the intention was to remain source-compatible between LE/BE,
> >> but not binary-compatible.
> >
> > Well, I guess they failed on the "source-compatible" part ;-)
>
> We have kernel trees and bootloaders that build LE/BE images from the
> same source tree, just by changing CONFIG_CPU_*_ENDIAN. They use the
> __raw_ macros or equivalent when talking to peripherals.
What I meant is that you can't use the normal driver API. The __raw_*
accessors are in no way guaranteed to do what you want across
architectures, they may be missing barriers and the compiler is free
to reorder accesses or split up word accesses into byte accesses
for any reason. It's basically just a 'volatile' pointer dereference
that is meant to access device memory through an __iomem pointer.
I don't doubt that these accessors do the right thing for you on
MIPS, but that is because either the hardware or the definition
in source code puts more into them than the API guarantees.
> >> Either the __raw_ accessors, or dynamically choosing between
> >> readl/ioread32be based on CONFIG_CPU_BIG_ENDIAN (or DT properties, if
> >> absolutely necessary), would work.
> >
> > No, this is just wrong. Don't ever assume that the endianess of the
> > kernel has anything to do with how the hardware is built. You can
> > make it a separate Kconfig option, and you can make it default
> > to CPU_BIG_ENDIAN, but the two things are fundamentally different,
> > whatever the hardware designers try to tell you.
>
> But we have a situation where kernel endianness == hardware
> endianness, so can't we make simplifying assumptions?
Yes, based on your description above, that works. I'm just really
surprised about that kind of hardware design.
> >> > - Any MMIO access to device memory storing byte streams (network
> >> > packets, audio, block, ...) will be swapped the same way that
> >> > the registers do, which means you now have to do the expensive
> >> > byte swaps (memcpy_fromio) in software instead of the cheap ones
> >> > (writel)
> >>
> >> The various endianness settings also affect our CPU's "view" of DRAM.
> >> All current BCM7xxx SoCs have extra logic to make sure that packet
> >> data, disk sectors from SATA, and other "bulky" transfers all arrive
> >> in a suitable byte order.
> >
> > Wow, that seems like a lot of hardware effort to gain basically
> > nothing. If they managed to get this right, at least it won't
> > make it harder to support the hardware properly.
>
> The hardware was built to guarantee that a byte access to skb->data[0]
> reads back byte 0 of the packet, not byte 3, regardless of whether
> we're in LE or BE mode.
>
> It takes a surprising amount of effort to make sure this is done
> consistently and correctly across dozens of onchip peripherals.
Yes, I can imagine the horror this must mean for hardware designers.
On systems where the hardware doesn't do any byte swaps, this is
no problem at all, the kernel just does the swaps in either
readl()/ioread32() or in ioread32be(), but not in ioread32_rep(),
which is used for repeated FIFO accesses.
> >> One catch is that almost all BCM7xxx MIPS systems are actually LE, and
> >> BE gets less test coverage. Some boards cannot even be configured for
> >> BE. BE has mostly been kept around to accommodate a few customers
> >> with legacy code, not out of a burning desire to support both modes of
> >> operation...
> >
> > If all the boards can be configured for LE, then you can just make this
> > mode required when upstreaming the kernel port, independent of how the
> > kernel runs.
>
> I was hoping to eventually come up with a multiplatform BE BMIPS image
> that boots on 3384, 6328, and 7346... (even though the common case for
> 7xxx is LE, it makes for a nice demo)
Yes, definitely.
Arnd