2010-11-27 12:25:31

by Hans Ulli Kroll

[permalink] [raw]
Subject: [PATCH] ARM: Gemini: Add support for PCI BUS

Add support for PCI Bux on Gemini Devices SL3516
-changed Paulius Zaleckas mail address
-changed #include <asm/gpio.h> to #include <linux/gpio.h>

Signed-off-by: Hans Ulli Kroll <[email protected]>
---
arch/arm/Kconfig | 1 +
arch/arm/mach-gemini/Makefile | 2 +
arch/arm/mach-gemini/include/mach/hardware.h | 8 +
arch/arm/mach-gemini/include/mach/irqs.h | 7 +-
arch/arm/mach-gemini/mm.c | 5 +
arch/arm/mach-gemini/pci.c | 319 ++++++++++++++++++++++++++
6 files changed, 340 insertions(+), 2 deletions(-)
create mode 100644 arch/arm/mach-gemini/pci.c

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a19a526..5d4b398 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -307,6 +307,7 @@ config ARCH_GEMINI
select CPU_FA526
select ARCH_REQUIRE_GPIOLIB
select ARCH_USES_GETTIMEOFFSET
+ select PCI
help
Support for the Cortina Systems Gemini family SoCs

diff --git a/arch/arm/mach-gemini/Makefile b/arch/arm/mach-gemini/Makefile
index c5b24b9..c263f48 100644
--- a/arch/arm/mach-gemini/Makefile
+++ b/arch/arm/mach-gemini/Makefile
@@ -6,6 +6,8 @@

obj-y := irq.o mm.o time.o devices.o gpio.o

+obj-$(CONFIG_PCI) += pci.o
+
# Board-specific support
obj-$(CONFIG_MACH_NAS4220B) += board-nas4220b.o
obj-$(CONFIG_MACH_RUT100) += board-rut1xx.o
diff --git a/arch/arm/mach-gemini/include/mach/hardware.h b/arch/arm/mach-gemini/include/mach/hardware.h
index 213a4fc..38c530f 100644
--- a/arch/arm/mach-gemini/include/mach/hardware.h
+++ b/arch/arm/mach-gemini/include/mach/hardware.h
@@ -71,4 +71,12 @@
*/
#define IO_ADDRESS(x) ((((x) & 0xFFF00000) >> 4) | ((x) & 0x000FFFFF) | 0xF0000000)

+/*
+ * PCI subsystem macros
+ */
+#define PCIBIOS_MIN_IO 0x00000100
+#define PCIBIOS_MIN_MEM 0x00000000
+
+#define pcibios_assign_all_busses() 1
+
#endif
diff --git a/arch/arm/mach-gemini/include/mach/irqs.h b/arch/arm/mach-gemini/include/mach/irqs.h
index 06bc47e..c737673 100644
--- a/arch/arm/mach-gemini/include/mach/irqs.h
+++ b/arch/arm/mach-gemini/include/mach/irqs.h
@@ -43,11 +43,14 @@

#define NORMAL_IRQ_NUM 32

-#define GPIO_IRQ_BASE NORMAL_IRQ_NUM
+#define PCI_IRQ_BASE NORMAL_IRQ_NUM
+#define PCI_IRQ_NUM 4
+
+#define GPIO_IRQ_BASE (NORMAL_IRQ_NUM + PCI_IRQ_NUM)
#define GPIO_IRQ_NUM (3 * 32)

#define ARCH_TIMER_IRQ IRQ_TIMER2

-#define NR_IRQS (NORMAL_IRQ_NUM + GPIO_IRQ_NUM)
+#define NR_IRQS (NORMAL_IRQ_NUM + PCI_IRQ_NUM + GPIO_IRQ_NUM)

#endif /* __MACH_IRQS_H__ */
diff --git a/arch/arm/mach-gemini/mm.c b/arch/arm/mach-gemini/mm.c
index 5194824..2bf20b2 100644
--- a/arch/arm/mach-gemini/mm.c
+++ b/arch/arm/mach-gemini/mm.c
@@ -59,6 +59,11 @@ static struct map_desc gemini_io_desc[] __initdata = {
.length = SZ_512K,
.type = MT_DEVICE,
}, {
+ .virtual = IO_ADDRESS(GEMINI_PCI_IO_BASE),
+ .pfn = __phys_to_pfn(GEMINI_PCI_IO_BASE),
+ .length = SZ_512K,
+ .type = MT_DEVICE,
+ }, {
.virtual = IO_ADDRESS(GEMINI_FLASH_CTRL_BASE),
.pfn = __phys_to_pfn(GEMINI_FLASH_CTRL_BASE),
.length = SZ_512K,
diff --git a/arch/arm/mach-gemini/pci.c b/arch/arm/mach-gemini/pci.c
new file mode 100644
index 0000000..1acdd07
--- /dev/null
+++ b/arch/arm/mach-gemini/pci.c
@@ -0,0 +1,319 @@
+/*
+ * Support for Gemini PCI Controller
+ *
+ * Copyright (C) 2009 Janos Laube <[email protected]>
+ * Copyright (C) 2009 Paulius Zaleckas <[email protected]>
+ *
+ * based on SL2312 PCI controller code
+ * Storlink (C) 2003
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/irq.h>
+
+#include <asm/mach/pci.h>
+#include <linux/gpio.h>
+
+#include <mach/irqs.h>
+
+#define GEMINI_PCI_IOSIZE_1M 0x0000
+
+#define GEMINI_PCI_PMC 0x40
+#define GEMINI_PCI_PMCSR 0x44
+#define GEMINI_PCI_CTRL1 0x48
+#define GEMINI_PCI_CTRL2 0x4C
+#define GEMINI_PCI_MEM1_BASE_SIZE 0x50
+#define GEMINI_PCI_MEM2_BASE_SIZE 0x54
+#define GEMINI_PCI_MEM3_BASE_SIZE 0x58
+
+#define PCI_CTRL2_INTSTS_OFFSET 28
+#define PCI_CTRL2_INTMASK_OFFSET 22
+
+#define GEMINI_PCI_DMA_MASK 0xFFF00000
+#define GEMINI_PCI_DMA_MEM1_BASE 0x00000000
+#define GEMINI_PCI_DMA_MEM2_BASE 0x00000000
+#define GEMINI_PCI_DMA_MEM3_BASE 0x00000000
+#define GEMINI_PCI_DMA_MEM1_SIZE 7
+#define GEMINI_PCI_DMA_MEM2_SIZE 6
+#define GEMINI_PCI_DMA_MEM3_SIZE 6
+
+#define PCI_CONF_ENABLE (1 << 31)
+#define PCI_CONF_WHERE(r) ((r) & 0xFC)
+#define PCI_CONF_BUS(b) (((b) & 0xFF) << 16)
+#define PCI_CONF_DEVICE(d) (((d) & 0x1F) << 11)
+#define PCI_CONF_FUNCTION(f) (((f) & 0x07) << 8)
+
+#define PCI_IOSIZE_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE))
+#define PCI_PROT_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x04)
+#define PCI_CTRL_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x08)
+#define PCI_SOFTRST_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x10)
+#define PCI_CONFIG_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x28)
+#define PCI_DATA_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x2C)
+
+
+static DEFINE_SPINLOCK(gemini_pci_lock);
+
+static struct resource gemini_pci_resource_io = {
+ .name = "PCI I/O Space",
+ .start = IO_ADDRESS(GEMINI_PCI_IO_BASE),
+ .end = IO_ADDRESS(GEMINI_PCI_IO_BASE) + SZ_1M - 1,
+ .flags = IORESOURCE_IO,
+};
+
+static struct resource gemini_pci_resource_mem = {
+ .name = "PCI Memory Space",
+ .start = GEMINI_PCI_MEM_BASE,
+ .end = GEMINI_PCI_MEM_BASE + SZ_128M - 1,
+ .flags = IORESOURCE_MEM,
+};
+
+static int gemini_pci_read_config(struct pci_bus *bus, unsigned int fn,
+ int config, int size, u32 *value)
+{
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&gemini_pci_lock, irq_flags);
+
+ __raw_writel(PCI_CONF_BUS(bus->number) |
+ PCI_CONF_DEVICE(PCI_SLOT(fn)) |
+ PCI_CONF_FUNCTION(PCI_FUNC(fn)) |
+ PCI_CONF_WHERE(config) |
+ PCI_CONF_ENABLE,
+ PCI_CONFIG_REG);
+
+ *value = __raw_readl(PCI_DATA_REG);
+
+ if (size == 1)
+ *value = (*value >> (8 * (config & 3))) & 0xFF;
+ else if (size == 2)
+ *value = (*value >> (8 * (config & 3))) & 0xFFFF;
+
+ spin_unlock_irqrestore(&gemini_pci_lock, irq_flags);
+
+ dev_dbg(&bus->dev,
+ "[read] slt: %.2d, fnc: %d, cnf: 0x%.2X, val (%d bytes): 0x%.8X\n",
+ PCI_SLOT(fn), PCI_FUNC(fn), config, size, *value);
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int gemini_pci_write_config(struct pci_bus *bus, unsigned int fn,
+ int config, int size, u32 value)
+{
+ unsigned long irq_flags = 0;
+ int ret = PCIBIOS_SUCCESSFUL;
+
+ dev_dbg(&bus->dev,
+ "[write] slt: %.2d, fnc: %d, cnf: 0x%.2X, val (%d bytes): 0x%.8X\n",
+ PCI_SLOT(fn), PCI_FUNC(fn), config, size, value);
+
+ spin_lock_irqsave(&gemini_pci_lock, irq_flags);
+
+ __raw_writel(PCI_CONF_BUS(bus->number) |
+ PCI_CONF_DEVICE(PCI_SLOT(fn)) |
+ PCI_CONF_FUNCTION(PCI_FUNC(fn)) |
+ PCI_CONF_WHERE(config) |
+ PCI_CONF_ENABLE,
+ PCI_CONFIG_REG);
+
+ switch (size) {
+ case 4:
+ __raw_writel(value, PCI_DATA_REG);
+ break;
+ case 2:
+ __raw_writew(value, PCI_DATA_REG + (config & 3));
+ break;
+ case 1:
+ __raw_writeb(value, PCI_DATA_REG + (config & 3));
+ break;
+ default:
+ ret = PCIBIOS_BAD_REGISTER_NUMBER;
+ }
+
+ spin_unlock_irqrestore(&gemini_pci_lock, irq_flags);
+
+ return ret;
+}
+
+static struct pci_ops gemini_pci_ops = {
+ .read = gemini_pci_read_config,
+ .write = gemini_pci_write_config,
+};
+
+static int __init gemini_pci_request_resources(struct pci_sys_data *sys)
+{
+ if (request_resource(&ioport_resource, &gemini_pci_resource_io))
+ goto bad_resources;
+ if (request_resource(&iomem_resource, &gemini_pci_resource_mem))
+ goto bad_resources;
+
+ sys->resource[0] = &gemini_pci_resource_io;
+ sys->resource[1] = &gemini_pci_resource_mem;
+ sys->resource[2] = 0;
+
+ return 0;
+
+bad_resources:
+ pr_err("Gemini PCI: request_resource() failed. "
+ "Abort PCI bus enumeration.\n");
+ return -1;
+}
+
+static int __init gemini_pci_setup(int nr, struct pci_sys_data *sys)
+{
+ unsigned int cmd;
+
+ if ((nr > 0) || gemini_pci_request_resources(sys))
+ return 0;
+
+ /* setup I/O space to 1MB size */
+ __raw_writel(GEMINI_PCI_IOSIZE_1M, PCI_IOSIZE_REG);
+
+ /* setup hostbridge */
+ cmd = __raw_readl(PCI_CTRL_REG);
+ cmd |= PCI_COMMAND_IO;
+ cmd |= PCI_COMMAND_MEMORY;
+ cmd |= PCI_COMMAND_MASTER;
+ __raw_writel(cmd, PCI_CTRL_REG);
+
+ return 1;
+}
+
+static struct pci_bus __init *gemini_pci_scan_bus(int nr,
+ struct pci_sys_data *sys)
+{
+ unsigned int reg = 0;
+ struct pci_bus *bus = 0;
+
+ bus = pci_scan_bus(nr, &gemini_pci_ops, sys);
+ if (bus) {
+ dev_dbg(&bus->dev, "setting up PCI DMA\n");
+ reg = (GEMINI_PCI_DMA_MEM1_BASE & GEMINI_PCI_DMA_MASK)
+ | (GEMINI_PCI_DMA_MEM1_SIZE << 16);
+ gemini_pci_write_config(bus, 0, GEMINI_PCI_MEM1_BASE_SIZE,
+ 4, reg);
+ reg = (GEMINI_PCI_DMA_MEM2_BASE & GEMINI_PCI_DMA_MASK)
+ | (GEMINI_PCI_DMA_MEM2_SIZE << 16);
+ gemini_pci_write_config(bus, 0, GEMINI_PCI_MEM2_BASE_SIZE,
+ 4, reg);
+ reg = (GEMINI_PCI_DMA_MEM3_BASE & GEMINI_PCI_DMA_MASK)
+ | (GEMINI_PCI_DMA_MEM3_SIZE << 16);
+ gemini_pci_write_config(bus, 0, GEMINI_PCI_MEM3_BASE_SIZE,
+ 4, reg);
+ }
+
+ return bus;
+}
+
+/* Should work with all boards based on original Storlink EVB */
+static int __init gemini_pci_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
+{
+ if (slot < 9 || slot > 12)
+ return -1;
+
+ return PCI_IRQ_BASE + (((slot - 9) + (pin - 1)) & 0x3);
+}
+
+static struct hw_pci gemini_hw_pci __initdata = {
+ .nr_controllers = 1,
+ .setup = gemini_pci_setup,
+ .scan = gemini_pci_scan_bus,
+ .swizzle = pci_std_swizzle,
+ .map_irq = gemini_pci_map_irq,
+};
+
+/* we need this for muxed PCI interrupts handling */
+static struct pci_bus bogus_pci_bus;
+
+static void gemini_pci_ack_irq(unsigned int irq)
+{
+ unsigned int reg;
+
+ gemini_pci_read_config(&bogus_pci_bus, 0, GEMINI_PCI_CTRL2, 4, &reg);
+ reg &= ~(0xF << PCI_CTRL2_INTSTS_OFFSET);
+ reg |= 1 << (irq - PCI_IRQ_BASE + PCI_CTRL2_INTSTS_OFFSET);
+ gemini_pci_write_config(&bogus_pci_bus, 0, GEMINI_PCI_CTRL2, 4, reg);
+}
+
+static void gemini_pci_mask_irq(unsigned int irq)
+{
+ unsigned int reg;
+
+ gemini_pci_read_config(&bogus_pci_bus, 0, GEMINI_PCI_CTRL2, 4, &reg);
+ reg &= ~((0xF << PCI_CTRL2_INTSTS_OFFSET)
+ | (1 << (irq - PCI_IRQ_BASE + PCI_CTRL2_INTMASK_OFFSET)));
+ gemini_pci_write_config(&bogus_pci_bus, 0, GEMINI_PCI_CTRL2, 4, reg);
+}
+
+static void gemini_pci_unmask_irq(unsigned int irq)
+{
+ unsigned int reg;
+
+ gemini_pci_read_config(&bogus_pci_bus, 0, GEMINI_PCI_CTRL2, 4, &reg);
+ reg &= ~(0xF << PCI_CTRL2_INTSTS_OFFSET);
+ reg |= 1 << (irq - PCI_IRQ_BASE + PCI_CTRL2_INTMASK_OFFSET);
+ gemini_pci_write_config(&bogus_pci_bus, 0, GEMINI_PCI_CTRL2, 4, reg);
+}
+
+static void gemini_pci_irq_handler(unsigned int irq, struct irq_desc *desc)
+{
+ unsigned int pci_irq_no, irq_stat, reg, i;
+
+ gemini_pci_read_config(&bogus_pci_bus, 0, GEMINI_PCI_CTRL2, 4, &reg);
+ irq_stat = reg >> PCI_CTRL2_INTSTS_OFFSET;
+
+ for (i = 0; i < 4; i++) {
+
+ if ((irq_stat & (1 << i)) == 0)
+ continue;
+
+ pci_irq_no = PCI_IRQ_BASE + i;
+
+ BUG_ON(!(irq_desc[pci_irq_no].handle_irq));
+ irq_desc[pci_irq_no].handle_irq(pci_irq_no,
+ &irq_desc[pci_irq_no]);
+ }
+}
+
+static struct irq_chip gemini_pci_irq_chip = {
+ .name = "PCI",
+ .ack = gemini_pci_ack_irq,
+ .mask = gemini_pci_mask_irq,
+ .unmask = gemini_pci_unmask_irq,
+};
+
+static int __init gemini_pci_init(void)
+{
+ int i;
+
+ for (i = 72; i <= 95; i++)
+ gpio_request(i, "PCI");
+
+ /* initialize our bogus bus */
+ dev_set_name(&bogus_pci_bus.dev, "PCI IRQ handler");
+ bogus_pci_bus.number = 0;
+
+ /* mask and clear all interrupts */
+ gemini_pci_write_config(&bogus_pci_bus, 0, GEMINI_PCI_CTRL2 + 2, 2,
+ 0xF000);
+
+ for (i = PCI_IRQ_BASE; i < PCI_IRQ_BASE + 4; i++) {
+ set_irq_chip(i, &gemini_pci_irq_chip);
+ set_irq_handler(i, handle_level_irq);
+ set_irq_flags(i, IRQF_VALID);
+ }
+
+ set_irq_chained_handler(IRQ_PCI, gemini_pci_irq_handler);
+
+ pci_common_init(&gemini_hw_pci);
+
+ return 0;
+}
+
+subsys_initcall(gemini_pci_init);
--
1.7.3.2


2010-11-28 19:56:36

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS

On Saturday 27 November 2010 13:24:35 Hans Ulli Kroll wrote:
> +#define PCI_IOSIZE_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE))
> +#define PCI_PROT_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x04)
> +#define PCI_CTRL_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x08)
> +#define PCI_SOFTRST_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x10)
> +#define PCI_CONFIG_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x28)
> +#define PCI_DATA_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x2C)

If you use the virtual address of the mapping instead of
GEMINI_PCI_IO_BASE, you don't need to repeat the IO_ADDRESS()
macro everywhere. I have a patch that gets rid of all the
conflicting definitions of this macro because it breaks
a multi-platform build once we get there.

> +static DEFINE_SPINLOCK(gemini_pci_lock);
> +
> +static struct resource gemini_pci_resource_io = {
> + .name = "PCI I/O Space",
> + .start = IO_ADDRESS(GEMINI_PCI_IO_BASE),
> + .end = IO_ADDRESS(GEMINI_PCI_IO_BASE) + SZ_1M - 1,
> + .flags = IORESOURCE_IO,
> +};
> +

This looks wrong in multiple ways:

* resources are physical addresses, not virtual addresses
* GEMINI_PCI_IO_BASE is an address in memory space, so it
needs to be IORESOURCE_MEM, not IORESOURCE_IO. You can
also register the IORESOURCE_IO resource, but that would
be .start=PCIBIOS_MIN_IO, .end=IO_SPACE_LIMIT.
* IO_SPACE_LIMIT is larger than the I/O window, which can
cause overflows. Setting it to 0xffff is generally enough.

> + spin_lock_irqsave(&gemini_pci_lock, irq_flags);
> +
> + __raw_writel(PCI_CONF_BUS(bus->number) |
> + PCI_CONF_DEVICE(PCI_SLOT(fn)) |
> + PCI_CONF_FUNCTION(PCI_FUNC(fn)) |
> + PCI_CONF_WHERE(config) |
> + PCI_CONF_ENABLE,
> + PCI_CONFIG_REG);
> +
> + switch (size) {
> + case 4:
> + __raw_writel(value, PCI_DATA_REG);
> + break;
> + case 2:
> + __raw_writew(value, PCI_DATA_REG + (config & 3));
> + break;
> + case 1:
> + __raw_writeb(value, PCI_DATA_REG + (config & 3));
> + break;
> + default:
> + ret = PCIBIOS_BAD_REGISTER_NUMBER;
> + }
> +
> + spin_unlock_irqrestore(&gemini_pci_lock, irq_flags);

The I/O ordering is probably not what you think it is.
There is no ordering guarantee between __raw_writel and
spin_lock/spin_unlock, so you really should be using
readl/writel.

Note that the pci_ops are called under another spinlock, so
you also don't need to take gemini_pci_lock here.

Arnd

2010-11-29 12:17:20

by Hans Ulli Kroll

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS



On Sun, 28 Nov 2010, Arnd Bergmann wrote:

> On Saturday 27 November 2010 13:24:35 Hans Ulli Kroll wrote:
> > +#define PCI_IOSIZE_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE))
> > +#define PCI_PROT_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x04)
> > +#define PCI_CTRL_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x08)
> > +#define PCI_SOFTRST_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x10)
> > +#define PCI_CONFIG_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x28)
> > +#define PCI_DATA_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x2C)
>
> If you use the virtual address of the mapping instead of
> GEMINI_PCI_IO_BASE, you don't need to repeat the IO_ADDRESS()
> macro everywhere. I have a patch that gets rid of all the
> conflicting definitions of this macro because it breaks
> a multi-platform build once we get there.
>
> > +static DEFINE_SPINLOCK(gemini_pci_lock);
> > +
> > +static struct resource gemini_pci_resource_io = {
> > + .name = "PCI I/O Space",
> > + .start = IO_ADDRESS(GEMINI_PCI_IO_BASE),
> > + .end = IO_ADDRESS(GEMINI_PCI_IO_BASE) + SZ_1M - 1,
> > + .flags = IORESOURCE_IO,
> > +};
> > +
>
> This looks wrong in multiple ways:
>
> * resources are physical addresses, not virtual addresses
> * GEMINI_PCI_IO_BASE is an address in memory space, so it
> needs to be IORESOURCE_MEM, not IORESOURCE_IO. You can
> also register the IORESOURCE_IO resource, but that would
> be .start=PCIBIOS_MIN_IO, .end=IO_SPACE_LIMIT.
> * IO_SPACE_LIMIT is larger than the I/O window, which can
> cause overflows. Setting it to 0xffff is generally enough.
>

So I must remove these lines ??
}, {
.virtual = IO_ADDRESS(GEMINI_PCI_IO_BASE),
.pfn = __phys_to_pfn(GEMINI_PCI_IO_BASE),
.length = SZ_512K,
.type = MT_DEVICE,
},
These are from arch/arm/mach-gemini/mm.c

> > + spin_lock_irqsave(&gemini_pci_lock, irq_flags);
> > +
> > + __raw_writel(PCI_CONF_BUS(bus->number) |
> > + PCI_CONF_DEVICE(PCI_SLOT(fn)) |
> > + PCI_CONF_FUNCTION(PCI_FUNC(fn)) |
> > + PCI_CONF_WHERE(config) |
> > + PCI_CONF_ENABLE,
> > + PCI_CONFIG_REG);
> > +
> > + switch (size) {
> > + case 4:
> > + __raw_writel(value, PCI_DATA_REG);
> > + break;
> > + case 2:
> > + __raw_writew(value, PCI_DATA_REG + (config & 3));
> > + break;
> > + case 1:
> > + __raw_writeb(value, PCI_DATA_REG + (config & 3));
> > + break;
> > + default:
> > + ret = PCIBIOS_BAD_REGISTER_NUMBER;
> > + }
> > +
> > + spin_unlock_irqrestore(&gemini_pci_lock, irq_flags);
>
> The I/O ordering is probably not what you think it is.
> There is no ordering guarantee between __raw_writel and
> spin_lock/spin_unlock, so you really should be using
> readl/writel.
>
> Note that the pci_ops are called under another spinlock, so
> you also don't need to take gemini_pci_lock here.
>
> Arnd
>

2010-11-29 15:02:58

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS

On Monday 29 November 2010, Hans Ulli Kroll wrote:
> >
> > This looks wrong in multiple ways:
> >
> > * resources are physical addresses, not virtual addresses
> > * GEMINI_PCI_IO_BASE is an address in memory space, so it
> > needs to be IORESOURCE_MEM, not IORESOURCE_IO. You can
> > also register the IORESOURCE_IO resource, but that would
> > be .start=PCIBIOS_MIN_IO, .end=IO_SPACE_LIMIT.
> > * IO_SPACE_LIMIT is larger than the I/O window, which can
> > cause overflows. Setting it to 0xffff is generally enough.
> >
>
> So I must remove these lines ??
> }, {
> .virtual = IO_ADDRESS(GEMINI_PCI_IO_BASE),
> .pfn = __phys_to_pfn(GEMINI_PCI_IO_BASE),
> .length = SZ_512K,
> .type = MT_DEVICE,
> },
> These are from arch/arm/mach-gemini/mm.c

No, these look right, just fix the issues I mentioned, i.e.
the definition of IO_SPACE_LIMIT and the resource registration.

Arnd

2010-11-29 16:11:23

by Paulius Zaleckas

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS

On 11/28/2010 09:56 PM, Arnd Bergmann wrote:
> On Saturday 27 November 2010 13:24:35 Hans Ulli Kroll wrote:
>> +#define PCI_IOSIZE_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE))
>> +#define PCI_PROT_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x04)
>> +#define PCI_CTRL_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x08)
>> +#define PCI_SOFTRST_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x10)
>> +#define PCI_CONFIG_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x28)
>> +#define PCI_DATA_REG (IO_ADDRESS(GEMINI_PCI_IO_BASE) + 0x2C)
>
> If you use the virtual address of the mapping instead of
> GEMINI_PCI_IO_BASE, you don't need to repeat the IO_ADDRESS()
> macro everywhere. I have a patch that gets rid of all the
> conflicting definitions of this macro because it breaks
> a multi-platform build once we get there.
>
>> +static DEFINE_SPINLOCK(gemini_pci_lock);
>> +
>> +static struct resource gemini_pci_resource_io = {
>> + .name = "PCI I/O Space",
>> + .start = IO_ADDRESS(GEMINI_PCI_IO_BASE),
>> + .end = IO_ADDRESS(GEMINI_PCI_IO_BASE) + SZ_1M - 1,
>> + .flags = IORESOURCE_IO,
>> +};
>> +
>
> This looks wrong in multiple ways:
>
> * resources are physical addresses, not virtual addresses
> * GEMINI_PCI_IO_BASE is an address in memory space, so it
> needs to be IORESOURCE_MEM, not IORESOURCE_IO. You can
> also register the IORESOURCE_IO resource, but that would
> be .start=PCIBIOS_MIN_IO, .end=IO_SPACE_LIMIT.
> * IO_SPACE_LIMIT is larger than the I/O window, which can
> cause overflows. Setting it to 0xffff is generally enough.
>
>> + spin_lock_irqsave(&gemini_pci_lock, irq_flags);
>> +
>> + __raw_writel(PCI_CONF_BUS(bus->number) |
>> + PCI_CONF_DEVICE(PCI_SLOT(fn)) |
>> + PCI_CONF_FUNCTION(PCI_FUNC(fn)) |
>> + PCI_CONF_WHERE(config) |
>> + PCI_CONF_ENABLE,
>> + PCI_CONFIG_REG);
>> +
>> + switch (size) {
>> + case 4:
>> + __raw_writel(value, PCI_DATA_REG);
>> + break;
>> + case 2:
>> + __raw_writew(value, PCI_DATA_REG + (config& 3));
>> + break;
>> + case 1:
>> + __raw_writeb(value, PCI_DATA_REG + (config& 3));
>> + break;
>> + default:
>> + ret = PCIBIOS_BAD_REGISTER_NUMBER;
>> + }
>> +
>> + spin_unlock_irqrestore(&gemini_pci_lock, irq_flags);
>
> The I/O ordering is probably not what you think it is.
> There is no ordering guarantee between __raw_writel and
> spin_lock/spin_unlock, so you really should be using
> readl/writel.

No he really should NOT use readl/writel. The ONLY difference
between readl/writel and __raw_readl/__raw_writel is endianess
conversion. __raw_*l is not doing it. Which to use depend only
on HW.

> Note that the pci_ops are called under another spinlock, so
> you also don't need to take gemini_pci_lock here.
>
> Arnd

2010-11-29 16:45:31

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS

On Monday 29 November 2010, Paulius Zaleckas wrote:
> > The I/O ordering is probably not what you think it is.
> > There is no ordering guarantee between __raw_writel and
> > spin_lock/spin_unlock, so you really should be using
> > readl/writel.
>
> No he really should NOT use readl/writel. The ONLY difference
> between readl/writel and __raw_readl/__raw_writel is endianess
> conversion. __raw_*l is not doing it. Which to use depend only
> on HW.

There are many differences between readl and __raw_readl, including

* __raw_readl does not have barriers and does not serialize with
spinlocks, so it breaks on out-of-order CPUs.
* __raw_readl does not have a specific endianess, while readl is
fixed little-endian, just as the hardware is in this case.
The endian-conversion is a NOP on little-endian ARM, but required
if you actually run on a big-endian ARM (you don't).
* __raw_readl may not be atomic, gcc is free to split the access
into byte wise reads (it normally does not, unless you mark
the pointer __attribute__((packed))).

In essence, it is almost never a good idea to use __raw_readl, and
the double underscores should tell you so.

Arnd

2010-11-29 18:52:59

by Paulius Zaleckas

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS

On 11/29/2010 06:45 PM, Arnd Bergmann wrote:
> On Monday 29 November 2010, Paulius Zaleckas wrote:
>>> The I/O ordering is probably not what you think it is.
>>> There is no ordering guarantee between __raw_writel and
>>> spin_lock/spin_unlock, so you really should be using
>>> readl/writel.
>>
>> No he really should NOT use readl/writel. The ONLY difference
>> between readl/writel and __raw_readl/__raw_writel is endianess
>> conversion. __raw_*l is not doing it. Which to use depend only
>> on HW.
>
> There are many differences between readl and __raw_readl, including
>
> * __raw_readl does not have barriers and does not serialize with
> spinlocks, so it breaks on out-of-order CPUs.
> * __raw_readl does not have a specific endianess, while readl is
> fixed little-endian, just as the hardware is in this case.
> The endian-conversion is a NOP on little-endian ARM, but required
> if you actually run on a big-endian ARM (you don't).
> * __raw_readl may not be atomic, gcc is free to split the access
> into byte wise reads (it normally does not, unless you mark
> the pointer __attribute__((packed))).
>
> In essence, it is almost never a good idea to use __raw_readl, and
> the double underscores should tell you so.

You are wrong:

Since CONFIG_ARM_DMA_MEM_BUFFERABLE is NOT defined for FA526 core,
no barriers are in use when using readl. It just translates into
le32_to_cpu(__raw_readl(x)). Now this CPU has physical pin for endianess
configuration and if you will chose big-endian you will fail to read
internal registers, because they ALSO change endianess and le32_to_cpu()
will screw it. However it is different when accessing registers through
PCI bus, then you need to use readl().

2010-11-29 19:33:08

by Russell King - ARM Linux

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS

On Mon, Nov 29, 2010 at 06:05:07PM +0200, Paulius Zaleckas wrote:
> No he really should NOT use readl/writel. The ONLY difference
> between readl/writel and __raw_readl/__raw_writel is endianess
> conversion. __raw_*l is not doing it. Which to use depend only
> on HW.

Wrong. readl/writel have barriers too to guarantee ordering between
device accesses and memory accesses. (device accesses are already
ordered with respect to themselves.) __raw variants do not
guarantee the relative ordering between memory accesses and device
accesses.

2010-11-29 19:57:28

by Paulius Zaleckas

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS

On 11/29/2010 09:32 PM, Russell King - ARM Linux wrote:
> On Mon, Nov 29, 2010 at 06:05:07PM +0200, Paulius Zaleckas wrote:
>> No he really should NOT use readl/writel. The ONLY difference
>> between readl/writel and __raw_readl/__raw_writel is endianess
>> conversion. __raw_*l is not doing it. Which to use depend only
>> on HW.
>
> Wrong. readl/writel have barriers too to guarantee ordering between
> device accesses and memory accesses. (device accesses are already
> ordered with respect to themselves.) __raw variants do not
> guarantee the relative ordering between memory accesses and device
> accesses.

#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
#define __iormb() rmb()
#define __iowmb() wmb()
#else
#define __iormb() do { } while (0)
#define __iowmb() do { } while (0)
#endif

CONFIG_ARM_DMA_MEM_BUFFERABLE is NOT defined in this case.
I don't see any other barriers.
readl does endianess conversion. What to do if you don't need it?
Do it one more time?

2010-11-29 20:03:05

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS

On Monday 29 November 2010 19:52:55 Paulius Zaleckas wrote:
> On 11/29/2010 06:45 PM, Arnd Bergmann wrote:
> > There are many differences between readl and __raw_readl, including
> >
> > * __raw_readl does not have barriers and does not serialize with
> > spinlocks, so it breaks on out-of-order CPUs.
> > * __raw_readl does not have a specific endianess, while readl is
> > fixed little-endian, just as the hardware is in this case.
> > The endian-conversion is a NOP on little-endian ARM, but required
> > if you actually run on a big-endian ARM (you don't).
> > * __raw_readl may not be atomic, gcc is free to split the access
> > into byte wise reads (it normally does not, unless you mark
> > the pointer __attribute__((packed))).
> >
> > In essence, it is almost never a good idea to use __raw_readl, and
> > the double underscores should tell you so.
>
> You are wrong:
>
> Since CONFIG_ARM_DMA_MEM_BUFFERABLE is NOT defined for FA526 core,
> no barriers are in use when using readl. It just translates into
> le32_to_cpu(__raw_readl(x)). Now this CPU has physical pin for endianess
> configuration and if you will chose big-endian you will fail to read
> internal registers, because they ALSO change endianess and le32_to_cpu()
> will screw it. However it is different when accessing registers through
> PCI bus, then you need to use readl().

Ok, I only checked that the platform does not support big-endian Linux
kernel, not if the HW designers screwed up their registers, sorry about
that.

The other points are of course still valid: If the code ever gets
used on an out of order CPU, it is broken. More importantly, if someone
looks at the code as an example for writing another PCI support code,
it may end up getting copied to some place where it ends up causing
trouble.

The typical way to deal with mixed-endian hardware reliably is to have
a header file containing code like

#ifdef CONFIG_GEMINI_BIG_ENDIAN_IO
#define gemini_readl(x) __swab32(readl(x))
#define ...
#else
#define gemini_readl(x) readl(x))
#endif

This also takes care of the (not as unlikely as you'd hope) case that
the next person reusing the PCI hardware wires its endianess different
from the CPU endianess.

Arnd

2010-11-29 20:20:06

by Paulius Zaleckas

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS

On 11/29/2010 10:02 PM, Arnd Bergmann wrote:
> On Monday 29 November 2010 19:52:55 Paulius Zaleckas wrote:
>> On 11/29/2010 06:45 PM, Arnd Bergmann wrote:
>>> There are many differences between readl and __raw_readl, including
>>>
>>> * __raw_readl does not have barriers and does not serialize with
>>> spinlocks, so it breaks on out-of-order CPUs.
>>> * __raw_readl does not have a specific endianess, while readl is
>>> fixed little-endian, just as the hardware is in this case.
>>> The endian-conversion is a NOP on little-endian ARM, but required
>>> if you actually run on a big-endian ARM (you don't).
>>> * __raw_readl may not be atomic, gcc is free to split the access
>>> into byte wise reads (it normally does not, unless you mark
>>> the pointer __attribute__((packed))).
>>>
>>> In essence, it is almost never a good idea to use __raw_readl, and
>>> the double underscores should tell you so.
>>
>> You are wrong:
>>
>> Since CONFIG_ARM_DMA_MEM_BUFFERABLE is NOT defined for FA526 core,
>> no barriers are in use when using readl. It just translates into
>> le32_to_cpu(__raw_readl(x)). Now this CPU has physical pin for endianess
>> configuration and if you will chose big-endian you will fail to read
>> internal registers, because they ALSO change endianess and le32_to_cpu()
>> will screw it. However it is different when accessing registers through
>> PCI bus, then you need to use readl().
>
> Ok, I only checked that the platform does not support big-endian Linux
> kernel, not if the HW designers screwed up their registers, sorry about
> that.
>
> The other points are of course still valid: If the code ever gets
> used on an out of order CPU, it is broken. More importantly, if someone
> looks at the code as an example for writing another PCI support code,
> it may end up getting copied to some place where it ends up causing
> trouble.
>
> The typical way to deal with mixed-endian hardware reliably is to have
> a header file containing code like
>
> #ifdef CONFIG_GEMINI_BIG_ENDIAN_IO
> #define gemini_readl(x) __swab32(readl(x))
> #define ...
> #else
> #define gemini_readl(x) readl(x))
> #endif
>
> This also takes care of the (not as unlikely as you'd hope) case that
> the next person reusing the PCI hardware wires its endianess different
> from the CPU endianess.

Actually I am not very sure how CPU works in big endian mode :)
I have never tried it and I think only some guys who made it did that.
So readl will work for 99.99% of cases. In datasheet they say that:
"All registers in Gemini use Little Endian and must be accessed by aligned
32-bit word operations. The bus connection interface logic provides an Endian
Conversion function."
For me it looks like it can mean whatever you want :)

2010-11-30 08:15:19

by Hans Ulli Kroll

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS



On Mon, 29 Nov 2010, Paulius Zaleckas wrote:

> On 11/29/2010 10:02 PM, Arnd Bergmann wrote:
> > On Monday 29 November 2010 19:52:55 Paulius Zaleckas wrote:
> > > On 11/29/2010 06:45 PM, Arnd Bergmann wrote:
> > > > There are many differences between readl and __raw_readl, including
> > > >
> > > > * __raw_readl does not have barriers and does not serialize with
> > > > spinlocks, so it breaks on out-of-order CPUs.
> > > > * __raw_readl does not have a specific endianess, while readl is
> > > > fixed little-endian, just as the hardware is in this case.
> > > > The endian-conversion is a NOP on little-endian ARM, but required
> > > > if you actually run on a big-endian ARM (you don't).
> > > > * __raw_readl may not be atomic, gcc is free to split the access
> > > > into byte wise reads (it normally does not, unless you mark
> > > > the pointer __attribute__((packed))).
> > > >
> > > > In essence, it is almost never a good idea to use __raw_readl, and
> > > > the double underscores should tell you so.
> > >
> > > You are wrong:
> > >
> > > Since CONFIG_ARM_DMA_MEM_BUFFERABLE is NOT defined for FA526 core,
> > > no barriers are in use when using readl. It just translates into
> > > le32_to_cpu(__raw_readl(x)). Now this CPU has physical pin for endianess
> > > configuration and if you will chose big-endian you will fail to read
> > > internal registers, because they ALSO change endianess and le32_to_cpu()
> > > will screw it. However it is different when accessing registers through
> > > PCI bus, then you need to use readl().
> >
> > Ok, I only checked that the platform does not support big-endian Linux
> > kernel, not if the HW designers screwed up their registers, sorry about
> > that.
> >
> > The other points are of course still valid: If the code ever gets
> > used on an out of order CPU, it is broken. More importantly, if someone
> > looks at the code as an example for writing another PCI support code,
> > it may end up getting copied to some place where it ends up causing
> > trouble.
> >
> > The typical way to deal with mixed-endian hardware reliably is to have
> > a header file containing code like
> >
> > #ifdef CONFIG_GEMINI_BIG_ENDIAN_IO
> > #define gemini_readl(x) __swab32(readl(x))
> > #define ...
> > #else
> > #define gemini_readl(x) readl(x))
> > #endif
> >
> > This also takes care of the (not as unlikely as you'd hope) case that
> > the next person reusing the PCI hardware wires its endianess different
> > from the CPU endianess.
>
> Actually I am not very sure how CPU works in big endian mode :)
> I have never tried it and I think only some guys who made it did that.
> So readl will work for 99.99% of cases. In datasheet they say that:
> "All registers in Gemini use Little Endian and must be accessed by aligned
> 32-bit word operations. The bus connection interface logic provides an Endian
> Conversion function."
> For me it looks like it can mean whatever you want :)
>

I think the endianes pin switched only the cpu, not the hardware
registers.

Here is some sample code from the ethernet devive on Gemini
typedef union
{
unsigned int bits32;
struct bit
{
#if (BIG_ENDIAN==1)
unsigned int reserved : 15; // bit 31:17
unsigned int v_bit_mode : 1; // bit 16
unsigned int device_id : 12; // bit 15:4
unsigned int revision_id : 4; // bit 3:0
#else
unsigned int revision_id : 4; // bit 3:0
unsigned int device_id : 12; // bit 15:4
unsigned int v_bit_mode : 1; // bit 16
unsigned int reserved : 15; // bit 31:17
#endif
} bits;
} TOE_VERSION_T;

2010-11-30 09:34:59

by Paulius Zaleckas

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS

On Tue, Nov 30, 2010 at 10:15 AM, Hans Ulli Kroll
<[email protected]> wrote:
>
>
> On Mon, 29 Nov 2010, Paulius Zaleckas wrote:
>
>> On 11/29/2010 10:02 PM, Arnd Bergmann wrote:
>> > On Monday 29 November 2010 19:52:55 Paulius Zaleckas wrote:
>> > > On 11/29/2010 06:45 PM, Arnd Bergmann wrote:
>> > > > There are many differences between readl and __raw_readl, including
>> > > >
>> > > > * __raw_readl does not have barriers and does not serialize with
>> > > > ? ? spinlocks, so it breaks on out-of-order CPUs.
>> > > > * __raw_readl does not have a specific endianess, while readl is
>> > > > ? ? fixed little-endian, just as the hardware is in this case.
>> > > > ? ? The endian-conversion is a NOP on little-endian ARM, but required
>> > > > ? ? if you actually run on a big-endian ARM (you don't).
>> > > > * __raw_readl may not be atomic, gcc is free to split the access
>> > > > ? ? into byte wise reads (it normally does not, unless you mark
>> > > > ? ? the pointer __attribute__((packed))).
>> > > >
>> > > > In essence, it is almost never a good idea to use __raw_readl, and
>> > > > the double underscores should tell you so.
>> > >
>> > > You are wrong:
>> > >
>> > > Since CONFIG_ARM_DMA_MEM_BUFFERABLE is NOT defined for FA526 core,
>> > > no barriers are in use when using readl. It just translates into
>> > > le32_to_cpu(__raw_readl(x)). Now this CPU has physical pin for endianess
>> > > configuration and if you will chose big-endian you will fail to read
>> > > internal registers, because they ALSO change endianess and le32_to_cpu()
>> > > will screw it. However it is different when accessing registers through
>> > > PCI bus, then you need to use readl().
>> >
>> > Ok, I only checked that the platform does not support big-endian Linux
>> > kernel, not if the HW designers screwed up their registers, sorry about
>> > that.
>> >
>> > The other points are of course still valid: If the code ever gets
>> > used on an out of order CPU, it is broken. More importantly, if someone
>> > looks at the code as an example for writing another PCI support code,
>> > it may end up getting copied to some place where it ends up causing
>> > trouble.
>> >
>> > The typical way to deal with mixed-endian hardware reliably is to have
>> > a header file containing code like
>> >
>> > #ifdef CONFIG_GEMINI_BIG_ENDIAN_IO
>> > #define gemini_readl(x) __swab32(readl(x))
>> > #define ...
>> > #else
>> > #define gemini_readl(x) readl(x))
>> > #endif
>> >
>> > This also takes care of the (not as unlikely as you'd hope) case that
>> > the next person reusing the PCI hardware wires its endianess different
>> > from the CPU endianess.
>>
>> Actually I am not very sure how CPU works in big endian mode :)
>> I have never tried it and I think only some guys who made it did that.
>> So readl will work for 99.99% of cases. In datasheet they say that:
>> "All registers in Gemini use Little Endian and must be accessed by aligned
>> 32-bit word operations. The bus connection interface logic provides an Endian
>> Conversion function."
>> For me it looks like it can mean whatever you want :)
>>
>
> I think the endianes pin switched only the cpu, not the hardware
> registers.

Yes, but original driver used readl/writel and it does le32_to_cpu,
so that structure definition is just reversing it.
If you will use __raw_readl/__raw_writel than there will be no need
for this redefinition.

> Here is some sample code from the ethernet devive on Gemini
> typedef union
> {
> ? ? ? ?unsigned int bits32;
> ? ? ? ?struct bit
> ? ? ? ?{
> #if (BIG_ENDIAN==1)
> ? ? ? ? ? ? ? ?unsigned int reserved ? ? ? ? ? : 15; ? // bit 31:17
> ? ? ? ? ? ? ? ?unsigned int v_bit_mode ? ? ? ? : 1; ? ?// bit 16
> ? ? ? ? ? ? ? ?unsigned int device_id ? ? ? ? ?: 12; ? // bit 15:4
> ? ? ? ? ? ? ? ?unsigned int revision_id ? ? ? ?: 4; ? ?// bit ?3:0
> #else
> ? ? ? ? ? ? ? ?unsigned int revision_id ? ? ? ?: 4; ? ?// bit ?3:0
> ? ? ? ? ? ? ? ?unsigned int device_id ? ? ? ? ?: 12; ? // bit 15:4
> ? ? ? ? ? ? ? ?unsigned int v_bit_mode ? ? ? ? : 1; ? ?// bit 16
> ? ? ? ? ? ? ? ?unsigned int reserved ? ? ? ? ? : 15; ? // bit 31:17
> #endif

The other thing is that this endianess redefinition is very starnge since
it should swap bytes and not bits inside this struct. So I assume that
big endian was never tested on this driver and it will not work.
But ofcouse I can be wrong here :)

> ? ? ? ?} bits;
> } TOE_VERSION_T;
>
>
>

2010-12-01 11:52:49

by Hans Ulli Kroll

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS



On Tue, 30 Nov 2010, Paulius Zaleckas wrote:

> On Tue, Nov 30, 2010 at 10:15 AM, Hans Ulli Kroll
> <[email protected]> wrote:
> >
> >
> > On Mon, 29 Nov 2010, Paulius Zaleckas wrote:
> >
> >> On 11/29/2010 10:02 PM, Arnd Bergmann wrote:
> >> > On Monday 29 November 2010 19:52:55 Paulius Zaleckas wrote:
> >> > > On 11/29/2010 06:45 PM, Arnd Bergmann wrote:
> >> > > > There are many differences between readl and __raw_readl, including
> >> > > >
> >> > > > * __raw_readl does not have barriers and does not serialize with
> >> > > > ? ? spinlocks, so it breaks on out-of-order CPUs.
> >> > > > * __raw_readl does not have a specific endianess, while readl is
> >> > > > ? ? fixed little-endian, just as the hardware is in this case.
> >> > > > ? ? The endian-conversion is a NOP on little-endian ARM, but required
> >> > > > ? ? if you actually run on a big-endian ARM (you don't).
> >> > > > * __raw_readl may not be atomic, gcc is free to split the access
> >> > > > ? ? into byte wise reads (it normally does not, unless you mark
> >> > > > ? ? the pointer __attribute__((packed))).
> >> > > >
> >> > > > In essence, it is almost never a good idea to use __raw_readl, and
> >> > > > the double underscores should tell you so.
> >> > >
> >> > > You are wrong:
> >> > >
> >> > > Since CONFIG_ARM_DMA_MEM_BUFFERABLE is NOT defined for FA526 core,
> >> > > no barriers are in use when using readl. It just translates into
> >> > > le32_to_cpu(__raw_readl(x)). Now this CPU has physical pin for endianess
> >> > > configuration and if you will chose big-endian you will fail to read
> >> > > internal registers, because they ALSO change endianess and le32_to_cpu()
> >> > > will screw it. However it is different when accessing registers through
> >> > > PCI bus, then you need to use readl().
> >> >
> >> > Ok, I only checked that the platform does not support big-endian Linux
> >> > kernel, not if the HW designers screwed up their registers, sorry about
> >> > that.
> >> >
> >> > The other points are of course still valid: If the code ever gets
> >> > used on an out of order CPU, it is broken. More importantly, if someone
> >> > looks at the code as an example for writing another PCI support code,
> >> > it may end up getting copied to some place where it ends up causing
> >> > trouble.
> >> >
> >> > The typical way to deal with mixed-endian hardware reliably is to have
> >> > a header file containing code like
> >> >
> >> > #ifdef CONFIG_GEMINI_BIG_ENDIAN_IO
> >> > #define gemini_readl(x) __swab32(readl(x))
> >> > #define ...
> >> > #else
> >> > #define gemini_readl(x) readl(x))
> >> > #endif
> >> >
> >> > This also takes care of the (not as unlikely as you'd hope) case that
> >> > the next person reusing the PCI hardware wires its endianess different
> >> > from the CPU endianess.
> >>
> >> Actually I am not very sure how CPU works in big endian mode :)
> >> I have never tried it and I think only some guys who made it did that.
> >> So readl will work for 99.99% of cases. In datasheet they say that:
> >> "All registers in Gemini use Little Endian and must be accessed by aligned
> >> 32-bit word operations. The bus connection interface logic provides an Endian
> >> Conversion function."
> >> For me it looks like it can mean whatever you want :)
> >>
> >
> > I think the endianes pin switched only the cpu, not the hardware
> > registers.
>
> Yes, but original driver used readl/writel and it does le32_to_cpu,
> so that structure definition is just reversing it.
> If you will use __raw_readl/__raw_writel than there will be no need
> for this redefinition.
>
> > Here is some sample code from the ethernet devive on Gemini
> > typedef union
> > {
> > ? ? ? ?unsigned int bits32;
> > ? ? ? ?struct bit
> > ? ? ? ?{
> > #if (BIG_ENDIAN==1)
> > ? ? ? ? ? ? ? ?unsigned int reserved ? ? ? ? ? : 15; ? // bit 31:17
> > ? ? ? ? ? ? ? ?unsigned int v_bit_mode ? ? ? ? : 1; ? ?// bit 16
> > ? ? ? ? ? ? ? ?unsigned int device_id ? ? ? ? ?: 12; ? // bit 15:4
> > ? ? ? ? ? ? ? ?unsigned int revision_id ? ? ? ?: 4; ? ?// bit ?3:0
> > #else
> > ? ? ? ? ? ? ? ?unsigned int revision_id ? ? ? ?: 4; ? ?// bit ?3:0
> > ? ? ? ? ? ? ? ?unsigned int device_id ? ? ? ? ?: 12; ? // bit 15:4
> > ? ? ? ? ? ? ? ?unsigned int v_bit_mode ? ? ? ? : 1; ? ?// bit 16
> > ? ? ? ? ? ? ? ?unsigned int reserved ? ? ? ? ? : 15; ? // bit 31:17
> > #endif
>
> The other thing is that this endianess redefinition is very starnge since
> it should swap bytes and not bits inside this struct. So I assume that
> big endian was never tested on this driver and it will not work.
> But ofcouse I can be wrong here :)
>

At this momment my brain restarts in very slow motion mode ;-)
This can't work. The definition Storlinksemi uses for swapping bits and
bytes are totaly wrong.
They never _even_ testet this, or understand little endian or big endian.
Take this simple sample

typedef union {
unsigned int bits32;
struct bit {
#if (BIG_ENDIAN==0)
unsigned int a : 1;
unsigned int b : 31;
#else
unsigned int b : 31;
unsigned int a : 1;
#endif
};
} TEST;

They swaped the bits inside one byte

> > ? ? ? ?} bits;
> > } TOE_VERSION_T;
> >
> >
> >
>

2010-12-01 13:08:09

by Paulius Zaleckas

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS

On Wed, Dec 1, 2010 at 1:52 PM, Hans Ulli Kroll
<[email protected]> wrote:
>
>
> On Tue, 30 Nov 2010, Paulius Zaleckas wrote:
>
>> On Tue, Nov 30, 2010 at 10:15 AM, Hans Ulli Kroll
>> <[email protected]> wrote:
>> >
>> >
>> > On Mon, 29 Nov 2010, Paulius Zaleckas wrote:
>> >
>> >> On 11/29/2010 10:02 PM, Arnd Bergmann wrote:
>> >> > On Monday 29 November 2010 19:52:55 Paulius Zaleckas wrote:
>> >> > > On 11/29/2010 06:45 PM, Arnd Bergmann wrote:
>> >> > > > There are many differences between readl and __raw_readl, including
>> >> > > >
>> >> > > > * __raw_readl does not have barriers and does not serialize with
>> >> > > > ? ? spinlocks, so it breaks on out-of-order CPUs.
>> >> > > > * __raw_readl does not have a specific endianess, while readl is
>> >> > > > ? ? fixed little-endian, just as the hardware is in this case.
>> >> > > > ? ? The endian-conversion is a NOP on little-endian ARM, but required
>> >> > > > ? ? if you actually run on a big-endian ARM (you don't).
>> >> > > > * __raw_readl may not be atomic, gcc is free to split the access
>> >> > > > ? ? into byte wise reads (it normally does not, unless you mark
>> >> > > > ? ? the pointer __attribute__((packed))).
>> >> > > >
>> >> > > > In essence, it is almost never a good idea to use __raw_readl, and
>> >> > > > the double underscores should tell you so.
>> >> > >
>> >> > > You are wrong:
>> >> > >
>> >> > > Since CONFIG_ARM_DMA_MEM_BUFFERABLE is NOT defined for FA526 core,
>> >> > > no barriers are in use when using readl. It just translates into
>> >> > > le32_to_cpu(__raw_readl(x)). Now this CPU has physical pin for endianess
>> >> > > configuration and if you will chose big-endian you will fail to read
>> >> > > internal registers, because they ALSO change endianess and le32_to_cpu()
>> >> > > will screw it. However it is different when accessing registers through
>> >> > > PCI bus, then you need to use readl().
>> >> >
>> >> > Ok, I only checked that the platform does not support big-endian Linux
>> >> > kernel, not if the HW designers screwed up their registers, sorry about
>> >> > that.
>> >> >
>> >> > The other points are of course still valid: If the code ever gets
>> >> > used on an out of order CPU, it is broken. More importantly, if someone
>> >> > looks at the code as an example for writing another PCI support code,
>> >> > it may end up getting copied to some place where it ends up causing
>> >> > trouble.
>> >> >
>> >> > The typical way to deal with mixed-endian hardware reliably is to have
>> >> > a header file containing code like
>> >> >
>> >> > #ifdef CONFIG_GEMINI_BIG_ENDIAN_IO
>> >> > #define gemini_readl(x) __swab32(readl(x))
>> >> > #define ...
>> >> > #else
>> >> > #define gemini_readl(x) readl(x))
>> >> > #endif
>> >> >
>> >> > This also takes care of the (not as unlikely as you'd hope) case that
>> >> > the next person reusing the PCI hardware wires its endianess different
>> >> > from the CPU endianess.
>> >>
>> >> Actually I am not very sure how CPU works in big endian mode :)
>> >> I have never tried it and I think only some guys who made it did that.
>> >> So readl will work for 99.99% of cases. In datasheet they say that:
>> >> "All registers in Gemini use Little Endian and must be accessed by aligned
>> >> 32-bit word operations. The bus connection interface logic provides an Endian
>> >> Conversion function."
>> >> For me it looks like it can mean whatever you want :)
>> >>
>> >
>> > I think the endianes pin switched only the cpu, not the hardware
>> > registers.
>>
>> Yes, but original driver used readl/writel and it does le32_to_cpu,
>> so that structure definition is just reversing it.
>> If you will use __raw_readl/__raw_writel than there will be no need
>> for this redefinition.
>>
>> > Here is some sample code from the ethernet devive on Gemini
>> > typedef union
>> > {
>> > ? ? ? ?unsigned int bits32;
>> > ? ? ? ?struct bit
>> > ? ? ? ?{
>> > #if (BIG_ENDIAN==1)
>> > ? ? ? ? ? ? ? ?unsigned int reserved ? ? ? ? ? : 15; ? // bit 31:17
>> > ? ? ? ? ? ? ? ?unsigned int v_bit_mode ? ? ? ? : 1; ? ?// bit 16
>> > ? ? ? ? ? ? ? ?unsigned int device_id ? ? ? ? ?: 12; ? // bit 15:4
>> > ? ? ? ? ? ? ? ?unsigned int revision_id ? ? ? ?: 4; ? ?// bit ?3:0
>> > #else
>> > ? ? ? ? ? ? ? ?unsigned int revision_id ? ? ? ?: 4; ? ?// bit ?3:0
>> > ? ? ? ? ? ? ? ?unsigned int device_id ? ? ? ? ?: 12; ? // bit 15:4
>> > ? ? ? ? ? ? ? ?unsigned int v_bit_mode ? ? ? ? : 1; ? ?// bit 16
>> > ? ? ? ? ? ? ? ?unsigned int reserved ? ? ? ? ? : 15; ? // bit 31:17
>> > #endif
>>
>> The other thing is that this endianess redefinition is very starnge since
>> it should swap bytes and not bits inside this struct. So I assume that
>> big endian was never tested on this driver and it will not work.
>> But ofcouse I can be wrong here :)
>>
>
> At this momment my brain restarts in very slow motion mode ;-)
> This can't work. The definition Storlinksemi uses for swapping bits and
> bytes are totaly wrong.
> They never _even_ testet this, or understand little endian or big endian.
> Take this simple sample
>
> typedef union {
> ? ? ? ?unsigned int bits32;
> ? ? ? ?struct bit {
> #if (BIG_ENDIAN==0)
> ? ? ? ? ? ? ? ?unsigned int a ?: 1;
> ? ? ? ? ? ? ? ?unsigned int b ?: 31;
> #else
> ? ? ? ? ? ? ? ?unsigned int b ?: 31;
> ? ? ? ? ? ? ? ?unsigned int a ?: 1;
> #endif
> ? ? ? ?};
> } TEST;
>
> They swaped the bits inside one byte

Ha! Wait a minute. Looks like we are both wrong...
Read the beginning of: http://thread.gmane.org/gmane.linux.kernel/1037608

So it means we should use readl/writel and get rid of these non-portable
bit-fields...

>> > ? ? ? ?} bits;
>> > } TOE_VERSION_T;
>> >
>> >
>> >
>>

2010-12-01 15:02:39

by Hans Ulli Kroll

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS



On Wed, 1 Dec 2010, Paulius Zaleckas wrote:

> On Wed, Dec 1, 2010 at 1:52 PM, Hans Ulli Kroll
> <[email protected]> wrote:
> >
> >
> > On Tue, 30 Nov 2010, Paulius Zaleckas wrote:
> >
> >> On Tue, Nov 30, 2010 at 10:15 AM, Hans Ulli Kroll
> >> <[email protected]> wrote:
> >> >
> >> >
> >> > On Mon, 29 Nov 2010, Paulius Zaleckas wrote:
> >> >
> >> >> On 11/29/2010 10:02 PM, Arnd Bergmann wrote:
> >> >> > On Monday 29 November 2010 19:52:55 Paulius Zaleckas wrote:
> >> >> > > On 11/29/2010 06:45 PM, Arnd Bergmann wrote:
> >> >> > > > There are many differences between readl and __raw_readl, including
> >> >> > > >
> >> >> > > > * __raw_readl does not have barriers and does not serialize with
> >> >> > > > ? ? spinlocks, so it breaks on out-of-order CPUs.
> >> >> > > > * __raw_readl does not have a specific endianess, while readl is
> >> >> > > > ? ? fixed little-endian, just as the hardware is in this case.
> >> >> > > > ? ? The endian-conversion is a NOP on little-endian ARM, but required
> >> >> > > > ? ? if you actually run on a big-endian ARM (you don't).
> >> >> > > > * __raw_readl may not be atomic, gcc is free to split the access
> >> >> > > > ? ? into byte wise reads (it normally does not, unless you mark
> >> >> > > > ? ? the pointer __attribute__((packed))).
> >> >> > > >
> >> >> > > > In essence, it is almost never a good idea to use __raw_readl, and
> >> >> > > > the double underscores should tell you so.
> >> >> > >
> >> >> > > You are wrong:
> >> >> > >
> >> >> > > Since CONFIG_ARM_DMA_MEM_BUFFERABLE is NOT defined for FA526 core,
> >> >> > > no barriers are in use when using readl. It just translates into
> >> >> > > le32_to_cpu(__raw_readl(x)). Now this CPU has physical pin for endianess
> >> >> > > configuration and if you will chose big-endian you will fail to read
> >> >> > > internal registers, because they ALSO change endianess and le32_to_cpu()
> >> >> > > will screw it. However it is different when accessing registers through
> >> >> > > PCI bus, then you need to use readl().
> >> >> >
> >> >> > Ok, I only checked that the platform does not support big-endian Linux
> >> >> > kernel, not if the HW designers screwed up their registers, sorry about
> >> >> > that.
> >> >> >
> >> >> > The other points are of course still valid: If the code ever gets
> >> >> > used on an out of order CPU, it is broken. More importantly, if someone
> >> >> > looks at the code as an example for writing another PCI support code,
> >> >> > it may end up getting copied to some place where it ends up causing
> >> >> > trouble.
> >> >> >
> >> >> > The typical way to deal with mixed-endian hardware reliably is to have
> >> >> > a header file containing code like
> >> >> >
> >> >> > #ifdef CONFIG_GEMINI_BIG_ENDIAN_IO
> >> >> > #define gemini_readl(x) __swab32(readl(x))
> >> >> > #define ...
> >> >> > #else
> >> >> > #define gemini_readl(x) readl(x))
> >> >> > #endif
> >> >> >
> >> >> > This also takes care of the (not as unlikely as you'd hope) case that
> >> >> > the next person reusing the PCI hardware wires its endianess different
> >> >> > from the CPU endianess.
> >> >>
> >> >> Actually I am not very sure how CPU works in big endian mode :)
> >> >> I have never tried it and I think only some guys who made it did that.
> >> >> So readl will work for 99.99% of cases. In datasheet they say that:
> >> >> "All registers in Gemini use Little Endian and must be accessed by aligned
> >> >> 32-bit word operations. The bus connection interface logic provides an Endian
> >> >> Conversion function."
> >> >> For me it looks like it can mean whatever you want :)
> >> >>
> >> >
> >> > I think the endianes pin switched only the cpu, not the hardware
> >> > registers.
> >>
> >> Yes, but original driver used readl/writel and it does le32_to_cpu,
> >> so that structure definition is just reversing it.
> >> If you will use __raw_readl/__raw_writel than there will be no need
> >> for this redefinition.
> >>
> >> > Here is some sample code from the ethernet devive on Gemini
> >> > typedef union
> >> > {
> >> > ? ? ? ?unsigned int bits32;
> >> > ? ? ? ?struct bit
> >> > ? ? ? ?{
> >> > #if (BIG_ENDIAN==1)
> >> > ? ? ? ? ? ? ? ?unsigned int reserved ? ? ? ? ? : 15; ? // bit 31:17
> >> > ? ? ? ? ? ? ? ?unsigned int v_bit_mode ? ? ? ? : 1; ? ?// bit 16
> >> > ? ? ? ? ? ? ? ?unsigned int device_id ? ? ? ? ?: 12; ? // bit 15:4
> >> > ? ? ? ? ? ? ? ?unsigned int revision_id ? ? ? ?: 4; ? ?// bit ?3:0
> >> > #else
> >> > ? ? ? ? ? ? ? ?unsigned int revision_id ? ? ? ?: 4; ? ?// bit ?3:0
> >> > ? ? ? ? ? ? ? ?unsigned int device_id ? ? ? ? ?: 12; ? // bit 15:4
> >> > ? ? ? ? ? ? ? ?unsigned int v_bit_mode ? ? ? ? : 1; ? ?// bit 16
> >> > ? ? ? ? ? ? ? ?unsigned int reserved ? ? ? ? ? : 15; ? // bit 31:17
> >> > #endif
> >>
> >> The other thing is that this endianess redefinition is very starnge since
> >> it should swap bytes and not bits inside this struct. So I assume that
> >> big endian was never tested on this driver and it will not work.
> >> But ofcouse I can be wrong here :)
> >>
> >
> > At this momment my brain restarts in very slow motion mode ;-)
> > This can't work. The definition Storlinksemi uses for swapping bits and
> > bytes are totaly wrong.
> > They never _even_ testet this, or understand little endian or big endian.
> > Take this simple sample
> >
> > typedef union {
> > ? ? ? ?unsigned int bits32;
> > ? ? ? ?struct bit {
> > #if (BIG_ENDIAN==0)
> > ? ? ? ? ? ? ? ?unsigned int a ?: 1;
> > ? ? ? ? ? ? ? ?unsigned int b ?: 31;
> > #else
> > ? ? ? ? ? ? ? ?unsigned int b ?: 31;
> > ? ? ? ? ? ? ? ?unsigned int a ?: 1;
> > #endif
> > ? ? ? ?};
> > } TEST;
> >
> > They swaped the bits inside one byte
>
> Ha! Wait a minute. Looks like we are both wrong...
> Read the beginning of: http://thread.gmane.org/gmane.linux.kernel/1037608
>
> So it means we should use readl/writel and get rid of these non-portable
> bit-fields...
>

get rid of _endianes_.
Endianes is ordering of bytes (octets) in address space and _not_ bits in
registers nor bytes.

With the above example :
a = 1;
b = 0;

you will get on little endian cpu this
0x01, 0x00, 0x00, 0x00 (0x00000001UL) in memory
on big endian
0x00, 0x00, 0x00, 0x01 (0x01000000UL)
_note_ the bits are not swapped, _only_ bytes.

but if we use this _strange_ register layout with BIG_ENDIAN is set
0x00, 0x00, 0x00, 0x80 (0x80000000UL)

I've done this _on_ paper to understand this.

> >> > ? ? ? ?} bits;
> >> > } TOE_VERSION_T;
> >> >
> >> >
> >> >
> >>
>

2010-12-06 10:52:52

by Sergei Shtylyov

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS

Hello.

On 29-11-2010 19:45, Arnd Bergmann wrote:

>>> The I/O ordering is probably not what you think it is.
>>> There is no ordering guarantee between __raw_writel and
>>> spin_lock/spin_unlock, so you really should be using
>>> readl/writel.

>> No he really should NOT use readl/writel. The ONLY difference
>> between readl/writel and __raw_readl/__raw_writel is endianess
>> conversion. __raw_*l is not doing it. Which to use depend only
>> on HW.

> There are many differences between readl and __raw_readl, including

> * __raw_readl does not have barriers and does not serialize with
> spinlocks, so it breaks on out-of-order CPUs.
> * __raw_readl does not have a specific endianess, while readl is
> fixed little-endian,

I know I'm late but readl()/writel() are CPU-endian, not LE.

WBR, Sergei

2010-12-06 12:20:23

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH] ARM: Gemini: Add support for PCI BUS

On Monday 06 December 2010, Sergei Shtylyov wrote:
> > There are many differences between readl and __raw_readl, including
>
> > * __raw_readl does not have barriers and does not serialize with
> > spinlocks, so it breaks on out-of-order CPUs.
> > * __raw_readl does not have a specific endianess, while readl is
> > fixed little-endian,
>
> I know I'm late but readl()/writel() are CPU-endian, not LE.

If that was the case, it would be a bug. readl/writel is defined to be
the same endianess as PCI, which is little-endian. Otherwise you would
not be able to use any PCI devices on big-endian ARM machines. The
definition of readl is

#define readl(addr) __le32_to_cpu(__raw_readl(addr))

which converts the little-endian I/O register into a native endian
CPU register.


Arnd