2019-02-22 18:05:59

by Will Deacon

Subject: [PATCH v2 0/3] Ensure inX() is ordered wrt delay() routines

Hi all,

This is version two of the patches I previously posted here:

https://lore.kernel.org/lkml/[email protected]/T/#u

Changes since v1 include:

* Incorporate riscv changes from Palmer
* Update macro definitions as suggested by Geert
* Extend to cover non-port reads via __io_ar() as well

Feedback welcome,

Will

--->8

Will Deacon (3):
asm-generic/io: Pass result of I/O accessor to __io_[p]ar()
riscv: io: Update __io_[p]ar() macros to take an argument
arm64: io: Hook up __io_par() for inX() ordering

arch/arm64/include/asm/io.h | 1 +
arch/riscv/include/asm/io.h | 36 ++++++++++++++++++------------------
include/asm-generic/io.h | 20 ++++++++++----------
3 files changed, 29 insertions(+), 28 deletions(-)

--
2.11.0



2019-02-22 18:06:03

by Will Deacon

Subject: [PATCH v2 3/3] arm64: io: Hook up __io_par() for inX() ordering

Ensure that inX() provides the same ordering guarantees as readX()
by hooking up __io_par() so that it maps directly to __iormb().
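
As background, a prior arm64 patch taught __iormb() to take the value
returned by the read and fold it into a dummy control dependency, so
that a subsequent delay loop cannot begin before the read completes.
A rough sketch of that idea (modelled on the arm64 code of the time;
the in-tree details may differ):

	#define __iormb(v)						\
	({								\
		unsigned long tmp;					\
									\
		rmb();							\
									\
		/*							\
		 * eor of a register with itself is always zero, so	\
		 * the cbnz never branches, but the CPU cannot		\
		 * resolve it until the load has returned its data,	\
		 * ordering the read against a later delay() loop.	\
		 */							\
		asm volatile("eor	%0, %1, %1\n"			\
			     "cbnz	%0, ."				\
			     : "=r" (tmp)				\
			     : "r" ((unsigned long)(v))			\
			     : "memory");				\
	})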

Reported-by: Andrew Murray <[email protected]>
Signed-off-by: Will Deacon <[email protected]>
---
arch/arm64/include/asm/io.h | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index ee723835c1f4..8bb7210ac286 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -121,6 +121,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
: "memory"); \
})

+#define __io_par(v) __iormb(v)
#define __iowmb() wmb()

#define mmiowb() do { } while (0)
--
2.11.0


2019-02-22 18:06:13

by Will Deacon

Subject: [PATCH v2 2/3] riscv: io: Update __io_[p]ar() macros to take an argument

The definitions of the __io_[p]ar() macros in asm-generic/io.h take the
value returned by the preceding I/O read as an argument, so that
architectures can use it to create ordering with a subsequent delay()
routine via a dependency on that value.

Update the riscv barrier definitions to match, although the argument
is currently unused.
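
For illustration, with this change a riscv readl() expands along the
following lines (a sketch of the post-patch macros; the fence itself
ignores __v, the parameter exists only so that the generic and
per-arch signatures agree):

	#define readl(c) ({						\
		u32 __v;						\
		__io_br();		/* no-op on riscv */		\
		__v = readl_cpu(c);	/* the MMIO load itself */	\
		__io_ar(__v);		/* "fence i,r"; __v unused */	\
		__v;							\
	})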

Suggested-by: Arnd Bergmann <[email protected]>
Reviewed-by: Palmer Dabbelt <[email protected]>
Signed-off-by: Will Deacon <[email protected]>
---
arch/riscv/include/asm/io.h | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index b269451e7e85..1d9c1376dc64 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -163,20 +163,20 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
* doesn't define any ordering between the memory space and the I/O space.
*/
#define __io_br() do {} while (0)
-#define __io_ar() __asm__ __volatile__ ("fence i,r" : : : "memory");
+#define __io_ar(v) __asm__ __volatile__ ("fence i,r" : : : "memory");
#define __io_bw() __asm__ __volatile__ ("fence w,o" : : : "memory");
#define __io_aw() do {} while (0)

-#define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(); __v; })
-#define readw(c) ({ u16 __v; __io_br(); __v = readw_cpu(c); __io_ar(); __v; })
-#define readl(c) ({ u32 __v; __io_br(); __v = readl_cpu(c); __io_ar(); __v; })
+#define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; })
+#define readw(c) ({ u16 __v; __io_br(); __v = readw_cpu(c); __io_ar(__v); __v; })
+#define readl(c) ({ u32 __v; __io_br(); __v = readl_cpu(c); __io_ar(__v); __v; })

#define writeb(v,c) ({ __io_bw(); writeb_cpu((v),(c)); __io_aw(); })
#define writew(v,c) ({ __io_bw(); writew_cpu((v),(c)); __io_aw(); })
#define writel(v,c) ({ __io_bw(); writel_cpu((v),(c)); __io_aw(); })

#ifdef CONFIG_64BIT
-#define readq(c) ({ u64 __v; __io_br(); __v = readq_cpu(c); __io_ar(); __v; })
+#define readq(c) ({ u64 __v; __io_br(); __v = readq_cpu(c); __io_ar(__v); __v; })
#define writeq(v,c) ({ __io_bw(); writeq_cpu((v),(c)); __io_aw(); })
#endif

@@ -198,20 +198,20 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
* writes.
*/
#define __io_pbr() __asm__ __volatile__ ("fence io,i" : : : "memory");
-#define __io_par() __asm__ __volatile__ ("fence i,ior" : : : "memory");
+#define __io_par(v) __asm__ __volatile__ ("fence i,ior" : : : "memory");
#define __io_pbw() __asm__ __volatile__ ("fence iow,o" : : : "memory");
#define __io_paw() __asm__ __volatile__ ("fence o,io" : : : "memory");

-#define inb(c) ({ u8 __v; __io_pbr(); __v = readb_cpu((void*)(PCI_IOBASE + (c))); __io_par(); __v; })
-#define inw(c) ({ u16 __v; __io_pbr(); __v = readw_cpu((void*)(PCI_IOBASE + (c))); __io_par(); __v; })
-#define inl(c) ({ u32 __v; __io_pbr(); __v = readl_cpu((void*)(PCI_IOBASE + (c))); __io_par(); __v; })
+#define inb(c) ({ u8 __v; __io_pbr(); __v = readb_cpu((void*)(PCI_IOBASE + (c))); __io_par(__v); __v; })
+#define inw(c) ({ u16 __v; __io_pbr(); __v = readw_cpu((void*)(PCI_IOBASE + (c))); __io_par(__v); __v; })
+#define inl(c) ({ u32 __v; __io_pbr(); __v = readl_cpu((void*)(PCI_IOBASE + (c))); __io_par(__v); __v; })

#define outb(v,c) ({ __io_pbw(); writeb_cpu((v),(void*)(PCI_IOBASE + (c))); __io_paw(); })
#define outw(v,c) ({ __io_pbw(); writew_cpu((v),(void*)(PCI_IOBASE + (c))); __io_paw(); })
#define outl(v,c) ({ __io_pbw(); writel_cpu((v),(void*)(PCI_IOBASE + (c))); __io_paw(); })

#ifdef CONFIG_64BIT
-#define inq(c) ({ u64 __v; __io_pbr(); __v = readq_cpu((void*)(c)); __io_par(); __v; })
+#define inq(c) ({ u64 __v; __io_pbr(); __v = readq_cpu((void*)(c)); __io_par(__v); __v; })
#define outq(v,c) ({ __io_pbw(); writeq_cpu((v),(void*)(c)); __io_paw(); })
#endif

@@ -254,16 +254,16 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
afence; \
}

-__io_reads_ins(reads, u8, b, __io_br(), __io_ar())
-__io_reads_ins(reads, u16, w, __io_br(), __io_ar())
-__io_reads_ins(reads, u32, l, __io_br(), __io_ar())
+__io_reads_ins(reads, u8, b, __io_br(), __io_ar(addr))
+__io_reads_ins(reads, u16, w, __io_br(), __io_ar(addr))
+__io_reads_ins(reads, u32, l, __io_br(), __io_ar(addr))
#define readsb(addr, buffer, count) __readsb(addr, buffer, count)
#define readsw(addr, buffer, count) __readsw(addr, buffer, count)
#define readsl(addr, buffer, count) __readsl(addr, buffer, count)

-__io_reads_ins(ins, u8, b, __io_pbr(), __io_par())
-__io_reads_ins(ins, u16, w, __io_pbr(), __io_par())
-__io_reads_ins(ins, u32, l, __io_pbr(), __io_par())
+__io_reads_ins(ins, u8, b, __io_pbr(), __io_par(addr))
+__io_reads_ins(ins, u16, w, __io_pbr(), __io_par(addr))
+__io_reads_ins(ins, u32, l, __io_pbr(), __io_par(addr))
#define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count)
#define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count)
#define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count)
@@ -283,10 +283,10 @@ __io_writes_outs(outs, u32, l, __io_pbw(), __io_paw())
#define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count)

#ifdef CONFIG_64BIT
-__io_reads_ins(reads, u64, q, __io_br(), __io_ar())
+__io_reads_ins(reads, u64, q, __io_br(), __io_ar(addr))
#define readsq(addr, buffer, count) __readsq(addr, buffer, count)

-__io_reads_ins(ins, u64, q, __io_pbr(), __io_par())
+__io_reads_ins(ins, u64, q, __io_pbr(), __io_par(addr))
#define insq(addr, buffer, count) __insq((void __iomem *)addr, buffer, count)

__io_writes_outs(writes, u64, q, __io_bw(), __io_aw())
--
2.11.0


2019-02-22 18:07:38

by Will Deacon

Subject: [PATCH v2 1/3] asm-generic/io: Pass result of I/O accessor to __io_[p]ar()

The inX() and readX() I/O accessors must enforce ordering against
subsequent calls to the delay() routines, so that a read-back from a
device can be used to postpone a subsequent write to the same device.

On some architectures, including arm64, this ordering can only be
achieved by creating a dependency on the value returned by the I/O
accessor operation, so we need to pass the value we read to the
__io_par() and __io_ar() macros in these cases.
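
As a concrete illustration, this is the sort of (hypothetical) driver
sequence the guarantee exists for; CTRL, CMD_RESET, CMD_GO and
dev->base are illustrative names only:

	writel(CMD_RESET, dev->base + CTRL);	/* posted write */
	readl(dev->base + CTRL);	/* read-back: returns only once
					 * the write reached the device */
	udelay(10);			/* must not start early */
	writel(CMD_GO, dev->base + CTRL); /* hence >= 10us after the
					   * reset hit the device */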

Acked-by: Arnd Bergmann <[email protected]>
Reported-by: Andrew Murray <[email protected]>
Signed-off-by: Will Deacon <[email protected]>
---
include/asm-generic/io.h | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index d356f802945a..303871651f8a 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -32,9 +32,9 @@
/* prevent prefetching of coherent DMA data ahead of a dma-complete */
#ifndef __io_ar
#ifdef rmb
-#define __io_ar() rmb()
+#define __io_ar(v) rmb()
#else
-#define __io_ar() barrier()
+#define __io_ar(v) barrier()
#endif
#endif

@@ -65,7 +65,7 @@
#endif

#ifndef __io_par
-#define __io_par() __io_ar()
+#define __io_par(v) __io_ar(v)
#endif


@@ -158,7 +158,7 @@ static inline u8 readb(const volatile void __iomem *addr)

__io_br();
val = __raw_readb(addr);
- __io_ar();
+ __io_ar(val);
return val;
}
#endif
@@ -171,7 +171,7 @@ static inline u16 readw(const volatile void __iomem *addr)

__io_br();
val = __le16_to_cpu(__raw_readw(addr));
- __io_ar();
+ __io_ar(val);
return val;
}
#endif
@@ -184,7 +184,7 @@ static inline u32 readl(const volatile void __iomem *addr)

__io_br();
val = __le32_to_cpu(__raw_readl(addr));
- __io_ar();
+ __io_ar(val);
return val;
}
#endif
@@ -198,7 +198,7 @@ static inline u64 readq(const volatile void __iomem *addr)

__io_br();
val = __le64_to_cpu(__raw_readq(addr));
- __io_ar();
+ __io_ar(val);
return val;
}
#endif
@@ -471,7 +471,7 @@ static inline u8 inb(unsigned long addr)

__io_pbr();
val = __raw_readb(PCI_IOBASE + addr);
- __io_par();
+ __io_par(val);
return val;
}
#endif
@@ -484,7 +484,7 @@ static inline u16 inw(unsigned long addr)

__io_pbr();
val = __le16_to_cpu(__raw_readw(PCI_IOBASE + addr));
- __io_par();
+ __io_par(val);
return val;
}
#endif
@@ -497,7 +497,7 @@ static inline u32 inl(unsigned long addr)

__io_pbr();
val = __le32_to_cpu(__raw_readl(PCI_IOBASE + addr));
- __io_par();
+ __io_par(val);
return val;
}
#endif
--
2.11.0


2019-02-26 18:16:31

by Palmer Dabbelt

Subject: Re: [PATCH v2 0/3] Ensure inX() is ordered wrt delay() routines

On Fri, 22 Feb 2019 10:04:51 PST (-0800), Will Deacon wrote:
> Hi all,
>
> This is version two of the patches I previously posted here:
>
> https://lore.kernel.org/lkml/[email protected]/T/#u
>
> Changes since v1 include:
>
> * Incorporate riscv changes from Palmer
> * Update macro definitions as suggested by Geert
> * Extend to cover non-port reads via __io_ar() as well
>
> Feedback welcome,
>
> Will
>
> --->8
>
> Will Deacon (3):
> asm-generic/io: Pass result of I/O accessor to __io_[p]ar()
> riscv: io: Update __io_[p]ar() macros to take an argument
> arm64: io: Hook up __io_par() for inX() ordering
>
> arch/arm64/include/asm/io.h | 1 +
> arch/riscv/include/asm/io.h | 36 ++++++++++++++++++------------------
> include/asm-generic/io.h | 20 ++++++++++----------
> 3 files changed, 29 insertions(+), 28 deletions(-)

Reviewed-by: Palmer Dabbelt <[email protected]>

Thanks!

2019-02-28 18:37:15

by Catalin Marinas

Subject: Re: [PATCH v2 0/3] Ensure inX() is ordered wrt delay() routines

On Fri, Feb 22, 2019 at 06:04:51PM +0000, Will Deacon wrote:
> Will Deacon (3):
> asm-generic/io: Pass result of I/O accessor to __io_[p]ar()
> riscv: io: Update __io_[p]ar() macros to take an argument
> arm64: io: Hook up __io_par() for inX() ordering

Since we have the acks in place, I plan to queue these patches via the
arm64 tree. They don't conflict with -next (as of today) but if anyone
expects other issues, please let me know.

--
Catalin