Getting ready to harden readX()/writeX() and inX()/outX() semantics for the
generic implementation.
Define two macros, __io_br() and __io_ar(), for the actions to be taken
before and after an MMIO read.
Define two macros, __io_bw() and __io_aw(), for the actions to be taken
before and after an MMIO write.
Define two macros, __io_pbw() and __io_paw(), for the actions to be taken
before and after a Port IO write.
Define two macros, __io_pbr() and __io_par(), for the actions to be taken
before and after a Port IO read.
If rmb() is available for the architecture, prefer rmb() as the default
implementation of __io_ar()/__io_par().
If wmb() is available for the architecture, prefer wmb() as the default
implementation of __io_bw()/__io_pbw().
Signed-off-by: Sinan Kaya <[email protected]>
---
include/asm-generic/io.h | 43 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 43 insertions(+)
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index b4531e3..a3d349e 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -25,6 +25,49 @@
#define mmiowb() do {} while (0)
#endif
+#ifndef __io_br
+#define __io_br() barrier()
+#endif
+
+#ifndef __io_ar
+#ifdef rmb
+/* prefer rmb() as the default implementation of __io_ar() if supported */
+#define __io_ar() rmb()
+#else
+#define __io_ar() barrier()
+#endif
+#endif
+
+#ifndef __io_bw
+#ifdef wmb
+/* prefer wmb() as the default implementation of __io_bw() if supported */
+#define __io_bw() wmb()
+#else
+#define __io_bw() barrier()
+#endif
+#endif
+
+#ifndef __io_aw
+#define __io_aw() barrier()
+#endif
+
+#ifndef __io_pbw
+#define __io_pbw() __io_bw()
+#endif
+
+#ifndef __io_paw
+#define __io_paw() __io_aw()
+#endif
+
+#ifndef __io_pbr
+#define __io_pbr() __io_br()
+#endif
+
+#ifndef __io_par
+#define __io_par() __io_ar()
+#endif
+
+
/*
* __raw_{read,write}{b,w,l,q}() access memory in native endianness.
*
--
2.7.4
The default implementation, which maps writeX() directly to __raw_writeX(),
is wrong. writeX() has stronger ordering semantics: the compiler is allowed
to reorder memory writes against __raw_writeX().
Use the previously defined __io_bw() and __io_aw() macros to harden
code generation according to architecture support.
Signed-off-by: Sinan Kaya <[email protected]>
---
include/asm-generic/io.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index fc554af..ca268d9 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -207,7 +207,9 @@ static inline u64 readq(const volatile void __iomem *addr)
#define writeb writeb
static inline void writeb(u8 value, volatile void __iomem *addr)
{
+ __io_bw();
__raw_writeb(value, addr);
+ __io_aw();
}
#endif
@@ -215,7 +217,9 @@ static inline void writeb(u8 value, volatile void __iomem *addr)
#define writew writew
static inline void writew(u16 value, volatile void __iomem *addr)
{
+ __io_bw();
__raw_writew(cpu_to_le16(value), addr);
+ __io_aw();
}
#endif
@@ -223,7 +227,9 @@ static inline void writew(u16 value, volatile void __iomem *addr)
#define writel writel
static inline void writel(u32 value, volatile void __iomem *addr)
{
+ __io_bw();
__raw_writel(__cpu_to_le32(value), addr);
+ __io_aw();
}
#endif
@@ -232,7 +238,9 @@ static inline void writel(u32 value, volatile void __iomem *addr)
#define writeq writeq
static inline void writeq(u64 value, volatile void __iomem *addr)
{
+ __io_bw();
__raw_writeq(__cpu_to_le64(value), addr);
+ __io_aw();
}
#endif
#endif /* CONFIG_64BIT */
--
2.7.4
Open code readX() inside inX() so that inX() variants have their own
overridable Port IO barrier combinations, __io_pbr() and __io_par(), for
the actions to be taken before and after a port IO read.
Signed-off-by: Sinan Kaya <[email protected]>
---
include/asm-generic/io.h | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 38a96d1..da51092 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -432,7 +432,12 @@ static inline void writesq(volatile void __iomem *addr, const void *buffer,
#define inb inb
static inline u8 inb(unsigned long addr)
{
- return readb(PCI_IOBASE + addr);
+ u8 val;
+
+ __io_pbr();
+ val = __raw_readb(PCI_IOBASE + addr);
+ __io_par();
+ return val;
}
#endif
@@ -440,7 +445,12 @@ static inline u8 inb(unsigned long addr)
#define inw inw
static inline u16 inw(unsigned long addr)
{
- return readw(PCI_IOBASE + addr);
+ u16 val;
+
+ __io_pbr();
+ val = __le16_to_cpu(__raw_readw(PCI_IOBASE + addr));
+ __io_par();
+ return val;
}
#endif
@@ -448,7 +458,12 @@ static inline u16 inw(unsigned long addr)
#define inl inl
static inline u32 inl(unsigned long addr)
{
- return readl(PCI_IOBASE + addr);
+ u32 val;
+
+ __io_pbr();
+ val = __le32_to_cpu(__raw_readl(PCI_IOBASE + addr));
+ __io_par();
+ return val;
}
#endif
--
2.7.4
The default implementation, which maps readX() directly to __raw_readX(),
is wrong. readX() has stronger ordering semantics: the compiler is allowed
to reorder __raw_readX() against the memory accesses that follow the
register read.
Use the previously defined __io_br() and __io_ar() macros to harden
code generation according to architecture support.
Signed-off-by: Sinan Kaya <[email protected]>
---
include/asm-generic/io.h | 28 ++++++++++++++++++++++++----
1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index a3d349e..fc554af 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -153,7 +153,12 @@ static inline void __raw_writeq(u64 value, volatile void __iomem *addr)
#define readb readb
static inline u8 readb(const volatile void __iomem *addr)
{
- return __raw_readb(addr);
+ u8 val;
+
+ __io_br();
+ val = __raw_readb(addr);
+ __io_ar();
+ return val;
}
#endif
@@ -161,7 +166,12 @@ static inline u8 readb(const volatile void __iomem *addr)
#define readw readw
static inline u16 readw(const volatile void __iomem *addr)
{
- return __le16_to_cpu(__raw_readw(addr));
+ u16 val;
+
+ __io_br();
+ val = __le16_to_cpu(__raw_readw(addr));
+ __io_ar();
+ return val;
}
#endif
@@ -169,7 +179,12 @@ static inline u16 readw(const volatile void __iomem *addr)
#define readl readl
static inline u32 readl(const volatile void __iomem *addr)
{
- return __le32_to_cpu(__raw_readl(addr));
+ u32 val;
+
+ __io_br();
+ val = __le32_to_cpu(__raw_readl(addr));
+ __io_ar();
+ return val;
}
#endif
@@ -178,7 +193,12 @@ static inline u32 readl(const volatile void __iomem *addr)
#define readq readq
static inline u64 readq(const volatile void __iomem *addr)
{
- return __le64_to_cpu(__raw_readq(addr));
+ u64 val;
+
+ __io_br();
+ val = __le64_to_cpu(__raw_readq(addr));
+ __io_ar();
+ return val;
}
#endif
#endif /* CONFIG_64BIT */
--
2.7.4
Open code writeX() inside outX() so that outX() variants have their own
overridable Port IO barrier combinations, __io_pbw() and __io_paw(), for
the actions to be taken before and after a port IO write.
Signed-off-by: Sinan Kaya <[email protected]>
---
include/asm-generic/io.h | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index ca268d9..38a96d1 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -456,7 +456,9 @@ static inline u32 inl(unsigned long addr)
#define outb outb
static inline void outb(u8 value, unsigned long addr)
{
- writeb(value, PCI_IOBASE + addr);
+ __io_pbw();
+ __raw_writeb(value, PCI_IOBASE + addr);
+ __io_paw();
}
#endif
@@ -464,7 +466,9 @@ static inline void outb(u8 value, unsigned long addr)
#define outw outw
static inline void outw(u16 value, unsigned long addr)
{
- writew(value, PCI_IOBASE + addr);
+ __io_pbw();
+ __raw_writew(cpu_to_le16(value), PCI_IOBASE + addr);
+ __io_paw();
}
#endif
@@ -472,7 +476,9 @@ static inline void outw(u16 value, unsigned long addr)
#define outl outl
static inline void outl(u32 value, unsigned long addr)
{
- writel(value, PCI_IOBASE + addr);
+ __io_pbw();
+ __raw_writel(cpu_to_le32(value), PCI_IOBASE + addr);
+ __io_paw();
}
#endif
--
2.7.4
On Thu, Apr 5, 2018 at 1:58 AM, Sinan Kaya <[email protected]> wrote:
Looks good, but I'd change the comments to ones that document exactly
what those barriers are for:
> +#ifndef __io_ar
> +#ifdef rmb
> +/* prefer rmb() as the default implementation of __io_ar() if supported */
> +#define __io_ar() rmb()
/* prevent prefetching of coherent DMA data ahead of a dma-complete */
> +#ifndef __io_bw
> +#ifdef wmb
> +/* prefer wmb() as the default implementation of __io_bw() if supported */
> +#define __io_bw() wmb()
> +#else
/* flush writes to coherent DMA data before possibly triggering a DMA read */
> +#ifndef __io_aw
> +#define __io_aw() barrier()
> +#endif
/* serialize device access against a spin_unlock, usually handled there */
The other four patches look perfect already. What's the timing we need for
these patches? Are they 4.18 material, or do we need them in 4.17 and
stable kernels to work around known bugs?
Arnd
On 2018-04-05 03:00, Arnd Bergmann wrote:
> On Thu, Apr 5, 2018 at 1:58 AM, Sinan Kaya <[email protected]>
> wrote:
>
> Looks good, but I'd change the comments to ones that document exactly
> what those barriers are for:
>
>> +#ifndef __io_ar
>> +#ifdef rmb
>> +/* prefer rmb() as the default implementation of __io_ar() if
>> supported */
>> +#define __io_ar() rmb()
>
> /*
> * prevent prefetching of coherent DMA data ahead of a dma-complete */
>
>> +#ifndef __io_bw
>> +#ifdef wmb
>> +/* prefer wmb() as the default implementation of __io_bw() if
>> supported */
>> +#define __io_bw() wmb()
>> +#else
>
> /* flush writes to coherent DMA data before possibly triggering a DMA
> read */
>
>> +#ifndef __io_aw
>> +#define __io_aw() barrier()
>> +#endif
>
> /* serialize device access against a spin_unlock, usually handled there
> */
>
I will add these and post the next version.
> The other four patches look perfect already. What's the timing we need
> for
> these patches? Are they 4.18 material, or do we need them in 4.17 and
> stable kernels to work around known bugs?
I was hoping to get all of the arch changes in for 4.17.
Driver developers have already started removing redundant wmb() calls.
>
> Arnd
On Thu, Apr 5, 2018 at 1:48 PM, <[email protected]> wrote:
> On 2018-04-05 03:00, Arnd Bergmann wrote:
>> The other four patches look perfect already. What's the timing we need
>> for
>> these patches? Are they 4.18 material, or do we need them in 4.17 and
>> stable kernels to work around known bugs?
>
>
> I was hoping to get all arch stuff in for 4.17.
>
> Driver developers started removing redundant wmb().
Ok, so 4.17 but no stable backports then.
Arnd