2020-01-14 12:25:16

by Jiaxun Yang

[permalink] [raw]
Subject: [PATCH] MIPS: Introduce aligned IO memory operations

Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
instructions like lwl or lwr in IO memory access. However, our current
IO memcpy/memset is wired to the generic implementation, which leads
to a fatal result.

Signed-off-by: Jiaxun Yang <[email protected]>
---
arch/mips/Kconfig | 4 ++
arch/mips/include/asm/io.h | 10 ++++
arch/mips/kernel/Makefile | 2 +-
arch/mips/kernel/io.c | 98 ++++++++++++++++++++++++++++++++++++++
4 files changed, 113 insertions(+), 1 deletion(-)
create mode 100644 arch/mips/kernel/io.c

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 8b0cd692a43f..15a331aa23a2 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1450,6 +1450,7 @@ config CPU_LOONGSON64
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_HUGEPAGES
select CPU_SUPPORTS_MSA
+ select CPU_NEEDS_ALIGNED_IO
select CPU_HAS_LOAD_STORE_LR
select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
select CPU_MIPSR2_IRQ_VI
@@ -2598,6 +2599,9 @@ config CPU_HAS_LOAD_STORE_LR
LWL, LWR, SWL, SWR (Load/store word left/right).
LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems).

+config CPU_NEEDS_ALIGNED_IO
+ bool
+
#
# Vectored interrupt mode is an R2 feature
#
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index 3f6ce74335b4..3b0eb4941f23 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -577,6 +577,15 @@ BUILDSTRING(l, u32)
BUILDSTRING(q, u64)
#endif

+#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
+extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
+extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
+extern void __memset_io(volatile void __iomem *, int, size_t);
+
+#define memset_io(c, v, l) __memset_io((c), (v), (l))
+#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
+#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l))
+#else
static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
{
memset((void __force *) addr, val, count);
@@ -589,6 +598,7 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int
{
memcpy((void __force *) dst, src, count);
}
+#endif

/*
* The caches on some architectures aren't dma-coherent and have need to
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index d6e97df51cfb..b07b97b9385e 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -8,7 +8,7 @@ extra-y := head.o vmlinux.lds
obj-y += cmpxchg.o cpu-probe.o branch.o elf.o entry.o genex.o idle.o irq.o \
process.o prom.o ptrace.o reset.o setup.o signal.o \
syscall.o time.o topology.o traps.o unaligned.o watch.o \
- vdso.o cacheinfo.o
+ vdso.o cacheinfo.o io.o

ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_ftrace.o = -pg
diff --git a/arch/mips/kernel/io.c b/arch/mips/kernel/io.c
new file mode 100644
index 000000000000..ca105aa76d4d
--- /dev/null
+++ b/arch/mips/kernel/io.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/io.h>
+
+#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
+
+#if defined(CONFIG_64BIT)
+#define IO_LONG_READ __raw_readq
+#define IO_LONG_WRITE __raw_writeq
+#define IO_LONG_SIZE 8
+#else
+#define IO_LONG_READ __raw_readl
+#define IO_LONG_WRITE __raw_writel
+#define IO_LONG_SIZE 4
+#endif
+
+void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
+{
+ while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
+ !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
+ *(u8 *)to = __raw_readb(from);
+ from++;
+ to++;
+ count--;
+ }
+
+ while (count >= IO_LONG_SIZE) {
+ *(unsigned long *)to = IO_LONG_READ(from);
+ from += IO_LONG_SIZE;
+ to += IO_LONG_SIZE;
+ count -= IO_LONG_SIZE;
+ }
+
+ while (count) {
+ *(u8 *)to = __raw_readb(from);
+ from++;
+ to++;
+ count--;
+ }
+}
+EXPORT_SYMBOL(__memcpy_fromio);
+
+void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
+{
+ while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
+ !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
+ __raw_writeb(*(u8 *)from, to);
+ from++;
+ to++;
+ count--;
+ }
+
+ while (count >= IO_LONG_SIZE) {
+ IO_LONG_WRITE(*(unsigned long *)from, to);
+ from += IO_LONG_SIZE;
+ to += IO_LONG_SIZE;
+ count -= IO_LONG_SIZE;
+ }
+
+ while (count) {
+ __raw_writeb(*(u8 *)from, to);
+ from++;
+ to++;
+ count--;
+ }
+}
+EXPORT_SYMBOL(__memcpy_toio);
+
+void __memset_io(volatile void __iomem *dst, int c, size_t count)
+{
+ unsigned long lc = (u8)c;
+ int i;
+
+ for (i = 1; i < IO_LONG_SIZE; i++)
+ lc |= (u8)c << (i * BITS_PER_BYTE);
+
+ while (count && !IS_ALIGNED((unsigned long)dst, IO_LONG_SIZE)) {
+ __raw_writeb((u8)c, dst);
+ dst++;
+ count--;
+ }
+
+ while (count >= IO_LONG_SIZE) {
+ IO_LONG_WRITE(lc, dst);
+ dst += IO_LONG_SIZE;
+ count -= IO_LONG_SIZE;
+ }
+
+ while (count) {
+ __raw_writeb(c, dst);
+ dst++;
+ count--;
+ }
+}
+EXPORT_SYMBOL(__memset_io);
+#endif
--
2.24.1


2020-01-18 14:42:36

by Philippe Mathieu-Daudé

[permalink] [raw]
Subject: Re: [PATCH] MIPS: Introduce aligned IO memory operations

Hi Jiaxun,

On Tue, Jan 14, 2020 at 1:24 PM Jiaxun Yang <[email protected]> wrote:
>
> Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
> instructions like lwl or lwr in IO memory access. However, our current
> IO memcpy/memset is wired to the generic implementation, which leads
> to a fatal result.

Do you have a handy reproducer to try with QEMU/KVM?

> Signed-off-by: Jiaxun Yang <[email protected]>
> ---
> arch/mips/Kconfig | 4 ++
> arch/mips/include/asm/io.h | 10 ++++
> arch/mips/kernel/Makefile | 2 +-
> arch/mips/kernel/io.c | 98 ++++++++++++++++++++++++++++++++++++++
> 4 files changed, 113 insertions(+), 1 deletion(-)
> create mode 100644 arch/mips/kernel/io.c
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index 8b0cd692a43f..15a331aa23a2 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -1450,6 +1450,7 @@ config CPU_LOONGSON64
> select CPU_SUPPORTS_HIGHMEM
> select CPU_SUPPORTS_HUGEPAGES
> select CPU_SUPPORTS_MSA
> + select CPU_NEEDS_ALIGNED_IO
> select CPU_HAS_LOAD_STORE_LR
> select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
> select CPU_MIPSR2_IRQ_VI
> @@ -2598,6 +2599,9 @@ config CPU_HAS_LOAD_STORE_LR
> LWL, LWR, SWL, SWR (Load/store word left/right).
> LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems).
>
> +config CPU_NEEDS_ALIGNED_IO
> + bool
> +
> #
> # Vectored interrupt mode is an R2 feature
> #
> diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
> index 3f6ce74335b4..3b0eb4941f23 100644
> --- a/arch/mips/include/asm/io.h
> +++ b/arch/mips/include/asm/io.h
> @@ -577,6 +577,15 @@ BUILDSTRING(l, u32)
> BUILDSTRING(q, u64)
> #endif
>
> +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
> +extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
> +extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
> +extern void __memset_io(volatile void __iomem *, int, size_t);
> +
> +#define memset_io(c, v, l) __memset_io((c), (v), (l))
> +#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
> +#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l))
> +#else
> static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
> {
> memset((void __force *) addr, val, count);
> @@ -589,6 +598,7 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int
> {
> memcpy((void __force *) dst, src, count);
> }
> +#endif
>
> /*
> * The caches on some architectures aren't dma-coherent and have need to
> diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
> index d6e97df51cfb..b07b97b9385e 100644
> --- a/arch/mips/kernel/Makefile
> +++ b/arch/mips/kernel/Makefile
> @@ -8,7 +8,7 @@ extra-y := head.o vmlinux.lds
> obj-y += cmpxchg.o cpu-probe.o branch.o elf.o entry.o genex.o idle.o irq.o \
> process.o prom.o ptrace.o reset.o setup.o signal.o \
> syscall.o time.o topology.o traps.o unaligned.o watch.o \
> - vdso.o cacheinfo.o
> + vdso.o cacheinfo.o io.o
>
> ifdef CONFIG_FUNCTION_TRACER
> CFLAGS_REMOVE_ftrace.o = -pg
> diff --git a/arch/mips/kernel/io.c b/arch/mips/kernel/io.c
> new file mode 100644
> index 000000000000..ca105aa76d4d
> --- /dev/null
> +++ b/arch/mips/kernel/io.c
> @@ -0,0 +1,98 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +#include <linux/export.h>
> +#include <linux/types.h>
> +#include <linux/io.h>
> +
> +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
> +
> +#if defined(CONFIG_64BIT)
> +#define IO_LONG_READ __raw_readq
> +#define IO_LONG_WRITE __raw_writeq
> +#define IO_LONG_SIZE 8
> +#else
> +#define IO_LONG_READ __raw_readl
> +#define IO_LONG_WRITE __raw_writel
> +#define IO_LONG_SIZE 4
> +#endif
> +
> +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
> +{
> + while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
> + !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
> + *(u8 *)to = __raw_readb(from);
> + from++;
> + to++;
> + count--;
> + }
> +
> + while (count >= IO_LONG_SIZE) {
> + *(unsigned long *)to = IO_LONG_READ(from);
> + from += IO_LONG_SIZE;
> + to += IO_LONG_SIZE;
> + count -= IO_LONG_SIZE;
> + }
> +
> + while (count) {
> + *(u8 *)to = __raw_readb(from);
> + from++;
> + to++;
> + count--;
> + }
> +}
> +EXPORT_SYMBOL(__memcpy_fromio);
> +
> +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
> +{
> + while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
> + !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
> + __raw_writeb(*(u8 *)from, to);
> + from++;
> + to++;
> + count--;
> + }
> +
> + while (count >= IO_LONG_SIZE) {
> + IO_LONG_WRITE(*(unsigned long *)from, to);
> + from += IO_LONG_SIZE;
> + to += IO_LONG_SIZE;
> + count -= IO_LONG_SIZE;
> + }
> +
> + while (count) {
> + __raw_writeb(*(u8 *)from, to);
> + from++;
> + to++;
> + count--;
> + }
> +}
> +EXPORT_SYMBOL(__memcpy_toio);
> +
> +void __memset_io(volatile void __iomem *dst, int c, size_t count)
> +{
> + unsigned long lc = (u8)c;
> + int i;
> +
> + for (i = 1; i < IO_LONG_SIZE; i++)
> + lc |= (u8)c << (i * BITS_PER_BYTE);
> +
> + while (count && !IS_ALIGNED((unsigned long)dst, IO_LONG_SIZE)) {
> + __raw_writeb((u8)c, dst);
> + dst++;
> + count--;
> + }
> +
> + while (count >= IO_LONG_SIZE) {
> + IO_LONG_WRITE(lc, dst);
> + dst += IO_LONG_SIZE;
> + count -= IO_LONG_SIZE;
> + }
> +
> + while (count) {
> + __raw_writeb(c, dst);
> + dst++;
> + count--;
> + }
> +}
> +EXPORT_SYMBOL(__memset_io);
> +#endif
> --
> 2.24.1
>

2020-01-18 15:14:56

by Jiaxun Yang

[permalink] [raw]
Subject: Re: [PATCH] MIPS: Introduce aligned IO memory operations



18.01.2020, 22:41, "Philippe Mathieu-Daudé" <[email protected]>:
> Hi Jiaxun,
>
> On Tue, Jan 14, 2020 at 1:24 PM Jiaxun Yang <[email protected]> wrote:
>>  Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
>>  instructions like lwl or lwr in IO memory access. However, our current
>>  IO memcpy/memset is wired to the generic implementation, which leads
>>  to a fatal result.
>
> Do you have a handy reproducer to try with QEMU/KVM?

It was triggered by QXL DRM driver when I was working on KVM for Loongson
with Huacai.

See arch/mips/kvm/emulate.c, we didn't have unaligned instructions trap
emulation for MMIO. You can construct a simple unaligned memcpy_fromio
case to reproduce it.

Thanks.

>
>>  Signed-off-by: Jiaxun Yang <[email protected]>
>>  ---
>>   arch/mips/Kconfig | 4 ++
>>   arch/mips/include/asm/io.h | 10 ++++
>>   arch/mips/kernel/Makefile | 2 +-
>>   arch/mips/kernel/io.c | 98 ++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 113 insertions(+), 1 deletion(-)
>>   create mode 100644 arch/mips/kernel/io.c
>>
>>  diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
>>  index 8b0cd692a43f..15a331aa23a2 100644
>>  --- a/arch/mips/Kconfig
>>  +++ b/arch/mips/Kconfig
>>  @@ -1450,6 +1450,7 @@ config CPU_LOONGSON64
>>          select CPU_SUPPORTS_HIGHMEM
>>          select CPU_SUPPORTS_HUGEPAGES
>>          select CPU_SUPPORTS_MSA
>>  + select CPU_NEEDS_ALIGNED_IO
>>          select CPU_HAS_LOAD_STORE_LR
>>          select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
>>          select CPU_MIPSR2_IRQ_VI
>>  @@ -2598,6 +2599,9 @@ config CPU_HAS_LOAD_STORE_LR
>>            LWL, LWR, SWL, SWR (Load/store word left/right).
>>            LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems).
>>
>>  +config CPU_NEEDS_ALIGNED_IO
>>  + bool
>>  +
>>   #
>>   # Vectored interrupt mode is an R2 feature
>>   #
>>  diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
>>  index 3f6ce74335b4..3b0eb4941f23 100644
>>  --- a/arch/mips/include/asm/io.h
>>  +++ b/arch/mips/include/asm/io.h
>>  @@ -577,6 +577,15 @@ BUILDSTRING(l, u32)
>>   BUILDSTRING(q, u64)
>>   #endif
>>
>>  +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
>>  +extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
>>  +extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
>>  +extern void __memset_io(volatile void __iomem *, int, size_t);
>>  +
>>  +#define memset_io(c, v, l) __memset_io((c), (v), (l))
>>  +#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
>>  +#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l))
>>  +#else
>>   static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
>>   {
>>          memset((void __force *) addr, val, count);
>>  @@ -589,6 +598,7 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int
>>   {
>>          memcpy((void __force *) dst, src, count);
>>   }
>>  +#endif
>>
>>   /*
>>    * The caches on some architectures aren't dma-coherent and have need to
>>  diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
>>  index d6e97df51cfb..b07b97b9385e 100644
>>  --- a/arch/mips/kernel/Makefile
>>  +++ b/arch/mips/kernel/Makefile
>>  @@ -8,7 +8,7 @@ extra-y := head.o vmlinux.lds
>>   obj-y += cmpxchg.o cpu-probe.o branch.o elf.o entry.o genex.o idle.o irq.o \
>>                     process.o prom.o ptrace.o reset.o setup.o signal.o \
>>                     syscall.o time.o topology.o traps.o unaligned.o watch.o \
>>  - vdso.o cacheinfo.o
>>  + vdso.o cacheinfo.o io.o
>>
>>   ifdef CONFIG_FUNCTION_TRACER
>>   CFLAGS_REMOVE_ftrace.o = -pg
>>  diff --git a/arch/mips/kernel/io.c b/arch/mips/kernel/io.c
>>  new file mode 100644
>>  index 000000000000..ca105aa76d4d
>>  --- /dev/null
>>  +++ b/arch/mips/kernel/io.c
>>  @@ -0,0 +1,98 @@
>>  +// SPDX-License-Identifier: GPL-2.0-or-later
>>  +
>>  +#include <linux/export.h>
>>  +#include <linux/types.h>
>>  +#include <linux/io.h>
>>  +
>>  +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
>>  +
>>  +#if defined(CONFIG_64BIT)
>>  +#define IO_LONG_READ __raw_readq
>>  +#define IO_LONG_WRITE __raw_writeq
>>  +#define IO_LONG_SIZE 8
>>  +#else
>>  +#define IO_LONG_READ __raw_readl
>>  +#define IO_LONG_WRITE __raw_writel
>>  +#define IO_LONG_SIZE 4
>>  +#endif
>>  +
>>  +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
>>  +{
>>  + while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
>>  + !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
>>  + *(u8 *)to = __raw_readb(from);
>>  + from++;
>>  + to++;
>>  + count--;
>>  + }
>>  +
>>  + while (count >= IO_LONG_SIZE) {
>>  + *(unsigned long *)to = IO_LONG_READ(from);
>>  + from += IO_LONG_SIZE;
>>  + to += IO_LONG_SIZE;
>>  + count -= IO_LONG_SIZE;
>>  + }
>>  +
>>  + while (count) {
>>  + *(u8 *)to = __raw_readb(from);
>>  + from++;
>>  + to++;
>>  + count--;
>>  + }
>>  +}
>>  +EXPORT_SYMBOL(__memcpy_fromio);
>>  +
>>  +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
>>  +{
>>  + while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
>>  + !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
>>  + __raw_writeb(*(u8 *)from, to);
>>  + from++;
>>  + to++;
>>  + count--;
>>  + }
>>  +
>>  + while (count >= IO_LONG_SIZE) {
>>  + IO_LONG_WRITE(*(unsigned long *)from, to);
>>  + from += IO_LONG_SIZE;
>>  + to += IO_LONG_SIZE;
>>  + count -= IO_LONG_SIZE;
>>  + }
>>  +
>>  + while (count) {
>>  + __raw_writeb(*(u8 *)from, to);
>>  + from++;
>>  + to++;
>>  + count--;
>>  + }
>>  +}
>>  +EXPORT_SYMBOL(__memcpy_toio);
>>  +
>>  +void __memset_io(volatile void __iomem *dst, int c, size_t count)
>>  +{
>>  + unsigned long lc = (u8)c;
>>  + int i;
>>  +
>>  + for (i = 1; i < IO_LONG_SIZE; i++)
>>  + lc |= (u8)c << (i * BITS_PER_BYTE);
>>  +
>>  + while (count && !IS_ALIGNED((unsigned long)dst, IO_LONG_SIZE)) {
>>  + __raw_writeb((u8)c, dst);
>>  + dst++;
>>  + count--;
>>  + }
>>  +
>>  + while (count >= IO_LONG_SIZE) {
>>  + IO_LONG_WRITE(lc, dst);
>>  + dst += IO_LONG_SIZE;
>>  + count -= IO_LONG_SIZE;
>>  + }
>>  +
>>  + while (count) {
>>  + __raw_writeb(c, dst);
>>  + dst++;
>>  + count--;
>>  + }
>>  +}
>>  +EXPORT_SYMBOL(__memset_io);
>>  +#endif
>>  --
>>  2.24.1

2020-01-22 18:46:21

by Paul Burton

[permalink] [raw]
Subject: Re: [PATCH] MIPS: Introduce aligned IO memory operations

Hi Jiaxun,

On Tue, Jan 14, 2020 at 08:23:43PM +0800, Jiaxun Yang wrote:
> Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
> instructions like lwl or lwr in IO memory access. However, our current
> IO memcpy/memset is wired to the generic implementation, which leads
> to a fatal result.

Hmm, I wonder if we should just do this unconditionally on all systems.
I can't think of a reason it'd ever be a good idea to use lwl/lwr on an
MMIO device. Any thoughts on that?

Thanks,
Paul

> Signed-off-by: Jiaxun Yang <[email protected]>
> ---
> arch/mips/Kconfig | 4 ++
> arch/mips/include/asm/io.h | 10 ++++
> arch/mips/kernel/Makefile | 2 +-
> arch/mips/kernel/io.c | 98 ++++++++++++++++++++++++++++++++++++++
> 4 files changed, 113 insertions(+), 1 deletion(-)
> create mode 100644 arch/mips/kernel/io.c
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index 8b0cd692a43f..15a331aa23a2 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -1450,6 +1450,7 @@ config CPU_LOONGSON64
> select CPU_SUPPORTS_HIGHMEM
> select CPU_SUPPORTS_HUGEPAGES
> select CPU_SUPPORTS_MSA
> + select CPU_NEEDS_ALIGNED_IO
> select CPU_HAS_LOAD_STORE_LR
> select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
> select CPU_MIPSR2_IRQ_VI
> @@ -2598,6 +2599,9 @@ config CPU_HAS_LOAD_STORE_LR
> LWL, LWR, SWL, SWR (Load/store word left/right).
> LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems).
>
> +config CPU_NEEDS_ALIGNED_IO
> + bool
> +
> #
> # Vectored interrupt mode is an R2 feature
> #
> diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
> index 3f6ce74335b4..3b0eb4941f23 100644
> --- a/arch/mips/include/asm/io.h
> +++ b/arch/mips/include/asm/io.h
> @@ -577,6 +577,15 @@ BUILDSTRING(l, u32)
> BUILDSTRING(q, u64)
> #endif
>
> +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
> +extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
> +extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
> +extern void __memset_io(volatile void __iomem *, int, size_t);
> +
> +#define memset_io(c, v, l) __memset_io((c), (v), (l))
> +#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
> +#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l))
> +#else
> static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
> {
> memset((void __force *) addr, val, count);
> @@ -589,6 +598,7 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int
> {
> memcpy((void __force *) dst, src, count);
> }
> +#endif
>
> /*
> * The caches on some architectures aren't dma-coherent and have need to
> diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
> index d6e97df51cfb..b07b97b9385e 100644
> --- a/arch/mips/kernel/Makefile
> +++ b/arch/mips/kernel/Makefile
> @@ -8,7 +8,7 @@ extra-y := head.o vmlinux.lds
> obj-y += cmpxchg.o cpu-probe.o branch.o elf.o entry.o genex.o idle.o irq.o \
> process.o prom.o ptrace.o reset.o setup.o signal.o \
> syscall.o time.o topology.o traps.o unaligned.o watch.o \
> - vdso.o cacheinfo.o
> + vdso.o cacheinfo.o io.o
>
> ifdef CONFIG_FUNCTION_TRACER
> CFLAGS_REMOVE_ftrace.o = -pg
> diff --git a/arch/mips/kernel/io.c b/arch/mips/kernel/io.c
> new file mode 100644
> index 000000000000..ca105aa76d4d
> --- /dev/null
> +++ b/arch/mips/kernel/io.c
> @@ -0,0 +1,98 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +#include <linux/export.h>
> +#include <linux/types.h>
> +#include <linux/io.h>
> +
> +#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
> +
> +#if defined(CONFIG_64BIT)
> +#define IO_LONG_READ __raw_readq
> +#define IO_LONG_WRITE __raw_writeq
> +#define IO_LONG_SIZE 8
> +#else
> +#define IO_LONG_READ __raw_readl
> +#define IO_LONG_WRITE __raw_writel
> +#define IO_LONG_SIZE 4
> +#endif
> +
> +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
> +{
> + while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
> + !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
> + *(u8 *)to = __raw_readb(from);
> + from++;
> + to++;
> + count--;
> + }
> +
> + while (count >= IO_LONG_SIZE) {
> + *(unsigned long *)to = IO_LONG_READ(from);
> + from += IO_LONG_SIZE;
> + to += IO_LONG_SIZE;
> + count -= IO_LONG_SIZE;
> + }
> +
> + while (count) {
> + *(u8 *)to = __raw_readb(from);
> + from++;
> + to++;
> + count--;
> + }
> +}
> +EXPORT_SYMBOL(__memcpy_fromio);
> +
> +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
> +{
> + while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
> + !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
> + __raw_writeb(*(u8 *)from, to);
> + from++;
> + to++;
> + count--;
> + }
> +
> + while (count >= IO_LONG_SIZE) {
> + IO_LONG_WRITE(*(unsigned long *)from, to);
> + from += IO_LONG_SIZE;
> + to += IO_LONG_SIZE;
> + count -= IO_LONG_SIZE;
> + }
> +
> + while (count) {
> + __raw_writeb(*(u8 *)from, to);
> + from++;
> + to++;
> + count--;
> + }
> +}
> +EXPORT_SYMBOL(__memcpy_toio);
> +
> +void __memset_io(volatile void __iomem *dst, int c, size_t count)
> +{
> + unsigned long lc = (u8)c;
> + int i;
> +
> + for (i = 1; i < IO_LONG_SIZE; i++)
> + lc |= (u8)c << (i * BITS_PER_BYTE);
> +
> + while (count && !IS_ALIGNED((unsigned long)dst, IO_LONG_SIZE)) {
> + __raw_writeb((u8)c, dst);
> + dst++;
> + count--;
> + }
> +
> + while (count >= IO_LONG_SIZE) {
> + IO_LONG_WRITE(lc, dst);
> + dst += IO_LONG_SIZE;
> + count -= IO_LONG_SIZE;
> + }
> +
> + while (count) {
> + __raw_writeb(c, dst);
> + dst++;
> + count--;
> + }
> +}
> +EXPORT_SYMBOL(__memset_io);
> +#endif
> --
> 2.24.1
>

2020-01-24 14:41:14

by Thomas Bogendoerfer

[permalink] [raw]
Subject: Re: [PATCH] MIPS: Introduce aligned IO memory operations

On Wed, Jan 22, 2020 at 10:45:06AM -0800, Paul Burton wrote:
> Hi Jiaxun,
>
> On Tue, Jan 14, 2020 at 08:23:43PM +0800, Jiaxun Yang wrote:
> > Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
> > instructions like lwl or lwr in IO memory access. However, our current
> > IO memcpy/memset is wired to the generic implementation, which leads
> > to a fatal result.
>
> Hmm, I wonder if we should just do this unconditionally on all systems.
> I can't think of a reason it'd ever be a good idea to use lwl/lwr on an
> MMIO device. Any thoughts on that?

depends on the type of device. I can see benefits for framebuffers
and memory devices since memset/memcpy are more optimised than the
function in this patch.

Thomas.

--
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea. [ RFC1925, 2.3 ]

2020-01-25 03:33:56

by Jiaxun Yang

[permalink] [raw]
Subject: Re: [PATCH] MIPS: Introduce aligned IO memory operations



于 2020年1月24日 GMT+08:00 下午10:07:51, Thomas Bogendoerfer <[email protected]> 写到:
>On Wed, Jan 22, 2020 at 10:45:06AM -0800, Paul Burton wrote:
>> Hi Jiaxun,
>>
>> On Tue, Jan 14, 2020 at 08:23:43PM +0800, Jiaxun Yang wrote:
>> > Some platforms, such as Loongson64 or QEMU/KVM, don't support
>unaligned
>> > instructions like lwl or lwr in IO memory access. However, our
>current
>> > IO memcpy/memset is wired to the generic implementation, which
>leads
>> > to a fatal result.
>>
>> Hmm, I wonder if we should just do this unconditionally on all
>systems.
>> I can't think of a reason it'd ever be a good idea to use lwl/lwr on
>an
>> MMIO device. Any thoughts on that?
>
>depends on the type of device. I can see benefits for framebuffers
>and memory devices since memset/memcpy are more optimised than the
>function in this patch.

lwl/lwr is slower than this implementation on your system?
I thought that other platforms support unaligned request can be benefited from speed up of these instructions.

>
>Thomas.

--
Jiaxun Yang