Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
instructions like lwl or lwr in IO memory access. However, our current
IO memcpy/memset is wired to the generic implementation, which leads
to a fatal result.
Signed-off-by: Jiaxun Yang <[email protected]>
---
arch/mips/Kconfig | 4 ++
arch/mips/include/asm/io.h | 10 ++++
arch/mips/kernel/Makefile | 2 +-
arch/mips/kernel/io.c | 98 ++++++++++++++++++++++++++++++++++++++
4 files changed, 113 insertions(+), 1 deletion(-)
create mode 100644 arch/mips/kernel/io.c
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 8b0cd692a43f..15a331aa23a2 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1450,6 +1450,7 @@ config CPU_LOONGSON64
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_HUGEPAGES
select CPU_SUPPORTS_MSA
+ select CPU_NEEDS_ALIGNED_IO
select CPU_HAS_LOAD_STORE_LR
select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
select CPU_MIPSR2_IRQ_VI
@@ -2598,6 +2599,9 @@ config CPU_HAS_LOAD_STORE_LR
LWL, LWR, SWL, SWR (Load/store word left/right).
LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems).
+config CPU_NEEDS_ALIGNED_IO
+ bool
+
#
# Vectored interrupt mode is an R2 feature
#
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index 3f6ce74335b4..3b0eb4941f23 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -577,6 +577,15 @@ BUILDSTRING(l, u32)
BUILDSTRING(q, u64)
#endif
+#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
+extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
+extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
+extern void __memset_io(volatile void __iomem *, int, size_t);
+
+#define memset_io(c, v, l) __memset_io((c), (v), (l))
+#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
+#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l))
+#else
static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
{
memset((void __force *) addr, val, count);
@@ -589,6 +598,7 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int
{
memcpy((void __force *) dst, src, count);
}
+#endif
/*
* The caches on some architectures aren't dma-coherent and have need to
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index d6e97df51cfb..b07b97b9385e 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -8,7 +8,7 @@ extra-y := head.o vmlinux.lds
obj-y += cmpxchg.o cpu-probe.o branch.o elf.o entry.o genex.o idle.o irq.o \
process.o prom.o ptrace.o reset.o setup.o signal.o \
syscall.o time.o topology.o traps.o unaligned.o watch.o \
- vdso.o cacheinfo.o
+ vdso.o cacheinfo.o io.o
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_ftrace.o = -pg
diff --git a/arch/mips/kernel/io.c b/arch/mips/kernel/io.c
new file mode 100644
index 000000000000..ca105aa76d4d
--- /dev/null
+++ b/arch/mips/kernel/io.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/io.h>
+
+#if defined(CONFIG_CPU_NEEDS_ALIGNED_IO)
+
+#if defined(CONFIG_64BIT)
+#define IO_LONG_READ __raw_readq
+#define IO_LONG_WRITE __raw_writeq
+#define IO_LONG_SIZE 8
+#else
+#define IO_LONG_READ __raw_readl
+#define IO_LONG_WRITE __raw_writel
+#define IO_LONG_SIZE 4
+#endif
+
+void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
+{
+ while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
+ !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
+ *(u8 *)to = __raw_readb(from);
+ from++;
+ to++;
+ count--;
+ }
+
+ while (count >= IO_LONG_SIZE) {
+ *(unsigned long *)to = IO_LONG_READ(from);
+ from += IO_LONG_SIZE;
+ to += IO_LONG_SIZE;
+ count -= IO_LONG_SIZE;
+ }
+
+ while (count) {
+ *(u8 *)to = __raw_readb(from);
+ from++;
+ to++;
+ count--;
+ }
+}
+EXPORT_SYMBOL(__memcpy_fromio);
+
+void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
+{
+ while (count && !IS_ALIGNED((unsigned long)from, IO_LONG_SIZE) &&
+ !IS_ALIGNED((unsigned long)to, IO_LONG_SIZE)) {
+ __raw_writeb(*(u8 *)from, to);
+ from++;
+ to++;
+ count--;
+ }
+
+ while (count >= IO_LONG_SIZE) {
+ IO_LONG_WRITE(*(unsigned long *)from, to);
+ from += IO_LONG_SIZE;
+ to += IO_LONG_SIZE;
+ count -= IO_LONG_SIZE;
+ }
+
+ while (count) {
+ __raw_writeb(*(u8 *)from, to);
+ from++;
+ to++;
+ count--;
+ }
+}
+EXPORT_SYMBOL(__memcpy_toio);
+
+void __memset_io(volatile void __iomem *dst, int c, size_t count)
+{
+ unsigned long lc = (u8)c;
+ int i;
+
+ for (i = 1; i < IO_LONG_SIZE; i++)
+ lc |= (u8)c << (i * BITS_PER_BYTE);
+
+ while (count && !IS_ALIGNED((unsigned long)dst, IO_LONG_SIZE)) {
+ __raw_writeb((u8)c, dst);
+ dst++;
+ count--;
+ }
+
+ while (count >= IO_LONG_SIZE) {
+ IO_LONG_WRITE(lc, dst);
+ dst += IO_LONG_SIZE;
+ count -= IO_LONG_SIZE;
+ }
+
+ while (count) {
+ __raw_writeb(c, dst);
+ dst++;
+ count--;
+ }
+}
+EXPORT_SYMBOL(__memset_io);
+#endif
--
2.24.1
Hi Jiaxun,
On Tue, Jan 14, 2020 at 1:24 PM Jiaxun Yang <[email protected]> wrote:
>
> Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
> instructions like lwl or lwr in IO memory access. However, our current
> IO memcpy/memset is wired to the generic implementation, which leads
> to a fatal result.
Do you have a handy reproducer to try with QEMU/KVM?
18.01.2020, 22:41, "Philippe Mathieu-Daudé" <[email protected]>:
> Hi Jiaxun,
>
> On Tue, Jan 14, 2020 at 1:24 PM Jiaxun Yang <[email protected]> wrote:
>> Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
>> instructions like lwl or lwr in IO memory access. However, our current
>> IO memcpy/memset is wired to the generic implementation, which leads
>> to a fatal result.
>
> Do you have a handy reproducer to try with QEMU/KVM?
It was triggered by the QXL DRM driver when I was working on KVM for Loongson
with Huacai.
See arch/mips/kvm/emulate.c: we didn't implement trap emulation for unaligned
instructions on MMIO accesses. You can construct a simple unaligned
memcpy_fromio case to reproduce it.
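For example, a minimal test along those lines might look like the sketch below
(hypothetical module; the physical address and size are placeholders for
whatever MMIO region is convenient on the target, e.g. a PCI BAR):

/* Sketch only: force an unaligned memcpy_fromio() on an MMIO mapping.
 * TEST_MMIO_BASE/TEST_MMIO_SIZE are placeholders, not a real device.
 */
#include <linux/init.h>
#include <linux/io.h>
#include <linux/module.h>

#define TEST_MMIO_BASE	0x10000000UL	/* placeholder: use a real BAR here */
#define TEST_MMIO_SIZE	0x1000UL

static int __init unaligned_io_test_init(void)
{
	void __iomem *base;
	u8 buf[128];

	base = ioremap(TEST_MMIO_BASE, TEST_MMIO_SIZE);
	if (!base)
		return -ENOMEM;

	/*
	 * Misaligned source: if memcpy_fromio() falls through to the
	 * generic memcpy, the lwl/lwr (ldl/ldr) sequence hits the MMIO
	 * address, which faults on Loongson64 and is not emulated by KVM.
	 */
	memcpy_fromio(buf, base + 1, sizeof(buf) - 1);

	iounmap(base);
	return 0;
}

static void __exit unaligned_io_test_exit(void)
{
}

module_init(unaligned_io_test_init);
module_exit(unaligned_io_test_exit);
MODULE_LICENSE("GPL");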
Thanks.
Hi Jiaxun,
On Tue, Jan 14, 2020 at 08:23:43PM +0800, Jiaxun Yang wrote:
> Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
> instructions like lwl or lwr in IO memory access. However, our current
> IO memcpy/memset is wired to the generic implementation, which leads
> to a fatal result.
Hmm, I wonder if we should just do this unconditionally on all systems.
I can't think of a reason it'd ever be a good idea to use lwl/lwr on an
MMIO device. Any thoughts on that?
Thanks,
Paul
On Wed, Jan 22, 2020 at 10:45:06AM -0800, Paul Burton wrote:
> Hi Jiaxun,
>
> On Tue, Jan 14, 2020 at 08:23:43PM +0800, Jiaxun Yang wrote:
> > Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
> > instructions like lwl or lwr in IO memory access. However, our current
> > IO memcpy/memset is wired to the generic implementation, which leads
> > to a fatal result.
>
> Hmm, I wonder if we should just do this unconditionally on all systems.
> I can't think of a reason it'd ever be a good idea to use lwl/lwr on an
> MMIO device. Any thoughts on that?
depends on the type of device. I can see benefits for framebuffers
and memory devices since memset/memcpy are more optimised than the
function in this patch.
Thomas.
--
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea. [ RFC1925, 2.3 ]
On January 24, 2020 10:07:51 PM GMT+08:00, Thomas Bogendoerfer <[email protected]> wrote:
>On Wed, Jan 22, 2020 at 10:45:06AM -0800, Paul Burton wrote:
>> Hi Jiaxun,
>>
>> On Tue, Jan 14, 2020 at 08:23:43PM +0800, Jiaxun Yang wrote:
>> > Some platforms, such as Loongson64 or QEMU/KVM, don't support unaligned
>> > instructions like lwl or lwr in IO memory access. However, our current
>> > IO memcpy/memset is wired to the generic implementation, which leads
>> > to a fatal result.
>>
>> Hmm, I wonder if we should just do this unconditionally on all systems.
>> I can't think of a reason it'd ever be a good idea to use lwl/lwr on an
>> MMIO device. Any thoughts on that?
>
>depends on the type of device. I can see benefits for framebuffers
>and memory devices since memset/memcpy are more optimised than the
>function in this patch.
Is lwl/lwr slower than this implementation on your system?
I thought that other platforms which support unaligned requests could benefit from the speed-up these instructions provide.
>
>Thomas.
--
Jiaxun Yang