2022-10-17 02:35:05

by Huacai Chen

Subject: [PATCH V2] LoongArch: Add unaligned access support

Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
Loongson-3C5000) can be configured to support unaligned access in
hardware or not. This patch adds unaligned access emulation for those
LoongArch processors without hardware support.

Signed-off-by: Huacai Chen <[email protected]>
---
V2: Simplify READ_FPR and WRITE_FPR.

arch/loongarch/Kconfig | 2 +
arch/loongarch/include/asm/inst.h | 14 ++
arch/loongarch/kernel/Makefile | 3 +-
arch/loongarch/kernel/traps.c | 27 ++
arch/loongarch/kernel/unaligned.c | 393 ++++++++++++++++++++++++++++++
arch/loongarch/lib/Makefile | 2 +-
arch/loongarch/lib/unaligned.S | 93 +++++++
7 files changed, 532 insertions(+), 2 deletions(-)
create mode 100644 arch/loongarch/kernel/unaligned.c
create mode 100644 arch/loongarch/lib/unaligned.S

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 0a6ef613124c..a8dc58e8162a 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -122,6 +122,8 @@ config LOONGARCH
select RTC_LIB
select SMP
select SPARSE_IRQ
+ select SYSCTL_ARCH_UNALIGN_ALLOW
+ select SYSCTL_ARCH_UNALIGN_NO_WARN
select SYSCTL_EXCEPTION_TRACE
select SWIOTLB
select TRACE_IRQFLAGS_SUPPORT
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index fce1843ceebb..e96b5345f389 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -76,6 +76,10 @@ enum reg2i12_op {
ldbu_op = 0xa8,
ldhu_op = 0xa9,
ldwu_op = 0xaa,
+ flds_op = 0xac,
+ fsts_op = 0xad,
+ fldd_op = 0xae,
+ fstd_op = 0xaf,
};

enum reg2i14_op {
@@ -146,6 +150,10 @@ enum reg3_op {
ldxbu_op = 0x7040,
ldxhu_op = 0x7048,
ldxwu_op = 0x7050,
+ fldxs_op = 0x7060,
+ fldxd_op = 0x7068,
+ fstxs_op = 0x7070,
+ fstxd_op = 0x7078,
amswapw_op = 0x70c0,
amswapd_op = 0x70c1,
amaddw_op = 0x70c2,
@@ -566,4 +574,10 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \

DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op)

+struct pt_regs;
+
+unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign);
+unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n);
+void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc);
+
#endif /* _ASM_INST_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 42be564278fa..2ad2555b53ea 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -7,7 +7,8 @@ extra-y := vmlinux.lds

obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \
- elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o
+ elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \
+ unaligned.o

obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_EFI) += efi.o
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 1a4dce84ebc6..7ea62faeeadb 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -368,13 +368,40 @@ asmlinkage void noinstr do_ade(struct pt_regs *regs)
irqentry_exit(regs, state);
}

+/* sysctl hooks */
+int unaligned_enabled __read_mostly = 1; /* Enabled by default */
+int no_unaligned_warning __read_mostly = 1; /* Only 1 warning by default */
+
asmlinkage void noinstr do_ale(struct pt_regs *regs)
{
+ unsigned int *pc;
irqentry_state_t state = irqentry_enter(regs);

+ perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr);
+
+ /*
+ * Did we catch a fault trying to load an instruction?
+ */
+ if (regs->csr_badvaddr == regs->csr_era)
+ goto sigbus;
+ if (user_mode(regs) && !test_thread_flag(TIF_FIXADE))
+ goto sigbus;
+ if (!unaligned_enabled)
+ goto sigbus;
+ if (!no_unaligned_warning)
+ show_registers(regs);
+
+ pc = (unsigned int *)exception_era(regs);
+
+ emulate_load_store_insn(regs, (void __user *)regs->csr_badvaddr, pc);
+
+ goto out;
+
+sigbus:
die_if_kernel("Kernel ale access", regs);
force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);

+out:
irqentry_exit(regs, state);
}

diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c
new file mode 100644
index 000000000000..f367424b762a
--- /dev/null
+++ b/arch/loongarch/kernel/unaligned.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Handle unaligned accesses by emulation.
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ *
+ */
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/debugfs.h>
+#include <linux/perf_event.h>
+
+#include <asm/asm.h>
+#include <asm/branch.h>
+#include <asm/fpu.h>
+#include <asm/inst.h>
+
+#include "access-helper.h"
+
+#ifdef CONFIG_DEBUG_FS
+static u32 unaligned_instructions_user;
+static u32 unaligned_instructions_kernel;
+#endif
+
+static inline unsigned long read_fpr(unsigned int fd)
+{
+#define READ_FPR(fd, __value) \
+ __asm__ __volatile__("movfr2gr.d %0, $f"#fd"\n\t" : "=r"(__value));
+
+ unsigned long __value;
+
+ switch (fd) {
+ case 0:
+ READ_FPR(0, __value);
+ break;
+ case 1:
+ READ_FPR(1, __value);
+ break;
+ case 2:
+ READ_FPR(2, __value);
+ break;
+ case 3:
+ READ_FPR(3, __value);
+ break;
+ case 4:
+ READ_FPR(4, __value);
+ break;
+ case 5:
+ READ_FPR(5, __value);
+ break;
+ case 6:
+ READ_FPR(6, __value);
+ break;
+ case 7:
+ READ_FPR(7, __value);
+ break;
+ case 8:
+ READ_FPR(8, __value);
+ break;
+ case 9:
+ READ_FPR(9, __value);
+ break;
+ case 10:
+ READ_FPR(10, __value);
+ break;
+ case 11:
+ READ_FPR(11, __value);
+ break;
+ case 12:
+ READ_FPR(12, __value);
+ break;
+ case 13:
+ READ_FPR(13, __value);
+ break;
+ case 14:
+ READ_FPR(14, __value);
+ break;
+ case 15:
+ READ_FPR(15, __value);
+ break;
+ case 16:
+ READ_FPR(16, __value);
+ break;
+ case 17:
+ READ_FPR(17, __value);
+ break;
+ case 18:
+ READ_FPR(18, __value);
+ break;
+ case 19:
+ READ_FPR(19, __value);
+ break;
+ case 20:
+ READ_FPR(20, __value);
+ break;
+ case 21:
+ READ_FPR(21, __value);
+ break;
+ case 22:
+ READ_FPR(22, __value);
+ break;
+ case 23:
+ READ_FPR(23, __value);
+ break;
+ case 24:
+ READ_FPR(24, __value);
+ break;
+ case 25:
+ READ_FPR(25, __value);
+ break;
+ case 26:
+ READ_FPR(26, __value);
+ break;
+ case 27:
+ READ_FPR(27, __value);
+ break;
+ case 28:
+ READ_FPR(28, __value);
+ break;
+ case 29:
+ READ_FPR(29, __value);
+ break;
+ case 30:
+ READ_FPR(30, __value);
+ break;
+ case 31:
+ READ_FPR(31, __value);
+ break;
+ default:
+ panic("unexpected fd '%d'", fd);
+ }
+#undef READ_FPR
+ return __value;
+}
+
+static inline void write_fpr(unsigned int fd, unsigned long value)
+{
+#define WRITE_FPR(fd, value) \
+ __asm__ __volatile__("movgr2fr.d $f"#fd", %0\n\t" :: "r"(value));
+
+ switch (fd) {
+ case 0:
+ WRITE_FPR(0, value);
+ break;
+ case 1:
+ WRITE_FPR(1, value);
+ break;
+ case 2:
+ WRITE_FPR(2, value);
+ break;
+ case 3:
+ WRITE_FPR(3, value);
+ break;
+ case 4:
+ WRITE_FPR(4, value);
+ break;
+ case 5:
+ WRITE_FPR(5, value);
+ break;
+ case 6:
+ WRITE_FPR(6, value);
+ break;
+ case 7:
+ WRITE_FPR(7, value);
+ break;
+ case 8:
+ WRITE_FPR(8, value);
+ break;
+ case 9:
+ WRITE_FPR(9, value);
+ break;
+ case 10:
+ WRITE_FPR(10, value);
+ break;
+ case 11:
+ WRITE_FPR(11, value);
+ break;
+ case 12:
+ WRITE_FPR(12, value);
+ break;
+ case 13:
+ WRITE_FPR(13, value);
+ break;
+ case 14:
+ WRITE_FPR(14, value);
+ break;
+ case 15:
+ WRITE_FPR(15, value);
+ break;
+ case 16:
+ WRITE_FPR(16, value);
+ break;
+ case 17:
+ WRITE_FPR(17, value);
+ break;
+ case 18:
+ WRITE_FPR(18, value);
+ break;
+ case 19:
+ WRITE_FPR(19, value);
+ break;
+ case 20:
+ WRITE_FPR(20, value);
+ break;
+ case 21:
+ WRITE_FPR(21, value);
+ break;
+ case 22:
+ WRITE_FPR(22, value);
+ break;
+ case 23:
+ WRITE_FPR(23, value);
+ break;
+ case 24:
+ WRITE_FPR(24, value);
+ break;
+ case 25:
+ WRITE_FPR(25, value);
+ break;
+ case 26:
+ WRITE_FPR(26, value);
+ break;
+ case 27:
+ WRITE_FPR(27, value);
+ break;
+ case 28:
+ WRITE_FPR(28, value);
+ break;
+ case 29:
+ WRITE_FPR(29, value);
+ break;
+ case 30:
+ WRITE_FPR(30, value);
+ break;
+ case 31:
+ WRITE_FPR(31, value);
+ break;
+ default:
+ panic("unexpected fd '%d'", fd);
+ }
+#undef WRITE_FPR
+}
+
+void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc)
+{
+ bool user = user_mode(regs);
+ unsigned int res;
+ unsigned long origpc;
+ unsigned long origra;
+ unsigned long value = 0;
+ union loongarch_instruction insn;
+
+ origpc = (unsigned long)pc;
+ origra = regs->regs[1];
+
+ perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
+
+ /*
+ * This load never faults.
+ */
+ __get_inst(&insn.word, pc, user);
+ if (user && !access_ok(addr, 8))
+ goto sigbus;
+
+ if (insn.reg2i12_format.opcode == ldd_op ||
+ insn.reg2i14_format.opcode == ldptrd_op ||
+ insn.reg3_format.opcode == ldxd_op) {
+ res = unaligned_read(addr, &value, 8, 1);
+ if (res)
+ goto fault;
+ regs->regs[insn.reg2i12_format.rd] = value;
+ } else if (insn.reg2i12_format.opcode == ldw_op ||
+ insn.reg2i14_format.opcode == ldptrw_op ||
+ insn.reg3_format.opcode == ldxw_op) {
+ res = unaligned_read(addr, &value, 4, 1);
+ if (res)
+ goto fault;
+ regs->regs[insn.reg2i12_format.rd] = value;
+ } else if (insn.reg2i12_format.opcode == ldwu_op ||
+ insn.reg3_format.opcode == ldxwu_op) {
+ res = unaligned_read(addr, &value, 4, 0);
+ if (res)
+ goto fault;
+ regs->regs[insn.reg2i12_format.rd] = value;
+ } else if (insn.reg2i12_format.opcode == ldh_op ||
+ insn.reg3_format.opcode == ldxh_op) {
+ res = unaligned_read(addr, &value, 2, 1);
+ if (res)
+ goto fault;
+ regs->regs[insn.reg2i12_format.rd] = value;
+ } else if (insn.reg2i12_format.opcode == ldhu_op ||
+ insn.reg3_format.opcode == ldxhu_op) {
+ res = unaligned_read(addr, &value, 2, 0);
+ if (res)
+ goto fault;
+ regs->regs[insn.reg2i12_format.rd] = value;
+ } else if (insn.reg2i12_format.opcode == std_op ||
+ insn.reg2i14_format.opcode == stptrd_op ||
+ insn.reg3_format.opcode == stxd_op) {
+ value = regs->regs[insn.reg2i12_format.rd];
+ res = unaligned_write(addr, value, 8);
+ if (res)
+ goto fault;
+ } else if (insn.reg2i12_format.opcode == stw_op ||
+ insn.reg2i14_format.opcode == stptrw_op ||
+ insn.reg3_format.opcode == stxw_op) {
+ value = regs->regs[insn.reg2i12_format.rd];
+ res = unaligned_write(addr, value, 4);
+ if (res)
+ goto fault;
+ } else if (insn.reg2i12_format.opcode == sth_op ||
+ insn.reg3_format.opcode == stxh_op) {
+ value = regs->regs[insn.reg2i12_format.rd];
+ res = unaligned_write(addr, value, 2);
+ if (res)
+ goto fault;
+ } else if (insn.reg2i12_format.opcode == fldd_op ||
+ insn.reg3_format.opcode == fldxd_op) {
+ res = unaligned_read(addr, &value, 8, 1);
+ if (res)
+ goto fault;
+ write_fpr(insn.reg2i12_format.rd, value);
+ } else if (insn.reg2i12_format.opcode == flds_op ||
+ insn.reg3_format.opcode == fldxs_op) {
+ res = unaligned_read(addr, &value, 4, 1);
+ if (res)
+ goto fault;
+ write_fpr(insn.reg2i12_format.rd, value);
+ } else if (insn.reg2i12_format.opcode == fstd_op ||
+ insn.reg3_format.opcode == fstxd_op) {
+ value = read_fpr(insn.reg2i12_format.rd);
+ res = unaligned_write(addr, value, 8);
+ if (res)
+ goto fault;
+ } else if (insn.reg2i12_format.opcode == fsts_op ||
+ insn.reg3_format.opcode == fstxs_op) {
+ value = read_fpr(insn.reg2i12_format.rd);
+ res = unaligned_write(addr, value, 4);
+ if (res)
+ goto fault;
+ } else
+ goto sigbus;
+
+
+#ifdef CONFIG_DEBUG_FS
+ if (user)
+ unaligned_instructions_user++;
+ else
+ unaligned_instructions_kernel++;
+#endif
+
+ compute_return_era(regs);
+ return;
+
+fault:
+ /* roll back jump/branch */
+ regs->csr_era = origpc;
+ regs->regs[1] = origra;
+ /* Did we have an exception handler installed? */
+ if (fixup_exception(regs))
+ return;
+
+ die_if_kernel("Unhandled kernel unaligned access", regs);
+ force_sig(SIGSEGV);
+
+ return;
+
+sigbus:
+ die_if_kernel("Unhandled kernel unaligned access", regs);
+ force_sig(SIGBUS);
+
+ return;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int __init debugfs_unaligned(void)
+{
+ struct dentry *d;
+
+ d = debugfs_create_dir("loongarch", NULL);
+ if (!d)
+ return -ENOMEM;
+
+ debugfs_create_u32("unaligned_instructions_user",
+ S_IRUGO, d, &unaligned_instructions_user);
+ debugfs_create_u32("unaligned_instructions_kernel",
+ S_IRUGO, d, &unaligned_instructions_kernel);
+
+ return 0;
+}
+arch_initcall(debugfs_unaligned);
+#endif
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index e36635fccb69..867895530340 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -3,4 +3,4 @@
# Makefile for LoongArch-specific library files.
#

-lib-y += delay.o clear_user.o copy_user.o dump_tlb.o
+lib-y += delay.o clear_user.o copy_user.o dump_tlb.o unaligned.o
diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S
new file mode 100644
index 000000000000..03210cb5a18d
--- /dev/null
+++ b/arch/loongarch/lib/unaligned.S
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/errno.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+.macro fixup_ex from, to, fix
+.if \fix
+ .section .fixup, "ax"
+\to: li.w a0, -EFAULT
+ jr ra
+ .previous
+.endif
+ .section __ex_table, "a"
+ PTR \from\()b, \to\()b
+ .previous
+.endm
+
+/*
+ * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign)
+ *
+ * a0: addr
+ * a1: value
+ * a2: n
+ * a3: sign
+ */
+SYM_FUNC_START(unaligned_read)
+ beqz a2, 5f
+
+ li.w t1, 8
+ li.w t2, 0
+
+ addi.d t0, a2, -1
+ mul.d t1, t0, t1
+ add.d a0, a0, t0
+
+ beq a3, zero, 2f
+1: ld.b t3, a0, 0
+ b 3f
+
+2: ld.bu t3, a0, 0
+3: sll.d t3, t3, t1
+ or t2, t2, t3
+ addi.d t1, t1, -8
+ addi.d a0, a0, -1
+ addi.d a2, a2, -1
+ bgt a2, zero, 2b
+4: st.d t2, a1, 0
+
+ move a0, a2
+ jr ra
+
+5: li.w a0, -EFAULT
+ jr ra
+
+ fixup_ex 1, 6, 1
+ fixup_ex 2, 6, 0
+ fixup_ex 4, 6, 0
+SYM_FUNC_END(unaligned_read)
+
+/*
+ * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n)
+ *
+ * a0: addr
+ * a1: value
+ * a2: n
+ */
+SYM_FUNC_START(unaligned_write)
+ beqz a2, 3f
+
+ li.w t0, 0
+1: srl.d t1, a1, t0
+2: st.b t1, a0, 0
+ addi.d t0, t0, 8
+ addi.d a2, a2, -1
+ addi.d a0, a0, 1
+ bgt a2, zero, 1b
+
+ move a0, a2
+ jr ra
+
+3: li.w a0, -EFAULT
+ jr ra
+
+ fixup_ex 2, 4, 1
+SYM_FUNC_END(unaligned_write)
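
For readers less familiar with LoongArch assembly, here is a rough C
equivalent of the two helpers above (illustrative only; the function names
c_unaligned_read/c_unaligned_write are made up for this sketch, and the
real code has to stay in assembly so that each byte access can be covered
by an exception-table fixup entry):

/* Sketch only: assumes little-endian layout and kernel types (bool, -EFAULT). */
static unsigned long c_unaligned_read(const unsigned char *addr,
				      unsigned long *value,
				      unsigned long n, bool sign)
{
	unsigned long v;
	long i = n - 1;

	if (!n)
		return -EFAULT;

	/* Most significant byte first; sign-extend it for signed loads. */
	v = sign ? (unsigned long)(long)(signed char)addr[i]
		 : (unsigned long)addr[i];
	v <<= i * 8;

	/* OR in the remaining bytes, always zero-extended. */
	for (i = n - 2; i >= 0; i--)
		v |= (unsigned long)addr[i] << (i * 8);

	*value = v;
	return 0;
}

static unsigned long c_unaligned_write(unsigned char *addr,
				       unsigned long value, unsigned long n)
{
	unsigned long i;

	if (!n)
		return -EFAULT;

	/* Store the low n bytes, lowest byte first. */
	for (i = 0; i < n; i++)
		addr[i] = value >> (i * 8);

	return 0;
}
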
--
2.31.1


2022-10-17 04:54:24

by Jinyang He

Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

Hi, Huacai,


On 2022/10/17 10:23 AM, Huacai Chen wrote:
> [...]
> + default:
> + panic("unexpected fd '%d'", fd);
Due to GCC's optimization, the panic() is never actually used, yet the
'read_fpr'/'write_fpr' symbols are left in vmlinux. Maybe we can use
unreachable() and __always_inline.
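
Roughly something like this (a sketch only, not tested; the case bodies
stay exactly as in the patch):

static __always_inline unsigned long read_fpr(unsigned int idx)
{
	unsigned long val;

	switch (idx) {
	case 0:
		__asm__ __volatile__("movfr2gr.d %0, $f0\n\t" : "=r"(val));
		break;
	/* ... cases 1 to 31 as in the patch ... */
	default:
		/* idx is always 0-31 by construction */
		unreachable();
	}

	return val;
}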

> [...]
> +
> +fault:
> + /* roll back jump/branch */
> + regs->csr_era = origpc;
> + regs->regs[1] = origra;

I'm not sure where the csr_era and regs[1] were damaged...

> [...]
>
> +/*
> + * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign)
> + *
> + * a0: addr
> + * a1: value
> + * a2: n
> + * a3: sign
> + */
> +SYM_FUNC_START(unaligned_read)
> + beqz a2, 5f
> +
> + li.w t1, 8
IMHO we can avoid the constant reg t1.
> + li.w t2, 0
> +
> + addi.d t0, a2, -1
> + mul.d t1, t0, t1
> + add.d a0, a0, t0
> +
> + beq a3, zero, 2f
beqz
> +1: ld.b t3, a0, 0
> + b 3f
> +
> +2: ld.bu t3, a0, 0
> +3: sll.d t3, t3, t1
> + or t2, t2, t3
> + addi.d t1, t1, -8
> + addi.d a0, a0, -1
> + addi.d a2, a2, -1
> + bgt a2, zero, 2b
bgtz
> +4: st.d t2, a1, 0
> +
> + move a0, a2
> + jr ra
> +
> +5: li.w a0, -EFAULT
> + jr ra
> +
> + fixup_ex 1, 6, 1
> + fixup_ex 2, 6, 0
> + fixup_ex 4, 6, 0
> +SYM_FUNC_END(unaligned_read)
> +
> +/*
> + * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n)
> + *
> + * a0: addr
> + * a1: value
> + * a2: n
> + */
> +SYM_FUNC_START(unaligned_write)
> + beqz a2, 3f
> +
> + li.w t0, 0
> +1: srl.d t1, a1, t0
> +2: st.b t1, a0, 0
> + addi.d t0, t0, 8
> + addi.d a2, a2, -1
> + addi.d a0, a0, 1
> + bgt a2, zero, 1b
bgtz
> +
> + move a0, a2
> + jr ra
> +
> +3: li.w a0, -EFAULT
> + jr ra
> +
> + fixup_ex 2, 4, 1
> +SYM_FUNC_END(unaligned_write)

Thanks,

Jinyang

2022-10-17 06:27:34

by WANG Xuerui

Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

On 2022/10/17 10:23, Huacai Chen wrote:
> Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
> unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
> Loongson-3C5000) can be configured to support unaligned access in
> hardware or not. This patch adds unaligned access emulation for those
> LoongArch processors without hardware support.
>
> Signed-off-by: Huacai Chen <[email protected]>
> ---
> V2: Simplify READ_FPR and WRITE_FPR.
>
> arch/loongarch/Kconfig | 2 +
> arch/loongarch/include/asm/inst.h | 14 ++
> arch/loongarch/kernel/Makefile | 3 +-
> arch/loongarch/kernel/traps.c | 27 ++
> arch/loongarch/kernel/unaligned.c | 393 ++++++++++++++++++++++++++++++
> arch/loongarch/lib/Makefile | 2 +-
> arch/loongarch/lib/unaligned.S | 93 +++++++
> 7 files changed, 532 insertions(+), 2 deletions(-)
> create mode 100644 arch/loongarch/kernel/unaligned.c
> create mode 100644 arch/loongarch/lib/unaligned.S

Please also update Documentation/admin-guide/sysctl/kernel.rst to
mention loongarch in the respective sysctls' documentation. (Grep for
ARCH_UNALIGN to see.)

>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 0a6ef613124c..a8dc58e8162a 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -122,6 +122,8 @@ config LOONGARCH
> select RTC_LIB
> select SMP
> select SPARSE_IRQ
> + select SYSCTL_ARCH_UNALIGN_ALLOW
> + select SYSCTL_ARCH_UNALIGN_NO_WARN
> select SYSCTL_EXCEPTION_TRACE
> select SWIOTLB
> select TRACE_IRQFLAGS_SUPPORT
> diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
> index fce1843ceebb..e96b5345f389 100644
> --- a/arch/loongarch/include/asm/inst.h
> +++ b/arch/loongarch/include/asm/inst.h
> @@ -76,6 +76,10 @@ enum reg2i12_op {
> ldbu_op = 0xa8,
> ldhu_op = 0xa9,
> ldwu_op = 0xaa,
> + flds_op = 0xac,
> + fsts_op = 0xad,
> + fldd_op = 0xae,
> + fstd_op = 0xaf,
> };
>
> enum reg2i14_op {
> @@ -146,6 +150,10 @@ enum reg3_op {
> ldxbu_op = 0x7040,
> ldxhu_op = 0x7048,
> ldxwu_op = 0x7050,
> + fldxs_op = 0x7060,
> + fldxd_op = 0x7068,
> + fstxs_op = 0x7070,
> + fstxd_op = 0x7078,
> amswapw_op = 0x70c0,
> amswapd_op = 0x70c1,
> amaddw_op = 0x70c2,
> @@ -566,4 +574,10 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \
>
> DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op)
>
> +struct pt_regs;
> +
> +unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign);
> +unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n);
> +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc);
> +
> #endif /* _ASM_INST_H */
> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> index 42be564278fa..2ad2555b53ea 100644
> --- a/arch/loongarch/kernel/Makefile
> +++ b/arch/loongarch/kernel/Makefile
> @@ -7,7 +7,8 @@ extra-y := vmlinux.lds
>
> obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
> traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \
> - elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o
> + elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \
> + unaligned.o
>
> obj-$(CONFIG_ACPI) += acpi.o
> obj-$(CONFIG_EFI) += efi.o
> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> index 1a4dce84ebc6..7ea62faeeadb 100644
> --- a/arch/loongarch/kernel/traps.c
> +++ b/arch/loongarch/kernel/traps.c
> @@ -368,13 +368,40 @@ asmlinkage void noinstr do_ade(struct pt_regs *regs)
> irqentry_exit(regs, state);
> }
>
> +/* sysctl hooks */
> +int unaligned_enabled __read_mostly = 1; /* Enabled by default */
> +int no_unaligned_warning __read_mostly = 1; /* Only 1 warning by default */
> +
> asmlinkage void noinstr do_ale(struct pt_regs *regs)
> {
> + unsigned int *pc;
> irqentry_state_t state = irqentry_enter(regs);
>
> + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr);
> +
> + /*
> + * Did we catch a fault trying to load an instruction?
> + */
> + if (regs->csr_badvaddr == regs->csr_era)
> + goto sigbus;
> + if (user_mode(regs) && !test_thread_flag(TIF_FIXADE))
> + goto sigbus;
> + if (!unaligned_enabled)
> + goto sigbus;
> + if (!no_unaligned_warning)
> + show_registers(regs);
> +
> + pc = (unsigned int *)exception_era(regs);
> +
> + emulate_load_store_insn(regs, (void __user *)regs->csr_badvaddr, pc);
> +
> + goto out;
> +
> +sigbus:
> die_if_kernel("Kernel ale access", regs);
> force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
>
> +out:
> irqentry_exit(regs, state);
> }
>
> diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c
> new file mode 100644
> index 000000000000..f367424b762a
> --- /dev/null
> +++ b/arch/loongarch/kernel/unaligned.c
> @@ -0,0 +1,393 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Handle unaligned accesses by emulation.
> + *
> + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited

MIPS heritage too?

> + *
> + */
> +#include <linux/mm.h>
> +#include <linux/sched.h>
> +#include <linux/signal.h>
> +#include <linux/debugfs.h>
> +#include <linux/perf_event.h>
> +
> +#include <asm/asm.h>
> +#include <asm/branch.h>
> +#include <asm/fpu.h>
> +#include <asm/inst.h>
> +
> +#include "access-helper.h"
> +
> +#ifdef CONFIG_DEBUG_FS
> +static u32 unaligned_instructions_user;
> +static u32 unaligned_instructions_kernel;
> +#endif
> +
> +static inline unsigned long read_fpr(unsigned int fd)
> +{
> +#define READ_FPR(fd, __value) \
> + __asm__ __volatile__("movfr2gr.d %0, $f"#fd"\n\t" : "=r"(__value));
> +
> + unsigned long __value;
> +
> + switch (fd) {
> + case 0:
> + READ_FPR(0, __value);
> + break;
> + case 1:
> + READ_FPR(1, __value);
> + break;
> + case 2:
> + READ_FPR(2, __value);
> + break;
> + case 3:
> + READ_FPR(3, __value);
> + break;
> + case 4:
> + READ_FPR(4, __value);
> + break;
> + case 5:
> + READ_FPR(5, __value);
> + break;
> + case 6:
> + READ_FPR(6, __value);
> + break;
> + case 7:
> + READ_FPR(7, __value);
> + break;
> + case 8:
> + READ_FPR(8, __value);
> + break;
> + case 9:
> + READ_FPR(9, __value);
> + break;
> + case 10:
> + READ_FPR(10, __value);
> + break;
> + case 11:
> + READ_FPR(11, __value);
> + break;
> + case 12:
> + READ_FPR(12, __value);
> + break;
> + case 13:
> + READ_FPR(13, __value);
> + break;
> + case 14:
> + READ_FPR(14, __value);
> + break;
> + case 15:
> + READ_FPR(15, __value);
> + break;
> + case 16:
> + READ_FPR(16, __value);
> + break;
> + case 17:
> + READ_FPR(17, __value);
> + break;
> + case 18:
> + READ_FPR(18, __value);
> + break;
> + case 19:
> + READ_FPR(19, __value);
> + break;
> + case 20:
> + READ_FPR(20, __value);
> + break;
> + case 21:
> + READ_FPR(21, __value);
> + break;
> + case 22:
> + READ_FPR(22, __value);
> + break;
> + case 23:
> + READ_FPR(23, __value);
> + break;
> + case 24:
> + READ_FPR(24, __value);
> + break;
> + case 25:
> + READ_FPR(25, __value);
> + break;
> + case 26:
> + READ_FPR(26, __value);
> + break;
> + case 27:
> + READ_FPR(27, __value);
> + break;
> + case 28:
> + READ_FPR(28, __value);
> + break;
> + case 29:
> + READ_FPR(29, __value);
> + break;
> + case 30:
> + READ_FPR(30, __value);
> + break;
> + case 31:
> + READ_FPR(31, __value);
> + break;
> + default:
> + panic("unexpected fd '%d'", fd);

So this is a bit misleading; I was thinking of file descriptors when I
was reading Feiyang's review comments... maybe something as simple as
"idx", "num" or "regno" will do?

> + }
> +#undef READ_FPR
> + return __value;
> +}
> +
> +static inline void write_fpr(unsigned int fd, unsigned long value)
> +{
> +#define WRITE_FPR(fd, value) \
> + __asm__ __volatile__("movgr2fr.d $f"#fd", %0\n\t" :: "r"(value));
> +
> + switch (fd) {
> + case 0:
> + WRITE_FPR(0, value);
> + break;
> + case 1:
> + WRITE_FPR(1, value);
> + break;
> + case 2:
> + WRITE_FPR(2, value);
> + break;
> + case 3:
> + WRITE_FPR(3, value);
> + break;
> + case 4:
> + WRITE_FPR(4, value);
> + break;
> + case 5:
> + WRITE_FPR(5, value);
> + break;
> + case 6:
> + WRITE_FPR(6, value);
> + break;
> + case 7:
> + WRITE_FPR(7, value);
> + break;
> + case 8:
> + WRITE_FPR(8, value);
> + break;
> + case 9:
> + WRITE_FPR(9, value);
> + break;
> + case 10:
> + WRITE_FPR(10, value);
> + break;
> + case 11:
> + WRITE_FPR(11, value);
> + break;
> + case 12:
> + WRITE_FPR(12, value);
> + break;
> + case 13:
> + WRITE_FPR(13, value);
> + break;
> + case 14:
> + WRITE_FPR(14, value);
> + break;
> + case 15:
> + WRITE_FPR(15, value);
> + break;
> + case 16:
> + WRITE_FPR(16, value);
> + break;
> + case 17:
> + WRITE_FPR(17, value);
> + break;
> + case 18:
> + WRITE_FPR(18, value);
> + break;
> + case 19:
> + WRITE_FPR(19, value);
> + break;
> + case 20:
> + WRITE_FPR(20, value);
> + break;
> + case 21:
> + WRITE_FPR(21, value);
> + break;
> + case 22:
> + WRITE_FPR(22, value);
> + break;
> + case 23:
> + WRITE_FPR(23, value);
> + break;
> + case 24:
> + WRITE_FPR(24, value);
> + break;
> + case 25:
> + WRITE_FPR(25, value);
> + break;
> + case 26:
> + WRITE_FPR(26, value);
> + break;
> + case 27:
> + WRITE_FPR(27, value);
> + break;
> + case 28:
> + WRITE_FPR(28, value);
> + break;
> + case 29:
> + WRITE_FPR(29, value);
> + break;
> + case 30:
> + WRITE_FPR(30, value);
> + break;
> + case 31:
> + WRITE_FPR(31, value);
> + break;
> + default:
> + panic("unexpected fd '%d'", fd);
> + }
> +#undef WRITE_FPR
> +}
> +
> +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc)
> +{
> + bool user = user_mode(regs);
> + unsigned int res;
> + unsigned long origpc;
> + unsigned long origra;
> + unsigned long value = 0;
> + union loongarch_instruction insn;
> +
> + origpc = (unsigned long)pc;
> + origra = regs->regs[1];
> +
> + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
> +
> + /*
> + * This load never faults.
> + */
> + __get_inst(&insn.word, pc, user);
> + if (user && !access_ok(addr, 8))
> + goto sigbus;
> +
> + if (insn.reg2i12_format.opcode == ldd_op ||
> + insn.reg2i14_format.opcode == ldptrd_op ||
> + insn.reg3_format.opcode == ldxd_op) {
> + res = unaligned_read(addr, &value, 8, 1);
> + if (res)
> + goto fault;
> + regs->regs[insn.reg2i12_format.rd] = value;
> + } else if (insn.reg2i12_format.opcode == ldw_op ||
> + insn.reg2i14_format.opcode == ldptrw_op ||
> + insn.reg3_format.opcode == ldxw_op) {
> + res = unaligned_read(addr, &value, 4, 1);
> + if (res)
> + goto fault;
> + regs->regs[insn.reg2i12_format.rd] = value;
> + } else if (insn.reg2i12_format.opcode == ldwu_op ||
> + insn.reg3_format.opcode == ldxwu_op) {
> + res = unaligned_read(addr, &value, 4, 0);
> + if (res)
> + goto fault;
> + regs->regs[insn.reg2i12_format.rd] = value;
> + } else if (insn.reg2i12_format.opcode == ldh_op ||
> + insn.reg3_format.opcode == ldxh_op) {
> + res = unaligned_read(addr, &value, 2, 1);
> + if (res)
> + goto fault;
> + regs->regs[insn.reg2i12_format.rd] = value;
> + } else if (insn.reg2i12_format.opcode == ldhu_op ||
> + insn.reg3_format.opcode == ldxhu_op) {
> + res = unaligned_read(addr, &value, 2, 0);
> + if (res)
> + goto fault;
> + regs->regs[insn.reg2i12_format.rd] = value;
> + } else if (insn.reg2i12_format.opcode == std_op ||
> + insn.reg2i14_format.opcode == stptrd_op ||
> + insn.reg3_format.opcode == stxd_op) {
> + value = regs->regs[insn.reg2i12_format.rd];
> + res = unaligned_write(addr, value, 8);
> + if (res)
> + goto fault;
> + } else if (insn.reg2i12_format.opcode == stw_op ||
> + insn.reg2i14_format.opcode == stptrw_op ||
> + insn.reg3_format.opcode == stxw_op) {
> + value = regs->regs[insn.reg2i12_format.rd];
> + res = unaligned_write(addr, value, 4);
> + if (res)
> + goto fault;
> + } else if (insn.reg2i12_format.opcode == sth_op ||
> + insn.reg3_format.opcode == stxh_op) {
> + value = regs->regs[insn.reg2i12_format.rd];
> + res = unaligned_write(addr, value, 2);
> + if (res)
> + goto fault;
> + } else if (insn.reg2i12_format.opcode == fldd_op ||
> + insn.reg3_format.opcode == fldxd_op) {
> + res = unaligned_read(addr, &value, 8, 1);
> + if (res)
> + goto fault;
> + write_fpr(insn.reg2i12_format.rd, value);
> + } else if (insn.reg2i12_format.opcode == flds_op ||
> + insn.reg3_format.opcode == fldxs_op) {
> + res = unaligned_read(addr, &value, 4, 1);
> + if (res)
> + goto fault;
> + write_fpr(insn.reg2i12_format.rd, value);
> + } else if (insn.reg2i12_format.opcode == fstd_op ||
> + insn.reg3_format.opcode == fstxd_op) {
> + value = read_fpr(insn.reg2i12_format.rd);
> + res = unaligned_write(addr, value, 8);
> + if (res)
> + goto fault;
> + } else if (insn.reg2i12_format.opcode == fsts_op ||
> + insn.reg3_format.opcode == fstxs_op) {
> + value = read_fpr(insn.reg2i12_format.rd);
> + res = unaligned_write(addr, value, 4);
> + if (res)
> + goto fault;
> + } else
> + goto sigbus;
> +
> +
> +#ifdef CONFIG_DEBUG_FS
> + if (user)
> + unaligned_instructions_user++;
> + else
> + unaligned_instructions_kernel++;
> +#endif
> +
> + compute_return_era(regs);
> + return;
> +
> +fault:
> + /* roll back jump/branch */
> + regs->csr_era = origpc;
> + regs->regs[1] = origra;
> + /* Did we have an exception handler installed? */
> + if (fixup_exception(regs))
> + return;
> +
> + die_if_kernel("Unhandled kernel unaligned access", regs);
> + force_sig(SIGSEGV);
> +
> + return;
> +
> +sigbus:
> + die_if_kernel("Unhandled kernel unaligned access", regs);
> + force_sig(SIGBUS);
> +
> + return;
> +}
> +
> +#ifdef CONFIG_DEBUG_FS
> +static int __init debugfs_unaligned(void)
> +{
> + struct dentry *d;
> +
> + d = debugfs_create_dir("loongarch", NULL);
> + if (!d)
> + return -ENOMEM;
> +
> + debugfs_create_u32("unaligned_instructions_user",
> + S_IRUGO, d, &unaligned_instructions_user);
> + debugfs_create_u32("unaligned_instructions_kernel",
> + S_IRUGO, d, &unaligned_instructions_kernel);
> +
> + return 0;
> +}
> +arch_initcall(debugfs_unaligned);
> +#endif
> diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
> index e36635fccb69..867895530340 100644
> --- a/arch/loongarch/lib/Makefile
> +++ b/arch/loongarch/lib/Makefile
> @@ -3,4 +3,4 @@
> # Makefile for LoongArch-specific library files.
> #
>
> -lib-y += delay.o clear_user.o copy_user.o dump_tlb.o
> +lib-y += delay.o clear_user.o copy_user.o dump_tlb.o unaligned.o
> diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S
> new file mode 100644
> index 000000000000..03210cb5a18d
> --- /dev/null
> +++ b/arch/loongarch/lib/unaligned.S
> @@ -0,0 +1,93 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
> + */
> +
> +#include <linux/linkage.h>
> +
> +#include <asm/asm.h>
> +#include <asm/asmmacro.h>
> +#include <asm/errno.h>
> +#include <asm/export.h>
> +#include <asm/regdef.h>
> +
> +.macro fixup_ex from, to, fix
> +.if \fix
> + .section .fixup, "ax"
> +\to: li.w a0, -EFAULT
> + jr ra
> + .previous
> +.endif
> + .section __ex_table, "a"
> + PTR \from\()b, \to\()b
> + .previous
> +.endm
> +
> +/*
> + * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign)
> + *
> + * a0: addr
> + * a1: value
> + * a2: n
> + * a3: sign
> + */
> +SYM_FUNC_START(unaligned_read)
> + beqz a2, 5f
> +
> + li.w t1, 8
> + li.w t2, 0
> +
> + addi.d t0, a2, -1
> + mul.d t1, t0, t1

Remove the `t1 = 8` above; then `slli.d t1, t0, 3` here would be enough,
and one cycle is saved.
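
In C-like pseudo code the prologue then becomes something like this
(sketch only, variable names are illustrative):

	t0 = n - 1;
	shift = t0 << 3;	/* same as (n - 1) * 8, so no constant register or mul.d is needed */
	addr += t0;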

> + add.d a0, a0, t0
> +
> + beq a3, zero, 2f

beqz

> +1: ld.b t3, a0, 0
> + b 3f
> +
> +2: ld.bu t3, a0, 0
> +3: sll.d t3, t3, t1
> + or t2, t2, t3
> + addi.d t1, t1, -8
> + addi.d a0, a0, -1
> + addi.d a2, a2, -1
> + bgt a2, zero, 2b

bgtz

> +4: st.d t2, a1, 0
> +
> + move a0, a2
> + jr ra
> +
> +5: li.w a0, -EFAULT
> + jr ra
> +
> + fixup_ex 1, 6, 1
> + fixup_ex 2, 6, 0
> + fixup_ex 4, 6, 0
> +SYM_FUNC_END(unaligned_read)
> +
> +/*
> + * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n)
> + *
> + * a0: addr
> + * a1: value
> + * a2: n
> + */
> +SYM_FUNC_START(unaligned_write)
> + beqz a2, 3f
> +
> + li.w t0, 0
> +1: srl.d t1, a1, t0
> +2: st.b t1, a0, 0
> + addi.d t0, t0, 8
> + addi.d a2, a2, -1
> + addi.d a0, a0, 1
> + bgt a2, zero, 1b

bgtz

> +
> + move a0, a2
> + jr ra
> +
> +3: li.w a0, -EFAULT
> + jr ra
> +
> + fixup_ex 2, 4, 1
> +SYM_FUNC_END(unaligned_write)

--
WANG "xen0n" Xuerui

Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/

2022-10-17 07:53:51

by Huacai Chen

Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

Hi, Xuerui,

On Mon, Oct 17, 2022 at 2:07 PM WANG Xuerui <[email protected]> wrote:
>
> On 2022/10/17 10:23, Huacai Chen wrote:
> > Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
> > unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
> > Loongson-3C5000) can be configured to support unaligned access in
> > hardware or not. This patch adds unaligned access emulation for those
> > LoongArch processors without hardware support.
> >
> > Signed-off-by: Huacai Chen <[email protected]>
> > ---
> > V2: Simplify READ_FPR and WRITE_FPR.
> >
> > arch/loongarch/Kconfig | 2 +
> > arch/loongarch/include/asm/inst.h | 14 ++
> > arch/loongarch/kernel/Makefile | 3 +-
> > arch/loongarch/kernel/traps.c | 27 ++
> > arch/loongarch/kernel/unaligned.c | 393 ++++++++++++++++++++++++++++++
> > arch/loongarch/lib/Makefile | 2 +-
> > arch/loongarch/lib/unaligned.S | 93 +++++++
> > 7 files changed, 532 insertions(+), 2 deletions(-)
> > create mode 100644 arch/loongarch/kernel/unaligned.c
> > create mode 100644 arch/loongarch/lib/unaligned.S
>
> Please also update Documentation/admin-guide/sysctl/kernel.rst to
> mention loongarch in the respective sysctls' documentation. (Grep for
> ARCH_UNALIGN to see.)
OK, thanks.

>
> >
> > diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> > index 0a6ef613124c..a8dc58e8162a 100644
> > --- a/arch/loongarch/Kconfig
> > +++ b/arch/loongarch/Kconfig
> > @@ -122,6 +122,8 @@ config LOONGARCH
> > select RTC_LIB
> > select SMP
> > select SPARSE_IRQ
> > + select SYSCTL_ARCH_UNALIGN_ALLOW
> > + select SYSCTL_ARCH_UNALIGN_NO_WARN
> > select SYSCTL_EXCEPTION_TRACE
> > select SWIOTLB
> > select TRACE_IRQFLAGS_SUPPORT
> > diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
> > index fce1843ceebb..e96b5345f389 100644
> > --- a/arch/loongarch/include/asm/inst.h
> > +++ b/arch/loongarch/include/asm/inst.h
> > @@ -76,6 +76,10 @@ enum reg2i12_op {
> > ldbu_op = 0xa8,
> > ldhu_op = 0xa9,
> > ldwu_op = 0xaa,
> > + flds_op = 0xac,
> > + fsts_op = 0xad,
> > + fldd_op = 0xae,
> > + fstd_op = 0xaf,
> > };
> >
> > enum reg2i14_op {
> > @@ -146,6 +150,10 @@ enum reg3_op {
> > ldxbu_op = 0x7040,
> > ldxhu_op = 0x7048,
> > ldxwu_op = 0x7050,
> > + fldxs_op = 0x7060,
> > + fldxd_op = 0x7068,
> > + fstxs_op = 0x7070,
> > + fstxd_op = 0x7078,
> > amswapw_op = 0x70c0,
> > amswapd_op = 0x70c1,
> > amaddw_op = 0x70c2,
> > @@ -566,4 +574,10 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \
> >
> > DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op)
> >
> > +struct pt_regs;
> > +
> > +unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign);
> > +unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n);
> > +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc);
> > +
> > #endif /* _ASM_INST_H */
> > diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> > index 42be564278fa..2ad2555b53ea 100644
> > --- a/arch/loongarch/kernel/Makefile
> > +++ b/arch/loongarch/kernel/Makefile
> > @@ -7,7 +7,8 @@ extra-y := vmlinux.lds
> >
> > obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
> > traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \
> > - elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o
> > + elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \
> > + unaligned.o
> >
> > obj-$(CONFIG_ACPI) += acpi.o
> > obj-$(CONFIG_EFI) += efi.o
> > diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> > index 1a4dce84ebc6..7ea62faeeadb 100644
> > --- a/arch/loongarch/kernel/traps.c
> > +++ b/arch/loongarch/kernel/traps.c
> > @@ -368,13 +368,40 @@ asmlinkage void noinstr do_ade(struct pt_regs *regs)
> > irqentry_exit(regs, state);
> > }
> >
> > +/* sysctl hooks */
> > +int unaligned_enabled __read_mostly = 1; /* Enabled by default */
> > +int no_unaligned_warning __read_mostly = 1; /* Only 1 warning by default */
> > +
> > asmlinkage void noinstr do_ale(struct pt_regs *regs)
> > {
> > + unsigned int *pc;
> > irqentry_state_t state = irqentry_enter(regs);
> >
> > + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr);
> > +
> > + /*
> > + * Did we catch a fault trying to load an instruction?
> > + */
> > + if (regs->csr_badvaddr == regs->csr_era)
> > + goto sigbus;
> > + if (user_mode(regs) && !test_thread_flag(TIF_FIXADE))
> > + goto sigbus;
> > + if (!unaligned_enabled)
> > + goto sigbus;
> > + if (!no_unaligned_warning)
> > + show_registers(regs);
> > +
> > + pc = (unsigned int *)exception_era(regs);
> > +
> > + emulate_load_store_insn(regs, (void __user *)regs->csr_badvaddr, pc);
> > +
> > + goto out;
> > +
> > +sigbus:
> > die_if_kernel("Kernel ale access", regs);
> > force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
> >
> > +out:
> > irqentry_exit(regs, state);
> > }
> >
> > diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c
> > new file mode 100644
> > index 000000000000..f367424b762a
> > --- /dev/null
> > +++ b/arch/loongarch/kernel/unaligned.c
> > @@ -0,0 +1,393 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Handle unaligned accesses by emulation.
> > + *
> > + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
>
> MIPS heritage too?
>
> > + *
> > + */
> > +#include <linux/mm.h>
> > +#include <linux/sched.h>
> > +#include <linux/signal.h>
> > +#include <linux/debugfs.h>
> > +#include <linux/perf_event.h>
> > +
> > +#include <asm/asm.h>
> > +#include <asm/branch.h>
> > +#include <asm/fpu.h>
> > +#include <asm/inst.h>
> > +
> > +#include "access-helper.h"
> > +
> > +#ifdef CONFIG_DEBUG_FS
> > +static u32 unaligned_instructions_user;
> > +static u32 unaligned_instructions_kernel;
> > +#endif
> > +
> > +static inline unsigned long read_fpr(unsigned int fd)
> > +{
> > +#define READ_FPR(fd, __value) \
> > + __asm__ __volatile__("movfr2gr.d %0, $f"#fd"\n\t" : "=r"(__value));
> > +
> > + unsigned long __value;
> > +
> > + switch (fd) {
> > + case 0:
> > + READ_FPR(0, __value);
> > + break;
> > + case 1:
> > + READ_FPR(1, __value);
> > + break;
> > + case 2:
> > + READ_FPR(2, __value);
> > + break;
> > + case 3:
> > + READ_FPR(3, __value);
> > + break;
> > + case 4:
> > + READ_FPR(4, __value);
> > + break;
> > + case 5:
> > + READ_FPR(5, __value);
> > + break;
> > + case 6:
> > + READ_FPR(6, __value);
> > + break;
> > + case 7:
> > + READ_FPR(7, __value);
> > + break;
> > + case 8:
> > + READ_FPR(8, __value);
> > + break;
> > + case 9:
> > + READ_FPR(9, __value);
> > + break;
> > + case 10:
> > + READ_FPR(10, __value);
> > + break;
> > + case 11:
> > + READ_FPR(11, __value);
> > + break;
> > + case 12:
> > + READ_FPR(12, __value);
> > + break;
> > + case 13:
> > + READ_FPR(13, __value);
> > + break;
> > + case 14:
> > + READ_FPR(14, __value);
> > + break;
> > + case 15:
> > + READ_FPR(15, __value);
> > + break;
> > + case 16:
> > + READ_FPR(16, __value);
> > + break;
> > + case 17:
> > + READ_FPR(17, __value);
> > + break;
> > + case 18:
> > + READ_FPR(18, __value);
> > + break;
> > + case 19:
> > + READ_FPR(19, __value);
> > + break;
> > + case 20:
> > + READ_FPR(20, __value);
> > + break;
> > + case 21:
> > + READ_FPR(21, __value);
> > + break;
> > + case 22:
> > + READ_FPR(22, __value);
> > + break;
> > + case 23:
> > + READ_FPR(23, __value);
> > + break;
> > + case 24:
> > + READ_FPR(24, __value);
> > + break;
> > + case 25:
> > + READ_FPR(25, __value);
> > + break;
> > + case 26:
> > + READ_FPR(26, __value);
> > + break;
> > + case 27:
> > + READ_FPR(27, __value);
> > + break;
> > + case 28:
> > + READ_FPR(28, __value);
> > + break;
> > + case 29:
> > + READ_FPR(29, __value);
> > + break;
> > + case 30:
> > + READ_FPR(30, __value);
> > + break;
> > + case 31:
> > + READ_FPR(31, __value);
> > + break;
> > + default:
> > + panic("unexpected fd '%d'", fd);
>
> So this is a bit misleading; I was thinking of file descriptors when I
> was reading Feiyang's review comments... maybe something as simple as
> "idx", "num" or "regno" will do?
OK, idx is better.

>
> > + }
> > +#undef READ_FPR
> > + return __value;
> > +}
> > +
> > +static inline void write_fpr(unsigned int fd, unsigned long value)
> > +{
> > +#define WRITE_FPR(fd, value) \
> > + __asm__ __volatile__("movgr2fr.d $f"#fd", %0\n\t" :: "r"(value));
> > +
> > + switch (fd) {
> > + case 0:
> > + WRITE_FPR(0, value);
> > + break;
> > + case 1:
> > + WRITE_FPR(1, value);
> > + break;
> > + case 2:
> > + WRITE_FPR(2, value);
> > + break;
> > + case 3:
> > + WRITE_FPR(3, value);
> > + break;
> > + case 4:
> > + WRITE_FPR(4, value);
> > + break;
> > + case 5:
> > + WRITE_FPR(5, value);
> > + break;
> > + case 6:
> > + WRITE_FPR(6, value);
> > + break;
> > + case 7:
> > + WRITE_FPR(7, value);
> > + break;
> > + case 8:
> > + WRITE_FPR(8, value);
> > + break;
> > + case 9:
> > + WRITE_FPR(9, value);
> > + break;
> > + case 10:
> > + WRITE_FPR(10, value);
> > + break;
> > + case 11:
> > + WRITE_FPR(11, value);
> > + break;
> > + case 12:
> > + WRITE_FPR(12, value);
> > + break;
> > + case 13:
> > + WRITE_FPR(13, value);
> > + break;
> > + case 14:
> > + WRITE_FPR(14, value);
> > + break;
> > + case 15:
> > + WRITE_FPR(15, value);
> > + break;
> > + case 16:
> > + WRITE_FPR(16, value);
> > + break;
> > + case 17:
> > + WRITE_FPR(17, value);
> > + break;
> > + case 18:
> > + WRITE_FPR(18, value);
> > + break;
> > + case 19:
> > + WRITE_FPR(19, value);
> > + break;
> > + case 20:
> > + WRITE_FPR(20, value);
> > + break;
> > + case 21:
> > + WRITE_FPR(21, value);
> > + break;
> > + case 22:
> > + WRITE_FPR(22, value);
> > + break;
> > + case 23:
> > + WRITE_FPR(23, value);
> > + break;
> > + case 24:
> > + WRITE_FPR(24, value);
> > + break;
> > + case 25:
> > + WRITE_FPR(25, value);
> > + break;
> > + case 26:
> > + WRITE_FPR(26, value);
> > + break;
> > + case 27:
> > + WRITE_FPR(27, value);
> > + break;
> > + case 28:
> > + WRITE_FPR(28, value);
> > + break;
> > + case 29:
> > + WRITE_FPR(29, value);
> > + break;
> > + case 30:
> > + WRITE_FPR(30, value);
> > + break;
> > + case 31:
> > + WRITE_FPR(31, value);
> > + break;
> > + default:
> > + panic("unexpected fd '%d'", fd);
> > + }
> > +#undef WRITE_FPR
> > +}
> > +
> > +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc)
> > +{
> > + bool user = user_mode(regs);
> > + unsigned int res;
> > + unsigned long origpc;
> > + unsigned long origra;
> > + unsigned long value = 0;
> > + union loongarch_instruction insn;
> > +
> > + origpc = (unsigned long)pc;
> > + origra = regs->regs[1];
> > +
> > + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
> > +
> > + /*
> > + * This load never faults.
> > + */
> > + __get_inst(&insn.word, pc, user);
> > + if (user && !access_ok(addr, 8))
> > + goto sigbus;
> > +
> > + if (insn.reg2i12_format.opcode == ldd_op ||
> > + insn.reg2i14_format.opcode == ldptrd_op ||
> > + insn.reg3_format.opcode == ldxd_op) {
> > + res = unaligned_read(addr, &value, 8, 1);
> > + if (res)
> > + goto fault;
> > + regs->regs[insn.reg2i12_format.rd] = value;
> > + } else if (insn.reg2i12_format.opcode == ldw_op ||
> > + insn.reg2i14_format.opcode == ldptrw_op ||
> > + insn.reg3_format.opcode == ldxw_op) {
> > + res = unaligned_read(addr, &value, 4, 1);
> > + if (res)
> > + goto fault;
> > + regs->regs[insn.reg2i12_format.rd] = value;
> > + } else if (insn.reg2i12_format.opcode == ldwu_op ||
> > + insn.reg3_format.opcode == ldxwu_op) {
> > + res = unaligned_read(addr, &value, 4, 0);
> > + if (res)
> > + goto fault;
> > + regs->regs[insn.reg2i12_format.rd] = value;
> > + } else if (insn.reg2i12_format.opcode == ldh_op ||
> > + insn.reg3_format.opcode == ldxh_op) {
> > + res = unaligned_read(addr, &value, 2, 1);
> > + if (res)
> > + goto fault;
> > + regs->regs[insn.reg2i12_format.rd] = value;
> > + } else if (insn.reg2i12_format.opcode == ldhu_op ||
> > + insn.reg3_format.opcode == ldxhu_op) {
> > + res = unaligned_read(addr, &value, 2, 0);
> > + if (res)
> > + goto fault;
> > + regs->regs[insn.reg2i12_format.rd] = value;
> > + } else if (insn.reg2i12_format.opcode == std_op ||
> > + insn.reg2i14_format.opcode == stptrd_op ||
> > + insn.reg3_format.opcode == stxd_op) {
> > + value = regs->regs[insn.reg2i12_format.rd];
> > + res = unaligned_write(addr, value, 8);
> > + if (res)
> > + goto fault;
> > + } else if (insn.reg2i12_format.opcode == stw_op ||
> > + insn.reg2i14_format.opcode == stptrw_op ||
> > + insn.reg3_format.opcode == stxw_op) {
> > + value = regs->regs[insn.reg2i12_format.rd];
> > + res = unaligned_write(addr, value, 4);
> > + if (res)
> > + goto fault;
> > + } else if (insn.reg2i12_format.opcode == sth_op ||
> > + insn.reg3_format.opcode == stxh_op) {
> > + value = regs->regs[insn.reg2i12_format.rd];
> > + res = unaligned_write(addr, value, 2);
> > + if (res)
> > + goto fault;
> > + } else if (insn.reg2i12_format.opcode == fldd_op ||
> > + insn.reg3_format.opcode == fldxd_op) {
> > + res = unaligned_read(addr, &value, 8, 1);
> > + if (res)
> > + goto fault;
> > + write_fpr(insn.reg2i12_format.rd, value);
> > + } else if (insn.reg2i12_format.opcode == flds_op ||
> > + insn.reg3_format.opcode == fldxs_op) {
> > + res = unaligned_read(addr, &value, 4, 1);
> > + if (res)
> > + goto fault;
> > + write_fpr(insn.reg2i12_format.rd, value);
> > + } else if (insn.reg2i12_format.opcode == fstd_op ||
> > + insn.reg3_format.opcode == fstxd_op) {
> > + value = read_fpr(insn.reg2i12_format.rd);
> > + res = unaligned_write(addr, value, 8);
> > + if (res)
> > + goto fault;
> > + } else if (insn.reg2i12_format.opcode == fsts_op ||
> > + insn.reg3_format.opcode == fstxs_op) {
> > + value = read_fpr(insn.reg2i12_format.rd);
> > + res = unaligned_write(addr, value, 4);
> > + if (res)
> > + goto fault;
> > + } else
> > + goto sigbus;
> > +
> > +
> > +#ifdef CONFIG_DEBUG_FS
> > + if (user)
> > + unaligned_instructions_user++;
> > + else
> > + unaligned_instructions_kernel++;
> > +#endif
> > +
> > + compute_return_era(regs);
> > + return;
> > +
> > +fault:
> > + /* roll back jump/branch */
> > + regs->csr_era = origpc;
> > + regs->regs[1] = origra;
> > + /* Did we have an exception handler installed? */
> > + if (fixup_exception(regs))
> > + return;
> > +
> > + die_if_kernel("Unhandled kernel unaligned access", regs);
> > + force_sig(SIGSEGV);
> > +
> > + return;
> > +
> > +sigbus:
> > + die_if_kernel("Unhandled kernel unaligned access", regs);
> > + force_sig(SIGBUS);
> > +
> > + return;
> > +}
> > +
> > +#ifdef CONFIG_DEBUG_FS
> > +static int __init debugfs_unaligned(void)
> > +{
> > + struct dentry *d;
> > +
> > + d = debugfs_create_dir("loongarch", NULL);
> > + if (!d)
> > + return -ENOMEM;
> > +
> > + debugfs_create_u32("unaligned_instructions_user",
> > + S_IRUGO, d, &unaligned_instructions_user);
> > + debugfs_create_u32("unaligned_instructions_kernel",
> > + S_IRUGO, d, &unaligned_instructions_kernel);
> > +
> > + return 0;
> > +}
> > +arch_initcall(debugfs_unaligned);
> > +#endif
> > diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
> > index e36635fccb69..867895530340 100644
> > --- a/arch/loongarch/lib/Makefile
> > +++ b/arch/loongarch/lib/Makefile
> > @@ -3,4 +3,4 @@
> > # Makefile for LoongArch-specific library files.
> > #
> >
> > -lib-y += delay.o clear_user.o copy_user.o dump_tlb.o
> > +lib-y += delay.o clear_user.o copy_user.o dump_tlb.o unaligned.o
> > diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S
> > new file mode 100644
> > index 000000000000..03210cb5a18d
> > --- /dev/null
> > +++ b/arch/loongarch/lib/unaligned.S
> > @@ -0,0 +1,93 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
> > + */
> > +
> > +#include <linux/linkage.h>
> > +
> > +#include <asm/asm.h>
> > +#include <asm/asmmacro.h>
> > +#include <asm/errno.h>
> > +#include <asm/export.h>
> > +#include <asm/regdef.h>
> > +
> > +.macro fixup_ex from, to, fix
> > +.if \fix
> > + .section .fixup, "ax"
> > +\to: li.w a0, -EFAULT
> > + jr ra
> > + .previous
> > +.endif
> > + .section __ex_table, "a"
> > + PTR \from\()b, \to\()b
> > + .previous
> > +.endm
> > +
> > +/*
> > + * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign)
> > + *
> > + * a0: addr
> > + * a1: value
> > + * a2: n
> > + * a3: sign
> > + */
> > +SYM_FUNC_START(unaligned_read)
> > + beqz a2, 5f
> > +
> > + li.w t1, 8
> > + li.w t2, 0
> > +
> > + addi.d t0, a2, -1
> > + mul.d t1, t0, t1
>
> Remove the `t1 = 8` above; then `slli.d t1, t0, 3` here would be enough,
> and one cycle is saved.
OK, thanks.

Huacai
>
> > + add.d a0, a0, t0
> > +
> > + beq a3, zero, 2f
>
> beqz
>
> > +1: ld.b t3, a0, 0
> > + b 3f
> > +
> > +2: ld.bu t3, a0, 0
> > +3: sll.d t3, t3, t1
> > + or t2, t2, t3
> > + addi.d t1, t1, -8
> > + addi.d a0, a0, -1
> > + addi.d a2, a2, -1
> > + bgt a2, zero, 2b
>
> bgtz
>
> > +4: st.d t2, a1, 0
> > +
> > + move a0, a2
> > + jr ra
> > +
> > +5: li.w a0, -EFAULT
> > + jr ra
> > +
> > + fixup_ex 1, 6, 1
> > + fixup_ex 2, 6, 0
> > + fixup_ex 4, 6, 0
> > +SYM_FUNC_END(unaligned_read)
> > +
> > +/*
> > + * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n)
> > + *
> > + * a0: addr
> > + * a1: value
> > + * a2: n
> > + */
> > +SYM_FUNC_START(unaligned_write)
> > + beqz a2, 3f
> > +
> > + li.w t0, 0
> > +1: srl.d t1, a1, t0
> > +2: st.b t1, a0, 0
> > + addi.d t0, t0, 8
> > + addi.d a2, a2, -1
> > + addi.d a0, a0, 1
> > + bgt a2, zero, 1b
>
> bgtz
>
> > +
> > + move a0, a2
> > + jr ra
> > +
> > +3: li.w a0, -EFAULT
> > + jr ra
> > +
> > + fixup_ex 2, 4, 1
> > +SYM_FUNC_END(unaligned_write)
>
> --
> WANG "xen0n" Xuerui
>
> Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/
>
>

2022-10-17 07:56:00

by Huacai Chen

Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

Hi, Jinyang,

On Mon, Oct 17, 2022 at 12:22 PM Jinyang He <[email protected]> wrote:
>
> Hi, Huacai,
>
>
> On 2022/10/17 10:23 AM, Huacai Chen wrote:
> > [...]
> > + default:
> > + panic("unexpected fd '%d'", fd);
> Due to GCC's optimization, the panic() is never actually used, yet the
> 'read_fpr'/'write_fpr' symbols are left in vmlinux. Maybe we can use
> unreachable() and __always_inline.
That seems impossible. I have tried __always_inline and BUILD_BUG(), and
BUILD_BUG() is triggered, because the register number is not a compile-time
constant.
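
(For context, a minimal sketch of what triggers; the names are illustrative
and not from the patch:)

	/* idx comes from the decoded instruction, i.e. a run-time value */
	switch (idx) {
	/* ... cases 0-31 ... */
	default:
		/*
		 * BUILD_BUG() only compiles away if the compiler can prove
		 * this arm dead; here it could not, so the build error fires.
		 */
		BUILD_BUG();
	}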

>
> > [...]
> > +
> > +fault:
> > + /* roll back jump/branch */
> > + regs->csr_era = origpc;
> > + regs->regs[1] = origra;
>
> I'm not sure where the csr_era and regs[1] were damaged...
Yes, it seems they are not damaged.

>
> > [...]
> >
> > +/*
> > + * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign)
> > + *
> > + * a0: addr
> > + * a1: value
> > + * a2: n
> > + * a3: sign
> > + */
> > +SYM_FUNC_START(unaligned_read)
> > + beqz a2, 5f
> > +
> > + li.w t1, 8
> IMHO we can avoid the constant reg t1.
OK, thanks.

> > + li.w t2, 0
> > +
> > + addi.d t0, a2, -1
> > + mul.d t1, t0, t1
> > + add.d a0, a0, t0
> > +
> > + beq a3, zero, 2f
> beqz
OK, thanks.

> > +1: ld.b t3, a0, 0
> > + b 3f
> > +
> > +2: ld.bu t3, a0, 0
> > +3: sll.d t3, t3, t1
> > + or t2, t2, t3
> > + addi.d t1, t1, -8
> > + addi.d a0, a0, -1
> > + addi.d a2, a2, -1
> > + bgt a2, zero, 2b
> bgtz
> > +4: st.d t2, a1, 0
> > +
> > + move a0, a2
> > + jr ra
> > +
> > +5: li.w a0, -EFAULT
> > + jr ra
> > +
> > + fixup_ex 1, 6, 1
> > + fixup_ex 2, 6, 0
> > + fixup_ex 4, 6, 0
> > +SYM_FUNC_END(unaligned_read)
> > +
> > +/*
> > + * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n)
> > + *
> > + * a0: addr
> > + * a1: value
> > + * a2: n
> > + */
> > +SYM_FUNC_START(unaligned_write)
> > + beqz a2, 3f
> > +
> > + li.w t0, 0
> > +1: srl.d t1, a1, t0
> > +2: st.b t1, a0, 0
> > + addi.d t0, t0, 8
> > + addi.d a2, a2, -1
> > + addi.d a0, a0, 1
> > + bgt a2, zero, 1b
> bgtz
OK, thanks.

> > +
> > + move a0, a2
> > + jr ra
> > +
> > +3: li.w a0, -EFAULT
> > + jr ra
> > +
> > + fixup_ex 2, 4, 1
> > +SYM_FUNC_END(unaligned_write)
>
> Thanks,
>
> Jinyang
>

2022-10-17 09:33:15

by WANG Rui

Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

Hi Huacai,

On Mon, Oct 17, 2022 at 10:25 AM Huacai Chen <[email protected]> wrote:
>
> Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
> unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
> Loongson-3C5000) can be configured to support unaligned access in
> hardware or not. This patch adds unaligned access emulation for those
> LoongArch processors without hardware support.
>
> Signed-off-by: Huacai Chen <[email protected]>
> ---
> V2: Simplify READ_FPR and WRITE_FPR.
>
> arch/loongarch/Kconfig | 2 +
> arch/loongarch/include/asm/inst.h | 14 ++
> arch/loongarch/kernel/Makefile | 3 +-
> arch/loongarch/kernel/traps.c | 27 ++
> arch/loongarch/kernel/unaligned.c | 393 ++++++++++++++++++++++++++++++
> arch/loongarch/lib/Makefile | 2 +-
> arch/loongarch/lib/unaligned.S | 93 +++++++
> 7 files changed, 532 insertions(+), 2 deletions(-)
> create mode 100644 arch/loongarch/kernel/unaligned.c
> create mode 100644 arch/loongarch/lib/unaligned.S
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 0a6ef613124c..a8dc58e8162a 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -122,6 +122,8 @@ config LOONGARCH
> select RTC_LIB
> select SMP
> select SPARSE_IRQ
> + select SYSCTL_ARCH_UNALIGN_ALLOW
> + select SYSCTL_ARCH_UNALIGN_NO_WARN
> select SYSCTL_EXCEPTION_TRACE
> select SWIOTLB
> select TRACE_IRQFLAGS_SUPPORT
> diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
> index fce1843ceebb..e96b5345f389 100644
> --- a/arch/loongarch/include/asm/inst.h
> +++ b/arch/loongarch/include/asm/inst.h
> @@ -76,6 +76,10 @@ enum reg2i12_op {
> ldbu_op = 0xa8,
> ldhu_op = 0xa9,
> ldwu_op = 0xaa,
> + flds_op = 0xac,
> + fsts_op = 0xad,
> + fldd_op = 0xae,
> + fstd_op = 0xaf,
> };
>
> enum reg2i14_op {
> @@ -146,6 +150,10 @@ enum reg3_op {
> ldxbu_op = 0x7040,
> ldxhu_op = 0x7048,
> ldxwu_op = 0x7050,
> + fldxs_op = 0x7060,
> + fldxd_op = 0x7068,
> + fstxs_op = 0x7070,
> + fstxd_op = 0x7078,
> amswapw_op = 0x70c0,
> amswapd_op = 0x70c1,
> amaddw_op = 0x70c2,
> @@ -566,4 +574,10 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \
>
> DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op)
>
> +struct pt_regs;
> +
> +unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign);
> +unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n);
> +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc);
> +
> #endif /* _ASM_INST_H */
> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> index 42be564278fa..2ad2555b53ea 100644
> --- a/arch/loongarch/kernel/Makefile
> +++ b/arch/loongarch/kernel/Makefile
> @@ -7,7 +7,8 @@ extra-y := vmlinux.lds
>
> obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
> traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \
> - elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o
> + elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \
> + unaligned.o
>
> obj-$(CONFIG_ACPI) += acpi.o
> obj-$(CONFIG_EFI) += efi.o
> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> index 1a4dce84ebc6..7ea62faeeadb 100644
> --- a/arch/loongarch/kernel/traps.c
> +++ b/arch/loongarch/kernel/traps.c
> @@ -368,13 +368,40 @@ asmlinkage void noinstr do_ade(struct pt_regs *regs)
> irqentry_exit(regs, state);
> }
>
> +/* sysctl hooks */
> +int unaligned_enabled __read_mostly = 1; /* Enabled by default */
> +int no_unaligned_warning __read_mostly = 1; /* Only 1 warning by default */
> +
> asmlinkage void noinstr do_ale(struct pt_regs *regs)
> {
> + unsigned int *pc;
> irqentry_state_t state = irqentry_enter(regs);
>
> + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr);
> +
> + /*
> + * Did we catch a fault trying to load an instruction?
> + */
> + if (regs->csr_badvaddr == regs->csr_era)
> + goto sigbus;
> + if (user_mode(regs) && !test_thread_flag(TIF_FIXADE))
> + goto sigbus;
> + if (!unaligned_enabled)
> + goto sigbus;
> + if (!no_unaligned_warning)
> + show_registers(regs);
> +
> + pc = (unsigned int *)exception_era(regs);
> +
> + emulate_load_store_insn(regs, (void __user *)regs->csr_badvaddr, pc);
> +
> + goto out;
> +
> +sigbus:
> die_if_kernel("Kernel ale access", regs);
> force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
>
> +out:
> irqentry_exit(regs, state);
> }
>
> diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c
> new file mode 100644
> index 000000000000..f367424b762a
> --- /dev/null
> +++ b/arch/loongarch/kernel/unaligned.c
> @@ -0,0 +1,393 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Handle unaligned accesses by emulation.
> + *
> + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
> + *
> + */
> +#include <linux/mm.h>
> +#include <linux/sched.h>
> +#include <linux/signal.h>
> +#include <linux/debugfs.h>
> +#include <linux/perf_event.h>
> +
> +#include <asm/asm.h>
> +#include <asm/branch.h>
> +#include <asm/fpu.h>
> +#include <asm/inst.h>
> +
> +#include "access-helper.h"
> +
> +#ifdef CONFIG_DEBUG_FS
> +static u32 unaligned_instructions_user;
> +static u32 unaligned_instructions_kernel;
> +#endif
> +
> +static inline unsigned long read_fpr(unsigned int fd)
> +{
> +#define READ_FPR(fd, __value) \
> + __asm__ __volatile__("movfr2gr.d %0, $f"#fd"\n\t" : "=r"(__value));
> +
> + unsigned long __value;
> +
> + switch (fd) {
> + case 0:
> + READ_FPR(0, __value);
> + break;
> + case 1:
> + READ_FPR(1, __value);
> + break;
> + case 2:
> + READ_FPR(2, __value);
> + break;
> + case 3:
> + READ_FPR(3, __value);
> + break;
> + case 4:
> + READ_FPR(4, __value);
> + break;
> + case 5:
> + READ_FPR(5, __value);
> + break;
> + case 6:
> + READ_FPR(6, __value);
> + break;
> + case 7:
> + READ_FPR(7, __value);
> + break;
> + case 8:
> + READ_FPR(8, __value);
> + break;
> + case 9:
> + READ_FPR(9, __value);
> + break;
> + case 10:
> + READ_FPR(10, __value);
> + break;
> + case 11:
> + READ_FPR(11, __value);
> + break;
> + case 12:
> + READ_FPR(12, __value);
> + break;
> + case 13:
> + READ_FPR(13, __value);
> + break;
> + case 14:
> + READ_FPR(14, __value);
> + break;
> + case 15:
> + READ_FPR(15, __value);
> + break;
> + case 16:
> + READ_FPR(16, __value);
> + break;
> + case 17:
> + READ_FPR(17, __value);
> + break;
> + case 18:
> + READ_FPR(18, __value);
> + break;
> + case 19:
> + READ_FPR(19, __value);
> + break;
> + case 20:
> + READ_FPR(20, __value);
> + break;
> + case 21:
> + READ_FPR(21, __value);
> + break;
> + case 22:
> + READ_FPR(22, __value);
> + break;
> + case 23:
> + READ_FPR(23, __value);
> + break;
> + case 24:
> + READ_FPR(24, __value);
> + break;
> + case 25:
> + READ_FPR(25, __value);
> + break;
> + case 26:
> + READ_FPR(26, __value);
> + break;
> + case 27:
> + READ_FPR(27, __value);
> + break;
> + case 28:
> + READ_FPR(28, __value);
> + break;
> + case 29:
> + READ_FPR(29, __value);
> + break;
> + case 30:
> + READ_FPR(30, __value);
> + break;
> + case 31:
> + READ_FPR(31, __value);
> + break;
> + default:
> + panic("unexpected fd '%d'", fd);
> + }
> +#undef READ_FPR
> + return __value;
> +}
> +
> +static inline void write_fpr(unsigned int fd, unsigned long value)
> +{
> +#define WRITE_FPR(fd, value) \
> + __asm__ __volatile__("movgr2fr.d $f"#fd", %0\n\t" :: "r"(value));
> +
> + switch (fd) {
> + case 0:
> + WRITE_FPR(0, value);
> + break;
> + case 1:
> + WRITE_FPR(1, value);
> + break;
> + case 2:
> + WRITE_FPR(2, value);
> + break;
> + case 3:
> + WRITE_FPR(3, value);
> + break;
> + case 4:
> + WRITE_FPR(4, value);
> + break;
> + case 5:
> + WRITE_FPR(5, value);
> + break;
> + case 6:
> + WRITE_FPR(6, value);
> + break;
> + case 7:
> + WRITE_FPR(7, value);
> + break;
> + case 8:
> + WRITE_FPR(8, value);
> + break;
> + case 9:
> + WRITE_FPR(9, value);
> + break;
> + case 10:
> + WRITE_FPR(10, value);
> + break;
> + case 11:
> + WRITE_FPR(11, value);
> + break;
> + case 12:
> + WRITE_FPR(12, value);
> + break;
> + case 13:
> + WRITE_FPR(13, value);
> + break;
> + case 14:
> + WRITE_FPR(14, value);
> + break;
> + case 15:
> + WRITE_FPR(15, value);
> + break;
> + case 16:
> + WRITE_FPR(16, value);
> + break;
> + case 17:
> + WRITE_FPR(17, value);
> + break;
> + case 18:
> + WRITE_FPR(18, value);
> + break;
> + case 19:
> + WRITE_FPR(19, value);
> + break;
> + case 20:
> + WRITE_FPR(20, value);
> + break;
> + case 21:
> + WRITE_FPR(21, value);
> + break;
> + case 22:
> + WRITE_FPR(22, value);
> + break;
> + case 23:
> + WRITE_FPR(23, value);
> + break;
> + case 24:
> + WRITE_FPR(24, value);
> + break;
> + case 25:
> + WRITE_FPR(25, value);
> + break;
> + case 26:
> + WRITE_FPR(26, value);
> + break;
> + case 27:
> + WRITE_FPR(27, value);
> + break;
> + case 28:
> + WRITE_FPR(28, value);
> + break;
> + case 29:
> + WRITE_FPR(29, value);
> + break;
> + case 30:
> + WRITE_FPR(30, value);
> + break;
> + case 31:
> + WRITE_FPR(31, value);
> + break;
> + default:
> + panic("unexpected fd '%d'", fd);
> + }
> +#undef WRITE_FPR
> +}
> +
> +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc)
> +{
> + bool user = user_mode(regs);
> + unsigned int res;
> + unsigned long origpc;
> + unsigned long origra;
> + unsigned long value = 0;
> + union loongarch_instruction insn;
> +
> + origpc = (unsigned long)pc;
> + origra = regs->regs[1];
> +
> + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
> +
> + /*
> + * This load never faults.
> + */
> + __get_inst(&insn.word, pc, user);
> + if (user && !access_ok(addr, 8))
> + goto sigbus;
> +
> + if (insn.reg2i12_format.opcode == ldd_op ||
> + insn.reg2i14_format.opcode == ldptrd_op ||
> + insn.reg3_format.opcode == ldxd_op) {
> + res = unaligned_read(addr, &value, 8, 1);
> + if (res)
> + goto fault;
> + regs->regs[insn.reg2i12_format.rd] = value;
> + } else if (insn.reg2i12_format.opcode == ldw_op ||
> + insn.reg2i14_format.opcode == ldptrw_op ||
> + insn.reg3_format.opcode == ldxw_op) {
> + res = unaligned_read(addr, &value, 4, 1);
> + if (res)
> + goto fault;
> + regs->regs[insn.reg2i12_format.rd] = value;
> + } else if (insn.reg2i12_format.opcode == ldwu_op ||
> + insn.reg3_format.opcode == ldxwu_op) {
> + res = unaligned_read(addr, &value, 4, 0);
> + if (res)
> + goto fault;
> + regs->regs[insn.reg2i12_format.rd] = value;
> + } else if (insn.reg2i12_format.opcode == ldh_op ||
> + insn.reg3_format.opcode == ldxh_op) {
> + res = unaligned_read(addr, &value, 2, 1);
> + if (res)
> + goto fault;
> + regs->regs[insn.reg2i12_format.rd] = value;
> + } else if (insn.reg2i12_format.opcode == ldhu_op ||
> + insn.reg3_format.opcode == ldxhu_op) {
> + res = unaligned_read(addr, &value, 2, 0);
> + if (res)
> + goto fault;
> + regs->regs[insn.reg2i12_format.rd] = value;
> + } else if (insn.reg2i12_format.opcode == std_op ||
> + insn.reg2i14_format.opcode == stptrd_op ||
> + insn.reg3_format.opcode == stxd_op) {
> + value = regs->regs[insn.reg2i12_format.rd];
> + res = unaligned_write(addr, value, 8);
> + if (res)
> + goto fault;
> + } else if (insn.reg2i12_format.opcode == stw_op ||
> + insn.reg2i14_format.opcode == stptrw_op ||
> + insn.reg3_format.opcode == stxw_op) {
> + value = regs->regs[insn.reg2i12_format.rd];
> + res = unaligned_write(addr, value, 4);
> + if (res)
> + goto fault;
> + } else if (insn.reg2i12_format.opcode == sth_op ||
> + insn.reg3_format.opcode == stxh_op) {
> + value = regs->regs[insn.reg2i12_format.rd];
> + res = unaligned_write(addr, value, 2);
> + if (res)
> + goto fault;
> + } else if (insn.reg2i12_format.opcode == fldd_op ||
> + insn.reg3_format.opcode == fldxd_op) {
> + res = unaligned_read(addr, &value, 8, 1);
> + if (res)
> + goto fault;
> + write_fpr(insn.reg2i12_format.rd, value);
> + } else if (insn.reg2i12_format.opcode == flds_op ||
> + insn.reg3_format.opcode == fldxs_op) {
> + res = unaligned_read(addr, &value, 4, 1);
> + if (res)
> + goto fault;
> + write_fpr(insn.reg2i12_format.rd, value);
> + } else if (insn.reg2i12_format.opcode == fstd_op ||
> + insn.reg3_format.opcode == fstxd_op) {
> + value = read_fpr(insn.reg2i12_format.rd);
> + res = unaligned_write(addr, value, 8);
> + if (res)
> + goto fault;
> + } else if (insn.reg2i12_format.opcode == fsts_op ||
> + insn.reg3_format.opcode == fstxs_op) {
> + value = read_fpr(insn.reg2i12_format.rd);
> + res = unaligned_write(addr, value, 4);
> + if (res)
> + goto fault;
> + } else
> + goto sigbus;

So many conditional branches for linear instruction matching; would
a switch-case be better?

0000000000000238 <emulate_load_store_insn>:
...
35c: 1470180f lu12i.w $t3, 229568(0x380c0)
360: 580141af beq $t1, $t3, 320(0x140) # 4a0
<emulate_load_store_insn+0x268>
364: 1451000f lu12i.w $t3, 165888(0x28800)
368: 5801dd8f beq $t0, $t3, 476(0x1dc) # 544
<emulate_load_store_insn+0x30c>
36c: 0280900f addi.w $t3, $zero, 36(0x24)
370: 5801d5cf beq $t2, $t3, 468(0x1d4) # 544
<emulate_load_store_insn+0x30c>
374: 1470100f lu12i.w $t3, 229504(0x38080)
378: 5801cdaf beq $t1, $t3, 460(0x1cc) # 544
<emulate_load_store_insn+0x30c>
37c: 1455000f lu12i.w $t3, 174080(0x2a800)
380: 5801f18f beq $t0, $t3, 496(0x1f0) # 570
<emulate_load_store_insn+0x338>
384: 1470500f lu12i.w $t3, 230016(0x38280)
388: 5801e9af beq $t1, $t3, 488(0x1e8) # 570
<emulate_load_store_insn+0x338>
38c: 1450800f lu12i.w $t3, 164864(0x28400)
390: 5801ed8f beq $t0, $t3, 492(0x1ec) # 57c
<emulate_load_store_insn+0x344>
394: 1470080f lu12i.w $t3, 229440(0x38040)
398: 5801e5af beq $t1, $t3, 484(0x1e4) # 57c
<emulate_load_store_insn+0x344>
39c: 1454800f lu12i.w $t3, 173056(0x2a400)
3a0: 5801f98f beq $t0, $t3, 504(0x1f8) # 598
<emulate_load_store_insn+0x360>
3a4: 1470480f lu12i.w $t3, 229952(0x38240)
3a8: 5801f1af beq $t1, $t3, 496(0x1f0) # 598
<emulate_load_store_insn+0x360>
3ac: 1453800f lu12i.w $t3, 171008(0x29c00)
3b0: 5802098f beq $t0, $t3, 520(0x208) # 5b8
<emulate_load_store_insn+0x380>
3b4: 02809c0f addi.w $t3, $zero, 39(0x27)
3b8: 580201cf beq $t2, $t3, 512(0x200) # 5b8
<emulate_load_store_insn+0x380>
3bc: 1470380f lu12i.w $t3, 229824(0x381c0)
3c0: 5801f9af beq $t1, $t3, 504(0x1f8) # 5b8
<emulate_load_store_insn+0x380>
3c4: 1453000f lu12i.w $t3, 169984(0x29800)
3c8: 58021d8f beq $t0, $t3, 540(0x21c) # 5e4
<emulate_load_store_insn+0x3ac>
3cc: 0280940f addi.w $t3, $zero, 37(0x25)
3d0: 580215cf beq $t2, $t3, 532(0x214) # 5e4
<emulate_load_store_insn+0x3ac>
3d4: 1470300e lu12i.w $t2, 229760(0x38180)
3d8: 58020dae beq $t1, $t2, 524(0x20c) # 5e4
<emulate_load_store_insn+0x3ac>
3dc: 1452800e lu12i.w $t2, 168960(0x29400)
3e0: 58020d8e beq $t0, $t2, 524(0x20c) # 5ec
<emulate_load_store_insn+0x3b4>
3e4: 1470280e lu12i.w $t2, 229696(0x38140)
3e8: 580205ae beq $t1, $t2, 516(0x204) # 5ec
<emulate_load_store_insn+0x3b4>
3ec: 1457000e lu12i.w $t2, 178176(0x2b800)
3f0: 5802058e beq $t0, $t2, 516(0x204) # 5f4
<emulate_load_store_insn+0x3bc>
3f4: 1470680e lu12i.w $t2, 230208(0x38340)
3f8: 5801fdae beq $t1, $t2, 508(0x1fc) # 5f4
<emulate_load_store_insn+0x3bc>
3fc: 1456000e lu12i.w $t2, 176128(0x2b000)
400: 5802258e beq $t0, $t2, 548(0x224) # 624
<emulate_load_store_insn+0x3ec>
404: 1470600e lu12i.w $t2, 230144(0x38300)
408: 58021dae beq $t1, $t2, 540(0x21c) # 624
<emulate_load_store_insn+0x3ec>
40c: 1457800e lu12i.w $t2, 179200(0x2bc00)
410: 5802218e beq $t0, $t2, 544(0x220) # 630
<emulate_load_store_insn+0x3f8>
414: 1470780e lu12i.w $t2, 230336(0x383c0)
418: 580219ae beq $t1, $t2, 536(0x218) # 630
<emulate_load_store_insn+0x3f8>
41c: 1456800e lu12i.w $t2, 177152(0x2b400)
420: 58000d8e beq $t0, $t2, 12(0xc) # 42c
<emulate_load_store_insn+0x1f4>
424: 1470700c lu12i.w $t0, 230272(0x38380)
428: 5ffed9ac bne $t1, $t0, -296(0x3fed8) # 300
<emulate_load_store_insn+0xc8>
...

Regards,
Rui

> +
> +
> +#ifdef CONFIG_DEBUG_FS
> + if (user)
> + unaligned_instructions_user++;
> + else
> + unaligned_instructions_kernel++;
> +#endif
> +
> + compute_return_era(regs);
> + return;
> +
> +fault:
> + /* roll back jump/branch */
> + regs->csr_era = origpc;
> + regs->regs[1] = origra;
> + /* Did we have an exception handler installed? */
> + if (fixup_exception(regs))
> + return;
> +
> + die_if_kernel("Unhandled kernel unaligned access", regs);
> + force_sig(SIGSEGV);
> +
> + return;
> +
> +sigbus:
> + die_if_kernel("Unhandled kernel unaligned access", regs);
> + force_sig(SIGBUS);
> +
> + return;
> +}
> +
> +#ifdef CONFIG_DEBUG_FS
> +static int __init debugfs_unaligned(void)
> +{
> + struct dentry *d;
> +
> + d = debugfs_create_dir("loongarch", NULL);
> + if (!d)
> + return -ENOMEM;
> +
> + debugfs_create_u32("unaligned_instructions_user",
> + S_IRUGO, d, &unaligned_instructions_user);
> + debugfs_create_u32("unaligned_instructions_kernel",
> + S_IRUGO, d, &unaligned_instructions_kernel);
> +
> + return 0;
> +}
> +arch_initcall(debugfs_unaligned);
> +#endif
> diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
> index e36635fccb69..867895530340 100644
> --- a/arch/loongarch/lib/Makefile
> +++ b/arch/loongarch/lib/Makefile
> @@ -3,4 +3,4 @@
> # Makefile for LoongArch-specific library files.
> #
>
> -lib-y += delay.o clear_user.o copy_user.o dump_tlb.o
> +lib-y += delay.o clear_user.o copy_user.o dump_tlb.o unaligned.o
> diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S
> new file mode 100644
> index 000000000000..03210cb5a18d
> --- /dev/null
> +++ b/arch/loongarch/lib/unaligned.S
> @@ -0,0 +1,93 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
> + */
> +
> +#include <linux/linkage.h>
> +
> +#include <asm/asm.h>
> +#include <asm/asmmacro.h>
> +#include <asm/errno.h>
> +#include <asm/export.h>
> +#include <asm/regdef.h>
> +
> +.macro fixup_ex from, to, fix
> +.if \fix
> + .section .fixup, "ax"
> +\to: li.w a0, -EFAULT
> + jr ra
> + .previous
> +.endif
> + .section __ex_table, "a"
> + PTR \from\()b, \to\()b
> + .previous
> +.endm
> +
> +/*
> + * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign)
> + *
> + * a0: addr
> + * a1: value
> + * a2: n
> + * a3: sign
> + */
> +SYM_FUNC_START(unaligned_read)
> + beqz a2, 5f
> +
> + li.w t1, 8
> + li.w t2, 0
> +
> + addi.d t0, a2, -1
> + mul.d t1, t0, t1
> + add.d a0, a0, t0
> +
> + beq a3, zero, 2f
> +1: ld.b t3, a0, 0
> + b 3f
> +
> +2: ld.bu t3, a0, 0
> +3: sll.d t3, t3, t1
> + or t2, t2, t3
> + addi.d t1, t1, -8
> + addi.d a0, a0, -1
> + addi.d a2, a2, -1
> + bgt a2, zero, 2b
> +4: st.d t2, a1, 0
> +
> + move a0, a2
> + jr ra
> +
> +5: li.w a0, -EFAULT
> + jr ra
> +
> + fixup_ex 1, 6, 1
> + fixup_ex 2, 6, 0
> + fixup_ex 4, 6, 0
> +SYM_FUNC_END(unaligned_read)
> +
> +/*
> + * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n)
> + *
> + * a0: addr
> + * a1: value
> + * a2: n
> + */
> +SYM_FUNC_START(unaligned_write)
> + beqz a2, 3f
> +
> + li.w t0, 0
> +1: srl.d t1, a1, t0
> +2: st.b t1, a0, 0
> + addi.d t0, t0, 8
> + addi.d a2, a2, -1
> + addi.d a0, a0, 1
> + bgt a2, zero, 1b
> +
> + move a0, a2
> + jr ra
> +
> +3: li.w a0, -EFAULT
> + jr ra
> +
> + fixup_ex 2, 4, 1
> +SYM_FUNC_END(unaligned_write)
> --
> 2.31.1
>

2022-10-17 09:57:29

by Huacai Chen

[permalink] [raw]
Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

On Mon, Oct 17, 2022 at 5:00 PM Rui Wang <[email protected]> wrote:
>
> Hi Huacai,
>
> On Mon, Oct 17, 2022 at 10:25 AM Huacai Chen <[email protected]> wrote:
> >
> > Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
> > unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
> > Loongson-3C5000) are configurable whether support unaligned access in
> > hardware. This patch add unaligned access emulation for those LoongArch
> > processors without hardware support.
> >
> > Signed-off-by: Huacai Chen <[email protected]>
> > ---
> > V2: Simplify READ_FPR and WRITE_FPR.
> >
> > arch/loongarch/Kconfig | 2 +
> > arch/loongarch/include/asm/inst.h | 14 ++
> > arch/loongarch/kernel/Makefile | 3 +-
> > arch/loongarch/kernel/traps.c | 27 ++
> > arch/loongarch/kernel/unaligned.c | 393 ++++++++++++++++++++++++++++++
> > arch/loongarch/lib/Makefile | 2 +-
> > arch/loongarch/lib/unaligned.S | 93 +++++++
> > 7 files changed, 532 insertions(+), 2 deletions(-)
> > create mode 100644 arch/loongarch/kernel/unaligned.c
> > create mode 100644 arch/loongarch/lib/unaligned.S
> >
> > diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> > index 0a6ef613124c..a8dc58e8162a 100644
> > --- a/arch/loongarch/Kconfig
> > +++ b/arch/loongarch/Kconfig
> > @@ -122,6 +122,8 @@ config LOONGARCH
> > select RTC_LIB
> > select SMP
> > select SPARSE_IRQ
> > + select SYSCTL_ARCH_UNALIGN_ALLOW
> > + select SYSCTL_ARCH_UNALIGN_NO_WARN
> > select SYSCTL_EXCEPTION_TRACE
> > select SWIOTLB
> > select TRACE_IRQFLAGS_SUPPORT
> > diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
> > index fce1843ceebb..e96b5345f389 100644
> > --- a/arch/loongarch/include/asm/inst.h
> > +++ b/arch/loongarch/include/asm/inst.h
> > @@ -76,6 +76,10 @@ enum reg2i12_op {
> > ldbu_op = 0xa8,
> > ldhu_op = 0xa9,
> > ldwu_op = 0xaa,
> > + flds_op = 0xac,
> > + fsts_op = 0xad,
> > + fldd_op = 0xae,
> > + fstd_op = 0xaf,
> > };
> >
> > enum reg2i14_op {
> > @@ -146,6 +150,10 @@ enum reg3_op {
> > ldxbu_op = 0x7040,
> > ldxhu_op = 0x7048,
> > ldxwu_op = 0x7050,
> > + fldxs_op = 0x7060,
> > + fldxd_op = 0x7068,
> > + fstxs_op = 0x7070,
> > + fstxd_op = 0x7078,
> > amswapw_op = 0x70c0,
> > amswapd_op = 0x70c1,
> > amaddw_op = 0x70c2,
> > @@ -566,4 +574,10 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \
> >
> > DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op)
> >
> > +struct pt_regs;
> > +
> > +unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign);
> > +unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n);
> > +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc);
> > +
> > #endif /* _ASM_INST_H */
> > diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
> > index 42be564278fa..2ad2555b53ea 100644
> > --- a/arch/loongarch/kernel/Makefile
> > +++ b/arch/loongarch/kernel/Makefile
> > @@ -7,7 +7,8 @@ extra-y := vmlinux.lds
> >
> > obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
> > traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \
> > - elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o
> > + elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \
> > + unaligned.o
> >
> > obj-$(CONFIG_ACPI) += acpi.o
> > obj-$(CONFIG_EFI) += efi.o
> > diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> > index 1a4dce84ebc6..7ea62faeeadb 100644
> > --- a/arch/loongarch/kernel/traps.c
> > +++ b/arch/loongarch/kernel/traps.c
> > @@ -368,13 +368,40 @@ asmlinkage void noinstr do_ade(struct pt_regs *regs)
> > irqentry_exit(regs, state);
> > }
> >
> > +/* sysctl hooks */
> > +int unaligned_enabled __read_mostly = 1; /* Enabled by default */
> > +int no_unaligned_warning __read_mostly = 1; /* Only 1 warning by default */
> > +
> > asmlinkage void noinstr do_ale(struct pt_regs *regs)
> > {
> > + unsigned int *pc;
> > irqentry_state_t state = irqentry_enter(regs);
> >
> > + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr);
> > +
> > + /*
> > + * Did we catch a fault trying to load an instruction?
> > + */
> > + if (regs->csr_badvaddr == regs->csr_era)
> > + goto sigbus;
> > + if (user_mode(regs) && !test_thread_flag(TIF_FIXADE))
> > + goto sigbus;
> > + if (!unaligned_enabled)
> > + goto sigbus;
> > + if (!no_unaligned_warning)
> > + show_registers(regs);
> > +
> > + pc = (unsigned int *)exception_era(regs);
> > +
> > + emulate_load_store_insn(regs, (void __user *)regs->csr_badvaddr, pc);
> > +
> > + goto out;
> > +
> > +sigbus:
> > die_if_kernel("Kernel ale access", regs);
> > force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
> >
> > +out:
> > irqentry_exit(regs, state);
> > }
> >
> > diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c
> > new file mode 100644
> > index 000000000000..f367424b762a
> > --- /dev/null
> > +++ b/arch/loongarch/kernel/unaligned.c
> > @@ -0,0 +1,393 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Handle unaligned accesses by emulation.
> > + *
> > + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
> > + *
> > + */
> > +#include <linux/mm.h>
> > +#include <linux/sched.h>
> > +#include <linux/signal.h>
> > +#include <linux/debugfs.h>
> > +#include <linux/perf_event.h>
> > +
> > +#include <asm/asm.h>
> > +#include <asm/branch.h>
> > +#include <asm/fpu.h>
> > +#include <asm/inst.h>
> > +
> > +#include "access-helper.h"
> > +
> > +#ifdef CONFIG_DEBUG_FS
> > +static u32 unaligned_instructions_user;
> > +static u32 unaligned_instructions_kernel;
> > +#endif
> > +
> > +static inline unsigned long read_fpr(unsigned int fd)
> > +{
> > +#define READ_FPR(fd, __value) \
> > + __asm__ __volatile__("movfr2gr.d %0, $f"#fd"\n\t" : "=r"(__value));
> > +
> > + unsigned long __value;
> > +
> > + switch (fd) {
> > + case 0:
> > + READ_FPR(0, __value);
> > + break;
> > + case 1:
> > + READ_FPR(1, __value);
> > + break;
> > + case 2:
> > + READ_FPR(2, __value);
> > + break;
> > + case 3:
> > + READ_FPR(3, __value);
> > + break;
> > + case 4:
> > + READ_FPR(4, __value);
> > + break;
> > + case 5:
> > + READ_FPR(5, __value);
> > + break;
> > + case 6:
> > + READ_FPR(6, __value);
> > + break;
> > + case 7:
> > + READ_FPR(7, __value);
> > + break;
> > + case 8:
> > + READ_FPR(8, __value);
> > + break;
> > + case 9:
> > + READ_FPR(9, __value);
> > + break;
> > + case 10:
> > + READ_FPR(10, __value);
> > + break;
> > + case 11:
> > + READ_FPR(11, __value);
> > + break;
> > + case 12:
> > + READ_FPR(12, __value);
> > + break;
> > + case 13:
> > + READ_FPR(13, __value);
> > + break;
> > + case 14:
> > + READ_FPR(14, __value);
> > + break;
> > + case 15:
> > + READ_FPR(15, __value);
> > + break;
> > + case 16:
> > + READ_FPR(16, __value);
> > + break;
> > + case 17:
> > + READ_FPR(17, __value);
> > + break;
> > + case 18:
> > + READ_FPR(18, __value);
> > + break;
> > + case 19:
> > + READ_FPR(19, __value);
> > + break;
> > + case 20:
> > + READ_FPR(20, __value);
> > + break;
> > + case 21:
> > + READ_FPR(21, __value);
> > + break;
> > + case 22:
> > + READ_FPR(22, __value);
> > + break;
> > + case 23:
> > + READ_FPR(23, __value);
> > + break;
> > + case 24:
> > + READ_FPR(24, __value);
> > + break;
> > + case 25:
> > + READ_FPR(25, __value);
> > + break;
> > + case 26:
> > + READ_FPR(26, __value);
> > + break;
> > + case 27:
> > + READ_FPR(27, __value);
> > + break;
> > + case 28:
> > + READ_FPR(28, __value);
> > + break;
> > + case 29:
> > + READ_FPR(29, __value);
> > + break;
> > + case 30:
> > + READ_FPR(30, __value);
> > + break;
> > + case 31:
> > + READ_FPR(31, __value);
> > + break;
> > + default:
> > + panic("unexpected fd '%d'", fd);
> > + }
> > +#undef READ_FPR
> > + return __value;
> > +}
> > +
> > +static inline void write_fpr(unsigned int fd, unsigned long value)
> > +{
> > +#define WRITE_FPR(fd, value) \
> > + __asm__ __volatile__("movgr2fr.d $f"#fd", %0\n\t" :: "r"(value));
> > +
> > + switch (fd) {
> > + case 0:
> > + WRITE_FPR(0, value);
> > + break;
> > + case 1:
> > + WRITE_FPR(1, value);
> > + break;
> > + case 2:
> > + WRITE_FPR(2, value);
> > + break;
> > + case 3:
> > + WRITE_FPR(3, value);
> > + break;
> > + case 4:
> > + WRITE_FPR(4, value);
> > + break;
> > + case 5:
> > + WRITE_FPR(5, value);
> > + break;
> > + case 6:
> > + WRITE_FPR(6, value);
> > + break;
> > + case 7:
> > + WRITE_FPR(7, value);
> > + break;
> > + case 8:
> > + WRITE_FPR(8, value);
> > + break;
> > + case 9:
> > + WRITE_FPR(9, value);
> > + break;
> > + case 10:
> > + WRITE_FPR(10, value);
> > + break;
> > + case 11:
> > + WRITE_FPR(11, value);
> > + break;
> > + case 12:
> > + WRITE_FPR(12, value);
> > + break;
> > + case 13:
> > + WRITE_FPR(13, value);
> > + break;
> > + case 14:
> > + WRITE_FPR(14, value);
> > + break;
> > + case 15:
> > + WRITE_FPR(15, value);
> > + break;
> > + case 16:
> > + WRITE_FPR(16, value);
> > + break;
> > + case 17:
> > + WRITE_FPR(17, value);
> > + break;
> > + case 18:
> > + WRITE_FPR(18, value);
> > + break;
> > + case 19:
> > + WRITE_FPR(19, value);
> > + break;
> > + case 20:
> > + WRITE_FPR(20, value);
> > + break;
> > + case 21:
> > + WRITE_FPR(21, value);
> > + break;
> > + case 22:
> > + WRITE_FPR(22, value);
> > + break;
> > + case 23:
> > + WRITE_FPR(23, value);
> > + break;
> > + case 24:
> > + WRITE_FPR(24, value);
> > + break;
> > + case 25:
> > + WRITE_FPR(25, value);
> > + break;
> > + case 26:
> > + WRITE_FPR(26, value);
> > + break;
> > + case 27:
> > + WRITE_FPR(27, value);
> > + break;
> > + case 28:
> > + WRITE_FPR(28, value);
> > + break;
> > + case 29:
> > + WRITE_FPR(29, value);
> > + break;
> > + case 30:
> > + WRITE_FPR(30, value);
> > + break;
> > + case 31:
> > + WRITE_FPR(31, value);
> > + break;
> > + default:
> > + panic("unexpected fd '%d'", fd);
> > + }
> > +#undef WRITE_FPR
> > +}
> > +
> > +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc)
> > +{
> > + bool user = user_mode(regs);
> > + unsigned int res;
> > + unsigned long origpc;
> > + unsigned long origra;
> > + unsigned long value = 0;
> > + union loongarch_instruction insn;
> > +
> > + origpc = (unsigned long)pc;
> > + origra = regs->regs[1];
> > +
> > + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
> > +
> > + /*
> > + * This load never faults.
> > + */
> > + __get_inst(&insn.word, pc, user);
> > + if (user && !access_ok(addr, 8))
> > + goto sigbus;
> > +
> > + if (insn.reg2i12_format.opcode == ldd_op ||
> > + insn.reg2i14_format.opcode == ldptrd_op ||
> > + insn.reg3_format.opcode == ldxd_op) {
> > + res = unaligned_read(addr, &value, 8, 1);
> > + if (res)
> > + goto fault;
> > + regs->regs[insn.reg2i12_format.rd] = value;
> > + } else if (insn.reg2i12_format.opcode == ldw_op ||
> > + insn.reg2i14_format.opcode == ldptrw_op ||
> > + insn.reg3_format.opcode == ldxw_op) {
> > + res = unaligned_read(addr, &value, 4, 1);
> > + if (res)
> > + goto fault;
> > + regs->regs[insn.reg2i12_format.rd] = value;
> > + } else if (insn.reg2i12_format.opcode == ldwu_op ||
> > + insn.reg3_format.opcode == ldxwu_op) {
> > + res = unaligned_read(addr, &value, 4, 0);
> > + if (res)
> > + goto fault;
> > + regs->regs[insn.reg2i12_format.rd] = value;
> > + } else if (insn.reg2i12_format.opcode == ldh_op ||
> > + insn.reg3_format.opcode == ldxh_op) {
> > + res = unaligned_read(addr, &value, 2, 1);
> > + if (res)
> > + goto fault;
> > + regs->regs[insn.reg2i12_format.rd] = value;
> > + } else if (insn.reg2i12_format.opcode == ldhu_op ||
> > + insn.reg3_format.opcode == ldxhu_op) {
> > + res = unaligned_read(addr, &value, 2, 0);
> > + if (res)
> > + goto fault;
> > + regs->regs[insn.reg2i12_format.rd] = value;
> > + } else if (insn.reg2i12_format.opcode == std_op ||
> > + insn.reg2i14_format.opcode == stptrd_op ||
> > + insn.reg3_format.opcode == stxd_op) {
> > + value = regs->regs[insn.reg2i12_format.rd];
> > + res = unaligned_write(addr, value, 8);
> > + if (res)
> > + goto fault;
> > + } else if (insn.reg2i12_format.opcode == stw_op ||
> > + insn.reg2i14_format.opcode == stptrw_op ||
> > + insn.reg3_format.opcode == stxw_op) {
> > + value = regs->regs[insn.reg2i12_format.rd];
> > + res = unaligned_write(addr, value, 4);
> > + if (res)
> > + goto fault;
> > + } else if (insn.reg2i12_format.opcode == sth_op ||
> > + insn.reg3_format.opcode == stxh_op) {
> > + value = regs->regs[insn.reg2i12_format.rd];
> > + res = unaligned_write(addr, value, 2);
> > + if (res)
> > + goto fault;
> > + } else if (insn.reg2i12_format.opcode == fldd_op ||
> > + insn.reg3_format.opcode == fldxd_op) {
> > + res = unaligned_read(addr, &value, 8, 1);
> > + if (res)
> > + goto fault;
> > + write_fpr(insn.reg2i12_format.rd, value);
> > + } else if (insn.reg2i12_format.opcode == flds_op ||
> > + insn.reg3_format.opcode == fldxs_op) {
> > + res = unaligned_read(addr, &value, 4, 1);
> > + if (res)
> > + goto fault;
> > + write_fpr(insn.reg2i12_format.rd, value);
> > + } else if (insn.reg2i12_format.opcode == fstd_op ||
> > + insn.reg3_format.opcode == fstxd_op) {
> > + value = read_fpr(insn.reg2i12_format.rd);
> > + res = unaligned_write(addr, value, 8);
> > + if (res)
> > + goto fault;
> > + } else if (insn.reg2i12_format.opcode == fsts_op ||
> > + insn.reg3_format.opcode == fstxs_op) {
> > + value = read_fpr(insn.reg2i12_format.rd);
> > + res = unaligned_write(addr, value, 4);
> > + if (res)
> > + goto fault;
> > + } else
> > + goto sigbus;
>
> So many conditional branches for linear instruction matching; would
> a switch-case be better?
I think emulation is not performance-critical code, and the code here
is difficult to convert to switch-case.

Huacai
>
> 0000000000000238 <emulate_load_store_insn>:
> ...
> 35c: 1470180f lu12i.w $t3, 229568(0x380c0)
> 360: 580141af beq $t1, $t3, 320(0x140) # 4a0
> <emulate_load_store_insn+0x268>
> 364: 1451000f lu12i.w $t3, 165888(0x28800)
> 368: 5801dd8f beq $t0, $t3, 476(0x1dc) # 544
> <emulate_load_store_insn+0x30c>
> 36c: 0280900f addi.w $t3, $zero, 36(0x24)
> 370: 5801d5cf beq $t2, $t3, 468(0x1d4) # 544
> <emulate_load_store_insn+0x30c>
> 374: 1470100f lu12i.w $t3, 229504(0x38080)
> 378: 5801cdaf beq $t1, $t3, 460(0x1cc) # 544
> <emulate_load_store_insn+0x30c>
> 37c: 1455000f lu12i.w $t3, 174080(0x2a800)
> 380: 5801f18f beq $t0, $t3, 496(0x1f0) # 570
> <emulate_load_store_insn+0x338>
> 384: 1470500f lu12i.w $t3, 230016(0x38280)
> 388: 5801e9af beq $t1, $t3, 488(0x1e8) # 570
> <emulate_load_store_insn+0x338>
> 38c: 1450800f lu12i.w $t3, 164864(0x28400)
> 390: 5801ed8f beq $t0, $t3, 492(0x1ec) # 57c
> <emulate_load_store_insn+0x344>
> 394: 1470080f lu12i.w $t3, 229440(0x38040)
> 398: 5801e5af beq $t1, $t3, 484(0x1e4) # 57c
> <emulate_load_store_insn+0x344>
> 39c: 1454800f lu12i.w $t3, 173056(0x2a400)
> 3a0: 5801f98f beq $t0, $t3, 504(0x1f8) # 598
> <emulate_load_store_insn+0x360>
> 3a4: 1470480f lu12i.w $t3, 229952(0x38240)
> 3a8: 5801f1af beq $t1, $t3, 496(0x1f0) # 598
> <emulate_load_store_insn+0x360>
> 3ac: 1453800f lu12i.w $t3, 171008(0x29c00)
> 3b0: 5802098f beq $t0, $t3, 520(0x208) # 5b8
> <emulate_load_store_insn+0x380>
> 3b4: 02809c0f addi.w $t3, $zero, 39(0x27)
> 3b8: 580201cf beq $t2, $t3, 512(0x200) # 5b8
> <emulate_load_store_insn+0x380>
> 3bc: 1470380f lu12i.w $t3, 229824(0x381c0)
> 3c0: 5801f9af beq $t1, $t3, 504(0x1f8) # 5b8
> <emulate_load_store_insn+0x380>
> 3c4: 1453000f lu12i.w $t3, 169984(0x29800)
> 3c8: 58021d8f beq $t0, $t3, 540(0x21c) # 5e4
> <emulate_load_store_insn+0x3ac>
> 3cc: 0280940f addi.w $t3, $zero, 37(0x25)
> 3d0: 580215cf beq $t2, $t3, 532(0x214) # 5e4
> <emulate_load_store_insn+0x3ac>
> 3d4: 1470300e lu12i.w $t2, 229760(0x38180)
> 3d8: 58020dae beq $t1, $t2, 524(0x20c) # 5e4
> <emulate_load_store_insn+0x3ac>
> 3dc: 1452800e lu12i.w $t2, 168960(0x29400)
> 3e0: 58020d8e beq $t0, $t2, 524(0x20c) # 5ec
> <emulate_load_store_insn+0x3b4>
> 3e4: 1470280e lu12i.w $t2, 229696(0x38140)
> 3e8: 580205ae beq $t1, $t2, 516(0x204) # 5ec
> <emulate_load_store_insn+0x3b4>
> 3ec: 1457000e lu12i.w $t2, 178176(0x2b800)
> 3f0: 5802058e beq $t0, $t2, 516(0x204) # 5f4
> <emulate_load_store_insn+0x3bc>
> 3f4: 1470680e lu12i.w $t2, 230208(0x38340)
> 3f8: 5801fdae beq $t1, $t2, 508(0x1fc) # 5f4
> <emulate_load_store_insn+0x3bc>
> 3fc: 1456000e lu12i.w $t2, 176128(0x2b000)
> 400: 5802258e beq $t0, $t2, 548(0x224) # 624
> <emulate_load_store_insn+0x3ec>
> 404: 1470600e lu12i.w $t2, 230144(0x38300)
> 408: 58021dae beq $t1, $t2, 540(0x21c) # 624
> <emulate_load_store_insn+0x3ec>
> 40c: 1457800e lu12i.w $t2, 179200(0x2bc00)
> 410: 5802218e beq $t0, $t2, 544(0x220) # 630
> <emulate_load_store_insn+0x3f8>
> 414: 1470780e lu12i.w $t2, 230336(0x383c0)
> 418: 580219ae beq $t1, $t2, 536(0x218) # 630
> <emulate_load_store_insn+0x3f8>
> 41c: 1456800e lu12i.w $t2, 177152(0x2b400)
> 420: 58000d8e beq $t0, $t2, 12(0xc) # 42c
> <emulate_load_store_insn+0x1f4>
> 424: 1470700c lu12i.w $t0, 230272(0x38380)
> 428: 5ffed9ac bne $t1, $t0, -296(0x3fed8) # 300
> <emulate_load_store_insn+0xc8>
> ...
>
> Regards,
> Rui
>
> > +
> > +
> > +#ifdef CONFIG_DEBUG_FS
> > + if (user)
> > + unaligned_instructions_user++;
> > + else
> > + unaligned_instructions_kernel++;
> > +#endif
> > +
> > + compute_return_era(regs);
> > + return;
> > +
> > +fault:
> > + /* roll back jump/branch */
> > + regs->csr_era = origpc;
> > + regs->regs[1] = origra;
> > + /* Did we have an exception handler installed? */
> > + if (fixup_exception(regs))
> > + return;
> > +
> > + die_if_kernel("Unhandled kernel unaligned access", regs);
> > + force_sig(SIGSEGV);
> > +
> > + return;
> > +
> > +sigbus:
> > + die_if_kernel("Unhandled kernel unaligned access", regs);
> > + force_sig(SIGBUS);
> > +
> > + return;
> > +}
> > +
> > +#ifdef CONFIG_DEBUG_FS
> > +static int __init debugfs_unaligned(void)
> > +{
> > + struct dentry *d;
> > +
> > + d = debugfs_create_dir("loongarch", NULL);
> > + if (!d)
> > + return -ENOMEM;
> > +
> > + debugfs_create_u32("unaligned_instructions_user",
> > + S_IRUGO, d, &unaligned_instructions_user);
> > + debugfs_create_u32("unaligned_instructions_kernel",
> > + S_IRUGO, d, &unaligned_instructions_kernel);
> > +
> > + return 0;
> > +}
> > +arch_initcall(debugfs_unaligned);
> > +#endif
> > diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
> > index e36635fccb69..867895530340 100644
> > --- a/arch/loongarch/lib/Makefile
> > +++ b/arch/loongarch/lib/Makefile
> > @@ -3,4 +3,4 @@
> > # Makefile for LoongArch-specific library files.
> > #
> >
> > -lib-y += delay.o clear_user.o copy_user.o dump_tlb.o
> > +lib-y += delay.o clear_user.o copy_user.o dump_tlb.o unaligned.o
> > diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S
> > new file mode 100644
> > index 000000000000..03210cb5a18d
> > --- /dev/null
> > +++ b/arch/loongarch/lib/unaligned.S
> > @@ -0,0 +1,93 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
> > + */
> > +
> > +#include <linux/linkage.h>
> > +
> > +#include <asm/asm.h>
> > +#include <asm/asmmacro.h>
> > +#include <asm/errno.h>
> > +#include <asm/export.h>
> > +#include <asm/regdef.h>
> > +
> > +.macro fixup_ex from, to, fix
> > +.if \fix
> > + .section .fixup, "ax"
> > +\to: li.w a0, -EFAULT
> > + jr ra
> > + .previous
> > +.endif
> > + .section __ex_table, "a"
> > + PTR \from\()b, \to\()b
> > + .previous
> > +.endm
> > +
> > +/*
> > + * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign)
> > + *
> > + * a0: addr
> > + * a1: value
> > + * a2: n
> > + * a3: sign
> > + */
> > +SYM_FUNC_START(unaligned_read)
> > + beqz a2, 5f
> > +
> > + li.w t1, 8
> > + li.w t2, 0
> > +
> > + addi.d t0, a2, -1
> > + mul.d t1, t0, t1
> > + add.d a0, a0, t0
> > +
> > + beq a3, zero, 2f
> > +1: ld.b t3, a0, 0
> > + b 3f
> > +
> > +2: ld.bu t3, a0, 0
> > +3: sll.d t3, t3, t1
> > + or t2, t2, t3
> > + addi.d t1, t1, -8
> > + addi.d a0, a0, -1
> > + addi.d a2, a2, -1
> > + bgt a2, zero, 2b
> > +4: st.d t2, a1, 0
> > +
> > + move a0, a2
> > + jr ra
> > +
> > +5: li.w a0, -EFAULT
> > + jr ra
> > +
> > + fixup_ex 1, 6, 1
> > + fixup_ex 2, 6, 0
> > + fixup_ex 4, 6, 0
> > +SYM_FUNC_END(unaligned_read)
> > +
> > +/*
> > + * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n)
> > + *
> > + * a0: addr
> > + * a1: value
> > + * a2: n
> > + */
> > +SYM_FUNC_START(unaligned_write)
> > + beqz a2, 3f
> > +
> > + li.w t0, 0
> > +1: srl.d t1, a1, t0
> > +2: st.b t1, a0, 0
> > + addi.d t0, t0, 8
> > + addi.d a2, a2, -1
> > + addi.d a0, a0, 1
> > + bgt a2, zero, 1b
> > +
> > + move a0, a2
> > + jr ra
> > +
> > +3: li.w a0, -EFAULT
> > + jr ra
> > +
> > + fixup_ex 2, 4, 1
> > +SYM_FUNC_END(unaligned_write)
> > --
> > 2.31.1
> >
>

2022-10-17 10:07:32

by WANG Xuerui

[permalink] [raw]
Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

On 2022/10/17 16:59, Rui Wang wrote:
>> +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc)
>> +{
>> + bool user = user_mode(regs);
>> + unsigned int res;
>> + unsigned long origpc;
>> + unsigned long origra;
>> + unsigned long value = 0;
>> + union loongarch_instruction insn;
>> +
>> + origpc = (unsigned long)pc;
>> + origra = regs->regs[1];
>> +
>> + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
>> +
>> + /*
>> + * This load never faults.
>> + */
>> + __get_inst(&insn.word, pc, user);
>> + if (user && !access_ok(addr, 8))
>> + goto sigbus;
>> +
>> + if (insn.reg2i12_format.opcode == ldd_op ||
>> + insn.reg2i14_format.opcode == ldptrd_op ||
>> + insn.reg3_format.opcode == ldxd_op) {
>> + res = unaligned_read(addr, &value, 8, 1);
>> + if (res)
>> + goto fault;
>> + regs->regs[insn.reg2i12_format.rd] = value;
>> + } else if (insn.reg2i12_format.opcode == ldw_op ||
>> + insn.reg2i14_format.opcode == ldptrw_op ||
>> + insn.reg3_format.opcode == ldxw_op) {
>> + res = unaligned_read(addr, &value, 4, 1);
>> + if (res)
>> + goto fault;
>> + regs->regs[insn.reg2i12_format.rd] = value;
>> + } else if (insn.reg2i12_format.opcode == ldwu_op ||
>> + insn.reg3_format.opcode == ldxwu_op) {
>> + res = unaligned_read(addr, &value, 4, 0);
>> + if (res)
>> + goto fault;
>> + regs->regs[insn.reg2i12_format.rd] = value;
>> + } else if (insn.reg2i12_format.opcode == ldh_op ||
>> + insn.reg3_format.opcode == ldxh_op) {
>> + res = unaligned_read(addr, &value, 2, 1);
>> + if (res)
>> + goto fault;
>> + regs->regs[insn.reg2i12_format.rd] = value;
>> + } else if (insn.reg2i12_format.opcode == ldhu_op ||
>> + insn.reg3_format.opcode == ldxhu_op) {
>> + res = unaligned_read(addr, &value, 2, 0);
>> + if (res)
>> + goto fault;
>> + regs->regs[insn.reg2i12_format.rd] = value;
>> + } else if (insn.reg2i12_format.opcode == std_op ||
>> + insn.reg2i14_format.opcode == stptrd_op ||
>> + insn.reg3_format.opcode == stxd_op) {
>> + value = regs->regs[insn.reg2i12_format.rd];
>> + res = unaligned_write(addr, value, 8);
>> + if (res)
>> + goto fault;
>> + } else if (insn.reg2i12_format.opcode == stw_op ||
>> + insn.reg2i14_format.opcode == stptrw_op ||
>> + insn.reg3_format.opcode == stxw_op) {
>> + value = regs->regs[insn.reg2i12_format.rd];
>> + res = unaligned_write(addr, value, 4);
>> + if (res)
>> + goto fault;
>> + } else if (insn.reg2i12_format.opcode == sth_op ||
>> + insn.reg3_format.opcode == stxh_op) {
>> + value = regs->regs[insn.reg2i12_format.rd];
>> + res = unaligned_write(addr, value, 2);
>> + if (res)
>> + goto fault;
>> + } else if (insn.reg2i12_format.opcode == fldd_op ||
>> + insn.reg3_format.opcode == fldxd_op) {
>> + res = unaligned_read(addr, &value, 8, 1);
>> + if (res)
>> + goto fault;
>> + write_fpr(insn.reg2i12_format.rd, value);
>> + } else if (insn.reg2i12_format.opcode == flds_op ||
>> + insn.reg3_format.opcode == fldxs_op) {
>> + res = unaligned_read(addr, &value, 4, 1);
>> + if (res)
>> + goto fault;
>> + write_fpr(insn.reg2i12_format.rd, value);
>> + } else if (insn.reg2i12_format.opcode == fstd_op ||
>> + insn.reg3_format.opcode == fstxd_op) {
>> + value = read_fpr(insn.reg2i12_format.rd);
>> + res = unaligned_write(addr, value, 8);
>> + if (res)
>> + goto fault;
>> + } else if (insn.reg2i12_format.opcode == fsts_op ||
>> + insn.reg3_format.opcode == fstxs_op) {
>> + value = read_fpr(insn.reg2i12_format.rd);
>> + res = unaligned_write(addr, value, 4);
>> + if (res)
>> + goto fault;
>> + } else
>> + goto sigbus;
>
> So many conditional branches for linear instruction matching; would
> a switch-case be better?
>
> 0000000000000238 <emulate_load_store_insn>:
> ...
> 35c: 1470180f lu12i.w $t3, 229568(0x380c0)
> 360: 580141af beq $t1, $t3, 320(0x140) # 4a0
> <emulate_load_store_insn+0x268>
> 364: 1451000f lu12i.w $t3, 165888(0x28800)
> 368: 5801dd8f beq $t0, $t3, 476(0x1dc) # 544
> [snip]

The code structure here is basically several switches intermingled with
each other, each matching opcodes for one insn format but sharing much
of the code once matched. That is quite difficult to express in C without
some kind of code duplication.

But we can try:

{
	int size = 0;
	bool is_read = false, is_signed = false;

	switch (insn.reg2i12_format.opcode) {
	case ldd_op:
		size = 8;
		is_read = true;
		is_signed = true;
		break;

	case std_op:
		size = 8;
		is_read = false;
		value = regs->regs[insn.reg2i12_format.rd];
		break;

	case ldw_op:
		size = 4;
		is_read = true;
		is_signed = true;
		break;

	case ldwu_op:
		size = 4;
		is_read = true;
		is_signed = false;
		break;

	/* other opcodes */

	default:
		/* not 2RI12 */
		break;
	}

	switch (insn.reg2i14_format.opcode) {
	case ldptrd_op:
		size = 8;
		is_read = true;
		is_signed = true;
		break;

	case ldptrw_op:
		size = 4;
		is_read = true;
		is_signed = true;
		break;

	/* ... */
	}

	/* other formats */

	if (!size) {
		/* no match */
		goto sigbus;
	}

	if (is_read)
		res = unaligned_read(addr, &value, size, is_signed);
	else
		res = unaligned_write(addr, value, size);

	if (res)
		goto fault;

	/* ... */
}

This way at least the data flow is clearer, and probably the codegen
quality would benefit as well due to the clearer switch-case structures.
(It's okay if this is not the case; it's not performance-critical anyway
because if we ever hit this at all, performance would have already tanked.)

--
WANG "xen0n" Xuerui

Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/

2022-10-17 13:07:11

by David Laight

[permalink] [raw]
Subject: RE: [PATCH V2] LoongArch: Add unaligned access support

From: Huacai Chen
> Sent: 17 October 2022 03:24
>
> Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
> unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
> Loongson-3C5000) are configurable whether support unaligned access in
> hardware. This patch add unaligned access emulation for those LoongArch
> processors without hardware support.
>
...
> + /*
> + * This load never faults.
> + */
> + __get_inst(&insn.word, pc, user);

On what basis does it never fault?
Any user access can fault.
If nothing else another thread of the process can unmap
the page.

> + if (user && !access_ok(addr, 8))
> + goto sigbus;

Surely that is technically wrong - a two or four byte
access is valid right at the end of valid user addresses.

> +
> + if (insn.reg2i12_format.opcode == ldd_op ||
> + insn.reg2i14_format.opcode == ldptrd_op ||
> + insn.reg3_format.opcode == ldxd_op) {
> + res = unaligned_read(addr, &value, 8, 1);

That is the most horrid indentation of long lines I've
ever seen.
I'd also guess you can common up some of this code
by looking at the instruction field that includes the
transfer width.

The long else-if list will generate horrid code.
But maybe since you've just taken a fault it really
doesn't matter.
Indeed, just emulating in C using byte accesses
is probably fine.
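
For example, something roughly along these lines (little-endian byte
order is assumed, and the exception fixups a real implementation would
need for faulting user accesses are left out):

	/* illustrative sketch only, not the patch's actual helper */
	static unsigned long read_bytes(const u8 *p, unsigned int n, bool sign)
	{
		unsigned long v = 0;
		unsigned int i;

		/* assemble the value byte by byte, little-endian */
		for (i = 0; i < n; i++)
			v |= (unsigned long)p[i] << (8 * i);

		/* sign-extend narrow loads up to 64 bits when asked to */
		if (sign && n < sizeof(v) && (v & (1UL << (8 * n - 1))))
			v |= ~0UL << (8 * n);

		return v;
	}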

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2022-10-17 14:34:24

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

Hi Huacai,

I love your patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.1-rc1 next-20221017]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Huacai-Chen/LoongArch-Add-unaligned-access-support/20221017-115912
patch link: https://lore.kernel.org/r/20221017022330.2383060-1-chenhuacai%40loongson.cn
patch subject: [PATCH V2] LoongArch: Add unaligned access support
config: loongarch-randconfig-s031-20221017
compiler: loongarch64-linux-gcc (GCC) 12.1.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.4-39-gce1a6720-dirty
# https://github.com/intel-lab-lkp/linux/commit/43203da95f81c534f5d756124e52f9915a18c08e
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Huacai-Chen/LoongArch-Add-unaligned-access-support/20221017-115912
git checkout 43203da95f81c534f5d756124e52f9915a18c08e
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=loongarch SHELL=/bin/bash arch/loongarch/kernel/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>

sparse warnings: (new ones prefixed by >>)
>> arch/loongarch/kernel/unaligned.c:269:38: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *addr @@ got void [noderef] __user *addr @@
arch/loongarch/kernel/unaligned.c:269:38: sparse: expected void *addr
arch/loongarch/kernel/unaligned.c:269:38: sparse: got void [noderef] __user *addr
arch/loongarch/kernel/unaligned.c:276:38: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *addr @@ got void [noderef] __user *addr @@
arch/loongarch/kernel/unaligned.c:276:38: sparse: expected void *addr
arch/loongarch/kernel/unaligned.c:276:38: sparse: got void [noderef] __user *addr
arch/loongarch/kernel/unaligned.c:282:38: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *addr @@ got void [noderef] __user *addr @@
arch/loongarch/kernel/unaligned.c:282:38: sparse: expected void *addr
arch/loongarch/kernel/unaligned.c:282:38: sparse: got void [noderef] __user *addr
arch/loongarch/kernel/unaligned.c:288:38: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *addr @@ got void [noderef] __user *addr @@
arch/loongarch/kernel/unaligned.c:288:38: sparse: expected void *addr
arch/loongarch/kernel/unaligned.c:288:38: sparse: got void [noderef] __user *addr
arch/loongarch/kernel/unaligned.c:294:38: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *addr @@ got void [noderef] __user *addr @@
arch/loongarch/kernel/unaligned.c:294:38: sparse: expected void *addr
arch/loongarch/kernel/unaligned.c:294:38: sparse: got void [noderef] __user *addr
arch/loongarch/kernel/unaligned.c:302:39: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *addr @@ got void [noderef] __user *addr @@
arch/loongarch/kernel/unaligned.c:302:39: sparse: expected void *addr
arch/loongarch/kernel/unaligned.c:302:39: sparse: got void [noderef] __user *addr
arch/loongarch/kernel/unaligned.c:309:39: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *addr @@ got void [noderef] __user *addr @@
arch/loongarch/kernel/unaligned.c:309:39: sparse: expected void *addr
arch/loongarch/kernel/unaligned.c:309:39: sparse: got void [noderef] __user *addr
arch/loongarch/kernel/unaligned.c:315:39: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *addr @@ got void [noderef] __user *addr @@
arch/loongarch/kernel/unaligned.c:315:39: sparse: expected void *addr
arch/loongarch/kernel/unaligned.c:315:39: sparse: got void [noderef] __user *addr
arch/loongarch/kernel/unaligned.c:320:38: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *addr @@ got void [noderef] __user *addr @@
arch/loongarch/kernel/unaligned.c:320:38: sparse: expected void *addr
arch/loongarch/kernel/unaligned.c:320:38: sparse: got void [noderef] __user *addr
arch/loongarch/kernel/unaligned.c:326:38: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *addr @@ got void [noderef] __user *addr @@
arch/loongarch/kernel/unaligned.c:326:38: sparse: expected void *addr
arch/loongarch/kernel/unaligned.c:326:38: sparse: got void [noderef] __user *addr
arch/loongarch/kernel/unaligned.c:333:39: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *addr @@ got void [noderef] __user *addr @@
arch/loongarch/kernel/unaligned.c:333:39: sparse: expected void *addr
arch/loongarch/kernel/unaligned.c:333:39: sparse: got void [noderef] __user *addr
arch/loongarch/kernel/unaligned.c:339:39: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *addr @@ got void [noderef] __user *addr @@
arch/loongarch/kernel/unaligned.c:339:39: sparse: expected void *addr
arch/loongarch/kernel/unaligned.c:339:39: sparse: got void [noderef] __user *addr

vim +269 arch/loongarch/kernel/unaligned.c

244
245 void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc)
246 {
247 bool user = user_mode(regs);
248 unsigned int res;
249 unsigned long origpc;
250 unsigned long origra;
251 unsigned long value = 0;
252 union loongarch_instruction insn;
253
254 origpc = (unsigned long)pc;
255 origra = regs->regs[1];
256
257 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
258
259 /*
260 * This load never faults.
261 */
262 __get_inst(&insn.word, pc, user);
263 if (user && !access_ok(addr, 8))
264 goto sigbus;
265
266 if (insn.reg2i12_format.opcode == ldd_op ||
267 insn.reg2i14_format.opcode == ldptrd_op ||
268 insn.reg3_format.opcode == ldxd_op) {
> 269 res = unaligned_read(addr, &value, 8, 1);
270 if (res)
271 goto fault;
272 regs->regs[insn.reg2i12_format.rd] = value;
273 } else if (insn.reg2i12_format.opcode == ldw_op ||
274 insn.reg2i14_format.opcode == ldptrw_op ||
275 insn.reg3_format.opcode == ldxw_op) {
276 res = unaligned_read(addr, &value, 4, 1);
277 if (res)
278 goto fault;
279 regs->regs[insn.reg2i12_format.rd] = value;
280 } else if (insn.reg2i12_format.opcode == ldwu_op ||
281 insn.reg3_format.opcode == ldxwu_op) {
282 res = unaligned_read(addr, &value, 4, 0);
283 if (res)
284 goto fault;
285 regs->regs[insn.reg2i12_format.rd] = value;
286 } else if (insn.reg2i12_format.opcode == ldh_op ||
287 insn.reg3_format.opcode == ldxh_op) {
288 res = unaligned_read(addr, &value, 2, 1);
289 if (res)
290 goto fault;
291 regs->regs[insn.reg2i12_format.rd] = value;
292 } else if (insn.reg2i12_format.opcode == ldhu_op ||
293 insn.reg3_format.opcode == ldxhu_op) {
294 res = unaligned_read(addr, &value, 2, 0);
295 if (res)
296 goto fault;
297 regs->regs[insn.reg2i12_format.rd] = value;
298 } else if (insn.reg2i12_format.opcode == std_op ||
299 insn.reg2i14_format.opcode == stptrd_op ||
300 insn.reg3_format.opcode == stxd_op) {
301 value = regs->regs[insn.reg2i12_format.rd];
302 res = unaligned_write(addr, value, 8);
303 if (res)
304 goto fault;
305 } else if (insn.reg2i12_format.opcode == stw_op ||
306 insn.reg2i14_format.opcode == stptrw_op ||
307 insn.reg3_format.opcode == stxw_op) {
308 value = regs->regs[insn.reg2i12_format.rd];
309 res = unaligned_write(addr, value, 4);
310 if (res)
311 goto fault;
312 } else if (insn.reg2i12_format.opcode == sth_op ||
313 insn.reg3_format.opcode == stxh_op) {
314 value = regs->regs[insn.reg2i12_format.rd];
315 res = unaligned_write(addr, value, 2);
316 if (res)
317 goto fault;
318 } else if (insn.reg2i12_format.opcode == fldd_op ||
319 insn.reg3_format.opcode == fldxd_op) {
320 res = unaligned_read(addr, &value, 8, 1);
321 if (res)
322 goto fault;
323 write_fpr(insn.reg2i12_format.rd, value);
324 } else if (insn.reg2i12_format.opcode == flds_op ||
325 insn.reg3_format.opcode == fldxs_op) {
326 res = unaligned_read(addr, &value, 4, 1);
327 if (res)
328 goto fault;
329 write_fpr(insn.reg2i12_format.rd, value);
330 } else if (insn.reg2i12_format.opcode == fstd_op ||
331 insn.reg3_format.opcode == fstxd_op) {
332 value = read_fpr(insn.reg2i12_format.rd);
333 res = unaligned_write(addr, value, 8);
334 if (res)
335 goto fault;
336 } else if (insn.reg2i12_format.opcode == fsts_op ||
337 insn.reg3_format.opcode == fstxs_op) {
338 value = read_fpr(insn.reg2i12_format.rd);
339 res = unaligned_write(addr, value, 4);
340 if (res)
341 goto fault;
342 } else
343 goto sigbus;
344
345

--
0-DAY CI Kernel Test Service
https://01.org/lkp



2022-10-18 02:50:23

by Huacai Chen

[permalink] [raw]
Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

Hi, David,

On Mon, Oct 17, 2022 at 8:58 PM David Laight <[email protected]> wrote:
>
> From: Huacai Chen
> > Sent: 17 October 2022 03:24
> >
> > Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
> > unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
> > Loongson-3C5000) are configurable whether support unaligned access in
> > hardware. This patch add unaligned access emulation for those LoongArch
> > processors without hardware support.
> >
> ...
> > + /*
> > + * This load never faults.
> > + */
> > + __get_inst(&insn.word, pc, user);
>
> On what basis does it never fault?
> Any user access can fault.
> If nothing else another thread of the process can unmap
> the page.
Yes, this can happen. Since __get_inst() handles faults, we can just
remove the comment.

>
> > + if (user && !access_ok(addr, 8))
> > + goto sigbus;
>
> Surely that is technically wrong - a two or four byte
> > access is valid right at the end of valid user addresses.
Yes, this check should be moved to each case.
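
For example, the 4-byte load branch would then carry its own check,
roughly (a sketch only, reusing the labels already in the patch):

	if (user && !access_ok(addr, 4))
		goto sigbus;
	res = unaligned_read(addr, &value, 4, 1);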

>
> > +
> > + if (insn.reg2i12_format.opcode == ldd_op ||
> > + insn.reg2i14_format.opcode == ldptrd_op ||
> > + insn.reg3_format.opcode == ldxd_op) {
> > + res = unaligned_read(addr, &value, 8, 1);
>
> That is the most horrid indentation of long lines I've
> ever seen.
> I'd also guess you can common up some of this code
> by looking at the instruction field that includes the
> transfer width.
>
> The long else-if list will generate horrid code.
> But maybe since you've just taken a fault it really
> doesn't matter.
> Indeed, just emulating in C using byte accesses
> is probably fine.
I want to keep the assembly, because we can use more efficient methods
with the upcoming alternative mechanism.

Huacai
>
> David
>
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
>

2022-10-18 04:01:45

by WANG Xuerui

[permalink] [raw]
Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

On 2022/10/18 10:24, Huacai Chen wrote:
> Hi, David,
>
> On Mon, Oct 17, 2022 at 8:58 PM David Laight <[email protected]> wrote:
>>
>> From: Huacai Chen
>>> Sent: 17 October 2022 03:24
>>>
>>> Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
>>> unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
>>> Loongson-3C5000) are configurable whether support unaligned access in
>>> hardware. This patch add unaligned access emulation for those LoongArch
>>> processors without hardware support.
>>>
>> ...
>>> + /*
>>> + * This load never faults.
>>> + */
>>> + __get_inst(&insn.word, pc, user);
>>
>> On what basis does it never fault?
>> Any user access can fault.
>> If nothing else another thread of the process can unmap
>> the page.
> Yes, this can happen. Since __get_inst() handles faults, we can just
> remove the comment.
>
>>
>>> + if (user && !access_ok(addr, 8))
>>> + goto sigbus;
>>
>> Surely that is technically wrong - a two or four byte
>> access is valid right at the end of valid user addresses.
> Yes, this check should be moved to each case.
>
>>
>>> +
>>> + if (insn.reg2i12_format.opcode == ldd_op ||
>>> + insn.reg2i14_format.opcode == ldptrd_op ||
>>> + insn.reg3_format.opcode == ldxd_op) {
>>> + res = unaligned_read(addr, &value, 8, 1);
>>
>> That is the most horrid indentation of long lines I've
>> ever seen.
>> I'd also guess you can common up some of this code
>> by looking at the instruction field that includes the
>> transfer width.
>>
>> The long else-if list will generate horrid code.
>> But maybe since you've just taken a fault it really
>> doesn't matter.
>> Indeed, just emulating in C using byte accesses
>> is probably fine.
> I want to keep the assembly, because we can use more efficient methods
> with the upcoming alternative mechanism.

What about my more structured approach in another reply that avoids the
huge else-if conditions? Both the terrible line wraps and codegen could
be avoided.

--
WANG "xen0n" Xuerui

Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/

2022-10-18 08:10:51

by Huacai Chen

[permalink] [raw]
Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

On Tue, Oct 18, 2022 at 11:29 AM WANG Xuerui <[email protected]> wrote:
>
> On 2022/10/18 10:24, Huacai Chen wrote:
> > Hi, David,
> >
> > On Mon, Oct 17, 2022 at 8:58 PM David Laight <[email protected]> wrote:
> >>
> >> From: Huacai Chen
> >>> Sent: 17 October 2022 03:24
> >>>
> >>> Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
> >>> unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
> >>> Loongson-3C5000) are configurable whether support unaligned access in
> >>> hardware. This patch add unaligned access emulation for those LoongArch
> >>> processors without hardware support.
> >>>
> >> ...
> >>> + /*
> >>> + * This load never faults.
> >>> + */
> >>> + __get_inst(&insn.word, pc, user);
> >>
> >> On what basis does it never fault?
> >> Any user access can fault.
> >> If nothing else another thread of the process can unmap
> >> the page.
> > Yes, this can happen. Since __get_inst() handles faults, we can just
> > remove the comment.
> >
> >>
> >>> + if (user && !access_ok(addr, 8))
> >>> + goto sigbus;
> >>
> >> Surely that is technically wrong - a two or four byte
> >> access is valid right at the end of valid user addresses.
> > Yes, this check should be moved to each case.
> >
> >>
> >>> +
> >>> + if (insn.reg2i12_format.opcode == ldd_op ||
> >>> + insn.reg2i14_format.opcode == ldptrd_op ||
> >>> + insn.reg3_format.opcode == ldxd_op) {
> >>> + res = unaligned_read(addr, &value, 8, 1);
> >>
> >> That is the most horrid indentation of long lines I've
> >> ever seen.
> >> I'd also guess you can common up some of this code
> >> by looking at the instruction field that includes the
> >> transfer width.
> >>
> >> The long elsif list will generate horrid code.
> >> But maybe since you've just taken a fault it really
> >> doesn't matter.
> >> Indeed just emulating in C using byte accesses
> >> is probably fine.
> > I want to keep the assembly, because we can use more efficient methods
> > with the upcoming alternative mechanism.
>
> What about my more structured approach in another reply that avoids the
> huge else-if conditions? Both the terrible line wraps and codegen could
> be avoided.
OK, let me try.

Huacai
>
> --
> WANG "xen0n" Xuerui
>
> Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/
>

2022-10-18 08:39:52

by David Laight

[permalink] [raw]
Subject: RE: [PATCH V2] LoongArch: Add unaligned access support

From: Huacai Chen
> Sent: 18 October 2022 08:33
...
> > What about my more structured approach in another reply that avoids the
> > huge else-if conditions? Both the terrible line wraps and codegen could
> > be avoided.
...
> OK, let me try.

I suspect you can mask out some 'operand size' bits from the
instructions - instead of checking each opcode.

I'm also pretty sure you can't assume the FP registers are live.
If a read from userspace faults then there can be a full
process switch - so by the time you try to write to the
FP registers they no longer belong to the current process.

It might be safer and simpler to just enforce the FP
registers be saved and then act on the save area.
I'd guess they get restored in the 'return to userspace'
code.
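
Sketching that idea (the helper names below are assumptions, not
necessarily the real LoongArch API):

        /*
         * If the task still owns the FPU, flush the live FPRs into its
         * save area first, then emulate against the save area and let
         * the return-to-user path reload the hardware registers.
         */
        if (is_fpu_owner())                     /* assumed helper */
                save_fp(current);               /* assumed helper */

        /* 'fd' = destination FP register decoded from the instruction */
        current->thread.fpu.fpr[fd].val64[0] = value;   /* layout illustrative */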

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2022-10-18 08:46:02

by WANG Xuerui

[permalink] [raw]
Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

On 2022/10/18 15:48, David Laight wrote:
> From: Huacai Chen
>> Sent: 18 October 2022 08:33
> ...
>>> What about my more structured approach in another reply that avoids the
>>> huge else-if conditions? Both the terrible line wraps and codegen could
>>> be avoided.
> ...
>> OK, let me try.
>
> I suspect you can mask out some 'operand size' bits from the
> instructions - instead of checking each opcode.

Technically LoongArch instruction formats don't contain any "operand
size bit", because most current opcodes seem to be simply sequentially
allocated. While there seems to exist a certain pattern in e.g. encodings
of {LD,ST,FLD,FST}.{B,H,W,D}, I believe it's just coincidence (e.g. bits
23:22 of those instructions seem to represent "B/H/W/D"; but other
instructions clearly don't follow such a pattern, not even the
{LD,ST}.{BU,HU,WU} ones).

For now I'd personally prefer readability and maintainability over
performance, because traps are already expensive enough that
optimizations like this don't really matter.

--
WANG "xen0n" Xuerui

Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/

2022-10-18 10:37:08

by Huacai Chen

[permalink] [raw]
Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

On Tue, Oct 18, 2022 at 3:48 PM David Laight <[email protected]> wrote:
>
> From: Huacai Chen
> > Sent: 18 October 2022 08:33
> ...
> > > What about my more structured approach in another reply that avoids the
> > > huge else-if conditions? Both the terrible line wraps and codegen could
> > > be avoided.
> ...
> > OK, let me try.
>
> I suspect you can mask out some 'operand size' bits from the
> instructions - instead of checking each opcode.
>
> I'm also pretty sure you can't assume the FP registers are live.
> If a read from userspace faults then there can be a full
> process switch - so by the time you try to write to the
> FP registers they no longer belong to the current process.
>
> It might be safer and simpler to just enforce the FP
> registers be saved and then act on the save area.
> I'd guess they get restored in the 'return to userspace'
> code.
Good catch, will be fixed in V4.

Huacai
>
> David
>
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)