2022-01-22 00:24:46

by Michael T. Kloos

Subject: [PATCH] Fixed: Misaligned memory access. Fixed pointer comparison.

Rewrote the riscv memmove() assembly implementation. The previous
implementation did not check memory alignment and it compared two
pointers with a signed comparison. The misaligned memory accesses
would cause the kernel to crash on systems that neither emulated
them in firmware nor supported them in hardware. Firmware emulation
is slow and may not exist. Additionally, hardware support may not
exist, and even if it did, misaligned accesses would likely still
be slower than aligned ones. The RISC-V spec does not guarantee
that support for misaligned memory accesses will exist, so it
should not be depended on.
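
For illustration only, and not part of the patch: a minimal C
sketch, using made-up addresses, of how a signed compare of two
pointer values can report the opposite ordering from an unsigned
compare once one of the addresses has its top bit set:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Hypothetical 64-bit addresses: dest in a high (kernel)
         * mapping, src in a low mapping. */
        uint64_t dest = 0xffffffff80000000ull;
        uint64_t src  = 0x0000000000400000ull;

        /* memmove copies forward only when dest sits below src. */
        int forward_unsigned = dest < src;                    /* 0 */
        int forward_signed   = (int64_t)dest < (int64_t)src;  /* 1 */

        printf("unsigned: forward=%d, signed: forward=%d\n",
               forward_unsigned, forward_signed);
        return 0;
}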

This patch now checks for the maximum granularity of co-alignment
between the pointers and copies with that granularity, using
single-byte copies for any unaligned data at either end. It also
now uses an unsigned comparison for the pointers.
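
For reference, the granularity dispatch amounts to something like
the following C sketch (illustrative only; coalignment() is a
made-up helper, not code from this patch):

#include <stddef.h>
#include <stdint.h>

/*
 * Pick the widest chunk size (8, 4, 2 or 1 bytes) that dest and src
 * can both reach. If the low bits of (dest ^ src) are zero, the two
 * addresses share the same offset within that chunk size, so
 * byte-copying up to the next boundary aligns both at once.
 */
size_t coalignment(const void *dest, const void *src)
{
        uintptr_t x = (uintptr_t)dest ^ (uintptr_t)src;

        if ((x & 0x7) == 0)
                return 8;       /* doubleword copy (64-bit builds) */
        if ((x & 0x3) == 0)
                return 4;       /* word copy */
        if ((x & 0x1) == 0)
                return 2;       /* half-word copy */
        return 1;               /* byte copy */
}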

Added half-word and, if built for 64-bit, double-word copy.

Migrated from the now-deprecated assembler annotations to the
newer ones.

Signed-off-by: Michael T. Kloos <[email protected]>
---
arch/riscv/lib/memmove.S | 264 +++++++++++++++++++++++++++++++--------
1 file changed, 210 insertions(+), 54 deletions(-)

diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S
index 07d1d2152ba5..df7cd7a559ae 100644
--- a/arch/riscv/lib/memmove.S
+++ b/arch/riscv/lib/memmove.S
@@ -1,64 +1,220 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Michael T. Kloos <[email protected]>
+ */

#include <linux/linkage.h>
#include <asm/asm.h>

-ENTRY(__memmove)
-WEAK(memmove)
- move t0, a0
- move t1, a1
+SYM_FUNC_START(__memmove)
+SYM_FUNC_START_ALIAS(memmove)
+ /*
+ * Returns
+ * a0 - dest
+ *
+ * Parameters
+ * a0 - Inclusive first byte of dest
+ * a1 - Inclusive first byte of src
+ * a2 - Length of copy
+ *
+ * Because the return matches the parameter register a0,
+ * we will not clobber or modify that register.
+ */

- beq a0, a1, exit_memcpy
- beqz a2, exit_memcpy
- srli t2, a2, 0x2
+ /* Return if nothing to do */
+ beq a0, a1, exit_memmove
+ beqz a2, exit_memmove

- slt t3, a0, a1
- beqz t3, do_reverse
+ /*
+ * Register Uses
+ * a3 - Inclusive first multibyte of src
+ * a4 - Non-inclusive last multibyte of src
+ * a5 - Non-inclusive last byte of src
+ *
+ * During the copy
+ * Forward Copy: a1 - Index counter of src
+ * Reverse Copy: a5 - Index counter of src
+ * Both Copy Modes: t2 - Index counter of dest
+ * Both Copy Modes: t1 - Temporary for load-store
+ * Both Copy Modes: t0 - Link
+ */

- andi a2, a2, 0x3
- li t4, 1
- beqz t2, byte_copy
+ /*
+ * Solve for last byte now. We will solve the rest when
+ * they are needed for the copy because either byte copy
+ * does not require any of the others (Wasted effort if
+ * byte copy gets used) or we do not yet have enough
+ * information to solve them.
+ */
+ add a5, a1, a2

-word_copy:
- lw t3, 0(a1)
- addi t2, t2, -1
- addi a1, a1, 4
- sw t3, 0(a0)
- addi a0, a0, 4
- bnez t2, word_copy
- beqz a2, exit_memcpy
- j byte_copy
-
-do_reverse:
- add a0, a0, a2
- add a1, a1, a2
- andi a2, a2, 0x3
- li t4, -1
- beqz t2, reverse_byte_copy
-
-reverse_word_copy:
- addi a1, a1, -4
- addi t2, t2, -1
- lw t3, 0(a1)
- addi a0, a0, -4
- sw t3, 0(a0)
- bnez t2, reverse_word_copy
- beqz a2, exit_memcpy
-
-reverse_byte_copy:
- addi a0, a0, -1
- addi a1, a1, -1
+ /*
+ * Byte copy if copying less than SZREG bytes.
+ * This can cause problems with the bulk copy
+ * implementation below and is small enough not
+ * to bother.
+ */
+ andi t0, a2, -SZREG
+ beqz t0, byte_copy
+
+ /* Determine the maximum granularity of co-alignment. */
+ xor t0, a0, a1
+#if SZREG >= 8
+ andi t1, t0, 0x7
+ beqz t1, doubleword_copy
+#endif
+ andi t1, t0, 0x3
+ beqz t1, word_copy
+ andi t1, t0, 0x1
+ beqz t1, halfword_copy
+ /* Fall through to byte copy if nothing larger is found. */

byte_copy:
- lb t3, 0(a1)
- addi a2, a2, -1
- sb t3, 0(a0)
- add a1, a1, t4
- add a0, a0, t4
- bnez a2, byte_copy
-
-exit_memcpy:
- move a0, t0
- move a1, t1
- ret
-END(__memmove)
+ bltu a1, a0, byte_copy_reverse
+
+byte_copy_forward:
+ add t2, a0, zero
+byte_copy_fw_callin:
+ beq a1, a5, exit_memmove
+ lb t1, (a1)
+ sb t1, (t2)
+ addi a1, a1, 1
+ addi t2, t2, 1
+ j byte_copy_fw_callin
+
+byte_copy_reverse:
+ add t2, a0, a2
+byte_copy_rv_callin:
+ beq a1, a5, exit_memmove
+ addi a5, a5, -1
+ addi t2, t2, -1
+ lb t1, (a5)
+ sb t1, (t2)
+ j byte_copy_rv_callin
+
+exit_memmove:
+ ret
+
+copy_bytes_until_aligned_fw:
+ beq a1, a3, 1f /* Reuse the return from the other copy loop */
+ lb t1, (a1)
+ sb t1, (t2)
+ addi a1, a1, 1
+ addi t2, t2, 1
+ j copy_bytes_until_aligned_fw
+
+copy_bytes_until_aligned_rv:
+ beq a4, a5, 1f
+ addi a5, a5, -1
+ addi t2, t2, -1
+ lb t1, (a5)
+ sb t1, (t2)
+ j copy_bytes_until_aligned_rv
+ 1: jalr zero, (t0) /* Return */
+
+#if SZREG >= 8
+doubleword_copy:
+ andi a3, a1, -8
+ andi a4, a5, -8
+ beq a3, a1, 1f
+ addi a3, a3, 8
+ 1:
+ bltu a1, a0, doubleword_copy_reverse
+
+doubleword_copy_forward:
+ add t2, a0, zero
+
+ jal t0, copy_bytes_until_aligned_fw
+
+ 1:
+ beq a1, a4, byte_copy_fw_callin
+ ld t1, (a1)
+ sd t1, (t2)
+ addi a1, a1, 8
+ addi t2, t2, 8
+ j 1b
+
+doubleword_copy_reverse:
+ add t2, a0, a2
+
+ jal t0, copy_bytes_until_aligned_rv
+
+ 1:
+ beq a3, a5, byte_copy_rv_callin
+ addi a5, a5, -8
+ addi t2, t2, -8
+ ld t1, (a5)
+ sd t1, (t2)
+ j 1b
+#endif
+
+word_copy:
+ andi a3, a1, -4
+ andi a4, a5, -4
+ beq a3, a1, 1f
+ addi a3, a3, 4
+ 1:
+ bltu a1, a0, word_copy_reverse
+
+word_copy_forward:
+ add t2, a0, zero
+
+ jal t0, copy_bytes_until_aligned_fw
+
+ 1:
+ beq a1, a4, byte_copy_fw_callin
+ lw t1, (a1)
+ sw t1, (t2)
+ addi a1, a1, 4
+ addi t2, t2, 4
+ j 1b
+
+word_copy_reverse:
+ add t2, a0, a2
+
+ jal t0, copy_bytes_until_aligned_rv
+
+ 1:
+ beq a3, a5, byte_copy_rv_callin
+ addi a5, a5, -4
+ addi t2, t2, -4
+ lw t1, (a5)
+ sw t1, (t2)
+ j 1b
+
+halfword_copy:
+ andi a3, a1, -2
+ andi a4, a5, -2
+ beq a3, a1, 1f
+ addi a3, a3, 2
+ 1:
+ bltu a1, a0, halfword_reverse
+
+halfword_forward:
+ add t2, a0, zero
+
+ jal t0, copy_bytes_until_aligned_fw
+
+ 1:
+ beq a1, a4, byte_copy_fw_callin
+ lh t1, (a1)
+ sh t1, (t2)
+ addi a1, a1, 2
+ addi t2, t2, 2
+ j 1b
+
+halfword_reverse:
+ add t2, a0, a2
+
+ jal t0, copy_bytes_until_aligned_rv
+
+ 1:
+ beq a3, a5, byte_copy_rv_callin
+ addi a5, a5, -2
+ addi t2, t2, -2
+ lh t1, (a5)
+ sh t1, (t2)
+ j 1b
+
+SYM_FUNC_END_ALIAS(memmove)
+SYM_FUNC_END(__memmove)
--
2.34.1


2022-01-23 15:35:37

by kernel test robot

Subject: Re: [PATCH] Fixed: Misaligned memory access. Fixed pointer comparison.

Hi "Michael,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.16 next-20220121]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url: https://github.com/0day-ci/linux/commits/Michael-T-Kloos/Fixed-Misaligned-memory-access-Fixed-pointer-comparison/20220121-074148
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 2c271fe77d52a0555161926c232cd5bc07178b39
config: riscv-randconfig-r021-20220120 (https://download.01.org/0day-ci/archive/20220123/[email protected]/config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 7b3d30728816403d1fd73cc5082e9fb761262bce)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/0day-ci/linux/commit/c1930afcc76babc4e2313f67d0fe103a26f07712
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Michael-T-Kloos/Fixed-Misaligned-memory-access-Fixed-pointer-comparison/20220121-074148
        git checkout c1930afcc76babc4e2313f67d0fe103a26f07712
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

>> arch/riscv/lib/memmove.S:113:16: error: operand must be a symbol with %lo/%pcrel_lo/%tprel_lo modifier or an integer in the range [-2048, 2047]
1: jalr zero, (t0)
^


vim +113 arch/riscv/lib/memmove.S

8
9 SYM_FUNC_START(__memmove)
10 SYM_FUNC_START_ALIAS(memmove)
11 /*
12 * Returns
13 * a0 - dest
14 *
15 * Parameters
16 * a0 - Inclusive first byte of dest
17 * a1 - Inclusive first byte of src
18 * a2 - Length of copy
19 *
20 * Because the return matches the parameter register a0,
21 * we will not clobber or modify that register.
22 */
23
24 /* Return if nothing to do */
25 beq a0, a1, exit_memmove
26 beqz a2, exit_memmove
27
28 /*
29 * Register Uses
30 * a3 - Inclusive first multibyte of src
31 * a4 - Non-inclusive last multibyte of src
32 * a5 - Non-inclusive last byte of src
33 *
34 * During the copy
35 * Forward Copy: a1 - Index counter of src
36 * Reverse Copy: a5 - Index counter of src
37 * Both Copy Modes: t2 - Index counter of dest
38 * Both Copy Modes: t1 - Temporary for load-store
39 * Both Copy Modes: t0 - Link
40 */
41
42 /*
43 * Solve for last byte now. We will solve the rest when
44 * they are needed for the copy because either byte copy
45 * does not require any of the others (Wasted effort if
46 * byte copy gets used) or we do not yet have enough
47 * information to solve them.
48 */
49 add a5, a1, a2
50
51 /*
52 * Byte copy if copying less than SZREG bytes.
53 * This can cause problems with the bulk copy
54 * implementation below and is small enough not
55 * to bother.
56 */
57 andi t0, a2, -SZREG
58 beqz t0, byte_copy
59
60 /* Determine the maximum granularity of co-alignment. */
61 xor t0, a0, a1
62 #if SZREG >= 8
63 andi t1, t0, 0x7
64 beqz t1, doubleword_copy
65 #endif
66 andi t1, t0, 0x3
67 beqz t1, word_copy
68 andi t1, t0, 0x1
69 beqz t1, halfword_copy
70 /* Fall through to byte copy if nothing larger is found. */
71
72 byte_copy:
73 bltu a1, a0, byte_copy_reverse
74
75 byte_copy_forward:
76 add t2, a0, zero
77 byte_copy_fw_callin:
78 beq a1, a5, exit_memmove
79 lb t1, (a1)
80 sb t1, (t2)
81 addi a1, a1, 1
82 addi t2, t2, 1
83 j byte_copy_fw_callin
84
85 byte_copy_reverse:
86 add t2, a0, a2
87 byte_copy_rv_callin:
88 beq a1, a5, exit_memmove
89 addi a5, a5, -1
90 addi t2, t2, -1
91 lb t1, (a5)
92 sb t1, (t2)
93 j byte_copy_rv_callin
94
95 exit_memmove:
96 ret
97
98 copy_bytes_until_aligned_fw:
99 beq a1, a3, 1f /* Reuse the return from the other copy loop */
100 lb t1, (a1)
101 sb t1, (t2)
102 addi a1, a1, 1
103 addi t2, t2, 1
104 j copy_bytes_until_aligned_fw
105
106 copy_bytes_until_aligned_rv:
107 beq a4, a5, 1f
108 addi a5, a5, -1
109 addi t2, t2, -1
110 lb t1, (a5)
111 sb t1, (t2)
112 j copy_bytes_until_aligned_rv
> 113 1: jalr zero, (t0) /* Return */
114

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]