2021-06-17 16:14:50

by Matteo Croce

[permalink] [raw]
Subject: [PATCH v3 2/3] riscv: optimized memmove

From: Matteo Croce <[email protected]>

When the destination buffer is before the source one, or when the
buffers doesn't overlap, it's safe to use memcpy() instead, which is
optimized to use a bigger data size possible.

Signed-off-by: Matteo Croce <[email protected]>
---
arch/riscv/include/asm/string.h | 6 ++--
arch/riscv/kernel/riscv_ksyms.c | 2 --
arch/riscv/lib/Makefile | 1 -
arch/riscv/lib/memmove.S | 64 ---------------------------------
arch/riscv/lib/string.c | 23 ++++++++++++
5 files changed, 26 insertions(+), 70 deletions(-)
delete mode 100644 arch/riscv/lib/memmove.S

diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 6b5d6fc3eab4..25d9b9078569 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -17,11 +17,11 @@ extern asmlinkage void *__memset(void *, int, size_t);
#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *dest, const void *src, size_t count);
extern void *__memcpy(void *dest, const void *src, size_t count);
+#define __HAVE_ARCH_MEMMOVE
+extern void *memmove(void *dest, const void *src, size_t count);
+extern void *__memmove(void *dest, const void *src, size_t count);
#endif

-#define __HAVE_ARCH_MEMMOVE
-extern asmlinkage void *memmove(void *, const void *, size_t);
-extern asmlinkage void *__memmove(void *, const void *, size_t);
/* For those files which don't want to check by kasan. */
#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
#define memcpy(dst, src, len) __memcpy(dst, src, len)
diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c
index 3f6d512a5b97..361565c4db7e 100644
--- a/arch/riscv/kernel/riscv_ksyms.c
+++ b/arch/riscv/kernel/riscv_ksyms.c
@@ -10,6 +10,4 @@
* Assembly functions that may be used (directly or indirectly) by modules
*/
EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(__memset);
-EXPORT_SYMBOL(__memmove);
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 2ffe85d4baee..484f5ff7b508 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
lib-y += delay.o
lib-y += memset.o
-lib-y += memmove.o
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE) += string.o
diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S
deleted file mode 100644
index 07d1d2152ba5..000000000000
--- a/arch/riscv/lib/memmove.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#include <linux/linkage.h>
-#include <asm/asm.h>
-
-ENTRY(__memmove)
-WEAK(memmove)
- move t0, a0
- move t1, a1
-
- beq a0, a1, exit_memcpy
- beqz a2, exit_memcpy
- srli t2, a2, 0x2
-
- slt t3, a0, a1
- beqz t3, do_reverse
-
- andi a2, a2, 0x3
- li t4, 1
- beqz t2, byte_copy
-
-word_copy:
- lw t3, 0(a1)
- addi t2, t2, -1
- addi a1, a1, 4
- sw t3, 0(a0)
- addi a0, a0, 4
- bnez t2, word_copy
- beqz a2, exit_memcpy
- j byte_copy
-
-do_reverse:
- add a0, a0, a2
- add a1, a1, a2
- andi a2, a2, 0x3
- li t4, -1
- beqz t2, reverse_byte_copy
-
-reverse_word_copy:
- addi a1, a1, -4
- addi t2, t2, -1
- lw t3, 0(a1)
- addi a0, a0, -4
- sw t3, 0(a0)
- bnez t2, reverse_word_copy
- beqz a2, exit_memcpy
-
-reverse_byte_copy:
- addi a0, a0, -1
- addi a1, a1, -1
-
-byte_copy:
- lb t3, 0(a1)
- addi a2, a2, -1
- sb t3, 0(a0)
- add a1, a1, t4
- add a0, a0, t4
- bnez a2, byte_copy
-
-exit_memcpy:
- move a0, t0
- move a1, t1
- ret
-END(__memmove)
diff --git a/arch/riscv/lib/string.c b/arch/riscv/lib/string.c
index e48a79a045d8..9c7009d43c39 100644
--- a/arch/riscv/lib/string.c
+++ b/arch/riscv/lib/string.c
@@ -89,3 +89,26 @@ EXPORT_SYMBOL(__memcpy);

void *memcpy(void *dest, const void *src, size_t count) __weak __alias(__memcpy);
EXPORT_SYMBOL(memcpy);
+
+/*
+ * Simply check if the buffer overlaps an call memcpy() in case,
+ * otherwise do a simple one byte at time backward copy.
+ */
+void *__memmove(void *dest, const void *src, size_t count)
+{
+ if (dest < src || src + count <= dest)
+ return memcpy(dest, src, count);
+
+ if (dest > src) {
+ const char *s = src + count;
+ char *tmp = dest + count;
+
+ while (count--)
+ *--tmp = *--s;
+ }
+ return dest;
+}
+EXPORT_SYMBOL(__memmove);
+
+void *memmove(void *dest, const void *src, size_t count) __weak __alias(__memmove);
+EXPORT_SYMBOL(memmove);
--
2.31.1


2021-06-21 14:32:44

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v3 2/3] riscv: optimized memmove

This looks nice, but I think it would be a good candidate for lib/
again.

2021-06-22 00:47:50

by Nick Kossifidis

[permalink] [raw]
Subject: Re: [PATCH v3 2/3] riscv: optimized memmove

Στις 2021-06-17 18:27, Matteo Croce έγραψε:
> +
> +/*
> + * Simply check if the buffer overlaps an call memcpy() in case,
> + * otherwise do a simple one byte at time backward copy.
> + */
> +void *__memmove(void *dest, const void *src, size_t count)
> +{
> + if (dest < src || src + count <= dest)
> + return memcpy(dest, src, count);
> +
> + if (dest > src) {
> + const char *s = src + count;
> + char *tmp = dest + count;
> +
> + while (count--)
> + *--tmp = *--s;
> + }
> + return dest;
> +}
> +EXPORT_SYMBOL(__memmove);
> +

Copying backwards byte-per-byte is suboptimal, I understand this is not
a very common scenario but you could at least check if they are both
word-aligned e.g. (((src + len) | (dst + len)) & mask), or missaligned
by the same offset e.g. (((src + len) ^ (dst + len)) & mask) and still
end up doing word-by-word copying. Ideally it would be great if you
re-used the same technique you used for forwards copying on your memcpy.

> +void *memmove(void *dest, const void *src, size_t count) __weak
> __alias(__memmove);
> +EXPORT_SYMBOL(memmove);

As I mentioned on your memcpy patch, if you implement memmove, you can
just alias memcpy to memmove and we won't have to worry about memcpy
being used on overlapping regions.

2021-06-30 04:48:49

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v3 2/3] riscv: optimized memmove

Hi Matteo,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linux/master]
[also build test ERROR on linus/master v5.13 next-20210629]
[cannot apply to atish-riscv-linux/topo_v3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url: https://github.com/0day-ci/linux/commits/Matteo-Croce/riscv-optimized-mem-functions/20210617-232934
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git dd860052c99b1e088352bdd4fb7aef46f8d2ef47
config: riscv-allyesconfig (attached as .config)
compiler: riscv64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/211a65b2ee262f10eec0dfc17896aab1d2f0aea2
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Matteo-Croce/riscv-optimized-mem-functions/20210617-232934
git checkout 211a65b2ee262f10eec0dfc17896aab1d2f0aea2
# save the attached .config to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross O=build_dir ARCH=riscv SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

arch/riscv/lib/string.c: In function '__memmove':
>> arch/riscv/lib/string.c:90:7: error: inlining failed in call to always_inline 'memcpy': function body can be overwritten at link time
90 | void *memcpy(void *dest, const void *src, size_t count) __weak __alias(__memcpy);
| ^~~~~~
arch/riscv/lib/string.c:100:10: note: called from here
100 | return memcpy(dest, src, count);
| ^~~~~~~~~~~~~~~~~~~~~~~~


vim +/memcpy +90 arch/riscv/lib/string.c

c35a3474b67826 Matteo Croce 2021-06-17 89
c35a3474b67826 Matteo Croce 2021-06-17 @90 void *memcpy(void *dest, const void *src, size_t count) __weak __alias(__memcpy);
c35a3474b67826 Matteo Croce 2021-06-17 91 EXPORT_SYMBOL(memcpy);
211a65b2ee262f Matteo Croce 2021-06-17 92

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]


Attachments:
(No filename) (2.43 kB)
.config.gz (68.17 kB)
Download all attachments