2021-09-08 17:49:47

by Greentime Hu

[permalink] [raw]
Subject: [RFC PATCH v8 15/21] riscv: Add vector extension XOR implementation

This patch adds support for vector-optimized XOR. It has been tested in spike
and qemu.

Logs in spike:
[ 0.008365] xor: measuring software checksum speed
[ 0.048885] 8regs : 1719.000 MB/sec
[ 0.089080] 32regs : 1717.000 MB/sec
[ 0.129275] rvv : 7043.000 MB/sec
[ 0.129525] xor: using function: rvv (7043.000 MB/sec)

Logs in qemu:
[ 0.098943] xor: measuring software checksum speed
[ 0.139391] 8regs : 2911.000 MB/sec
[ 0.181079] 32regs : 2813.000 MB/sec
[ 0.224260] rvv : 45.000 MB/sec
[ 0.225586] xor: using function: 8regs (2911.000 MB/sec)

Co-developed-by: Han-Kuan Chen <[email protected]>
Signed-off-by: Han-Kuan Chen <[email protected]>
Signed-off-by: Greentime Hu <[email protected]>
---
arch/riscv/include/asm/xor.h | 74 ++++++++++++++++++++++++++++++++
arch/riscv/lib/Makefile | 1 +
arch/riscv/lib/xor.S | 81 ++++++++++++++++++++++++++++++++++++
3 files changed, 156 insertions(+)
create mode 100644 arch/riscv/include/asm/xor.h
create mode 100644 arch/riscv/lib/xor.S

diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
new file mode 100644
index 000000000000..60ee0224913d
--- /dev/null
+++ b/arch/riscv/include/asm/xor.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 SiFive
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#ifdef CONFIG_VECTOR
+#include <asm/vector.h>
+
+extern void xor_regs_2_(unsigned long bytes, unsigned long *p1,
+ unsigned long *p2);
+extern void xor_regs_3_(unsigned long bytes, unsigned long *p1,
+ unsigned long *p2, unsigned long *p3);
+extern void xor_regs_4_(unsigned long bytes, unsigned long *p1,
+ unsigned long *p2, unsigned long *p3,
+ unsigned long *p4);
+extern void xor_regs_5_(unsigned long bytes, unsigned long *p1,
+ unsigned long *p2, unsigned long *p3, unsigned long *p4,
+ unsigned long *p5);
+
+static void xor_rvv_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ kernel_rvv_begin();
+ xor_regs_2_(bytes, p1, p2);
+ kernel_rvv_end();
+}
+
+static void
+xor_rvv_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ kernel_rvv_begin();
+ xor_regs_3_(bytes, p1, p2, p3);
+ kernel_rvv_end();
+}
+
+static void
+xor_rvv_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ kernel_rvv_begin();
+ xor_regs_4_(bytes, p1, p2, p3, p4);
+ kernel_rvv_end();
+}
+
+static void
+xor_rvv_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ kernel_rvv_begin();
+ xor_regs_5_(bytes, p1, p2, p3, p4, p5);
+ kernel_rvv_end();
+}
+
+static struct xor_block_template xor_block_rvv = {
+ .name = "rvv",
+ .do_2 = xor_rvv_2,
+ .do_3 = xor_rvv_3,
+ .do_4 = xor_rvv_4,
+ .do_5 = xor_rvv_5
+};
+
+extern bool has_vector;
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+ do { \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_32regs); \
+ if (has_vector) { \
+ xor_speed(&xor_block_rvv);\
+ } \
+ } while (0)
+#endif
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 25d5c9664e57..acd87ac86d24 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -7,3 +7,4 @@ lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o

obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+lib-$(CONFIG_VECTOR) += xor.o
diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S
new file mode 100644
index 000000000000..de2e234c39ed
--- /dev/null
+++ b/arch/riscv/lib/xor.S
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 SiFive
+ */
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+
+ENTRY(xor_regs_2_)
+ vsetvli a3, a0, e8, m8
+ vle8.v v0, (a1)
+ vle8.v v8, (a2)
+ sub a0, a0, a3
+ vxor.vv v16, v0, v8
+ add a2, a2, a3
+ vse8.v v16, (a1)
+ add a1, a1, a3
+ bnez a0, xor_regs_2_
+ ret
+END(xor_regs_2_)
+EXPORT_SYMBOL(xor_regs_2_)
+
+ENTRY(xor_regs_3_)
+ vsetvli a4, a0, e8, m8
+ vle8.v v0, (a1)
+ vle8.v v8, (a2)
+ sub a0, a0, a4
+ vxor.vv v0, v0, v8
+ vle8.v v16, (a3)
+ add a2, a2, a4
+ vxor.vv v16, v0, v16
+ add a3, a3, a4
+ vse8.v v16, (a1)
+ add a1, a1, a4
+ bnez a0, xor_regs_3_
+ ret
+END(xor_regs_3_)
+EXPORT_SYMBOL(xor_regs_3_)
+
+ENTRY(xor_regs_4_)
+ vsetvli a5, a0, e8, m8
+ vle8.v v0, (a1)
+ vle8.v v8, (a2)
+ sub a0, a0, a5
+ vxor.vv v0, v0, v8
+ vle8.v v16, (a3)
+ add a2, a2, a5
+ vxor.vv v0, v0, v16
+ vle8.v v24, (a4)
+ add a3, a3, a5
+ vxor.vv v16, v0, v24
+ add a4, a4, a5
+ vse8.v v16, (a1)
+ add a1, a1, a5
+ bnez a0, xor_regs_4_
+ ret
+END(xor_regs_4_)
+EXPORT_SYMBOL(xor_regs_4_)
+
+ENTRY(xor_regs_5_)
+ vsetvli a6, a0, e8, m8
+ vle8.v v0, (a1)
+ vle8.v v8, (a2)
+ sub a0, a0, a6
+ vxor.vv v0, v0, v8
+ vle8.v v16, (a3)
+ add a2, a2, a6
+ vxor.vv v0, v0, v16
+ vle8.v v24, (a4)
+ add a3, a3, a6
+ vxor.vv v0, v0, v24
+ vle8.v v8, (a5)
+ add a4, a4, a6
+ vxor.vv v16, v0, v8
+ add a5, a5, a6
+ vse8.v v16, (a1)
+ add a1, a1, a6
+ bnez a0, xor_regs_5_
+ ret
+END(xor_regs_5_)
+EXPORT_SYMBOL(xor_regs_5_)
--
2.31.1


2021-09-09 06:16:06

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [RFC PATCH v8 15/21] riscv: Add vector extension XOR implementation

On Thu, Sep 09, 2021 at 01:45:27AM +0800, Greentime Hu wrote:
> +extern void xor_regs_2_(unsigned long bytes, unsigned long *p1,
> + unsigned long *p2);
> +extern void xor_regs_3_(unsigned long bytes, unsigned long *p1,
> + unsigned long *p2, unsigned long *p3);
> +extern void xor_regs_4_(unsigned long bytes, unsigned long *p1,
> + unsigned long *p2, unsigned long *p3,
> + unsigned long *p4);
> +extern void xor_regs_5_(unsigned long bytes, unsigned long *p1,
> + unsigned long *p2, unsigned long *p3, unsigned long *p4,
> + unsigned long *p5);

There is no need for externs on function declarations ever.

> +static void xor_rvv_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
> +{
> + kernel_rvv_begin();
> + xor_regs_2_(bytes, p1, p2);
> + kernel_rvv_end();
> +}

This looks strange. Why these wrappers?

2021-09-14 08:31:43

by Ley Foon Tan

[permalink] [raw]
Subject: Re: [RFC PATCH v8 15/21] riscv: Add vector extension XOR implementation

On Thu, Sep 9, 2021 at 1:49 AM Greentime Hu <[email protected]> wrote:
>
> This patch adds support for vector optimized XOR it is tested in spike and
> qemu.
>
> Logs in spike:
> [ 0.008365] xor: measuring software checksum speed
> [ 0.048885] 8regs : 1719.000 MB/sec
> [ 0.089080] 32regs : 1717.000 MB/sec
> [ 0.129275] rvv : 7043.000 MB/sec
> [ 0.129525] xor: using function: rvv (7043.000 MB/sec)
>
> Logs in qemu:
> [ 0.098943] xor: measuring software checksum speed
> [ 0.139391] 8regs : 2911.000 MB/sec
> [ 0.181079] 32regs : 2813.000 MB/sec
> [ 0.224260] rvv : 45.000 MB/sec
> [ 0.225586] xor: using function: 8regs (2911.000 MB/sec)
>
> Co-developed-by: Han-Kuan Chen <[email protected]>
> Signed-off-by: Han-Kuan Chen <[email protected]>
> Signed-off-by: Greentime Hu <[email protected]>
> ---
> arch/riscv/include/asm/xor.h | 74 ++++++++++++++++++++++++++++++++
> arch/riscv/lib/Makefile | 1 +
> arch/riscv/lib/xor.S | 81 ++++++++++++++++++++++++++++++++++++
> 3 files changed, 156 insertions(+)
> create mode 100644 arch/riscv/include/asm/xor.h
> create mode 100644 arch/riscv/lib/xor.S
>
> diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
> new file mode 100644
> index 000000000000..60ee0224913d
> --- /dev/null
> +++ b/arch/riscv/include/asm/xor.h
> @@ -0,0 +1,74 @@


[...]

>
> +extern bool has_vector;
> +#undef XOR_TRY_TEMPLATES
> +#define XOR_TRY_TEMPLATES \
> + do { \
> + xor_speed(&xor_block_8regs); \
> + xor_speed(&xor_block_32regs); \
> + if (has_vector) { \
> + xor_speed(&xor_block_rvv);\
> + } \
> + } while (0)
> +#endif
>
The bool has_vector has been changed to a has_vector() function now; should
this be changed as well?


Regards
Ley Foon

2021-09-28 07:02:38

by Greentime Hu

[permalink] [raw]
Subject: Re: [RFC PATCH v8 15/21] riscv: Add vector extension XOR implementation

Christoph Hellwig <[email protected]> 於 2021年9月9日 週四 下午2:12寫道:
>
> On Thu, Sep 09, 2021 at 01:45:27AM +0800, Greentime Hu wrote:
> > +extern void xor_regs_2_(unsigned long bytes, unsigned long *p1,
> > + unsigned long *p2);
> > +extern void xor_regs_3_(unsigned long bytes, unsigned long *p1,
> > + unsigned long *p2, unsigned long *p3);
> > +extern void xor_regs_4_(unsigned long bytes, unsigned long *p1,
> > + unsigned long *p2, unsigned long *p3,
> > + unsigned long *p4);
> > +extern void xor_regs_5_(unsigned long bytes, unsigned long *p1,
> > + unsigned long *p2, unsigned long *p3, unsigned long *p4,
> > + unsigned long *p5);
>
> There is no need for externs on function declarations ever.
>
Ok, I'll remove it.

> > +static void xor_rvv_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
> > +{
> > + kernel_rvv_begin();
> > + xor_regs_2_(bytes, p1, p2);
> > + kernel_rvv_end();
> > +}
>
> This looks strange. Why these wrappers?

We don't generally use rvv in kernel space. If we want to use it, we
need to save all vector registers first. That is why the wrappers are
needed, just like in the arm64/x86 implementations in
arch/arm64/include/asm/xor.h
arch/x86/include/asm/xor.h

2021-09-28 07:05:32

by Greentime Hu

[permalink] [raw]
Subject: Re: [RFC PATCH v8 15/21] riscv: Add vector extension XOR implementation

Ley Foon Tan <[email protected]> 於 2021年9月14日 週二 下午4:30寫道:
>
> On Thu, Sep 9, 2021 at 1:49 AM Greentime Hu <[email protected]> wrote:
> >
> > This patch adds support for vector optimized XOR it is tested in spike and
> > qemu.
> >
> > Logs in spike:
> > [ 0.008365] xor: measuring software checksum speed
> > [ 0.048885] 8regs : 1719.000 MB/sec
> > [ 0.089080] 32regs : 1717.000 MB/sec
> > [ 0.129275] rvv : 7043.000 MB/sec
> > [ 0.129525] xor: using function: rvv (7043.000 MB/sec)
> >
> > Logs in qemu:
> > [ 0.098943] xor: measuring software checksum speed
> > [ 0.139391] 8regs : 2911.000 MB/sec
> > [ 0.181079] 32regs : 2813.000 MB/sec
> > [ 0.224260] rvv : 45.000 MB/sec
> > [ 0.225586] xor: using function: 8regs (2911.000 MB/sec)
> >
> > Co-developed-by: Han-Kuan Chen <[email protected]>
> > Signed-off-by: Han-Kuan Chen <[email protected]>
> > Signed-off-by: Greentime Hu <[email protected]>
> > ---
> > arch/riscv/include/asm/xor.h | 74 ++++++++++++++++++++++++++++++++
> > arch/riscv/lib/Makefile | 1 +
> > arch/riscv/lib/xor.S | 81 ++++++++++++++++++++++++++++++++++++
> > 3 files changed, 156 insertions(+)
> > create mode 100644 arch/riscv/include/asm/xor.h
> > create mode 100644 arch/riscv/lib/xor.S
> >
> > diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
> > new file mode 100644
> > index 000000000000..60ee0224913d
> > --- /dev/null
> > +++ b/arch/riscv/include/asm/xor.h
> > @@ -0,0 +1,74 @@
>
>
> [...]
>
> >
> > +extern bool has_vector;
> > +#undef XOR_TRY_TEMPLATES
> > +#define XOR_TRY_TEMPLATES \
> > + do { \
> > + xor_speed(&xor_block_8regs); \
> > + xor_speed(&xor_block_32regs); \
> > + if (has_vector) { \
> > + xor_speed(&xor_block_rvv);\
> > + } \
> > + } while (0)
> > +#endif
> >
> bool has_vector is changed to has_vector() function now, should this
> change as well?

That's right. Thank you, Ley Foon.
I'll merge the patch to fix the has_vector() issue in the next version of the patchset.