From: Björn Töpel <[email protected]>
Hi,
This is Andy's kernel mode vector V2 series [1], with my BLAKE2s
AVX-512-to-RISC-V translation patch appended.
I've tagged it as RFC, since Andy's series is still not in-tree yet.
It's a first step towards a Vector aided Wireguard! ;-)
Cheers,
Björn
[1] https://lore.kernel.org/linux-riscv/[email protected]/
Andy Chiu (3):
riscv: sched: defer restoring Vector context for user
riscv: vector: do not pass task_struct into
riscv_v_vstate_{save,restore}()
riscv: vector: allow kernel-mode Vector with preemption
Björn Töpel (1):
riscv: Add BLAKE2s V implementation
Greentime Hu (2):
riscv: Add support for kernel mode vector
riscv: Add vector extension XOR implementation
arch/riscv/Kbuild | 2 +-
arch/riscv/Kconfig | 10 ++
arch/riscv/crypto/Kconfig | 16 +++
arch/riscv/crypto/Makefile | 6 +
arch/riscv/crypto/blake2s-glue.c | 39 ++++++
arch/riscv/crypto/blake2s-v.S | 164 +++++++++++++++++++++++++
arch/riscv/include/asm/entry-common.h | 13 ++
arch/riscv/include/asm/processor.h | 2 +
arch/riscv/include/asm/simd.h | 52 ++++++++
arch/riscv/include/asm/thread_info.h | 6 +
arch/riscv/include/asm/vector.h | 50 ++++++--
arch/riscv/include/asm/xor.h | 82 +++++++++++++
arch/riscv/kernel/Makefile | 1 +
arch/riscv/kernel/asm-offsets.c | 2 +
arch/riscv/kernel/entry.S | 45 +++++++
arch/riscv/kernel/kernel_mode_vector.c | 146 ++++++++++++++++++++++
arch/riscv/kernel/process.c | 10 +-
arch/riscv/kernel/ptrace.c | 2 +-
arch/riscv/kernel/signal.c | 4 +-
arch/riscv/kernel/vector.c | 5 +-
arch/riscv/lib/Makefile | 1 +
arch/riscv/lib/xor.S | 81 ++++++++++++
crypto/Kconfig | 3 +
drivers/net/Kconfig | 1 +
24 files changed, 725 insertions(+), 18 deletions(-)
create mode 100644 arch/riscv/crypto/Kconfig
create mode 100644 arch/riscv/crypto/Makefile
create mode 100644 arch/riscv/crypto/blake2s-glue.c
create mode 100644 arch/riscv/crypto/blake2s-v.S
create mode 100644 arch/riscv/include/asm/simd.h
create mode 100644 arch/riscv/include/asm/xor.h
create mode 100644 arch/riscv/kernel/kernel_mode_vector.c
create mode 100644 arch/riscv/lib/xor.S
base-commit: 0bb80ecc33a8fb5a682236443c1e740d5c917d1d
--
2.39.2
From: Greentime Hu <[email protected]>
This patch adds support for vector optimized XOR and it is tested in
qemu.
Co-developed-by: Han-Kuan Chen <[email protected]>
Signed-off-by: Han-Kuan Chen <[email protected]>
Signed-off-by: Greentime Hu <[email protected]>
Signed-off-by: Andy Chiu <[email protected]>
Acked-by: Conor Dooley <[email protected]>
---
arch/riscv/include/asm/xor.h | 82 ++++++++++++++++++++++++++++++++++++
arch/riscv/lib/Makefile | 1 +
arch/riscv/lib/xor.S | 81 +++++++++++++++++++++++++++++++++++
3 files changed, 164 insertions(+)
create mode 100644 arch/riscv/include/asm/xor.h
create mode 100644 arch/riscv/lib/xor.S
diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
new file mode 100644
index 000000000000..903c3275f8d0
--- /dev/null
+++ b/arch/riscv/include/asm/xor.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#ifdef CONFIG_RISCV_ISA_V
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+
+void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2); /* p1 ^= p2 over `bytes` bytes; body is in arch/riscv/lib/xor.S */
+void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3); /* p1 ^= p2 ^ p3; body is in arch/riscv/lib/xor.S */
+void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3,
+ const unsigned long *__restrict p4); /* p1 ^= p2 ^ p3 ^ p4; body is in arch/riscv/lib/xor.S */
+void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3,
+ const unsigned long *__restrict p4,
+ const unsigned long *__restrict p5); /* p1 ^= p2 ^ p3 ^ p4 ^ p5; body is in arch/riscv/lib/xor.S */
+
+static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2)
+{ /* Wrapper: runs the two-buffer assembly routine between kernel_vector_begin() and kernel_vector_end(). */
+ kernel_vector_begin(); /* NOTE(review): declared outside this patch; presumably makes the vector unit usable in kernel context - confirm */
+ xor_regs_2_(bytes, p1, p2); /* p1 receives p1 ^ p2 */
+ kernel_vector_end(); /* closes the section opened by kernel_vector_begin() */
+}
+
+static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3)
+{ /* Wrapper: runs the three-buffer assembly routine between kernel_vector_begin() and kernel_vector_end(). */
+ kernel_vector_begin(); /* NOTE(review): declared outside this patch; presumably makes the vector unit usable in kernel context - confirm */
+ xor_regs_3_(bytes, p1, p2, p3); /* p1 receives p1 ^ p2 ^ p3 */
+ kernel_vector_end(); /* closes the section opened by kernel_vector_begin() */
+}
+
+static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3,
+ const unsigned long *__restrict p4)
+{ /* Wrapper: runs the four-buffer assembly routine between kernel_vector_begin() and kernel_vector_end(). */
+ kernel_vector_begin(); /* NOTE(review): declared outside this patch; presumably makes the vector unit usable in kernel context - confirm */
+ xor_regs_4_(bytes, p1, p2, p3, p4); /* p1 receives p1 ^ p2 ^ p3 ^ p4 */
+ kernel_vector_end(); /* closes the section opened by kernel_vector_begin() */
+}
+
+static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3,
+ const unsigned long *__restrict p4,
+ const unsigned long *__restrict p5)
+{ /* Wrapper: runs the five-buffer assembly routine between kernel_vector_begin() and kernel_vector_end(). */
+ kernel_vector_begin(); /* NOTE(review): declared outside this patch; presumably makes the vector unit usable in kernel context - confirm */
+ xor_regs_5_(bytes, p1, p2, p3, p4, p5); /* p1 receives p1 ^ p2 ^ p3 ^ p4 ^ p5 */
+ kernel_vector_end(); /* closes the section opened by kernel_vector_begin() */
+}
+
+static struct xor_block_template xor_block_rvv = { /* XOR template named "rvv" that points at the vector wrappers above */
+ .name = "rvv",
+ .do_2 = xor_vector_2, /* destination plus one source */
+ .do_3 = xor_vector_3, /* destination plus two sources */
+ .do_4 = xor_vector_4, /* destination plus three sources */
+ .do_5 = xor_vector_5 /* destination plus four sources */
+};
+
+#undef XOR_TRY_TEMPLATES /* discard any previous definition before supplying the one below */
+#define XOR_TRY_TEMPLATES \
+ do { \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_32regs); \
+ if (has_vector()) { \
+ xor_speed(&xor_block_rvv);\
+ } \
+ } while (0) /* passes the 8regs and 32regs templates to xor_speed() always, and the rvv template only when has_vector() is true; xor_speed() is defined outside this patch */
+#endif
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 26cb2502ecf8..494f9cd1a00c 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -11,3 +11,4 @@ lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+lib-$(CONFIG_RISCV_ISA_V) += xor.o
diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S
new file mode 100644
index 000000000000..3bc059e18171
--- /dev/null
+++ b/arch/riscv/lib/xor.S
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+
+ENTRY(xor_regs_2_) /* args per the C prototype order: a0 = bytes, a1 = p1, a2 = p2; the entry label doubles as the loop head */
+ vsetvli a3, a0, e8, m8, ta, ma /* a3 = bytes handled this pass (byte elements, groups of 8 vector registers) */
+ vle8.v v0, (a1) /* load chunk of p1 */
+ vle8.v v8, (a2) /* load chunk of p2 */
+ sub a0, a0, a3 /* bytes still to process */
+ vxor.vv v16, v0, v8 /* v16 = p1 ^ p2 */
+ add a2, a2, a3 /* advance p2 */
+ vse8.v v16, (a1) /* write result back to p1 */
+ add a1, a1, a3 /* advance p1 only after the store has used it */
+ bnez a0, xor_regs_2_ /* repeat from the top while bytes remain */
+ ret
+END(xor_regs_2_)
+EXPORT_SYMBOL(xor_regs_2_)
+
+ENTRY(xor_regs_3_) /* args per the C prototype order: a0 = bytes, a1 = p1, a2 = p2, a3 = p3; the entry label doubles as the loop head */
+ vsetvli a4, a0, e8, m8, ta, ma /* a4 = bytes handled this pass (byte elements, groups of 8 vector registers) */
+ vle8.v v0, (a1) /* load chunk of p1 */
+ vle8.v v8, (a2) /* load chunk of p2 */
+ sub a0, a0, a4 /* bytes still to process */
+ vxor.vv v0, v0, v8 /* v0 = p1 ^ p2 */
+ vle8.v v16, (a3) /* load chunk of p3 */
+ add a2, a2, a4 /* advance p2 */
+ vxor.vv v16, v0, v16 /* v16 = p1 ^ p2 ^ p3 */
+ add a3, a3, a4 /* advance p3 */
+ vse8.v v16, (a1) /* write result back to p1 */
+ add a1, a1, a4 /* advance p1 only after the store has used it */
+ bnez a0, xor_regs_3_ /* repeat from the top while bytes remain */
+ ret
+END(xor_regs_3_)
+EXPORT_SYMBOL(xor_regs_3_)
+
+ENTRY(xor_regs_4_) /* args per the C prototype order: a0 = bytes, a1 = p1, a2 = p2, a3 = p3, a4 = p4; the entry label doubles as the loop head */
+ vsetvli a5, a0, e8, m8, ta, ma /* a5 = bytes handled this pass (byte elements, groups of 8 vector registers) */
+ vle8.v v0, (a1) /* load chunk of p1 */
+ vle8.v v8, (a2) /* load chunk of p2 */
+ sub a0, a0, a5 /* bytes still to process */
+ vxor.vv v0, v0, v8 /* v0 = p1 ^ p2 */
+ vle8.v v16, (a3) /* load chunk of p3 */
+ add a2, a2, a5 /* advance p2 */
+ vxor.vv v0, v0, v16 /* v0 ^= p3 */
+ vle8.v v24, (a4) /* load chunk of p4 */
+ add a3, a3, a5 /* advance p3 */
+ vxor.vv v16, v0, v24 /* v16 = p1 ^ p2 ^ p3 ^ p4 */
+ add a4, a4, a5 /* advance p4 */
+ vse8.v v16, (a1) /* write result back to p1 */
+ add a1, a1, a5 /* advance p1 only after the store has used it */
+ bnez a0, xor_regs_4_ /* repeat from the top while bytes remain */
+ ret
+END(xor_regs_4_)
+EXPORT_SYMBOL(xor_regs_4_)
+
+ENTRY(xor_regs_5_) /* args per the C prototype order: a0 = bytes, a1 = p1, a2 = p2, a3 = p3, a4 = p4, a5 = p5; the entry label doubles as the loop head */
+ vsetvli a6, a0, e8, m8, ta, ma /* a6 = bytes handled this pass (byte elements, groups of 8 vector registers) */
+ vle8.v v0, (a1) /* load chunk of p1 */
+ vle8.v v8, (a2) /* load chunk of p2 */
+ sub a0, a0, a6 /* bytes still to process */
+ vxor.vv v0, v0, v8 /* v0 = p1 ^ p2 */
+ vle8.v v16, (a3) /* load chunk of p3 */
+ add a2, a2, a6 /* advance p2 */
+ vxor.vv v0, v0, v16 /* v0 ^= p3 */
+ vle8.v v24, (a4) /* load chunk of p4 */
+ add a3, a3, a6 /* advance p3 */
+ vxor.vv v0, v0, v24 /* v0 ^= p4 */
+ vle8.v v8, (a5) /* load chunk of p5; v8 is free to reuse because the p2 data is already folded into v0 */
+ add a4, a4, a6 /* advance p4 */
+ vxor.vv v16, v0, v8 /* v16 = p1 ^ p2 ^ p3 ^ p4 ^ p5 */
+ add a5, a5, a6 /* advance p5 */
+ vse8.v v16, (a1) /* write result back to p1 */
+ add a1, a1, a6 /* advance p1 only after the store has used it */
+ bnez a0, xor_regs_5_ /* repeat from the top while bytes remain */
+ ret
+END(xor_regs_5_)
+EXPORT_SYMBOL(xor_regs_5_)
--
2.39.2
Conor Dooley <[email protected]> writes:
> On Thu, Sep 14, 2023 at 02:59:30PM +0200, Björn Töpel wrote:
>> Conor Dooley <[email protected]> writes:
>>
>> > On Tue, Sep 12, 2023 at 01:57:22PM +0200, Björn Töpel wrote:
>> >> From: Björn Töpel <[email protected]>
>> >>
>> >> Hi,
>> >>
>> >> This is Andy's kernel mode vector V2 series [1], with my BLAKE2s
>> >> AVX-512-to-RISC-V translation patch appended.
>> >>
>> >> I've tagged it as RFC, since Andy's series is still not in-tree yet.
>> >>
>> >> It's a first step towards a Vector aided Wireguard! ;-)
>> >
>> > This has the same problems as Andy's stuff & doesn't build properly for the
>> > automation. What is the plan between yourself and Andy for submitting a
>> > version of the in-kernel vector support that passes build testing?
>>
>> I'll synch up with Andy! I'm not even sure the blake2s patch should be part
>> of the "in-kernel vector" series at all.
>
> The in-kernel vector stuff should come with a user, otherwise it's dead
> code :)
Sure, just so we're on the same page; Patch 3 (Vector XOR) is a user
from my perspective, no?
On Thu, Sep 14, 2023 at 10:18 PM Conor Dooley <[email protected]> wrote:
>
> On Thu, Sep 14, 2023 at 04:15:10PM +0200, Björn Töpel wrote:
> > Conor Dooley <[email protected]> writes:
> >
> > > On Thu, Sep 14, 2023 at 02:59:30PM +0200, Björn Töpel wrote:
> > >> Conor Dooley <[email protected]> writes:
> > >>
> > >> > On Tue, Sep 12, 2023 at 01:57:22PM +0200, Björn Töpel wrote:
> > >> >> From: Björn Töpel <[email protected]>
> > >> >>
> > >> >> Hi,
> > >> >>
> > >> >> This is Andy's kernel mode vector V2 series [1], with my BLAKE2s
> > >> >> AVX-512-to-RISC-V translation patch appended.
> > >> >>
> > >> >> I've tagged it as RFC, since Andy's series is still not in-tree yet.
> > >> >>
> > >> >> It's a first step towards a Vector aided Wireguard! ;-)
> > >> >
> > >> > This has the same problems as Andy's stuff & doesn't build properly for the
> > >> > automation. What is the plan between yourself and Andy for submitting a
> > >> > version of the in-kernel vector support that passes build testing?
> > >>
> > >> I'll synch up with Andy! I'm not even sure the blake2s patch should be part
> > >> of the "in-kernel vector" series at all.
Hi, yes, I plan to fix it soon. Please expect a respin of
the kernel-mode vector series in 1~2 weeks, if this doesn't seem too long to
you.
> > >
> > > The in-kernel vector stuff should come with a user, otherwise it's dead
> > > code :)
> >
> > Sure, just so we're on the same page; Patch 3 (Vector XOR) is a user
> > from my perspective, no?
>
> D'oh
Thanks,
Andy
On Thu, Sep 14, 2023 at 02:59:30PM +0200, Björn Töpel wrote:
> Conor Dooley <[email protected]> writes:
>
> > On Tue, Sep 12, 2023 at 01:57:22PM +0200, Björn Töpel wrote:
> >> From: Björn Töpel <[email protected]>
> >>
> >> Hi,
> >>
> >> This is Andy's kernel mode vector V2 series [1], with my BLAKE2s
> >> AVX-512-to-RISC-V translation patch appended.
> >>
> >> I've tagged it as RFC, since Andy's series is still not in-tree yet.
> >>
> >> It's a first step towards a Vector aided Wireguard! ;-)
> >
> > This has the same problems as Andy's stuff & doesn't build properly for the
> > automation. What is the plan between yourself and Andy for submitting a
> > version of the in-kernel vector support that passes build testing?
>
> I'll synch up with Andy! I'm not even sure the blake2s patch should be part
> of the "in-kernel vector" series at all.
The in-kernel vector stuff should come with a user, otherwise it's dead
code :)