2023-06-22 23:23:09

by Heiko Stübner

[permalink] [raw]
Subject: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling

From: Heiko Stuebner <[email protected]>

T-Head C9xx cores implement an older version (0.7.1) of the vector
specification.

Relevant changes concerning the kernel are:
- different placement of the SR_VS bit for the vector unit status
- different encoding of the vsetvli instruction
- different instructions for loads and stores

And a fixed VLEN of 128.

The in-kernel access to vector instances is limited to the save and
restore of process states so the above mentioned areas can simply be
handled via the alternatives framework, similar to other T-Head specific
issues.

Signed-off-by: Heiko Stuebner <[email protected]>
---
arch/riscv/Kconfig.errata | 13 +++
arch/riscv/errata/thead/errata.c | 32 ++++++
arch/riscv/include/asm/csr.h | 24 ++++-
arch/riscv/include/asm/errata_list.h | 45 ++++++++-
arch/riscv/include/asm/vector.h | 139 +++++++++++++++++++++++++--
arch/riscv/kernel/vector.c | 2 +-
6 files changed, 238 insertions(+), 17 deletions(-)

diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
index 0c8f4652cd82..b461312dd452 100644
--- a/arch/riscv/Kconfig.errata
+++ b/arch/riscv/Kconfig.errata
@@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU

If you don't know what to do here, say "Y".

+config ERRATA_THEAD_VECTOR
+ bool "Apply T-Head Vector errata"
+ depends on ERRATA_THEAD && RISCV_ISA_V
+ default y
+ help
+ The T-Head C9xx cores implement an earlier version 0.7.1
+ of the vector extensions.
+
+ This will apply the necessary errata to handle the non-standard
+ behaviour via when switch to and from vector mode for processes.
+
+ If you don't know what to do here, say "Y".
+
endmenu # "CPU errata selection"
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index c259dc925ec1..c41ec84bc8a5 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -15,6 +15,7 @@
#include <asm/errata_list.h>
#include <asm/hwprobe.h>
#include <asm/patch.h>
+#include <asm/vector.h>
#include <asm/vendorid_list.h>

static bool errata_probe_pbmt(unsigned int stage,
@@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage,
return true;
}

+static bool errata_probe_vector(unsigned int stage,
+ unsigned long arch_id, unsigned long impid)
+{
+ if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR))
+ return false;
+
+ /* target-c9xx cores report arch_id and impid as 0 */
+ if (arch_id != 0 || impid != 0)
+ return false;
+
+ if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) {
+ /*
+ * Disable VECTOR to detect illegal usage of vector in kernel.
+ * This is normally done in _start_kernel but with the
+ * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's
+ * vector-0.7.1 and the vector-1.0-bits are unused there.
+ */
+ csr_clear(CSR_STATUS, SR_VS_THEAD);
+ return false;
+ }
+
+ /* let has_vector() return true and set the static vlen */
+ riscv_vector_supported();
+ riscv_v_vsize = 128 / 8 * 32;
+
+ return true;
+}
+
static u32 thead_errata_probe(unsigned int stage,
unsigned long archid, unsigned long impid)
{
@@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage,
if (errata_probe_pmu(stage, archid, impid))
cpu_req_errata |= BIT(ERRATA_THEAD_PMU);

+ if (errata_probe_vector(stage, archid, impid))
+ cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR);
+
return cpu_req_errata;
}

diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 2d79bca6ffe8..521b3b939e51 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -24,11 +24,25 @@
#define SR_FS_CLEAN _AC(0x00004000, UL)
#define SR_FS_DIRTY _AC(0x00006000, UL)

-#define SR_VS _AC(0x00000600, UL) /* Vector Status */
-#define SR_VS_OFF _AC(0x00000000, UL)
-#define SR_VS_INITIAL _AC(0x00000200, UL)
-#define SR_VS_CLEAN _AC(0x00000400, UL)
-#define SR_VS_DIRTY _AC(0x00000600, UL)
+#define SR_VS_OFF _AC(0x00000000, UL)
+
+#define SR_VS_1_0 _AC(0x00000600, UL) /* Vector Status */
+#define SR_VS_INITIAL_1_0 _AC(0x00000200, UL)
+#define SR_VS_CLEAN_1_0 _AC(0x00000400, UL)
+#define SR_VS_DIRTY_1_0 _AC(0x00000600, UL)
+
+#define SR_VS_THEAD _AC(0x01800000, UL) /* Vector Status */
+#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL)
+#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL)
+#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL)
+
+/*
+ * Always default to vector-1.0 handling in assembly and let the broken
+ * implementations handle their case separately.
+ */
+#ifdef __ASSEMBLY__
+#define SR_VS SR_VS_1_0
+#endif

#define SR_XS _AC(0x00018000, UL) /* Extension Status */
#define SR_XS_OFF _AC(0x00000000, UL)
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index fb1a810f3d8c..ab21fadbe9c6 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -21,7 +21,8 @@
#define ERRATA_THEAD_PBMT 0
#define ERRATA_THEAD_CMO 1
#define ERRATA_THEAD_PMU 2
-#define ERRATA_THEAD_NUMBER 3
+#define ERRATA_THEAD_VECTOR 3
+#define ERRATA_THEAD_NUMBER 4
#endif

#ifdef __ASSEMBLY__
@@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE( \
: "=r" (__ovl) : \
: "memory")

+#ifdef CONFIG_ERRATA_THEAD_VECTOR
+
+#define THEAD_C9XX_CSR_VXSAT 0x9
+#define THEAD_C9XX_CSR_VXRM 0xa
+
+/*
+ * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
+ * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
+ * vsetvli t4, x0, e8, m8, d1
+ */
+#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t"
+
+/*
+ * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
+ * encoding as the standard vse8.v and vle8.v, compilers seem to optimize
+ * the call resulting in a different encoding and then using a value for
+ * the "mop" field that is not part of vector-0.7.1
+ * So encode specific variants for vstate_save and _restore.
+ */
+#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t"
+#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t"
+#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t"
+#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t"
+#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t"
+#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t"
+#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t"
+#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t"
+
+#define ALT_SR_VS_VECTOR_1_0_SHIFT 9
+#define ALT_SR_VS_THEAD_SHIFT 23
+
+#define ALT_SR_VS(_val, prot) \
+asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3", \
+ "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \
+ ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \
+ : "=r"(_val) \
+ : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT), \
+ "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT), \
+ "I"(ALT_SR_VS_VECTOR_1_0_SHIFT), \
+ "I"(ALT_SR_VS_THEAD_SHIFT))
+#endif /* CONFIG_ERRATA_THEAD_VECTOR */
+
#endif /* __ASSEMBLY__ */

#endif
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 315c96d2b4d0..fa47f60f81e3 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -18,6 +18,55 @@
#include <asm/hwcap.h>
#include <asm/csr.h>
#include <asm/asm.h>
+#include <asm/errata_list.h>
+
+#ifdef CONFIG_ERRATA_THEAD_VECTOR
+
+static inline unsigned long riscv_sr_vs(void)
+{
+ u32 val;
+
+ ALT_SR_VS(val, SR_VS);
+ return val;
+}
+
+static inline unsigned long riscv_sr_vs_initial(void)
+{
+ u32 val;
+
+ ALT_SR_VS(val, SR_VS_INITIAL);
+ return val;
+}
+
+static inline unsigned long riscv_sr_vs_clean(void)
+{
+ u32 val;
+
+ ALT_SR_VS(val, SR_VS_CLEAN);
+ return val;
+}
+
+static inline unsigned long riscv_sr_vs_dirty(void)
+{
+ u32 val;
+
+ ALT_SR_VS(val, SR_VS_DIRTY);
+ return val;
+}
+
+#define SR_VS riscv_sr_vs()
+#define SR_VS_INITIAL riscv_sr_vs_initial()
+#define SR_VS_CLEAN riscv_sr_vs_clean()
+#define SR_VS_DIRTY riscv_sr_vs_dirty()
+
+#else /* CONFIG_ERRATA_THEAD_VECTOR */
+
+#define SR_VS SR_VS_1_0
+#define SR_VS_INITIAL SR_VS_INITIAL_1_0
+#define SR_VS_CLEAN SR_VS_CLEAN_1_0
+#define SR_VS_DIRTY SR_VS_DIRTY_1_0
+
+#endif /* CONFIG_ERRATA_THEAD_VECTOR */

extern bool riscv_v_supported;
void riscv_vector_supported(void);
@@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void)

static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
{
- asm volatile (
+ register u32 t1 asm("t1") = (SR_FS);
+
+ /*
+ * CSR_VCSR is defined as
+ * [2:1] - vxrm[1:0]
+ * [0] - vxsat
+ * The earlier vector spec implemented by T-Head uses separate
+ * registers for the same bit-elements, so just combine those
+ * into the existing output field.
+ *
+ * Additionally T-Head cores need FS to be enabled when accessing
+ * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
+ * Though the cores do not implement the VXRM and VXSAT fields in the
+ * FCSR CSR that vector-0.7.1 specifies.
+ */
+ asm volatile (ALTERNATIVE(
"csrr %0, " __stringify(CSR_VSTART) "\n\t"
"csrr %1, " __stringify(CSR_VTYPE) "\n\t"
"csrr %2, " __stringify(CSR_VL) "\n\t"
"csrr %3, " __stringify(CSR_VCSR) "\n\t"
+ __nops(5),
+ "csrs sstatus, t1\n\t"
+ "csrr %0, " __stringify(CSR_VSTART) "\n\t"
+ "csrr %1, " __stringify(CSR_VTYPE) "\n\t"
+ "csrr %2, " __stringify(CSR_VL) "\n\t"
+ "csrr %3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t"
+ "slliw %3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
+ "csrr t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t"
+ "or %3, %3, t4\n\t"
+ "csrc sstatus, t1\n\t",
+ THEAD_VENDOR_ID,
+ ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
: "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
- "=r" (dest->vcsr) : :);
+ "=r" (dest->vcsr) : "r"(t1) : "t4");
}

static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
{
- asm volatile (
+ register u32 t1 asm("t1") = (SR_FS);
+
+ /*
+ * Similar to __vstate_csr_save above, restore values for the
+ * separate VXRM and VXSAT CSRs from the vcsr variable.
+ */
+ asm volatile (ALTERNATIVE(
".option push\n\t"
".option arch, +v\n\t"
"vsetvl x0, %2, %1\n\t"
".option pop\n\t"
"csrw " __stringify(CSR_VSTART) ", %0\n\t"
"csrw " __stringify(CSR_VCSR) ", %3\n\t"
+ __nops(6),
+ "csrs sstatus, t1\n\t"
+ ".option push\n\t"
+ ".option arch, +v\n\t"
+ "vsetvl x0, %2, %1\n\t"
+ ".option pop\n\t"
+ "csrw " __stringify(CSR_VSTART) ", %0\n\t"
+ "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
+ "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t"
+ "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t"
+ "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t"
+ "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t"
+ "csrc sstatus, t1\n\t",
+ THEAD_VENDOR_ID,
+ ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
- "r" (src->vcsr) :);
+ "r" (src->vcsr), "r"(t1) : "t4");
}

static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
@@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,

riscv_v_enable();
__vstate_csr_save(save_to);
- asm volatile (
+ asm volatile (ALTERNATIVE(
+ "nop\n\t"
".option push\n\t"
".option arch, +v\n\t"
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
@@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
"vse8.v v16, (%1)\n\t"
"add %1, %1, %0\n\t"
"vse8.v v24, (%1)\n\t"
- ".option pop\n\t"
- : "=&r" (vl) : "r" (datap) : "memory");
+ ".option pop\n\t",
+ "mv t0, %1\n\t"
+ THEAD_VSETVLI_T4X0E8M8D1
+ THEAD_VSB_V_V0T0
+ "addi t0, t0, 128\n\t"
+ THEAD_VSB_V_V8T0
+ "addi t0, t0, 128\n\t"
+ THEAD_VSB_V_V16T0
+ "addi t0, t0, 128\n\t"
+ THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
+ ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
+ : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");
riscv_v_disable();
}

@@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
unsigned long vl;

riscv_v_enable();
- asm volatile (
+ asm volatile (ALTERNATIVE(
+ "nop\n\t"
".option push\n\t"
".option arch, +v\n\t"
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
@@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
"vle8.v v16, (%1)\n\t"
"add %1, %1, %0\n\t"
"vle8.v v24, (%1)\n\t"
- ".option pop\n\t"
- : "=&r" (vl) : "r" (datap) : "memory");
+ ".option pop\n\t",
+ "mv t0, %1\n\t"
+ THEAD_VSETVLI_T4X0E8M8D1
+ THEAD_VLB_V_V0T0
+ "addi t0, t0, 128\n\t"
+ THEAD_VLB_V_V8T0
+ "addi t0, t0, 128\n\t"
+ THEAD_VLB_V_V16T0
+ "addi t0, t0, 128\n\t"
+ THEAD_VLB_V_V24T0, THEAD_VENDOR_ID,
+ ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
+ : "=&r" (vl) : "r" (datap) : "t0", "t4");
__vstate_csr_restore(restore_from);
riscv_v_disable();
}
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
index 74178fb71805..51726890a4d0 100644
--- a/arch/riscv/kernel/vector.c
+++ b/arch/riscv/kernel/vector.c
@@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
u32 insn = (u32)regs->badaddr;

/* Do not handle if V is not supported, or disabled */
- if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V))
+ if (!has_vector())
return false;

/* If V has been enabled then it is not the first-use trap */
--
2.39.2



2023-06-23 03:52:10

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling

Hi Heiko,

kernel test robot noticed the following build errors:

[auto build test ERROR on next-20230622]
[cannot apply to linus/master v6.4-rc7 v6.4-rc6 v6.4-rc5 v6.4-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Heiko-Stuebner/RISC-V-move-vector-available-status-into-a-dedicated-variable/20230623-081314
base: next-20230622
patch link: https://lore.kernel.org/r/20230622231305.631331-4-heiko%40sntech.de
patch subject: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling
config: riscv-randconfig-r042-20230622 (https://download.01.org/0day-ci/archive/20230623/[email protected]/config)
compiler: riscv64-linux-gcc (GCC) 12.3.0
reproduce: (https://download.01.org/0day-ci/archive/20230623/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

In file included from arch/riscv/include/asm/alternative.h:9,
from arch/riscv/include/asm/errata_list.h:8,
from arch/riscv/include/asm/tlbflush.h:12,
from arch/riscv/include/asm/pgtable.h:112,
from arch/riscv/include/asm/uaccess.h:12,
from include/linux/uaccess.h:11,
from include/linux/sched/task.h:11,
from include/linux/sched/signal.h:9,
from include/linux/rcuwait.h:6,
from include/linux/percpu-rwsem.h:7,
from include/linux/fs.h:33,
from include/linux/compat.h:17,
from arch/riscv/include/asm/elf.h:12,
from include/linux/elf.h:6,
from include/linux/module.h:19,
from include/linux/device/driver.h:21,
from include/linux/device.h:32,
from include/linux/energy_model.h:5,
from kernel/sched/fair.c:23:
arch/riscv/include/asm/vector.h: In function '__riscv_v_vstate_save':
>> arch/riscv/include/asm/vector.h:206:17: error: expected ':' or ')' before 'THEAD_VSETVLI_T4X0E8M8D1'
206 | THEAD_VSETVLI_T4X0E8M8D1
| ^~~~~~~~~~~~~~~~~~~~~~~~
arch/riscv/include/asm/alternative-macros.h:78:9: note: in definition of macro 'ALT_NEW_CONTENT'
78 | new_c "\n" \
| ^~~~~
arch/riscv/include/asm/alternative-macros.h:104:9: note: in expansion of macro '__ALTERNATIVE_CFG'
104 | __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k))
| ^~~~~~~~~~~~~~~~~
arch/riscv/include/asm/alternative-macros.h:152:9: note: in expansion of macro '_ALTERNATIVE_CFG'
152 | _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k)
| ^~~~~~~~~~~~~~~~
arch/riscv/include/asm/vector.h:192:23: note: in expansion of macro 'ALTERNATIVE'
192 | asm volatile (ALTERNATIVE(
| ^~~~~~~~~~~
In file included from arch/riscv/include/asm/switch_to.h:11,
from kernel/sched/fair.c:51:
arch/riscv/include/asm/vector.h:188:23: warning: unused variable 'vl' [-Wunused-variable]
188 | unsigned long vl;
| ^~
arch/riscv/include/asm/vector.h: In function '__riscv_v_vstate_restore':
arch/riscv/include/asm/vector.h:239:17: error: expected ':' or ')' before 'THEAD_VSETVLI_T4X0E8M8D1'
239 | THEAD_VSETVLI_T4X0E8M8D1
| ^~~~~~~~~~~~~~~~~~~~~~~~
arch/riscv/include/asm/alternative-macros.h:78:9: note: in definition of macro 'ALT_NEW_CONTENT'
78 | new_c "\n" \
| ^~~~~
arch/riscv/include/asm/alternative-macros.h:104:9: note: in expansion of macro '__ALTERNATIVE_CFG'
104 | __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k))
| ^~~~~~~~~~~~~~~~~
arch/riscv/include/asm/alternative-macros.h:152:9: note: in expansion of macro '_ALTERNATIVE_CFG'
152 | _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k)
| ^~~~~~~~~~~~~~~~
arch/riscv/include/asm/vector.h:225:23: note: in expansion of macro 'ALTERNATIVE'
225 | asm volatile (ALTERNATIVE(
| ^~~~~~~~~~~
arch/riscv/include/asm/vector.h:222:23: warning: unused variable 'vl' [-Wunused-variable]
222 | unsigned long vl;
| ^~
kernel/sched/fair.c: At top level:
kernel/sched/fair.c:688:5: warning: no previous prototype for 'sched_update_scaling' [-Wmissing-prototypes]
688 | int sched_update_scaling(void)
| ^~~~~~~~~~~~~~~~~~~~
--
In file included from arch/riscv/include/asm/alternative.h:9,
from arch/riscv/include/asm/errata_list.h:8,
from arch/riscv/include/asm/tlbflush.h:12,
from arch/riscv/include/asm/pgtable.h:112,
from arch/riscv/include/asm/uaccess.h:12,
from include/linux/uaccess.h:11,
from include/linux/sched/task.h:11,
from include/linux/sched/signal.h:9,
from include/linux/sched/cputime.h:5,
from kernel/sched/build_utility.c:13:
arch/riscv/include/asm/vector.h: In function '__riscv_v_vstate_save':
>> arch/riscv/include/asm/vector.h:206:17: error: expected ':' or ')' before 'THEAD_VSETVLI_T4X0E8M8D1'
206 | THEAD_VSETVLI_T4X0E8M8D1
| ^~~~~~~~~~~~~~~~~~~~~~~~
arch/riscv/include/asm/alternative-macros.h:78:9: note: in definition of macro 'ALT_NEW_CONTENT'
78 | new_c "\n" \
| ^~~~~
arch/riscv/include/asm/alternative-macros.h:104:9: note: in expansion of macro '__ALTERNATIVE_CFG'
104 | __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k))
| ^~~~~~~~~~~~~~~~~
arch/riscv/include/asm/alternative-macros.h:152:9: note: in expansion of macro '_ALTERNATIVE_CFG'
152 | _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k)
| ^~~~~~~~~~~~~~~~
arch/riscv/include/asm/vector.h:192:23: note: in expansion of macro 'ALTERNATIVE'
192 | asm volatile (ALTERNATIVE(
| ^~~~~~~~~~~
In file included from arch/riscv/include/asm/switch_to.h:11,
from kernel/sched/build_utility.c:51:
arch/riscv/include/asm/vector.h:188:23: warning: unused variable 'vl' [-Wunused-variable]
188 | unsigned long vl;
| ^~
arch/riscv/include/asm/vector.h: In function '__riscv_v_vstate_restore':
arch/riscv/include/asm/vector.h:239:17: error: expected ':' or ')' before 'THEAD_VSETVLI_T4X0E8M8D1'
239 | THEAD_VSETVLI_T4X0E8M8D1
| ^~~~~~~~~~~~~~~~~~~~~~~~
arch/riscv/include/asm/alternative-macros.h:78:9: note: in definition of macro 'ALT_NEW_CONTENT'
78 | new_c "\n" \
| ^~~~~
arch/riscv/include/asm/alternative-macros.h:104:9: note: in expansion of macro '__ALTERNATIVE_CFG'
104 | __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k))
| ^~~~~~~~~~~~~~~~~
arch/riscv/include/asm/alternative-macros.h:152:9: note: in expansion of macro '_ALTERNATIVE_CFG'
152 | _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k)
| ^~~~~~~~~~~~~~~~
arch/riscv/include/asm/vector.h:225:23: note: in expansion of macro 'ALTERNATIVE'
225 | asm volatile (ALTERNATIVE(
| ^~~~~~~~~~~
arch/riscv/include/asm/vector.h:222:23: warning: unused variable 'vl' [-Wunused-variable]
222 | unsigned long vl;
| ^~


vim +206 arch/riscv/include/asm/vector.h

184
185 static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
186 void *datap)
187 {
188 unsigned long vl;
189
190 riscv_v_enable();
191 __vstate_csr_save(save_to);
192 asm volatile (ALTERNATIVE(
193 "nop\n\t"
194 ".option push\n\t"
195 ".option arch, +v\n\t"
196 "vsetvli %0, x0, e8, m8, ta, ma\n\t"
197 "vse8.v v0, (%1)\n\t"
198 "add %1, %1, %0\n\t"
199 "vse8.v v8, (%1)\n\t"
200 "add %1, %1, %0\n\t"
201 "vse8.v v16, (%1)\n\t"
202 "add %1, %1, %0\n\t"
203 "vse8.v v24, (%1)\n\t"
204 ".option pop\n\t",
205 "mv t0, %1\n\t"
> 206 THEAD_VSETVLI_T4X0E8M8D1
207 THEAD_VSB_V_V0T0
208 "addi t0, t0, 128\n\t"
209 THEAD_VSB_V_V8T0
210 "addi t0, t0, 128\n\t"
211 THEAD_VSB_V_V16T0
212 "addi t0, t0, 128\n\t"
213 THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
214 ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
215 : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");
216 riscv_v_disable();
217 }
218

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-06-23 09:57:12

by Conor Dooley

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling

Hey Heiko,

On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote:
> From: Heiko Stuebner <[email protected]>
>
> T-Head C9xx cores implement an older version (0.7.1) of the vector
> specification.
>
> Relevant changes concerning the kernel are:
> - different placement of the SR_VS bit for the vector unit status
> - different encoding of the vsetvli instruction
> - different instructions for loads and stores
>
> And a fixed VLEN of 128.
>
> The in-kernel access to vector instances is limited to the save and
> restore of process states so the above mentioned areas can simply be
> handled via the alternatives framework, similar to other T-Head specific
> issues.
>
> Signed-off-by: Heiko Stuebner <[email protected]>
> ---
> arch/riscv/Kconfig.errata | 13 +++
> arch/riscv/errata/thead/errata.c | 32 ++++++
> arch/riscv/include/asm/csr.h | 24 ++++-
> arch/riscv/include/asm/errata_list.h | 45 ++++++++-
> arch/riscv/include/asm/vector.h | 139 +++++++++++++++++++++++++--
> arch/riscv/kernel/vector.c | 2 +-
> 6 files changed, 238 insertions(+), 17 deletions(-)
>
> diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
> index 0c8f4652cd82..b461312dd452 100644
> --- a/arch/riscv/Kconfig.errata
> +++ b/arch/riscv/Kconfig.errata
> @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU
>
> If you don't know what to do here, say "Y".
>
> +config ERRATA_THEAD_VECTOR
> + bool "Apply T-Head Vector errata"
> + depends on ERRATA_THEAD && RISCV_ISA_V
> + default y
> + help
> + The T-Head C9xx cores implement an earlier version 0.7.1
> + of the vector extensions.
> +
> + This will apply the necessary errata to handle the non-standard
> + behaviour via when switch to and from vector mode for processes.
> +
> + If you don't know what to do here, say "Y".
> +
> endmenu # "CPU errata selection"
> diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> index c259dc925ec1..c41ec84bc8a5 100644
> --- a/arch/riscv/errata/thead/errata.c
> +++ b/arch/riscv/errata/thead/errata.c
> @@ -15,6 +15,7 @@
> #include <asm/errata_list.h>
> #include <asm/hwprobe.h>
> #include <asm/patch.h>
> +#include <asm/vector.h>
> #include <asm/vendorid_list.h>
>
> static bool errata_probe_pbmt(unsigned int stage,
> @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage,
> return true;
> }
>
> +static bool errata_probe_vector(unsigned int stage,
> + unsigned long arch_id, unsigned long impid)
> +{
> + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR))
> + return false;
> +
> + /* target-c9xx cores report arch_id and impid as 0 */
> + if (arch_id != 0 || impid != 0)
> + return false;
> +
> + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) {
> + /*
> + * Disable VECTOR to detect illegal usage of vector in kernel.
> + * This is normally done in _start_kernel but with the
> + * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's
> + * vector-0.7.1 and the vector-1.0-bits are unused there.
> + */
> + csr_clear(CSR_STATUS, SR_VS_THEAD);
> + return false;
> + }
> +
> + /* let has_vector() return true and set the static vlen */

Hmm, I was wondering about how you were going to communicate this to
userspace, since you're not going to be setting "v" in your DT, so
there'll be nothing in /proc/cpuinfo indicating it. (I am assuming that
this is your intention, as you'd not need to drop the alternative-based
stuff from has_vector() if it wasn't)

I don't think you can do this, as things stand, because of how hwprobe
operates:

static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
const struct cpumask *cpus)
{
...

if (has_vector())
pair->value |= RISCV_HWPROBE_IMA_V;

...
}

* :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by
version 1.0 of the RISC-V Vector extension manual.

You'll need to change hwprobe to use has_vector() &&
riscv_has_extension_unlikely(v), or similar, as the condition for
reporting. You'll also need some other way to communicate to userspace
that T-Head's vector stuff is supported, no?

I'm also _really_ unconvinced that turning on extensions that were not
indicated in the DT or via ACPI is something we should be doing. Have I
missed something here that'd make that assessment inaccurate?

Cheers,
Conor.

FWIW I am currently working on kernel-side support for the new
extension properties that I have posted dt-binding patches for.
I'll go post it once Palmer has merged his current set of patches in his
staging repo into for-next, as I've got a lot of deps on riscv,isa
parser changes.
I'm really hoping that it provides an easier way to go off probing for
vendor specific stuff for DT-based systems, since it will no longer
require complex probing, just an of_property_match_string() for each
possible cpu and we could very well provide a vendor hook during that
process.
Clearly though, that stuff is not yet merged as it has not even been
posted yet.

Current WIP of that is here:
https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/log/?h=riscv-extensions-strings-on-palmer

> + riscv_vector_supported();
> + riscv_v_vsize = 128 / 8 * 32;
> +
> + return true;
> +}
> +
> static u32 thead_errata_probe(unsigned int stage,
> unsigned long archid, unsigned long impid)
> {
> @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage,
> if (errata_probe_pmu(stage, archid, impid))
> cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
>
> + if (errata_probe_vector(stage, archid, impid))
> + cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR);
> +
> return cpu_req_errata;
> }
>
> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> index 2d79bca6ffe8..521b3b939e51 100644
> --- a/arch/riscv/include/asm/csr.h
> +++ b/arch/riscv/include/asm/csr.h
> @@ -24,11 +24,25 @@
> #define SR_FS_CLEAN _AC(0x00004000, UL)
> #define SR_FS_DIRTY _AC(0x00006000, UL)
>
> -#define SR_VS _AC(0x00000600, UL) /* Vector Status */
> -#define SR_VS_OFF _AC(0x00000000, UL)
> -#define SR_VS_INITIAL _AC(0x00000200, UL)
> -#define SR_VS_CLEAN _AC(0x00000400, UL)
> -#define SR_VS_DIRTY _AC(0x00000600, UL)
> +#define SR_VS_OFF _AC(0x00000000, UL)
> +
> +#define SR_VS_1_0 _AC(0x00000600, UL) /* Vector Status */
> +#define SR_VS_INITIAL_1_0 _AC(0x00000200, UL)
> +#define SR_VS_CLEAN_1_0 _AC(0x00000400, UL)
> +#define SR_VS_DIRTY_1_0 _AC(0x00000600, UL)
> +
> +#define SR_VS_THEAD _AC(0x01800000, UL) /* Vector Status */
> +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL)
> +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL)
> +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL)
> +
> +/*
> + * Always default to vector-1.0 handling in assembly and let the broken
> + * implementations handle their case separately.
> + */
> +#ifdef __ASSEMBLY__
> +#define SR_VS SR_VS_1_0
> +#endif
>
> #define SR_XS _AC(0x00018000, UL) /* Extension Status */
> #define SR_XS_OFF _AC(0x00000000, UL)
> diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> index fb1a810f3d8c..ab21fadbe9c6 100644
> --- a/arch/riscv/include/asm/errata_list.h
> +++ b/arch/riscv/include/asm/errata_list.h
> @@ -21,7 +21,8 @@
> #define ERRATA_THEAD_PBMT 0
> #define ERRATA_THEAD_CMO 1
> #define ERRATA_THEAD_PMU 2
> -#define ERRATA_THEAD_NUMBER 3
> +#define ERRATA_THEAD_VECTOR 3
> +#define ERRATA_THEAD_NUMBER 4
> #endif
>
> #ifdef __ASSEMBLY__
> @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE( \
> : "=r" (__ovl) : \
> : "memory")
>
> +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> +
> +#define THEAD_C9XX_CSR_VXSAT 0x9
> +#define THEAD_C9XX_CSR_VXRM 0xa
> +
> +/*
> + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
> + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
> + * vsetvli t4, x0, e8, m8, d1
> + */
> +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t"
> +
> +/*
> + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
> + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize
> + * the call resulting in a different encoding and then using a value for
> + * the "mop" field that is not part of vector-0.7.1
> + * So encode specific variants for vstate_save and _restore.
> + */
> +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t"
> +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t"
> +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t"
> +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t"
> +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t"
> +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t"
> +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t"
> +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t"
> +
> +#define ALT_SR_VS_VECTOR_1_0_SHIFT 9
> +#define ALT_SR_VS_THEAD_SHIFT 23
> +
> +#define ALT_SR_VS(_val, prot) \
> +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3", \
> + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \
> + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \
> + : "=r"(_val) \
> + : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT), \
> + "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT), \
> + "I"(ALT_SR_VS_VECTOR_1_0_SHIFT), \
> + "I"(ALT_SR_VS_THEAD_SHIFT))
> +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
> +
> #endif /* __ASSEMBLY__ */
>
> #endif
> diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
> index 315c96d2b4d0..fa47f60f81e3 100644
> --- a/arch/riscv/include/asm/vector.h
> +++ b/arch/riscv/include/asm/vector.h
> @@ -18,6 +18,55 @@
> #include <asm/hwcap.h>
> #include <asm/csr.h>
> #include <asm/asm.h>
> +#include <asm/errata_list.h>
> +
> +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> +
> +static inline unsigned long riscv_sr_vs(void)
> +{
> + u32 val;
> +
> + ALT_SR_VS(val, SR_VS);
> + return val;
> +}
> +
> +static inline unsigned long riscv_sr_vs_initial(void)
> +{
> + u32 val;
> +
> + ALT_SR_VS(val, SR_VS_INITIAL);
> + return val;
> +}
> +
> +static inline unsigned long riscv_sr_vs_clean(void)
> +{
> + u32 val;
> +
> + ALT_SR_VS(val, SR_VS_CLEAN);
> + return val;
> +}
> +
> +static inline unsigned long riscv_sr_vs_dirty(void)
> +{
> + u32 val;
> +
> + ALT_SR_VS(val, SR_VS_DIRTY);
> + return val;
> +}
> +
> +#define SR_VS riscv_sr_vs()
> +#define SR_VS_INITIAL riscv_sr_vs_initial()
> +#define SR_VS_CLEAN riscv_sr_vs_clean()
> +#define SR_VS_DIRTY riscv_sr_vs_dirty()
> +
> +#else /* CONFIG_ERRATA_THEAD_VECTOR */
> +
> +#define SR_VS SR_VS_1_0
> +#define SR_VS_INITIAL SR_VS_INITIAL_1_0
> +#define SR_VS_CLEAN SR_VS_CLEAN_1_0
> +#define SR_VS_DIRTY SR_VS_DIRTY_1_0
> +
> +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
>
> extern bool riscv_v_supported;
> void riscv_vector_supported(void);
> @@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void)
>
> static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
> {
> - asm volatile (
> + register u32 t1 asm("t1") = (SR_FS);
> +
> + /*
> + * CSR_VCSR is defined as
> + * [2:1] - vxrm[1:0]
> + * [0] - vxsat
> + * The earlier vector spec implemented by T-Head uses separate
> + * registers for the same bit-elements, so just combine those
> + * into the existing output field.
> + *
> + * Additionally T-Head cores need FS to be enabled when accessing
> + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
> + * Though the cores do not implement the VXRM and VXSAT fields in the
> + * FCSR CSR that vector-0.7.1 specifies.
> + */
> + asm volatile (ALTERNATIVE(
> "csrr %0, " __stringify(CSR_VSTART) "\n\t"
> "csrr %1, " __stringify(CSR_VTYPE) "\n\t"
> "csrr %2, " __stringify(CSR_VL) "\n\t"
> "csrr %3, " __stringify(CSR_VCSR) "\n\t"
> + __nops(5),
> + "csrs sstatus, t1\n\t"
> + "csrr %0, " __stringify(CSR_VSTART) "\n\t"
> + "csrr %1, " __stringify(CSR_VTYPE) "\n\t"
> + "csrr %2, " __stringify(CSR_VL) "\n\t"
> + "csrr %3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t"
> + "slliw %3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
> + "csrr t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t"
> + "or %3, %3, t4\n\t"
> + "csrc sstatus, t1\n\t",
> + THEAD_VENDOR_ID,
> + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
> - "=r" (dest->vcsr) : :);
> + "=r" (dest->vcsr) : "r"(t1) : "t4");
> }
>
> static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
> {
> - asm volatile (
> + register u32 t1 asm("t1") = (SR_FS);
> +
> + /*
> + * Similar to __vstate_csr_save above, restore values for the
> + * separate VXRM and VXSAT CSRs from the vcsr variable.
> + */
> + asm volatile (ALTERNATIVE(
> ".option push\n\t"
> ".option arch, +v\n\t"
> "vsetvl x0, %2, %1\n\t"
> ".option pop\n\t"
> "csrw " __stringify(CSR_VSTART) ", %0\n\t"
> "csrw " __stringify(CSR_VCSR) ", %3\n\t"
> + __nops(6),
> + "csrs sstatus, t1\n\t"
> + ".option push\n\t"
> + ".option arch, +v\n\t"
> + "vsetvl x0, %2, %1\n\t"
> + ".option pop\n\t"
> + "csrw " __stringify(CSR_VSTART) ", %0\n\t"
> + "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
> + "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t"
> + "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t"
> + "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t"
> + "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t"
> + "csrc sstatus, t1\n\t",
> + THEAD_VENDOR_ID,
> + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
> - "r" (src->vcsr) :);
> + "r" (src->vcsr), "r"(t1) : "t4");
> }
>
> static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> @@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
>
> riscv_v_enable();
> __vstate_csr_save(save_to);
> - asm volatile (
> + asm volatile (ALTERNATIVE(
> + "nop\n\t"
> ".option push\n\t"
> ".option arch, +v\n\t"
> "vsetvli %0, x0, e8, m8, ta, ma\n\t"
> @@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> "vse8.v v16, (%1)\n\t"
> "add %1, %1, %0\n\t"
> "vse8.v v24, (%1)\n\t"
> - ".option pop\n\t"
> - : "=&r" (vl) : "r" (datap) : "memory");
> + ".option pop\n\t",
> + "mv t0, %1\n\t"
> + THEAD_VSETVLI_T4X0E8M8D1
> + THEAD_VSB_V_V0T0
> + "addi t0, t0, 128\n\t"
> + THEAD_VSB_V_V8T0
> + "addi t0, t0, 128\n\t"
> + THEAD_VSB_V_V16T0
> + "addi t0, t0, 128\n\t"
> + THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
> + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> + : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");
> riscv_v_disable();
> }
>
> @@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
> unsigned long vl;
>
> riscv_v_enable();
> - asm volatile (
> + asm volatile (ALTERNATIVE(
> + "nop\n\t"
> ".option push\n\t"
> ".option arch, +v\n\t"
> "vsetvli %0, x0, e8, m8, ta, ma\n\t"
> @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
> "vle8.v v16, (%1)\n\t"
> "add %1, %1, %0\n\t"
> "vle8.v v24, (%1)\n\t"
> - ".option pop\n\t"
> - : "=&r" (vl) : "r" (datap) : "memory");
> + ".option pop\n\t",
> + "mv t0, %1\n\t"
> + THEAD_VSETVLI_T4X0E8M8D1
> + THEAD_VLB_V_V0T0
> + "addi t0, t0, 128\n\t"
> + THEAD_VLB_V_V8T0
> + "addi t0, t0, 128\n\t"
> + THEAD_VLB_V_V16T0
> + "addi t0, t0, 128\n\t"
> + THEAD_VLB_V_V24T0, THEAD_VENDOR_ID,
> + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> + : "=&r" (vl) : "r" (datap) : "t0", "t4");
> __vstate_csr_restore(restore_from);
> riscv_v_disable();
> }
> diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
> index 74178fb71805..51726890a4d0 100644
> --- a/arch/riscv/kernel/vector.c
> +++ b/arch/riscv/kernel/vector.c
> @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
> u32 insn = (u32)regs->badaddr;
>
> /* Do not handle if V is not supported, or disabled */
> - if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V))
> + if (!has_vector())
> return false;
>
> /* If V has been enabled then it is not the first-use trap */
> --
> 2.39.2
>


Attachments:
(No filename) (16.24 kB)
signature.asc (235.00 B)
Download all attachments

2023-06-23 11:26:21

by Heiko Stübner

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling

Hey Conor,

Am Freitag, 23. Juni 2023, 11:49:41 CEST schrieb Conor Dooley:
> On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote:
> > From: Heiko Stuebner <[email protected]>
> >
> > T-Head C9xx cores implement an older version (0.7.1) of the vector
> > specification.
> >
> > Relevant changes concerning the kernel are:
> > - different placement of the SR_VS bit for the vector unit status
> > - different encoding of the vsetvli instruction
> > - different instructions for loads and stores
> >
> > And a fixed VLEN of 128.
> >
> > The in-kernel access to vector instances is limited to the save and
> > restore of process states so the above mentioned areas can simply be
> > handled via the alternatives framework, similar to other T-Head specific
> > issues.
> >
> > Signed-off-by: Heiko Stuebner <[email protected]>
> > ---
> > arch/riscv/Kconfig.errata | 13 +++
> > arch/riscv/errata/thead/errata.c | 32 ++++++
> > arch/riscv/include/asm/csr.h | 24 ++++-
> > arch/riscv/include/asm/errata_list.h | 45 ++++++++-
> > arch/riscv/include/asm/vector.h | 139 +++++++++++++++++++++++++--
> > arch/riscv/kernel/vector.c | 2 +-
> > 6 files changed, 238 insertions(+), 17 deletions(-)
> >
> > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
> > index 0c8f4652cd82..b461312dd452 100644
> > --- a/arch/riscv/Kconfig.errata
> > +++ b/arch/riscv/Kconfig.errata
> > @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU
> >
> > If you don't know what to do here, say "Y".
> >
> > +config ERRATA_THEAD_VECTOR
> > + bool "Apply T-Head Vector errata"
> > + depends on ERRATA_THEAD && RISCV_ISA_V
> > + default y
> > + help
> > + The T-Head C9xx cores implement an earlier version 0.7.1
> > + of the vector extensions.
> > +
> > + This will apply the necessary errata to handle the non-standard
> > + behaviour via when switch to and from vector mode for processes.
> > +
> > + If you don't know what to do here, say "Y".
> > +
> > endmenu # "CPU errata selection"
> > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> > index c259dc925ec1..c41ec84bc8a5 100644
> > --- a/arch/riscv/errata/thead/errata.c
> > +++ b/arch/riscv/errata/thead/errata.c
> > @@ -15,6 +15,7 @@
> > #include <asm/errata_list.h>
> > #include <asm/hwprobe.h>
> > #include <asm/patch.h>
> > +#include <asm/vector.h>
> > #include <asm/vendorid_list.h>
> >
> > static bool errata_probe_pbmt(unsigned int stage,
> > @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage,
> > return true;
> > }
> >
> > +static bool errata_probe_vector(unsigned int stage,
> > + unsigned long arch_id, unsigned long impid)
> > +{
> > + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR))
> > + return false;
> > +
> > + /* target-c9xx cores report arch_id and impid as 0 */
> > + if (arch_id != 0 || impid != 0)
> > + return false;
> > +
> > + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) {
> > + /*
> > + * Disable VECTOR to detect illegal usage of vector in kernel.
> > + * This is normally done in _start_kernel but with the
> > + * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's
> > + * vector-0.7.1 and the vector-1.0-bits are unused there.
> > + */
> > + csr_clear(CSR_STATUS, SR_VS_THEAD);
> > + return false;
> > + }
> > +
> > + /* let has_vector() return true and set the static vlen */
>
> Hmm, I was wondering about how you were going to communicate this to
> userspace, since you're not going to be setting "v" in your DT, so
> there'll be nothing in /proc/cpuinfo indicating it. (I am assuming that
> this is your intention, as you'd not need to drop the alternative-based
> stuff from has_vector() if it wasn't)

I'm working on the assumption that the t-head vector is way to different
from the official vector, that a userspace will definitly need to handle this
in some way specially and we can't claim to use a "real" vector spec.

So in this first step, my goal is to simply allow userspace programs
compiled to use the t-head vector instructions (i.e. 0.7.1 presumably) to
not hang the kernel and do all the necessary bringup and teardown needed
for executing those vector instructions ;-) .


> I don't think you can do this, as things stand, because of how hwprobe
> operates:
>
> static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
> const struct cpumask *cpus)
> {
> ...
>
> if (has_vector())
> pair->value |= RISCV_HWPROBE_IMA_V;
>
> ...
> }
>
> * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by
> version 1.0 of the RISC-V Vector extension manual.
>
> You'll need to change hwprobe to use has_vector() &&
> riscv_has_extension_unlikely(v), or similar, as the condition for
> reporting.

ah right, and yes I need to adapt hwprobe as you wrote.


> You'll also need some other way to communicate to userspace
> that T-Head's vector stuff is supported, no?

As I said above, baby-steps - not-dying first ;-) .


> I'm also _really_ unconvinced that turning on extensions that were not
> indicated in the DT or via ACPI is something we should be doing. Have I
> missed something here that'd make that assessment inaccurate?

Hmm, DT (and ACPI) is a (static) hardware-description, not a configuration
space (sermon of DT maintainers for years), so the ISA string in DT will
simply describe _all_ extensions the hardware supports. So there _should_
never be a case of "I want to disable vectors and will remove the letter
from the ISA string".

For T-Head we _know_ from vendor-id and friends that the core supports
this special brand of vectors.

We're also turning on the t-head equivalent of svpbmt and zicbom with
probably the same reasoning.


> FWIW I am currently working on kernel-side support for the new
> extension properties that I have posted dt-binding patches for.
> I'll go post it once Palmer has merged his current set of patches in his
> staging repo into for-next, as I've got a lot of deps on riscv,isa
> parser changes.
> I'm really hoping that it provides an easier way to go off probing for
> vendor specific stuff for DT-based systems, since it will no longer
> require complex probing, just an of_property_match_string() for each
> possible cpu and we could very well provide a vendor hook during that
> process.
> Clearly though, that stuff is not yet merged as it has not even been
> posted yet.

And with the comments I received, T-Head vector also is not ready for
prime-time yet, so we're all good :-)


Heiko


> Current WIP of that is here:
> https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/log/?h=riscv-extensions-strings-on-palmer
>
> > + riscv_vector_supported();
> > + riscv_v_vsize = 128 / 8 * 32;
> > +
> > + return true;
> > +}
> > +
> > static u32 thead_errata_probe(unsigned int stage,
> > unsigned long archid, unsigned long impid)
> > {
> > @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage,
> > if (errata_probe_pmu(stage, archid, impid))
> > cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
> >
> > + if (errata_probe_vector(stage, archid, impid))
> > + cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR);
> > +
> > return cpu_req_errata;
> > }
> >
> > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> > index 2d79bca6ffe8..521b3b939e51 100644
> > --- a/arch/riscv/include/asm/csr.h
> > +++ b/arch/riscv/include/asm/csr.h
> > @@ -24,11 +24,25 @@
> > #define SR_FS_CLEAN _AC(0x00004000, UL)
> > #define SR_FS_DIRTY _AC(0x00006000, UL)
> >
> > -#define SR_VS _AC(0x00000600, UL) /* Vector Status */
> > -#define SR_VS_OFF _AC(0x00000000, UL)
> > -#define SR_VS_INITIAL _AC(0x00000200, UL)
> > -#define SR_VS_CLEAN _AC(0x00000400, UL)
> > -#define SR_VS_DIRTY _AC(0x00000600, UL)
> > +#define SR_VS_OFF _AC(0x00000000, UL)
> > +
> > +#define SR_VS_1_0 _AC(0x00000600, UL) /* Vector Status */
> > +#define SR_VS_INITIAL_1_0 _AC(0x00000200, UL)
> > +#define SR_VS_CLEAN_1_0 _AC(0x00000400, UL)
> > +#define SR_VS_DIRTY_1_0 _AC(0x00000600, UL)
> > +
> > +#define SR_VS_THEAD _AC(0x01800000, UL) /* Vector Status */
> > +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL)
> > +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL)
> > +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL)
> > +
> > +/*
> > + * Always default to vector-1.0 handling in assembly and let the broken
> > + * implementations handle their case separately.
> > + */
> > +#ifdef __ASSEMBLY__
> > +#define SR_VS SR_VS_1_0
> > +#endif
> >
> > #define SR_XS _AC(0x00018000, UL) /* Extension Status */
> > #define SR_XS_OFF _AC(0x00000000, UL)
> > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> > index fb1a810f3d8c..ab21fadbe9c6 100644
> > --- a/arch/riscv/include/asm/errata_list.h
> > +++ b/arch/riscv/include/asm/errata_list.h
> > @@ -21,7 +21,8 @@
> > #define ERRATA_THEAD_PBMT 0
> > #define ERRATA_THEAD_CMO 1
> > #define ERRATA_THEAD_PMU 2
> > -#define ERRATA_THEAD_NUMBER 3
> > +#define ERRATA_THEAD_VECTOR 3
> > +#define ERRATA_THEAD_NUMBER 4
> > #endif
> >
> > #ifdef __ASSEMBLY__
> > @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE( \
> > : "=r" (__ovl) : \
> > : "memory")
> >
> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> > +
> > +#define THEAD_C9XX_CSR_VXSAT 0x9
> > +#define THEAD_C9XX_CSR_VXRM 0xa
> > +
> > +/*
> > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
> > + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
> > + * vsetvli t4, x0, e8, m8, d1
> > + */
> > +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t"
> > +
> > +/*
> > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
> > + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize
> > + * the call resulting in a different encoding and then using a value for
> > + * the "mop" field that is not part of vector-0.7.1
> > + * So encode specific variants for vstate_save and _restore.
> > + */
> > +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t"
> > +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t"
> > +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t"
> > +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t"
> > +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t"
> > +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t"
> > +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t"
> > +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t"
> > +
> > +#define ALT_SR_VS_VECTOR_1_0_SHIFT 9
> > +#define ALT_SR_VS_THEAD_SHIFT 23
> > +
> > +#define ALT_SR_VS(_val, prot) \
> > +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3", \
> > + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \
> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \
> > + : "=r"(_val) \
> > + : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT), \
> > + "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT), \
> > + "I"(ALT_SR_VS_VECTOR_1_0_SHIFT), \
> > + "I"(ALT_SR_VS_THEAD_SHIFT))
> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
> > +
> > #endif /* __ASSEMBLY__ */
> >
> > #endif
> > diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
> > index 315c96d2b4d0..fa47f60f81e3 100644
> > --- a/arch/riscv/include/asm/vector.h
> > +++ b/arch/riscv/include/asm/vector.h
> > @@ -18,6 +18,55 @@
> > #include <asm/hwcap.h>
> > #include <asm/csr.h>
> > #include <asm/asm.h>
> > +#include <asm/errata_list.h>
> > +
> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> > +
> > +static inline unsigned long riscv_sr_vs(void)
> > +{
> > + u32 val;
> > +
> > + ALT_SR_VS(val, SR_VS);
> > + return val;
> > +}
> > +
> > +static inline unsigned long riscv_sr_vs_initial(void)
> > +{
> > + u32 val;
> > +
> > + ALT_SR_VS(val, SR_VS_INITIAL);
> > + return val;
> > +}
> > +
> > +static inline unsigned long riscv_sr_vs_clean(void)
> > +{
> > + u32 val;
> > +
> > + ALT_SR_VS(val, SR_VS_CLEAN);
> > + return val;
> > +}
> > +
> > +static inline unsigned long riscv_sr_vs_dirty(void)
> > +{
> > + u32 val;
> > +
> > + ALT_SR_VS(val, SR_VS_DIRTY);
> > + return val;
> > +}
> > +
> > +#define SR_VS riscv_sr_vs()
> > +#define SR_VS_INITIAL riscv_sr_vs_initial()
> > +#define SR_VS_CLEAN riscv_sr_vs_clean()
> > +#define SR_VS_DIRTY riscv_sr_vs_dirty()
> > +
> > +#else /* CONFIG_ERRATA_THEAD_VECTOR */
> > +
> > +#define SR_VS SR_VS_1_0
> > +#define SR_VS_INITIAL SR_VS_INITIAL_1_0
> > +#define SR_VS_CLEAN SR_VS_CLEAN_1_0
> > +#define SR_VS_DIRTY SR_VS_DIRTY_1_0
> > +
> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
> >
> > extern bool riscv_v_supported;
> > void riscv_vector_supported(void);
> > @@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void)
> >
> > static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
> > {
> > - asm volatile (
> > + register u32 t1 asm("t1") = (SR_FS);
> > +
> > + /*
> > + * CSR_VCSR is defined as
> > + * [2:1] - vxrm[1:0]
> > + * [0] - vxsat
> > + * The earlier vector spec implemented by T-Head uses separate
> > + * registers for the same bit-elements, so just combine those
> > + * into the existing output field.
> > + *
> > + * Additionally T-Head cores need FS to be enabled when accessing
> > + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
> > + * Though the cores do not implement the VXRM and VXSAT fields in the
> > + * FCSR CSR that vector-0.7.1 specifies.
> > + */
> > + asm volatile (ALTERNATIVE(
> > "csrr %0, " __stringify(CSR_VSTART) "\n\t"
> > "csrr %1, " __stringify(CSR_VTYPE) "\n\t"
> > "csrr %2, " __stringify(CSR_VL) "\n\t"
> > "csrr %3, " __stringify(CSR_VCSR) "\n\t"
> > + __nops(5),
> > + "csrs sstatus, t1\n\t"
> > + "csrr %0, " __stringify(CSR_VSTART) "\n\t"
> > + "csrr %1, " __stringify(CSR_VTYPE) "\n\t"
> > + "csrr %2, " __stringify(CSR_VL) "\n\t"
> > + "csrr %3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t"
> > + "slliw %3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
> > + "csrr t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t"
> > + "or %3, %3, t4\n\t"
> > + "csrc sstatus, t1\n\t",
> > + THEAD_VENDOR_ID,
> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> > : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
> > - "=r" (dest->vcsr) : :);
> > + "=r" (dest->vcsr) : "r"(t1) : "t4");
> > }
> >
> > static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
> > {
> > - asm volatile (
> > + register u32 t1 asm("t1") = (SR_FS);
> > +
> > + /*
> > + * Similar to __vstate_csr_save above, restore values for the
> > + * separate VXRM and VXSAT CSRs from the vcsr variable.
> > + */
> > + asm volatile (ALTERNATIVE(
> > ".option push\n\t"
> > ".option arch, +v\n\t"
> > "vsetvl x0, %2, %1\n\t"
> > ".option pop\n\t"
> > "csrw " __stringify(CSR_VSTART) ", %0\n\t"
> > "csrw " __stringify(CSR_VCSR) ", %3\n\t"
> > + __nops(6),
> > + "csrs sstatus, t1\n\t"
> > + ".option push\n\t"
> > + ".option arch, +v\n\t"
> > + "vsetvl x0, %2, %1\n\t"
> > + ".option pop\n\t"
> > + "csrw " __stringify(CSR_VSTART) ", %0\n\t"
> > + "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
> > + "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t"
> > + "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t"
> > + "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t"
> > + "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t"
> > + "csrc sstatus, t1\n\t",
> > + THEAD_VENDOR_ID,
> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> > : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
> > - "r" (src->vcsr) :);
> > + "r" (src->vcsr), "r"(t1) : "t4");
> > }
> >
> > static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> > @@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> >
> > riscv_v_enable();
> > __vstate_csr_save(save_to);
> > - asm volatile (
> > + asm volatile (ALTERNATIVE(
> > + "nop\n\t"
> > ".option push\n\t"
> > ".option arch, +v\n\t"
> > "vsetvli %0, x0, e8, m8, ta, ma\n\t"
> > @@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> > "vse8.v v16, (%1)\n\t"
> > "add %1, %1, %0\n\t"
> > "vse8.v v24, (%1)\n\t"
> > - ".option pop\n\t"
> > - : "=&r" (vl) : "r" (datap) : "memory");
> > + ".option pop\n\t",
> > + "mv t0, %1\n\t"
> > + THEAD_VSETVLI_T4X0E8M8D1
> > + THEAD_VSB_V_V0T0
> > + "addi t0, t0, 128\n\t"
> > + THEAD_VSB_V_V8T0
> > + "addi t0, t0, 128\n\t"
> > + THEAD_VSB_V_V16T0
> > + "addi t0, t0, 128\n\t"
> > + THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> > + : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");
> > riscv_v_disable();
> > }
> >
> > @@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
> > unsigned long vl;
> >
> > riscv_v_enable();
> > - asm volatile (
> > + asm volatile (ALTERNATIVE(
> > + "nop\n\t"
> > ".option push\n\t"
> > ".option arch, +v\n\t"
> > "vsetvli %0, x0, e8, m8, ta, ma\n\t"
> > @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
> > "vle8.v v16, (%1)\n\t"
> > "add %1, %1, %0\n\t"
> > "vle8.v v24, (%1)\n\t"
> > - ".option pop\n\t"
> > - : "=&r" (vl) : "r" (datap) : "memory");
> > + ".option pop\n\t",
> > + "mv t0, %1\n\t"
> > + THEAD_VSETVLI_T4X0E8M8D1
> > + THEAD_VLB_V_V0T0
> > + "addi t0, t0, 128\n\t"
> > + THEAD_VLB_V_V8T0
> > + "addi t0, t0, 128\n\t"
> > + THEAD_VLB_V_V16T0
> > + "addi t0, t0, 128\n\t"
> > + THEAD_VLB_V_V24T0, THEAD_VENDOR_ID,
> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> > + : "=&r" (vl) : "r" (datap) : "t0", "t4");
> > __vstate_csr_restore(restore_from);
> > riscv_v_disable();
> > }
> > diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
> > index 74178fb71805..51726890a4d0 100644
> > --- a/arch/riscv/kernel/vector.c
> > +++ b/arch/riscv/kernel/vector.c
> > @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
> > u32 insn = (u32)regs->badaddr;
> >
> > /* Do not handle if V is not supported, or disabled */
> > - if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V))
> > + if (!has_vector())
> > return false;
> >
> > /* If V has been enabled then it is not the first-use trap */
>





2023-06-23 12:12:47

by Conor Dooley

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling

On Fri, Jun 23, 2023 at 12:40:43PM +0200, Heiko St?bner wrote:
> Am Freitag, 23. Juni 2023, 11:49:41 CEST schrieb Conor Dooley:
> > On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote:
> > > From: Heiko Stuebner <[email protected]>

> > I'm also _really_ unconvinced that turning on extensions that were not
> > indicated in the DT or via ACPI is something we should be doing. Have I
> > missed something here that'd make that assessment inaccurate?
>
> Hmm, DT (and ACPI) is a (static) hardware-description, not a configuration
> space (sermon of DT maintainers for years), so the ISA string in DT will
> simply describe _all_ extensions the hardware supports. So there _should_
> never be a case of "I want to disable vectors and will remove the letter
> from the ISA string".

I think I pointed it out previously, on the thread about using the isa
string in hwcap that you proposed, but it was things like hypervisors
that modify the DT that they pass to guests that I was talking about
here, rather than an end-user. Obviously this doesn't apply to things
that do not have hypervisor support, but if/when those do exist you'd be
relying on them not having the empty arch/impl ids.

> We're also turning on the t-head equivalent of svpbmt and zicbom with
> probably the same reasoning.

I'd argue that we should describe these things in whatever a non isa
string DT property ends up looking like, even if we missed the boat on
putting them in riscv,isa.

Maybe this is a self-serving interpretation, but I see the svpbmt and
zicbom equivalents somewhat differently. They're done under the hood,
ostensibly to make the thing spec compliant (it still claims to be
rv64gc). This one is "turn on a new, user-visible, feature", rather
than "we implement a standard thing, but it is broken, so silently fix
it up". I would probably feel differently about this aspect of things if
there was no intention to actually communicate the presence of the
extension to userspace.

> For T-Head we _know_ from vendor-id and friends that the core supports
> this special brand of vectors.

If we _know_ on Foobar SoC that it supports xyz extension based on
vendor_id etc, should we add detection for that that too, using those
as a basis? I really don't want to have a precedent for T-Head getting
to use this method (will the same logic apply to their bitmanip stuff?),
that is not going to be applied to other vendors.

Hopefully that better explains where I am coming from, lmk if I am
overlooking something that should be obvious.

Cheers,
Conor.


Attachments:
(No filename) (2.55 kB)
signature.asc (235.00 B)
Download all attachments

2023-06-23 14:13:39

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling

Hi Heiko,

kernel test robot noticed the following build errors:

[auto build test ERROR on next-20230622]
[cannot apply to linus/master v6.4-rc7 v6.4-rc6 v6.4-rc5 v6.4-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Heiko-Stuebner/RISC-V-move-vector-available-status-into-a-dedicated-variable/20230623-081314
base: next-20230622
patch link: https://lore.kernel.org/r/20230622231305.631331-4-heiko%40sntech.de
patch subject: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling
config: riscv-rv32_defconfig (https://download.01.org/0day-ci/archive/20230623/[email protected]/config)
compiler: riscv32-linux-gcc (GCC) 12.3.0
reproduce: (https://download.01.org/0day-ci/archive/20230623/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

arch/riscv/include/asm/vector.h: Assembler messages:
>> arch/riscv/include/asm/vector.h:162: Error: unrecognized opcode `slliw a4,a4,1'
>> arch/riscv/include/asm/vector.h:194: Error: unrecognized opcode `srliw t4,a1,1'
>> arch/riscv/include/asm/vector.h:169: Error: attempt to move .org backwards
arch/riscv/include/asm/vector.h:203: Error: attempt to move .org backwards
--
arch/riscv/include/asm/vector.h: Assembler messages:
>> arch/riscv/include/asm/vector.h:162: Error: unrecognized opcode `slliw a4,a4,1'
>> arch/riscv/include/asm/vector.h:169: Error: attempt to move .org backwards
--
arch/riscv/include/asm/vector.h: Assembler messages:
>> arch/riscv/include/asm/vector.h:194: Error: unrecognized opcode `srliw t4,a1,1'
>> arch/riscv/include/asm/vector.h:162: Error: unrecognized opcode `slliw a4,a4,1'
arch/riscv/include/asm/vector.h:203: Error: attempt to move .org backwards
>> arch/riscv/include/asm/vector.h:169: Error: attempt to move .org backwards


vim +162 arch/riscv/include/asm/vector.h

03c3fcd9941a17 Greentime Hu 2023-06-05 150
03c3fcd9941a17 Greentime Hu 2023-06-05 151 static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
03c3fcd9941a17 Greentime Hu 2023-06-05 152 {
5255e253b722bb Heiko Stuebner 2023-06-23 153 register u32 t1 asm("t1") = (SR_FS);
5255e253b722bb Heiko Stuebner 2023-06-23 154
5255e253b722bb Heiko Stuebner 2023-06-23 155 /*
5255e253b722bb Heiko Stuebner 2023-06-23 156 * Similar to __vstate_csr_save above, restore values for the
5255e253b722bb Heiko Stuebner 2023-06-23 157 * separate VXRM and VXSAT CSRs from the vcsr variable.
5255e253b722bb Heiko Stuebner 2023-06-23 158 */
5255e253b722bb Heiko Stuebner 2023-06-23 159 asm volatile (ALTERNATIVE(
03c3fcd9941a17 Greentime Hu 2023-06-05 160 ".option push\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 161 ".option arch, +v\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 @162 "vsetvl x0, %2, %1\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 163 ".option pop\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 164 "csrw " __stringify(CSR_VSTART) ", %0\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 165 "csrw " __stringify(CSR_VCSR) ", %3\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 166 __nops(6),
5255e253b722bb Heiko Stuebner 2023-06-23 167 "csrs sstatus, t1\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 168 ".option push\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 @169 ".option arch, +v\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 170 "vsetvl x0, %2, %1\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 171 ".option pop\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 172 "csrw " __stringify(CSR_VSTART) ", %0\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 173 "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 174 "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 175 "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 176 "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 177 "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 178 "csrc sstatus, t1\n\t",
5255e253b722bb Heiko Stuebner 2023-06-23 179 THEAD_VENDOR_ID,
5255e253b722bb Heiko Stuebner 2023-06-23 180 ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
03c3fcd9941a17 Greentime Hu 2023-06-05 181 : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
5255e253b722bb Heiko Stuebner 2023-06-23 182 "r" (src->vcsr), "r"(t1) : "t4");
03c3fcd9941a17 Greentime Hu 2023-06-05 183 }
03c3fcd9941a17 Greentime Hu 2023-06-05 184
03c3fcd9941a17 Greentime Hu 2023-06-05 185 static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
03c3fcd9941a17 Greentime Hu 2023-06-05 186 void *datap)
03c3fcd9941a17 Greentime Hu 2023-06-05 187 {
03c3fcd9941a17 Greentime Hu 2023-06-05 188 unsigned long vl;
03c3fcd9941a17 Greentime Hu 2023-06-05 189
03c3fcd9941a17 Greentime Hu 2023-06-05 190 riscv_v_enable();
03c3fcd9941a17 Greentime Hu 2023-06-05 191 __vstate_csr_save(save_to);
5255e253b722bb Heiko Stuebner 2023-06-23 192 asm volatile (ALTERNATIVE(
5255e253b722bb Heiko Stuebner 2023-06-23 193 "nop\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 @194 ".option push\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 195 ".option arch, +v\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 196 "vsetvli %0, x0, e8, m8, ta, ma\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 197 "vse8.v v0, (%1)\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 198 "add %1, %1, %0\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 199 "vse8.v v8, (%1)\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 200 "add %1, %1, %0\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 201 "vse8.v v16, (%1)\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 202 "add %1, %1, %0\n\t"
03c3fcd9941a17 Greentime Hu 2023-06-05 203 "vse8.v v24, (%1)\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 204 ".option pop\n\t",
5255e253b722bb Heiko Stuebner 2023-06-23 205 "mv t0, %1\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 206 THEAD_VSETVLI_T4X0E8M8D1
5255e253b722bb Heiko Stuebner 2023-06-23 207 THEAD_VSB_V_V0T0
5255e253b722bb Heiko Stuebner 2023-06-23 208 "addi t0, t0, 128\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 209 THEAD_VSB_V_V8T0
5255e253b722bb Heiko Stuebner 2023-06-23 210 "addi t0, t0, 128\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 211 THEAD_VSB_V_V16T0
5255e253b722bb Heiko Stuebner 2023-06-23 212 "addi t0, t0, 128\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 213 THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
5255e253b722bb Heiko Stuebner 2023-06-23 214 ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
5255e253b722bb Heiko Stuebner 2023-06-23 215 : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");
03c3fcd9941a17 Greentime Hu 2023-06-05 216 riscv_v_disable();
03c3fcd9941a17 Greentime Hu 2023-06-05 217 }
03c3fcd9941a17 Greentime Hu 2023-06-05 218

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-06-24 05:29:38

by Stefan O'Rear

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling

On Fri, Jun 23, 2023, at 6:40 AM, Heiko Stübner wrote:
> Hey Conor,
>
> Am Freitag, 23. Juni 2023, 11:49:41 CEST schrieb Conor Dooley:
>> On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote:
>> > From: Heiko Stuebner <[email protected]>
>> >
>> > T-Head C9xx cores implement an older version (0.7.1) of the vector
>> > specification.
>> >
>> > Relevant changes concerning the kernel are:
>> > - different placement of the SR_VS bit for the vector unit status
>> > - different encoding of the vsetvli instruction
>> > - different instructions for loads and stores
>> >
>> > And a fixed VLEN of 128.
>> >
>> > The in-kernel access to vector instances is limited to the save and
>> > restore of process states so the above mentioned areas can simply be
>> > handled via the alternatives framework, similar to other T-Head specific
>> > issues.
>> >
>> > Signed-off-by: Heiko Stuebner <[email protected]>
>> > ---
>> > arch/riscv/Kconfig.errata | 13 +++
>> > arch/riscv/errata/thead/errata.c | 32 ++++++
>> > arch/riscv/include/asm/csr.h | 24 ++++-
>> > arch/riscv/include/asm/errata_list.h | 45 ++++++++-
>> > arch/riscv/include/asm/vector.h | 139 +++++++++++++++++++++++++--
>> > arch/riscv/kernel/vector.c | 2 +-
>> > 6 files changed, 238 insertions(+), 17 deletions(-)
>> >
>> > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
>> > index 0c8f4652cd82..b461312dd452 100644
>> > --- a/arch/riscv/Kconfig.errata
>> > +++ b/arch/riscv/Kconfig.errata
>> > @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU
>> >
>> > If you don't know what to do here, say "Y".
>> >
>> > +config ERRATA_THEAD_VECTOR
>> > + bool "Apply T-Head Vector errata"
>> > + depends on ERRATA_THEAD && RISCV_ISA_V
>> > + default y
>> > + help
>> > + The T-Head C9xx cores implement an earlier version 0.7.1
>> > + of the vector extensions.
>> > +
>> > + This will apply the necessary errata to handle the non-standard
>> > + behaviour via when switch to and from vector mode for processes.

Doesn't make sense. "This will apply the necessary errata to handle the
non-standard behavior when enabling, disabling, or swapping vector state for
processes."?

>> > +
>> > + If you don't know what to do here, say "Y".
>> > +
>> > endmenu # "CPU errata selection"
>> > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
>> > index c259dc925ec1..c41ec84bc8a5 100644
>> > --- a/arch/riscv/errata/thead/errata.c
>> > +++ b/arch/riscv/errata/thead/errata.c
>> > @@ -15,6 +15,7 @@
>> > #include <asm/errata_list.h>
>> > #include <asm/hwprobe.h>
>> > #include <asm/patch.h>
>> > +#include <asm/vector.h>
>> > #include <asm/vendorid_list.h>
>> >
>> > static bool errata_probe_pbmt(unsigned int stage,
>> > @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage,
>> > return true;
>> > }
>> >
>> > +static bool errata_probe_vector(unsigned int stage,
>> > + unsigned long arch_id, unsigned long impid)
>> > +{
>> > + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR))
>> > + return false;
>> > +
>> > + /* target-c9xx cores report arch_id and impid as 0 */
>> > + if (arch_id != 0 || impid != 0)
>> > + return false;
>> > +
>> > + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) {
>> > + /*
>> > + * Disable VECTOR to detect illegal usage of vector in kernel.
>> > + * This is normally done in _start_kernel but with the
>> > + * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's
>> > + * vector-0.7.1 and the vector-1.0-bits are unused there.
>> > + */
>> > + csr_clear(CSR_STATUS, SR_VS_THEAD);
>> > + return false;
>> > + }
>> > +
>> > + /* let has_vector() return true and set the static vlen */
>>
>> Hmm, I was wondering about how you were going to communicate this to
>> userspace, since you're not going to be setting "v" in your DT, so
>> there'll be nothing in /proc/cpuinfo indicating it. (I am assuming that
>> this is your intention, as you'd not need to drop the alternative-based
>> stuff from has_vector() if it wasn't)
>
> I'm working on the assumption that the t-head vector is way to different
> from the official vector, that a userspace will definitly need to handle this
> in some way specially and we can't claim to use a "real" vector spec.
>
> So in this first step, my goal is to simply allow userspace programs
> compiled to use the t-head vector instructions (i.e. 0.7.1 presumably) to
> not hang the kernel and do all the necessary bringup and teardown needed
> for executing those vector instructions ;-) .
>
>
>> I don't think you can do this, as things stand, because of how hwprobe
>> operates:
>>
>> static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
>> const struct cpumask *cpus)
>> {
>> ...
>>
>> if (has_vector())
>> pair->value |= RISCV_HWPROBE_IMA_V;
>>
>> ...
>> }
>>
>> * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by
>> version 1.0 of the RISC-V Vector extension manual.
>>
>> You'll need to change hwprobe to use has_vector() &&
>> riscv_has_extension_unlikely(v), or similar, as the condition for
>> reporting.
>
> ah right, and yes I need to adapt hwprobe as you wrote.
>
>
>> You'll also need some other way to communicate to userspace
>> that T-Head's vector stuff is supported, no?
>
> As I said above, baby-steps - not-dying first ;-) .

(Count me as a vote for adding a new RISCV_HWPROBE_KEY_ - ints are cheap and
apart from the half-implemented heterogenous case, the only advantage of
hwprobe over hwcap is that we can support a virtually unlimited number of
draft and vendor extensions.)

>> I'm also _really_ unconvinced that turning on extensions that were not
>> indicated in the DT or via ACPI is something we should be doing. Have I
>> missed something here that'd make that assessment inaccurate?
>
> Hmm, DT (and ACPI) is a (static) hardware-description, not a configuration
> space (sermon of DT maintainers for years), so the ISA string in DT will
> simply describe _all_ extensions the hardware supports. So there _should_
> never be a case of "I want to disable vectors and will remove the letter
> from the ISA string".
>
> For T-Head we _know_ from vendor-id and friends that the core supports
> this special brand of vectors.

C906 supports t-head/0.7.1 vectors as a configuration option. The C906 in
the D1 and BL808 has vectors, the recently announced CV1800B has one C906
with vectors and one without, and I vaguely remember seeing a chip with only
a non-vector C906.

C908 (announced, no manual yet) claims V 1.0 support. Presumably it will
not support 0.7.1.

C910 (exists on evaluation boards) lacks vector support.

C920 (TH1520, SG2042, etc) has 0.7.1 support, at least superficially
compatible with C906-with-vectors. Hopefully we can share errata.

This probably needs to be handled as an orthogonal "xtheadv" or "v0p7p1"
extension in whatever replaces riscv,isa.

> We're also turning on the t-head equivalent of svpbmt and zicbom with
> probably the same reasoning.

In an ideal world those would be handled as extensions as well - T-Head fixed
their vectors with the C908 so they might do standards-compliant Svpbmt and
Zicbom in the future.

>> FWIW I am currently working on kernel-side support for the new
>> extension properties that I have posted dt-binding patches for.
>> I'll go post it once Palmer has merged his current set of patches in his
>> staging repo into for-next, as I've got a lot of deps on riscv,isa
>> parser changes.
>> I'm really hoping that it provides an easier way to go off probing for
>> vendor specific stuff for DT-based systems, since it will no longer
>> require complex probing, just an of_property_match_string() for each
>> possible cpu and we could very well provide a vendor hook during that
>> process.
>> Clearly though, that stuff is not yet merged as it has not even been
>> posted yet.
>
> And with the comments I received, T-Head vector also is not ready for
> prime-time yet, so we're all good :-)
>
>
> Heiko
>
>
>> Current WIP of that is here:
>> https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/log/?h=riscv-extensions-strings-on-palmer
>>
>> > + riscv_vector_supported();
>> > + riscv_v_vsize = 128 / 8 * 32;
>> > +
>> > + return true;
>> > +}
>> > +
>> > static u32 thead_errata_probe(unsigned int stage,
>> > unsigned long archid, unsigned long impid)
>> > {
>> > @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage,
>> > if (errata_probe_pmu(stage, archid, impid))
>> > cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
>> >
>> > + if (errata_probe_vector(stage, archid, impid))
>> > + cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR);
>> > +
>> > return cpu_req_errata;
>> > }
>> >
>> > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
>> > index 2d79bca6ffe8..521b3b939e51 100644
>> > --- a/arch/riscv/include/asm/csr.h
>> > +++ b/arch/riscv/include/asm/csr.h
>> > @@ -24,11 +24,25 @@
>> > #define SR_FS_CLEAN _AC(0x00004000, UL)
>> > #define SR_FS_DIRTY _AC(0x00006000, UL)
>> >
>> > -#define SR_VS _AC(0x00000600, UL) /* Vector Status */
>> > -#define SR_VS_OFF _AC(0x00000000, UL)
>> > -#define SR_VS_INITIAL _AC(0x00000200, UL)
>> > -#define SR_VS_CLEAN _AC(0x00000400, UL)
>> > -#define SR_VS_DIRTY _AC(0x00000600, UL)
>> > +#define SR_VS_OFF _AC(0x00000000, UL)
>> > +
>> > +#define SR_VS_1_0 _AC(0x00000600, UL) /* Vector Status */
>> > +#define SR_VS_INITIAL_1_0 _AC(0x00000200, UL)
>> > +#define SR_VS_CLEAN_1_0 _AC(0x00000400, UL)
>> > +#define SR_VS_DIRTY_1_0 _AC(0x00000600, UL)
>> > +
>> > +#define SR_VS_THEAD _AC(0x01800000, UL) /* Vector Status */
>> > +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL)
>> > +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL)
>> > +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL)
>> > +
>> > +/*
>> > + * Always default to vector-1.0 handling in assembly and let the broken
>> > + * implementations handle their case separately.
>> > + */
>> > +#ifdef __ASSEMBLY__
>> > +#define SR_VS SR_VS_1_0
>> > +#endif
>> >
>> > #define SR_XS _AC(0x00018000, UL) /* Extension Status */
>> > #define SR_XS_OFF _AC(0x00000000, UL)
>> > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
>> > index fb1a810f3d8c..ab21fadbe9c6 100644
>> > --- a/arch/riscv/include/asm/errata_list.h
>> > +++ b/arch/riscv/include/asm/errata_list.h
>> > @@ -21,7 +21,8 @@
>> > #define ERRATA_THEAD_PBMT 0
>> > #define ERRATA_THEAD_CMO 1
>> > #define ERRATA_THEAD_PMU 2
>> > -#define ERRATA_THEAD_NUMBER 3
>> > +#define ERRATA_THEAD_VECTOR 3
>> > +#define ERRATA_THEAD_NUMBER 4
>> > #endif
>> >
>> > #ifdef __ASSEMBLY__
>> > @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE( \
>> > : "=r" (__ovl) : \
>> > : "memory")
>> >
>> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR
>> > +
>> > +#define THEAD_C9XX_CSR_VXSAT 0x9
>> > +#define THEAD_C9XX_CSR_VXRM 0xa

These CSR numbers haven't changed. Assuming that we actually need to handle
vxsat and vxrm as part of the vector state (if t-head decided to make them
controlled by sstatus.VS despite being in fcsr), why not unconditionally
define CSR_VXSAT and CSR_VXRM in csr.h?

>> > +
>> > +/*
>> > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
>> > + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
>> > + * vsetvli t4, x0, e8, m8, d1
>> > + */
>> > +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t"
>> > +
>> > +/*
>> > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
>> > + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize
>> > + * the call resulting in a different encoding and then using a value for
>> > + * the "mop" field that is not part of vector-0.7.1
>> > + * So encode specific variants for vstate_save and _restore.
>> > + */
>> > +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t"
>> > +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t"
>> > +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t"
>> > +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t"
>> > +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t"
>> > +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t"
>> > +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t"
>> > +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t"

.insn isn't supported by the kernel's minimum binutils version, but it _is_
supported by the oldest version of binutils that can assemble rvv 1.0
instructions. OP_V requires 2.39 so I use a literal 0x57 instead.

very untested, and I leave it to your judgement whether it actually improves
readability:

#define THEAD_VSETVLI_T4X0E8M8D1 ".insn i 0x57, 7, t4, x0, 3\n\t"
#define THEAD_VSB_V_V0T0 ".insn r STORE_FP, 0, 1, x0, t0, x0\n\t"
#define THEAD_VSB_V_V8T0 ".insn r STORE_FP, 0, 1, x8, t0, x0\n\t"
#define THEAD_VSB_V_V16T0 ".insn r STORE_FP, 0, 1, x16, t0, x0\n\t"
#define THEAD_VSB_V_V24T0 ".insn r STORE_FP, 0, 1, x24, t0, x0\n\t"
#define THEAD_VSB_V_V0T0 ".insn r LOAD_FP, 0, 9, x0, t0, x0\n\t"
#define THEAD_VSB_V_V8T0 ".insn r LOAD_FP, 0, 9, x8, t0, x0\n\t"
#define THEAD_VSB_V_V16T0 ".insn r LOAD_FP, 0, 9, x16, t0, x0\n\t"
#define THEAD_VSB_V_V24T0 ".insn r LOAD_FP, 0, 9, x24, t0, x0\n\t"

>> > +
>> > +#define ALT_SR_VS_VECTOR_1_0_SHIFT 9
>> > +#define ALT_SR_VS_THEAD_SHIFT 23
>> > +
>> > +#define ALT_SR_VS(_val, prot) \
>> > +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3", \
>> > + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \
>> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \
>> > + : "=r"(_val) \
>> > + : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT), \
>> > + "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT), \
>> > + "I"(ALT_SR_VS_VECTOR_1_0_SHIFT), \
>> > + "I"(ALT_SR_VS_THEAD_SHIFT))

I think this can be simplified by removing the shifts and using the li
pseudoinstruction (which will become lui on the _THEAD_ arm).

>> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
>> > +
>> > #endif /* __ASSEMBLY__ */
>> >
>> > #endif
>> > diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
>> > index 315c96d2b4d0..fa47f60f81e3 100644
>> > --- a/arch/riscv/include/asm/vector.h
>> > +++ b/arch/riscv/include/asm/vector.h
>> > @@ -18,6 +18,55 @@
>> > #include <asm/hwcap.h>
>> > #include <asm/csr.h>
>> > #include <asm/asm.h>
>> > +#include <asm/errata_list.h>
>> > +
>> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR
>> > +
>> > +static inline unsigned long riscv_sr_vs(void)
>> > +{
>> > + u32 val;
>> > +
>> > + ALT_SR_VS(val, SR_VS);
>> > + return val;
>> > +}
>> > +
>> > +static inline unsigned long riscv_sr_vs_initial(void)
>> > +{
>> > + u32 val;
>> > +
>> > + ALT_SR_VS(val, SR_VS_INITIAL);
>> > + return val;
>> > +}
>> > +
>> > +static inline unsigned long riscv_sr_vs_clean(void)
>> > +{
>> > + u32 val;
>> > +
>> > + ALT_SR_VS(val, SR_VS_CLEAN);
>> > + return val;
>> > +}
>> > +
>> > +static inline unsigned long riscv_sr_vs_dirty(void)
>> > +{
>> > + u32 val;
>> > +
>> > + ALT_SR_VS(val, SR_VS_DIRTY);
>> > + return val;
>> > +}
>> > +
>> > +#define SR_VS riscv_sr_vs()
>> > +#define SR_VS_INITIAL riscv_sr_vs_initial()
>> > +#define SR_VS_CLEAN riscv_sr_vs_clean()
>> > +#define SR_VS_DIRTY riscv_sr_vs_dirty()
>> > +
>> > +#else /* CONFIG_ERRATA_THEAD_VECTOR */
>> > +
>> > +#define SR_VS SR_VS_1_0
>> > +#define SR_VS_INITIAL SR_VS_INITIAL_1_0
>> > +#define SR_VS_CLEAN SR_VS_CLEAN_1_0
>> > +#define SR_VS_DIRTY SR_VS_DIRTY_1_0
>> > +
>> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
>> >
>> > extern bool riscv_v_supported;
>> > void riscv_vector_supported(void);
>> > @@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void)
>> >
>> > static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
>> > {
>> > - asm volatile (
>> > + register u32 t1 asm("t1") = (SR_FS);
>> > +
>> > + /*
>> > + * CSR_VCSR is defined as
>> > + * [2:1] - vxrm[1:0]
>> > + * [0] - vxsat
>> > + * The earlier vector spec implemented by T-Head uses separate
>> > + * registers for the same bit-elements, so just combine those
>> > + * into the existing output field.
>> > + *
>> > + * Additionally T-Head cores need FS to be enabled when accessing
>> > + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
>> > + * Though the cores do not implement the VXRM and VXSAT fields in the
>> > + * FCSR CSR that vector-0.7.1 specifies.

(for completeness only: this was found to be inaccurate in the previous thread)

>> > + */
>> > + asm volatile (ALTERNATIVE(
>> > "csrr %0, " __stringify(CSR_VSTART) "\n\t"
>> > "csrr %1, " __stringify(CSR_VTYPE) "\n\t"
>> > "csrr %2, " __stringify(CSR_VL) "\n\t"
>> > "csrr %3, " __stringify(CSR_VCSR) "\n\t"
>> > + __nops(5),
>> > + "csrs sstatus, t1\n\t"
>> > + "csrr %0, " __stringify(CSR_VSTART) "\n\t"
>> > + "csrr %1, " __stringify(CSR_VTYPE) "\n\t"
>> > + "csrr %2, " __stringify(CSR_VL) "\n\t"
>> > + "csrr %3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t"
>> > + "slliw %3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
>> > + "csrr t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t"
>> > + "or %3, %3, t4\n\t"
>> > + "csrc sstatus, t1\n\t",
>> > + THEAD_VENDOR_ID,
>> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
>> > : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
>> > - "=r" (dest->vcsr) : :);
>> > + "=r" (dest->vcsr) : "r"(t1) : "t4");
>> > }
>> >
>> > static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
>> > {
>> > - asm volatile (
>> > + register u32 t1 asm("t1") = (SR_FS);
>> > +
>> > + /*
>> > + * Similar to __vstate_csr_save above, restore values for the
>> > + * separate VXRM and VXSAT CSRs from the vcsr variable.
>> > + */
>> > + asm volatile (ALTERNATIVE(
>> > ".option push\n\t"
>> > ".option arch, +v\n\t"
>> > "vsetvl x0, %2, %1\n\t"
>> > ".option pop\n\t"
>> > "csrw " __stringify(CSR_VSTART) ", %0\n\t"
>> > "csrw " __stringify(CSR_VCSR) ", %3\n\t"
>> > + __nops(6),
>> > + "csrs sstatus, t1\n\t"
>> > + ".option push\n\t"
>> > + ".option arch, +v\n\t"
>> > + "vsetvl x0, %2, %1\n\t"
>> > + ".option pop\n\t"
>> > + "csrw " __stringify(CSR_VSTART) ", %0\n\t"
>> > + "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
>> > + "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t"
>> > + "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t"
>> > + "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t"
>> > + "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t"
>> > + "csrc sstatus, t1\n\t",
>> > + THEAD_VENDOR_ID,
>> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
>> > : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
>> > - "r" (src->vcsr) :);
>> > + "r" (src->vcsr), "r"(t1) : "t4");
>> > }
>> >
>> > static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
>> > @@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
>> >
>> > riscv_v_enable();
>> > __vstate_csr_save(save_to);
>> > - asm volatile (
>> > + asm volatile (ALTERNATIVE(
>> > + "nop\n\t"
>> > ".option push\n\t"
>> > ".option arch, +v\n\t"
>> > "vsetvli %0, x0, e8, m8, ta, ma\n\t"
>> > @@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
>> > "vse8.v v16, (%1)\n\t"
>> > "add %1, %1, %0\n\t"
>> > "vse8.v v24, (%1)\n\t"
>> > - ".option pop\n\t"
>> > - : "=&r" (vl) : "r" (datap) : "memory");

Pre-existing bug: The assembly code modifies %1, which is an input operand.
I think this should be

- : "=&r" (vl), "+r" (datap) : : "memory");

>> > + ".option pop\n\t",
>> > + "mv t0, %1\n\t"
>> > + THEAD_VSETVLI_T4X0E8M8D1
>> > + THEAD_VSB_V_V0T0
>> > + "addi t0, t0, 128\n\t"

We don't have a promise from T-Head that they won't introduce a core with
0.7.1 vectors and VLEN=256, and I'd rather not have code lying around that
will cause silent data corruption if that happens. THEAD_VSETVLI_T4X0E8M8D1
has rd=t4 so adding t4 should work in this arm.

>> > + THEAD_VSB_V_V8T0
>> > + "addi t0, t0, 128\n\t"
>> > + THEAD_VSB_V_V16T0
>> > + "addi t0, t0, 128\n\t"
>> > + THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
>> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
>> > + : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");

The bugfix for the existing code isn't needed here since you copied the
address into t0.

>> > riscv_v_disable();
>> > }
>> >
>> > @@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
>> > unsigned long vl;
>> >
>> > riscv_v_enable();
>> > - asm volatile (
>> > + asm volatile (ALTERNATIVE(
>> > + "nop\n\t"
>> > ".option push\n\t"
>> > ".option arch, +v\n\t"
>> > "vsetvli %0, x0, e8, m8, ta, ma\n\t"
>> > @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
>> > "vle8.v v16, (%1)\n\t"
>> > "add %1, %1, %0\n\t"
>> > "vle8.v v24, (%1)\n\t"
>> > - ".option pop\n\t"
>> > - : "=&r" (vl) : "r" (datap) : "memory");

- : "=&r" (vl), "+r" (datap) : : "memory");

-s

>> > + ".option pop\n\t",
>> > + "mv t0, %1\n\t"
>> > + THEAD_VSETVLI_T4X0E8M8D1
>> > + THEAD_VLB_V_V0T0
>> > + "addi t0, t0, 128\n\t"
>> > + THEAD_VLB_V_V8T0
>> > + "addi t0, t0, 128\n\t"
>> > + THEAD_VLB_V_V16T0
>> > + "addi t0, t0, 128\n\t"
>> > + THEAD_VLB_V_V24T0, THEAD_VENDOR_ID,
>> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
>> > + : "=&r" (vl) : "r" (datap) : "t0", "t4");
>> > __vstate_csr_restore(restore_from);
>> > riscv_v_disable();
>> > }
>> > diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
>> > index 74178fb71805..51726890a4d0 100644
>> > --- a/arch/riscv/kernel/vector.c
>> > +++ b/arch/riscv/kernel/vector.c
>> > @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
>> > u32 insn = (u32)regs->badaddr;
>> >
>> > /* Do not handle if V is not supported, or disabled */
>> > - if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V))
>> > + if (!has_vector())
>> > return false;
>> >
>> > /* If V has been enabled then it is not the first-use trap */
>>
>
>
>
>
>
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv

2023-06-24 11:31:25

by Andrew Jones

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling

On Sat, Jun 24, 2023 at 01:18:26AM -0400, Stefan O'Rear wrote:
> On Fri, Jun 23, 2023, at 6:40 AM, Heiko St?bner wrote:
...
> >> > +
> >> > +/*
> >> > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
> >> > + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
> >> > + * vsetvli t4, x0, e8, m8, d1
> >> > + */
> >> > +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t"
> >> > +
> >> > +/*
> >> > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
> >> > + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize
> >> > + * the call resulting in a different encoding and then using a value for
> >> > + * the "mop" field that is not part of vector-0.7.1
> >> > + * So encode specific variants for vstate_save and _restore.
> >> > + */
> >> > +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t"
> >> > +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t"
> >> > +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t"
> >> > +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t"
> >> > +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t"
> >> > +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t"
> >> > +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t"
> >> > +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t"
>
> .insn isn't supported by the kernel's minimum binutils version, but it _is_
> supported by the oldest version of binutils that can assemble rvv 1.0
> instructions. OP_V requires 2.39 so I use a literal 0x57 instead.
>
> very untested, and I leave it to your judgement whether it actually improves
> readability:
>
> #define THEAD_VSETVLI_T4X0E8M8D1 ".insn i 0x57, 7, t4, x0, 3\n\t"
> #define THEAD_VSB_V_V0T0 ".insn r STORE_FP, 0, 1, x0, t0, x0\n\t"
> #define THEAD_VSB_V_V8T0 ".insn r STORE_FP, 0, 1, x8, t0, x0\n\t"
> #define THEAD_VSB_V_V16T0 ".insn r STORE_FP, 0, 1, x16, t0, x0\n\t"
> #define THEAD_VSB_V_V24T0 ".insn r STORE_FP, 0, 1, x24, t0, x0\n\t"
> #define THEAD_VSB_V_V0T0 ".insn r LOAD_FP, 0, 9, x0, t0, x0\n\t"
> #define THEAD_VSB_V_V8T0 ".insn r LOAD_FP, 0, 9, x8, t0, x0\n\t"
> #define THEAD_VSB_V_V16T0 ".insn r LOAD_FP, 0, 9, x16, t0, x0\n\t"
> #define THEAD_VSB_V_V24T0 ".insn r LOAD_FP, 0, 9, x24, t0, x0\n\t"
>

We have the INSN_R() macro in arch/riscv/include/asm/insn-def.h for stuff
like this.

Thanks,
drew

2023-06-28 16:16:51

by Andy Chiu

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling

On Fri, Jun 23, 2023 at 5:53 PM Conor Dooley <[email protected]> wrote:
>
> Hey Heiko,
>
> On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote:
> > From: Heiko Stuebner <[email protected]>
> >
> > T-Head C9xx cores implement an older version (0.7.1) of the vector
> > specification.
> >
> > Relevant changes concerning the kernel are:
> > - different placement of the SR_VS bit for the vector unit status
> > - different encoding of the vsetvli instruction
> > - different instructions for loads and stores
> >
> > And a fixed VLEN of 128.
> >
> > The in-kernel access to vector instances is limited to the save and
> > restore of process states so the above mentioned areas can simply be
> > handled via the alternatives framework, similar to other T-Head specific
> > issues.
> >
> > Signed-off-by: Heiko Stuebner <[email protected]>
> > ---
> > arch/riscv/Kconfig.errata | 13 +++
> > arch/riscv/errata/thead/errata.c | 32 ++++++
> > arch/riscv/include/asm/csr.h | 24 ++++-
> > arch/riscv/include/asm/errata_list.h | 45 ++++++++-
> > arch/riscv/include/asm/vector.h | 139 +++++++++++++++++++++++++--
> > arch/riscv/kernel/vector.c | 2 +-
> > 6 files changed, 238 insertions(+), 17 deletions(-)
> >
> > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
> > index 0c8f4652cd82..b461312dd452 100644
> > --- a/arch/riscv/Kconfig.errata
> > +++ b/arch/riscv/Kconfig.errata
> > @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU
> >
> > If you don't know what to do here, say "Y".
> >
> > +config ERRATA_THEAD_VECTOR
> > + bool "Apply T-Head Vector errata"
> > + depends on ERRATA_THEAD && RISCV_ISA_V
> > + default y
> > + help
> > + The T-Head C9xx cores implement an earlier version 0.7.1
> > + of the vector extensions.
> > +
> > + This will apply the necessary errata to handle the non-standard
> > + behaviour via when switch to and from vector mode for processes.
> > +
> > + If you don't know what to do here, say "Y".
> > +
> > endmenu # "CPU errata selection"
> > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> > index c259dc925ec1..c41ec84bc8a5 100644
> > --- a/arch/riscv/errata/thead/errata.c
> > +++ b/arch/riscv/errata/thead/errata.c
> > @@ -15,6 +15,7 @@
> > #include <asm/errata_list.h>
> > #include <asm/hwprobe.h>
> > #include <asm/patch.h>
> > +#include <asm/vector.h>
> > #include <asm/vendorid_list.h>
> >
> > static bool errata_probe_pbmt(unsigned int stage,
> > @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage,
> > return true;
> > }
> >
> > +static bool errata_probe_vector(unsigned int stage,
> > + unsigned long arch_id, unsigned long impid)
> > +{
> > + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR))
> > + return false;
> > +
> > + /* target-c9xx cores report arch_id and impid as 0 */
> > + if (arch_id != 0 || impid != 0)
> > + return false;
> > +
> > + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) {
> > + /*
> > + * Disable VECTOR to detect illegal usage of vector in kernel.
> > + * This is normally done in _start_kernel but with the
> > + * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's
> > + * vector-0.7.1 and the vector-1.0-bits are unused there.
> > + */
> > + csr_clear(CSR_STATUS, SR_VS_THEAD);
> > + return false;
> > + }
> > +
> > + /* let has_vector() return true and set the static vlen */
>
> Hmm, I was wondering about how you were going to communicate this to
> userspace, since you're not going to be setting "v" in your DT, so
> there'll be nothing in /proc/cpuinfo indicating it. (I am assuming that
> this is your intention, as you'd not need to drop the alternative-based
> stuff from has_vector() if it wasn't)
>
> I don't think you can do this, as things stand, because of how hwprobe
> operates:
>
> static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
> const struct cpumask *cpus)
> {
> ...
>
> if (has_vector())
> pair->value |= RISCV_HWPROBE_IMA_V;
>
> ...
> }
>
> * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by
> version 1.0 of the RISC-V Vector extension manual.
>
> You'll need to change hwprobe to use has_vector() &&
> riscv_has_extension_unlikely(v), or similar, as the condition for
> reporting. You'll also need some other way to communicate to userspace
> that T-Head's vector stuff is supported, no?
>
> I'm also _really_ unconvinced that turning on extensions that were not
> indicated in the DT or via ACPI is something we should be doing. Have I
> missed something here that'd make that assessment inaccurate?
>
> Cheers,
> Conor.
>
> FWIW I am currently working on kernel-side support for the new
> extension properties that I have posted dt-binding patches for.
> I'll go post it once Palmer has merged his current set of patches in his
> staging repo into for-next, as I've got a lot of deps on riscv,isa
> parser changes.
> I'm really hoping that it provides an easier way to go off probing for
> vendor specific stuff for DT-based systems, since it will no longer
> require complex probing, just an of_property_match_string() for each
> possible cpu and we could very well provide a vendor hook during that
> process.
> Clearly though, that stuff is not yet merged as it has not even been
> posted yet.
>
> Current WIP of that is here:
> https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/log/?h=riscv-extensions-strings-on-palmer
>
> > + riscv_vector_supported();
> > + riscv_v_vsize = 128 / 8 * 32;
> > +
> > + return true;
> > +}
> > +
> > static u32 thead_errata_probe(unsigned int stage,
> > unsigned long archid, unsigned long impid)
> > {
> > @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage,
> > if (errata_probe_pmu(stage, archid, impid))
> > cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
> >
> > + if (errata_probe_vector(stage, archid, impid))
> > + cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR);
> > +
> > return cpu_req_errata;
> > }
> >
> > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> > index 2d79bca6ffe8..521b3b939e51 100644
> > --- a/arch/riscv/include/asm/csr.h
> > +++ b/arch/riscv/include/asm/csr.h
> > @@ -24,11 +24,25 @@
> > #define SR_FS_CLEAN _AC(0x00004000, UL)
> > #define SR_FS_DIRTY _AC(0x00006000, UL)
> >
> > -#define SR_VS _AC(0x00000600, UL) /* Vector Status */
> > -#define SR_VS_OFF _AC(0x00000000, UL)
> > -#define SR_VS_INITIAL _AC(0x00000200, UL)
> > -#define SR_VS_CLEAN _AC(0x00000400, UL)
> > -#define SR_VS_DIRTY _AC(0x00000600, UL)
> > +#define SR_VS_OFF _AC(0x00000000, UL)
> > +
> > +#define SR_VS_1_0 _AC(0x00000600, UL) /* Vector Status */
> > +#define SR_VS_INITIAL_1_0 _AC(0x00000200, UL)
> > +#define SR_VS_CLEAN_1_0 _AC(0x00000400, UL)
> > +#define SR_VS_DIRTY_1_0 _AC(0x00000600, UL)
> > +
> > +#define SR_VS_THEAD _AC(0x01800000, UL) /* Vector Status */
> > +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL)
> > +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL)
> > +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL)
> > +
> > +/*
> > + * Always default to vector-1.0 handling in assembly and let the broken
> > + * implementations handle their case separately.
> > + */
> > +#ifdef __ASSEMBLY__
> > +#define SR_VS SR_VS_1_0
> > +#endif
> >
> > #define SR_XS _AC(0x00018000, UL) /* Extension Status */
> > #define SR_XS_OFF _AC(0x00000000, UL)
> > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> > index fb1a810f3d8c..ab21fadbe9c6 100644
> > --- a/arch/riscv/include/asm/errata_list.h
> > +++ b/arch/riscv/include/asm/errata_list.h
> > @@ -21,7 +21,8 @@
> > #define ERRATA_THEAD_PBMT 0
> > #define ERRATA_THEAD_CMO 1
> > #define ERRATA_THEAD_PMU 2
> > -#define ERRATA_THEAD_NUMBER 3
> > +#define ERRATA_THEAD_VECTOR 3
> > +#define ERRATA_THEAD_NUMBER 4
> > #endif
> >
> > #ifdef __ASSEMBLY__
> > @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE( \
> > : "=r" (__ovl) : \
> > : "memory")
> >
> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> > +
> > +#define THEAD_C9XX_CSR_VXSAT 0x9
> > +#define THEAD_C9XX_CSR_VXRM 0xa
> > +
> > +/*
> > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
> > + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
> > + * vsetvli t4, x0, e8, m8, d1
> > + */
> > +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t"
> > +
> > +/*
> > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
> > + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize
> > + * the call resulting in a different encoding and then using a value for
> > + * the "mop" field that is not part of vector-0.7.1
> > + * So encode specific variants for vstate_save and _restore.
> > + */
> > +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t"
> > +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t"
> > +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t"
> > +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t"
> > +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t"
> > +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t"
> > +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t"
> > +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t"
> > +
> > +#define ALT_SR_VS_VECTOR_1_0_SHIFT 9
> > +#define ALT_SR_VS_THEAD_SHIFT 23
> > +
> > +#define ALT_SR_VS(_val, prot) \
> > +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3", \
> > + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \
> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \
> > + : "=r"(_val) \
> > + : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT), \
> > + "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT), \
> > + "I"(ALT_SR_VS_VECTOR_1_0_SHIFT), \
> > + "I"(ALT_SR_VS_THEAD_SHIFT))
> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
> > +
> > #endif /* __ASSEMBLY__ */
> >
> > #endif
> > diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
> > index 315c96d2b4d0..fa47f60f81e3 100644
> > --- a/arch/riscv/include/asm/vector.h
> > +++ b/arch/riscv/include/asm/vector.h
> > @@ -18,6 +18,55 @@
> > #include <asm/hwcap.h>
> > #include <asm/csr.h>
> > #include <asm/asm.h>
> > +#include <asm/errata_list.h>
> > +
> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> > +
> > +static inline unsigned long riscv_sr_vs(void)
> > +{
> > + u32 val;
> > +
> > + ALT_SR_VS(val, SR_VS);
> > + return val;
> > +}
> > +
> > +static inline unsigned long riscv_sr_vs_initial(void)
> > +{
> > + u32 val;
> > +
> > + ALT_SR_VS(val, SR_VS_INITIAL);
> > + return val;
> > +}
> > +
> > +static inline unsigned long riscv_sr_vs_clean(void)
> > +{
> > + u32 val;
> > +
> > + ALT_SR_VS(val, SR_VS_CLEAN);
> > + return val;
> > +}
> > +
> > +static inline unsigned long riscv_sr_vs_dirty(void)
> > +{
> > + u32 val;
> > +
> > + ALT_SR_VS(val, SR_VS_DIRTY);
> > + return val;
> > +}
> > +
> > +#define SR_VS riscv_sr_vs()
> > +#define SR_VS_INITIAL riscv_sr_vs_initial()
> > +#define SR_VS_CLEAN riscv_sr_vs_clean()
> > +#define SR_VS_DIRTY riscv_sr_vs_dirty()
> > +
> > +#else /* CONFIG_ERRATA_THEAD_VECTOR */
> > +
> > +#define SR_VS SR_VS_1_0
> > +#define SR_VS_INITIAL SR_VS_INITIAL_1_0
> > +#define SR_VS_CLEAN SR_VS_CLEAN_1_0
> > +#define SR_VS_DIRTY SR_VS_DIRTY_1_0
> > +
> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
> >
> > extern bool riscv_v_supported;
> > void riscv_vector_supported(void);
> > @@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void)
> >
> > static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
> > {
> > - asm volatile (
> > + register u32 t1 asm("t1") = (SR_FS);
> > +
> > + /*
> > + * CSR_VCSR is defined as
> > + * [2:1] - vxrm[1:0]
> > + * [0] - vxsat
> > + * The earlier vector spec implemented by T-Head uses separate
> > + * registers for the same bit-elements, so just combine those
> > + * into the existing output field.
> > + *
> > + * Additionally T-Head cores need FS to be enabled when accessing
> > + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
> > + * Though the cores do not implement the VXRM and VXSAT fields in the
> > + * FCSR CSR that vector-0.7.1 specifies.
> > + */
> > + asm volatile (ALTERNATIVE(
> > "csrr %0, " __stringify(CSR_VSTART) "\n\t"
> > "csrr %1, " __stringify(CSR_VTYPE) "\n\t"
> > "csrr %2, " __stringify(CSR_VL) "\n\t"
> > "csrr %3, " __stringify(CSR_VCSR) "\n\t"
> > + __nops(5),
> > + "csrs sstatus, t1\n\t"
> > + "csrr %0, " __stringify(CSR_VSTART) "\n\t"
> > + "csrr %1, " __stringify(CSR_VTYPE) "\n\t"
> > + "csrr %2, " __stringify(CSR_VL) "\n\t"
> > + "csrr %3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t"
> > + "slliw %3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
> > + "csrr t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t"
> > + "or %3, %3, t4\n\t"
> > + "csrc sstatus, t1\n\t",
> > + THEAD_VENDOR_ID,
> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> > : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
> > - "=r" (dest->vcsr) : :);
> > + "=r" (dest->vcsr) : "r"(t1) : "t4");
> > }
> >
> > static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
> > {
> > - asm volatile (
> > + register u32 t1 asm("t1") = (SR_FS);
> > +
> > + /*
> > + * Similar to __vstate_csr_save above, restore values for the
> > + * separate VXRM and VXSAT CSRs from the vcsr variable.
> > + */
> > + asm volatile (ALTERNATIVE(
> > ".option push\n\t"
> > ".option arch, +v\n\t"
> > "vsetvl x0, %2, %1\n\t"
> > ".option pop\n\t"
> > "csrw " __stringify(CSR_VSTART) ", %0\n\t"
> > "csrw " __stringify(CSR_VCSR) ", %3\n\t"
> > + __nops(6),
> > + "csrs sstatus, t1\n\t"
> > + ".option push\n\t"
> > + ".option arch, +v\n\t"
> > + "vsetvl x0, %2, %1\n\t"
> > + ".option pop\n\t"
> > + "csrw " __stringify(CSR_VSTART) ", %0\n\t"
> > + "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
> > + "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t"
> > + "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t"
> > + "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t"
> > + "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t"
> > + "csrc sstatus, t1\n\t",
> > + THEAD_VENDOR_ID,
> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> > : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
> > - "r" (src->vcsr) :);
> > + "r" (src->vcsr), "r"(t1) : "t4");
> > }
> >
> > static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> > @@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> >
> > riscv_v_enable();
> > __vstate_csr_save(save_to);
> > - asm volatile (
> > + asm volatile (ALTERNATIVE(
> > + "nop\n\t"
> > ".option push\n\t"
> > ".option arch, +v\n\t"
> > "vsetvli %0, x0, e8, m8, ta, ma\n\t"
> > @@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> > "vse8.v v16, (%1)\n\t"
> > "add %1, %1, %0\n\t"
> > "vse8.v v24, (%1)\n\t"
> > - ".option pop\n\t"
> > - : "=&r" (vl) : "r" (datap) : "memory");
> > + ".option pop\n\t",
> > + "mv t0, %1\n\t"
> > + THEAD_VSETVLI_T4X0E8M8D1
> > + THEAD_VSB_V_V0T0
> > + "addi t0, t0, 128\n\t"
> > + THEAD_VSB_V_V8T0
> > + "addi t0, t0, 128\n\t"
> > + THEAD_VSB_V_V16T0
> > + "addi t0, t0, 128\n\t"
> > + THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> > + : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");
> > riscv_v_disable();
> > }
> >
> > @@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
> > unsigned long vl;
> >
> > riscv_v_enable();
> > - asm volatile (
> > + asm volatile (ALTERNATIVE(
> > + "nop\n\t"
> > ".option push\n\t"
> > ".option arch, +v\n\t"
> > "vsetvli %0, x0, e8, m8, ta, ma\n\t"
> > @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
> > "vle8.v v16, (%1)\n\t"
> > "add %1, %1, %0\n\t"
> > "vle8.v v24, (%1)\n\t"
> > - ".option pop\n\t"
> > - : "=&r" (vl) : "r" (datap) : "memory");
> > + ".option pop\n\t",
> > + "mv t0, %1\n\t"
> > + THEAD_VSETVLI_T4X0E8M8D1
> > + THEAD_VLB_V_V0T0
> > + "addi t0, t0, 128\n\t"
> > + THEAD_VLB_V_V8T0
> > + "addi t0, t0, 128\n\t"
> > + THEAD_VLB_V_V16T0
> > + "addi t0, t0, 128\n\t"
> > + THEAD_VLB_V_V24T0, THEAD_VENDOR_ID,
> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> > + : "=&r" (vl) : "r" (datap) : "t0", "t4");
> > __vstate_csr_restore(restore_from);
> > riscv_v_disable();
> > }
> > diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
> > index 74178fb71805..51726890a4d0 100644
> > --- a/arch/riscv/kernel/vector.c
> > +++ b/arch/riscv/kernel/vector.c
> > @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
> > u32 insn = (u32)regs->badaddr;
> >
> > /* Do not handle if V is not supported, or disabled */
> > - if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V))
> > + if (!has_vector())
> > return false;

riscv_v_first_use_handler() will not be able to detect if a process is
running with PR_RISCV_V_VSTATE_CTRL_OFF here after applying this
change IIIUC. This is the case where we disable the availability of V
for a process but it still executes V instructions anyway.

> >
> > /* If V has been enabled then it is not the first-use trap */
> > --
> > 2.39.2
> >
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv

2023-06-29 19:59:49

by Rémi Denis-Courmont

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling

Hi,

Le perjantaina 23. kesäkuuta 2023, 2.13.05 EEST Heiko Stuebner a écrit :
> diff --git a/arch/riscv/include/asm/errata_list.h
> b/arch/riscv/include/asm/errata_list.h index fb1a810f3d8c..ab21fadbe9c6
> 100644
> --- a/arch/riscv/include/asm/errata_list.h
> +++ b/arch/riscv/include/asm/errata_list.h
> @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE(
\
>
> : "=r" (__ovl) :
\
> : "memory")
>
> +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> +
> +#define THEAD_C9XX_CSR_VXSAT 0x9
> +#define THEAD_C9XX_CSR_VXRM 0xa
> +
> +/*
> + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
> + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
> + * vsetvli t4, x0, e8, m8, d1
> + */
> +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t"

That is equivalent to, and (IMHO) much less legible than:
".insn i OP_V, 7, t4, x0, 3"
Or even if you don't mind second-guessing RVV 1.0 assemblers:
"vsetvli t4, zero, e8, m8, tu, mu"

Either way, you don't need to hard-code X-register operands in assembler
macros (though you do unfortunately need to hard-code V register operands if
you use .insn).

> +
> +/*
> + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
> + * encoding as the standard vse8.v and vle8.v,

Not only in theory. vse8.v and vle8.v have only one possible encoding each
(for given operands).

> compilers seem to optimize

Nit: By "compilers", do you mean "assemblers"? That's a bit misleading to me.

> + * the call resulting in a different encoding and then using a value for
> + * the "mop" field that is not part of vector-0.7.1

Uh, no? They use mew = 0b0 and mop = 0b00, which corresponds to mop = 0b000.

> + * So encode specific variants for vstate_save and _restore.
> + */
> +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t"

That's "vse8.v v0, (t0)", at least as assembled with binutils 2.40.50.20230625
(from Debian unstable). I don't understand the rationale for hard-coding from
the above comment. Maybe that's just me being an idiot, but if so, then the
comment ought to be clarified.

(I do realise that vse8.v and vsb.v are not exactly equivalent in behaviour,
but here, the concern should be the assembler, not the processor.)

> +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t"
> +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t"
> +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t"
> +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t"

This has one nibble too many for a 32-bit value.

And why use sign-extended loads? Zero-extended loads would have the exact same
encoding as vle8.v, and not need this dark magic, AFAICT.

> +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t"
> +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t"
> +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t"
> +
> +#define ALT_SR_VS_VECTOR_1_0_SHIFT 9
> +#define ALT_SR_VS_THEAD_SHIFT 23
> +
> +#define ALT_SR_VS(_val, prot)
\
> +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3",
\
> + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID,
\
> + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \
> + : "=r"(_val)
\
> + : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT), \
> + "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT),
\
> + "I"(ALT_SR_VS_VECTOR_1_0_SHIFT),
\
> + "I"(ALT_SR_VS_THEAD_SHIFT))
> +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
> +
> #endif /* __ASSEMBLY__ */
>
> #endif

--
レミ・デニ-クールモン
http://www.remlab.net/