Improve the performance of icache flushing by creating a new prctl flag
PR_RISCV_SET_ICACHE_FLUSH_CTX. The interface is left generic to allow
for future expansions such as with the proposed J extension [1].
Documentation is also provided to explain the use case.
[1] https://github.com/riscv/riscv-j-extension
Signed-off-by: Charlie Jenkins <[email protected]>
---
Changes in v5:
- Minor documentation changes (Randy)
- Link to v4: https://lore.kernel.org/r/[email protected]
Changes in v4:
- Add OFF flag to disallow fence.i in userspace (Atish)
- Fix documentation issues (Atish)
- Link to v3: https://lore.kernel.org/r/[email protected]
Changes in v3:
- Check if value force_icache_flush set on thread, rather than in mm
twice (Clément)
- Link to v2: https://lore.kernel.org/r/[email protected]
Changes in v2:
- Fix kernel-doc comment (Conor)
- Link to v1: https://lore.kernel.org/r/[email protected]
---
Charlie Jenkins (2):
riscv: Include riscv_set_icache_flush_ctx prctl
documentation: Document PR_RISCV_SET_ICACHE_FLUSH_CTX prctl
Documentation/arch/riscv/cmodx.rst | 88 ++++++++++++++++++++++++++++++++++++++
Documentation/arch/riscv/index.rst | 1 +
arch/riscv/include/asm/mmu.h | 2 +
arch/riscv/include/asm/processor.h | 6 +++
arch/riscv/mm/cacheflush.c | 56 ++++++++++++++++++++++++
arch/riscv/mm/context.c | 8 ++--
include/uapi/linux/prctl.h | 4 ++
kernel/sys.c | 6 +++
8 files changed, 168 insertions(+), 3 deletions(-)
---
base-commit: b85ea95d086471afb4ad062012a4d73cd328fa86
change-id: 20231117-fencei-f9f60d784fa0
--
- Charlie
Support new prctl with key PR_RISCV_SET_ICACHE_FLUSH_CTX to enable
optimization of cross modifying code. This prctl enables userspace code
to use icache flushing instructions such as fence.i with the guarantee
that the icache will continue to be clean after thread migration.
Signed-off-by: Charlie Jenkins <[email protected]>
---
arch/riscv/include/asm/mmu.h | 2 ++
arch/riscv/include/asm/processor.h | 6 ++++
arch/riscv/mm/cacheflush.c | 56 ++++++++++++++++++++++++++++++++++++++
arch/riscv/mm/context.c | 8 ++++--
include/uapi/linux/prctl.h | 4 +++
kernel/sys.c | 6 ++++
6 files changed, 79 insertions(+), 3 deletions(-)
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 355504b37f8e..60be458e94da 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -19,6 +19,8 @@ typedef struct {
#ifdef CONFIG_SMP
/* A local icache flush is needed before user execution can resume. */
cpumask_t icache_stale_mask;
+ /* Force local icache flush on all migrations. */
+ bool force_icache_flush;
#endif
#ifdef CONFIG_BINFMT_ELF_FDPIC
unsigned long exec_fdpic_loadmap;
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index f19f861cda54..7eda6c75e0f2 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -84,6 +84,9 @@ struct thread_struct {
unsigned long vstate_ctrl;
struct __riscv_v_ext_state vstate;
unsigned long align_ctl;
+#ifdef CONFIG_SMP
+ bool force_icache_flush;
+#endif
};
/* Whitelist the fstate from the task_struct for hardened usercopy */
@@ -145,6 +148,9 @@ extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
#define GET_UNALIGN_CTL(tsk, addr) get_unalign_ctl((tsk), (addr))
#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val))
+#define RISCV_SET_ICACHE_FLUSH_CTX(arg1, arg2) riscv_set_icache_flush_ctx(arg1, arg2)
+extern int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long per_thread);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_PROCESSOR_H */
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 55a34f2020a8..a647b8abbe37 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -5,6 +5,7 @@
#include <linux/acpi.h>
#include <linux/of.h>
+#include <linux/prctl.h>
#include <asm/acpi.h>
#include <asm/cacheflush.h>
@@ -152,3 +153,58 @@ void __init riscv_init_cbo_blocksizes(void)
if (cboz_block_size)
riscv_cboz_block_size = cboz_block_size;
}
+
+/**
+ * riscv_set_icache_flush_ctx() - Enable/disable icache flushing instructions in userspace.
+ * @ctx: Set the type of icache flushing instructions permitted/prohibited.
+ *
+ * * %PR_RISCV_CTX_SW_FENCEI_ON: Allow fence.i in userspace.
+ *
+ * * %PR_RISCV_CTX_SW_FENCEI_OFF: Disallow fence.i in userspace. When
+ * ``per_thread == 0``, this will affect all threads in a process. Therefore,
+ * caution must be taken -- only use this flag when you can guarantee that no
+ * thread in the process will emit fence.i from this point onward.
+ *
+ * @per_thread: When set to 0, will perform operation on process migration. When
+ * set to 1, will perform operation on thread migration.
+ *
+ * When ``per_thread == 0``, all threads in the process are permitted to emit
+ * icache flushing instructions. Whenever any thread in the process is migrated,
+ * the corresponding hart's icache will be guaranteed to be consistent with
+ * instruction storage. Note this does not enforce any guarantees outside of
+ * migration. If a thread modifies an instruction that another thread may
+ * attempt to execute, the other thread must still emit an icache flushing
+ * instruction before attempting to execute the potentially modified
+ * instruction. This must be performed by the userspace program.
+ *
+ * In per-thread context (i.e. ``per_thread == 1``), only the thread calling this
+ * function is permitted to emit icache flushing instructions. When the thread
+ * is migrated, the corresponding hart's icache will be guaranteed to be
+ * consistent with instruction storage.
+ *
+ * On kernels configured without SMP, this function is a nop as migrations
+ * across harts will not occur.
+ */
+int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long per_thread)
+{
+#ifdef CONFIG_SMP
+	switch (ctx) {
+	case PR_RISCV_CTX_SW_FENCEI_ON:
+		if (per_thread)
+			current->thread.force_icache_flush = true;
+		else
+			current->mm->context.force_icache_flush = true;
+		break;
+	case PR_RISCV_CTX_SW_FENCEI_OFF:
+		if (per_thread)
+			current->thread.force_icache_flush = false;
+		else
+			current->mm->context.force_icache_flush = false;
+		break;
+	default:
+		/* Reject unknown ctx values instead of silently reporting success. */
+		return -EINVAL;
+	}
+#endif
+	return 0;
+}
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 217fd4de6134..0146c61be0ab 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -297,12 +297,14 @@ static inline void set_mm(struct mm_struct *prev,
*
* The "cpu" argument must be the current local CPU number.
*/
-static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu)
+static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu,
+ struct task_struct *task)
{
#ifdef CONFIG_SMP
cpumask_t *mask = &mm->context.icache_stale_mask;
- if (cpumask_test_cpu(cpu, mask)) {
+ if (cpumask_test_cpu(cpu, mask) || mm->context.force_icache_flush ||
+ (task && task->thread.force_icache_flush)) {
cpumask_clear_cpu(cpu, mask);
/*
* Ensure the remote hart's writes are visible to this hart.
@@ -332,5 +334,5 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
set_mm(prev, next, cpu);
- flush_icache_deferred(next, cpu);
+ flush_icache_deferred(next, cpu, task);
}
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 370ed14b1ae0..ec731dda5b8e 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -306,4 +306,8 @@ struct prctl_mm_map {
# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc
# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f
+#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
+# define PR_RISCV_CTX_SW_FENCEI_ON 0
+# define PR_RISCV_CTX_SW_FENCEI_OFF 1
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 420d9cb9cc8e..e806a8a67c36 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -146,6 +146,9 @@
#ifndef RISCV_V_GET_CONTROL
# define RISCV_V_GET_CONTROL() (-EINVAL)
#endif
+#ifndef RISCV_SET_ICACHE_FLUSH_CTX
+# define RISCV_SET_ICACHE_FLUSH_CTX(a, b) (-EINVAL)
+#endif
/*
* this is where the system-wide overflow UID and GID are defined, for
@@ -2739,6 +2742,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_RISCV_V_GET_CONTROL:
error = RISCV_V_GET_CONTROL();
break;
+ case PR_RISCV_SET_ICACHE_FLUSH_CTX:
+ error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3);
+ break;
default:
error = -EINVAL;
break;
--
2.43.0
Provide documentation that explains how to properly do CMODX in riscv.
Signed-off-by: Charlie Jenkins <[email protected]>
---
Documentation/arch/riscv/cmodx.rst | 88 ++++++++++++++++++++++++++++++++++++++
Documentation/arch/riscv/index.rst | 1 +
2 files changed, 89 insertions(+)
diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst
new file mode 100644
index 000000000000..afd7086c222c
--- /dev/null
+++ b/Documentation/arch/riscv/cmodx.rst
@@ -0,0 +1,88 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================================================================
+Concurrent Modification and Execution of Instructions (CMODX) for RISC-V Linux
+==============================================================================
+
+CMODX is a programming technique where a program executes instructions that were
+modified by the program itself. Instruction storage and the instruction cache
+(icache) are not guaranteed to be synchronized on RISC-V hardware. Therefore, the
+program must enforce its own synchronization with the unprivileged fence.i
+instruction.
+
+However, the default Linux ABI prohibits the use of fence.i in userspace
+applications. At any point the scheduler may migrate a task onto a new hart. If
+migration occurs after the userspace synchronized the icache and instruction
+storage with fence.i, the icache will no longer be clean. This is due to the
+behavior of fence.i only affecting the hart that it is called on. Thus, the hart
+that the task has been migrated to may not have synchronized instruction storage
+and icache.
+
+There are two ways to solve this problem: use the riscv_flush_icache() syscall,
+or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in
+userspace. The syscall performs a one-off icache flushing operation. The prctl
+changes the Linux ABI to allow userspace to emit icache flushing operations.
+
+prctl() Interface
+---------------------
+
+Call prctl() with ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` as the first argument. The
+remaining arguments will be delegated to the riscv_set_icache_flush_ctx
+function detailed below.
+
+.. kernel-doc:: arch/riscv/mm/cacheflush.c
+ :identifiers: riscv_set_icache_flush_ctx
+
+Example usage:
+
+The following files are meant to be compiled and linked with each other. The
+modify_instruction() function replaces an add of zero with an add of one,
+causing the instruction sequence in get_value() to change from returning a zero
+to returning a one.
+
+cmodx.c::
+
+ #include <stdio.h>
+ #include <sys/prctl.h>
+
+ extern int get_value();
+ extern void modify_instruction();
+
+ int main()
+ {
+ int value = get_value();
+ printf("Value before cmodx: %d\n", value);
+
+ // Call prctl before first fence.i is called inside modify_instruction
+ prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, 0);
+ modify_instruction();
+
+ value = get_value();
+ printf("Value after cmodx: %d\n", value);
+ return 0;
+ }
+
+cmodx.S::
+
+ .option norvc
+
+ .text
+ .global modify_instruction
+ modify_instruction:
+ lw a0, new_insn
+ lui a5,%hi(old_insn)
+ sw a0,%lo(old_insn)(a5)
+ fence.i
+ ret
+
+ .section modifiable, "awx"
+ .global get_value
+ get_value:
+ li a0, 0
+ old_insn:
+ addi a0, a0, 0
+ ret
+
+ .data
+ new_insn:
+ addi a0, a0, 1
diff --git a/Documentation/arch/riscv/index.rst b/Documentation/arch/riscv/index.rst
index 4dab0cb4b900..eecf347ce849 100644
--- a/Documentation/arch/riscv/index.rst
+++ b/Documentation/arch/riscv/index.rst
@@ -13,6 +13,7 @@ RISC-V architecture
patch-acceptance
uabi
vector
+ cmodx
features
--
2.43.0
On Mon, Jan 8, 2024 at 10:42 AM Charlie Jenkins <[email protected]> wrote:
>
> Support new prctl with key PR_RISCV_SET_ICACHE_FLUSH_CTX to enable
> optimization of cross modifying code. This prctl enables userspace code
> to use icache flushing instructions such as fence.i with the guarantee
> that the icache will continue to be clean after thread migration.
>
> Signed-off-by: Charlie Jenkins <[email protected]>
> ---
> arch/riscv/include/asm/mmu.h | 2 ++
> arch/riscv/include/asm/processor.h | 6 ++++
> arch/riscv/mm/cacheflush.c | 56 ++++++++++++++++++++++++++++++++++++++
> arch/riscv/mm/context.c | 8 ++++--
> include/uapi/linux/prctl.h | 4 +++
> kernel/sys.c | 6 ++++
> 6 files changed, 79 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
> index 355504b37f8e..60be458e94da 100644
> --- a/arch/riscv/include/asm/mmu.h
> +++ b/arch/riscv/include/asm/mmu.h
> @@ -19,6 +19,8 @@ typedef struct {
> #ifdef CONFIG_SMP
> /* A local icache flush is needed before user execution can resume. */
> cpumask_t icache_stale_mask;
> + /* Force local icache flush on all migrations. */
> + bool force_icache_flush;
> #endif
> #ifdef CONFIG_BINFMT_ELF_FDPIC
> unsigned long exec_fdpic_loadmap;
> diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> index f19f861cda54..7eda6c75e0f2 100644
> --- a/arch/riscv/include/asm/processor.h
> +++ b/arch/riscv/include/asm/processor.h
> @@ -84,6 +84,9 @@ struct thread_struct {
> unsigned long vstate_ctrl;
> struct __riscv_v_ext_state vstate;
> unsigned long align_ctl;
> +#ifdef CONFIG_SMP
> + bool force_icache_flush;
> +#endif
> };
>
> /* Whitelist the fstate from the task_struct for hardened usercopy */
> @@ -145,6 +148,9 @@ extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
> #define GET_UNALIGN_CTL(tsk, addr) get_unalign_ctl((tsk), (addr))
> #define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val))
>
> +#define RISCV_SET_ICACHE_FLUSH_CTX(arg1, arg2) riscv_set_icache_flush_ctx(arg1, arg2)
> +extern int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long per_thread);
> +
> #endif /* __ASSEMBLY__ */
>
> #endif /* _ASM_RISCV_PROCESSOR_H */
> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
> index 55a34f2020a8..a647b8abbe37 100644
> --- a/arch/riscv/mm/cacheflush.c
> +++ b/arch/riscv/mm/cacheflush.c
> @@ -5,6 +5,7 @@
>
> #include <linux/acpi.h>
> #include <linux/of.h>
> +#include <linux/prctl.h>
> #include <asm/acpi.h>
> #include <asm/cacheflush.h>
>
> @@ -152,3 +153,58 @@ void __init riscv_init_cbo_blocksizes(void)
> if (cboz_block_size)
> riscv_cboz_block_size = cboz_block_size;
> }
> +
> +/**
> + * riscv_set_icache_flush_ctx() - Enable/disable icache flushing instructions in userspace.
> + * @ctx: Set the type of icache flushing instructions permitted/prohibited.
> + *
> + * * %PR_RISCV_CTX_SW_FENCEI_ON: Allow fence.i in userspace.
> + *
> + * * %PR_RISCV_CTX_SW_FENCEI_OFF: Disallow fence.i in userspace. When
> + * ``per_thread == 0``, this will effect all threads in a process. Therefore,
> + * caution must be taken -- only use this flag when you can guarantee that no
> + * thread in the process will emit fence.i from this point onward.
> + *
> + * @per_thread: When set to 0, will perform operation on process migration. When
> + * set to 1, will perform operation on thread migration.
> + *
> + * When ``per_thread == 0``, all threads in the process are permitted to emit
> + * icache flushing instructions. Whenever any thread in the process is migrated,
> + * the corresponding hart's icache will be guaranteed to be consistent with
> + * instruction storage. Note this does not enforce any guarantees outside of
> + * migration. If a thread modifies an instruction that another thread may
> + * attempt to execute, the other thread must still emit an icache flushing
> + * instruction before attempting to execute the potentially modified
> + * instruction. This must be performed by the userspace program.
> + *
> + * In per-thread context (eg. ``per_thread == 1``), only the thread calling this
> + * function is permitted to emit icache flushing instructions. When the thread
> + * is migrated, the corresponding hart's icache will be guaranteed to be
> + * consistent with instruction storage.
> + *
> + * On kernels configured without SMP, this function is a nop as migrations
> + * across harts will not occur.
> + */
> +int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long per_thread)
> +{
> +#ifdef CONFIG_SMP
> + switch (ctx) {
> + case PR_RISCV_CTX_SW_FENCEI_ON:
> + if (per_thread)
> + current->thread.force_icache_flush = true;
> + else
> + current->mm->context.force_icache_flush = true;
> + break;
> + case PR_RISCV_CTX_SW_FENCEI_OFF:
> + if (per_thread)
> + current->thread.force_icache_flush = false;
> + else
> + current->mm->context.force_icache_flush = false;
> + break;
> +
> + default:
> + break;
> + }
> +#endif
> + return 0;
> +}
> diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
> index 217fd4de6134..0146c61be0ab 100644
> --- a/arch/riscv/mm/context.c
> +++ b/arch/riscv/mm/context.c
> @@ -297,12 +297,14 @@ static inline void set_mm(struct mm_struct *prev,
> *
> * The "cpu" argument must be the current local CPU number.
> */
> -static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu)
> +static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu,
> + struct task_struct *task)
> {
> #ifdef CONFIG_SMP
> cpumask_t *mask = &mm->context.icache_stale_mask;
>
> - if (cpumask_test_cpu(cpu, mask)) {
> + if (cpumask_test_cpu(cpu, mask) || mm->context.force_icache_flush ||
> + (task && task->thread.force_icache_flush)) {
> cpumask_clear_cpu(cpu, mask);
> /*
> * Ensure the remote hart's writes are visible to this hart.
> @@ -332,5 +334,5 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
>
> set_mm(prev, next, cpu);
>
> - flush_icache_deferred(next, cpu);
> + flush_icache_deferred(next, cpu, task);
> }
> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index 370ed14b1ae0..ec731dda5b8e 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -306,4 +306,8 @@ struct prctl_mm_map {
> # define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc
> # define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f
>
> +#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
> +# define PR_RISCV_CTX_SW_FENCEI_ON 0
> +# define PR_RISCV_CTX_SW_FENCEI_OFF 1
> +
> #endif /* _LINUX_PRCTL_H */
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 420d9cb9cc8e..e806a8a67c36 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -146,6 +146,9 @@
> #ifndef RISCV_V_GET_CONTROL
> # define RISCV_V_GET_CONTROL() (-EINVAL)
> #endif
> +#ifndef RISCV_SET_ICACHE_FLUSH_CTX
> +# define RISCV_SET_ICACHE_FLUSH_CTX(a, b) (-EINVAL)
> +#endif
>
> /*
> * this is where the system-wide overflow UID and GID are defined, for
> @@ -2739,6 +2742,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
> case PR_RISCV_V_GET_CONTROL:
> error = RISCV_V_GET_CONTROL();
> break;
> + case PR_RISCV_SET_ICACHE_FLUSH_CTX:
> + error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3);
> + break;
> default:
> error = -EINVAL;
> break;
>
> --
> 2.43.0
>
Reviewed-by: Atish Patra <[email protected]>
--
Regards,
Atish
On Mon, Jan 8, 2024 at 10:42 AM Charlie Jenkins <[email protected]> wrote:
>
> Provide documentation that explains how to properly do CMODX in riscv.
>
> Signed-off-by: Charlie Jenkins <[email protected]>
> ---
> Documentation/arch/riscv/cmodx.rst | 88 ++++++++++++++++++++++++++++++++++++++
> Documentation/arch/riscv/index.rst | 1 +
> 2 files changed, 89 insertions(+)
>
> diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst
> new file mode 100644
> index 000000000000..afd7086c222c
> --- /dev/null
> +++ b/Documentation/arch/riscv/cmodx.rst
> @@ -0,0 +1,88 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +
> +==============================================================================
> +Concurrent Modification and Execution of Instructions (CMODX) for RISC-V Linux
> +==============================================================================
> +
> +CMODX is a programming technique where a program executes instructions that were
> +modified by the program itself. Instruction storage and the instruction cache
> +(icache) are not guaranteed to be synchronized on RISC-V hardware. Therefore, the
> +program must enforce its own synchronization with the unprivileged fencei
> +instruction.
> +
> +However, the default Linux ABI prohibits the use of fence.i in userspace
> +applications. At any point the scheduler may migrate a task onto a new hart. If
> +migration occurs after the userspace synchronized the icache and instruction
> +storage with fence.i, the icache will no longer be clean. This is due to the
> +behavior of fence.i only affecting the hart that it is called on. Thus, the hart
> +that the task has been migrated to may not have synchronized instruction storage
> +and icache.
> +
> +There are two ways to solve this problem: use the riscv_flush_icache() syscall,
> +or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in
> +userspace. The syscall performs a one-off icache flushing operation. The prctl
> +changes the Linux ABI to allow userspace to emit icache flushing operations.
> +
> +prctl() Interface
> +---------------------
> +
> +Call prctl() with ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` as the first argument. The
> +remaining arguments will be delegated to the riscv_set_icache_flush_ctx
> +function detailed below.
> +
> +.. kernel-doc:: arch/riscv/mm/cacheflush.c
> + :identifiers: riscv_set_icache_flush_ctx
> +
Document the arguments of the prctl as well ?
> +Example usage:
> +
> +The following files are meant to be compiled and linked with each other. The
> +modify_instruction() function replaces an add with 0 with an add with one,
> +causing the instruction sequence in get_value() to change from returning a zero
> +to returning a one.
> +
> +cmodx.c::
> +
> + #include <stdio.h>
> + #include <sys/prctl.h>
> +
> + extern int get_value();
> + extern void modify_instruction();
> +
> + int main()
> + {
> + int value = get_value();
> + printf("Value before cmodx: %d\n", value);
> +
> + // Call prctl before first fence.i is called inside modify_instruction
> + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_ON, PR_RISCV_CTX_SW_FENCEI, 0);
> + modify_instruction();
> +
> + value = get_value();
> + printf("Value after cmodx: %d\n", value);
> + return 0;
> + }
> +
> +cmodx.S::
> +
> + .option norvc
> +
> + .text
> + .global modify_instruction
> + modify_instruction:
> + lw a0, new_insn
> + lui a5,%hi(old_insn)
> + sw a0,%lo(old_insn)(a5)
> + fence.i
> + ret
> +
> + .section modifiable, "awx"
> + .global get_value
> + get_value:
> + li a0, 0
> + old_insn:
> + addi a0, a0, 0
> + ret
> +
> + .data
> + new_insn:
> + addi a0, a0, 1
> diff --git a/Documentation/arch/riscv/index.rst b/Documentation/arch/riscv/index.rst
> index 4dab0cb4b900..eecf347ce849 100644
> --- a/Documentation/arch/riscv/index.rst
> +++ b/Documentation/arch/riscv/index.rst
> @@ -13,6 +13,7 @@ RISC-V architecture
> patch-acceptance
> uabi
> vector
> + cmodx
>
> features
>
>
> --
> 2.43.0
>
--
Regards,
Atish
On Mon, Jan 08, 2024 at 05:24:47PM -0800, Atish Patra wrote:
> On Mon, Jan 8, 2024 at 10:42 AM Charlie Jenkins <[email protected]> wrote:
> >
> > Provide documentation that explains how to properly do CMODX in riscv.
> >
> > Signed-off-by: Charlie Jenkins <[email protected]>
> > ---
> > Documentation/arch/riscv/cmodx.rst | 88 ++++++++++++++++++++++++++++++++++++++
> > Documentation/arch/riscv/index.rst | 1 +
> > 2 files changed, 89 insertions(+)
> >
> > diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst
> > new file mode 100644
> > index 000000000000..afd7086c222c
> > --- /dev/null
> > +++ b/Documentation/arch/riscv/cmodx.rst
> > @@ -0,0 +1,88 @@
> > +.. SPDX-License-Identifier: GPL-2.0
> > +
> > +==============================================================================
> > +Concurrent Modification and Execution of Instructions (CMODX) for RISC-V Linux
> > +==============================================================================
> > +
> > +CMODX is a programming technique where a program executes instructions that were
> > +modified by the program itself. Instruction storage and the instruction cache
> > +(icache) are not guaranteed to be synchronized on RISC-V hardware. Therefore, the
> > +program must enforce its own synchronization with the unprivileged fence.i
> > +instruction.
> > +
> > +However, the default Linux ABI prohibits the use of fence.i in userspace
> > +applications. At any point the scheduler may migrate a task onto a new hart. If
> > +migration occurs after the userspace synchronized the icache and instruction
> > +storage with fence.i, the icache will no longer be clean. This is due to the
> > +behavior of fence.i only affecting the hart that it is called on. Thus, the hart
> > +that the task has been migrated to may not have synchronized instruction storage
> > +and icache.
> > +
> > +There are two ways to solve this problem: use the riscv_flush_icache() syscall,
> > +or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in
> > +userspace. The syscall performs a one-off icache flushing operation. The prctl
> > +changes the Linux ABI to allow userspace to emit icache flushing operations.
> > +
> > +prctl() Interface
> > +---------------------
> > +
> > +Call prctl() with ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` as the first argument. The
> > +remaining arguments will be delegated to the riscv_set_icache_flush_ctx
> > +function detailed below.
> > +
> > +.. kernel-doc:: arch/riscv/mm/cacheflush.c
> > + :identifiers: riscv_set_icache_flush_ctx
> > +
>
> Document the arguments of the prctl as well ?
Do you mean to include the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` key in the
comment of riscv_set_icache_flush_ctx? The args to
riscv_set_icache_flush_ctx are the args to the prctl except for the key.
- Charlie
>
> > +Example usage:
> > +
> > +The following files are meant to be compiled and linked with each other. The
> > +modify_instruction() function replaces an add with 0 with an add with one,
> > +causing the instruction sequence in get_value() to change from returning a zero
> > +to returning a one.
> > +
> > +cmodx.c::
> > +
> > + #include <stdio.h>
> > + #include <sys/prctl.h>
> > +
> > + extern int get_value();
> > + extern void modify_instruction();
> > +
> > + int main()
> > + {
> > + int value = get_value();
> > + printf("Value before cmodx: %d\n", value);
> > +
> > + // Call prctl before first fence.i is called inside modify_instruction
> > + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_ON, PR_RISCV_CTX_SW_FENCEI, 0);
> > + modify_instruction();
> > +
> > + value = get_value();
> > + printf("Value after cmodx: %d\n", value);
> > + return 0;
> > + }
> > +
> > +cmodx.S::
> > +
> > + .option norvc
> > +
> > + .text
> > + .global modify_instruction
> > + modify_instruction:
> > + lw a0, new_insn
> > + lui a5,%hi(old_insn)
> > + sw a0,%lo(old_insn)(a5)
> > + fence.i
> > + ret
> > +
> > + .section modifiable, "awx"
> > + .global get_value
> > + get_value:
> > + li a0, 0
> > + old_insn:
> > + addi a0, a0, 0
> > + ret
> > +
> > + .data
> > + new_insn:
> > + addi a0, a0, 1
> > diff --git a/Documentation/arch/riscv/index.rst b/Documentation/arch/riscv/index.rst
> > index 4dab0cb4b900..eecf347ce849 100644
> > --- a/Documentation/arch/riscv/index.rst
> > +++ b/Documentation/arch/riscv/index.rst
> > @@ -13,6 +13,7 @@ RISC-V architecture
> > patch-acceptance
> > uabi
> > vector
> > + cmodx
> >
> > features
> >
> >
> > --
> > 2.43.0
> >
>
>
> --
> Regards,
> Atish
On Mon, Jan 8, 2024 at 6:20 PM Charlie Jenkins <[email protected]> wrote:
>
> On Mon, Jan 08, 2024 at 05:24:47PM -0800, Atish Patra wrote:
> > On Mon, Jan 8, 2024 at 10:42 AM Charlie Jenkins <[email protected]> wrote:
> > >
> > > Provide documentation that explains how to properly do CMODX in riscv.
> > >
> > > Signed-off-by: Charlie Jenkins <[email protected]>
> > > ---
> > > Documentation/arch/riscv/cmodx.rst | 88 ++++++++++++++++++++++++++++++++++++++
> > > Documentation/arch/riscv/index.rst | 1 +
> > > 2 files changed, 89 insertions(+)
> > >
> > > diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst
> > > new file mode 100644
> > > index 000000000000..afd7086c222c
> > > --- /dev/null
> > > +++ b/Documentation/arch/riscv/cmodx.rst
> > > @@ -0,0 +1,88 @@
> > > +.. SPDX-License-Identifier: GPL-2.0
> > > +
> > > +==============================================================================
> > > +Concurrent Modification and Execution of Instructions (CMODX) for RISC-V Linux
> > > +==============================================================================
> > > +
> > > +CMODX is a programming technique where a program executes instructions that were
> > > +modified by the program itself. Instruction storage and the instruction cache
> > > +(icache) are not guaranteed to be synchronized on RISC-V hardware. Therefore, the
> > > +program must enforce its own synchronization with the unprivileged fence.i
> > > +instruction.
> > > +
> > > +However, the default Linux ABI prohibits the use of fence.i in userspace
> > > +applications. At any point the scheduler may migrate a task onto a new hart. If
> > > +migration occurs after the userspace synchronized the icache and instruction
> > > +storage with fence.i, the icache will no longer be clean. This is due to the
> > > +behavior of fence.i only affecting the hart that it is called on. Thus, the hart
> > > +that the task has been migrated to may not have synchronized instruction storage
> > > +and icache.
> > > +
> > > +There are two ways to solve this problem: use the riscv_flush_icache() syscall,
> > > +or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in
> > > +userspace. The syscall performs a one-off icache flushing operation. The prctl
> > > +changes the Linux ABI to allow userspace to emit icache flushing operations.
> > > +
> > > +prctl() Interface
> > > +---------------------
> > > +
> > > +Call prctl() with ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` as the first argument. The
> > > +remaining arguments will be delegated to the riscv_set_icache_flush_ctx
> > > +function detailed below.
> > > +
> > > +.. kernel-doc:: arch/riscv/mm/cacheflush.c
> > > + :identifiers: riscv_set_icache_flush_ctx
> > > +
> >
> > Document the arguments of the prctl as well ?
>
> Do you mean to include the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` key in the
> comment of riscv_set_icache_flush_ctx? The args to
> riscv_set_icache_flush_ctx are the args to the prctl except for the key.
>
No, I mean describe argument 2 (ctx) and argument 3 (per_thread) as well.
Since this is the documentation of the new prctl, we should document all
args so that a user can use it without grepping through the kernel
sources.
> - Charlie
>
> >
> > > +Example usage:
> > > +
> > > +The following files are meant to be compiled and linked with each other. The
> > > +modify_instruction() function replaces an add with 0 with an add with one,
> > > +causing the instruction sequence in get_value() to change from returning a zero
> > > +to returning a one.
> > > +
> > > +cmodx.c::
> > > +
> > > + #include <stdio.h>
> > > + #include <sys/prctl.h>
> > > +
> > > + extern int get_value();
> > > + extern void modify_instruction();
> > > +
> > > + int main()
> > > + {
> > > + int value = get_value();
> > > + printf("Value before cmodx: %d\n", value);
> > > +
> > > + // Call prctl before first fence.i is called inside modify_instruction
> > > + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_ON, PR_RISCV_CTX_SW_FENCEI, 0);
> > > + modify_instruction();
> > > +
> > > + value = get_value();
> > > + printf("Value after cmodx: %d\n", value);
> > > + return 0;
> > > + }
> > > +
> > > +cmodx.S::
> > > +
> > > + .option norvc
> > > +
> > > + .text
> > > + .global modify_instruction
> > > + modify_instruction:
> > > + lw a0, new_insn
> > > + lui a5,%hi(old_insn)
> > > + sw a0,%lo(old_insn)(a5)
> > > + fence.i
> > > + ret
> > > +
> > > + .section modifiable, "awx"
> > > + .global get_value
> > > + get_value:
> > > + li a0, 0
> > > + old_insn:
> > > + addi a0, a0, 0
> > > + ret
> > > +
> > > + .data
> > > + new_insn:
> > > + addi a0, a0, 1
> > > diff --git a/Documentation/arch/riscv/index.rst b/Documentation/arch/riscv/index.rst
> > > index 4dab0cb4b900..eecf347ce849 100644
> > > --- a/Documentation/arch/riscv/index.rst
> > > +++ b/Documentation/arch/riscv/index.rst
> > > @@ -13,6 +13,7 @@ RISC-V architecture
> > > patch-acceptance
> > > uabi
> > > vector
> > > + cmodx
> > >
> > > features
> > >
> > >
> > > --
> > > 2.43.0
> > >
> >
> >
> > --
> > Regards,
> > Atish
--
Regards,
Atish
On Mon, Jan 8, 2024 at 11:51 PM Atish Patra <[email protected]> wrote:
>
> On Mon, Jan 8, 2024 at 6:20 PM Charlie Jenkins <[email protected]> wrote:
> >
> > On Mon, Jan 08, 2024 at 05:24:47PM -0800, Atish Patra wrote:
> > > On Mon, Jan 8, 2024 at 10:42 AM Charlie Jenkins <[email protected]> wrote:
> > > >
> > > > Provide documentation that explains how to properly do CMODX in riscv.
> > > >
> > > > Signed-off-by: Charlie Jenkins <[email protected]>
> > > > ---
> > > > Documentation/arch/riscv/cmodx.rst | 88 ++++++++++++++++++++++++++++++++++++++
> > > > Documentation/arch/riscv/index.rst | 1 +
> > > > 2 files changed, 89 insertions(+)
> > > >
> > > > diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst
> > > > new file mode 100644
> > > > index 000000000000..afd7086c222c
> > > > --- /dev/null
> > > > +++ b/Documentation/arch/riscv/cmodx.rst
> > > > @@ -0,0 +1,88 @@
> > > > +.. SPDX-License-Identifier: GPL-2.0
> > > > +
> > > > +==============================================================================
> > > > +Concurrent Modification and Execution of Instructions (CMODX) for RISC-V Linux
> > > > +==============================================================================
> > > > +
> > > > +CMODX is a programming technique where a program executes instructions that were
> > > > +modified by the program itself. Instruction storage and the instruction cache
> > > > +(icache) are not guaranteed to be synchronized on RISC-V hardware. Therefore, the
> > > > +program must enforce its own synchronization with the unprivileged fence.i
> > > > +instruction.
> > > > +
> > > > +However, the default Linux ABI prohibits the use of fence.i in userspace
> > > > +applications. At any point the scheduler may migrate a task onto a new hart. If
> > > > +migration occurs after the userspace synchronized the icache and instruction
> > > > +storage with fence.i, the icache will no longer be clean. This is due to the
> > > > +behavior of fence.i only affecting the hart that it is called on. Thus, the hart
> > > > +that the task has been migrated to may not have synchronized instruction storage
> > > > +and icache.
> > > > +
> > > > +There are two ways to solve this problem: use the riscv_flush_icache() syscall,
> > > > +or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in
> > > > +userspace. The syscall performs a one-off icache flushing operation. The prctl
> > > > +changes the Linux ABI to allow userspace to emit icache flushing operations.
> > > > +
> > > > +prctl() Interface
> > > > +---------------------
> > > > +
> > > > +Call prctl() with ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` as the first argument. The
> > > > +remaining arguments will be delegated to the riscv_set_icache_flush_ctx
> > > > +function detailed below.
> > > > +
> > > > +.. kernel-doc:: arch/riscv/mm/cacheflush.c
> > > > + :identifiers: riscv_set_icache_flush_ctx
> > > > +
> > >
> > > Document the arguments of the prctl as well ?
> >
> > Do you mean to include the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` key in the
> > comment of riscv_set_icache_flush_ctx? The args to
> > riscv_set_icache_flush_ctx are the args to the prctl except for the key.
> >
>
> No, I mean describe argument 2 (ctx) and argument 3 (per_thread) as well.
> Since this is the documentation of the new prctl, we should document all
> of its arguments so that a user can use it without grepping through the
> kernel sources.
>
Never mind. I missed the ``:identifiers:`` option of the kernel-doc
directive. Thanks for the reminder.
The patch looks good to me.
Reviewed-by: Atish Patra <[email protected]>
> > - Charlie
> >
> > >
> > > > +Example usage:
> > > > +
> > > > +The following files are meant to be compiled and linked with each other. The
> > > > +modify_instruction() function replaces an ``addi`` of 0 with an ``addi`` of 1,
> > > > +causing the instruction sequence in get_value() to change from returning a zero
> > > > +to returning a one.
> > > > +
> > > > +cmodx.c::
> > > > +
> > > > + #include <stdio.h>
> > > > + #include <sys/prctl.h>
> > > > +
> > > > + extern int get_value();
> > > > + extern void modify_instruction();
> > > > +
> > > > + int main()
> > > > + {
> > > > + int value = get_value();
> > > > + printf("Value before cmodx: %d\n", value);
> > > > +
> > > > + // Call prctl before first fence.i is called inside modify_instruction
> > > > + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, 0);
> > > > + modify_instruction();
> > > > +
> > > > + value = get_value();
> > > > + printf("Value after cmodx: %d\n", value);
> > > > + return 0;
> > > > + }
> > > > +
> > > > +cmodx.S::
> > > > +
> > > > + .option norvc
> > > > +
> > > > + .text
> > > > + .global modify_instruction
> > > > + modify_instruction:
> > > > + lw a0, new_insn
> > > > + lui a5,%hi(old_insn)
> > > > + sw a0,%lo(old_insn)(a5)
> > > > + fence.i
> > > > + ret
> > > > +
> > > > + .section modifiable, "awx"
> > > > + .global get_value
> > > > + get_value:
> > > > + li a0, 0
> > > > + old_insn:
> > > > + addi a0, a0, 0
> > > > + ret
> > > > +
> > > > + .data
> > > > + new_insn:
> > > > + addi a0, a0, 1
> > > > diff --git a/Documentation/arch/riscv/index.rst b/Documentation/arch/riscv/index.rst
> > > > index 4dab0cb4b900..eecf347ce849 100644
> > > > --- a/Documentation/arch/riscv/index.rst
> > > > +++ b/Documentation/arch/riscv/index.rst
> > > > @@ -13,6 +13,7 @@ RISC-V architecture
> > > > patch-acceptance
> > > > uabi
> > > > vector
> > > > + cmodx
> > > >
> > > > features
> > > >
> > > >
> > > > --
> > > > 2.43.0
> > > >
> > >
> > >
> > > --
> > > Regards,
> > > Atish
>
>
>
> --
> Regards,
> Atish
--
Regards,
Atish