From: Jiang Liu <[email protected]>
Restore the FPSIMD control and status registers to their default values
when creating a new FPSIMD context for kernel use, and reset the
FPSIMD status register when creating an FPSIMD context for signal
handling. Otherwise, stale values in the FPSIMD control and status
registers may affect the new kernel or signal handling contexts.
Signed-off-by: Jiang Liu <[email protected]>
Cc: Jiang Liu <[email protected]>
---
arch/arm64/include/asm/fpsimd.h | 4 ++++
arch/arm64/kernel/fpsimd.c | 30 ++++++++++++++++++++++++++++--
arch/arm64/kernel/signal.c | 1 +
3 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index c43b4ac..b3c12fd 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -50,6 +50,8 @@ struct fpsimd_state {
#define VFP_STATE_SIZE ((32 * 8) + 4)
#endif
+#define AARCH64_FPCR_DEFAULT_VAL 0
+
struct task_struct;
extern void fpsimd_save_state(struct fpsimd_state *state);
@@ -58,6 +60,8 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
+extern void fpsimd_init_sigctx(struct fpsimd_state *state);
+
#endif
#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index bb785d2..9daee2c 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -33,6 +33,21 @@
#define FPEXC_IXF (1 << 4)
#define FPEXC_IDF (1 << 7)
+static inline void fpsimd_init_hw_state(void)
+{
+ int val = AARCH64_FPCR_DEFAULT_VAL;
+
+ asm volatile ("msr fpcr, %x0\n"
+ "msr fpsr, xzr\n"
+ : : "r"(val) : "memory");
+}
+
+static inline void fpsimd_clear_fpsr(void)
+{
+ asm volatile ("msr fpsr, xzr\n"
+ : : : "memory");
+}
+
/*
* Trapped FP/ASIMD access.
*/
@@ -80,12 +95,21 @@ void fpsimd_thread_switch(struct task_struct *next)
void fpsimd_flush_thread(void)
{
+ struct fpsimd_state *state = &current->thread.fpsimd_state;
+
preempt_disable();
- memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
- fpsimd_load_state(&current->thread.fpsimd_state);
+ memset(state, 0, sizeof(struct fpsimd_state));
+ if (AARCH64_FPCR_DEFAULT_VAL)
+ state->fpcr = AARCH64_FPCR_DEFAULT_VAL;
+ fpsimd_load_state(state);
preempt_enable();
}
+void fpsimd_init_sigctx(struct fpsimd_state *state)
+{
+ fpsimd_clear_fpsr();
+}
+
#ifdef CONFIG_KERNEL_MODE_NEON
/*
@@ -99,6 +123,8 @@ void kernel_neon_begin(void)
if (current->mm)
fpsimd_save_state(&current->thread.fpsimd_state);
+
+ fpsimd_init_hw_state();
}
EXPORT_SYMBOL(kernel_neon_begin);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 890a591..f2c83e8 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -320,6 +320,7 @@ static void handle_signal(unsigned long sig, struct k_sigaction *ka,
* handler.
*/
user_fastforward_single_step(tsk);
+ fpsimd_init_sigctx(&tsk->thread.fpsimd_state);
signal_delivered(sig, info, ka, regs, 0);
}
--
1.8.1.2
From: Jiang Liu <[email protected]>
Reduce duplicated code when saving/restoring FPSIMD state for signal
handling. This also helps to concentrate all FPSIMD hardware-related
code in fpsimd.c.
Signed-off-by: Jiang Liu <[email protected]>
Cc: Jiang Liu <[email protected]>
---
arch/arm64/include/asm/fpsimd.h | 4 ++++
arch/arm64/kernel/fpsimd.c | 20 ++++++++++++++++++++
arch/arm64/kernel/process.c | 3 +--
arch/arm64/kernel/signal.c | 11 +++--------
arch/arm64/kernel/signal32.c | 9 +++------
5 files changed, 31 insertions(+), 16 deletions(-)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index b3c12fd..142084f 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -57,10 +57,14 @@ struct task_struct;
extern void fpsimd_save_state(struct fpsimd_state *state);
extern void fpsimd_load_state(struct fpsimd_state *state);
+extern void fpsimd_dup_task_struct(struct task_struct *dst,
+ struct task_struct *src);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
extern void fpsimd_init_sigctx(struct fpsimd_state *state);
+extern void fpsimd_save_sigctx(struct fpsimd_state *state);
+extern void fpsimd_restore_sigctx(struct fpsimd_state *state);
#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 9daee2c..f43dd58 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -84,6 +84,12 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
send_sig_info(SIGFPE, &info, current);
}
+void fpsimd_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+ fpsimd_save_state(&src->thread.fpsimd_state);
+ *dst = *src;
+}
+
void fpsimd_thread_switch(struct task_struct *next)
{
/* check if not kernel threads */
@@ -110,6 +116,20 @@ void fpsimd_init_sigctx(struct fpsimd_state *state)
fpsimd_clear_fpsr();
}
+void fpsimd_save_sigctx(struct fpsimd_state *state)
+{
+ /* dump the hardware registers to the fpsimd_state structure */
+ fpsimd_save_state(state);
+}
+
+void fpsimd_restore_sigctx(struct fpsimd_state *state)
+{
+ /* load the hardware registers from the fpsimd_state structure */
+ preempt_disable();
+ fpsimd_load_state(state);
+ preempt_enable();
+}
+
#ifdef CONFIG_KERNEL_MODE_NEON
/*
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 7ae8a1f..6796080 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -195,8 +195,7 @@ void release_thread(struct task_struct *dead_task)
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- fpsimd_save_state(&current->thread.fpsimd_state);
- *dst = *src;
+ fpsimd_dup_task_struct(dst, src);
return 0;
}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index f2c83e8..596c8cf 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -50,8 +50,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
int err;
- /* dump the hardware registers to the fpsimd_state structure */
- fpsimd_save_state(fpsimd);
+ fpsimd_save_sigctx(fpsimd);
/* copy the FP and status/control registers */
err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
@@ -85,12 +84,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
__get_user_error(fpsimd.fpsr, &ctx->fpsr, err);
__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
- /* load the hardware registers from the fpsimd_state structure */
- if (!err) {
- preempt_disable();
- fpsimd_load_state(&fpsimd);
- preempt_enable();
- }
+ if (!err)
+ fpsimd_restore_sigctx(&fpsimd);
return err ? -EFAULT : 0;
}
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index e393174..4ce3768 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -247,7 +247,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
* Note that this also saves V16-31, which aren't visible
* in AArch32.
*/
- fpsimd_save_state(fpsimd);
+ fpsimd_save_sigctx(fpsimd);
/* Place structure header on the stack */
__put_user_error(magic, &frame->magic, err);
@@ -310,11 +310,8 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
* We don't need to touch the exception register, so
* reload the hardware state.
*/
- if (!err) {
- preempt_disable();
- fpsimd_load_state(&fpsimd);
- preempt_enable();
- }
+ if (!err)
+ fpsimd_restore_sigctx(&fpsimd);
return err ? -EFAULT : 0;
}
--
1.8.1.2
From: Jiang Liu <[email protected]>
Reuse the FPSIMD hardware context if it hasn't been touched by another
thread yet, so we can get rid of unnecessary FPSIMD context restores. This
is especially useful when switching between kernel threads and user threads
because kernel threads usually don't touch FPSIMD registers.
Signed-off-by: Jiang Liu <[email protected]>
Cc: Jiang Liu <[email protected]>
---
arch/arm64/include/asm/fpsimd.h | 2 ++
arch/arm64/kernel/fpsimd.c | 35 +++++++++++++++++++++++++++++++++--
arch/arm64/kernel/smp.c | 1 +
3 files changed, 36 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 142084f..4356d6e 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -35,6 +35,7 @@ struct fpsimd_state {
__uint128_t vregs[32];
u32 fpsr;
u32 fpcr;
+ int last_cpu;
};
};
};
@@ -56,6 +57,7 @@ struct task_struct;
extern void fpsimd_save_state(struct fpsimd_state *state);
extern void fpsimd_load_state(struct fpsimd_state *state);
+extern void fpsimd_reset_lazy_restore(void);
extern void fpsimd_dup_task_struct(struct task_struct *dst,
struct task_struct *src);
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index f43dd58..5e37d86 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -22,6 +22,7 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/hardirq.h>
+#include <linux/percpu.h>
#include <asm/fpsimd.h>
#include <asm/cputype.h>
@@ -33,6 +34,13 @@
#define FPEXC_IXF (1 << 4)
#define FPEXC_IDF (1 << 7)
+static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_owner);
+
+static inline void fpsimd_set_last_cpu(struct fpsimd_state *state, int cpu)
+{
+ state->last_cpu = cpu;
+}
+
static inline void fpsimd_init_hw_state(void)
{
int val = AARCH64_FPCR_DEFAULT_VAL;
@@ -84,19 +92,41 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
send_sig_info(SIGFPE, &info, current);
}
+static void fpsimd_load_state_lazy(struct fpsimd_state *state)
+{
+ /* Could we reuse the hardware context? */
+ if (state->last_cpu == smp_processor_id() &&
+ __this_cpu_read(fpsimd_owner) == state)
+ return;
+ fpsimd_load_state(state);
+}
+
+static void fpsimd_save_state_lazy(struct fpsimd_state *state)
+{
+ fpsimd_save_state(state);
+ fpsimd_set_last_cpu(state, smp_processor_id());
+ __this_cpu_write(fpsimd_owner, state);
+}
+
+void fpsimd_reset_lazy_restore(void)
+{
+ this_cpu_write(fpsimd_owner, NULL);
+}
+
void fpsimd_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
fpsimd_save_state(&src->thread.fpsimd_state);
*dst = *src;
+ fpsimd_set_last_cpu(&dst->thread.fpsimd_state, -1);
}
void fpsimd_thread_switch(struct task_struct *next)
{
/* check if not kernel threads */
if (current->mm)
- fpsimd_save_state(&current->thread.fpsimd_state);
+ fpsimd_save_state_lazy(&current->thread.fpsimd_state);
if (next->mm)
- fpsimd_load_state(&next->thread.fpsimd_state);
+ fpsimd_load_state_lazy(&next->thread.fpsimd_state);
}
void fpsimd_flush_thread(void)
@@ -107,6 +137,7 @@ void fpsimd_flush_thread(void)
memset(state, 0, sizeof(struct fpsimd_state));
if (AARCH64_FPCR_DEFAULT_VAL)
state->fpcr = AARCH64_FPCR_DEFAULT_VAL;
+ fpsimd_set_last_cpu(state, -1);
fpsimd_load_state(state);
preempt_enable();
}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 78db90d..aae15c4 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -183,6 +183,7 @@ asmlinkage void secondary_start_kernel(void)
*/
cpu_set_reserved_ttbr0();
flush_tlb_all();
+ fpsimd_reset_lazy_restore();
preempt_disable();
trace_hardirqs_off();
--
1.8.1.2