Issue: During a context switch, the stack of the next task (a kernel
thread) is not mapped in the previous task's PGD.

The issue was hit while porting VMAP stacks to ARM. Currently the
stack is forcibly mapped only when the mm is switched, but if the next
task is a kernel thread, no mm switch happens and its stack may not be
mapped in the page tables that remain loaded.

Therefore, map the stack of the next task into the previous task's mm
when the next task is a kernel thread, since a kernel thread borrows
the mm of the previous task.

Note: we do not have a reproducible setup for x86; the changes were
verified on ARM only, so we are not sure about the x86-specific
handling.
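
For reference, the kernel-thread path in question is the !next->mm
branch of context_switch() in kernel/sched/core.c; a simplified sketch
(exact ordering and details vary across kernel versions):

	if (!next->mm) {
		/*
		 * Next task is a kernel thread: it has no mm of its own,
		 * so it keeps running on the previous task's active_mm.
		 * No mm switch (and no forced stack mapping) happens here.
		 */
		enter_lazy_tlb(prev->active_mm, next);
		next->active_mm = prev->active_mm;
		if (prev->mm)
			mmgrab(prev->active_mm);
	} else {
		/* Next task is a user task: switch to its mm. */
		switch_mm_irqs_off(prev->active_mm, next->mm, next);
	}
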
Signed-off-by: Vaneet Narang <[email protected]>
Signed-off-by: Maninder Singh <[email protected]>
---
arch/x86/mm/tlb.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e6a9edc5baaf..28328cf8e79c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -161,11 +161,17 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
local_irq_restore(flags);
}
-static void sync_current_stack_to_mm(struct mm_struct *mm)
+static void sync_stack_to_mm(struct mm_struct *mm, struct task_struct *tsk)
{
- unsigned long sp = current_stack_pointer;
- pgd_t *pgd = pgd_offset(mm, sp);
+ unsigned long sp;
+ pgd_t *pgd;
+ if (!tsk)
+ sp = current_stack_pointer;
+ else
+ sp = (unsigned long)tsk->stack;
+
+ pgd = pgd_offset(mm, sp);
if (pgtable_l5_enabled()) {
if (unlikely(pgd_none(*pgd))) {
pgd_t *pgd_ref = pgd_offset_k(sp);
@@ -383,7 +389,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* mapped in the new pgd, we'll double-fault. Forcibly
* map it.
*/
- sync_current_stack_to_mm(next);
+ sync_stack_to_mm(next, NULL);
}
/*
@@ -460,6 +466,15 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
*/
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ /*
+ * If tsk stack is in vmalloc space and isn't
+ * mapped in the new pgd, we'll double-fault. Forcibly
+ * map it.
+ */
+ sync_stack_to_mm(mm, tsk);
+ }
+
if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
return;
--
2.17.1
On Mon, Nov 18, 2019 at 9:44 PM Maninder Singh <[email protected]> wrote:
>
> Issue: During a context switch, the stack of the next task (a kernel
> thread) is not mapped in the previous task's PGD.
>
> The issue was hit while porting VMAP stacks to ARM. Currently the
> stack is forcibly mapped only when the mm is switched, but if the next
> task is a kernel thread, no mm switch happens and its stack may not be
> mapped in the page tables that remain loaded.
>
> Therefore, map the stack of the next task into the previous task's mm
> when the next task is a kernel thread, since a kernel thread borrows
> the mm of the previous task.
>
> Note: we do not have a reproducible setup for x86; the changes were
> verified on ARM only, so we are not sure about the x86-specific
> handling.
I think the code is correct without your patch and is wrong with your
patch. See below.
>
> Signed-off-by: Vaneet Narang <[email protected]>
> Signed-off-by: Maninder Singh <[email protected]>
> ---
> arch/x86/mm/tlb.c | 23 +++++++++++++++++++----
> 1 file changed, 19 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index e6a9edc5baaf..28328cf8e79c 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -161,11 +161,17 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> local_irq_restore(flags);
> }
>
> -static void sync_current_stack_to_mm(struct mm_struct *mm)
> +static void sync_stack_to_mm(struct mm_struct *mm, struct task_struct *tsk)
> {
> - unsigned long sp = current_stack_pointer;
> - pgd_t *pgd = pgd_offset(mm, sp);
> + unsigned long sp;
> + pgd_t *pgd;
>
> + if (!tsk)
> + sp = current_stack_pointer;
> + else
> + sp = (unsigned long)tsk->stack;
> +
> + pgd = pgd_offset(mm, sp);
> if (pgtable_l5_enabled()) {
> if (unlikely(pgd_none(*pgd))) {
> pgd_t *pgd_ref = pgd_offset_k(sp);
> @@ -383,7 +389,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
> * mapped in the new pgd, we'll double-fault. Forcibly
> * map it.
> */
> - sync_current_stack_to_mm(next);
> + sync_stack_to_mm(next, NULL);
If we set CR3 to point to the next mm's PGD and then touch the current
stack, we'll double-fault. So we need to sync the *current* stack,
not the next stack.
The code in prepare_switch_to() makes sure that the next task's stack is synced.
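
For reference, prepare_switch_to() on x86 (arch/x86/include/asm/switch_to.h)
is roughly the following; the exact comments and details vary across
kernel versions:

static inline void prepare_switch_to(struct task_struct *next)
{
#ifdef CONFIG_VMAP_STACK
	/*
	 * Probe the new task's stack while we are still running on the
	 * old stack and the old mm: if its top-level paging entry is
	 * missing, this takes an ordinary #PF that vmalloc_fault() can
	 * fix up, instead of a double fault after the stack switch.
	 */
	READ_ONCE(*(unsigned char *)next->thread.sp);
#endif
}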
> }
>
> /*
> @@ -460,6 +466,15 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
> */
> void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
> {
> + if (IS_ENABLED(CONFIG_VMAP_STACK)) {
> + /*
> + * If tsk stack is in vmalloc space and isn't
> + * mapped in the new pgd, we'll double-fault. Forcibly
> + * map it.
> + */
> + sync_stack_to_mm(mm, tsk);
> + }
> +
I don't think this is necessary, since prepare_switch_to() already
does what's needed.
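
Roughly, the x86 switch_to() macro calls prepare_switch_to(next) before
__switch_to_asm() actually switches stacks, i.e. while the CPU is still
on prev's stack and prev's page tables, so the probe can take a normal
vmalloc fault and populate the missing pgd entry in time. Sketch from
arch/x86/include/asm/switch_to.h (details vary by kernel version):

#define switch_to(prev, next, last)					\
do {									\
	prepare_switch_to(next);  /* probe next's stack first */	\
									\
	((last) = __switch_to_asm((prev), (next)));			\
} while (0)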