Hi all,
This series is based on arm64 for-next/core and also depends on
the series at [1].
A previous version of hypervisor stack unwinding was included
in the series at [2].
This new version of the unwinder splits the unwinding and dumping
of the stack between the hypervisor and host:
- The hypervisor unwinds its stack and dumps the address entries
into a page shared with the host.
- The host then symbolizes and prints the hyp stacktrace from
the shared page.
The new approach doesn't depend on CONFIG_NVHE_EL2_DEBUG,
and allows dumping hyp stacktraces in production environments
(!CONFIG_NVHE_EL2_DEBUG).
arch/arm64/kernel/stacktrace.c is compiled twice: stacktrace.o for the
host kernel and stacktrace.nvhe.o for the hypervisor. This allows
reusing most of the host unwinding logic in the nVHE hypervisor.
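For illustration, the split in the shared source is keyed off the
__KVM_NVHE_HYPERVISOR__ define (a rough sketch only; the exact layout
is in the patches below):

  #ifndef __KVM_NVHE_HYPERVISOR__
  /* Host-only: task/IRQ/SDEI stacks, dump_backtrace(), ... */
  #else /* __KVM_NVHE_HYPERVISOR__ */
  /* Hyp-only: hyp/overflow stacks, saving entries to the shared page */
  #endif /* !__KVM_NVHE_HYPERVISOR__ */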
[1] https://lore.kernel.org/r/[email protected]/
[2] https://lore.kernel.org/r/[email protected]/
Thanks,
Kalesh
Kalesh Singh (4):
KVM: arm64: Compile stacktrace.nvhe.o
KVM: arm64: Add hypervisor overflow stack
KVM: arm64: Allocate shared stacktrace pages
KVM: arm64: Unwind and dump nVHE hypervisor stacktrace
arch/arm64/include/asm/kvm_asm.h | 1 +
arch/arm64/include/asm/stacktrace.h | 58 +++++++++--
arch/arm64/kernel/stacktrace.c | 151 +++++++++++++++++++++++-----
arch/arm64/kvm/arm.c | 34 +++++++
arch/arm64/kvm/handle_exit.c | 4 +
arch/arm64/kvm/hyp/nvhe/Makefile | 3 +-
arch/arm64/kvm/hyp/nvhe/host.S | 9 +-
arch/arm64/kvm/hyp/nvhe/setup.c | 11 ++
arch/arm64/kvm/hyp/nvhe/switch.c | 4 +
9 files changed, 231 insertions(+), 44 deletions(-)
--
2.36.0.rc2.479.g8af0fa9b8e-goog
Allocate and switch to a 16-byte aligned secondary stack on overflow
(AAPCS64 requires a 16-byte aligned stack pointer). This provides
stack space to better handle overflows, and is used in a subsequent
patch to dump the hypervisor stacktrace.
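A minimal sketch of the mechanism, matching the diff below: the
overflow stack is a page-sized per-CPU array, and the EL2 overflow
path points SP at its top (stacks grow down) before branching to
hyp_panic_bad_stack():

  DEFINE_PER_CPU(unsigned long [PAGE_SIZE/sizeof(long)], overflow_stack)
  	__aligned(16);

  /* In host.S, roughly: sp = this_cpu_ptr(overflow_stack) + PAGE_SIZE */
  adr_this_cpu sp, overflow_stack + PAGE_SIZE, x0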
Signed-off-by: Kalesh Singh <[email protected]>
---
arch/arm64/kernel/stacktrace.c | 3 +++
arch/arm64/kvm/hyp/nvhe/host.S | 9 ++-------
2 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index a84e38d41d38..f346b4c66f1c 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -242,4 +242,7 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
unwind(task, &state, consume_entry, cookie);
}
+#else /* __KVM_NVHE_HYPERVISOR__ */
+DEFINE_PER_CPU(unsigned long [PAGE_SIZE/sizeof(long)], overflow_stack)
+ __aligned(16);
#endif /* !__KVM_NVHE_HYPERVISOR__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 09b5254fb497..1cd2de4f039e 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -179,13 +179,8 @@ SYM_FUNC_END(__host_hvc)
b hyp_panic
.L__hyp_sp_overflow\@:
- /*
- * Reset SP to the top of the stack, to allow handling the hyp_panic.
- * This corrupts the stack but is ok, since we won't be attempting
- * any unwinding here.
- */
- ldr_this_cpu x0, kvm_init_params + NVHE_INIT_STACK_HYP_VA, x1
- mov sp, x0
+ /* Switch to the overflow stack */
+ adr_this_cpu sp, overflow_stack + PAGE_SIZE, x0
b hyp_panic_bad_stack
ASM_BUG()
--
2.36.0.rc2.479.g8af0fa9b8e-goog
On hyp_panic(), the hypervisor dumps the addresses for its stacktrace
entries to a page shared with the host. The host then symbolizes and
prints the hyp stacktrace before panicking itself.
Example stacktrace:
[ 122.051187] kvm [380]: Invalid host exception to nVHE hyp!
[ 122.052467] kvm [380]: nVHE HYP call trace:
[ 122.052814] kvm [380]: [<ffff800008f5b550>] __kvm_nvhe___pkvm_vcpu_init_traps+0x1f0/0x1f0
[ 122.053865] kvm [380]: [<ffff800008f560f0>] __kvm_nvhe_hyp_panic+0x130/0x1c0
[ 122.054367] kvm [380]: [<ffff800008f56190>] __kvm_nvhe___kvm_vcpu_run+0x10/0x10
[ 122.054878] kvm [380]: [<ffff800008f57a40>] __kvm_nvhe_handle___kvm_vcpu_run+0x30/0x50
[ 122.055412] kvm [380]: [<ffff800008f57d2c>] __kvm_nvhe_handle_trap+0xbc/0x160
[ 122.055911] kvm [380]: [<ffff800008f56864>] __kvm_nvhe___host_exit+0x64/0x64
[ 122.056417] kvm [380]: ---- end of nVHE HYP call trace ----
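The shared page holds a zero-terminated array of raw EL2 return
addresses. A sketch of the translation applied to each entry before
symbolization (the real loop is hyp_dump_backtrace() in the diff
below):

  unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0);

  pc &= va_mask;	/* strip the tag bits */
  pc += hyp_offset;	/* hyp VA -> kernel VA, so %pB can resolve it */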
Signed-off-by: Kalesh Singh <[email protected]>
---
arch/arm64/include/asm/stacktrace.h | 42 ++++++++++++++--
arch/arm64/kernel/stacktrace.c | 75 +++++++++++++++++++++++++++++
arch/arm64/kvm/handle_exit.c | 4 ++
arch/arm64/kvm/hyp/nvhe/switch.c | 4 ++
4 files changed, 121 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index f5af9a94c5a6..3063912107b0 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -5,6 +5,7 @@
#ifndef __ASM_STACKTRACE_H
#define __ASM_STACKTRACE_H
+#include <linux/kvm_host.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
@@ -19,10 +20,12 @@ enum stack_type {
#ifndef __KVM_NVHE_HYPERVISOR__
STACK_TYPE_TASK,
STACK_TYPE_IRQ,
- STACK_TYPE_OVERFLOW,
STACK_TYPE_SDEI_NORMAL,
STACK_TYPE_SDEI_CRITICAL,
+#else /* __KVM_NVHE_HYPERVISOR__ */
+ STACK_TYPE_HYP,
#endif /* !__KVM_NVHE_HYPERVISOR__ */
+ STACK_TYPE_OVERFLOW,
STACK_TYPE_UNKNOWN,
__NR_STACK_TYPES
};
@@ -55,6 +58,9 @@ static inline bool on_stack(unsigned long sp, unsigned long size,
extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
const char *loglvl);
+extern void hyp_dump_backtrace(unsigned long hyp_offset);
+
+DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stacktrace_page);
DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
static inline bool on_irq_stack(unsigned long sp, unsigned long size,
@@ -91,8 +97,32 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info) { return false; }
#endif
-#endif /* !__KVM_NVHE_HYPERVISOR__ */
+#else /* __KVM_NVHE_HYPERVISOR__ */
+
+extern void hyp_save_backtrace(void);
+
+DECLARE_PER_CPU(unsigned long [PAGE_SIZE/sizeof(long)], overflow_stack);
+DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
+
+static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
+ struct stack_info *info)
+{
+ unsigned long low = (unsigned long)this_cpu_ptr(overflow_stack);
+ unsigned long high = low + PAGE_SIZE;
+
+ return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
+}
+
+static inline bool on_hyp_stack(unsigned long sp, unsigned long size,
+ struct stack_info *info)
+{
+ struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
+ unsigned long high = params->stack_hyp_va;
+ unsigned long low = high - PAGE_SIZE;
+ return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
+}
+#endif /* !__KVM_NVHE_HYPERVISOR__ */
/*
* We can only safely access per-cpu stacks from current in a non-preemptible
@@ -105,6 +135,9 @@ static inline bool on_accessible_stack(const struct task_struct *tsk,
if (info)
info->type = STACK_TYPE_UNKNOWN;
+ if (on_overflow_stack(sp, size, info))
+ return true;
+
#ifndef __KVM_NVHE_HYPERVISOR__
if (on_task_stack(tsk, sp, size, info))
return true;
@@ -112,10 +145,11 @@ static inline bool on_accessible_stack(const struct task_struct *tsk,
return false;
if (on_irq_stack(sp, size, info))
return true;
- if (on_overflow_stack(sp, size, info))
- return true;
if (on_sdei_stack(sp, size, info))
return true;
+#else /* __KVM_NVHE_HYPERVISOR__ */
+ if (on_hyp_stack(sp, size, info))
+ return true;
#endif /* !__KVM_NVHE_HYPERVISOR__ */
return false;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index f346b4c66f1c..c81dea9760ac 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -104,6 +104,7 @@ static int notrace __unwind_next(struct task_struct *tsk,
*
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+ * HYP -> OVERFLOW
*
* ... but the nesting itself is strict. Once we transition from one
* stack to another, it's never valid to unwind back to that first
@@ -242,7 +243,81 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
unwind(task, &state, consume_entry, cookie);
}
+
+/**
+ * Symbolizes and dumps the hypervisor backtrace from the shared
+ * stacktrace page.
+ */
+noinline notrace void hyp_dump_backtrace(unsigned long hyp_offset)
+{
+ unsigned long *stacktrace_pos =
+ (unsigned long *)*this_cpu_ptr(&kvm_arm_hyp_stacktrace_page);
+ unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0);
+ unsigned long pc = *stacktrace_pos++;
+
+ kvm_err("nVHE HYP call trace:\n");
+
+ while (pc) {
+ pc &= va_mask; /* Mask tags */
+ pc += hyp_offset; /* Convert to kern addr */
+ kvm_err("[<%016lx>] %pB\n", pc, (void *)pc);
+ pc = *stacktrace_pos++;
+ }
+
+ kvm_err("---- end of nVHE HYP call trace ----\n");
+}
#else /* __KVM_NVHE_HYPERVISOR__ */
DEFINE_PER_CPU(unsigned long [PAGE_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
+
+static int notrace unwind_next(struct task_struct *tsk,
+ struct unwind_state *state)
+{
+ struct stack_info info;
+
+ return __unwind_next(tsk, state, &info);
+}
+
+/**
+ * Saves a hypervisor stacktrace entry (address) to the shared stacktrace page.
+ */
+static bool hyp_save_backtrace_entry(void *arg, unsigned long where)
+{
+ struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
+ unsigned long **stacktrace_pos = (unsigned long **)arg;
+ unsigned long stacktrace_start, stacktrace_end;
+
+ stacktrace_start = (unsigned long)params->stacktrace_hyp_va;
+ stacktrace_end = stacktrace_start + PAGE_SIZE - (2 * sizeof(long));
+
+ if ((unsigned long) *stacktrace_pos > stacktrace_end)
+ return false;
+
+ /* Save the entry to the current pos in stacktrace page */
+ **stacktrace_pos = where;
+
+ /* A zero entry delimits the end of the stacktrace. */
+ *(*stacktrace_pos + 1) = 0UL;
+
+ /* Increment the current pos */
+ ++*stacktrace_pos;
+
+ return true;
+}
+
+/**
+ * Saves hypervisor stacktrace to the shared stacktrace page.
+ */
+noinline notrace void hyp_save_backtrace(void)
+{
+ struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
+ void *stacktrace_start = (void *)params->stacktrace_hyp_va;
+ struct unwind_state state;
+
+ unwind_init(&state, (unsigned long)__builtin_frame_address(0),
+ _THIS_IP_);
+
+ unwind(NULL, &state, hyp_save_backtrace_entry, &stacktrace_start);
+}
+
#endif /* !__KVM_NVHE_HYPERVISOR__ */
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index a377b871bf58..ee5adc9bdb8c 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -17,6 +17,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/debug-monitors.h>
+#include <asm/stacktrace.h>
#include <asm/traps.h>
#include <kvm/arm_hypercalls.h>
@@ -323,6 +324,9 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
(void *)panic_addr);
}
+ /* Dump the hypervisor stacktrace */
+ hyp_dump_backtrace(hyp_offset);
+
/*
* Hyp has panicked and we're going to handle that by panicking the
* kernel. The kernel offset will be revealed in the panic so we're
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 978f1b94fb25..95d810e86c7d 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -25,6 +25,7 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
+#include <asm/stacktrace.h>
#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
@@ -395,6 +396,9 @@ asmlinkage void __noreturn hyp_panic(void)
__sysreg_restore_state_nvhe(host_ctxt);
}
+ /* Save the hypervisor stacktrace */
+ hyp_save_backtrace();
+
__hyp_do_panic(host_ctxt, spsr, elr, par);
unreachable();
}
--
2.36.0.rc2.479.g8af0fa9b8e-goog
The nVHE hypervisor can use this shared area to dump its stacktrace
addresses on hyp_panic(). Symbolizing and printing the stacktrace is
then handled by the host in EL1 (done in a later patch in this series).
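Per CPU, the host-side lifecycle is roughly the following (a sketch
with error handling elided; see the diff below for the real code):

  unsigned long page = __get_free_page(GFP_KERNEL);

  per_cpu(kvm_arm_hyp_stacktrace_page, cpu) = page;
  err = create_hyp_mappings((char *)page, (char *)page + PAGE_SIZE,
  			    PAGE_HYP);
  params->stacktrace_hyp_va = kern_hyp_va(page);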
Signed-off-by: Kalesh Singh <[email protected]>
---
arch/arm64/include/asm/kvm_asm.h | 1 +
arch/arm64/kvm/arm.c | 34 ++++++++++++++++++++++++++++++++
arch/arm64/kvm/hyp/nvhe/setup.c | 11 +++++++++++
3 files changed, 46 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 2e277f2ed671..ad31ac68264f 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -174,6 +174,7 @@ struct kvm_nvhe_init_params {
unsigned long hcr_el2;
unsigned long vttbr;
unsigned long vtcr;
+ unsigned long stacktrace_hyp_va;
};
/* Translate a kernel address @ptr into its equivalent linear mapping */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index dd257d9f21a2..1b21d5a99bfc 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -50,6 +50,7 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stacktrace_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
@@ -1483,6 +1484,7 @@ static void cpu_prepare_hyp_mode(int cpu)
tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
params->tcr_el2 = tcr;
+ params->stacktrace_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stacktrace_page, cpu));
params->pgd_pa = kvm_mmu_get_httbr();
if (is_protected_kvm_enabled())
params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
@@ -1776,6 +1778,7 @@ static void teardown_hyp_mode(void)
free_hyp_pgds();
for_each_possible_cpu(cpu) {
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
+ free_page(per_cpu(kvm_arm_hyp_stacktrace_page, cpu));
free_pages(kvm_arm_hyp_percpu_base[cpu], nvhe_percpu_order());
}
}
@@ -1867,6 +1870,23 @@ static int init_hyp_mode(void)
per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
}
+ /*
+ * Allocate stacktrace pages for Hypervisor-mode.
+ * This is used by the hypervisor to share its stacktrace
+ * with the host on a hyp_panic().
+ */
+ for_each_possible_cpu(cpu) {
+ unsigned long stacktrace_page;
+
+ stacktrace_page = __get_free_page(GFP_KERNEL);
+ if (!stacktrace_page) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ per_cpu(kvm_arm_hyp_stacktrace_page, cpu) = stacktrace_page;
+ }
+
/*
* Allocate and initialize pages for Hypervisor-mode percpu regions.
*/
@@ -1974,6 +1994,20 @@ static int init_hyp_mode(void)
params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
+ /*
+ * Map the hyp stacktrace pages.
+ */
+ for_each_possible_cpu(cpu) {
+ char *stacktrace_page = (char *)per_cpu(kvm_arm_hyp_stacktrace_page, cpu);
+
+ err = create_hyp_mappings(stacktrace_page, stacktrace_page + PAGE_SIZE,
+ PAGE_HYP);
+ if (err) {
+ kvm_err("Cannot map hyp stacktrace page\n");
+ goto out_err;
+ }
+ }
+
for_each_possible_cpu(cpu) {
char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu];
char *percpu_end = percpu_begin + nvhe_percpu_size();
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index e8d4ea2fcfa0..9b81bf2d40d7 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -135,6 +135,17 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
/* Update stack_hyp_va to end of the stack's private VA range */
params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
+
+ /*
+ * Map the stacktrace pages as shared and transfer ownership to
+ * the hypervisor.
+ */
+ prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_OWNED);
+ start = (void *)params->stacktrace_hyp_va;
+ end = start + PAGE_SIZE;
+ ret = pkvm_create_mappings(start, end, prot);
+ if (ret)
+ return ret;
}
/*
--
2.36.0.rc2.479.g8af0fa9b8e-goog
Recompile the stack unwinding code for use in the nVHE hypervisor.
This is a preparatory patch that allows reusing most of the kernel
unwinding logic in the hypervisor.
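The heavy lifting is done by the existing nVHE kbuild machinery, which
rewrites each obj-y entry into a .nvhe.o object compiled with
-D__KVM_NVHE_HYPERVISOR__; roughly:

  # Sketch of what is already in arch/arm64/kvm/hyp/nvhe/Makefile:
  hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y))
  ccflags-y := -D__KVM_NVHE_HYPERVISOR__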
Suggested-by: Mark Rutland <[email protected]>
Signed-off-by: Kalesh Singh <[email protected]>
---
arch/arm64/include/asm/stacktrace.h | 18 ++++---
arch/arm64/kernel/stacktrace.c | 73 ++++++++++++++++++-----------
arch/arm64/kvm/hyp/nvhe/Makefile | 3 +-
3 files changed, 60 insertions(+), 34 deletions(-)
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index aec9315bf156..f5af9a94c5a6 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -16,12 +16,14 @@
#include <asm/sdei.h>
enum stack_type {
- STACK_TYPE_UNKNOWN,
+#ifndef __KVM_NVHE_HYPERVISOR__
STACK_TYPE_TASK,
STACK_TYPE_IRQ,
STACK_TYPE_OVERFLOW,
STACK_TYPE_SDEI_NORMAL,
STACK_TYPE_SDEI_CRITICAL,
+#endif /* !__KVM_NVHE_HYPERVISOR__ */
+ STACK_TYPE_UNKNOWN,
__NR_STACK_TYPES
};
@@ -31,11 +33,6 @@ struct stack_info {
enum stack_type type;
};
-extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
- const char *loglvl);
-
-DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
-
static inline bool on_stack(unsigned long sp, unsigned long size,
unsigned long low, unsigned long high,
enum stack_type type, struct stack_info *info)
@@ -54,6 +51,12 @@ static inline bool on_stack(unsigned long sp, unsigned long size,
return true;
}
+#ifndef __KVM_NVHE_HYPERVISOR__
+extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
+ const char *loglvl);
+
+DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
+
static inline bool on_irq_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
@@ -88,6 +91,7 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info) { return false; }
#endif
+#endif /* !__KVM_NVHE_HYPERVISOR__ */
/*
@@ -101,6 +105,7 @@ static inline bool on_accessible_stack(const struct task_struct *tsk,
if (info)
info->type = STACK_TYPE_UNKNOWN;
+#ifndef __KVM_NVHE_HYPERVISOR__
if (on_task_stack(tsk, sp, size, info))
return true;
if (tsk != current || preemptible())
@@ -111,6 +116,7 @@ static inline bool on_accessible_stack(const struct task_struct *tsk,
return true;
if (on_sdei_stack(sp, size, info))
return true;
+#endif /* !__KVM_NVHE_HYPERVISOR__ */
return false;
}
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 0467cb79f080..a84e38d41d38 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -81,23 +81,19 @@ NOKPROBE_SYMBOL(unwind_init);
* records (e.g. a cycle), determined based on the location and fp value of A
* and the location (but not the fp value) of B.
*/
-static int notrace unwind_next(struct task_struct *tsk,
- struct unwind_state *state)
+static int notrace __unwind_next(struct task_struct *tsk,
+ struct unwind_state *state,
+ struct stack_info *info)
{
unsigned long fp = state->fp;
- struct stack_info info;
-
- /* Final frame; nothing to unwind */
- if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
- return -ENOENT;
if (fp & 0x7)
return -EINVAL;
- if (!on_accessible_stack(tsk, fp, 16, &info))
+ if (!on_accessible_stack(tsk, fp, 16, info))
return -EINVAL;
- if (test_bit(info.type, state->stacks_done))
+ if (test_bit(info->type, state->stacks_done))
return -EINVAL;
/*
@@ -113,7 +109,7 @@ static int notrace unwind_next(struct task_struct *tsk,
* stack to another, it's never valid to unwind back to that first
* stack.
*/
- if (info.type == state->prev_type) {
+ if (info->type == state->prev_type) {
if (fp <= state->prev_fp)
return -EINVAL;
} else {
@@ -127,7 +123,45 @@ static int notrace unwind_next(struct task_struct *tsk,
state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
state->prev_fp = fp;
- state->prev_type = info.type;
+ state->prev_type = info->type;
+
+ return 0;
+}
+NOKPROBE_SYMBOL(__unwind_next);
+
+static int notrace unwind_next(struct task_struct *tsk,
+ struct unwind_state *state);
+
+static void notrace unwind(struct task_struct *tsk,
+ struct unwind_state *state,
+ stack_trace_consume_fn consume_entry, void *cookie)
+{
+ while (1) {
+ int ret;
+
+ if (!consume_entry(cookie, state->pc))
+ break;
+ ret = unwind_next(tsk, state);
+ if (ret < 0)
+ break;
+ }
+}
+NOKPROBE_SYMBOL(unwind);
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+static int notrace unwind_next(struct task_struct *tsk,
+ struct unwind_state *state)
+{
+ struct stack_info info;
+ int err;
+
+ /* Final frame; nothing to unwind */
+ if (state->fp == (unsigned long)task_pt_regs(tsk)->stackframe)
+ return -ENOENT;
+
+ err = __unwind_next(tsk, state, &info);
+ if (err)
+ return err;
state->pc = ptrauth_strip_insn_pac(state->pc);
@@ -157,22 +191,6 @@ static int notrace unwind_next(struct task_struct *tsk,
}
NOKPROBE_SYMBOL(unwind_next);
-static void notrace unwind(struct task_struct *tsk,
- struct unwind_state *state,
- stack_trace_consume_fn consume_entry, void *cookie)
-{
- while (1) {
- int ret;
-
- if (!consume_entry(cookie, state->pc))
- break;
- ret = unwind_next(tsk, state);
- if (ret < 0)
- break;
- }
-}
-NOKPROBE_SYMBOL(unwind);
-
static bool dump_backtrace_entry(void *arg, unsigned long where)
{
char *loglvl = arg;
@@ -224,3 +242,4 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
unwind(task, &state, consume_entry, cookie);
}
+#endif /* !__KVM_NVHE_HYPERVISOR__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index f9fe4dc21b1f..c0ff0d6fc403 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -14,7 +14,8 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs))
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
- cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
+ cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o \
+ ../../../kernel/stacktrace.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
obj-$(CONFIG_DEBUG_LIST) += list_debug.o
--
2.36.0.rc2.479.g8af0fa9b8e-goog
On Wed, Apr 27, 2022 at 11:46:59AM -0700, Kalesh Singh wrote:
> On hyp_panic(), the hypervisor dumps the addresses for its stacktrace
> entries to a page shared with the host. The host then symbolizes and
> prints the hyp stacktrace before panicking itself.
>
> Example stacktrace:
>
> [ 122.051187] kvm [380]: Invalid host exception to nVHE hyp!
> [ 122.052467] kvm [380]: nVHE HYP call trace:
> [ 122.052814] kvm [380]: [<ffff800008f5b550>] __kvm_nvhe___pkvm_vcpu_init_traps+0x1f0/0x1f0
> [ 122.053865] kvm [380]: [<ffff800008f560f0>] __kvm_nvhe_hyp_panic+0x130/0x1c0
> [ 122.054367] kvm [380]: [<ffff800008f56190>] __kvm_nvhe___kvm_vcpu_run+0x10/0x10
> [ 122.054878] kvm [380]: [<ffff800008f57a40>] __kvm_nvhe_handle___kvm_vcpu_run+0x30/0x50
> [ 122.055412] kvm [380]: [<ffff800008f57d2c>] __kvm_nvhe_handle_trap+0xbc/0x160
> [ 122.055911] kvm [380]: [<ffff800008f56864>] __kvm_nvhe___host_exit+0x64/0x64
> [ 122.056417] kvm [380]: ---- end of nVHE HYP call trace ----
This will be really helpful!
Reviewed-by: Mark Brown <[email protected]>
On Wed, Apr 27, 2022 at 11:46:56AM -0700, Kalesh Singh wrote:
> Recompile stack unwinding code for use with the nVHE hypervisor. This is
> a preparatory patch that will allow reusing most of the kernel unwinding
> logic in the nVHE hypervisor.
This is substantially more than just the build change that the changelog
would seem to indicate... it would I think be clearer to split this up
further with the code changes separated out and explained a bit more.
It's not just recompiling the code for nVHE, there's also refactoring to
split out changes that don't apply in the nVHE hypervisor, like all the
task-related code, which is needed but not mentioned in the changelog at all.
Possibly a patch or two for the code motion then a separate patch for
the ifdefs and build changes?
I *think* the code is all fine but I'd need to go through it a few more
times to be sure I didn't miss anything.
On Fri, Apr 29, 2022 at 5:47 AM Mark Brown <[email protected]> wrote:
>
> On Wed, Apr 27, 2022 at 11:46:56AM -0700, Kalesh Singh wrote:
>
> > Recompile stack unwinding code for use with the nVHE hypervisor. This is
> > a preparatory patch that will allow reusing most of the kernel unwinding
> > logic in the nVHE hypervisor.
>
> This is substantially more than just the build change that the changelog
> would seem to indicate... it would I think be clearer to split this up
> further with the code changes separated out and explained a bit more.
> It's not just recompiling the code for nVHE, there's also refactoring to
> split out changes that don't apply in the nVHE hypervisor, like all the
> task-related code, which is needed but not mentioned in the changelog at all.
> Possibly a patch or two for the code motion then a separate patch for
> the ifdefs and build changes?
Hi Mark,
Thank you for reviewing. I agree - will split this into more
incremental patches in the next version.
Thanks,
Kalesh
>
> I *think* the code is all fine but I'd need to go through it a few more
> times to be sure I didn't miss anything.