2022-06-07 17:19:34

by Kalesh Singh

[permalink] [raw]
Subject: [PATCH v3 5/5] KVM: arm64: Unwind and dump nVHE hypervisor stacktrace

On hyp_panic(), the hypervisor dumps the addresses for its stacktrace
entries to a page shared with the host. The host then symbolizes and
prints the hyp stacktrace before panicking itself.

Example stacktrace:

[ 122.051187] kvm [380]: Invalid host exception to nVHE hyp!
[ 122.052467] kvm [380]: nVHE HYP call trace:
[ 122.052814] kvm [380]: [<ffff800008f5b550>] __kvm_nvhe___pkvm_vcpu_init_traps+0x1f0/0x1f0
[ 122.053865] kvm [380]: [<ffff800008f560f0>] __kvm_nvhe_hyp_panic+0x130/0x1c0
[ 122.054367] kvm [380]: [<ffff800008f56190>] __kvm_nvhe___kvm_vcpu_run+0x10/0x10
[ 122.054878] kvm [380]: [<ffff800008f57a40>] __kvm_nvhe_handle___kvm_vcpu_run+0x30/0x50
[ 122.055412] kvm [380]: [<ffff800008f57d2c>] __kvm_nvhe_handle_trap+0xbc/0x160
[ 122.055911] kvm [380]: [<ffff800008f56864>] __kvm_nvhe___host_exit+0x64/0x64
[ 122.056417] kvm [380]: ---- end of nVHE HYP call trace ----

Signed-off-by: Kalesh Singh <[email protected]>
Reviewed-by: Mark Brown <[email protected]>
---

Changes in v2:
- Add Mark's Reviewed-by tag

arch/arm64/include/asm/stacktrace.h | 42 ++++++++++++++--
arch/arm64/kernel/stacktrace.c | 75 +++++++++++++++++++++++++++++
arch/arm64/kvm/handle_exit.c | 4 ++
arch/arm64/kvm/hyp/nvhe/switch.c | 4 ++
4 files changed, 121 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index f5af9a94c5a6..3063912107b0 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -5,6 +5,7 @@
#ifndef __ASM_STACKTRACE_H
#define __ASM_STACKTRACE_H

+#include <linux/kvm_host.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
@@ -19,10 +20,12 @@ enum stack_type {
#ifndef __KVM_NVHE_HYPERVISOR__
STACK_TYPE_TASK,
STACK_TYPE_IRQ,
- STACK_TYPE_OVERFLOW,
STACK_TYPE_SDEI_NORMAL,
STACK_TYPE_SDEI_CRITICAL,
+#else /* __KVM_NVHE_HYPERVISOR__ */
+ STACK_TYPE_HYP,
#endif /* !__KVM_NVHE_HYPERVISOR__ */
+ STACK_TYPE_OVERFLOW,
STACK_TYPE_UNKNOWN,
__NR_STACK_TYPES
};
@@ -55,6 +58,9 @@ static inline bool on_stack(unsigned long sp, unsigned long size,
extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
const char *loglvl);

+extern void hyp_dump_backtrace(unsigned long hyp_offset);
+
+DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stacktrace_page);
DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);

static inline bool on_irq_stack(unsigned long sp, unsigned long size,
@@ -91,8 +97,32 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info) { return false; }
#endif
-#endif /* !__KVM_NVHE_HYPERVISOR__ */
+#else /* __KVM_NVHE_HYPERVISOR__ */
+
+extern void hyp_save_backtrace(void);
+
+DECLARE_PER_CPU(unsigned long [PAGE_SIZE/sizeof(long)], overflow_stack);
+DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
+
+static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
+ struct stack_info *info)
+{
+ unsigned long low = (unsigned long)this_cpu_ptr(overflow_stack);
+ unsigned long high = low + PAGE_SIZE;
+
+ return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
+}
+
+static inline bool on_hyp_stack(unsigned long sp, unsigned long size,
+ struct stack_info *info)
+{
+ struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
+ unsigned long high = params->stack_hyp_va;
+ unsigned long low = high - PAGE_SIZE;

+ return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
+}
+#endif /* !__KVM_NVHE_HYPERVISOR__ */

/*
* We can only safely access per-cpu stacks from current in a non-preemptible
@@ -105,6 +135,9 @@ static inline bool on_accessible_stack(const struct task_struct *tsk,
if (info)
info->type = STACK_TYPE_UNKNOWN;

+ if (on_overflow_stack(sp, size, info))
+ return true;
+
#ifndef __KVM_NVHE_HYPERVISOR__
if (on_task_stack(tsk, sp, size, info))
return true;
@@ -112,10 +145,11 @@ static inline bool on_accessible_stack(const struct task_struct *tsk,
return false;
if (on_irq_stack(sp, size, info))
return true;
- if (on_overflow_stack(sp, size, info))
- return true;
if (on_sdei_stack(sp, size, info))
return true;
+#else /* __KVM_NVHE_HYPERVISOR__ */
+ if (on_hyp_stack(sp, size, info))
+ return true;
#endif /* !__KVM_NVHE_HYPERVISOR__ */

return false;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index f346b4c66f1c..c81dea9760ac 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -104,6 +104,7 @@ static int notrace __unwind_next(struct task_struct *tsk,
*
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+ * HYP -> OVERFLOW
*
* ... but the nesting itself is strict. Once we transition from one
* stack to another, it's never valid to unwind back to that first
@@ -242,7 +243,81 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,

unwind(task, &state, consume_entry, cookie);
}
+
+/**
+ * Symbolizes and dumps the hypervisor backtrace from the shared
+ * stacktrace page.
+ */
+noinline notrace void hyp_dump_backtrace(unsigned long hyp_offset)
+{
+ unsigned long *stacktrace_pos =
+ (unsigned long *)*this_cpu_ptr(&kvm_arm_hyp_stacktrace_page);
+ unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0);
+ unsigned long pc = *stacktrace_pos++;
+
+ kvm_err("nVHE HYP call trace:\n");
+
+ while (pc) {
+ pc &= va_mask; /* Mask tags */
+ pc += hyp_offset; /* Convert to kern addr */
+ kvm_err("[<%016lx>] %pB\n", pc, (void *)pc);
+ pc = *stacktrace_pos++;
+ }
+
+ kvm_err("---- end of nVHE HYP call trace ----\n");
+}
#else /* __KVM_NVHE_HYPERVISOR__ */
DEFINE_PER_CPU(unsigned long [PAGE_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
+
+static int notrace unwind_next(struct task_struct *tsk,
+ struct unwind_state *state)
+{
+ struct stack_info info;
+
+ return __unwind_next(tsk, state, &info);
+}
+
+/**
+ * Saves a hypervisor stacktrace entry (address) to the shared stacktrace page.
+ */
+static bool hyp_save_backtrace_entry(void *arg, unsigned long where)
+{
+ struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
+ unsigned long **stacktrace_pos = (unsigned long **)arg;
+ unsigned long stacktrace_start, stacktrace_end;
+
+ stacktrace_start = (unsigned long)params->stacktrace_hyp_va;
+ stacktrace_end = stacktrace_start + PAGE_SIZE - (2 * sizeof(long));
+
+ if ((unsigned long) *stacktrace_pos > stacktrace_end)
+ return false;
+
+ /* Save the entry to the current pos in stacktrace page */
+ **stacktrace_pos = where;
+
+ /* A zero entry delimits the end of the stacktrace. */
+ *(*stacktrace_pos + 1) = 0UL;
+
+ /* Increment the current pos */
+ ++*stacktrace_pos;
+
+ return true;
+}
+
+/**
+ * Saves hypervisor stacktrace to the shared stacktrace page.
+ */
+noinline notrace void hyp_save_backtrace(void)
+{
+ struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
+ void *stacktrace_start = (void *)params->stacktrace_hyp_va;
+ struct unwind_state state;
+
+ unwind_init(&state, (unsigned long)__builtin_frame_address(0),
+ _THIS_IP_);
+
+ unwind(NULL, &state, hyp_save_backtrace_entry, &stacktrace_start);
+}
+
#endif /* !__KVM_NVHE_HYPERVISOR__ */
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index f66c0142b335..96c5dc5529a1 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -17,6 +17,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/debug-monitors.h>
+#include <asm/stacktrace.h>
#include <asm/traps.h>

#include <kvm/arm_hypercalls.h>
@@ -353,6 +354,9 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
(void *)panic_addr);
}

+ /* Dump the hypervisor stacktrace */
+ hyp_dump_backtrace(hyp_offset);
+
/*
* Hyp has panicked and we're going to handle that by panicking the
* kernel. The kernel offset will be revealed in the panic so we're
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 6db801db8f27..add157f8e3f3 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -25,6 +25,7 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
+#include <asm/stacktrace.h>

#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
@@ -375,6 +376,9 @@ asmlinkage void __noreturn hyp_panic(void)
__sysreg_restore_state_nvhe(host_ctxt);
}

+ /* Save the hypervisor stacktrace */
+ hyp_save_backtrace();
+
__hyp_do_panic(host_ctxt, spsr, elr, par);
unreachable();
}
--
2.36.1.255.ge46751e96f-goog


2022-06-13 07:27:11

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v3 5/5] KVM: arm64: Unwind and dump nVHE hypervisor stacktrace

Hi Kalesh,

Thank you for the patch! However, there is still something to improve:

[auto build test ERROR on f2906aa863381afb0015a9eb7fefad885d4e5a56]

url: https://github.com/intel-lab-lkp/linux/commits/Kalesh-Singh/KVM-nVHE-Hypervisor-stack-unwinder/20220608-011351
base: f2906aa863381afb0015a9eb7fefad885d4e5a56
config: arm64-randconfig-c024-20220612 (https://download.01.org/0day-ci/archive/20220613/[email protected]/config)
compiler: aarch64-linux-gcc (GCC) 11.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/ac1ce397ffe5b05df06cdb56a30db4099c7428ec
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Kalesh-Singh/KVM-nVHE-Hypervisor-stack-unwinder/20220608-011351
git checkout ac1ce397ffe5b05df06cdb56a30db4099c7428ec
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=arm64 SHELL=/bin/bash

If you fix the issue, kindly add the following tag where applicable:
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

aarch64-linux-ld: arch/arm64/kernel/stacktrace.o: in function `hyp_dump_backtrace':
>> arch/arm64/kernel/stacktrace.c:254: undefined reference to `kvm_arm_hyp_stacktrace_page'
aarch64-linux-ld: arch/arm64/kernel/stacktrace.o: relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol `kvm_arm_hyp_stacktrace_page' which may bind externally can not be used when making a shared object; recompile with -fPIC
arch/arm64/kernel/stacktrace.c:254:(.text+0x634): dangerous relocation: unsupported relocation
>> aarch64-linux-ld: arch/arm64/kernel/stacktrace.c:254: undefined reference to `kvm_arm_hyp_stacktrace_page'
pahole: .tmp_vmlinux.btf: No such file or directory
.btf.vmlinux.bin.o: file not recognized: file format not recognized


vim +254 arch/arm64/kernel/stacktrace.c

246
247 /**
248 * Symbolizes and dumps the hypervisor backtrace from the shared
249 * stacktrace page.
250 */
251 noinline notrace void hyp_dump_backtrace(unsigned long hyp_offset)
252 {
253 unsigned long *stacktrace_pos =
> 254 (unsigned long *)*this_cpu_ptr(&kvm_arm_hyp_stacktrace_page);
255 unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0);
256 unsigned long pc = *stacktrace_pos++;
257
258 kvm_err("nVHE HYP call trace:\n");
259
260 while (pc) {
261 pc &= va_mask; /* Mask tags */
262 pc += hyp_offset; /* Convert to kern addr */
263 kvm_err("[<%016lx>] %pB\n", pc, (void *)pc);
264 pc = *stacktrace_pos++;
265 }
266
267 kvm_err("---- end of nVHE HYP call trace ----\n");
268 }
269 #else /* __KVM_NVHE_HYPERVISOR__ */
270 DEFINE_PER_CPU(unsigned long [PAGE_SIZE/sizeof(long)], overflow_stack)
271 __aligned(16);
272

--
0-DAY CI Kernel Test Service
https://01.org/lkp

2022-06-14 20:32:00

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v3 5/5] KVM: arm64: Unwind and dump nVHE hypervisor stacktrace

Hi Kalesh,

Thank you for the patch! However, there is still something to improve:

[auto build test ERROR on f2906aa863381afb0015a9eb7fefad885d4e5a56]

url: https://github.com/intel-lab-lkp/linux/commits/Kalesh-Singh/KVM-nVHE-Hypervisor-stack-unwinder/20220608-011351
base: f2906aa863381afb0015a9eb7fefad885d4e5a56
config: arm64-buildonly-randconfig-r003-20220613 (https://download.01.org/0day-ci/archive/20220615/[email protected]/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project c97436f8b6e2718286e8496faf53a2c800e281cf)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install arm64 cross compiling tool for clang build
# apt-get install binutils-aarch64-linux-gnu
# https://github.com/intel-lab-lkp/linux/commit/ac1ce397ffe5b05df06cdb56a30db4099c7428ec
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Kalesh-Singh/KVM-nVHE-Hypervisor-stack-unwinder/20220608-011351
git checkout ac1ce397ffe5b05df06cdb56a30db4099c7428ec
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 SHELL=/bin/bash

If you fix the issue, kindly add the following tag where applicable:
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

>> ld.lld: error: undefined symbol: kvm_arm_hyp_stacktrace_page
>>> referenced by stacktrace.c:254 (arch/arm64/kernel/stacktrace.c:254)
>>> kernel/stacktrace.o:(hyp_dump_backtrace) in archive arch/arm64/built-in.a
>>> referenced by stacktrace.c:254 (arch/arm64/kernel/stacktrace.c:254)
>>> kernel/stacktrace.o:(hyp_dump_backtrace) in archive arch/arm64/built-in.a

--
0-DAY CI Kernel Test Service
https://01.org/lkp