Hi,
KVM nVHE hyp code runs under different VA mapping than the kernel, which
meant that .hyp.text code had to use PC-relative addressing because
relocations would produce a kernel VA. Programmers had to be extremely
careful with C semantics to not break this fragile setup. See
hyp_symbol_addr comments for details.
Now that we're moving to all nVHE hyp code/data being in separate ELF
sections from the rest of the kernel, it is becoming possible to revisit
relocations during early boot, filter those used by nVHE hyp and
converting those (already relocated) kern VAs to hyp VAs.
Sending this as an RFC, mainly to get feedback but also because it's
only lightly tested. It still feels hacky but much more robust than the
existing approach. The one place where I see somebody breaking this is
the list of ELF sections owned by ELF. That list is currently evolving
but should stabilize over time.
The patches are based on kvmarm/queue (with Marc's "Host EL2 entry
improvements") and my "Opt-in always-on nVHE hypervisor" v2 series.
-David
David Brazdil (6):
kvm: arm64: Set up .hyp.rodata ELF section
kvm: arm64: Fix up RELA relocations in hyp code/data
kvm: arm64: Fix up RELR relocation in hyp code/data
kvm: arm64: Remove patching of fn pointers in hyp
kvm: arm64: Fix constant-pool users in hyp
kvm: arm64: Remove hyp_symbol_addr
arch/arm64/include/asm/kvm_asm.h | 20 ----
arch/arm64/include/asm/kvm_mmu.h | 48 ++++-----
arch/arm64/include/asm/sections.h | 2 +-
arch/arm64/kernel/image-vars.h | 1 -
arch/arm64/kernel/smp.c | 4 +-
arch/arm64/kernel/vmlinux.lds.S | 7 +-
arch/arm64/kvm/arm.c | 7 +-
arch/arm64/kvm/hyp/include/hyp/switch.h | 4 +-
arch/arm64/kvm/hyp/nvhe/host.S | 29 +++---
arch/arm64/kvm/hyp/nvhe/hyp-main.c | 11 +-
arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 4 +-
arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 1 +
arch/arm64/kvm/hyp/nvhe/psci-relay.c | 4 +-
arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 +-
arch/arm64/kvm/va_layout.c | 123 +++++++++++++++++++++--
15 files changed, 175 insertions(+), 92 deletions(-)
--
2.29.2.299.gdc1121823c-goog
Taking a function pointer will now generate a R_AARCH64_RELATIVE that is
fixed up at early boot. Remove the alternative-based mechanism for
converting the address from a kernel VA.
Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/include/asm/kvm_mmu.h | 18 ------------------
arch/arm64/kernel/image-vars.h | 1 -
arch/arm64/kvm/hyp/nvhe/hyp-main.c | 11 ++++-------
arch/arm64/kvm/va_layout.c | 6 ------
4 files changed, 4 insertions(+), 32 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index e5226f7e4732..8cb8974ec9cc 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -121,24 +121,6 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
-static __always_inline unsigned long __kimg_hyp_va(unsigned long v)
-{
- unsigned long offset;
-
- asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
- "movk %0, #0, lsl #16\n"
- "movk %0, #0, lsl #32\n"
- "movk %0, #0, lsl #48\n",
- kvm_update_kimg_phys_offset)
- : "=r" (offset));
-
- return __kern_hyp_va((v - offset) | PAGE_OFFSET);
-}
-
-#define kimg_fn_hyp_va(v) ((typeof(*v))(__kimg_hyp_va((unsigned long)(v))))
-
-#define kimg_fn_ptr(x) (typeof(x) **)(x)
-
/*
* We currently support using a VM-specified IPA size. For backward
* compatibility, the default IPA size is fixed to 40bits.
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 8539f34d7538..6379721236cf 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -64,7 +64,6 @@ __efistub__ctype = _ctype;
/* Alternative callbacks for init-time patching of nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_patch_vector_branch);
KVM_NVHE_ALIAS(kvm_update_va_mask);
-KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset);
KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
/* Global kernel state accessed by nVHE hyp code. */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index b3db5f4eea27..7998eff5f0a2 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -110,9 +110,9 @@ static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)
typedef void (*hcall_t)(struct kvm_cpu_context *);
-#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = kimg_fn_ptr(handle_##x)
+#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
-static const hcall_t *host_hcall[] = {
+static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
@@ -132,7 +132,6 @@ static const hcall_t *host_hcall[] = {
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned long, id, host_ctxt, 0);
- const hcall_t *kfn;
hcall_t hfn;
id -= KVM_HOST_SMCCC_ID(0);
@@ -140,13 +139,11 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
if (unlikely(id >= ARRAY_SIZE(host_hcall)))
goto inval;
- kfn = host_hcall[id];
- if (unlikely(!kfn))
+ hfn = host_hcall[id];
+ if (unlikely(!hfn))
goto inval;
cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
-
- hfn = kimg_fn_hyp_va(kfn);
hfn(host_ctxt);
return;
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index 7f45a98eacfd..0494315f71f2 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -373,12 +373,6 @@ static void generate_mov_q(u64 val, __le32 *origptr, __le32 *updptr, int nr_inst
*updptr++ = cpu_to_le32(insn);
}
-void kvm_update_kimg_phys_offset(struct alt_instr *alt,
- __le32 *origptr, __le32 *updptr, int nr_inst)
-{
- generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst);
-}
-
void kvm_get_kimage_voffset(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst)
{
--
2.29.2.299.gdc1121823c-goog
The helper was used to force PC-relative addressing in hyp code because
absolute addressing via constant-pools used to generate kernel VAs. This
was cumbersome and required programmers to remember to use the helper
whenever they wanted to take a pointer.
Now that hyp relocations are fixed up, there is no need for the helper
any logner. Remove it.
Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/include/asm/kvm_asm.h | 20 --------------------
arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++--
arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 4 ++--
arch/arm64/kvm/hyp/nvhe/psci-relay.c | 4 ++--
arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 +-
5 files changed, 7 insertions(+), 27 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 1a86581e581e..1961d23c0c40 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -203,26 +203,6 @@ extern void __vgic_v3_init_lrs(void);
extern u32 __kvm_get_mdcr_el2(void);
-/*
- * Obtain the PC-relative address of a kernel symbol
- * s: symbol
- *
- * The goal of this macro is to return a symbol's address based on a
- * PC-relative computation, as opposed to a loading the VA from a
- * constant pool or something similar. This works well for HYP, as an
- * absolute VA is guaranteed to be wrong. Only use this if trying to
- * obtain the address of a symbol (i.e. not something you obtained by
- * following a pointer).
- */
-#define hyp_symbol_addr(s) \
- ({ \
- typeof(s) *addr; \
- asm("adrp %0, %1\n" \
- "add %0, %0, :lo12:%1\n" \
- : "=r" (addr) : "S" (&s)); \
- addr; \
- })
-
#define __KVM_EXTABLE(from, to) \
" .pushsection __kvm_ex_table, \"a\"\n" \
" .align 3\n" \
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 84473574c2e7..54f4860cd87c 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -505,8 +505,8 @@ static inline void __kvm_unexpected_el2_exception(void)
struct exception_table_entry *entry, *end;
unsigned long elr_el2 = read_sysreg(elr_el2);
- entry = hyp_symbol_addr(__start___kvm_ex_table);
- end = hyp_symbol_addr(__stop___kvm_ex_table);
+ entry = &__start___kvm_ex_table;
+ end = &__stop___kvm_ex_table;
while (entry < end) {
addr = (unsigned long)&entry->insn + entry->insn;
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
index ceb427aabb91..6870d9f3d4b7 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
@@ -33,8 +33,8 @@ unsigned long __hyp_per_cpu_offset(unsigned int cpu)
if (cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base))
hyp_panic();
- cpu_base_array = (unsigned long*)hyp_symbol_addr(kvm_arm_hyp_percpu_base);
+ cpu_base_array = (unsigned long*)(&kvm_arm_hyp_percpu_base[0]);
this_cpu_base = kern_hyp_va(cpu_base_array[cpu]);
- elf_base = (unsigned long)hyp_symbol_addr(__per_cpu_start);
+ elf_base = (unsigned long)&__per_cpu_start;
return this_cpu_base - elf_base;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index 313ef42f0eab..f64380a49a72 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -147,7 +147,7 @@ static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
* point if it is a deep sleep state.
*/
ret = psci_call(func_id, power_state,
- __hyp_pa(hyp_symbol_addr(__kvm_hyp_cpu_entry)),
+ __hyp_pa(__kvm_hyp_cpu_entry),
__hyp_pa(cpu_params));
release_reset_state(cpu_state);
@@ -182,7 +182,7 @@ static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt)
return PSCI_RET_ALREADY_ON;
ret = psci_call(func_id, mpidr,
- __hyp_pa(hyp_symbol_addr(__kvm_hyp_cpu_entry)),
+ __hyp_pa(__kvm_hyp_cpu_entry),
__hyp_pa(cpu_params));
/*
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index 8f0585640241..87a54375bd6e 100644
--- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -64,7 +64,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
}
rd = kvm_vcpu_dabt_get_rd(vcpu);
- addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va;
+ addr = kvm_vgic_global_state.vcpu_hyp_va;
addr += fault_ipa - vgic->vgic_cpu_base;
if (kvm_vcpu_dabt_iswrite(vcpu)) {
--
2.29.2.299.gdc1121823c-goog
Hyp code used to use absolute addressing via a constant pool to obtain
the kernel VA of 3 symbols - panic, __hyp_panic_string and
__kvm_handle_stub_hvc. This used to work because the kernel would
relocate the addresses in the constant pool to kernel VA at boot and hyp
would simply load them from there.
Now that relocations are fixed up to point to hyp VAs, this does not
work any longer. Rework the helpers to convert hyp VA to kernel VA / PA
as needed.
Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/include/asm/kvm_mmu.h | 29 +++++++++++++++++++----------
arch/arm64/kvm/hyp/nvhe/host.S | 29 +++++++++++++++--------------
2 files changed, 34 insertions(+), 24 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 8cb8974ec9cc..0676ff2105bb 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -72,9 +72,14 @@ alternative_cb kvm_update_va_mask
alternative_cb_end
.endm
+.macro hyp_pa reg, tmp
+ ldr_l \tmp, hyp_physvirt_offset
+ add \reg, \reg, \tmp
+.endm
+
/*
- * Convert a kernel image address to a PA
- * reg: kernel address to be converted in place
+ * Convert a hypervisor VA to a kernel image address
+ * reg: hypervisor address to be converted in place
* tmp: temporary register
*
* The actual code generation takes place in kvm_get_kimage_voffset, and
@@ -82,18 +87,22 @@ alternative_cb_end
* perform the register allocation (kvm_get_kimage_voffset uses the
* specific registers encoded in the instructions).
*/
-.macro kimg_pa reg, tmp
+.macro hyp_kimg reg, tmp
+ /* Convert hyp VA -> PA. */
+ hyp_pa \reg, \tmp
+
+ /* Load kimage_voffset. */
alternative_cb kvm_get_kimage_voffset
- movz \tmp, #0
- movk \tmp, #0, lsl #16
- movk \tmp, #0, lsl #32
- movk \tmp, #0, lsl #48
+ movz \tmp, #0
+ movk \tmp, #0, lsl #16
+ movk \tmp, #0, lsl #32
+ movk \tmp, #0, lsl #48
alternative_cb_end
- /* reg = __pa(reg) */
- sub \reg, \reg, \tmp
+ /* Convert PA -> kimg VA. */
+ add \reg, \reg, \tmp
.endm
-
+
#else
#include <linux/pgtable.h>
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 596dd5ae8e77..bcb80d525d8c 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -74,27 +74,28 @@ SYM_FUNC_END(__host_enter)
* void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
*/
SYM_FUNC_START(__hyp_do_panic)
- /* Load the format arguments into x1-7 */
- mov x6, x3
- get_vcpu_ptr x7, x3
-
- mrs x3, esr_el2
- mrs x4, far_el2
- mrs x5, hpfar_el2
-
/* Prepare and exit to the host's panic funciton. */
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, lr
ldr lr, =panic
+ hyp_kimg lr, x6
msr elr_el2, lr
- /*
- * Set the panic format string and enter the host, conditionally
- * restoring the host context.
- */
+ /* Set the panic format string. Use the, now free, LR as scratch. */
+ ldr lr, =__hyp_panic_string
+ hyp_kimg lr, x6
+
+ /* Load the format arguments into x1-7. */
+ mov x6, x3
+ get_vcpu_ptr x7, x3
+ mrs x3, esr_el2
+ mrs x4, far_el2
+ mrs x5, hpfar_el2
+
+ /* Enter the host, conditionally restoring the host context. */
cmp x0, xzr
- ldr x0, =__hyp_panic_string
+ mov x0, lr
b.eq __host_enter_without_restoring
b __host_enter_for_panic
SYM_FUNC_END(__hyp_do_panic)
@@ -124,7 +125,7 @@ SYM_FUNC_END(__hyp_do_panic)
* Preserve x0-x4, which may contain stub parameters.
*/
ldr x5, =__kvm_handle_stub_hvc
- kimg_pa x5, x6
+ hyp_pa x5, x6
br x5
.L__vect_end\@:
.if ((.L__vect_end\@ - .L__vect_start\@) > 0x80)
--
2.29.2.299.gdc1121823c-goog
KVM nVHE code runs under a different VA mapping than the kernel, hence
so far it relied only on PC-relative addressing to avoid accidentally
using a relocated kernel VA from a constant pool (see hyp_symbol_addr).
So as to reduce the possibility of a programmer error, fixup the
relocated addresses instead. Let the kernel relocate them to kernel VA
first, but then iterate over them again, filter those that point to hyp
code/data and convert the kernel VA to hyp VA.
This is done after kvm_compute_layout and before apply_alternatives.
Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/include/asm/kvm_mmu.h | 1 +
arch/arm64/kernel/smp.c | 4 +-
arch/arm64/kvm/va_layout.c | 76 ++++++++++++++++++++++++++++++++
3 files changed, 80 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 5168a0c516ae..e5226f7e4732 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -105,6 +105,7 @@ alternative_cb_end
void kvm_update_va_mask(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
void kvm_compute_layout(void);
+void kvm_fixup_hyp_relocations(void);
static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 18e9727d3f64..30241afc2c93 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
"CPU: CPUs started in inconsistent modes");
else
pr_info("CPU: All CPU(s) started at EL1\n");
- if (IS_ENABLED(CONFIG_KVM))
+ if (IS_ENABLED(CONFIG_KVM)) {
kvm_compute_layout();
+ kvm_fixup_hyp_relocations();
+ }
}
void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index d8cc51bd60bf..b80fab974896 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -10,6 +10,7 @@
#include <asm/alternative.h>
#include <asm/debug-monitors.h>
#include <asm/insn.h>
+#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/memory.h>
@@ -82,6 +83,81 @@ __init void kvm_compute_layout(void)
init_hyp_physvirt_offset();
}
+#define __load_elf_u64(s) \
+ ({ \
+ extern u64 s; \
+ u64 val; \
+ \
+ asm ("ldr %0, =%1" : "=r"(val) : "S"(&s)); \
+ val; \
+ })
+
+static bool __is_within_bounds(u64 addr, char *start, char *end)
+{
+ return start <= (char*)addr && (char*)addr < end;
+}
+
+static bool __is_in_hyp_section(u64 addr)
+{
+ return __is_within_bounds(addr, __hyp_text_start, __hyp_text_end) ||
+ __is_within_bounds(addr, __hyp_rodata_start, __hyp_rodata_end) ||
+ __is_within_bounds(addr,
+ CHOOSE_NVHE_SYM(__per_cpu_start),
+ CHOOSE_NVHE_SYM(__per_cpu_end));
+}
+
+static void __fixup_hyp_rel(u64 addr)
+{
+ u64 *ptr, kern_va, hyp_va;
+
+ /* Adjust the relocation address taken from ELF for KASLR. */
+ addr += kaslr_offset();
+
+ /* Skip addresses not in any of the hyp sections. */
+ if (!__is_in_hyp_section(addr))
+ return;
+
+ /* Get the LM alias of the relocation address. */
+ ptr = (u64*)kvm_ksym_ref((void*)addr);
+
+ /*
+ * Read the value at the relocation address. It has already been
+ * relocated to the actual kernel kimg VA.
+ */
+ kern_va = (u64)kvm_ksym_ref((void*)*ptr);
+
+ /* Convert to hyp VA. */
+ hyp_va = __early_kern_hyp_va(kern_va);
+
+ /* Store hyp VA at the relocation address. */
+ *ptr = __early_kern_hyp_va(kern_va);
+}
+
+static void __fixup_hyp_rela(void)
+{
+ Elf64_Rela *rel;
+ size_t i, n;
+
+ rel = (Elf64_Rela*)(kimage_vaddr + __load_elf_u64(__rela_offset));
+ n = __load_elf_u64(__rela_size) / sizeof(*rel);
+
+ for (i = 0; i < n; ++i)
+ __fixup_hyp_rel(rel[i].r_offset);
+}
+
+/*
+ * The kernel relocated pointers to kernel VA. Iterate over relocations in
+ * the hypervisor ELF sections and convert them to hyp VA. This avoids the
+ * need to only use PC-relative addressing in hyp.
+ */
+__init void kvm_fixup_hyp_relocations(void)
+{
+ if (!IS_ENABLED(CONFIG_RELOCATABLE) || has_vhe())
+ return;
+
+ __fixup_hyp_rela();
+}
+
static u32 compute_instruction(int n, u32 rd, u32 rn)
{
u32 insn = AARCH64_BREAK_FAULT;
--
2.29.2.299.gdc1121823c-goog
The arm64 kernel also supports packing of relocation data using the RELR
format. Implement a parser of RELR data and fixup the relocations using
the same infra as RELA relocs.
Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/kvm/va_layout.c | 41 ++++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index b80fab974896..7f45a98eacfd 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -145,6 +145,43 @@ static void __fixup_hyp_rela(void)
__fixup_hyp_rel(rel[i].r_offset);
}
+#ifdef CONFIG_RELR
+static void __fixup_hyp_relr(void)
+{
+ u64 *rel, *end;
+
+ rel = (u64*)(kimage_vaddr + __load_elf_u64(__relr_offset));
+ end = rel + (__load_elf_u64(__relr_size) / sizeof(*rel));
+
+ while (rel < end) {
+ unsigned n;
+ u64 addr = *(rel++);
+
+ /* Address must not have the LSB set. */
+ BUG_ON(addr & BIT(0));
+
+ /* Fix up the first address of the chain. */
+ __fixup_hyp_rel(addr);
+
+ /*
+ * Loop over bitmaps, i.e. as long as words' LSB is 1.
+ * Each bit (ordered from LSB to MSB) represents one word from
+ * the last full address (exclusive). If the corresponding bit
+ * is 1, there is a relative relocation on that word.
+ */
+ for (n = 0; rel < end && (*rel & BIT(0)); n++) {
+ unsigned i;
+ u64 bitmap = *(rel++);
+
+ for (i = 1; i < 64; ++i) {
+ if ((bitmap & BIT(i)))
+ __fixup_hyp_rel(addr + 8 * (63 * n + i));
+ }
+ }
+ }
+}
+#endif
+
/*
* The kernel relocated pointers to kernel VA. Iterate over relocations in
* the hypervisor ELF sections and convert them to hyp VA. This avoids the
@@ -156,6 +193,10 @@ __init void kvm_fixup_hyp_relocations(void)
return;
__fixup_hyp_rela();
+
+#ifdef CONFIG_RELR
+ __fixup_hyp_relr();
+#endif
}
static u32 compute_instruction(int n, u32 rd, u32 rn)
--
2.29.2.299.gdc1121823c-goog
We will need to recognize pointers in .rodata specific to hyp, so
establish a .hyp.rodata ELF section. Merge it with the existing
.hyp.data..ro_after_init as they are treated the same at runtime.
Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/include/asm/sections.h | 2 +-
arch/arm64/kernel/vmlinux.lds.S | 7 ++++---
arch/arm64/kvm/arm.c | 7 +++----
arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 1 +
4 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 8ff579361731..a6f3557d1ab2 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -11,7 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[];
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
extern char __hyp_text_start[], __hyp_text_end[];
-extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[];
+extern char __hyp_rodata_start[], __hyp_rodata_end[];
extern char __idmap_text_start[], __idmap_text_end[];
extern char __initdata_begin[], __initdata_end[];
extern char __inittext_begin[], __inittext_end[];
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 4382b5d0645d..6f2fd9734d63 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -31,10 +31,11 @@ jiffies = jiffies_64;
__stop___kvm_ex_table = .;
#define HYPERVISOR_DATA_SECTIONS \
- HYP_SECTION_NAME(.data..ro_after_init) : { \
- __hyp_data_ro_after_init_start = .; \
+ HYP_SECTION_NAME(.rodata) : { \
+ __hyp_rodata_start = .; \
*(HYP_SECTION_NAME(.data..ro_after_init)) \
- __hyp_data_ro_after_init_end = .; \
+ *(HYP_SECTION_NAME(.rodata)) \
+ __hyp_rodata_end = .; \
}
#define HYPERVISOR_PERCPU_SECTION \
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index d6d5211653b7..119c97e8900a 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1688,11 +1688,10 @@ static int init_hyp_mode(void)
goto out_err;
}
- err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start),
- kvm_ksym_ref(__hyp_data_ro_after_init_end),
- PAGE_HYP_RO);
+ err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
+ kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
if (err) {
- kvm_err("Cannot map .hyp.data..ro_after_init section\n");
+ kvm_err("Cannot map .hyp.rodata section\n");
goto out_err;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
index 5d76ff2ba63e..b0789183d49d 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
@@ -17,4 +17,5 @@ SECTIONS {
PERCPU_INPUT(L1_CACHE_BYTES)
}
HYP_SECTION(.data..ro_after_init)
+ HYP_SECTION(.rodata)
}
--
2.29.2.299.gdc1121823c-goog
On 2020-11-19 16:25, David Brazdil wrote:
> KVM nVHE code runs under a different VA mapping than the kernel, hence
> so far it relied only on PC-relative addressing to avoid accidentally
> using a relocated kernel VA from a constant pool (see hyp_symbol_addr).
>
> So as to reduce the possibility of a programmer error, fixup the
> relocated addresses instead. Let the kernel relocate them to kernel VA
> first, but then iterate over them again, filter those that point to hyp
> code/data and convert the kernel VA to hyp VA.
>
> This is done after kvm_compute_layout and before apply_alternatives.
>
> Signed-off-by: David Brazdil <[email protected]>
> ---
> arch/arm64/include/asm/kvm_mmu.h | 1 +
> arch/arm64/kernel/smp.c | 4 +-
> arch/arm64/kvm/va_layout.c | 76 ++++++++++++++++++++++++++++++++
> 3 files changed, 80 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/kvm_mmu.h
> b/arch/arm64/include/asm/kvm_mmu.h
> index 5168a0c516ae..e5226f7e4732 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -105,6 +105,7 @@ alternative_cb_end
> void kvm_update_va_mask(struct alt_instr *alt,
> __le32 *origptr, __le32 *updptr, int nr_inst);
> void kvm_compute_layout(void);
> +void kvm_fixup_hyp_relocations(void);
>
> static __always_inline unsigned long __kern_hyp_va(unsigned long v)
> {
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 18e9727d3f64..30241afc2c93 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
> "CPU: CPUs started in inconsistent modes");
> else
> pr_info("CPU: All CPU(s) started at EL1\n");
> - if (IS_ENABLED(CONFIG_KVM))
> + if (IS_ENABLED(CONFIG_KVM)) {
> kvm_compute_layout();
> + kvm_fixup_hyp_relocations();
> + }
> }
>
> void __init smp_cpus_done(unsigned int max_cpus)
> diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
> index d8cc51bd60bf..b80fab974896 100644
> --- a/arch/arm64/kvm/va_layout.c
> +++ b/arch/arm64/kvm/va_layout.c
> @@ -10,6 +10,7 @@
> #include <asm/alternative.h>
> #include <asm/debug-monitors.h>
> #include <asm/insn.h>
> +#include <asm/kvm_asm.h>
> #include <asm/kvm_mmu.h>
> #include <asm/memory.h>
>
> @@ -82,6 +83,81 @@ __init void kvm_compute_layout(void)
> init_hyp_physvirt_offset();
> }
>
> +#define __load_elf_u64(s) \
> + ({ \
> + extern u64 s; \
> + u64 val; \
> + \
> + asm ("ldr %0, =%1" : "=r"(val) : "S"(&s)); \
> + val; \
> + })
I'm not sure I get the rational about the naming here. None of this
has much to do with ELF, but seems to just load a value from a
constant pool.
> +
> +static bool __is_within_bounds(u64 addr, char *start, char *end)
> +{
> + return start <= (char*)addr && (char*)addr < end;
> +}
> +
> +static bool __is_in_hyp_section(u64 addr)
> +{
> + return __is_within_bounds(addr, __hyp_text_start, __hyp_text_end) ||
> + __is_within_bounds(addr, __hyp_rodata_start, __hyp_rodata_end)
> ||
> + __is_within_bounds(addr,
> + CHOOSE_NVHE_SYM(__per_cpu_start),
> + CHOOSE_NVHE_SYM(__per_cpu_end));
> +}
> +
> +static void __fixup_hyp_rel(u64 addr)
> +{
> + u64 *ptr, kern_va, hyp_va;
> +
> + /* Adjust the relocation address taken from ELF for KASLR. */
> + addr += kaslr_offset();
> +
> + /* Skip addresses not in any of the hyp sections. */
> + if (!__is_in_hyp_section(addr))
> + return;
> +
> + /* Get the LM alias of the relocation address. */
> + ptr = (u64*)kvm_ksym_ref((void*)addr);
Why the casting? We should be perfectly fine without.
nit: we really need to change the name of this helper, it doesn't have
anything to do with symbols anymore. And actually, lm_alias() *is* the
right thing to use here (we don't relocate anything on VHE).
> +
> + /*
> + * Read the value at the relocation address. It has already been
> + * relocated to the actual kernel kimg VA.
> + */
> + kern_va = (u64)kvm_ksym_ref((void*)*ptr);
Same comment.
> +
> + /* Convert to hyp VA. */
> + hyp_va = __early_kern_hyp_va(kern_va);
> +
> + /* Store hyp VA at the relocation address. */
> + *ptr = __early_kern_hyp_va(kern_va);
> +}
> +
> +static void __fixup_hyp_rela(void)
> +{
> + Elf64_Rela *rel;
> + size_t i, n;
> +
> + rel = (Elf64_Rela*)(kimage_vaddr + __load_elf_u64(__rela_offset));
> + n = __load_elf_u64(__rela_size) / sizeof(*rel);
> +
> + for (i = 0; i < n; ++i)
> + __fixup_hyp_rel(rel[i].r_offset);
> +}
> +
> +/*
> + * The kernel relocated pointers to kernel VA. Iterate over
> relocations in
> + * the hypervisor ELF sections and convert them to hyp VA. This avoids
> the
> + * need to only use PC-relative addressing in hyp.
> + */
> +__init void kvm_fixup_hyp_relocations(void)
> +{
> + if (!IS_ENABLED(CONFIG_RELOCATABLE) || has_vhe())
What do we do if CONFIG_RELOCATABLE is not selected? As far as I can
tell,
bad things will happen...
I'm also worried that at this stage, the kernel is broken, until you
remove the other bits involved in runtime offsetting pointers.
Thanks,
M.
--
Jazz is not dead. It just smells funny...
On 2020-11-19 16:25, David Brazdil wrote:
> The arm64 kernel also supports packing of relocation data using the
> RELR
> format. Implement a parser of RELR data and fixup the relocations using
> the same infra as RELA relocs.
>
> Signed-off-by: David Brazdil <[email protected]>
> ---
> arch/arm64/kvm/va_layout.c | 41 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 41 insertions(+)
>
> diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
> index b80fab974896..7f45a98eacfd 100644
> --- a/arch/arm64/kvm/va_layout.c
> +++ b/arch/arm64/kvm/va_layout.c
> @@ -145,6 +145,43 @@ static void __fixup_hyp_rela(void)
> __fixup_hyp_rel(rel[i].r_offset);
> }
>
> +#ifdef CONFIG_RELR
> +static void __fixup_hyp_relr(void)
> +{
> + u64 *rel, *end;
> +
> + rel = (u64*)(kimage_vaddr + __load_elf_u64(__relr_offset));
> + end = rel + (__load_elf_u64(__relr_size) / sizeof(*rel));
> +
> + while (rel < end) {
> + unsigned n;
> + u64 addr = *(rel++);
> +
> + /* Address must not have the LSB set. */
> + BUG_ON(addr & BIT(0));
> +
> + /* Fix up the first address of the chain. */
> + __fixup_hyp_rel(addr);
> +
> + /*
> + * Loop over bitmaps, i.e. as long as words' LSB is 1.
> + * Each bit (ordered from LSB to MSB) represents one word from
> + * the last full address (exclusive). If the corresponding bit
> + * is 1, there is a relative relocation on that word.
> + */
What is the endianness of this bitmap? Is it guaranteed to be in
CPU-endian format?
> + for (n = 0; rel < end && (*rel & BIT(0)); n++) {
> + unsigned i;
> + u64 bitmap = *(rel++);
nit: if you change this u64 for an unsigned long...
> +
> + for (i = 1; i < 64; ++i) {
> + if ((bitmap & BIT(i)))
> + __fixup_hyp_rel(addr + 8 * (63 * n + i));
> + }
... this can be written as:
i = 1;
for_each_set_bit_from(i, &bitmap, 64)
__fixup_hyp_rel(addr + 8 * (63 * n + i));
> + }
> + }
> +}
> +#endif
> +
> /*
> * The kernel relocated pointers to kernel VA. Iterate over
> relocations in
> * the hypervisor ELF sections and convert them to hyp VA. This avoids
> the
> @@ -156,6 +193,10 @@ __init void kvm_fixup_hyp_relocations(void)
> return;
>
> __fixup_hyp_rela();
> +
> +#ifdef CONFIG_RELR
> + __fixup_hyp_relr();
> +#endif
> }
>
> static u32 compute_instruction(int n, u32 rd, u32 rn)
Thanks,
M.
--
Jazz is not dead. It just smells funny...
On Thu, 19 Nov 2020 at 17:25, David Brazdil <[email protected]> wrote:
>
> The arm64 kernel also supports packing of relocation data using the RELR
> format. Implement a parser of RELR data and fixup the relocations using
> the same infra as RELA relocs.
>
> Signed-off-by: David Brazdil <[email protected]>
> ---
> arch/arm64/kvm/va_layout.c | 41 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 41 insertions(+)
>
> diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
> index b80fab974896..7f45a98eacfd 100644
> --- a/arch/arm64/kvm/va_layout.c
> +++ b/arch/arm64/kvm/va_layout.c
> @@ -145,6 +145,43 @@ static void __fixup_hyp_rela(void)
> __fixup_hyp_rel(rel[i].r_offset);
> }
>
> +#ifdef CONFIG_RELR
Please prefer IS_ENABLED() [below] if the code in question can compile
(but perhaps not link) correctly when the symbol is not set.
> +static void __fixup_hyp_relr(void)
__init ?
> +{
> + u64 *rel, *end;
> +
> + rel = (u64*)(kimage_vaddr + __load_elf_u64(__relr_offset));
> + end = rel + (__load_elf_u64(__relr_size) / sizeof(*rel));
> +
The reason for this little dance with the offset and size exists
because the initial relocation routine runs from the ID mapping, but
the relocation fixups are performed via the kernel's VA mapping, as
the ID mapping does not cover the entire image. So simple adrp/add
pairs aren't suitable there.
In this case (as well as in the previous patch, btw), that problem
does not exist, and so I think we should be able to simply define
start and end markers inside the .rela sections, and reference them
here as symbols with external linkage (which ensures that they are
referenced relatively, although you could add in a
__attribute__((visibility("hidden"))) for good measure)
> + while (rel < end) {
> + unsigned n;
> + u64 addr = *(rel++);
Parens are redundant here (and below)
> +
> + /* Address must not have the LSB set. */
> + BUG_ON(addr & BIT(0));
> +
> + /* Fix up the first address of the chain. */
> + __fixup_hyp_rel(addr);
> +
> + /*
> + * Loop over bitmaps, i.e. as long as words' LSB is 1.
> + * Each bit (ordered from LSB to MSB) represents one word from
> + * the last full address (exclusive). If the corresponding bit
> + * is 1, there is a relative relocation on that word.
> + */
> + for (n = 0; rel < end && (*rel & BIT(0)); n++) {
> + unsigned i;
> + u64 bitmap = *(rel++);
> +
> + for (i = 1; i < 64; ++i) {
> + if ((bitmap & BIT(i)))
> + __fixup_hyp_rel(addr + 8 * (63 * n + i));
> + }
> + }
> + }
> +}
> +#endif
> +
> /*
> * The kernel relocated pointers to kernel VA. Iterate over relocations in
> * the hypervisor ELF sections and convert them to hyp VA. This avoids the
> @@ -156,6 +193,10 @@ __init void kvm_fixup_hyp_relocations(void)
> return;
>
> __fixup_hyp_rela();
> +
> +#ifdef CONFIG_RELR
> + __fixup_hyp_relr();
> +#endif
> }
>
> static u32 compute_instruction(int n, u32 rd, u32 rn)
> --
> 2.29.2.299.gdc1121823c-goog
>
On Thu, 19 Nov 2020 at 17:25, David Brazdil <[email protected]> wrote:
>
> Taking a function pointer will now generate a R_AARCH64_RELATIVE that is
> fixed up at early boot. Remove the alternative-based mechanism for
> converting the address from a kernel VA.
>
> Signed-off-by: David Brazdil <[email protected]>
Acked-by: Ard Biesheuvel <[email protected]>
> ---
> arch/arm64/include/asm/kvm_mmu.h | 18 ------------------
> arch/arm64/kernel/image-vars.h | 1 -
> arch/arm64/kvm/hyp/nvhe/hyp-main.c | 11 ++++-------
> arch/arm64/kvm/va_layout.c | 6 ------
> 4 files changed, 4 insertions(+), 32 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index e5226f7e4732..8cb8974ec9cc 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -121,24 +121,6 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
>
> #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
>
> -static __always_inline unsigned long __kimg_hyp_va(unsigned long v)
> -{
> - unsigned long offset;
> -
> - asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
> - "movk %0, #0, lsl #16\n"
> - "movk %0, #0, lsl #32\n"
> - "movk %0, #0, lsl #48\n",
> - kvm_update_kimg_phys_offset)
> - : "=r" (offset));
> -
> - return __kern_hyp_va((v - offset) | PAGE_OFFSET);
> -}
> -
> -#define kimg_fn_hyp_va(v) ((typeof(*v))(__kimg_hyp_va((unsigned long)(v))))
> -
> -#define kimg_fn_ptr(x) (typeof(x) **)(x)
> -
> /*
> * We currently support using a VM-specified IPA size. For backward
> * compatibility, the default IPA size is fixed to 40bits.
> diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
> index 8539f34d7538..6379721236cf 100644
> --- a/arch/arm64/kernel/image-vars.h
> +++ b/arch/arm64/kernel/image-vars.h
> @@ -64,7 +64,6 @@ __efistub__ctype = _ctype;
> /* Alternative callbacks for init-time patching of nVHE hyp code. */
> KVM_NVHE_ALIAS(kvm_patch_vector_branch);
> KVM_NVHE_ALIAS(kvm_update_va_mask);
> -KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset);
> KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
>
> /* Global kernel state accessed by nVHE hyp code. */
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> index b3db5f4eea27..7998eff5f0a2 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> @@ -110,9 +110,9 @@ static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)
>
> typedef void (*hcall_t)(struct kvm_cpu_context *);
>
> -#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = kimg_fn_ptr(handle_##x)
> +#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
>
> -static const hcall_t *host_hcall[] = {
> +static const hcall_t host_hcall[] = {
> HANDLE_FUNC(__kvm_vcpu_run),
> HANDLE_FUNC(__kvm_flush_vm_context),
> HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
> @@ -132,7 +132,6 @@ static const hcall_t *host_hcall[] = {
> static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
> {
> DECLARE_REG(unsigned long, id, host_ctxt, 0);
> - const hcall_t *kfn;
> hcall_t hfn;
>
> id -= KVM_HOST_SMCCC_ID(0);
> @@ -140,13 +139,11 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
> if (unlikely(id >= ARRAY_SIZE(host_hcall)))
> goto inval;
>
> - kfn = host_hcall[id];
> - if (unlikely(!kfn))
> + hfn = host_hcall[id];
> + if (unlikely(!hfn))
> goto inval;
>
> cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
> -
> - hfn = kimg_fn_hyp_va(kfn);
> hfn(host_ctxt);
>
> return;
> diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
> index 7f45a98eacfd..0494315f71f2 100644
> --- a/arch/arm64/kvm/va_layout.c
> +++ b/arch/arm64/kvm/va_layout.c
> @@ -373,12 +373,6 @@ static void generate_mov_q(u64 val, __le32 *origptr, __le32 *updptr, int nr_inst
> *updptr++ = cpu_to_le32(insn);
> }
>
> -void kvm_update_kimg_phys_offset(struct alt_instr *alt,
> - __le32 *origptr, __le32 *updptr, int nr_inst)
> -{
> - generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst);
> -}
> -
> void kvm_get_kimage_voffset(struct alt_instr *alt,
> __le32 *origptr, __le32 *updptr, int nr_inst)
> {
> --
> 2.29.2.299.gdc1121823c-goog
>
On Thu, 19 Nov 2020 at 17:26, David Brazdil <[email protected]> wrote:
>
> Hyp code used to use absolute addressing via a constant pool to obtain
> the kernel VA of 3 symbols - panic, __hyp_panic_string and
> __kvm_handle_stub_hvc. This used to work because the kernel would
> relocate the addresses in the constant pool to kernel VA at boot and hyp
> would simply load them from there.
>
> Now that relocations are fixed up to point to hyp VAs, this does not
> work any longer. Rework the helpers to convert hyp VA to kernel VA / PA
> as needed.
>
Ok, so the reason for the problem is that the literal exists inside
the HYP text, and all literals are fixed up using the HYP mapping,
even if they don't point to something that is mapped at HYP. Would it
make sense to simply disregard literals that point outside of the HYP
VA mapping?
> Signed-off-by: David Brazdil <[email protected]>
> ---
> arch/arm64/include/asm/kvm_mmu.h | 29 +++++++++++++++++++----------
> arch/arm64/kvm/hyp/nvhe/host.S | 29 +++++++++++++++--------------
> 2 files changed, 34 insertions(+), 24 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index 8cb8974ec9cc..0676ff2105bb 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -72,9 +72,14 @@ alternative_cb kvm_update_va_mask
> alternative_cb_end
> .endm
>
> +.macro hyp_pa reg, tmp
> + ldr_l \tmp, hyp_physvirt_offset
> + add \reg, \reg, \tmp
> +.endm
> +
> /*
> - * Convert a kernel image address to a PA
> - * reg: kernel address to be converted in place
> + * Convert a hypervisor VA to a kernel image address
> + * reg: hypervisor address to be converted in place
> * tmp: temporary register
> *
> * The actual code generation takes place in kvm_get_kimage_voffset, and
> @@ -82,18 +87,22 @@ alternative_cb_end
> * perform the register allocation (kvm_get_kimage_voffset uses the
> * specific registers encoded in the instructions).
> */
> -.macro kimg_pa reg, tmp
> +.macro hyp_kimg reg, tmp
> + /* Convert hyp VA -> PA. */
> + hyp_pa \reg, \tmp
> +
> + /* Load kimage_voffset. */
> alternative_cb kvm_get_kimage_voffset
> - movz \tmp, #0
> - movk \tmp, #0, lsl #16
> - movk \tmp, #0, lsl #32
> - movk \tmp, #0, lsl #48
> + movz \tmp, #0
> + movk \tmp, #0, lsl #16
> + movk \tmp, #0, lsl #32
> + movk \tmp, #0, lsl #48
> alternative_cb_end
>
> - /* reg = __pa(reg) */
> - sub \reg, \reg, \tmp
> + /* Convert PA -> kimg VA. */
> + add \reg, \reg, \tmp
> .endm
> -
> +
> #else
>
> #include <linux/pgtable.h>
> diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
> index 596dd5ae8e77..bcb80d525d8c 100644
> --- a/arch/arm64/kvm/hyp/nvhe/host.S
> +++ b/arch/arm64/kvm/hyp/nvhe/host.S
> @@ -74,27 +74,28 @@ SYM_FUNC_END(__host_enter)
> * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
> */
> SYM_FUNC_START(__hyp_do_panic)
> - /* Load the format arguments into x1-7 */
> - mov x6, x3
> - get_vcpu_ptr x7, x3
> -
> - mrs x3, esr_el2
> - mrs x4, far_el2
> - mrs x5, hpfar_el2
> -
> /* Prepare and exit to the host's panic funciton. */
> mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
> PSR_MODE_EL1h)
> msr spsr_el2, lr
> ldr lr, =panic
> + hyp_kimg lr, x6
> msr elr_el2, lr
>
> - /*
> - * Set the panic format string and enter the host, conditionally
> - * restoring the host context.
> - */
> + /* Set the panic format string. Use the, now free, LR as scratch. */
> + ldr lr, =__hyp_panic_string
> + hyp_kimg lr, x6
> +
> + /* Load the format arguments into x1-7. */
> + mov x6, x3
> + get_vcpu_ptr x7, x3
> + mrs x3, esr_el2
> + mrs x4, far_el2
> + mrs x5, hpfar_el2
> +
> + /* Enter the host, conditionally restoring the host context. */
> cmp x0, xzr
> - ldr x0, =__hyp_panic_string
> + mov x0, lr
> b.eq __host_enter_without_restoring
> b __host_enter_for_panic
> SYM_FUNC_END(__hyp_do_panic)
> @@ -124,7 +125,7 @@ SYM_FUNC_END(__hyp_do_panic)
> * Preserve x0-x4, which may contain stub parameters.
> */
> ldr x5, =__kvm_handle_stub_hvc
> - kimg_pa x5, x6
> + hyp_pa x5, x6
> br x5
> .L__vect_end\@:
> .if ((.L__vect_end\@ - .L__vect_start\@) > 0x80)
> --
> 2.29.2.299.gdc1121823c-goog
>
On Thu, 19 Nov 2020 at 17:25, David Brazdil <[email protected]> wrote:
>
> We will need to recognize pointers in .rodata specific to hyp,
Why?
> so
> establish a .hyp.rodata ELF section. Merge it with the existing
> .hyp.data..ro_after_init as they are treated the same at runtime.
>
Does this mean HYP .text, .rodata etc are all writable some time after
the kernel .text/.rodata have been mapped read-only? That is not a
problem per se, but it deserves being called out.
> Signed-off-by: David Brazdil <[email protected]>
> ---
> arch/arm64/include/asm/sections.h | 2 +-
> arch/arm64/kernel/vmlinux.lds.S | 7 ++++---
> arch/arm64/kvm/arm.c | 7 +++----
> arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 1 +
> 4 files changed, 9 insertions(+), 8 deletions(-)
>
> diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
> index 8ff579361731..a6f3557d1ab2 100644
> --- a/arch/arm64/include/asm/sections.h
> +++ b/arch/arm64/include/asm/sections.h
> @@ -11,7 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[];
> extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
> extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
> extern char __hyp_text_start[], __hyp_text_end[];
> -extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[];
> +extern char __hyp_rodata_start[], __hyp_rodata_end[];
> extern char __idmap_text_start[], __idmap_text_end[];
> extern char __initdata_begin[], __initdata_end[];
> extern char __inittext_begin[], __inittext_end[];
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index 4382b5d0645d..6f2fd9734d63 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -31,10 +31,11 @@ jiffies = jiffies_64;
> __stop___kvm_ex_table = .;
>
> #define HYPERVISOR_DATA_SECTIONS \
> - HYP_SECTION_NAME(.data..ro_after_init) : { \
> - __hyp_data_ro_after_init_start = .; \
> + HYP_SECTION_NAME(.rodata) : { \
> + __hyp_rodata_start = .; \
> *(HYP_SECTION_NAME(.data..ro_after_init)) \
> - __hyp_data_ro_after_init_end = .; \
> + *(HYP_SECTION_NAME(.rodata)) \
> + __hyp_rodata_end = .; \
> }
>
> #define HYPERVISOR_PERCPU_SECTION \
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index d6d5211653b7..119c97e8900a 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -1688,11 +1688,10 @@ static int init_hyp_mode(void)
> goto out_err;
> }
>
> - err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start),
> - kvm_ksym_ref(__hyp_data_ro_after_init_end),
> - PAGE_HYP_RO);
> + err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
> + kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
> if (err) {
> - kvm_err("Cannot map .hyp.data..ro_after_init section\n");
> + kvm_err("Cannot map .hyp.rodata section\n");
> goto out_err;
> }
>
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
> index 5d76ff2ba63e..b0789183d49d 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
> @@ -17,4 +17,5 @@ SECTIONS {
> PERCPU_INPUT(L1_CACHE_BYTES)
> }
> HYP_SECTION(.data..ro_after_init)
> + HYP_SECTION(.rodata)
> }
> --
> 2.29.2.299.gdc1121823c-goog
>
On Thu, 19 Nov 2020 at 17:25, David Brazdil <[email protected]> wrote:
>
> KVM nVHE code runs under a different VA mapping than the kernel, hence
> so far it relied only on PC-relative addressing to avoid accidentally
> using a relocated kernel VA from a constant pool (see hyp_symbol_addr).
>
> So as to reduce the possibility of a programmer error, fixup the
> relocated addresses instead. Let the kernel relocate them to kernel VA
> first, but then iterate over them again, filter those that point to hyp
> code/data and convert the kernel VA to hyp VA.
>
> This is done after kvm_compute_layout and before apply_alternatives.
>
If this is significant enough to call out, please include the reason for it.
> Signed-off-by: David Brazdil <[email protected]>
> ---
> arch/arm64/include/asm/kvm_mmu.h | 1 +
> arch/arm64/kernel/smp.c | 4 +-
> arch/arm64/kvm/va_layout.c | 76 ++++++++++++++++++++++++++++++++
> 3 files changed, 80 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index 5168a0c516ae..e5226f7e4732 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -105,6 +105,7 @@ alternative_cb_end
> void kvm_update_va_mask(struct alt_instr *alt,
> __le32 *origptr, __le32 *updptr, int nr_inst);
> void kvm_compute_layout(void);
> +void kvm_fixup_hyp_relocations(void);
>
> static __always_inline unsigned long __kern_hyp_va(unsigned long v)
> {
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 18e9727d3f64..30241afc2c93 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
> "CPU: CPUs started in inconsistent modes");
> else
> pr_info("CPU: All CPU(s) started at EL1\n");
> - if (IS_ENABLED(CONFIG_KVM))
> + if (IS_ENABLED(CONFIG_KVM)) {
> kvm_compute_layout();
> + kvm_fixup_hyp_relocations();
> + }
> }
>
> void __init smp_cpus_done(unsigned int max_cpus)
> diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
> index d8cc51bd60bf..b80fab974896 100644
> --- a/arch/arm64/kvm/va_layout.c
> +++ b/arch/arm64/kvm/va_layout.c
> @@ -10,6 +10,7 @@
> #include <asm/alternative.h>
> #include <asm/debug-monitors.h>
> #include <asm/insn.h>
> +#include <asm/kvm_asm.h>
> #include <asm/kvm_mmu.h>
> #include <asm/memory.h>
>
> @@ -82,6 +83,81 @@ __init void kvm_compute_layout(void)
> init_hyp_physvirt_offset();
> }
>
> +#define __load_elf_u64(s) \
> + ({ \
> + extern u64 s; \
> + u64 val; \
> + \
> + asm ("ldr %0, =%1" : "=r"(val) : "S"(&s)); \
> + val; \
> + })
> +
Do you need this to ensure that the reference is absolute? There may
be more elegant ways to achieve that, using weak references for
instance.
Also, in the relocation startup code, I deliberately used a 32-bit
quantity here, as it won't get confused for an absolute virtual
address that needs relocation.
> +static bool __is_within_bounds(u64 addr, char *start, char *end)
> +{
> + return start <= (char*)addr && (char*)addr < end;
> +}
> +
> +static bool __is_in_hyp_section(u64 addr)
> +{
> + return __is_within_bounds(addr, __hyp_text_start, __hyp_text_end) ||
> + __is_within_bounds(addr, __hyp_rodata_start, __hyp_rodata_end) ||
> + __is_within_bounds(addr,
> + CHOOSE_NVHE_SYM(__per_cpu_start),
> + CHOOSE_NVHE_SYM(__per_cpu_end));
> +}
> +
It is slightly disappointing that we need to filter these one by one
like this, but I don't think there are any guarantees about the order
in which the R_AARCH64_RELATIVE entries appear.
> +static void __fixup_hyp_rel(u64 addr)
__init ?
> +{
> + u64 *ptr, kern_va, hyp_va;
> +
> + /* Adjust the relocation address taken from ELF for KASLR. */
> + addr += kaslr_offset();
> +
> + /* Skip addresses not in any of the hyp sections. */
> + if (!__is_in_hyp_section(addr))
> + return;
> +
> + /* Get the LM alias of the relocation address. */
> + ptr = (u64*)kvm_ksym_ref((void*)addr);
> +
> + /*
> + * Read the value at the relocation address. It has already been
> + * relocated to the actual kernel kimg VA.
> + */
> + kern_va = (u64)kvm_ksym_ref((void*)*ptr);
> +
> + /* Convert to hyp VA. */
> + hyp_va = __early_kern_hyp_va(kern_va);
> +
> + /* Store hyp VA at the relocation address. */
> + *ptr = __early_kern_hyp_va(kern_va);
> +}
> +
> +static void __fixup_hyp_rela(void)
__init ?
> +{
> + Elf64_Rela *rel;
> + size_t i, n;
> +
> + rel = (Elf64_Rela*)(kimage_vaddr + __load_elf_u64(__rela_offset));
> + n = __load_elf_u64(__rela_size) / sizeof(*rel);
> +
> + for (i = 0; i < n; ++i)
> + __fixup_hyp_rel(rel[i].r_offset);
> +}
> +
> +/*
> + * The kernel relocated pointers to kernel VA. Iterate over relocations in
> + * the hypervisor ELF sections and convert them to hyp VA. This avoids the
> + * need to only use PC-relative addressing in hyp.
> + */
> +__init void kvm_fixup_hyp_relocations(void)
It is more idiomatic to put the __init after the 'void', and someone
is undoubtedly going to send a patch to 'fix' that if we merge it like
this.
> +{
> + if (!IS_ENABLED(CONFIG_RELOCATABLE) || has_vhe())
> + return;
> +
> + __fixup_hyp_rela();
> +}
> +
> static u32 compute_instruction(int n, u32 rd, u32 rn)
> {
> u32 insn = AARCH64_BREAK_FAULT;
> --
> 2.29.2.299.gdc1121823c-goog
>
On Thu, 19 Nov 2020 at 17:26, David Brazdil <[email protected]> wrote:
>
> The helper was used to force PC-relative addressing in hyp code because
> absolute addressing via constant-pools used to generate kernel VAs. This
> was cumbersome and required programmers to remember to use the helper
> whenever they wanted to take a pointer.
>
> Now that hyp relocations are fixed up, there is no need for the helper
> any logner. Remove it.
>
> Signed-off-by: David Brazdil <[email protected]>
Acked-by: Ard Biesheuvel <[email protected]>
> ---
> arch/arm64/include/asm/kvm_asm.h | 20 --------------------
> arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++--
> arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 4 ++--
> arch/arm64/kvm/hyp/nvhe/psci-relay.c | 4 ++--
> arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 +-
> 5 files changed, 7 insertions(+), 27 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index 1a86581e581e..1961d23c0c40 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -203,26 +203,6 @@ extern void __vgic_v3_init_lrs(void);
>
> extern u32 __kvm_get_mdcr_el2(void);
>
> -/*
> - * Obtain the PC-relative address of a kernel symbol
> - * s: symbol
> - *
> - * The goal of this macro is to return a symbol's address based on a
> - * PC-relative computation, as opposed to a loading the VA from a
> - * constant pool or something similar. This works well for HYP, as an
> - * absolute VA is guaranteed to be wrong. Only use this if trying to
> - * obtain the address of a symbol (i.e. not something you obtained by
> - * following a pointer).
> - */
> -#define hyp_symbol_addr(s) \
> - ({ \
> - typeof(s) *addr; \
> - asm("adrp %0, %1\n" \
> - "add %0, %0, :lo12:%1\n" \
> - : "=r" (addr) : "S" (&s)); \
> - addr; \
> - })
> -
> #define __KVM_EXTABLE(from, to) \
> " .pushsection __kvm_ex_table, \"a\"\n" \
> " .align 3\n" \
> diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
> index 84473574c2e7..54f4860cd87c 100644
> --- a/arch/arm64/kvm/hyp/include/hyp/switch.h
> +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
> @@ -505,8 +505,8 @@ static inline void __kvm_unexpected_el2_exception(void)
> struct exception_table_entry *entry, *end;
> unsigned long elr_el2 = read_sysreg(elr_el2);
>
> - entry = hyp_symbol_addr(__start___kvm_ex_table);
> - end = hyp_symbol_addr(__stop___kvm_ex_table);
> + entry = &__start___kvm_ex_table;
> + end = &__stop___kvm_ex_table;
>
> while (entry < end) {
> addr = (unsigned long)&entry->insn + entry->insn;
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
> index ceb427aabb91..6870d9f3d4b7 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
> @@ -33,8 +33,8 @@ unsigned long __hyp_per_cpu_offset(unsigned int cpu)
> if (cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base))
> hyp_panic();
>
> - cpu_base_array = (unsigned long*)hyp_symbol_addr(kvm_arm_hyp_percpu_base);
> + cpu_base_array = (unsigned long*)(&kvm_arm_hyp_percpu_base[0]);
> this_cpu_base = kern_hyp_va(cpu_base_array[cpu]);
> - elf_base = (unsigned long)hyp_symbol_addr(__per_cpu_start);
> + elf_base = (unsigned long)&__per_cpu_start;
> return this_cpu_base - elf_base;
> }
> diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
> index 313ef42f0eab..f64380a49a72 100644
> --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
> +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
> @@ -147,7 +147,7 @@ static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
> * point if it is a deep sleep state.
> */
> ret = psci_call(func_id, power_state,
> - __hyp_pa(hyp_symbol_addr(__kvm_hyp_cpu_entry)),
> + __hyp_pa(__kvm_hyp_cpu_entry),
> __hyp_pa(cpu_params));
>
> release_reset_state(cpu_state);
> @@ -182,7 +182,7 @@ static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt)
> return PSCI_RET_ALREADY_ON;
>
> ret = psci_call(func_id, mpidr,
> - __hyp_pa(hyp_symbol_addr(__kvm_hyp_cpu_entry)),
> + __hyp_pa(__kvm_hyp_cpu_entry),
> __hyp_pa(cpu_params));
>
> /*
> diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
> index 8f0585640241..87a54375bd6e 100644
> --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
> +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
> @@ -64,7 +64,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
> }
>
> rd = kvm_vcpu_dabt_get_rd(vcpu);
> - addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va;
> + addr = kvm_vgic_global_state.vcpu_hyp_va;
> addr += fault_ipa - vgic->vgic_cpu_base;
>
> if (kvm_vcpu_dabt_iswrite(vcpu)) {
> --
> 2.29.2.299.gdc1121823c-goog
>
Hey, relized I never replied to this...
On Tue, Nov 24, 2020 at 03:08:20PM +0100, Ard Biesheuvel wrote:
> On Thu, 19 Nov 2020 at 17:26, David Brazdil <[email protected]> wrote:
> >
> > Hyp code used to use absolute addressing via a constant pool to obtain
> > the kernel VA of 3 symbols - panic, __hyp_panic_string and
> > __kvm_handle_stub_hvc. This used to work because the kernel would
> > relocate the addresses in the constant pool to kernel VA at boot and hyp
> > would simply load them from there.
> >
> > Now that relocations are fixed up to point to hyp VAs, this does not
> > work any longer. Rework the helpers to convert hyp VA to kernel VA / PA
> > as needed.
> >
>
> Ok, so the reason for the problem is that the literal exists inside
> the HYP text, and all literals are fixed up using the HYP mapping,
> even if they don't point to something that is mapped at HYP. Would it
> make sense to simply disregard literals that point outside of the HYP
> VA mapping?
That would be possible - I'm about to post a v1 with build-time generation of
these relocs and kernel symbols are easily recognizable as they're undefined
in that ELF object. But I do worry that that would confuse people a lot.
And if somebody referenced a kernel symbol in C (maybe not a use case, but...)
they would get different results depending on which addressing mode the
compiler picked.
>
> > Signed-off-by: David Brazdil <[email protected]>
> > ---
> > arch/arm64/include/asm/kvm_mmu.h | 29 +++++++++++++++++++----------
> > arch/arm64/kvm/hyp/nvhe/host.S | 29 +++++++++++++++--------------
> > 2 files changed, 34 insertions(+), 24 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> > index 8cb8974ec9cc..0676ff2105bb 100644
> > --- a/arch/arm64/include/asm/kvm_mmu.h
> > +++ b/arch/arm64/include/asm/kvm_mmu.h
> > @@ -72,9 +72,14 @@ alternative_cb kvm_update_va_mask
> > alternative_cb_end
> > .endm
> >
> > +.macro hyp_pa reg, tmp
> > + ldr_l \tmp, hyp_physvirt_offset
> > + add \reg, \reg, \tmp
> > +.endm
> > +
> > /*
> > - * Convert a kernel image address to a PA
> > - * reg: kernel address to be converted in place
> > + * Convert a hypervisor VA to a kernel image address
> > + * reg: hypervisor address to be converted in place
> > * tmp: temporary register
> > *
> > * The actual code generation takes place in kvm_get_kimage_voffset, and
> > @@ -82,18 +87,22 @@ alternative_cb_end
> > * perform the register allocation (kvm_get_kimage_voffset uses the
> > * specific registers encoded in the instructions).
> > */
> > -.macro kimg_pa reg, tmp
> > +.macro hyp_kimg reg, tmp
> > + /* Convert hyp VA -> PA. */
> > + hyp_pa \reg, \tmp
> > +
> > + /* Load kimage_voffset. */
> > alternative_cb kvm_get_kimage_voffset
> > - movz \tmp, #0
> > - movk \tmp, #0, lsl #16
> > - movk \tmp, #0, lsl #32
> > - movk \tmp, #0, lsl #48
> > + movz \tmp, #0
> > + movk \tmp, #0, lsl #16
> > + movk \tmp, #0, lsl #32
> > + movk \tmp, #0, lsl #48
> > alternative_cb_end
> >
> > - /* reg = __pa(reg) */
> > - sub \reg, \reg, \tmp
> > + /* Convert PA -> kimg VA. */
> > + add \reg, \reg, \tmp
> > .endm
> > -
> > +
> > #else
> >
> > #include <linux/pgtable.h>
> > diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
> > index 596dd5ae8e77..bcb80d525d8c 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/host.S
> > +++ b/arch/arm64/kvm/hyp/nvhe/host.S
> > @@ -74,27 +74,28 @@ SYM_FUNC_END(__host_enter)
> > * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
> > */
> > SYM_FUNC_START(__hyp_do_panic)
> > - /* Load the format arguments into x1-7 */
> > - mov x6, x3
> > - get_vcpu_ptr x7, x3
> > -
> > - mrs x3, esr_el2
> > - mrs x4, far_el2
> > - mrs x5, hpfar_el2
> > -
> > /* Prepare and exit to the host's panic funciton. */
> > mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
> > PSR_MODE_EL1h)
> > msr spsr_el2, lr
> > ldr lr, =panic
> > + hyp_kimg lr, x6
> > msr elr_el2, lr
> >
> > - /*
> > - * Set the panic format string and enter the host, conditionally
> > - * restoring the host context.
> > - */
> > + /* Set the panic format string. Use the, now free, LR as scratch. */
> > + ldr lr, =__hyp_panic_string
> > + hyp_kimg lr, x6
> > +
> > + /* Load the format arguments into x1-7. */
> > + mov x6, x3
> > + get_vcpu_ptr x7, x3
> > + mrs x3, esr_el2
> > + mrs x4, far_el2
> > + mrs x5, hpfar_el2
> > +
> > + /* Enter the host, conditionally restoring the host context. */
> > cmp x0, xzr
> > - ldr x0, =__hyp_panic_string
> > + mov x0, lr
> > b.eq __host_enter_without_restoring
> > b __host_enter_for_panic
> > SYM_FUNC_END(__hyp_do_panic)
> > @@ -124,7 +125,7 @@ SYM_FUNC_END(__hyp_do_panic)
> > * Preserve x0-x4, which may contain stub parameters.
> > */
> > ldr x5, =__kvm_handle_stub_hvc
> > - kimg_pa x5, x6
> > + hyp_pa x5, x6
> > br x5
> > .L__vect_end\@:
> > .if ((.L__vect_end\@ - .L__vect_start\@) > 0x80)
> > --
> > 2.29.2.299.gdc1121823c-goog
> >