2020-12-09 13:20:30

by David Brazdil

[permalink] [raw]
Subject: [PATCH 0/9] KVM: arm64: Relocate absolute hyp VAs

nVHE hyp code is linked into the same kernel binary but executes under
different memory mappings. If the compiler of hyp code chooses absolute
addressing for accessing a symbol, the kernel linker will relocate that
address to a kernel image virtual address, causing a runtime exception.

So far the strategy has been to force PC-relative addressing by wrapping
all symbol references with the hyp_symbol_addr macro. This is
error-prone and developer-unfriendly.

The series adds a new build-time step for the nVHE hyp object file, in
which positions targeted by R_AARCH64_ABS64 relocations are enumerated
and the information is stored in a separate ELF section in the kernel
image. At runtime, the kernel first relocates all absolute addresses to
their actual virtual offset (e.g. for KASLR), and then the addresses
listed in this section are converted to hyp VAs.

The RFC of this series did not have a build-time step and instead relied
on filtering dynamic relocations at runtime. That approach does not work
if the kernel is built with !CONFIG_RELOCATABLE, hence an always-present
set of relocation positions was added.

The series is based on the current kvmarm/next (commit 3a514592b6) and
structured as follows:
* patch 1 is Jamie's fix of .hyp.data..percpu alignment; already in
master, not yet in kvmarm/next; included to avoid merge conflicts
* patches 2-3 make sure that all sections referred to by hyp code are
handled by the hyp linker script and prefixed with .hyp so they can
be identified by the build-time tool
* patches 4-6 contain the actual changes to identify and relocate VAs
* patches 7-8 fix existing code that assumes kernel VAs
* patch 9 removes the (now redundant) hyp_symbol_addr

The series is also available at:
https://android-kvm.googlesource.com/linux topic/hyp-reloc_v1

-David

David Brazdil (8):
KVM: arm64: Rename .idmap.text in hyp linker script
KVM: arm64: Set up .hyp.rodata ELF section
KVM: arm64: Add symbol at the beginning of each hyp section
KVM: arm64: Generate hyp relocation data
KVM: arm64: Apply hyp relocations at runtime
KVM: arm64: Fix constant-pool users in hyp
KVM: arm64: Remove patching of fn pointers in hyp
KVM: arm64: Remove hyp_symbol_addr

Jamie Iles (1):
KVM: arm64: Correctly align nVHE percpu data

arch/arm64/configs/defconfig | 1 +
arch/arm64/include/asm/hyp_image.h | 29 +-
arch/arm64/include/asm/kvm_asm.h | 20 --
arch/arm64/include/asm/kvm_mmu.h | 61 ++---
arch/arm64/include/asm/sections.h | 3 +-
arch/arm64/kernel/image-vars.h | 1 -
arch/arm64/kernel/smp.c | 4 +-
arch/arm64/kernel/vmlinux.lds.S | 18 +-
arch/arm64/kvm/arm.c | 7 +-
arch/arm64/kvm/hyp/include/hyp/switch.h | 4 +-
arch/arm64/kvm/hyp/nvhe/Makefile | 28 +-
arch/arm64/kvm/hyp/nvhe/gen-hyprel.c | 326 +++++++++++++++++++++++
arch/arm64/kvm/hyp/nvhe/host.S | 29 +-
arch/arm64/kvm/hyp/nvhe/hyp-init.S | 4 +-
arch/arm64/kvm/hyp/nvhe/hyp-main.c | 11 +-
arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 4 +-
arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 14 +-
arch/arm64/kvm/hyp/nvhe/psci-relay.c | 24 +-
arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 +-
arch/arm64/kvm/va_layout.c | 34 ++-
20 files changed, 495 insertions(+), 129 deletions(-)
create mode 100644 arch/arm64/kvm/hyp/nvhe/gen-hyprel.c

--
2.29.2.576.ga3fc446d84-goog


2020-12-09 13:21:26

by David Brazdil

[permalink] [raw]
Subject: [PATCH 3/9] KVM: arm64: Set up .hyp.rodata ELF section

We will need to recognize pointers in .rodata specific to hyp, so
establish a .hyp.rodata ELF section. Merge it with the existing
.hyp.data..ro_after_init as they are treated the same at runtime.

Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/include/asm/sections.h | 2 +-
arch/arm64/kernel/vmlinux.lds.S | 7 ++++---
arch/arm64/kvm/arm.c | 7 +++----
arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 4 +++-
4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 8ff579361731..a6f3557d1ab2 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -11,7 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[];
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
extern char __hyp_text_start[], __hyp_text_end[];
-extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[];
+extern char __hyp_rodata_start[], __hyp_rodata_end[];
extern char __idmap_text_start[], __idmap_text_end[];
extern char __initdata_begin[], __initdata_end[];
extern char __inittext_begin[], __inittext_end[];
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 43af13968dfd..f294f2048955 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -31,10 +31,11 @@ jiffies = jiffies_64;
__stop___kvm_ex_table = .;

#define HYPERVISOR_DATA_SECTIONS \
- HYP_SECTION_NAME(.data..ro_after_init) : { \
- __hyp_data_ro_after_init_start = .; \
+ HYP_SECTION_NAME(.rodata) : { \
+ __hyp_rodata_start = .; \
*(HYP_SECTION_NAME(.data..ro_after_init)) \
- __hyp_data_ro_after_init_end = .; \
+ *(HYP_SECTION_NAME(.rodata)) \
+ __hyp_rodata_end = .; \
}

#define HYPERVISOR_PERCPU_SECTION \
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 6e637d2b4cfb..c244e57f9cd9 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1745,11 +1745,10 @@ static int init_hyp_mode(void)
goto out_err;
}

- err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start),
- kvm_ksym_ref(__hyp_data_ro_after_init_end),
- PAGE_HYP_RO);
+ err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
+ kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
if (err) {
- kvm_err("Cannot map .hyp.data..ro_after_init section\n");
+ kvm_err("Cannot map .hyp.rodata section\n");
goto out_err;
}

diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
index 70ac48ccede7..cfdc59b4329b 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
@@ -14,6 +14,9 @@
SECTIONS {
HYP_SECTION(.idmap.text)
HYP_SECTION(.text)
+ HYP_SECTION(.data..ro_after_init)
+ HYP_SECTION(.rodata)
+
/*
* .hyp..data..percpu needs to be page aligned to maintain the same
* alignment for when linking into vmlinux.
@@ -22,5 +25,4 @@ SECTIONS {
HYP_SECTION_NAME(.data..percpu) : {
PERCPU_INPUT(L1_CACHE_BYTES)
}
- HYP_SECTION(.data..ro_after_init)
}
--
2.29.2.576.ga3fc446d84-goog

2020-12-09 13:21:36

by David Brazdil

[permalink] [raw]
Subject: [PATCH 4/9] KVM: arm64: Add symbol at the beginning of each hyp section

Generating hyp relocations will require referencing positions at a given
offset from the beginning of hyp sections. Since the final layout will
not be determined until the linking of `vmlinux`, modify the hyp linker
script to insert a symbol at the first byte of each hyp section to use
as an anchor. The linker of `vmlinux` will place the symbols together
with the sections.

Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/include/asm/hyp_image.h | 29 +++++++++++++++++++++++++++--
arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 4 ++--
2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/hyp_image.h b/arch/arm64/include/asm/hyp_image.h
index daa1a1da539e..65e8008da932 100644
--- a/arch/arm64/include/asm/hyp_image.h
+++ b/arch/arm64/include/asm/hyp_image.h
@@ -7,6 +7,9 @@
#ifndef __ARM64_HYP_IMAGE_H__
#define __ARM64_HYP_IMAGE_H__

+#define HYP_CONCAT(a, b) __HYP_CONCAT(a, b)
+#define __HYP_CONCAT(a, b) a ## b
+
/*
* KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_,
* to separate it from the kernel proper.
@@ -21,9 +24,31 @@
*/
#define HYP_SECTION_NAME(NAME) .hyp##NAME

+/* Symbol defined at the beginning of each hyp section. */
+#define HYP_SECTION_SYMBOL_NAME(NAME) \
+ HYP_CONCAT(__hyp_section_, HYP_SECTION_NAME(NAME))
+
+/*
+ * Helper to generate linker script statements starting a hyp section.
+ *
+ * A symbol with a well-known name is defined at the first byte. This
+ * is used as a base for hyp relocations (see gen-hyprel.c). It must
+ * be defined inside the section so the linker of `vmlinux` cannot
+ * separate it from the section data.
+ */
+#define BEGIN_HYP_SECTION(NAME) \
+ HYP_SECTION_NAME(NAME) : { \
+ HYP_SECTION_SYMBOL_NAME(NAME) = .;
+
+/* Helper to generate linker script statements ending a hyp section. */
+#define END_HYP_SECTION \
+ }
+
/* Defines an ELF hyp section from input section @NAME and its subsections. */
-#define HYP_SECTION(NAME) \
- HYP_SECTION_NAME(NAME) : { *(NAME NAME##.*) }
+#define HYP_SECTION(NAME) \
+ BEGIN_HYP_SECTION(NAME) \
+ *(NAME NAME##.*) \
+ END_HYP_SECTION

/*
* Defines a linker script alias of a kernel-proper symbol referenced by
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
index cfdc59b4329b..cd119d82d8e3 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
@@ -22,7 +22,7 @@ SECTIONS {
* alignment for when linking into vmlinux.
*/
. = ALIGN(PAGE_SIZE);
- HYP_SECTION_NAME(.data..percpu) : {
+ BEGIN_HYP_SECTION(.data..percpu)
PERCPU_INPUT(L1_CACHE_BYTES)
- }
+ END_HYP_SECTION
}
--
2.29.2.576.ga3fc446d84-goog

2020-12-09 13:22:09

by David Brazdil

[permalink] [raw]
Subject: [PATCH 7/9] KVM: arm64: Fix constant-pool users in hyp

Hyp code uses absolute addressing to obtain a kimg VA of a small number
of kernel symbols. Since the kernel now converts constant pool addresses
to hyp VAs, this trick does not work anymore.

Change the helpers to convert from hyp VA back to kimg VA or PA, as
needed, and rework the callers accordingly.

Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/include/asm/kvm_mmu.h | 42 ++++++++++++------------------
arch/arm64/kvm/hyp/nvhe/host.S | 29 +++++++++++----------
arch/arm64/kvm/hyp/nvhe/hyp-init.S | 2 --
3 files changed, 31 insertions(+), 42 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6bbb44011c84..adadc468cc71 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -73,49 +73,39 @@ alternative_cb_end
.endm

/*
- * Convert a kernel image address to a PA
- * reg: kernel address to be converted in place
+ * Convert a hypervisor VA to a PA
+ * reg: hypervisor address to be converted in place
* tmp: temporary register
- *
- * The actual code generation takes place in kvm_get_kimage_voffset, and
- * the instructions below are only there to reserve the space and
- * perform the register allocation (kvm_get_kimage_voffset uses the
- * specific registers encoded in the instructions).
*/
-.macro kimg_pa reg, tmp
-alternative_cb kvm_get_kimage_voffset
- movz \tmp, #0
- movk \tmp, #0, lsl #16
- movk \tmp, #0, lsl #32
- movk \tmp, #0, lsl #48
-alternative_cb_end
-
- /* reg = __pa(reg) */
- sub \reg, \reg, \tmp
+.macro hyp_pa reg, tmp
+ ldr_l \tmp, hyp_physvirt_offset
+ add \reg, \reg, \tmp
.endm

/*
- * Convert a kernel image address to a hyp VA
- * reg: kernel address to be converted in place
+ * Convert a hypervisor VA to a kernel image address
+ * reg: hypervisor address to be converted in place
* tmp: temporary register
*
* The actual code generation takes place in kvm_get_kimage_voffset, and
* the instructions below are only there to reserve the space and
- * perform the register allocation (kvm_update_kimg_phys_offset uses the
+ * perform the register allocation (kvm_get_kimage_voffset uses the
* specific registers encoded in the instructions).
*/
-.macro kimg_hyp_va reg, tmp
-alternative_cb kvm_update_kimg_phys_offset
+.macro hyp_kimg_va reg, tmp
+ /* Convert hyp VA -> PA. */
+ hyp_pa \reg, \tmp
+
+ /* Load kimage_voffset. */
+alternative_cb kvm_get_kimage_voffset
movz \tmp, #0
movk \tmp, #0, lsl #16
movk \tmp, #0, lsl #32
movk \tmp, #0, lsl #48
alternative_cb_end

- sub \reg, \reg, \tmp
- mov_q \tmp, PAGE_OFFSET
- orr \reg, \reg, \tmp
- kern_hyp_va \reg
+ /* Convert PA -> kimg VA. */
+ add \reg, \reg, \tmp
.endm

#else
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index a820dfdc9c25..6585a7cbbc56 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -74,27 +74,28 @@ SYM_FUNC_END(__host_enter)
* void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
*/
SYM_FUNC_START(__hyp_do_panic)
- /* Load the format arguments into x1-7 */
- mov x6, x3
- get_vcpu_ptr x7, x3
-
- mrs x3, esr_el2
- mrs x4, far_el2
- mrs x5, hpfar_el2
-
/* Prepare and exit to the host's panic funciton. */
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, lr
ldr lr, =panic
+ hyp_kimg_va lr, x6
msr elr_el2, lr

- /*
- * Set the panic format string and enter the host, conditionally
- * restoring the host context.
- */
+ /* Set the panic format string. Use the, now free, LR as scratch. */
+ ldr lr, =__hyp_panic_string
+ hyp_kimg_va lr, x6
+
+ /* Load the format arguments into x1-7. */
+ mov x6, x3
+ get_vcpu_ptr x7, x3
+ mrs x3, esr_el2
+ mrs x4, far_el2
+ mrs x5, hpfar_el2
+
+ /* Enter the host, conditionally restoring the host context. */
cmp x0, xzr
- ldr x0, =__hyp_panic_string
+ mov x0, lr
b.eq __host_enter_without_restoring
b __host_enter_for_panic
SYM_FUNC_END(__hyp_do_panic)
@@ -124,7 +125,7 @@ SYM_FUNC_END(__hyp_do_panic)
* Preserve x0-x4, which may contain stub parameters.
*/
ldr x5, =__kvm_handle_stub_hvc
- kimg_pa x5, x6
+ hyp_pa x5, x6
br x5
.L__vect_end\@:
.if ((.L__vect_end\@ - .L__vect_start\@) > 0x80)
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index 68fd64f2313e..99b408fe09ee 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -139,7 +139,6 @@ alternative_else_nop_endif

/* Set the host vector */
ldr x0, =__kvm_hyp_host_vector
- kimg_hyp_va x0, x1
msr vbar_el2, x0

ret
@@ -198,7 +197,6 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
/* Leave idmap. */
mov x0, x29
ldr x1, =kvm_host_psci_cpu_entry
- kimg_hyp_va x1, x2
br x1
SYM_CODE_END(__kvm_hyp_init_cpu)

--
2.29.2.576.ga3fc446d84-goog

2020-12-09 13:22:40

by David Brazdil

[permalink] [raw]
Subject: [PATCH 2/9] KVM: arm64: Rename .idmap.text in hyp linker script

So far hyp-init.S created a .hyp.idmap.text section directly, without
relying on the hyp linker script to prefix its name. Change it to create
.idmap.text and add a HYP_SECTION entry to hyp.lds.S. This way all .hyp*
sections go through the linker script and can be instrumented there.

Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/kvm/hyp/nvhe/hyp-init.S | 2 +-
arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index 31b060a44045..68fd64f2313e 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -18,7 +18,7 @@
#include <asm/virt.h>

.text
- .pushsection .hyp.idmap.text, "ax"
+ .pushsection .idmap.text, "ax"

.align 11

diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
index 1206d0d754d5..70ac48ccede7 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
@@ -12,6 +12,7 @@
#include <asm/memory.h>

SECTIONS {
+ HYP_SECTION(.idmap.text)
HYP_SECTION(.text)
/*
* .hyp..data..percpu needs to be page aligned to maintain the same
--
2.29.2.576.ga3fc446d84-goog

2020-12-09 13:24:49

by David Brazdil

[permalink] [raw]
Subject: [PATCH 1/9] KVM: arm64: Correctly align nVHE percpu data

From: Jamie Iles <[email protected]>

The nVHE percpu data is partially linked but the nVHE linker script did
not align the percpu section. The PERCPU_INPUT macro would then align
the data to a page boundary:

#define PERCPU_INPUT(cacheline) \
__per_cpu_start = .; \
*(.data..percpu..first) \
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
. = ALIGN(cacheline); \
*(.data..percpu..read_mostly) \
. = ALIGN(cacheline); \
*(.data..percpu) \
*(.data..percpu..shared_aligned) \
PERCPU_DECRYPTED_SECTION \
__per_cpu_end = .;

but then when the final vmlinux linking happens the hypervisor percpu
data is included after page alignment and so the offsets potentially
don't match. On my build I saw that the .hyp.data..percpu section was
at address 0x20 and then the percpu data would begin at 0x1000 (because
of the page alignment in PERCPU_INPUT), but when linked into vmlinux,
everything would be shifted down by 0x20 bytes.

This manifests as one of the CPUs getting lost when running
kvm-unit-tests or starting any VM, and a subsequent soft lockup on a
Cortex-A72 device.

Fixes: 30c953911c43 ("kvm: arm64: Set up hyp percpu data for nVHE")
Signed-off-by: Jamie Iles <[email protected]>
Signed-off-by: Marc Zyngier <[email protected]>
Acked-by: David Brazdil <[email protected]>
Cc: David Brazdil <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Will Deacon <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
index 5d76ff2ba63e..1206d0d754d5 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
@@ -13,6 +13,11 @@

SECTIONS {
HYP_SECTION(.text)
+ /*
+ * .hyp..data..percpu needs to be page aligned to maintain the same
+ * alignment for when linking into vmlinux.
+ */
+ . = ALIGN(PAGE_SIZE);
HYP_SECTION_NAME(.data..percpu) : {
PERCPU_INPUT(L1_CACHE_BYTES)
}
--
2.29.2.576.ga3fc446d84-goog

2020-12-09 13:25:39

by David Brazdil

[permalink] [raw]
Subject: [PATCH 5/9] KVM: arm64: Generate hyp relocation data

Add a post-processing step to the compilation of KVM nVHE hyp code which
calls a custom host tool (gen-hyprel) on the partially linked object
file (in which hyp section names have already been prefixed).

The tool lists all R_AARCH64_ABS64 data relocations targeting hyp
sections and generates an assembly file that will form a new section
.hyp.reloc in the kernel binary. The new section contains an array of
32-bit offsets to the positions targeted by these relocations.

Since the addresses of those positions will not be determined until the
linking of `vmlinux`, each 32-bit entry carries an R_AARCH64_PREL32
relocation with addend <section_base_sym> + <r_offset>. The linker of
`vmlinux` will therefore fill the slot accordingly.

This relocation data will be used at runtime to convert the kernel VAs
at those positions to hyp VAs.

Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/kernel/vmlinux.lds.S | 11 +
arch/arm64/kvm/hyp/nvhe/Makefile | 28 ++-
arch/arm64/kvm/hyp/nvhe/gen-hyprel.c | 326 +++++++++++++++++++++++++++
3 files changed, 362 insertions(+), 3 deletions(-)
create mode 100644 arch/arm64/kvm/hyp/nvhe/gen-hyprel.c

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index f294f2048955..93cef9607c0e 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -43,10 +43,19 @@ jiffies = jiffies_64;
HYP_SECTION_NAME(.data..percpu) : { \
*(HYP_SECTION_NAME(.data..percpu)) \
}
+
+#define HYPERVISOR_RELOC_SECTION \
+ .hyp.reloc : ALIGN(4) { \
+ __hyp_reloc_begin = .; \
+ *(.hyp.reloc) \
+ __hyp_reloc_end = .; \
+ }
+
#else /* CONFIG_KVM */
#define HYPERVISOR_EXTABLE
#define HYPERVISOR_DATA_SECTIONS
#define HYPERVISOR_PERCPU_SECTION
+#define HYPERVISOR_RELOC_SECTION
#endif

#define HYPERVISOR_TEXT \
@@ -219,6 +228,8 @@ SECTIONS
PERCPU_SECTION(L1_CACHE_BYTES)
HYPERVISOR_PERCPU_SECTION

+ HYPERVISOR_RELOC_SECTION
+
.rela.dyn : ALIGN(8) {
*(.rela .rela*)
}
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 1f1e351c5fe2..268be1376f74 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -6,6 +6,8 @@
asflags-y := -D__KVM_NVHE_HYPERVISOR__
ccflags-y := -D__KVM_NVHE_HYPERVISOR__

+hostprogs := gen-hyprel
+
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
@@ -19,7 +21,7 @@ obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \

hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y))
obj-y := kvm_nvhe.o
-extra-y := $(hyp-obj) kvm_nvhe.tmp.o hyp.lds
+extra-y := $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o

# 1) Compile all source files to `.nvhe.o` object files. The file extension
# avoids file name clashes for files shared with VHE.
@@ -42,11 +44,31 @@ LDFLAGS_kvm_nvhe.tmp.o := -r -T
$(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE
$(call if_changed,ld)

-# 4) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'.
+# 4) Generate list of hyp code/data positions that need to be relocated at
+# runtime. Because the hypervisor is part of the kernel binary, relocations
+# produce a kernel VA. We enumerate relocations targeting hyp at build time
+# and convert the kernel VAs at those positions to hyp VAs.
+$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel
+ $(call if_changed,hyprel)
+
+# 5) Compile hyp-reloc.S and link it into the existing partially linked object.
+# The object file now contains a section with pointers to hyp positions that
+# will contain kernel VAs at runtime. These pointers have relocations on them
+# so that they get updated as the hyp object is linked into `vmlinux`.
+LDFLAGS_kvm_nvhe.rel.o := -r
+$(obj)/kvm_nvhe.rel.o: $(obj)/kvm_nvhe.tmp.o $(obj)/hyp-reloc.o FORCE
+ $(call if_changed,ld)
+
+# 6) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'.
# Prefixes names of ELF symbols with '__kvm_nvhe_'.
-$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.tmp.o FORCE
+$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.rel.o FORCE
$(call if_changed,hypcopy)

+# The HYPREL command calls `gen-hyprel` to generate an assembly file with
+# a list of relocations targeting hyp code/data.
+quiet_cmd_hyprel = HYPREL $@
+ cmd_hyprel = $(obj)/gen-hyprel $< > $@
+
# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names
# to avoid clashes with VHE code/data.
quiet_cmd_hypcopy = HYPCOPY $@
diff --git a/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c b/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c
new file mode 100644
index 000000000000..c8423eea73e4
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: David Brazdil <[email protected]>
+ *
+ * Generates relocation information used by the kernel to convert
+ * absolute addresses in hyp data from kernel VAs to hyp VAs.
+ *
+ * This is necessary because hyp code is linked into the same binary
+ * as the kernel but executes under different memory mappings.
+ * If the compiler used absolute addressing, those addresses need to
+ * be converted before they are used by hyp code.
+ *
+ * The input of this program is the relocatable ELF object containing
+ * all hyp code/data, not yet linked into vmlinux. Hyp section names
+ * should have been prefixed with `.hyp` at this point.
+ *
+ * The output (printed to stdout) is an assembly file containing
+ * an array of 32-bit integers and static relocations that instruct
+ * the linker of `vmlinux` to populate the array entries with offsets
+ * to positions in the kernel binary containing VAs used by hyp code.
+ *
+ * Note that dynamic relocations could be used for the same purpose.
+ * However, those are only generated if CONFIG_RELOCATABLE=y.
+ */
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#define HYP_SECTION_PREFIX ".hyp"
+#define HYP_RELOC_SECTION ".hyp.reloc"
+#define HYP_SECTION_SYMBOL_PREFIX "__hyp_section_"
+
+static struct {
+ const char *path;
+ char *begin;
+ size_t size;
+ Elf64_Ehdr *ehdr;
+ Elf64_Shdr *sh_table;
+ const char *sh_string;
+} elf;
+
+static size_t asm_reloc_offset;
+
+#define fatal_error(fmt, ...) \
+ ({ \
+ fprintf(stderr, "error: %s: " fmt "\n", \
+ elf.path, ## __VA_ARGS__); \
+ exit(EXIT_FAILURE); \
+ __builtin_unreachable(); \
+ })
+
+#define fatal_perror(msg) \
+ ({ \
+ fprintf(stderr, "error: %s: " msg ": %s\n", \
+ elf.path, strerror(errno)); \
+ exit(EXIT_FAILURE); \
+ __builtin_unreachable(); \
+ })
+
+#define assert_op(lhs, rhs, fmt, op) \
+ ({ \
+ typeof(lhs) _lhs = (lhs); \
+ typeof(rhs) _rhs = (rhs); \
+ \
+ if (!(_lhs op _rhs)) { \
+ fatal_error("assertion " #lhs " " #op " " #rhs \
+ " failed (lhs=" fmt ", rhs=" fmt \
+ ", line=%d)", _lhs, _rhs, __LINE__); \
+ } \
+ })
+
+#define assert_eq(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, ==)
+#define assert_ne(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, !=)
+#define assert_lt(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, <)
+#define assert_ge(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, >=)
+
+/*
+ * Return a pointer of a given type at a given offset from
+ * the beginning of the ELF file.
+ */
+#define elf_ptr(type, off) ((type *)(elf.begin + (off)))
+
+/* Iterate over all sections in the ELF. */
+#define for_each_section(var) \
+ for (var = elf.sh_table; var < elf.sh_table + elf.ehdr->e_shnum; ++var)
+
+/* Iterate over all Elf64_Rela relocations in a given section. */
+#define for_each_rela(shdr, var) \
+ for (var = elf_ptr(Elf64_Rela, shdr->sh_offset); \
+ var < elf_ptr(Elf64_Rela, shdr->sh_offset + shdr->sh_size); var++)
+
+/* True if a string starts with a given prefix. */
+static inline bool starts_with(const char *str, const char *prefix)
+{
+ return memcmp(str, prefix, strlen(prefix)) == 0;
+}
+
+/* Returns a string containing the name of a given section. */
+static inline const char *section_name(Elf64_Shdr *shdr)
+{
+ return elf.sh_string + shdr->sh_name;
+}
+
+/* Returns a pointer to the first byte of section data. */
+static inline const char *section_begin(Elf64_Shdr *shdr)
+{
+ return elf_ptr(char, shdr->sh_offset);
+}
+
+/* Find a section by its offset from the beginning of the file. */
+static inline Elf64_Shdr *section_by_off(Elf64_Off off)
+{
+ assert_ne(off, 0UL, "%lu");
+ return elf_ptr(Elf64_Shdr, off);
+}
+
+/* Find a section by its index. */
+static inline Elf64_Shdr *section_by_idx(uint16_t idx)
+{
+ assert_ne(idx, SHN_UNDEF, "%u");
+ return &elf.sh_table[idx];
+}
+
+/*
+ * Memory-map the given ELF file, perform sanity checks, and
+ * populate global state.
+ */
+static void init_elf(const char *path)
+{
+ int fd, ret;
+ struct stat stat;
+
+ /* Store path in the global struct for error printing. */
+ elf.path = path;
+
+ /* Open the ELF file. */
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ fatal_perror("Could not open ELF file");
+
+ /* Get status of ELF file to obtain its size. */
+ ret = fstat(fd, &stat);
+ if (ret < 0) {
+ close(fd);
+ fatal_perror("Could not get status of ELF file");
+ }
+
+ /* mmap() the entire ELF file read-only at an arbitrary address. */
+ elf.begin = mmap(0, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (elf.begin == MAP_FAILED) {
+ close(fd);
+ fatal_perror("Could not mmap ELF file");
+ }
+
+ /* mmap() was successful, close the FD. */
+ close(fd);
+
+ /* Get pointer to the ELF header. */
+ assert_ge(stat.st_size, sizeof(*elf.ehdr), "%lu");
+ elf.ehdr = elf_ptr(Elf64_Ehdr, 0);
+
+ /* Check the ELF magic. */
+ assert_eq(elf.ehdr->e_ident[EI_MAG0], ELFMAG0, "0x%x");
+ assert_eq(elf.ehdr->e_ident[EI_MAG1], ELFMAG1, "0x%x");
+ assert_eq(elf.ehdr->e_ident[EI_MAG2], ELFMAG2, "0x%x");
+ assert_eq(elf.ehdr->e_ident[EI_MAG3], ELFMAG3, "0x%x");
+
+ /* Sanity check that this is an ELF64 relocatable object for Aarch64. */
+ assert_eq(elf.ehdr->e_ident[EI_CLASS], ELFCLASS64, "%u");
+ assert_eq(elf.ehdr->e_ident[EI_DATA], ELFDATA2LSB, "%u");
+ assert_eq(elf.ehdr->e_type, ET_REL, "%u");
+ assert_eq(elf.ehdr->e_machine, EM_AARCH64, "%u");
+
+ /* Populate fields of the global struct. */
+ elf.sh_table = section_by_off(elf.ehdr->e_shoff);
+ elf.sh_string = section_begin(section_by_idx(elf.ehdr->e_shstrndx));
+}
+
+/* Print the prologue of the output ASM file. */
+static void emit_prologue(void)
+{
+ printf(".data\n"
+ ".pushsection " HYP_RELOC_SECTION ", \"a\"\n");
+}
+
+/*
+ * Print ASM statements to create a hyp relocation entry for a given
+ * R_AARCH64_ABS64 relocation.
+ *
+ * The linker of vmlinux will populate the position given by `rela` with
+ * an absolute 64-bit kernel VA. If the kernel is relocatable, it will
+ * also generate a dynamic relocation entry so that the kernel can shift
+ * the address at runtime for KASLR.
+ *
+ * Emit a 32-bit offset from the current address to the position given
+ * by `rela`. This way the kernel can iterate over all kernel VAs used
+ * by hyp at runtime and convert them to hyp VAs. However, that offset
+ * will not be known until linking of `vmlinux`, so emit a PREL32
+ * relocation referencing a symbol that the hyp linker script put at
+ * the beginning of the relocated section + the offset from `rela`.
+ */
+static void emit_rela_abs64(Elf64_Rela *rela, const char *sh_orig_name)
+{
+ /* Create storage for the 32-bit offset. */
+ printf(".word 0\n");
+
+ /*
+ * Create a PREL32 relocation which instructs the linker of `vmlinux`
+ * to insert offset to position <base> + <offset>, where <base> is
+ * a symbol at the beginning of the relocated section, and <offset>
+ * is `rela->r_offset`.
+ */
+ printf(".reloc %lu, R_AARCH64_PREL32, %s%s + 0x%lx\n",
+ asm_reloc_offset, HYP_SECTION_SYMBOL_PREFIX, sh_orig_name,
+ rela->r_offset);
+ asm_reloc_offset += 4;
+}
+
+/* Print the epilogue of the output ASM file. */
+static void emit_epilogue(void)
+{
+ printf(".popsection\n");
+}
+
+/*
+ * Iterate over all RELA relocations in a given section and emit
+ * hyp relocation data for all absolute addresses in hyp code/data.
+ *
+ * Static relocations that generate PC-relative-addressing are ignored.
+ */
+static void emit_rela_section(Elf64_Shdr *sh_rela)
+{
+ Elf64_Shdr *sh_orig = &elf.sh_table[sh_rela->sh_info];
+ const char *sh_orig_name = section_name(sh_orig);
+ Elf64_Rela *rela;
+
+ /* Skip all non-hyp sections. */
+ if (!starts_with(sh_orig_name, HYP_SECTION_PREFIX))
+ return;
+
+ for_each_rela(sh_rela, rela) {
+ uint32_t type = (uint32_t)rela->r_info;
+
+ /* Check that rela points inside the relocated section. */
+ assert_lt(rela->r_offset, sh_orig->sh_size, "0x%lx");
+
+ switch (type) {
+ /* Relocations to generate PC-relative addressing. */
+ case R_AARCH64_LD_PREL_LO19:
+ case R_AARCH64_ADR_PREL_LO21:
+ case R_AARCH64_ADR_PREL_PG_HI21:
+ case R_AARCH64_ADR_PREL_PG_HI21_NC:
+ case R_AARCH64_ADD_ABS_LO12_NC:
+ case R_AARCH64_LDST8_ABS_LO12_NC:
+ case R_AARCH64_LDST16_ABS_LO12_NC:
+ case R_AARCH64_LDST32_ABS_LO12_NC:
+ case R_AARCH64_LDST64_ABS_LO12_NC:
+ case R_AARCH64_LDST128_ABS_LO12_NC:
+ break;
+ /* Relocations for control-flow instructions. */
+ case R_AARCH64_TSTBR14:
+ case R_AARCH64_CONDBR19:
+ case R_AARCH64_JUMP26:
+ case R_AARCH64_CALL26:
+ break;
+ /* Group relocations to create PC-relative offset inline. */
+ case R_AARCH64_MOVW_PREL_G0:
+ case R_AARCH64_MOVW_PREL_G0_NC:
+ case R_AARCH64_MOVW_PREL_G1:
+ case R_AARCH64_MOVW_PREL_G1_NC:
+ case R_AARCH64_MOVW_PREL_G2:
+ case R_AARCH64_MOVW_PREL_G2_NC:
+ case R_AARCH64_MOVW_PREL_G3:
+ break;
+ /* Data relocations to generate absolute addressing. */
+ case R_AARCH64_ABS64:
+ emit_rela_abs64(rela, sh_orig_name);
+ break;
+ default:
+ fatal_error("Unexpected RELA type %u", type);
+ }
+ }
+}
+
+/* Iterate over all sections and emit hyp relocation data for RELA sections. */
+static void emit_all_relocs(void)
+{
+ Elf64_Shdr *shdr;
+
+ for_each_section(shdr) {
+ switch (shdr->sh_type) {
+ case SHT_REL:
+ fatal_error("Unexpected SHT_REL section \"%s\"",
+ section_name(shdr));
+ case SHT_RELA:
+ emit_rela_section(shdr);
+ break;
+ }
+ }
+}
+
+int main(int argc, const char **argv)
+{
+ if(argc != 2) {
+ fprintf(stderr, "Usage: %s <elf_input>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ init_elf(argv[1]);
+
+ emit_prologue();
+ emit_all_relocs();
+ emit_epilogue();
+
+ return EXIT_SUCCESS;
+}
--
2.29.2.576.ga3fc446d84-goog

2020-12-09 14:51:45

by David Brazdil

[permalink] [raw]
Subject: [PATCH 8/9] KVM: arm64: Remove patching of fn pointers in hyp

Storing a function pointer in hyp now generates relocation information
used at early boot to convert the address to hyp VA. The existing
alternative-based conversion mechanism is therefore obsolete. Remove it
and simplify its users.

Acked-by: Ard Biesheuvel <[email protected]>
Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/include/asm/kvm_mmu.h | 18 ------------------
arch/arm64/kernel/image-vars.h | 1 -
arch/arm64/kvm/hyp/nvhe/hyp-main.c | 11 ++++-------
arch/arm64/kvm/va_layout.c | 6 ------
4 files changed, 4 insertions(+), 32 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index adadc468cc71..90873851f677 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -135,24 +135,6 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)

#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))

-static __always_inline unsigned long __kimg_hyp_va(unsigned long v)
-{
- unsigned long offset;
-
- asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
- "movk %0, #0, lsl #16\n"
- "movk %0, #0, lsl #32\n"
- "movk %0, #0, lsl #48\n",
- kvm_update_kimg_phys_offset)
- : "=r" (offset));
-
- return __kern_hyp_va((v - offset) | PAGE_OFFSET);
-}
-
-#define kimg_fn_hyp_va(v) ((typeof(*v))(__kimg_hyp_va((unsigned long)(v))))
-
-#define kimg_fn_ptr(x) (typeof(x) **)(x)
-
/*
* We currently support using a VM-specified IPA size. For backward
* compatibility, the default IPA size is fixed to 40bits.
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 39289d75118d..3242502f45fa 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -64,7 +64,6 @@ __efistub__ctype = _ctype;
/* Alternative callbacks for init-time patching of nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_patch_vector_branch);
KVM_NVHE_ALIAS(kvm_update_va_mask);
-KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset);
KVM_NVHE_ALIAS(kvm_get_kimage_voffset);

/* Global kernel state accessed by nVHE hyp code. */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index bde658d51404..0cf4b750a090 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -108,9 +108,9 @@ static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)

typedef void (*hcall_t)(struct kvm_cpu_context *);

-#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = kimg_fn_ptr(handle_##x)
+#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x

-static const hcall_t *host_hcall[] = {
+static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
@@ -130,7 +130,6 @@ static const hcall_t *host_hcall[] = {
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned long, id, host_ctxt, 0);
- const hcall_t *kfn;
hcall_t hfn;

id -= KVM_HOST_SMCCC_ID(0);
@@ -138,13 +137,11 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
if (unlikely(id >= ARRAY_SIZE(host_hcall)))
goto inval;

- kfn = host_hcall[id];
- if (unlikely(!kfn))
+ hfn = host_hcall[id];
+ if (unlikely(!hfn))
goto inval;

cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
-
- hfn = kimg_fn_hyp_va(kfn);
hfn(host_ctxt);

return;
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index fb2ca02b7270..e0021ba960b5 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -284,12 +284,6 @@ static void generate_mov_q(u64 val, __le32 *origptr, __le32 *updptr, int nr_inst
*updptr++ = cpu_to_le32(insn);
}

-void kvm_update_kimg_phys_offset(struct alt_instr *alt,
- __le32 *origptr, __le32 *updptr, int nr_inst)
-{
- generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst);
-}
-
void kvm_get_kimage_voffset(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst)
{
--
2.29.2.576.ga3fc446d84-goog

2020-12-09 14:51:45

by David Brazdil

[permalink] [raw]
Subject: [PATCH 6/9] KVM: arm64: Apply hyp relocations at runtime

KVM nVHE code runs under a different VA mapping than the kernel, hence
so far it avoided using absolute addressing because the VA in a constant
pool is relocated by the linker to a kernel VA (see hyp_symbol_addr).

Now the kernel has access to a list of positions that contain a kimg VA
but will be accessed only in hyp execution context. These are generated
by the gen-hyprel build-time tool and stored in .hyp.reloc.

Add an early boot pass over the entries and convert the kimg VAs to hyp
VAs. Note that this requires the .hyp* ELF sections to be mapped
read-write at that point.

Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/configs/defconfig | 1 +
arch/arm64/include/asm/kvm_mmu.h | 1 +
arch/arm64/include/asm/sections.h | 1 +
arch/arm64/kernel/smp.c | 4 +++-
arch/arm64/kvm/va_layout.c | 28 ++++++++++++++++++++++++++++
5 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 5cfe3cf6f2ac..73fc9f2f2661 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1092,3 +1092,4 @@ CONFIG_DEBUG_KERNEL=y
# CONFIG_DEBUG_PREEMPT is not set
# CONFIG_FTRACE is not set
CONFIG_MEMTEST=y
+# CONFIG_ARM64_VHE is not set
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index e52d82aeadca..6bbb44011c84 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -129,6 +129,7 @@ alternative_cb_end
void kvm_update_va_mask(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
void kvm_compute_layout(void);
+void kvm_apply_hyp_relocations(void);

static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index a6f3557d1ab2..2f36b16a5b5d 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -12,6 +12,7 @@ extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
extern char __hyp_text_start[], __hyp_text_end[];
extern char __hyp_rodata_start[], __hyp_rodata_end[];
+extern char __hyp_reloc_begin[], __hyp_reloc_end[];
extern char __idmap_text_start[], __idmap_text_end[];
extern char __initdata_begin[], __initdata_end[];
extern char __inittext_begin[], __inittext_end[];
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 18e9727d3f64..47142395bc91 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
"CPU: CPUs started in inconsistent modes");
else
pr_info("CPU: All CPU(s) started at EL1\n");
- if (IS_ENABLED(CONFIG_KVM))
+ if (IS_ENABLED(CONFIG_KVM)) {
kvm_compute_layout();
+ kvm_apply_hyp_relocations();
+ }
}

void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index d8cc51bd60bf..fb2ca02b7270 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -82,6 +82,34 @@ __init void kvm_compute_layout(void)
init_hyp_physvirt_offset();
}

+/*
+ * The .hyp.reloc ELF section contains a list of kimg positions that
+ * contain kimg VAs but will be accessed only in hyp execution context.
+ * Convert them to hyp VAs. See gen-hyprel.c for more details.
+ */
+__init void kvm_apply_hyp_relocations(void)
+{
+ int32_t *rel;
+ int32_t *begin = (int32_t*)__hyp_reloc_begin;
+ int32_t *end = (int32_t*)__hyp_reloc_end;
+
+ for (rel = begin; rel < end; ++rel) {
+ uintptr_t *ptr, kimg_va;
+
+ /*
+ * Each entry contains a 32-bit relative offset from itself
+ * to a kimg VA position.
+ */
+ ptr = (uintptr_t*)lm_alias((char*)rel + *rel);
+
+ /* Read the kimg VA value at the relocation address. */
+ kimg_va = *ptr;
+
+ /* Convert to hyp VA and store back to the relocation address. */
+ *ptr = __early_kern_hyp_va((uintptr_t)lm_alias(kimg_va));
+ }
+}
+
static u32 compute_instruction(int n, u32 rd, u32 rn)
{
u32 insn = AARCH64_BREAK_FAULT;
--
2.29.2.576.ga3fc446d84-goog

2020-12-09 14:51:46

by David Brazdil

[permalink] [raw]
Subject: [PATCH 9/9] KVM: arm64: Remove hyp_symbol_addr

Hyp code used the hyp_symbol_addr helper to force PC-relative addressing
because absolute addressing results in kernel VAs due to the way hyp
code is linked. Absolute addresses in hyp code are now converted to hyp
VAs at early boot, so remove the helper and update all of its users.

Acked-by: Ard Biesheuvel <[email protected]>
Signed-off-by: David Brazdil <[email protected]>
---
arch/arm64/include/asm/kvm_asm.h | 20 --------------------
arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++--
arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 4 ++--
arch/arm64/kvm/hyp/nvhe/psci-relay.c | 24 ++++++++++++------------
arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 +-
5 files changed, 17 insertions(+), 37 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 7ccf770c53d9..22d933e9b59e 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -199,26 +199,6 @@ extern void __vgic_v3_init_lrs(void);

extern u32 __kvm_get_mdcr_el2(void);

-/*
- * Obtain the PC-relative address of a kernel symbol
- * s: symbol
- *
- * The goal of this macro is to return a symbol's address based on a
- * PC-relative computation, as opposed to a loading the VA from a
- * constant pool or something similar. This works well for HYP, as an
- * absolute VA is guaranteed to be wrong. Only use this if trying to
- * obtain the address of a symbol (i.e. not something you obtained by
- * following a pointer).
- */
-#define hyp_symbol_addr(s) \
- ({ \
- typeof(s) *addr; \
- asm("adrp %0, %1\n" \
- "add %0, %0, :lo12:%1\n" \
- : "=r" (addr) : "S" (&s)); \
- addr; \
- })
-
#define __KVM_EXTABLE(from, to) \
" .pushsection __kvm_ex_table, \"a\"\n" \
" .align 3\n" \
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 84473574c2e7..54f4860cd87c 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -505,8 +505,8 @@ static inline void __kvm_unexpected_el2_exception(void)
struct exception_table_entry *entry, *end;
unsigned long elr_el2 = read_sysreg(elr_el2);

- entry = hyp_symbol_addr(__start___kvm_ex_table);
- end = hyp_symbol_addr(__stop___kvm_ex_table);
+ entry = &__start___kvm_ex_table;
+ end = &__stop___kvm_ex_table;

while (entry < end) {
addr = (unsigned long)&entry->insn + entry->insn;
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
index cbab0c6246e2..2048725517f8 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
@@ -33,8 +33,8 @@ unsigned long __hyp_per_cpu_offset(unsigned int cpu)
if (cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base))
hyp_panic();

- cpu_base_array = (unsigned long *)hyp_symbol_addr(kvm_arm_hyp_percpu_base);
+ cpu_base_array = (unsigned long *)&kvm_arm_hyp_percpu_base;
this_cpu_base = kern_hyp_va(cpu_base_array[cpu]);
- elf_base = (unsigned long)hyp_symbol_addr(__per_cpu_start);
+ elf_base = (unsigned long)&__per_cpu_start;
return this_cpu_base - elf_base;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index 08dc9de69314..746fb7079581 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -151,8 +151,8 @@ static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt)
if (cpu_id == INVALID_CPU_ID)
return PSCI_RET_INVALID_PARAMS;

- boot_args = per_cpu_ptr(hyp_symbol_addr(cpu_on_args), cpu_id);
- init_params = per_cpu_ptr(hyp_symbol_addr(kvm_init_params), cpu_id);
+ boot_args = per_cpu_ptr(&cpu_on_args, cpu_id);
+ init_params = per_cpu_ptr(&kvm_init_params, cpu_id);

/* Check if the target CPU is already being booted. */
if (!try_acquire_boot_args(boot_args))
@@ -163,7 +163,7 @@ static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt)
wmb();

ret = psci_call(func_id, mpidr,
- __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_entry)),
+ __hyp_pa(&kvm_hyp_cpu_entry),
__hyp_pa(init_params));

/* If successful, the lock will be released by the target CPU. */
@@ -182,8 +182,8 @@ static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
struct psci_boot_args *boot_args;
struct kvm_nvhe_init_params *init_params;

- boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
- init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params));
+ boot_args = this_cpu_ptr(&suspend_args);
+ init_params = this_cpu_ptr(&kvm_init_params);

/*
* No need to acquire a lock before writing to boot_args because a core
@@ -197,7 +197,7 @@ static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
* point if it is a deep sleep state.
*/
return psci_call(func_id, power_state,
- __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
+ __hyp_pa(&kvm_hyp_cpu_resume),
__hyp_pa(init_params));
}

@@ -209,8 +209,8 @@ static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
struct psci_boot_args *boot_args;
struct kvm_nvhe_init_params *init_params;

- boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
- init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params));
+ boot_args = this_cpu_ptr(&suspend_args);
+ init_params = this_cpu_ptr(&kvm_init_params);

/*
* No need to acquire a lock before writing to boot_args because a core
@@ -221,7 +221,7 @@ static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)

/* Will only return on error. */
return psci_call(func_id,
- __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
+ __hyp_pa(&kvm_hyp_cpu_resume),
__hyp_pa(init_params), 0);
}

@@ -230,12 +230,12 @@ asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on)
struct psci_boot_args *boot_args;
struct kvm_cpu_context *host_ctxt;

- host_ctxt = &this_cpu_ptr(hyp_symbol_addr(kvm_host_data))->host_ctxt;
+ host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;

if (is_cpu_on)
- boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args));
+ boot_args = this_cpu_ptr(&cpu_on_args);
else
- boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
+ boot_args = this_cpu_ptr(&suspend_args);

cpu_reg(host_ctxt, 0) = boot_args->r0;
write_sysreg_el2(boot_args->pc, SYS_ELR);
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index 8f0585640241..87a54375bd6e 100644
--- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -64,7 +64,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
}

rd = kvm_vcpu_dabt_get_rd(vcpu);
- addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va;
+ addr = kvm_vgic_global_state.vcpu_hyp_va;
addr += fault_ipa - vgic->vgic_cpu_base;

if (kvm_vcpu_dabt_iswrite(vcpu)) {
--
2.29.2.576.ga3fc446d84-goog