2015-07-08 16:19:36

by Marc Zyngier

Subject: [PATCH 00/13] arm64: Virtualization Host Extension support

ARMv8.1 comes with the "Virtualization Host Extension" (VHE for
short), which enables simpler support of Type-2 hypervisors.

This extension allows the kernel to run directly at EL2, and
significantly reduces the number of system registers shared between
host and guest, cutting the overhead of virtualization.

In order to have the same kernel binary running on all versions of the
architecture, this series makes heavy use of runtime code patching.
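
For reference, the patching relies on the arm64 "alternatives"
framework: the ifnvhe-style helpers introduced later in the series
boil down to a single alternative_insn directive, which emits the
non-VHE instruction and lets the boot-time patching code swap it when
the ARM64_HAS_VIRT_HOST_EXTN capability is detected. A minimal sketch
(essentially what patch 7 does for kvm_call_hyp):

	/* non-VHE: trap to EL2 via HVC; VHE: fall through, already at EL2 */
	alternative_insn "hvc #0", "nop", ARM64_HAS_VIRT_HOST_EXTN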

The first ten patches massage the KVM code to deal with VHE and enable
Linux to run at EL2.

The next patch catches an ugly case when VHE capable CPUs are paired
with some of their less capable siblings. This should never happen,
but hey...

The last two patches add an optimisation allowing a physical interrupt
to be serviced on the host without doing a full save/restore, leading
to potential reduction in interrupt latency.

This has been tested on the FVP_Base_SLV-V8-A model, and is based on
v4.2-rc1. I've put a branch out on:

git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git kvm-arm64/vhe

Marc Zyngier (13):
arm/arm64: Add new is_kernel_in_hyp_mode predicate
arm64: Allow the arch timer to use the HYP timer
arm64: Add ARM64_HAS_VIRT_HOST_EXTN feature
arm64: KVM: skip HYP setup when already running in HYP
arm64: KVM: VHE: macroize VTCR_EL2 setup
arm64: KVM: VHE: Patch out kern_hyp_va
arm64: KVM: VHE: Patch out use of HVC
arm64: KVM: VHE: Preserve VHE config in world switch
arm64: KVM: VHE: Add alternatives for VHE-enabled world-switch
arm64: Add support for running Linux in EL2 mode
arm64: Panic when VHE and non VHE CPUs coexist
arm64: KVM: Split sysreg save/restore
arm64: KVM: VHE: Early interrupt handling

arch/arm/include/asm/virt.h | 5 +
arch/arm/kvm/arm.c | 134 ++++++++-----
arch/arm/kvm/mmu.c | 6 +
arch/arm64/include/asm/cpufeature.h | 3 +-
arch/arm64/include/asm/kvm_arm.h | 1 +
arch/arm64/include/asm/kvm_asm.h | 40 +++-
arch/arm64/include/asm/kvm_emulate.h | 2 +
arch/arm64/include/asm/kvm_mmu.h | 24 ++-
arch/arm64/include/asm/virt.h | 25 +++
arch/arm64/kernel/cpufeature.c | 11 ++
arch/arm64/kernel/head.S | 38 +++-
arch/arm64/kernel/smp.c | 4 +
arch/arm64/kvm/hyp-init.S | 9 +-
arch/arm64/kvm/hyp.S | 363 ++++++++++++++++++++++++-----------
arch/arm64/kvm/vgic-v2-switch.S | 19 +-
arch/arm64/kvm/vgic-v3-switch.S | 33 ++--
arch/arm64/kvm/vhe-macros.h | 54 ++++++
drivers/clocksource/arm_arch_timer.c | 96 +++++----
18 files changed, 638 insertions(+), 229 deletions(-)
create mode 100644 arch/arm64/kvm/vhe-macros.h

--
2.1.4


2015-07-08 16:19:32

by Marc Zyngier

Subject: [PATCH 01/13] arm/arm64: Add new is_kernel_in_hyp_mode predicate

With the ARMv8.1 VHE extension, it will be possible to run the kernel
at EL2 (aka HYP mode). In order for the kernel to easily find out
where it is running, add a new predicate that returns whether or
not the kernel is in HYP mode.

For completeness, the 32bit code also gets such a predicate (always
returning false) so that code common to both architectures (timers,
KVM) can use it transparently.
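
As an illustration (not part of this patch), callers can then simply
branch on the predicate instead of sprinkling #ifdefs around; the
function below is a made-up example assuming <asm/virt.h> and
<linux/printk.h>:

	/* Hypothetical usage sketch, for illustration only. */
	static void report_boot_el(void)
	{
		if (is_kernel_in_hyp_mode())
			pr_info("kernel is running at EL2 (VHE)\n");
		else
			pr_info("kernel is running at EL1\n");
	}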

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm/include/asm/virt.h | 5 +++++
arch/arm64/include/asm/virt.h | 10 ++++++++++
2 files changed, 15 insertions(+)

diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h
index 4371f45..b6a3cef 100644
--- a/arch/arm/include/asm/virt.h
+++ b/arch/arm/include/asm/virt.h
@@ -74,6 +74,11 @@ static inline bool is_hyp_mode_mismatched(void)
{
return !!(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH);
}
+
+static inline bool is_kernel_in_hyp_mode(void)
+{
+ return false;
+}
#endif

#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 7a5df52..9f22dd6 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -23,6 +23,8 @@

#ifndef __ASSEMBLY__

+#include <asm/ptrace.h>
+
/*
* __boot_cpu_mode records what mode CPUs were booted in.
* A correctly-implemented bootloader must start all CPUs in the same mode:
@@ -50,6 +52,14 @@ static inline bool is_hyp_mode_mismatched(void)
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
}

+static inline bool is_kernel_in_hyp_mode(void)
+{
+ u64 el;
+
+ asm("mrs %0, CurrentEL" : "=r" (el));
+ return el == CurrentEL_EL2;
+}
+
/* The section containing the hypervisor text */
extern char __hyp_text_start[];
extern char __hyp_text_end[];
--
2.1.4

2015-07-08 16:23:30

by Marc Zyngier

Subject: [PATCH 02/13] arm64: Allow the arch timer to use the HYP timer

With the ARMv8.1 VHE, the kernel can run in HYP mode, and thus
use the HYP timer instead of the normal guest timer in a mostly
transparent way, except for the interrupt line.

This patch reworks the arch timer code to allow the selection of
the HYP PPI, possibly falling back to the guest timer if not
available.
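
Condensed into C for clarity, the selection logic mirrors the
arch_timer_init() hunk below (this is a sketch of that hunk, not
additional code):

	/*
	 * A VHE kernel must use the HYP PPI, as its CNTP_* accesses are
	 * silently redirected to the CNTHP_*_EL2 timer; otherwise fall
	 * back to the secure physical PPI when no virtual timer
	 * interrupt was provided.
	 */
	if (is_hyp_mode_available() || !arch_timer_ppi[VIRT_PPI]) {
		if (is_kernel_in_hyp_mode())
			arch_timer_uses_ppi = HYP_PPI;
		else
			arch_timer_uses_ppi = PHYS_SECURE_PPI;
	}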

Signed-off-by: Marc Zyngier <[email protected]>
---
drivers/clocksource/arm_arch_timer.c | 96 ++++++++++++++++++++++--------------
1 file changed, 59 insertions(+), 37 deletions(-)

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 0aa135d..459d329 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -67,7 +67,7 @@ static int arch_timer_ppi[MAX_TIMER_PPI];

static struct clock_event_device __percpu *arch_timer_evt;

-static bool arch_timer_use_virtual = true;
+static enum ppi_nr arch_timer_uses_ppi = VIRT_PPI;
static bool arch_timer_c3stop;
static bool arch_timer_mem_use_virtual;

@@ -271,14 +271,20 @@ static void __arch_timer_setup(unsigned type,
clk->name = "arch_sys_timer";
clk->rating = 450;
clk->cpumask = cpumask_of(smp_processor_id());
- if (arch_timer_use_virtual) {
- clk->irq = arch_timer_ppi[VIRT_PPI];
+ clk->irq = arch_timer_ppi[arch_timer_uses_ppi];
+ switch (arch_timer_uses_ppi) {
+ case VIRT_PPI:
clk->set_mode = arch_timer_set_mode_virt;
clk->set_next_event = arch_timer_set_next_event_virt;
- } else {
- clk->irq = arch_timer_ppi[PHYS_SECURE_PPI];
+ break;
+ case PHYS_SECURE_PPI:
+ case PHYS_NONSECURE_PPI:
+ case HYP_PPI:
clk->set_mode = arch_timer_set_mode_phys;
clk->set_next_event = arch_timer_set_next_event_phys;
+ break;
+ default:
+ BUG();
}
} else {
clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
@@ -346,17 +352,20 @@ static void arch_counter_set_user_access(void)
arch_timer_set_cntkctl(cntkctl);
}

+static bool arch_timer_has_nonsecure_ppi(void)
+{
+ return (arch_timer_uses_ppi == PHYS_SECURE_PPI &&
+ arch_timer_ppi[PHYS_NONSECURE_PPI]);
+}
+
static int arch_timer_setup(struct clock_event_device *clk)
{
__arch_timer_setup(ARCH_CP15_TIMER, clk);

- if (arch_timer_use_virtual)
- enable_percpu_irq(arch_timer_ppi[VIRT_PPI], 0);
- else {
- enable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], 0);
- if (arch_timer_ppi[PHYS_NONSECURE_PPI])
- enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0);
- }
+ enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], 0);
+
+ if (arch_timer_has_nonsecure_ppi())
+ enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0);

arch_counter_set_user_access();
if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM))
@@ -398,7 +407,7 @@ static void arch_timer_banner(unsigned type)
(unsigned long)arch_timer_rate / 1000000,
(unsigned long)(arch_timer_rate / 10000) % 100,
type & ARCH_CP15_TIMER ?
- arch_timer_use_virtual ? "virt" : "phys" :
+ (arch_timer_uses_ppi == VIRT_PPI) ? "virt" : "phys" :
"",
type == (ARCH_CP15_TIMER | ARCH_MEM_TIMER) ? "/" : "",
type & ARCH_MEM_TIMER ?
@@ -468,7 +477,7 @@ static void __init arch_counter_register(unsigned type)

/* Register the CP15 based counter if we have one */
if (type & ARCH_CP15_TIMER) {
- if (IS_ENABLED(CONFIG_ARM64) || arch_timer_use_virtual)
+ if (IS_ENABLED(CONFIG_ARM64) || arch_timer_uses_ppi == VIRT_PPI)
arch_timer_read_counter = arch_counter_get_cntvct;
else
arch_timer_read_counter = arch_counter_get_cntpct;
@@ -498,13 +507,9 @@ static void arch_timer_stop(struct clock_event_device *clk)
pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n",
clk->irq, smp_processor_id());

- if (arch_timer_use_virtual)
- disable_percpu_irq(arch_timer_ppi[VIRT_PPI]);
- else {
- disable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI]);
- if (arch_timer_ppi[PHYS_NONSECURE_PPI])
- disable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI]);
- }
+ disable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi]);
+ if (arch_timer_has_nonsecure_ppi())
+ disable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI]);

clk->set_mode(CLOCK_EVT_MODE_UNUSED, clk);
}
@@ -570,12 +575,14 @@ static int __init arch_timer_register(void)
goto out;
}

- if (arch_timer_use_virtual) {
- ppi = arch_timer_ppi[VIRT_PPI];
+ ppi = arch_timer_ppi[arch_timer_uses_ppi];
+ switch (arch_timer_uses_ppi) {
+ case VIRT_PPI:
err = request_percpu_irq(ppi, arch_timer_handler_virt,
"arch_timer", arch_timer_evt);
- } else {
- ppi = arch_timer_ppi[PHYS_SECURE_PPI];
+ break;
+ case PHYS_SECURE_PPI:
+ case PHYS_NONSECURE_PPI:
err = request_percpu_irq(ppi, arch_timer_handler_phys,
"arch_timer", arch_timer_evt);
if (!err && arch_timer_ppi[PHYS_NONSECURE_PPI]) {
@@ -586,6 +593,13 @@ static int __init arch_timer_register(void)
free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI],
arch_timer_evt);
}
+ break;
+ case HYP_PPI:
+ err = request_percpu_irq(ppi, arch_timer_handler_phys,
+ "arch_timer", arch_timer_evt);
+ break;
+ default:
+ BUG();
}

if (err) {
@@ -610,15 +624,10 @@ static int __init arch_timer_register(void)
out_unreg_notify:
unregister_cpu_notifier(&arch_timer_cpu_nb);
out_free_irq:
- if (arch_timer_use_virtual)
- free_percpu_irq(arch_timer_ppi[VIRT_PPI], arch_timer_evt);
- else {
- free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI],
+ free_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], arch_timer_evt);
+ if (arch_timer_has_nonsecure_ppi())
+ free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI],
arch_timer_evt);
- if (arch_timer_ppi[PHYS_NONSECURE_PPI])
- free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI],
- arch_timer_evt);
- }

out_free:
free_percpu(arch_timer_evt);
@@ -705,12 +714,25 @@ static void __init arch_timer_init(void)
*
* If no interrupt provided for virtual timer, we'll have to
* stick to the physical timer. It'd better be accessible...
+ *
+ * On ARMv8.1 with VH extensions, the kernel runs in HYP. VHE
+ * accesses to CNTP_*_EL1 registers are silently redirected to
+ * their CNTHP_*_EL2 counterparts, and use a different PPI
+ * number.
*/
if (is_hyp_mode_available() || !arch_timer_ppi[VIRT_PPI]) {
- arch_timer_use_virtual = false;
+ bool has_ppi;
+
+ if (is_kernel_in_hyp_mode()) {
+ arch_timer_uses_ppi = HYP_PPI;
+ has_ppi = !!arch_timer_ppi[HYP_PPI];
+ } else {
+ arch_timer_uses_ppi = PHYS_SECURE_PPI;
+ has_ppi = (!!arch_timer_ppi[PHYS_SECURE_PPI] ||
+ !!arch_timer_ppi[PHYS_NONSECURE_PPI]);
+ }

- if (!arch_timer_ppi[PHYS_SECURE_PPI] ||
- !arch_timer_ppi[PHYS_NONSECURE_PPI]) {
+ if (!has_ppi) {
pr_warn("arch_timer: No interrupt available, giving up\n");
return;
}
@@ -743,7 +765,7 @@ static void __init arch_timer_of_init(struct device_node *np)
*/
if (IS_ENABLED(CONFIG_ARM) &&
of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
- arch_timer_use_virtual = false;
+ arch_timer_uses_ppi = PHYS_SECURE_PPI;

arch_timer_init();
}
--
2.1.4

2015-07-08 16:19:41

by Marc Zyngier

Subject: [PATCH 03/13] arm64: Add ARM64_HAS_VIRT_HOST_EXTN feature

Add a new ARM64_HAS_VIRT_HOST_EXTN feature to indicate that the
CPU has the ARMv8.1 VHE capability.

This will be used to trigger kernel patching in KVM.

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm64/include/asm/cpufeature.h | 3 ++-
arch/arm64/kernel/cpufeature.c | 11 +++++++++++
2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index c104421..6c3742d 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -25,8 +25,9 @@
#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1
#define ARM64_WORKAROUND_845719 2
#define ARM64_HAS_SYSREG_GIC_CPUIF 3
+#define ARM64_HAS_VIRT_HOST_EXTN 4

-#define ARM64_NCAPS 4
+#define ARM64_NCAPS 5

#ifndef __ASSEMBLY__

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 5ad86ce..e1dcd63 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -21,6 +21,7 @@
#include <linux/types.h>
#include <asm/cpu.h>
#include <asm/cpufeature.h>
+#include <asm/virt.h>

static bool
has_id_aa64pfr0_feature(const struct arm64_cpu_capabilities *entry)
@@ -31,6 +32,11 @@ has_id_aa64pfr0_feature(const struct arm64_cpu_capabilities *entry)
return (val & entry->register_mask) == entry->register_value;
}

+static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
+{
+ return is_kernel_in_hyp_mode();
+}
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
@@ -39,6 +45,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.register_mask = (0xf << 24),
.register_value = (1 << 24),
},
+ {
+ .desc = "Virtualization Host Extensions",
+ .capability = ARM64_HAS_VIRT_HOST_EXTN,
+ .matches = runs_at_el2,
+ },
{},
};

--
2.1.4

2015-07-08 16:22:50

by Marc Zyngier

Subject: [PATCH 04/13] arm64: KVM: skip HYP setup when already running in HYP

With the kernel running at EL2, there is no point trying to
configure page tables for HYP, as the kernel is already mapped.

Take this opportunity to refactor the whole init a bit, allowing
the various parts of the hypervisor bringup to be split across
multiple functions.

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm/kvm/arm.c | 131 +++++++++++++++++++++++++++++++++--------------------
arch/arm/kvm/mmu.c | 6 +++
2 files changed, 89 insertions(+), 48 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index bc738d2..ca3c662 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -969,6 +969,58 @@ static inline void hyp_cpu_pm_init(void)
}
#endif

+static void teardown_common_resources(void)
+{
+ free_percpu(kvm_host_cpu_state);
+}
+
+static int init_common_resources(void)
+{
+ kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
+ if (!kvm_host_cpu_state) {
+ kvm_err("Cannot allocate host CPU state\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int init_subsystems(void)
+{
+ int err;
+
+ /*
+ * Init HYP view of VGIC
+ */
+ err = kvm_vgic_hyp_init();
+ if (err)
+ return err;
+
+ /*
+ * Init HYP architected timer support
+ */
+ err = kvm_timer_hyp_init();
+ if (err)
+ return err;
+
+ kvm_perf_init();
+ kvm_coproc_table_init();
+
+ return 0;
+}
+
+static void teardown_hyp_mode(void)
+{
+ int cpu;
+
+ if (is_kernel_in_hyp_mode())
+ return;
+
+ free_hyp_pgds();
+ for_each_possible_cpu(cpu)
+ free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
+}
+
/**
* Inits Hyp-mode on all online CPUs
*/
@@ -977,6 +1029,9 @@ static int init_hyp_mode(void)
int cpu;
int err = 0;

+ if (is_kernel_in_hyp_mode())
+ return 0;
+
/*
* Allocate Hyp PGD and setup Hyp identity mapping
*/
@@ -999,7 +1054,7 @@ static int init_hyp_mode(void)
stack_page = __get_free_page(GFP_KERNEL);
if (!stack_page) {
err = -ENOMEM;
- goto out_free_stack_pages;
+ goto out_err;
}

per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
@@ -1011,7 +1066,7 @@ static int init_hyp_mode(void)
err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
if (err) {
kvm_err("Cannot map world-switch code\n");
- goto out_free_mappings;
+ goto out_err;
}

/*
@@ -1023,20 +1078,10 @@ static int init_hyp_mode(void)

if (err) {
kvm_err("Cannot map hyp stack\n");
- goto out_free_mappings;
+ goto out_err;
}
}

- /*
- * Map the host CPU structures
- */
- kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
- if (!kvm_host_cpu_state) {
- err = -ENOMEM;
- kvm_err("Cannot allocate host CPU state\n");
- goto out_free_mappings;
- }
-
for_each_possible_cpu(cpu) {
kvm_cpu_context_t *cpu_ctxt;

@@ -1045,7 +1090,7 @@ static int init_hyp_mode(void)

if (err) {
kvm_err("Cannot map host CPU state: %d\n", err);
- goto out_free_context;
+ goto out_err;
}
}

@@ -1054,37 +1099,29 @@ static int init_hyp_mode(void)
*/
on_each_cpu(cpu_init_hyp_mode, NULL, 1);

- /*
- * Init HYP view of VGIC
- */
- err = kvm_vgic_hyp_init();
- if (err)
- goto out_free_context;
-
- /*
- * Init HYP architected timer support
- */
- err = kvm_timer_hyp_init();
- if (err)
- goto out_free_mappings;
-
#ifndef CONFIG_HOTPLUG_CPU
free_boot_hyp_pgd();
#endif

- kvm_perf_init();
+ cpu_notifier_register_begin();
+
+ err = __register_cpu_notifier(&hyp_init_cpu_nb);
+
+ cpu_notifier_register_done();
+
+ if (err) {
+ kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
+ goto out_err;
+ }
+
+ hyp_cpu_pm_init();

kvm_info("Hyp mode initialized successfully\n");

return 0;
-out_free_context:
- free_percpu(kvm_host_cpu_state);
-out_free_mappings:
- free_hyp_pgds();
-out_free_stack_pages:
- for_each_possible_cpu(cpu)
- free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
+
out_err:
+ teardown_hyp_mode();
kvm_err("error initializing Hyp mode: %d\n", err);
return err;
}
@@ -1128,26 +1165,24 @@ int kvm_arch_init(void *opaque)
}
}

- cpu_notifier_register_begin();
+ err = init_common_resources();
+ if (err)
+ return err;

err = init_hyp_mode();
if (err)
goto out_err;

- err = __register_cpu_notifier(&hyp_init_cpu_nb);
- if (err) {
- kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
- goto out_err;
- }
-
- cpu_notifier_register_done();
-
- hyp_cpu_pm_init();
+ err = init_subsystems();
+ if (err)
+ goto out_hyp;

- kvm_coproc_table_init();
return 0;
+
+out_hyp:
+ teardown_hyp_mode();
out_err:
- cpu_notifier_register_done();
+ teardown_common_resources();
return err;
}

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 7b42012..dd128b7 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -594,6 +594,9 @@ int create_hyp_mappings(void *from, void *to)
unsigned long start = KERN_TO_HYP((unsigned long)from);
unsigned long end = KERN_TO_HYP((unsigned long)to);

+ if (is_kernel_in_hyp_mode())
+ return 0;
+
start = start & PAGE_MASK;
end = PAGE_ALIGN(end);

@@ -626,6 +629,9 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
unsigned long start = KERN_TO_HYP((unsigned long)from);
unsigned long end = KERN_TO_HYP((unsigned long)to);

+ if (is_kernel_in_hyp_mode())
+ return 0;
+
/* Check for a valid kernel IO mapping */
if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
return -EINVAL;
--
2.1.4

2015-07-08 16:19:46

by Marc Zyngier

Subject: [PATCH 05/13] arm64: KVM: VHE: macroize VTCR_EL2 setup

On a VHE-capable system, there is no point in setting VTCR_EL2
at KVM init time. We can just as well set it up when the kernel
boots, removing the need for a more complicated configuration.

In order to allow this, turn VTCR_EL2 setup into a macro that
we'll be able to reuse at boot time.

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm64/include/asm/kvm_mmu.h | 14 ++++++++++++++
arch/arm64/kvm/hyp-init.S | 9 +--------
2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6150567..b525cd2 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -55,6 +55,20 @@

#ifdef __ASSEMBLY__

+#include <asm/kvm_arm.h>
+
+.macro setup_vtcr tmp1, tmp2
+ mov \tmp1, #(VTCR_EL2_FLAGS & 0xffff)
+ movk \tmp1, #(VTCR_EL2_FLAGS >> 16), lsl #16
+ /*
+ * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
+ * VTCR_EL2.
+ */
+ mrs \tmp2, id_aa64mmfr0_el1
+ bfi \tmp1, \tmp2, #16, #3
+ msr vtcr_el2, \tmp1
+ isb
+.endm
/*
* Convert a kernel VA into a HYP VA.
* reg: VA to be converted.
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 178ba22..4143e2c 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -87,14 +87,7 @@ __do_hyp_init:
#endif
msr tcr_el2, x4

- ldr x4, =VTCR_EL2_FLAGS
- /*
- * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
- * VTCR_EL2.
- */
- mrs x5, ID_AA64MMFR0_EL1
- bfi x4, x5, #16, #3
- msr vtcr_el2, x4
+ setup_vtcr x4, x5

mrs x4, mair_el1
msr mair_el2, x4
--
2.1.4

2015-07-08 16:22:21

by Marc Zyngier

Subject: [PATCH 06/13] arm64: KVM: VHE: Patch out kern_hyp_va

The kern_hyp_va macro is pretty meaningless with VHE, as there is
only one mapping - the kernel one.

In order to keep the code readable and efficient, use runtime
patching to replace the 'and' instruction used to compute the VA
with a 'nop'.

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm64/include/asm/kvm_mmu.h | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b525cd2..285642e 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -55,6 +55,8 @@

#ifdef __ASSEMBLY__

+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
#include <asm/kvm_arm.h>

.macro setup_vtcr tmp1, tmp2
@@ -69,12 +71,18 @@
msr vtcr_el2, \tmp1
isb
.endm
+
+/* Stupid macro to deal with multiple level of macro expansion */
+.macro __vhe_nop inst val
+ alternative_insn "\inst\val", "nop", ARM64_HAS_VIRT_HOST_EXTN
+.endm
+
/*
* Convert a kernel VA into a HYP VA.
* reg: VA to be converted.
*/
.macro kern_hyp_va reg
- and \reg, \reg, #HYP_PAGE_OFFSET_MASK
+ __vhe_nop "and \reg, \reg, #", __stringify(HYP_PAGE_OFFSET_MASK)
.endm

#else
--
2.1.4

2015-07-08 16:21:54

by Marc Zyngier

Subject: [PATCH 07/13] arm64: KVM: VHE: Patch out use of HVC

With VHE, the host never issues an HVC instruction to get into the
KVM code, as we can simply branch there.

Use runtime code patching to simplify things a bit.

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm64/kvm/hyp.S | 43 ++++++++++++++++++++++++++++++++++++-------
arch/arm64/kvm/vhe-macros.h | 36 ++++++++++++++++++++++++++++++++++++
2 files changed, 72 insertions(+), 7 deletions(-)
create mode 100644 arch/arm64/kvm/vhe-macros.h

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 17a8fb1..a65e053 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -30,6 +30,8 @@
#include <asm/kvm_mmu.h>
#include <asm/memory.h>

+#include "vhe-macros.h"
+
#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x)
#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
#define CPU_SPSR_OFFSET(x) CPU_GP_REG_OFFSET(CPU_SPSR + 8*x)
@@ -39,6 +41,19 @@
.pushsection .hyp.text, "ax"
.align PAGE_SHIFT

+.macro do_el2_call
+ /*
+ * Shuffle the parameters before calling the function
+ * pointed to in x0. Assumes parameters in x[1,2,3],
+ * clobbers lr.
+ */
+ mov lr, x0
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ blr lr
+.endm
+
.macro save_common_regs
// x2: base address for cpu context
// x3: tmp register
@@ -1124,7 +1139,23 @@ __hyp_panic_str:
* arch/arm64/kernel/hyp_stub.S.
*/
ENTRY(kvm_call_hyp)
- hvc #0
+ /*
+ * NOP out the hvc/ret sequence on VHE, and fall through.
+ */
+ifnvhe hvc #0, nop
+ifnvhe ret, "push lr, xzr"
+
+ do_el2_call
+
+ /*
+ * We used to rely on having an exception return to get
+ * an implicit isb. In the E2H case, we don't have it anymore.
+ * rather than changing all the leaf functions, just do it here
+ * before returning to the rest of the kernel.
+ */
+ isb
+
+ pop lr, xzr
ret
ENDPROC(kvm_call_hyp)

@@ -1156,7 +1187,9 @@ el1_sync: // Guest trapped into EL2
mrs x1, esr_el2
lsr x2, x1, #ESR_ELx_EC_SHIFT

- cmp x2, #ESR_ELx_EC_HVC64
+ // Ugly shortcut for VHE. We can do this early because the
+ // host cannot do an HVC.
+ifnvhe _S_(cmp x2, #ESR_ELx_EC_HVC64), "b el1_trap"
b.ne el1_trap

mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest
@@ -1177,11 +1210,7 @@ el1_sync: // Guest trapped into EL2
* Compute the function address in EL2, and shuffle the parameters.
*/
kern_hyp_va x0
- mov lr, x0
- mov x0, x1
- mov x1, x2
- mov x2, x3
- blr lr
+ do_el2_call

pop lr, xzr
2: eret
diff --git a/arch/arm64/kvm/vhe-macros.h b/arch/arm64/kvm/vhe-macros.h
new file mode 100644
index 0000000..da7f9da
--- /dev/null
+++ b/arch/arm64/kvm/vhe-macros.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_VHE_MACROS_H__
+#define __ARM64_VHE_MACROS_H__
+
+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
+
+#ifdef __ASSEMBLY__
+
+/* Hack to allow stringification of macros... */
+#define __S__(a,args...) __stringify(a, ##args)
+#define _S_(a,args...) __S__(a, args)
+
+.macro ifnvhe nonvhe vhe
+ alternative_insn "\nonvhe", "\vhe", ARM64_HAS_VIRT_HOST_EXTN
+.endm
+
+#endif
+
+#endif /*__ARM64_VHE_MACROS_H__ */
--
2.1.4

2015-07-08 16:19:48

by Marc Zyngier

Subject: [PATCH 08/13] arm64: KVM: VHE: Preserve VHE config in world switch

Running the kernel in HYP mode requires the HCR_E2H bit to be set
at all times, and the HCR_TGE bit to be set when running as a host
(and cleared when running as a guest).

Also add some cryptic macros to deal with cpp macro expansion inside
asm macros...
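
For clarity, here is the host-side policy expressed in C (the series
does the equivalent in asm, in deactivate_traps; the function name and
the inline msr are purely illustrative):

	/*
	 * Sketch only: HCR_EL2 configuration when returning to the host.
	 * E2H stays set at all times; TGE is set so that host EL0
	 * exceptions are routed to the EL2 kernel.
	 */
	static void example_restore_host_hcr(void)
	{
		u64 hcr = HCR_RW;		/* 64bit EL1 */

		if (is_kernel_in_hyp_mode())
			hcr |= HCR_TGE | HCR_E2H;
		asm volatile("msr hcr_el2, %0" : : "r" (hcr));
	}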

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm64/include/asm/kvm_arm.h | 1 +
arch/arm64/include/asm/kvm_emulate.h | 2 ++
arch/arm64/kvm/hyp.S | 3 ++-
3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index ac6fafb..c8998c0 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -23,6 +23,7 @@
#include <asm/types.h>

/* Hyp Configuration Register (HCR) bits */
+#define HCR_E2H (UL(1) << 34)
#define HCR_ID (UL(1) << 33)
#define HCR_CD (UL(1) << 32)
#define HCR_RW_SHIFT 31
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 17e92f0..c1f29fe 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -44,6 +44,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+ if (is_kernel_in_hyp_mode())
+ vcpu->arch.hcr_el2 |= HCR_E2H;
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
vcpu->arch.hcr_el2 &= ~HCR_RW;
}
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index a65e053..ad5821b 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -800,7 +800,8 @@
.endm

.macro deactivate_traps
- mov x2, #HCR_RW
+ifnvhe _S_(mov x2, #HCR_RW), _S_(mov x2, #HCR_RW|HCR_TGE)
+ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
msr hcr_el2, x2
msr cptr_el2, xzr
msr hstr_el2, xzr
--
2.1.4

2015-07-08 16:21:33

by Marc Zyngier

Subject: [PATCH 09/13] arm64: KVM: VHE: Add alternatives for VHE-enabled world-switch

In order to switch between host and guest, a VHE-enabled kernel
must use different accessors for certain system registers.

This patch uses runtime patching to use the right instruction
when required...

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm64/include/asm/kvm_asm.h | 40 ++++++--
arch/arm64/kvm/hyp.S | 210 ++++++++++++++++++++++++++-------------
arch/arm64/kvm/vhe-macros.h | 18 ++++
3 files changed, 191 insertions(+), 77 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 3c5fe68..a932be0 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -18,6 +18,7 @@
#ifndef __ARM_KVM_ASM_H__
#define __ARM_KVM_ASM_H__

+#include <asm/sysreg.h>
#include <asm/virt.h>

/*
@@ -27,7 +28,7 @@
#define MPIDR_EL1 1 /* MultiProcessor Affinity Register */
#define CSSELR_EL1 2 /* Cache Size Selection Register */
#define SCTLR_EL1 3 /* System Control Register */
-#define ACTLR_EL1 4 /* Auxiliary Control Register */
+#define AMAIR_EL1 4 /* Aux Memory Attribute Indirection Register */
#define CPACR_EL1 5 /* Coprocessor Access Control */
#define TTBR0_EL1 6 /* Translation Table Base Register 0 */
#define TTBR1_EL1 7 /* Translation Table Base Register 1 */
@@ -39,13 +40,13 @@
#define MAIR_EL1 13 /* Memory Attribute Indirection Register */
#define VBAR_EL1 14 /* Vector Base Address Register */
#define CONTEXTIDR_EL1 15 /* Context ID Register */
-#define TPIDR_EL0 16 /* Thread ID, User R/W */
-#define TPIDRRO_EL0 17 /* Thread ID, User R/O */
-#define TPIDR_EL1 18 /* Thread ID, Privileged */
-#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */
-#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */
-#define PAR_EL1 21 /* Physical Address Register */
-#define MDSCR_EL1 22 /* Monitor Debug System Control Register */
+#define CNTKCTL_EL1 16 /* Timer Control Register (EL1) */
+#define PAR_EL1 17 /* Physical Address Register */
+#define MDSCR_EL1 18 /* Monitor Debug System Control Register */
+#define TPIDR_EL0 19 /* Thread ID, User R/W */
+#define TPIDRRO_EL0 20 /* Thread ID, User R/O */
+#define TPIDR_EL1 21 /* Thread ID, Privileged */
+#define ACTLR_EL1 22 /* Auxiliary Control Register */
#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */
#define DBGBCR15_EL1 38
#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */
@@ -106,6 +107,29 @@

#define NR_COPRO_REGS (NR_SYS_REGS * 2)

+#define sctlr_EL12 sys_reg(3, 5, 1, 0, 0)
+#define cpacr_EL12 sys_reg(3, 5, 1, 0, 2)
+#define ttbr0_EL12 sys_reg(3, 5, 2, 0, 0)
+#define ttbr1_EL12 sys_reg(3, 5, 2, 0, 1)
+#define tcr_EL12 sys_reg(3, 5, 2, 0, 2)
+#define afsr0_EL12 sys_reg(3, 5, 5, 1, 0)
+#define afsr1_EL12 sys_reg(3, 5, 5, 1, 1)
+#define esr_EL12 sys_reg(3, 5, 5, 2, 0)
+#define far_EL12 sys_reg(3, 5, 6, 0, 0)
+#define mair_EL12 sys_reg(3, 5, 10, 2, 0)
+#define amair_EL12 sys_reg(3, 5, 10, 3, 0)
+#define vbar_EL12 sys_reg(3, 5, 12, 0, 0)
+#define contextidr_EL12 sys_reg(3, 5, 13, 0, 1)
+#define cntkctl_EL12 sys_reg(3, 5, 14, 1, 0)
+#define cntp_tval_EL02 sys_reg(3, 5, 14, 2, 0)
+#define cntp_ctl_EL02 sys_reg(3, 5, 14, 2, 1)
+#define cntp_cval_EL02 sys_reg(3, 5, 14, 2, 2)
+#define cntv_tval_EL02 sys_reg(3, 5, 14, 3, 0)
+#define cntv_ctl_EL02 sys_reg(3, 5, 14, 3, 1)
+#define cntv_cval_EL02 sys_reg(3, 5, 14, 3, 2)
+#define spsr_EL12 sys_reg(3, 5, 4, 0, 0)
+#define elr_EL12 sys_reg(3, 5, 4, 0, 1)
+
#define ARM_EXCEPTION_IRQ 0
#define ARM_EXCEPTION_TRAP 1

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index ad5821b..b61591b 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012,2013 - ARM Ltd
+ * Copyright (C) 2012-2015 - ARM Ltd
* Author: Marc Zyngier <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
@@ -67,40 +67,52 @@
stp x29, lr, [x3, #80]

mrs x19, sp_el0
- mrs x20, elr_el2 // pc before entering el2
- mrs x21, spsr_el2 // pstate before entering el2
+ str x19, [x3, #96]
+.endm

- stp x19, x20, [x3, #96]
- str x21, [x3, #112]
+.macro save_el1_state
+ mrs_hyp(x20, ELR) // pc before entering el2
+ mrs_hyp(x21, SPSR) // pstate before entering el2

mrs x22, sp_el1
- mrs x23, elr_el1
- mrs x24, spsr_el1
+
+ mrs_el1(x23, elr)
+ mrs_el1(x24, spsr)
+
+ add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
+ stp x20, x21, [x3, #8] // HACK: Store to the regs after SP_EL0

str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
.endm

-.macro restore_common_regs
+.macro restore_el1_state
// x2: base address for cpu context
// x3: tmp register

+ add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
+ ldp x20, x21, [x3, #8] // Same hack again, get guest PC and pstate
+
ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]

+ msr_hyp(ELR, x20) // pc on return from el2
+ msr_hyp(SPSR, x21) // pstate on return from el2
+
msr sp_el1, x22
- msr elr_el1, x23
- msr spsr_el1, x24

- add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
- ldp x19, x20, [x3]
- ldr x21, [x3, #16]
+ msr_el1(elr, x23)
+ msr_el1(spsr, x24)
+.endm

+.macro restore_common_regs
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ ldr x19, [x2, #CPU_XREG_OFFSET(31)] // SP_EL0
msr sp_el0, x19
- msr elr_el2, x20 // pc on return from el2
- msr spsr_el2, x21 // pstate on return from el2

add x3, x2, #CPU_XREG_OFFSET(19)
ldp x19, x20, [x3]
@@ -113,9 +125,15 @@

.macro save_host_regs
save_common_regs
+ifnvhe nop, "b skip_el1_save"
+ save_el1_state
+skip_el1_save:
.endm

.macro restore_host_regs
+ifnvhe nop, "b skip_el1_restore"
+ restore_el1_state
+skip_el1_restore:
restore_common_regs
.endm

@@ -159,6 +177,7 @@
stp x6, x7, [x3, #16]

save_common_regs
+ save_el1_state
.endm

.macro restore_guest_regs
@@ -184,6 +203,7 @@
ldr x18, [x3, #144]

// x19-x29, lr, sp*, elr*, spsr*
+ restore_el1_state
restore_common_regs

// Last bits of the 64bit state
@@ -203,6 +223,38 @@
* In other words, don't touch any of these unless you know what
* you are doing.
*/
+
+.macro save_shared_sysregs
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ add x3, x2, #CPU_SYSREG_OFFSET(TPIDR_EL0)
+
+ mrs x4, tpidr_el0
+ mrs x5, tpidrro_el0
+ mrs x6, tpidr_el1
+ mrs x7, actlr_el1
+
+ stp x4, x5, [x3]
+ stp x6, x7, [x3, #16]
+.endm
+
+.macro restore_shared_sysregs
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ add x3, x2, #CPU_SYSREG_OFFSET(TPIDR_EL0)
+
+ ldp x4, x5, [x3]
+ ldp x6, x7, [x3, #16]
+
+ msr tpidr_el0, x4
+ msr tpidrro_el0, x5
+ msr tpidr_el1, x6
+ msr actlr_el1, x7
+.endm
+
+
.macro save_sysregs
// x2: base address for cpu context
// x3: tmp register
@@ -211,26 +263,27 @@

mrs x4, vmpidr_el2
mrs x5, csselr_el1
- mrs x6, sctlr_el1
- mrs x7, actlr_el1
- mrs x8, cpacr_el1
- mrs x9, ttbr0_el1
- mrs x10, ttbr1_el1
- mrs x11, tcr_el1
- mrs x12, esr_el1
- mrs x13, afsr0_el1
- mrs x14, afsr1_el1
- mrs x15, far_el1
- mrs x16, mair_el1
- mrs x17, vbar_el1
- mrs x18, contextidr_el1
- mrs x19, tpidr_el0
- mrs x20, tpidrro_el0
- mrs x21, tpidr_el1
- mrs x22, amair_el1
- mrs x23, cntkctl_el1
- mrs x24, par_el1
- mrs x25, mdscr_el1
+ mrs_el1(x6, sctlr)
+ mrs_el1(x7, amair)
+ mrs_el1(x8, cpacr)
+ mrs_el1(x9, ttbr0)
+ mrs_el1(x10, ttbr1)
+ mrs_el1(x11, tcr)
+ mrs_el1(x12, esr)
+ mrs_el1(x13, afsr0)
+ mrs_el1(x14, afsr1)
+ mrs_el1(x15, far)
+ mrs_el1(x16, mair)
+ mrs_el1(x17, vbar)
+ mrs_el1(x18, contextidr)
+ mrs_el1(x19, cntkctl)
+ mrs x20, par_el1
+ mrs x21, mdscr_el1
+
+ mrs x22, tpidr_el0
+ mrs x23, tpidrro_el0
+ mrs x24, tpidr_el1
+ mrs x25, actlr_el1

stp x4, x5, [x3]
stp x6, x7, [x3, #16]
@@ -460,26 +513,27 @@

msr vmpidr_el2, x4
msr csselr_el1, x5
- msr sctlr_el1, x6
- msr actlr_el1, x7
- msr cpacr_el1, x8
- msr ttbr0_el1, x9
- msr ttbr1_el1, x10
- msr tcr_el1, x11
- msr esr_el1, x12
- msr afsr0_el1, x13
- msr afsr1_el1, x14
- msr far_el1, x15
- msr mair_el1, x16
- msr vbar_el1, x17
- msr contextidr_el1, x18
- msr tpidr_el0, x19
- msr tpidrro_el0, x20
- msr tpidr_el1, x21
- msr amair_el1, x22
- msr cntkctl_el1, x23
- msr par_el1, x24
- msr mdscr_el1, x25
+ msr_el1(sctlr, x6)
+ msr_el1(amair, x7)
+ msr_el1(cpacr, x8)
+ msr_el1(ttbr0, x9)
+ msr_el1(ttbr1, x10)
+ msr_el1(tcr, x11)
+ msr_el1(esr, x12)
+ msr_el1(afsr0, x13)
+ msr_el1(afsr1, x14)
+ msr_el1(far, x15)
+ msr_el1(mair, x16)
+ msr_el1(vbar, x17)
+ msr_el1(contextidr, x18)
+ msr_el1(cntkctl, x19)
+ msr par_el1, x20
+ msr mdscr_el1, x21
+
+ msr tpidr_el0, x22
+ msr tpidrro_el0, x23
+ msr tpidr_el1, x24
+ msr actlr_el1, x25
.endm

.macro restore_debug
@@ -779,8 +833,11 @@
.macro activate_traps
ldr x2, [x0, #VCPU_HCR_EL2]
msr hcr_el2, x2
- mov x2, #CPTR_EL2_TTA
- msr cptr_el2, x2
+ adr x3, __kvm_hyp_vector
+ifnvhe nop, "msr vbar_el1, x3"
+ifnvhe nop, "mrs x2, cpacr_el1"
+ifnvhe _S_(ldr x2, =(CPTR_EL2_TTA)), "orr x2, x2, #(1 << 28)"
+ifnvhe "msr cptr_el2, x2", "msr cpacr_el1, x2"

mov x2, #(1 << 15) // Trap CP15 Cr=15
msr hstr_el2, x2
@@ -803,12 +860,20 @@
ifnvhe _S_(mov x2, #HCR_RW), _S_(mov x2, #HCR_RW|HCR_TGE)
ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
msr hcr_el2, x2
- msr cptr_el2, xzr
+
+ifnvhe nop, "mrs x2, cpacr_el1"
+ifnvhe nop, "movn x3, #(1 << 12), lsl #16"
+ifnvhe nop, "and x2, x2, x3"
+ifnvhe "msr cptr_el2, xzr", "msr cpacr_el1, x2"
msr hstr_el2, xzr

mrs x2, mdcr_el2
and x2, x2, #MDCR_EL2_HPMN_MASK
msr mdcr_el2, x2
+
+ adrp x2, vectors
+ add x2, x2, #:lo12:vectors
+ifnvhe nop, "msr vbar_el1, x2"
.endm

.macro activate_vm
@@ -853,15 +918,15 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
ldr w3, [x2, #KVM_TIMER_ENABLED]
cbz w3, 1f

- mrs x3, cntv_ctl_el0
+ mrs_el0(x3, cntv_ctl)
and x3, x3, #3
str w3, [x0, #VCPU_TIMER_CNTV_CTL]
bic x3, x3, #1 // Clear Enable
- msr cntv_ctl_el0, x3
+ msr_el0(cntv_ctl, x3)

isb

- mrs x3, cntv_cval_el0
+ mrs_el0(x3, cntv_cval)
str x3, [x0, #VCPU_TIMER_CNTV_CVAL]

1:
@@ -871,7 +936,7 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
msr cnthctl_el2, x2

// Clear cntvoff for the host
- msr cntvoff_el2, xzr
+ifnvhe "msr cntvoff_el2, xzr", nop
.endm

.macro restore_timer_state
@@ -891,12 +956,12 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
ldr x3, [x2, #KVM_TIMER_CNTVOFF]
msr cntvoff_el2, x3
ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL]
- msr cntv_cval_el0, x2
+ msr_el0(cntv_cval, x2)
isb

ldr w2, [x0, #VCPU_TIMER_CNTV_CTL]
and x2, x2, #3
- msr cntv_ctl_el0, x2
+ msr_el0(cntv_ctl, x2)
1:
.endm

@@ -945,8 +1010,10 @@ ENTRY(__kvm_vcpu_run)

save_host_regs
bl __save_fpsimd
- bl __save_sysregs
-
+ifnvhe "bl __save_sysregs", nop
+ifnvhe "b 1f", nop
+ save_shared_sysregs
+1:
compute_debug_state 1f
bl __save_debug
1:
@@ -997,7 +1064,10 @@ __kvm_vcpu_return:
ldr x2, [x0, #VCPU_HOST_CONTEXT]
kern_hyp_va x2

- bl __restore_sysregs
+ifnvhe "bl __restore_sysregs", nop
+ifnvhe "b 1f", nop
+ restore_shared_sysregs
+1:
bl __restore_fpsimd

skip_debug_state x3, 1f
@@ -1104,6 +1174,8 @@ __kvm_hyp_panic:
mrs x6, par_el1
mrs x7, tpidr_el2

+ifnvhe nop, "b panic"
+
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, lr
@@ -1248,7 +1320,7 @@ el1_trap:
* As such, we can use the EL1 translation regime, and don't have
* to distinguish between EL0 and EL1 access.
*/
- mrs x2, far_el2
+ifnvhe "mrs x2, far_el2", "mrs x2, far_el1"
at s1e1r, x2
isb

@@ -1262,7 +1334,7 @@ el1_trap:
b 2f

1: mrs x3, hpfar_el2
- mrs x2, far_el2
+ifnvhe "mrs x2, far_el2", "mrs x2, far_el1"

2: mrs x0, tpidr_el2
str w1, [x0, #VCPU_ESR_EL2]
diff --git a/arch/arm64/kvm/vhe-macros.h b/arch/arm64/kvm/vhe-macros.h
index da7f9da..1e94235 100644
--- a/arch/arm64/kvm/vhe-macros.h
+++ b/arch/arm64/kvm/vhe-macros.h
@@ -31,6 +31,24 @@
alternative_insn "\nonvhe", "\vhe", ARM64_HAS_VIRT_HOST_EXTN
.endm

+#define mrs_el0(reg, sysreg) \
+ ifnvhe _S_(mrs reg, sysreg##_EL0), _S_(mrs_s reg, sysreg##_EL02)
+
+#define msr_el0(sysreg, reg) \
+ ifnvhe _S_(msr sysreg##_EL0, reg), _S_(msr_s sysreg##_EL02, reg)
+
+#define mrs_el1(reg, sysreg) \
+ ifnvhe _S_(mrs reg, sysreg##_EL1), _S_(mrs_s reg, sysreg##_EL12)
+
+#define msr_el1(sysreg, reg) \
+ ifnvhe _S_(msr sysreg##_EL1, reg), _S_(msr_s sysreg##_EL12, reg)
+
+#define mrs_hyp(reg, sysreg) \
+ ifnvhe _S_(mrs reg, sysreg##_EL2), _S_(mrs reg, sysreg##_EL1)
+
+#define msr_hyp(sysreg, reg) \
+ ifnvhe _S_(msr sysreg##_EL2, reg), _S_(msr sysreg##_EL1, reg)
+
#endif

#endif /*__ARM64_VHE_MACROS_H__ */
--
2.1.4

2015-07-08 16:21:13

by Marc Zyngier

Subject: [PATCH 10/13] arm64: Add support for running Linux in EL2 mode

With the ARMv8.1 VHE, the architecture is able to (almost) transparently
run the kernel at EL2, despite being written for EL1.

This patch takes care of the "almost" part, mostly preventing the kernel
from dropping from EL2 to EL1, and setting up the HYP configuration.

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm64/kernel/head.S | 23 ++++++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index c0ff3ce..a179747 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -29,6 +29,7 @@
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/cputype.h>
+#include <asm/kvm_mmu.h>
#include <asm/memory.h>
#include <asm/thread_info.h>
#include <asm/pgtable-hwdef.h>
@@ -481,8 +482,16 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1
isb
ret

+ /* Check for VHE being present */
+2: mrs x2, id_aa64mmfr1_el1
+ ubfx x2, x2, #8, #4
+
/* Hyp configuration. */
-2: mov x0, #(1 << 31) // 64-bit EL1
+ mov x0, #HCR_RW // 64-bit EL1
+ cbz x2, set_hcr
+ orr x0, x0, #HCR_TGE // Enable Host Extensions
+ orr x0, x0, #HCR_E2H
+set_hcr:
msr hcr_el2, x0

/* Generic timers. */
@@ -522,6 +531,9 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems

/* Coprocessor traps. */
mov x0, #0x33ff
+ cbz x2, set_cptr
+ orr x0, x0, #(3 << 20) // Don't trap FP
+set_cptr:
msr cptr_el2, x0 // Disable copro. traps to EL2

#ifdef CONFIG_COMPAT
@@ -531,6 +543,15 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
/* Stage-2 translation */
msr vttbr_el2, xzr

+ cbz x2, install_el2_stub
+
+ setup_vtcr x4, x5
+
+ mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
+ isb
+ ret
+
+install_el2_stub:
/* Hypervisor stub */
adrp x0, __hyp_stub_vectors
add x0, x0, #:lo12:__hyp_stub_vectors
--
2.1.4

2015-07-08 16:19:51

by Marc Zyngier

Subject: [PATCH 11/13] arm64: Panic when VHE and non VHE CPUs coexist

Having both VHE and non-VHE capable CPUs in the same system
is likely to be a recipe for disaster.

If the boot CPU has VHE, but a secondary does not, we won't be
able to downgrade and run the kernel at EL1. Add CPU hotplug
to the mix, and this produces a terrifying mess.

Let's solve the problem once and for all. If you mix VHE and
non-VHE CPUs in the same system, you deserve to lose, and this
patch makes sure you don't get a chance.

This is implemented by storing the kernel execution level in
a global variable. Secondaries will park themselves in a
WFI loop if they observe a mismatch. Also, the primary CPU
will detect that the secondary CPU has died on a mismatched
execution level. Panic will follow.
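
For reference, a C rendition of the check (the real implementation is
the head.S and smp.c hunks below; the function name here is made up,
and the actual check runs in assembly early during boot):

	/*
	 * Illustrative sketch of the head.S logic: the boot CPU records
	 * the EL it runs at, later CPUs compare against it and park
	 * themselves on a mismatch.
	 */
	static void check_run_cpu_mode(void)
	{
		u64 el;

		asm("mrs %0, CurrentEL" : "=r" (el));
		if (!__run_cpu_mode)
			__run_cpu_mode = el;	/* boot CPU records its EL */
		else if (el != __run_cpu_mode)
			for (;;)		/* mismatched secondary parks */
				asm volatile("wfi");
	}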

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm64/include/asm/virt.h | 15 +++++++++++++++
arch/arm64/kernel/head.S | 15 +++++++++++++++
arch/arm64/kernel/smp.c | 4 ++++
3 files changed, 34 insertions(+)

diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 9f22dd6..8e246f7 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -36,6 +36,11 @@
*/
extern u32 __boot_cpu_mode[2];

+/*
+ * __run_cpu_mode records the mode the boot CPU uses for the kernel.
+ */
+extern u32 __run_cpu_mode;
+
void __hyp_set_vectors(phys_addr_t phys_vector_base);
phys_addr_t __hyp_get_vectors(void);

@@ -60,6 +65,16 @@ static inline bool is_kernel_in_hyp_mode(void)
return el == CurrentEL_EL2;
}

+static inline bool is_kernel_mode_mismatched(void)
+{
+ u64 el;
+ u32 mode;
+
+ asm("mrs %0, CurrentEL" : "=r" (el));
+ mode = ACCESS_ONCE(__run_cpu_mode);
+ return el != mode;
+}
+
/* The section containing the hypervisor text */
extern char __hyp_text_start[];
extern char __hyp_text_end[];
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index a179747..318e69f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -578,7 +578,20 @@ ENTRY(set_cpu_boot_mode_flag)
1: str w20, [x1] // This CPU has booted in EL1
dmb sy
dc ivac, x1 // Invalidate potentially stale cache line
+ adr_l x1, __run_cpu_mode
+ ldr w0, [x1]
+ mrs x20, CurrentEL
+ str w20, [x1]
+ dmb sy
+ dc ivac, x1 // Invalidate potentially stale cache line
+ cbz x0, skip_el_check
+ cmp x0, x20
+ bne mismatched_el
+skip_el_check:
ret
+mismatched_el:
+ wfi
+ b mismatched_el
ENDPROC(set_cpu_boot_mode_flag)

/*
@@ -593,6 +606,8 @@ ENDPROC(set_cpu_boot_mode_flag)
ENTRY(__boot_cpu_mode)
.long BOOT_CPU_MODE_EL2
.long BOOT_CPU_MODE_EL1
+ENTRY(__run_cpu_mode)
+ .long 0
.popsection

#ifdef CONFIG_SMP
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 695801a..b467f51 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -52,6 +52,7 @@
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/ptrace.h>
+#include <asm/virt.h>

#define CREATE_TRACE_POINTS
#include <trace/events/ipi.h>
@@ -112,6 +113,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
pr_crit("CPU%u: failed to come online\n", cpu);
ret = -EIO;
}
+
+ if (is_kernel_mode_mismatched())
+ panic("CPU%u: incompatible execution level", cpu);
} else {
pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
}
--
2.1.4

2015-07-08 16:20:33

by Marc Zyngier

Subject: [PATCH 12/13] arm64: KVM: Split sysreg save/restore

As we're starting to get different requirements for non-VHE and VHE
code paths, use a slightly more fine-grained approach:

- __save/restore_sysregs: save/restore non-shared sysregs
- __save/restore_shared_sysregs: save/restore only shared sysregs

Of course, non-VHE always requires both.

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm64/kvm/hyp.S | 91 +++++++++++++++++++++++++---------------------------
1 file changed, 44 insertions(+), 47 deletions(-)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index b61591b..3cbd2c4 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -280,11 +280,6 @@ skip_el1_restore:
mrs x20, par_el1
mrs x21, mdscr_el1

- mrs x22, tpidr_el0
- mrs x23, tpidrro_el0
- mrs x24, tpidr_el1
- mrs x25, actlr_el1
-
stp x4, x5, [x3]
stp x6, x7, [x3, #16]
stp x8, x9, [x3, #32]
@@ -294,8 +289,6 @@ skip_el1_restore:
stp x16, x17, [x3, #96]
stp x18, x19, [x3, #112]
stp x20, x21, [x3, #128]
- stp x22, x23, [x3, #144]
- stp x24, x25, [x3, #160]
.endm

.macro save_debug
@@ -508,8 +501,6 @@ skip_el1_restore:
ldp x16, x17, [x3, #96]
ldp x18, x19, [x3, #112]
ldp x20, x21, [x3, #128]
- ldp x22, x23, [x3, #144]
- ldp x24, x25, [x3, #160]

msr vmpidr_el2, x4
msr csselr_el1, x5
@@ -529,11 +520,6 @@ skip_el1_restore:
msr_el1(cntkctl, x19)
msr par_el1, x20
msr mdscr_el1, x21
-
- msr tpidr_el0, x22
- msr tpidrro_el0, x23
- msr tpidr_el1, x24
- msr actlr_el1, x25
.endm

.macro restore_debug
@@ -913,10 +899,12 @@ ifnvhe nop, "msr vbar_el1, x2"

.macro save_timer_state
// x0: vcpu pointer
- ldr x2, [x0, #VCPU_KVM]
- kern_hyp_va x2
- ldr w3, [x2, #KVM_TIMER_ENABLED]
- cbz w3, 1f
+ // x1: return code
+ // x2: vcpu context
+ ldr x3, [x0, #VCPU_KVM]
+ kern_hyp_va x3
+ ldr w4, [x3, #KVM_TIMER_ENABLED]
+ cbz w4, 1f

mrs_el0(x3, cntv_ctl)
and x3, x3, #3
@@ -931,9 +919,9 @@ ifnvhe nop, "msr vbar_el1, x2"

1:
// Allow physical timer/counter access for the host
- mrs x2, cnthctl_el2
- orr x2, x2, #3
- msr cnthctl_el2, x2
+ mrs x3, cnthctl_el2
+ orr x3, x3, #3
+ msr cnthctl_el2, x3

// Clear cntvoff for the host
ifnvhe "msr cntvoff_el2, xzr", nop
@@ -941,34 +929,43 @@ ifnvhe "msr cntvoff_el2, xzr", nop

.macro restore_timer_state
// x0: vcpu pointer
+ // x2: vcpu context
// Disallow physical timer access for the guest
// Physical counter access is allowed
- mrs x2, cnthctl_el2
- orr x2, x2, #1
- bic x2, x2, #2
- msr cnthctl_el2, x2
-
- ldr x2, [x0, #VCPU_KVM]
- kern_hyp_va x2
- ldr w3, [x2, #KVM_TIMER_ENABLED]
- cbz w3, 1f
-
- ldr x3, [x2, #KVM_TIMER_CNTVOFF]
- msr cntvoff_el2, x3
- ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL]
- msr_el0(cntv_cval, x2)
+ mrs x3, cnthctl_el2
+ orr x3, x3, #1
+ bic x3, x3, #2
+ msr cnthctl_el2, x3
+
+ ldr x3, [x0, #VCPU_KVM]
+ kern_hyp_va x3
+ ldr w4, [x3, #KVM_TIMER_ENABLED]
+ cbz w4, 1f
+
+ ldr x4, [x3, #KVM_TIMER_CNTVOFF]
+ msr cntvoff_el2, x4
+ ldr x4, [x0, #VCPU_TIMER_CNTV_CVAL]
+ msr_el0(cntv_cval, x4)
isb

- ldr w2, [x0, #VCPU_TIMER_CNTV_CTL]
- and x2, x2, #3
- msr_el0(cntv_ctl, x2)
+ ldr w4, [x0, #VCPU_TIMER_CNTV_CTL]
+ and x4, x4, #3
+ msr_el0(cntv_ctl, x4)
1:
.endm

+__save_shared_sysregs:
+ save_shared_sysregs
+ ret
+
__save_sysregs:
save_sysregs
ret

+__restore_shared_sysregs:
+ restore_shared_sysregs
+ ret
+
__restore_sysregs:
restore_sysregs
ret
@@ -1010,10 +1007,9 @@ ENTRY(__kvm_vcpu_run)

save_host_regs
bl __save_fpsimd
-ifnvhe "bl __save_sysregs", nop
-ifnvhe "b 1f", nop
- save_shared_sysregs
-1:
+ifnvhe "bl __save_sysregs", "nop"
+ bl __save_shared_sysregs
+
compute_debug_state 1f
bl __save_debug
1:
@@ -1027,6 +1023,7 @@ ifnvhe "b 1f", nop
add x2, x0, #VCPU_CONTEXT

bl __restore_sysregs
+ bl __restore_shared_sysregs
bl __restore_fpsimd

skip_debug_state x3, 1f
@@ -1048,6 +1045,7 @@ __kvm_vcpu_return:
save_guest_regs
bl __save_fpsimd
bl __save_sysregs
+ bl __save_shared_sysregs

skip_debug_state x3, 1f
bl __save_debug
@@ -1064,10 +1062,8 @@ __kvm_vcpu_return:
ldr x2, [x0, #VCPU_HOST_CONTEXT]
kern_hyp_va x2

-ifnvhe "bl __restore_sysregs", nop
-ifnvhe "b 1f", nop
- restore_shared_sysregs
-1:
+ifnvhe "bl __restore_sysregs", "nop"
+ bl __restore_shared_sysregs
bl __restore_fpsimd

skip_debug_state x3, 1f
@@ -1159,7 +1155,8 @@ __kvm_hyp_panic:
ldr x2, [x0, #VCPU_HOST_CONTEXT]
kern_hyp_va x2

- bl __restore_sysregs
+ifnvhe "bl __restore_sysregs", "nop"
+ bl __restore_shared_sysregs

1: adr x0, __hyp_panic_str
adr x1, 2f
--
2.1.4

2015-07-08 16:20:08

by Marc Zyngier

Subject: [PATCH 13/13] arm64: KVM: VHE: Early interrupt handling

With VHE enabled, it is possible to let the kernel handle an interrupt
without saving the full guest context, and without restoring the full
host context either. This reduces the latency of handling an interrupt.

When an interrupt fires we can:
- save the guest's general purpose registers, shared system registers
and timer state
- switch to a host context (setting vectors, deactivating traps and
stage-2 translation)
- restore the host's shared sysregs

At that stage, we're able to jump to the interrupt handler.

On return from the handler, we can finish the switch (save/restore
whatever is missing from the above).
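
Put as pseudocode (the implementation is the el1_irq hunk in hyp.S
below; the names are the asm macros and helpers used there):

	/* VHE early-interrupt path, pseudocode view of el1_irq: */
	save_guest_regs();
	__save_shared_sysregs();
	save_timer_state();
	__vgic_v2_disable();		/* or __vgic_v3_disable(), patched at boot */
	deactivate_traps();		/* sets TGE, switches vectors */
	deactivate_vm();
	isb();				/* make sure TGE/vectors took effect */
	restore_shared_sysregs();	/* host's shared sysregs */
	handle_arch_irq(host_ctxt);	/* ordinary kernel IRQ handling */
	/* on return: save FP/SIMD, the remaining guest sysregs, 32bit,
	 * debug and vgic state, then join the tail of __kvm_vcpu_return */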

Signed-off-by: Marc Zyngier <[email protected]>
---
arch/arm/kvm/arm.c | 3 +++
arch/arm64/kvm/hyp.S | 56 ++++++++++++++++++++++++++++++++++++++---
arch/arm64/kvm/vgic-v2-switch.S | 19 +++++++++++---
arch/arm64/kvm/vgic-v3-switch.S | 33 +++++++++++++-----------
4 files changed, 90 insertions(+), 21 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ca3c662..ab9333f 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -573,6 +573,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
* disabled. Enabling the interrupts now will have
* the effect of taking the interrupt again, in SVC
* mode this time.
+ *
+ * With VHE, the interrupt is likely to have already
+ * been taken.
*/
local_irq_enable();

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 3cbd2c4..ac95ba3 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -714,8 +714,10 @@ skip_el1_restore:
.endm

.macro skip_32bit_state tmp, target
- // Skip 32bit state if not needed
- mrs \tmp, hcr_el2
+ // Skip 32bit state if not needed.
+ // With VHE, we may have cleared the RW bit in HCR_EL2 early,
+ // and have to rely on the memory version instead.
+ifnvhe "mrs \tmp, hcr_el2", _S_(ldr \tmp, [x0, #VCPU_HCR_EL2])
tbnz \tmp, #HCR_RW_SHIFT, \target
.endm

@@ -1053,11 +1055,18 @@ __kvm_vcpu_return:
save_guest_32bit_state

save_timer_state
+ifnvhe "b 1f", nop
+ alternative_insn "bl __vgic_v2_disable", \
+ "bl __vgic_v3_disable", \
+ ARM64_HAS_SYSREG_GIC_CPUIF
+1:
save_vgic_state

deactivate_traps
deactivate_vm

+__kvm_vcpu_return_irq:
+
// Host context
ldr x2, [x0, #VCPU_HOST_CONTEXT]
kern_hyp_va x2
@@ -1355,8 +1364,47 @@ el1_irq:
push x0, x1
push x2, x3
mrs x0, tpidr_el2
- mov x1, #ARM_EXCEPTION_IRQ
- b __kvm_vcpu_return
+ifnvhe _S_(mov x1, #ARM_EXCEPTION_IRQ), nop
+ifnvhe "b __kvm_vcpu_return", nop
+
+ // Fallthrough for VHE
+ add x2, x0, #VCPU_CONTEXT
+
+ save_guest_regs
+ bl __save_shared_sysregs
+ save_timer_state
+ alternative_insn "bl __vgic_v2_disable", \
+ "bl __vgic_v3_disable", \
+ ARM64_HAS_SYSREG_GIC_CPUIF
+ deactivate_traps
+ deactivate_vm
+
+ isb // Needed to ensure TGE is on and vectors have been switched
+
+ ldr x2, [x0, #VCPU_HOST_CONTEXT]
+ restore_shared_sysregs
+
+ mov x0, x2
+ adrp x1, handle_arch_irq
+ add x1, x1, #:lo12:handle_arch_irq
+ ldr x1, [x1]
+ blr x1
+
+ // Back from the interrupt handler, finalize the world switch
+ mrs x0, tpidr_el2
+ add x2, x0, #VCPU_CONTEXT
+
+ bl __save_fpsimd
+ bl __save_sysregs // We've already saved the shared regs
+ save_guest_32bit_state
+
+ skip_debug_state x3, 1f
+ bl __save_debug
+1:
+ save_vgic_state
+
+ mov x1, #ARM_EXCEPTION_IRQ
+ b __kvm_vcpu_return_irq

.ltorg

diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S
index 3f00071..c8864b4 100644
--- a/arch/arm64/kvm/vgic-v2-switch.S
+++ b/arch/arm64/kvm/vgic-v2-switch.S
@@ -26,16 +26,30 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>

+#include "vhe-macros.h"
+
.text
.pushsection .hyp.text, "ax"

+ENTRY(__vgic_v2_disable)
+ /* Get VGIC VCTRL base into x2 */
+ ldr x2, [x0, #VCPU_KVM]
+ kern_hyp_va x2
+ ldr x2, [x2, #KVM_VGIC_VCTRL]
+ kern_hyp_va x2
+ cbz x2, 1f // disabled
+
+ /* Clear GICH_HCR */
+ str wzr, [x2, #GICH_HCR]
+1: ret
+ENDPROC(__vgic_v2_disable)
+
/*
* Save the VGIC CPU state into memory
* x0: Register pointing to VCPU struct
* Do not corrupt x1!!!
*/
ENTRY(__save_vgic_v2_state)
-__save_vgic_v2_state:
/* Get VGIC VCTRL base into x2 */
ldr x2, [x0, #VCPU_KVM]
kern_hyp_va x2
@@ -75,7 +89,7 @@ CPU_BE( str w10, [x3, #VGIC_V2_CPU_ELRSR] )
str w11, [x3, #VGIC_V2_CPU_APR]

/* Clear GICH_HCR */
- str wzr, [x2, #GICH_HCR]
+ifnvhe _S_(str wzr, [x2, #GICH_HCR]), nop

/* Save list registers */
add x2, x2, #GICH_LR0
@@ -95,7 +109,6 @@ ENDPROC(__save_vgic_v2_state)
* x0: Register pointing to VCPU struct
*/
ENTRY(__restore_vgic_v2_state)
-__restore_vgic_v2_state:
/* Get VGIC VCTRL base into x2 */
ldr x2, [x0, #VCPU_KVM]
kern_hyp_va x2
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
index 3c20730..b9b45e3 100644
--- a/arch/arm64/kvm/vgic-v3-switch.S
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -25,6 +25,8 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_arm.h>

+#include "vhe-macros.h"
+
.text
.pushsection .hyp.text, "ax"

@@ -35,11 +37,21 @@
#define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8)

/*
+ * Disable the vcpu interface, preventing interrupts from
+ * being delivered.
+ */
+ENTRY(__vgic_v3_disable)
+ // Nuke the HCR register.
+ msr_s ICH_HCR_EL2, xzr
+ ret
+ENDPROC(__vgic_v3_disable)
+
+/*
* Save the VGIC CPU state into memory
* x0: Register pointing to VCPU struct
* Do not corrupt x1!!!
*/
-.macro save_vgic_v3_state
+ENTRY(__save_vgic_v3_state)
// Compute the address of struct vgic_cpu
add x3, x0, #VCPU_VGIC_CPU

@@ -58,7 +70,7 @@
str w7, [x3, #VGIC_V3_CPU_EISR]
str w8, [x3, #VGIC_V3_CPU_ELRSR]

- msr_s ICH_HCR_EL2, xzr
+ifnvhe _S_(msr_s ICH_HCR_EL2, xzr), nop

mrs_s x21, ICH_VTR_EL2
mvn w22, w21
@@ -137,15 +149,17 @@
orr x5, x5, #ICC_SRE_EL2_ENABLE
msr_s ICC_SRE_EL2, x5
isb
- mov x5, #1
+ // If using VHE, we don't care about EL1 and can early out.
+ifnvhe "mov x5, #1", ret
msr_s ICC_SRE_EL1, x5
-.endm
+ ret
+ENDPROC(__save_vgic_v3_state)

/*
* Restore the VGIC CPU state from memory
* x0: Register pointing to VCPU struct
*/
-.macro restore_vgic_v3_state
+ENTRY(__restore_vgic_v3_state)
// Compute the address of struct vgic_cpu
add x3, x0, #VCPU_VGIC_CPU

@@ -249,15 +263,6 @@
and x5, x5, #~ICC_SRE_EL2_ENABLE
msr_s ICC_SRE_EL2, x5
1:
-.endm
-
-ENTRY(__save_vgic_v3_state)
- save_vgic_v3_state
- ret
-ENDPROC(__save_vgic_v3_state)
-
-ENTRY(__restore_vgic_v3_state)
- restore_vgic_v3_state
ret
ENDPROC(__restore_vgic_v3_state)

--
2.1.4

2015-07-08 17:14:59

by Paolo Bonzini

Subject: Re: [PATCH 07/13] arm64: KVM: VHE: Patch out use of HVC



On 08/07/2015 18:19, Marc Zyngier wrote:
> +/* Hack to allow stringification of macros... */
> +#define __S__(a,args...) __stringify(a, ##args)
> +#define _S_(a,args...) __S__(a, args)
> +
> +.macro ifnvhe nonvhe vhe
> + alternative_insn "\nonvhe", "\vhe", ARM64_HAS_VIRT_HOST_EXTN
> +.endm

Why not use this in patch 6 too?

Paolo

2015-07-08 17:54:57

by Marc Zyngier

Subject: Re: [PATCH 07/13] arm64: KVM: VHE: Patch out use of HVC

On 08/07/15 18:14, Paolo Bonzini wrote:
>
>
> On 08/07/2015 18:19, Marc Zyngier wrote:
>> +/* Hack to allow stringification of macros... */
>> +#define __S__(a,args...) __stringify(a, ##args)
>> +#define _S_(a,args...) __S__(a, args)
>> +
>> +.macro ifnvhe nonvhe vhe
>> + alternative_insn "\nonvhe", "\vhe", ARM64_HAS_VIRT_HOST_EXTN
>> +.endm
>
> Why not use this in patch 6 too?

I tried, and ended up in #include hell. vhe-macros.h is local to
arch/arm64/kvm, and including it from asm/kvm_mmu.h breaks (we include
it from arch/arm/kvm/ and virt/kvm/arm/).

Alternatively, I could move it to arch/arm64/include/asm (renamed to
kvm_vhe_macros.h?), which would solve this issue. I just gave it a go,
and that seems sensible enough.

Thanks for the suggestion!

M.
--
Jazz is not dead. It just smells funny...

2015-07-09 01:40:05

by Mario Smarduch

[permalink] [raw]
Subject: Re: [PATCH 09/13] arm64: KVM: VHE: Add alternatives for VHE-enabled world-switch

On 07/08/2015 09:19 AM, Marc Zyngier wrote:
> In order to switch between host and guest, a VHE-enabled kernel
> must use different accessors for certain system registers.
>
> This patch uses runtime patching to use the right instruction
> when required...
>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> arch/arm64/include/asm/kvm_asm.h | 40 ++++++--
> arch/arm64/kvm/hyp.S | 210 ++++++++++++++++++++++++++-------------
> arch/arm64/kvm/vhe-macros.h | 18 ++++
> 3 files changed, 191 insertions(+), 77 deletions(-)
>
[....]
> * Author: Marc Zyngier <[email protected]>
> *
> * This program is free software; you can redistribute it and/or modify
> @@ -67,40 +67,52 @@
> stp x29, lr, [x3, #80]
>
> mrs x19, sp_el0
> - mrs x20, elr_el2 // pc before entering el2
> - mrs x21, spsr_el2 // pstate before entering el2
> + str x19, [x3, #96]
> +.endm
>
> - stp x19, x20, [x3, #96]
> - str x21, [x3, #112]

Hi Marc,

trying to make a little sense out of this :)

In the case of VHE kernel the two 'mrs_hyp()' and 'mrs_el1()'
calls would be accessing same registers - namely EL1 variants?
For non VHE EL2, EL1?

The mrs_s and sysreg_EL12 are new, not sure what these mean.

- Mario

> +.macro save_el1_state
> + mrs_hyp(x20, ELR) // pc before entering el2
> + mrs_hyp(x21, SPSR) // pstate before entering el2
>
> mrs x22, sp_el1
> - mrs x23, elr_el1
> - mrs x24, spsr_el1
> +
> + mrs_el1(x23, elr)
> + mrs_el1(x24, spsr)
> +
> + add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
> + stp x20, x21, [x3, #8] // HACK: Store to the regs after SP_EL0
>
> str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
> str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
> str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
> .endm
>
> -.macro restore_common_regs
> +.macro restore_el1_state
> // x2: base address for cpu context
> // x3: tmp register
>
> + add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
> + ldp x20, x21, [x3, #8] // Same hack again, get guest PC and pstate
> +
> ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
> ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
> ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
>
> + msr_hyp(ELR, x20) // pc on return from el2
> + msr_hyp(SPSR, x21) // pstate on return from el2
> +
> msr sp_el1, x22
> - msr elr_el1, x23
> - msr spsr_el1, x24
>
> - add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
> - ldp x19, x20, [x3]
> - ldr x21, [x3, #16]
> + msr_el1(elr, x23)
> + msr_el1(spsr, x24)
> +.endm
>
> +.macro restore_common_regs
> + // x2: base address for cpu context
> + // x3: tmp register
> +
> + ldr x19, [x2, #CPU_XREG_OFFSET(31)] // SP_EL0
> msr sp_el0, x19
> - msr elr_el2, x20 // pc on return from el2
> - msr spsr_el2, x21 // pstate on return from el2
>
> add x3, x2, #CPU_XREG_OFFSET(19)
> ldp x19, x20, [x3]
> @@ -113,9 +125,15 @@
>
> .macro save_host_regs
> save_common_regs
> +ifnvhe nop, "b skip_el1_save"
> + save_el1_state
> +skip_el1_save:
> .endm
>
> .macro restore_host_regs
> +ifnvhe nop, "b skip_el1_restore"
> + restore_el1_state
> +skip_el1_restore:
> restore_common_regs
> .endm
>
> @@ -159,6 +177,7 @@
> stp x6, x7, [x3, #16]
>
> save_common_regs
> + save_el1_state
> .endm
>
> .macro restore_guest_regs
> @@ -184,6 +203,7 @@
> ldr x18, [x3, #144]
>
> // x19-x29, lr, sp*, elr*, spsr*
> + restore_el1_state
> restore_common_regs
>
> // Last bits of the 64bit state
> @@ -203,6 +223,38 @@
> * In other words, don't touch any of these unless you know what
> * you are doing.
> */
> +
> +.macro save_shared_sysregs
> + // x2: base address for cpu context
> + // x3: tmp register
> +
> + add x3, x2, #CPU_SYSREG_OFFSET(TPIDR_EL0)
> +
> + mrs x4, tpidr_el0
> + mrs x5, tpidrro_el0
> + mrs x6, tpidr_el1
> + mrs x7, actlr_el1
> +
> + stp x4, x5, [x3]
> + stp x6, x7, [x3, #16]
> +.endm
> +
> +.macro restore_shared_sysregs
> + // x2: base address for cpu context
> + // x3: tmp register
> +
> + add x3, x2, #CPU_SYSREG_OFFSET(TPIDR_EL0)
> +
> + ldp x4, x5, [x3]
> + ldp x6, x7, [x3, #16]
> +
> + msr tpidr_el0, x4
> + msr tpidrro_el0, x5
> + msr tpidr_el1, x6
> + msr actlr_el1, x7
> +.endm
> +
> +
> .macro save_sysregs
> // x2: base address for cpu context
> // x3: tmp register
> @@ -211,26 +263,27 @@
>
> mrs x4, vmpidr_el2
> mrs x5, csselr_el1
> - mrs x6, sctlr_el1
> - mrs x7, actlr_el1
> - mrs x8, cpacr_el1
> - mrs x9, ttbr0_el1
> - mrs x10, ttbr1_el1
> - mrs x11, tcr_el1
> - mrs x12, esr_el1
> - mrs x13, afsr0_el1
> - mrs x14, afsr1_el1
> - mrs x15, far_el1
> - mrs x16, mair_el1
> - mrs x17, vbar_el1
> - mrs x18, contextidr_el1
> - mrs x19, tpidr_el0
> - mrs x20, tpidrro_el0
> - mrs x21, tpidr_el1
> - mrs x22, amair_el1
> - mrs x23, cntkctl_el1
> - mrs x24, par_el1
> - mrs x25, mdscr_el1
> + mrs_el1(x6, sctlr)
> + mrs_el1(x7, amair)
> + mrs_el1(x8, cpacr)
> + mrs_el1(x9, ttbr0)
> + mrs_el1(x10, ttbr1)
> + mrs_el1(x11, tcr)
> + mrs_el1(x12, esr)
> + mrs_el1(x13, afsr0)
> + mrs_el1(x14, afsr1)
> + mrs_el1(x15, far)
> + mrs_el1(x16, mair)
> + mrs_el1(x17, vbar)
> + mrs_el1(x18, contextidr)
> + mrs_el1(x19, cntkctl)
> + mrs x20, par_el1
> + mrs x21, mdscr_el1
> +
> + mrs x22, tpidr_el0
> + mrs x23, tpidrro_el0
> + mrs x24, tpidr_el1
> + mrs x25, actlr_el1
>
> stp x4, x5, [x3]
> stp x6, x7, [x3, #16]
> @@ -460,26 +513,27 @@
>
> msr vmpidr_el2, x4
> msr csselr_el1, x5
> - msr sctlr_el1, x6
> - msr actlr_el1, x7
> - msr cpacr_el1, x8
> - msr ttbr0_el1, x9
> - msr ttbr1_el1, x10
> - msr tcr_el1, x11
> - msr esr_el1, x12
> - msr afsr0_el1, x13
> - msr afsr1_el1, x14
> - msr far_el1, x15
> - msr mair_el1, x16
> - msr vbar_el1, x17
> - msr contextidr_el1, x18
> - msr tpidr_el0, x19
> - msr tpidrro_el0, x20
> - msr tpidr_el1, x21
> - msr amair_el1, x22
> - msr cntkctl_el1, x23
> - msr par_el1, x24
> - msr mdscr_el1, x25
> + msr_el1(sctlr, x6)
> + msr_el1(amair, x7)
> + msr_el1(cpacr, x8)
> + msr_el1(ttbr0, x9)
> + msr_el1(ttbr1, x10)
> + msr_el1(tcr, x11)
> + msr_el1(esr, x12)
> + msr_el1(afsr0, x13)
> + msr_el1(afsr1, x14)
> + msr_el1(far, x15)
> + msr_el1(mair, x16)
> + msr_el1(vbar, x17)
> + msr_el1(contextidr, x18)
> + msr_el1(cntkctl, x19)
> + msr par_el1, x20
> + msr mdscr_el1, x21
> +
> + msr tpidr_el0, x22
> + msr tpidrro_el0, x23
> + msr tpidr_el1, x24
> + msr actlr_el1, x25
> .endm
>
> .macro restore_debug
> @@ -779,8 +833,11 @@
> .macro activate_traps
> ldr x2, [x0, #VCPU_HCR_EL2]
> msr hcr_el2, x2
> - mov x2, #CPTR_EL2_TTA
> - msr cptr_el2, x2
> + adr x3, __kvm_hyp_vector
> +ifnvhe nop, "msr vbar_el1, x3"
> +ifnvhe nop, "mrs x2, cpacr_el1"
> +ifnvhe _S_(ldr x2, =(CPTR_EL2_TTA)), "orr x2, x2, #(1 << 28)"
> +ifnvhe "msr cptr_el2, x2", "msr cpacr_el1, x2"
>
> mov x2, #(1 << 15) // Trap CP15 Cr=15
> msr hstr_el2, x2
> @@ -803,12 +860,20 @@
> ifnvhe _S_(mov x2, #HCR_RW), _S_(mov x2, #HCR_RW|HCR_TGE)
> ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
> msr hcr_el2, x2
> - msr cptr_el2, xzr
> +
> +ifnvhe nop, "mrs x2, cpacr_el1"
> +ifnvhe nop, "movn x3, #(1 << 12), lsl #16"
> +ifnvhe nop, "and x2, x2, x3"
> +ifnvhe "msr cptr_el2, xzr", "msr cpacr_el1, x2"
> msr hstr_el2, xzr
>
> mrs x2, mdcr_el2
> and x2, x2, #MDCR_EL2_HPMN_MASK
> msr mdcr_el2, x2
> +
> + adrp x2, vectors
> + add x2, x2, #:lo12:vectors
> +ifnvhe nop, "msr vbar_el1, x2"
> .endm
>
> .macro activate_vm
> @@ -853,15 +918,15 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
> ldr w3, [x2, #KVM_TIMER_ENABLED]
> cbz w3, 1f
>
> - mrs x3, cntv_ctl_el0
> + mrs_el0(x3, cntv_ctl)
> and x3, x3, #3
> str w3, [x0, #VCPU_TIMER_CNTV_CTL]
> bic x3, x3, #1 // Clear Enable
> - msr cntv_ctl_el0, x3
> + msr_el0(cntv_ctl, x3)
>
> isb
>
> - mrs x3, cntv_cval_el0
> + mrs_el0(x3, cntv_cval)
> str x3, [x0, #VCPU_TIMER_CNTV_CVAL]
>
> 1:
> @@ -871,7 +936,7 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
> msr cnthctl_el2, x2
>
> // Clear cntvoff for the host
> - msr cntvoff_el2, xzr
> +ifnvhe "msr cntvoff_el2, xzr", nop
> .endm
>
> .macro restore_timer_state
> @@ -891,12 +956,12 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
> ldr x3, [x2, #KVM_TIMER_CNTVOFF]
> msr cntvoff_el2, x3
> ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL]
> - msr cntv_cval_el0, x2
> + msr_el0(cntv_cval, x2)
> isb
>
> ldr w2, [x0, #VCPU_TIMER_CNTV_CTL]
> and x2, x2, #3
> - msr cntv_ctl_el0, x2
> + msr_el0(cntv_ctl, x2)
> 1:
> .endm
>
> @@ -945,8 +1010,10 @@ ENTRY(__kvm_vcpu_run)
>
> save_host_regs
> bl __save_fpsimd
> - bl __save_sysregs
> -
> +ifnvhe "bl __save_sysregs", nop
> +ifnvhe "b 1f", nop
> + save_shared_sysregs
> +1:
> compute_debug_state 1f
> bl __save_debug
> 1:
> @@ -997,7 +1064,10 @@ __kvm_vcpu_return:
> ldr x2, [x0, #VCPU_HOST_CONTEXT]
> kern_hyp_va x2
>
> - bl __restore_sysregs
> +ifnvhe "bl __restore_sysregs", nop
> +ifnvhe "b 1f", nop
> + restore_shared_sysregs
> +1:
> bl __restore_fpsimd
>
> skip_debug_state x3, 1f
> @@ -1104,6 +1174,8 @@ __kvm_hyp_panic:
> mrs x6, par_el1
> mrs x7, tpidr_el2
>
> +ifnvhe nop, "b panic"
> +
> mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
> PSR_MODE_EL1h)
> msr spsr_el2, lr
> @@ -1248,7 +1320,7 @@ el1_trap:
> * As such, we can use the EL1 translation regime, and don't have
> * to distinguish between EL0 and EL1 access.
> */
> - mrs x2, far_el2
> +ifnvhe "mrs x2, far_el2", "mrs x2, far_el1"
> at s1e1r, x2
> isb
>
> @@ -1262,7 +1334,7 @@ el1_trap:
> b 2f
>
> 1: mrs x3, hpfar_el2
> - mrs x2, far_el2
> +ifnvhe "mrs x2, far_el2", "mrs x2, far_el1"
>
> 2: mrs x0, tpidr_el2
> str w1, [x0, #VCPU_ESR_EL2]
> diff --git a/arch/arm64/kvm/vhe-macros.h b/arch/arm64/kvm/vhe-macros.h
> index da7f9da..1e94235 100644
> --- a/arch/arm64/kvm/vhe-macros.h
> +++ b/arch/arm64/kvm/vhe-macros.h
> @@ -31,6 +31,24 @@
> alternative_insn "\nonvhe", "\vhe", ARM64_HAS_VIRT_HOST_EXTN
> .endm
>
> +#define mrs_el0(reg, sysreg) \
> + ifnvhe _S_(mrs reg, sysreg##_EL0), _S_(mrs_s reg, sysreg##_EL02)
> +
> +#define msr_el0(sysreg, reg) \
> + ifnvhe _S_(msr sysreg##_EL0, reg), _S_(msr_s sysreg##_EL02, reg)
> +
> +#define mrs_el1(reg, sysreg) \
> + ifnvhe _S_(mrs reg, sysreg##_EL1), _S_(mrs_s reg, sysreg##_EL12)
> +
> +#define msr_el1(sysreg, reg) \
> + ifnvhe _S_(msr sysreg##_EL1, reg), _S_(msr_s sysreg##_EL12, reg)
> +
> +#define mrs_hyp(reg, sysreg) \
> + ifnvhe _S_(mrs reg, sysreg##_EL2), _S_(mrs reg, sysreg##_EL1)
> +
> +#define msr_hyp(sysreg, reg) \
> + ifnvhe _S_(msr sysreg##_EL2, reg), _S_(msr sysreg##_EL1, reg)
> +
> #endif
>
> #endif /*__ARM64_VHE_MACROS_H__ */
>

2015-07-09 08:06:44

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH 09/13] arm64: KVM: VHE: Add alternatives for VHE-enabled world-switch

Hi Mario,

On 09/07/15 02:29, Mario Smarduch wrote:
> On 07/08/2015 09:19 AM, Marc Zyngier wrote:
>> In order to switch between host and guest, a VHE-enabled kernel
>> must use different accessors for certain system registers.
>>
>> This patch uses runtime patching to use the right instruction
>> when required...
>>
>> Signed-off-by: Marc Zyngier <[email protected]>
>> ---
>> arch/arm64/include/asm/kvm_asm.h | 40 ++++++--
>> arch/arm64/kvm/hyp.S | 210 ++++++++++++++++++++++++++-------------
>> arch/arm64/kvm/vhe-macros.h | 18 ++++
>> 3 files changed, 191 insertions(+), 77 deletions(-)
>>
> [....]
>> * Author: Marc Zyngier <[email protected]>
>> *
>> * This program is free software; you can redistribute it and/or modify
>> @@ -67,40 +67,52 @@
>> stp x29, lr, [x3, #80]
>>
>> mrs x19, sp_el0
>> - mrs x20, elr_el2 // pc before entering el2
>> - mrs x21, spsr_el2 // pstate before entering el2
>> + str x19, [x3, #96]
>> +.endm
>>
>> - stp x19, x20, [x3, #96]
>> - str x21, [x3, #112]
>
> Hi Marc,
>
> trying to make a little sense out of this :)

Don't even try, it hurts... ;-)

> In the case of VHE kernel the two 'mrs_hyp()' and 'mrs_el1()'
> calls would be accessing same registers - namely EL1 variants?
> For non VHE EL2, EL1?
>
> The mrs_s and sysreg_EL12 are new, not sure what these mean.

mrs_s and msr_s are just macros that deal with system registers that
the assembler doesn't know about (yet). They have been in (moderate) use
for about a year, and have been introduced with the GICv3 support.

See arch/arm64/include/asm/sysreg.h for the gory details.

Now, on to sysreg_EL12: The main idea with VHE is that anything that
used to run at EL1 (the kernel) can now run unmodified at EL2, and that
it is the EL2 software that has to change to deal with it.

So when the kernel uses VHE and runs at EL2, an access to sysreg_EL1
really accesses sysreg_EL2, transparently. This is what makes it
possible to run the kernel at EL2 without any change.

But when the KVM world switch wants to access a guest register, it
cannot use sysreg_EL1 anymore (that would hit on the EL2 register
because of the above rule). For this, it must use sysreg_EL12 which
effectively means "access the EL1 register from EL2".
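
To put it in code form (just an illustration, not something from the
series): reading the guest's SCTLR from a VHE host needs the _EL12
encoding, since a plain sctlr_el1 access is redirected to SCTLR_EL2 by
the rule above. The s3_5_c1_c0_0 spelling below is the generic sysreg
form (op0=3, op1=5, CRn=1, CRm=0, op2=0), for assemblers that don't
know the _EL12 names yet:

	static inline u64 vhe_read_guest_sctlr(void)
	{
		u64 val;

		/* SCTLR_EL12: the guest's SCTLR_EL1, as seen from EL2 */
		asm volatile("mrs %0, s3_5_c1_c0_0" : "=r" (val));
		return val;
	}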

As a consequence, we have the following rules:
- non-VHE: msr_el1 uses EL1, msr_hyp uses EL2
- VHE: msr_el1 uses EL12, msr_hyp uses EL1

Does this help?

M.

> - Mario
>
>> +.macro save_el1_state
>> + mrs_hyp(x20, ELR) // pc before entering el2
>> + mrs_hyp(x21, SPSR) // pstate before entering el2
>>
>> mrs x22, sp_el1
>> - mrs x23, elr_el1
>> - mrs x24, spsr_el1
>> +
>> + mrs_el1(x23, elr)
>> + mrs_el1(x24, spsr)
>> +
>> + add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
>> + stp x20, x21, [x3, #8] // HACK: Store to the regs after SP_EL0
>>
>> str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
>> str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
>> str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
>> .endm
>>
>> -.macro restore_common_regs
>> +.macro restore_el1_state
>> // x2: base address for cpu context
>> // x3: tmp register
>>
>> + add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
>> + ldp x20, x21, [x3, #8] // Same hack again, get guest PC and pstate
>> +
>> ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
>> ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
>> ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
>>
>> + msr_hyp(ELR, x20) // pc on return from el2
>> + msr_hyp(SPSR, x21) // pstate on return from el2
>> +
>> msr sp_el1, x22
>> - msr elr_el1, x23
>> - msr spsr_el1, x24
>>
>> - add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
>> - ldp x19, x20, [x3]
>> - ldr x21, [x3, #16]
>> + msr_el1(elr, x23)
>> + msr_el1(spsr, x24)
>> +.endm
>>
>> +.macro restore_common_regs
>> + // x2: base address for cpu context
>> + // x3: tmp register
>> +
>> + ldr x19, [x2, #CPU_XREG_OFFSET(31)] // SP_EL0
>> msr sp_el0, x19
>> - msr elr_el2, x20 // pc on return from el2
>> - msr spsr_el2, x21 // pstate on return from el2
>>
>> add x3, x2, #CPU_XREG_OFFSET(19)
>> ldp x19, x20, [x3]
>> @@ -113,9 +125,15 @@
>>
>> .macro save_host_regs
>> save_common_regs
>> +ifnvhe nop, "b skip_el1_save"
>> + save_el1_state
>> +skip_el1_save:
>> .endm
>>
>> .macro restore_host_regs
>> +ifnvhe nop, "b skip_el1_restore"
>> + restore_el1_state
>> +skip_el1_restore:
>> restore_common_regs
>> .endm
>>
>> @@ -159,6 +177,7 @@
>> stp x6, x7, [x3, #16]
>>
>> save_common_regs
>> + save_el1_state
>> .endm
>>
>> .macro restore_guest_regs
>> @@ -184,6 +203,7 @@
>> ldr x18, [x3, #144]
>>
>> // x19-x29, lr, sp*, elr*, spsr*
>> + restore_el1_state
>> restore_common_regs
>>
>> // Last bits of the 64bit state
>> @@ -203,6 +223,38 @@
>> * In other words, don't touch any of these unless you know what
>> * you are doing.
>> */
>> +
>> +.macro save_shared_sysregs
>> + // x2: base address for cpu context
>> + // x3: tmp register
>> +
>> + add x3, x2, #CPU_SYSREG_OFFSET(TPIDR_EL0)
>> +
>> + mrs x4, tpidr_el0
>> + mrs x5, tpidrro_el0
>> + mrs x6, tpidr_el1
>> + mrs x7, actlr_el1
>> +
>> + stp x4, x5, [x3]
>> + stp x6, x7, [x3, #16]
>> +.endm
>> +
>> +.macro restore_shared_sysregs
>> + // x2: base address for cpu context
>> + // x3: tmp register
>> +
>> + add x3, x2, #CPU_SYSREG_OFFSET(TPIDR_EL0)
>> +
>> + ldp x4, x5, [x3]
>> + ldp x6, x7, [x3, #16]
>> +
>> + msr tpidr_el0, x4
>> + msr tpidrro_el0, x5
>> + msr tpidr_el1, x6
>> + msr actlr_el1, x7
>> +.endm
>> +
>> +
>> .macro save_sysregs
>> // x2: base address for cpu context
>> // x3: tmp register
>> @@ -211,26 +263,27 @@
>>
>> mrs x4, vmpidr_el2
>> mrs x5, csselr_el1
>> - mrs x6, sctlr_el1
>> - mrs x7, actlr_el1
>> - mrs x8, cpacr_el1
>> - mrs x9, ttbr0_el1
>> - mrs x10, ttbr1_el1
>> - mrs x11, tcr_el1
>> - mrs x12, esr_el1
>> - mrs x13, afsr0_el1
>> - mrs x14, afsr1_el1
>> - mrs x15, far_el1
>> - mrs x16, mair_el1
>> - mrs x17, vbar_el1
>> - mrs x18, contextidr_el1
>> - mrs x19, tpidr_el0
>> - mrs x20, tpidrro_el0
>> - mrs x21, tpidr_el1
>> - mrs x22, amair_el1
>> - mrs x23, cntkctl_el1
>> - mrs x24, par_el1
>> - mrs x25, mdscr_el1
>> + mrs_el1(x6, sctlr)
>> + mrs_el1(x7, amair)
>> + mrs_el1(x8, cpacr)
>> + mrs_el1(x9, ttbr0)
>> + mrs_el1(x10, ttbr1)
>> + mrs_el1(x11, tcr)
>> + mrs_el1(x12, esr)
>> + mrs_el1(x13, afsr0)
>> + mrs_el1(x14, afsr1)
>> + mrs_el1(x15, far)
>> + mrs_el1(x16, mair)
>> + mrs_el1(x17, vbar)
>> + mrs_el1(x18, contextidr)
>> + mrs_el1(x19, cntkctl)
>> + mrs x20, par_el1
>> + mrs x21, mdscr_el1
>> +
>> + mrs x22, tpidr_el0
>> + mrs x23, tpidrro_el0
>> + mrs x24, tpidr_el1
>> + mrs x25, actlr_el1
>>
>> stp x4, x5, [x3]
>> stp x6, x7, [x3, #16]
>> @@ -460,26 +513,27 @@
>>
>> msr vmpidr_el2, x4
>> msr csselr_el1, x5
>> - msr sctlr_el1, x6
>> - msr actlr_el1, x7
>> - msr cpacr_el1, x8
>> - msr ttbr0_el1, x9
>> - msr ttbr1_el1, x10
>> - msr tcr_el1, x11
>> - msr esr_el1, x12
>> - msr afsr0_el1, x13
>> - msr afsr1_el1, x14
>> - msr far_el1, x15
>> - msr mair_el1, x16
>> - msr vbar_el1, x17
>> - msr contextidr_el1, x18
>> - msr tpidr_el0, x19
>> - msr tpidrro_el0, x20
>> - msr tpidr_el1, x21
>> - msr amair_el1, x22
>> - msr cntkctl_el1, x23
>> - msr par_el1, x24
>> - msr mdscr_el1, x25
>> + msr_el1(sctlr, x6)
>> + msr_el1(amair, x7)
>> + msr_el1(cpacr, x8)
>> + msr_el1(ttbr0, x9)
>> + msr_el1(ttbr1, x10)
>> + msr_el1(tcr, x11)
>> + msr_el1(esr, x12)
>> + msr_el1(afsr0, x13)
>> + msr_el1(afsr1, x14)
>> + msr_el1(far, x15)
>> + msr_el1(mair, x16)
>> + msr_el1(vbar, x17)
>> + msr_el1(contextidr, x18)
>> + msr_el1(cntkctl, x19)
>> + msr par_el1, x20
>> + msr mdscr_el1, x21
>> +
>> + msr tpidr_el0, x22
>> + msr tpidrro_el0, x23
>> + msr tpidr_el1, x24
>> + msr actlr_el1, x25
>> .endm
>>
>> .macro restore_debug
>> @@ -779,8 +833,11 @@
>> .macro activate_traps
>> ldr x2, [x0, #VCPU_HCR_EL2]
>> msr hcr_el2, x2
>> - mov x2, #CPTR_EL2_TTA
>> - msr cptr_el2, x2
>> + adr x3, __kvm_hyp_vector
>> +ifnvhe nop, "msr vbar_el1, x3"
>> +ifnvhe nop, "mrs x2, cpacr_el1"
>> +ifnvhe _S_(ldr x2, =(CPTR_EL2_TTA)), "orr x2, x2, #(1 << 28)"
>> +ifnvhe "msr cptr_el2, x2", "msr cpacr_el1, x2"
>>
>> mov x2, #(1 << 15) // Trap CP15 Cr=15
>> msr hstr_el2, x2
>> @@ -803,12 +860,20 @@
>> ifnvhe _S_(mov x2, #HCR_RW), _S_(mov x2, #HCR_RW|HCR_TGE)
>> ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
>> msr hcr_el2, x2
>> - msr cptr_el2, xzr
>> +
>> +ifnvhe nop, "mrs x2, cpacr_el1"
>> +ifnvhe nop, "movn x3, #(1 << 12), lsl #16"
>> +ifnvhe nop, "and x2, x2, x3"
>> +ifnvhe "msr cptr_el2, xzr", "msr cpacr_el1, x2"
>> msr hstr_el2, xzr
>>
>> mrs x2, mdcr_el2
>> and x2, x2, #MDCR_EL2_HPMN_MASK
>> msr mdcr_el2, x2
>> +
>> + adrp x2, vectors
>> + add x2, x2, #:lo12:vectors
>> +ifnvhe nop, "msr vbar_el1, x2"
>> .endm
>>
>> .macro activate_vm
>> @@ -853,15 +918,15 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
>> ldr w3, [x2, #KVM_TIMER_ENABLED]
>> cbz w3, 1f
>>
>> - mrs x3, cntv_ctl_el0
>> + mrs_el0(x3, cntv_ctl)
>> and x3, x3, #3
>> str w3, [x0, #VCPU_TIMER_CNTV_CTL]
>> bic x3, x3, #1 // Clear Enable
>> - msr cntv_ctl_el0, x3
>> + msr_el0(cntv_ctl, x3)
>>
>> isb
>>
>> - mrs x3, cntv_cval_el0
>> + mrs_el0(x3, cntv_cval)
>> str x3, [x0, #VCPU_TIMER_CNTV_CVAL]
>>
>> 1:
>> @@ -871,7 +936,7 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
>> msr cnthctl_el2, x2
>>
>> // Clear cntvoff for the host
>> - msr cntvoff_el2, xzr
>> +ifnvhe "msr cntvoff_el2, xzr", nop
>> .endm
>>
>> .macro restore_timer_state
>> @@ -891,12 +956,12 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
>> ldr x3, [x2, #KVM_TIMER_CNTVOFF]
>> msr cntvoff_el2, x3
>> ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL]
>> - msr cntv_cval_el0, x2
>> + msr_el0(cntv_cval, x2)
>> isb
>>
>> ldr w2, [x0, #VCPU_TIMER_CNTV_CTL]
>> and x2, x2, #3
>> - msr cntv_ctl_el0, x2
>> + msr_el0(cntv_ctl, x2)
>> 1:
>> .endm
>>
>> @@ -945,8 +1010,10 @@ ENTRY(__kvm_vcpu_run)
>>
>> save_host_regs
>> bl __save_fpsimd
>> - bl __save_sysregs
>> -
>> +ifnvhe "bl __save_sysregs", nop
>> +ifnvhe "b 1f", nop
>> + save_shared_sysregs
>> +1:
>> compute_debug_state 1f
>> bl __save_debug
>> 1:
>> @@ -997,7 +1064,10 @@ __kvm_vcpu_return:
>> ldr x2, [x0, #VCPU_HOST_CONTEXT]
>> kern_hyp_va x2
>>
>> - bl __restore_sysregs
>> +ifnvhe "bl __restore_sysregs", nop
>> +ifnvhe "b 1f", nop
>> + restore_shared_sysregs
>> +1:
>> bl __restore_fpsimd
>>
>> skip_debug_state x3, 1f
>> @@ -1104,6 +1174,8 @@ __kvm_hyp_panic:
>> mrs x6, par_el1
>> mrs x7, tpidr_el2
>>
>> +ifnvhe nop, "b panic"
>> +
>> mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
>> PSR_MODE_EL1h)
>> msr spsr_el2, lr
>> @@ -1248,7 +1320,7 @@ el1_trap:
>> * As such, we can use the EL1 translation regime, and don't have
>> * to distinguish between EL0 and EL1 access.
>> */
>> - mrs x2, far_el2
>> +ifnvhe "mrs x2, far_el2", "mrs x2, far_el1"
>> at s1e1r, x2
>> isb
>>
>> @@ -1262,7 +1334,7 @@ el1_trap:
>> b 2f
>>
>> 1: mrs x3, hpfar_el2
>> - mrs x2, far_el2
>> +ifnvhe "mrs x2, far_el2", "mrs x2, far_el1"
>>
>> 2: mrs x0, tpidr_el2
>> str w1, [x0, #VCPU_ESR_EL2]
>> diff --git a/arch/arm64/kvm/vhe-macros.h b/arch/arm64/kvm/vhe-macros.h
>> index da7f9da..1e94235 100644
>> --- a/arch/arm64/kvm/vhe-macros.h
>> +++ b/arch/arm64/kvm/vhe-macros.h
>> @@ -31,6 +31,24 @@
>> alternative_insn "\nonvhe", "\vhe", ARM64_HAS_VIRT_HOST_EXTN
>> .endm
>>
>> +#define mrs_el0(reg, sysreg) \
>> + ifnvhe _S_(mrs reg, sysreg##_EL0), _S_(mrs_s reg, sysreg##_EL02)
>> +
>> +#define msr_el0(sysreg, reg) \
>> + ifnvhe _S_(msr sysreg##_EL0, reg), _S_(msr_s sysreg##_EL02, reg)
>> +
>> +#define mrs_el1(reg, sysreg) \
>> + ifnvhe _S_(mrs reg, sysreg##_EL1), _S_(mrs_s reg, sysreg##_EL12)
>> +
>> +#define msr_el1(sysreg, reg) \
>> + ifnvhe _S_(msr sysreg##_EL1, reg), _S_(msr_s sysreg##_EL12, reg)
>> +
>> +#define mrs_hyp(reg, sysreg) \
>> + ifnvhe _S_(mrs reg, sysreg##_EL2), _S_(mrs reg, sysreg##_EL1)
>> +
>> +#define msr_hyp(sysreg, reg) \
>> + ifnvhe _S_(msr sysreg##_EL2, reg), _S_(msr sysreg##_EL1, reg)
>> +
>> #endif
>>
>> #endif /*__ARM64_VHE_MACROS_H__ */
>>
>


--
Jazz is not dead. It just smells funny...

2015-07-09 09:43:00

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH 01/13] arm/arm64: Add new is_kernel_in_hyp_mode predicate

Hi,

> +static inline bool is_kernel_in_hyp_mode(void)
> +{
> + u64 el;
> +
> + asm("mrs %0, CurrentEL" : "=r" (el));
> + return el == CurrentEL_EL2;
> +}

If you can include cputype.h, I think this can be:

static inline bool is_kernel_in_hyp_mode(void)
{
return read_cpuid(CurrentEL) == CurrentEL_EL2;
}

Mark.

2015-07-09 09:48:53

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH 03/13] arm64: Add ARM64_HAS_VIRT_HOST_EXTN feature

On Wed, Jul 08, 2015 at 05:19:06PM +0100, Marc Zyngier wrote:
> Add a new ARM64_HAS_VIRT_HOST_EXTN features to indicate that the
> CPU has the ARMv8,1 VHE capability.

Nit: s/,/./

It's probably worth mentioning somewhere that we have to check CurrentEL
rather than a feature register in case some prior software dropped us to
EL1N (e.g. if we're a guest under this scheme).

Mark.

>
> This will be used to trigger kernel patching in KVM.
>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> arch/arm64/include/asm/cpufeature.h | 3 ++-
> arch/arm64/kernel/cpufeature.c | 11 +++++++++++
> 2 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index c104421..6c3742d 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -25,8 +25,9 @@
> #define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1
> #define ARM64_WORKAROUND_845719 2
> #define ARM64_HAS_SYSREG_GIC_CPUIF 3
> +#define ARM64_HAS_VIRT_HOST_EXTN 4
>
> -#define ARM64_NCAPS 4
> +#define ARM64_NCAPS 5
>
> #ifndef __ASSEMBLY__
>
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 5ad86ce..e1dcd63 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -21,6 +21,7 @@
> #include <linux/types.h>
> #include <asm/cpu.h>
> #include <asm/cpufeature.h>
> +#include <asm/virt.h>
>
> static bool
> has_id_aa64pfr0_feature(const struct arm64_cpu_capabilities *entry)
> @@ -31,6 +32,11 @@ has_id_aa64pfr0_feature(const struct arm64_cpu_capabilities *entry)
> return (val & entry->register_mask) == entry->register_value;
> }
>
> +static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
> +{
> + return is_kernel_in_hyp_mode();
> +}
> +
> static const struct arm64_cpu_capabilities arm64_features[] = {
> {
> .desc = "GIC system register CPU interface",
> @@ -39,6 +45,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
> .register_mask = (0xf << 24),
> .register_value = (1 << 24),
> },
> + {
> + .desc = "Virtualization Host Extensions",
> + .capability = ARM64_HAS_VIRT_HOST_EXTN,
> + .matches = runs_at_el2,
> + },
> {},
> };
>
> --
> 2.1.4
>
> _______________________________________________
> kvmarm mailing list
> [email protected]
> https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
>

2015-07-09 09:59:49

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH 03/13] arm64: Add ARM64_HAS_VIRT_HOST_EXTN feature

On 09/07/15 10:48, Mark Rutland wrote:
> On Wed, Jul 08, 2015 at 05:19:06PM +0100, Marc Zyngier wrote:
>> Add a new ARM64_HAS_VIRT_HOST_EXTN features to indicate that the
>> CPU has the ARMv8,1 VHE capability.
>
> Nit: s/,/./
>
> It's probably worth mentioning somewhere that we have to check CurrentEL
> rather than a feature register in case some prior software dropped us to
> EL1N (e.g. if we're a guest under this scheme).

Good point, this is a leftover from a previous version that actually
checked the feature register. I'll clean up the commit log.

Thanks,

M.

> Mark.
>
>>
>> This will be used to trigger kernel patching in KVM.
>>
>> Signed-off-by: Marc Zyngier <[email protected]>
>> ---
>> arch/arm64/include/asm/cpufeature.h | 3 ++-
>> arch/arm64/kernel/cpufeature.c | 11 +++++++++++
>> 2 files changed, 13 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
>> index c104421..6c3742d 100644
>> --- a/arch/arm64/include/asm/cpufeature.h
>> +++ b/arch/arm64/include/asm/cpufeature.h
>> @@ -25,8 +25,9 @@
>> #define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1
>> #define ARM64_WORKAROUND_845719 2
>> #define ARM64_HAS_SYSREG_GIC_CPUIF 3
>> +#define ARM64_HAS_VIRT_HOST_EXTN 4
>>
>> -#define ARM64_NCAPS 4
>> +#define ARM64_NCAPS 5
>>
>> #ifndef __ASSEMBLY__
>>
>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>> index 5ad86ce..e1dcd63 100644
>> --- a/arch/arm64/kernel/cpufeature.c
>> +++ b/arch/arm64/kernel/cpufeature.c
>> @@ -21,6 +21,7 @@
>> #include <linux/types.h>
>> #include <asm/cpu.h>
>> #include <asm/cpufeature.h>
>> +#include <asm/virt.h>
>>
>> static bool
>> has_id_aa64pfr0_feature(const struct arm64_cpu_capabilities *entry)
>> @@ -31,6 +32,11 @@ has_id_aa64pfr0_feature(const struct arm64_cpu_capabilities *entry)
>> return (val & entry->register_mask) == entry->register_value;
>> }
>>
>> +static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
>> +{
>> + return is_kernel_in_hyp_mode();
>> +}
>> +
>> static const struct arm64_cpu_capabilities arm64_features[] = {
>> {
>> .desc = "GIC system register CPU interface",
>> @@ -39,6 +45,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>> .register_mask = (0xf << 24),
>> .register_value = (1 << 24),
>> },
>> + {
>> + .desc = "Virtualization Host Extensions",
>> + .capability = ARM64_HAS_VIRT_HOST_EXTN,
>> + .matches = runs_at_el2,
>> + },
>> {},
>> };
>>
>> --
>> 2.1.4
>>
>> _______________________________________________
>> kvmarm mailing list
>> [email protected]
>> https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
>>
>


--
Jazz is not dead. It just smells funny...

2015-07-09 10:05:43

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH 01/13] arm/arm64: Add new is_kernel_in_hyp_mode predicate

On 09/07/15 10:42, Mark Rutland wrote:
> Hi,
>
>> +static inline bool is_kernel_in_hyp_mode(void)
>> +{
>> + u64 el;
>> +
>> + asm("mrs %0, CurrentEL" : "=r" (el));
>> + return el == CurrentEL_EL2;
>> +}
>
> If you can include cputype.h, I think this can be:
>
> static inline bool is_kernel_in_hyp_mode(void)
> {
> return read_cpuid(CurrentEL) == CurrentEL_EL2;
> }

This would indeed work, but CurrentEL is hardly an ID register. I feel
slightly uncomfortable using read_cpuid (which might return a cached
version at some point) for random system registers.

Thoughts?

M.
--
Jazz is not dead. It just smells funny...

2015-07-09 10:13:14

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH 01/13] arm/arm64: Add new is_kernel_in_hyp_mode predicate

On Thu, Jul 09, 2015 at 11:05:34AM +0100, Marc Zyngier wrote:
> On 09/07/15 10:42, Mark Rutland wrote:
> > Hi,
> >
> >> +static inline bool is_kernel_in_hyp_mode(void)
> >> +{
> >> + u64 el;
> >> +
> >> + asm("mrs %0, CurrentEL" : "=r" (el));
> >> + return el == CurrentEL_EL2;
> >> +}
> >
> > If you can include cputype.h, I think this can be:
> >
> > static inline bool is_kernel_in_hyp_mode(void)
> > {
> > return read_cpuid(CurrentEL) == CurrentEL_EL2;
> > }
>
> This would indeed work, but CurrentEL is hardly an ID register. I feel
> slightly uncomfortable using read_cpuid (which might return a cached
> version at some point) for random system registers.
>
> Thoughts?

I have no strong feelings either way, but I agree with the general
uneasiness w.r.t. what read_cpuid can be expected to do.

Elsewhere we just use inline asm to read non CPUID system registers, so
let's leave your patch as it was.

Thanks,
Mark.

2015-07-09 20:59:00

by Mario Smarduch

[permalink] [raw]
Subject: Re: [PATCH 09/13] arm64: KVM: VHE: Add alternatives for VHE-enabled world-switch

On 07/09/2015 01:06 AM, Marc Zyngier wrote:
> Hi Mario,
>
> On 09/07/15 02:29, Mario Smarduch wrote:
>> On 07/08/2015 09:19 AM, Marc Zyngier wrote:
>>> In order to switch between host and guest, a VHE-enabled kernel
>>> must use different accessors for certain system registers.
>>>
>>> This patch uses runtime patching to use the right instruction
>>> when required...
>>>
>>> Signed-off-by: Marc Zyngier <[email protected]>
>>> ---
>>> arch/arm64/include/asm/kvm_asm.h | 40 ++++++--
>>> arch/arm64/kvm/hyp.S | 210 ++++++++++++++++++++++++++-------------
>>> arch/arm64/kvm/vhe-macros.h | 18 ++++
>>> 3 files changed, 191 insertions(+), 77 deletions(-)
>>>
>> [....]
>>> * Author: Marc Zyngier <[email protected]>
>>> *
>>> * This program is free software; you can redistribute it and/or modify
>>> @@ -67,40 +67,52 @@
>>> stp x29, lr, [x3, #80]
>>>
>>> mrs x19, sp_el0
>>> - mrs x20, elr_el2 // pc before entering el2
>>> - mrs x21, spsr_el2 // pstate before entering el2
>>> + str x19, [x3, #96]
>>> +.endm
>>>
>>> - stp x19, x20, [x3, #96]
>>> - str x21, [x3, #112]
>>
>> Hi Marc,
>>
>> trying to make a little sense out of this :)
>
> Don't even try, it hurts... ;-)
>
>> In the case of VHE kernel the two 'mrs_hyp()' and 'mrs_el1()'
>> calls would be accessing same registers - namely EL1 variants?
>> For non VHE EL2, EL1?
>>
>> The mrs_s and sysreg_EL12 are new, not sure what these mean.
>
> mrs_s and msr_s are just macros that deal with system registers that
> the assembler doesn't know about (yet). They have been in (moderate) use
> for about a year, and have been introduced with the GICv3 support.
>
> See arch/arm64/include/asm/sysreg.h for the gory details.
>
> Now, on to sysreg_EL12: The main idea with VHE is that anything that
> used to run at EL1 (the kernel) can now run unmodified at EL2, and that
> it is the EL2 software that has to change to deal with it.
>
> So when the kernel uses VHE and runs at EL2, an access to sysreg_EL1
> really accesses sysreg_EL2, transparently. This is what makes it
> possible to run the kernel at EL2 without any change.
>
> But when the KVM world switch wants to access a guest register, it
> cannot use sysreg_EL1 anymore (that would hit on the EL2 register
> because of the above rule). For this, it must use sysreg_EL12 which
> effectively means "access the EL1 register from EL2".
>
> As a consequence, we have the following rules:
> - non-VHE: msr_el1 uses EL1, msr_hyp uses EL2
> - VHE: msr_el1 uses EL12, msr_hyp uses EL1
>
> Does this help?

Yep it sure does, msr/mrs_hyp() and 12 naming had me confused.

Thanks!
>
> M.
>
>> - Mario
>>
>>> +.macro save_el1_state
>>> + mrs_hyp(x20, ELR) // pc before entering el2
>>> + mrs_hyp(x21, SPSR) // pstate before entering el2
>>>
>>> mrs x22, sp_el1
>>> - mrs x23, elr_el1
>>> - mrs x24, spsr_el1
>>> +
>>> + mrs_el1(x23, elr)
>>> + mrs_el1(x24, spsr)
>>> +
>>> + add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
>>> + stp x20, x21, [x3, #8] // HACK: Store to the regs after SP_EL0
>>>
>>> str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
>>> str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
>>> str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
>>> .endm
>>>
>>> -.macro restore_common_regs
>>> +.macro restore_el1_state
>>> // x2: base address for cpu context
>>> // x3: tmp register
>>>
>>> + add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
>>> + ldp x20, x21, [x3, #8] // Same hack again, get guest PC and pstate
>>> +
>>> ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
>>> ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
>>> ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
>>>
>>> + msr_hyp(ELR, x20) // pc on return from el2
>>> + msr_hyp(SPSR, x21) // pstate on return from el2
>>> +
>>> msr sp_el1, x22
>>> - msr elr_el1, x23
>>> - msr spsr_el1, x24
>>>
>>> - add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
>>> - ldp x19, x20, [x3]
>>> - ldr x21, [x3, #16]
>>> + msr_el1(elr, x23)
>>> + msr_el1(spsr, x24)
>>> +.endm
>>>
>>> +.macro restore_common_regs
>>> + // x2: base address for cpu context
>>> + // x3: tmp register
>>> +
>>> + ldr x19, [x2, #CPU_XREG_OFFSET(31)] // SP_EL0
>>> msr sp_el0, x19
>>> - msr elr_el2, x20 // pc on return from el2
>>> - msr spsr_el2, x21 // pstate on return from el2
>>>
>>> add x3, x2, #CPU_XREG_OFFSET(19)
>>> ldp x19, x20, [x3]
>>> @@ -113,9 +125,15 @@
>>>
>>> .macro save_host_regs
>>> save_common_regs
>>> +ifnvhe nop, "b skip_el1_save"
>>> + save_el1_state
>>> +skip_el1_save:
>>> .endm
>>>
>>> .macro restore_host_regs
>>> +ifnvhe nop, "b skip_el1_restore"
>>> + restore_el1_state
>>> +skip_el1_restore:
>>> restore_common_regs
>>> .endm
>>>
>>> @@ -159,6 +177,7 @@
>>> stp x6, x7, [x3, #16]
>>>
>>> save_common_regs
>>> + save_el1_state
>>> .endm
>>>
>>> .macro restore_guest_regs
>>> @@ -184,6 +203,7 @@
>>> ldr x18, [x3, #144]
>>>
>>> // x19-x29, lr, sp*, elr*, spsr*
>>> + restore_el1_state
>>> restore_common_regs
>>>
>>> // Last bits of the 64bit state
>>> @@ -203,6 +223,38 @@
>>> * In other words, don't touch any of these unless you know what
>>> * you are doing.
>>> */
>>> +
>>> +.macro save_shared_sysregs
>>> + // x2: base address for cpu context
>>> + // x3: tmp register
>>> +
>>> + add x3, x2, #CPU_SYSREG_OFFSET(TPIDR_EL0)
>>> +
>>> + mrs x4, tpidr_el0
>>> + mrs x5, tpidrro_el0
>>> + mrs x6, tpidr_el1
>>> + mrs x7, actlr_el1
>>> +
>>> + stp x4, x5, [x3]
>>> + stp x6, x7, [x3, #16]
>>> +.endm
>>> +
>>> +.macro restore_shared_sysregs
>>> + // x2: base address for cpu context
>>> + // x3: tmp register
>>> +
>>> + add x3, x2, #CPU_SYSREG_OFFSET(TPIDR_EL0)
>>> +
>>> + ldp x4, x5, [x3]
>>> + ldp x6, x7, [x3, #16]
>>> +
>>> + msr tpidr_el0, x4
>>> + msr tpidrro_el0, x5
>>> + msr tpidr_el1, x6
>>> + msr actlr_el1, x7
>>> +.endm
>>> +
>>> +
>>> .macro save_sysregs
>>> // x2: base address for cpu context
>>> // x3: tmp register
>>> @@ -211,26 +263,27 @@
>>>
>>> mrs x4, vmpidr_el2
>>> mrs x5, csselr_el1
>>> - mrs x6, sctlr_el1
>>> - mrs x7, actlr_el1
>>> - mrs x8, cpacr_el1
>>> - mrs x9, ttbr0_el1
>>> - mrs x10, ttbr1_el1
>>> - mrs x11, tcr_el1
>>> - mrs x12, esr_el1
>>> - mrs x13, afsr0_el1
>>> - mrs x14, afsr1_el1
>>> - mrs x15, far_el1
>>> - mrs x16, mair_el1
>>> - mrs x17, vbar_el1
>>> - mrs x18, contextidr_el1
>>> - mrs x19, tpidr_el0
>>> - mrs x20, tpidrro_el0
>>> - mrs x21, tpidr_el1
>>> - mrs x22, amair_el1
>>> - mrs x23, cntkctl_el1
>>> - mrs x24, par_el1
>>> - mrs x25, mdscr_el1
>>> + mrs_el1(x6, sctlr)
>>> + mrs_el1(x7, amair)
>>> + mrs_el1(x8, cpacr)
>>> + mrs_el1(x9, ttbr0)
>>> + mrs_el1(x10, ttbr1)
>>> + mrs_el1(x11, tcr)
>>> + mrs_el1(x12, esr)
>>> + mrs_el1(x13, afsr0)
>>> + mrs_el1(x14, afsr1)
>>> + mrs_el1(x15, far)
>>> + mrs_el1(x16, mair)
>>> + mrs_el1(x17, vbar)
>>> + mrs_el1(x18, contextidr)
>>> + mrs_el1(x19, cntkctl)
>>> + mrs x20, par_el1
>>> + mrs x21, mdscr_el1
>>> +
>>> + mrs x22, tpidr_el0
>>> + mrs x23, tpidrro_el0
>>> + mrs x24, tpidr_el1
>>> + mrs x25, actlr_el1
>>>
>>> stp x4, x5, [x3]
>>> stp x6, x7, [x3, #16]
>>> @@ -460,26 +513,27 @@
>>>
>>> msr vmpidr_el2, x4
>>> msr csselr_el1, x5
>>> - msr sctlr_el1, x6
>>> - msr actlr_el1, x7
>>> - msr cpacr_el1, x8
>>> - msr ttbr0_el1, x9
>>> - msr ttbr1_el1, x10
>>> - msr tcr_el1, x11
>>> - msr esr_el1, x12
>>> - msr afsr0_el1, x13
>>> - msr afsr1_el1, x14
>>> - msr far_el1, x15
>>> - msr mair_el1, x16
>>> - msr vbar_el1, x17
>>> - msr contextidr_el1, x18
>>> - msr tpidr_el0, x19
>>> - msr tpidrro_el0, x20
>>> - msr tpidr_el1, x21
>>> - msr amair_el1, x22
>>> - msr cntkctl_el1, x23
>>> - msr par_el1, x24
>>> - msr mdscr_el1, x25
>>> + msr_el1(sctlr, x6)
>>> + msr_el1(amair, x7)
>>> + msr_el1(cpacr, x8)
>>> + msr_el1(ttbr0, x9)
>>> + msr_el1(ttbr1, x10)
>>> + msr_el1(tcr, x11)
>>> + msr_el1(esr, x12)
>>> + msr_el1(afsr0, x13)
>>> + msr_el1(afsr1, x14)
>>> + msr_el1(far, x15)
>>> + msr_el1(mair, x16)
>>> + msr_el1(vbar, x17)
>>> + msr_el1(contextidr, x18)
>>> + msr_el1(cntkctl, x19)
>>> + msr par_el1, x20
>>> + msr mdscr_el1, x21
>>> +
>>> + msr tpidr_el0, x22
>>> + msr tpidrro_el0, x23
>>> + msr tpidr_el1, x24
>>> + msr actlr_el1, x25
>>> .endm
>>>
>>> .macro restore_debug
>>> @@ -779,8 +833,11 @@
>>> .macro activate_traps
>>> ldr x2, [x0, #VCPU_HCR_EL2]
>>> msr hcr_el2, x2
>>> - mov x2, #CPTR_EL2_TTA
>>> - msr cptr_el2, x2
>>> + adr x3, __kvm_hyp_vector
>>> +ifnvhe nop, "msr vbar_el1, x3"
>>> +ifnvhe nop, "mrs x2, cpacr_el1"
>>> +ifnvhe _S_(ldr x2, =(CPTR_EL2_TTA)), "orr x2, x2, #(1 << 28)"
>>> +ifnvhe "msr cptr_el2, x2", "msr cpacr_el1, x2"
>>>
>>> mov x2, #(1 << 15) // Trap CP15 Cr=15
>>> msr hstr_el2, x2
>>> @@ -803,12 +860,20 @@
>>> ifnvhe _S_(mov x2, #HCR_RW), _S_(mov x2, #HCR_RW|HCR_TGE)
>>> ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
>>> msr hcr_el2, x2
>>> - msr cptr_el2, xzr
>>> +
>>> +ifnvhe nop, "mrs x2, cpacr_el1"
>>> +ifnvhe nop, "movn x3, #(1 << 12), lsl #16"
>>> +ifnvhe nop, "and x2, x2, x3"
>>> +ifnvhe "msr cptr_el2, xzr", "msr cpacr_el1, x2"
>>> msr hstr_el2, xzr
>>>
>>> mrs x2, mdcr_el2
>>> and x2, x2, #MDCR_EL2_HPMN_MASK
>>> msr mdcr_el2, x2
>>> +
>>> + adrp x2, vectors
>>> + add x2, x2, #:lo12:vectors
>>> +ifnvhe nop, "msr vbar_el1, x2"
>>> .endm
>>>
>>> .macro activate_vm
>>> @@ -853,15 +918,15 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
>>> ldr w3, [x2, #KVM_TIMER_ENABLED]
>>> cbz w3, 1f
>>>
>>> - mrs x3, cntv_ctl_el0
>>> + mrs_el0(x3, cntv_ctl)
>>> and x3, x3, #3
>>> str w3, [x0, #VCPU_TIMER_CNTV_CTL]
>>> bic x3, x3, #1 // Clear Enable
>>> - msr cntv_ctl_el0, x3
>>> + msr_el0(cntv_ctl, x3)
>>>
>>> isb
>>>
>>> - mrs x3, cntv_cval_el0
>>> + mrs_el0(x3, cntv_cval)
>>> str x3, [x0, #VCPU_TIMER_CNTV_CVAL]
>>>
>>> 1:
>>> @@ -871,7 +936,7 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
>>> msr cnthctl_el2, x2
>>>
>>> // Clear cntvoff for the host
>>> - msr cntvoff_el2, xzr
>>> +ifnvhe "msr cntvoff_el2, xzr", nop
>>> .endm
>>>
>>> .macro restore_timer_state
>>> @@ -891,12 +956,12 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
>>> ldr x3, [x2, #KVM_TIMER_CNTVOFF]
>>> msr cntvoff_el2, x3
>>> ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL]
>>> - msr cntv_cval_el0, x2
>>> + msr_el0(cntv_cval, x2)
>>> isb
>>>
>>> ldr w2, [x0, #VCPU_TIMER_CNTV_CTL]
>>> and x2, x2, #3
>>> - msr cntv_ctl_el0, x2
>>> + msr_el0(cntv_ctl, x2)
>>> 1:
>>> .endm
>>>
>>> @@ -945,8 +1010,10 @@ ENTRY(__kvm_vcpu_run)
>>>
>>> save_host_regs
>>> bl __save_fpsimd
>>> - bl __save_sysregs
>>> -
>>> +ifnvhe "bl __save_sysregs", nop
>>> +ifnvhe "b 1f", nop
>>> + save_shared_sysregs
>>> +1:
>>> compute_debug_state 1f
>>> bl __save_debug
>>> 1:
>>> @@ -997,7 +1064,10 @@ __kvm_vcpu_return:
>>> ldr x2, [x0, #VCPU_HOST_CONTEXT]
>>> kern_hyp_va x2
>>>
>>> - bl __restore_sysregs
>>> +ifnvhe "bl __restore_sysregs", nop
>>> +ifnvhe "b 1f", nop
>>> + restore_shared_sysregs
>>> +1:
>>> bl __restore_fpsimd
>>>
>>> skip_debug_state x3, 1f
>>> @@ -1104,6 +1174,8 @@ __kvm_hyp_panic:
>>> mrs x6, par_el1
>>> mrs x7, tpidr_el2
>>>
>>> +ifnvhe nop, "b panic"
>>> +
>>> mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
>>> PSR_MODE_EL1h)
>>> msr spsr_el2, lr
>>> @@ -1248,7 +1320,7 @@ el1_trap:
>>> * As such, we can use the EL1 translation regime, and don't have
>>> * to distinguish between EL0 and EL1 access.
>>> */
>>> - mrs x2, far_el2
>>> +ifnvhe "mrs x2, far_el2", "mrs x2, far_el1"
>>> at s1e1r, x2
>>> isb
>>>
>>> @@ -1262,7 +1334,7 @@ el1_trap:
>>> b 2f
>>>
>>> 1: mrs x3, hpfar_el2
>>> - mrs x2, far_el2
>>> +ifnvhe "mrs x2, far_el2", "mrs x2, far_el1"
>>>
>>> 2: mrs x0, tpidr_el2
>>> str w1, [x0, #VCPU_ESR_EL2]
>>> diff --git a/arch/arm64/kvm/vhe-macros.h b/arch/arm64/kvm/vhe-macros.h
>>> index da7f9da..1e94235 100644
>>> --- a/arch/arm64/kvm/vhe-macros.h
>>> +++ b/arch/arm64/kvm/vhe-macros.h
>>> @@ -31,6 +31,24 @@
>>> alternative_insn "\nonvhe", "\vhe", ARM64_HAS_VIRT_HOST_EXTN
>>> .endm
>>>
>>> +#define mrs_el0(reg, sysreg) \
>>> + ifnvhe _S_(mrs reg, sysreg##_EL0), _S_(mrs_s reg, sysreg##_EL02)
>>> +
>>> +#define msr_el0(sysreg, reg) \
>>> + ifnvhe _S_(msr sysreg##_EL0, reg), _S_(msr_s sysreg##_EL02, reg)
>>> +
>>> +#define mrs_el1(reg, sysreg) \
>>> + ifnvhe _S_(mrs reg, sysreg##_EL1), _S_(mrs_s reg, sysreg##_EL12)
>>> +
>>> +#define msr_el1(sysreg, reg) \
>>> + ifnvhe _S_(msr sysreg##_EL1, reg), _S_(msr_s sysreg##_EL12, reg)
>>> +
>>> +#define mrs_hyp(reg, sysreg) \
>>> + ifnvhe _S_(mrs reg, sysreg##_EL2), _S_(mrs reg, sysreg##_EL1)
>>> +
>>> +#define msr_hyp(sysreg, reg) \
>>> + ifnvhe _S_(msr sysreg##_EL2, reg), _S_(msr sysreg##_EL1, reg)
>>> +
>>> #endif
>>>
>>> #endif /*__ARM64_VHE_MACROS_H__ */
>>>
>>
>
>

2015-07-10 11:03:06

by Paolo Bonzini

[permalink] [raw]
Subject: Re: [PATCH 07/13] arm64: KVM: VHE: Patch out use of HVC



On 08/07/2015 19:54, Marc Zyngier wrote:
> Alternatively, I could move it to arch/arm64/include/asm (renamed to
> kvm_vhe_macros.h?), which would solve this issue. I just gave it a go,
> and that seems sensible enough.

Yes, that would do. Alternatively:

* create kvm_hyp_macros.h and put more stuff in there from kvm_mmu.h

* just put it in kvm_host.h

Whatever looks better to you.

Paolo

> Thanks for the suggestion!

2015-07-16 17:48:28

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH 10/13] arm64: Add support for running Linux in EL2 mode

On Wed, Jul 08, 2015 at 05:19:13PM +0100, Marc Zyngier wrote:
> With the ARMv8.1 VHE, the architecture is able to (almost) transparently
> run the kernel at EL2, despite being written for EL1.
>
> This patch takes care of the "almost" part, mostly preventing the kernel
> from dropping from EL2 to EL1, and setting up the HYP configuration.
>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> arch/arm64/kernel/head.S | 23 ++++++++++++++++++++++-
> 1 file changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index c0ff3ce..a179747 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -29,6 +29,7 @@
> #include <asm/asm-offsets.h>
> #include <asm/cache.h>
> #include <asm/cputype.h>
> +#include <asm/kvm_mmu.h>
> #include <asm/memory.h>
> #include <asm/thread_info.h>
> #include <asm/pgtable-hwdef.h>
> @@ -481,8 +482,16 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1
> isb
> ret
>
> + /* Check for VHE being present */
> +2: mrs x2, id_aa64mmfr1_el1
> + ubfx x2, x2, #8, #4
> +
> /* Hyp configuration. */
> -2: mov x0, #(1 << 31) // 64-bit EL1
> + mov x0, #HCR_RW // 64-bit EL1
> + cbz x2, set_hcr
> + orr x0, x0, #HCR_TGE // Enable Host Extensions
> + orr x0, x0, #HCR_E2H
> +set_hcr:
> msr hcr_el2, x0
>
> /* Generic timers. */
> @@ -522,6 +531,9 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
>
> /* Coprocessor traps. */
> mov x0, #0x33ff

These bits are RES0 with VHE enabled, afaict.

> + cbz x2, set_cptr
> + orr x0, x0, #(3 << 20) // Don't trap FP
> +set_cptr:
> msr cptr_el2, x0 // Disable copro. traps to EL2
>
> #ifdef CONFIG_COMPAT
> @@ -531,6 +543,15 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
> /* Stage-2 translation */
> msr vttbr_el2, xzr
>
> + cbz x2, install_el2_stub
> +
> + setup_vtcr x4, x5
> +
> + mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2

You do this in install_el2_stub as well -- can you move it above the cbz?

Will

2015-07-16 18:03:14

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH 11/13] arm64: Panic when VHE and non VHE CPUs coexist

On Wed, Jul 08, 2015 at 05:19:14PM +0100, Marc Zyngier wrote:
> Having both VHE and non-VHE capable CPUs in the same system
> is likely to be a recipe for disaster.
>
> If the boot CPU has VHE, but a secondary is not, we won't be
> able to downgrade and run the kernel at EL1. Add CPU hotplug
> to the mix, and this produces a terrifying mess.
>
> Let's solve the problem once and for all. If you mix VHE and
> non-VHE CPUs in the same system, you deserve to lose, and this
> patch makes sure you don't get a chance.
>
> This is implemented by storing the kernel execution level in
> a global variable. Secondaries will park themselves in a
> WFI loop if they observe a mismatch. Also, the primary CPU
> will detect that the secondary CPU has died on a mismatched
> execution level. Panic will follow.
>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> arch/arm64/include/asm/virt.h | 15 +++++++++++++++
> arch/arm64/kernel/head.S | 15 +++++++++++++++
> arch/arm64/kernel/smp.c | 4 ++++
> 3 files changed, 34 insertions(+)
>
> diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
> index 9f22dd6..8e246f7 100644
> --- a/arch/arm64/include/asm/virt.h
> +++ b/arch/arm64/include/asm/virt.h
> @@ -36,6 +36,11 @@
> */
> extern u32 __boot_cpu_mode[2];
>
> +/*
> + * __run_cpu_mode records the mode the boot CPU uses for the kernel.
> + */
> +extern u32 __run_cpu_mode;
> +
> void __hyp_set_vectors(phys_addr_t phys_vector_base);
> phys_addr_t __hyp_get_vectors(void);
>
> @@ -60,6 +65,16 @@ static inline bool is_kernel_in_hyp_mode(void)
> return el == CurrentEL_EL2;
> }
>
> +static inline bool is_kernel_mode_mismatched(void)
> +{
> + u64 el;
> + u32 mode;
> +
> + asm("mrs %0, CurrentEL" : "=r" (el));
> + mode = ACCESS_ONCE(__run_cpu_mode);
> + return el != mode;

Why the temporary 'mode' variable?

> +}
> +
> /* The section containing the hypervisor text */
> extern char __hyp_text_start[];
> extern char __hyp_text_end[];
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index a179747..318e69f 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -578,7 +578,20 @@ ENTRY(set_cpu_boot_mode_flag)
> 1: str w20, [x1] // This CPU has booted in EL1
> dmb sy
> dc ivac, x1 // Invalidate potentially stale cache line
> + adr_l x1, __run_cpu_mode
> + ldr w0, [x1]
> + mrs x20, CurrentEL

Why x20?

> + str w20, [x1]
> + dmb sy
> + dc ivac, x1 // Invalidate potentially stale cache line

Can we stick __run_cpu_mode and __boot_cpu_mode into a struct in the same
cacheline and avoid the extra maintenance?
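
For illustration, a possible layout (only a sketch; the struct name and
the use of L1_CACHE_BYTES from <linux/cache.h> are assumptions, not
taken from the series):

	/*
	 * Keep both mode words in a single cacheline so that one
	 * dc ivac after an update covers all of them.
	 */
	struct boot_mode_flags {
		u32	boot_cpu_mode[2];	/* EL2/EL1 boot flags */
		u32	run_cpu_mode;		/* EL the kernel runs at */
	} __aligned(L1_CACHE_BYTES);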

> + cbz x0, skip_el_check

w0?

> + cmp x0, x20

w0, w20?

> + bne mismatched_el
> +skip_el_check:
> ret
> +mismatched_el:
> + wfi
> + b mismatched_el
> ENDPROC(set_cpu_boot_mode_flag)
>
> /*
> @@ -593,6 +606,8 @@ ENDPROC(set_cpu_boot_mode_flag)
> ENTRY(__boot_cpu_mode)
> .long BOOT_CPU_MODE_EL2
> .long BOOT_CPU_MODE_EL1
> +ENTRY(__run_cpu_mode)
> + .long 0
> .popsection
>
> #ifdef CONFIG_SMP
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 695801a..b467f51 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -52,6 +52,7 @@
> #include <asm/sections.h>
> #include <asm/tlbflush.h>
> #include <asm/ptrace.h>
> +#include <asm/virt.h>
>
> #define CREATE_TRACE_POINTS
> #include <trace/events/ipi.h>
> @@ -112,6 +113,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
> pr_crit("CPU%u: failed to come online\n", cpu);
> ret = -EIO;
> }
> +
> + if (is_kernel_mode_mismatched())
> + panic("CPU%u: incompatible execution level", cpu);

Might be useful to print the incompatible values, if possible.
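
For example, just dumping the raw values (a sketch only, not from the
series):

	if (is_kernel_mode_mismatched()) {
		u64 el;

		asm("mrs %0, CurrentEL" : "=r" (el));
		panic("CPU%u: incompatible execution level (CurrentEL=%#llx, __run_cpu_mode=%#x)",
		      cpu, el, __run_cpu_mode);
	}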

Will

2015-07-16 18:04:46

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH 03/13] arm64: Add ARM64_HAS_VIRT_HOST_EXTN feature

On Wed, Jul 08, 2015 at 05:19:06PM +0100, Marc Zyngier wrote:
> Add a new ARM64_HAS_VIRT_HOST_EXTN features to indicate that the
> CPU has the ARMv8,1 VHE capability.
>
> This will be used to trigger kernel patching in KVM.
>
> Signed-off-by: Marc Zyngier <[email protected]>

Acked-by: Will Deacon <[email protected]>

Will

2015-07-16 18:08:09

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH 01/13] arm/arm64: Add new is_kernel_in_hyp_mode predicate

On Wed, Jul 08, 2015 at 05:19:04PM +0100, Marc Zyngier wrote:
> With ARMv8.1 VHE extension, it will be possible to run the kernel
> at EL2 (aka HYP mode). In order for the kernel to easily find out
> where it is running, add a new predicate that returns whether or
> not the kernel is in HYP mode.
>
> For completeness, the 32bit code also get such a predicate (always
> returning false) so that code common to both architecture (timers,
> KVM) can use it transparently.
>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> arch/arm/include/asm/virt.h | 5 +++++
> arch/arm64/include/asm/virt.h | 10 ++++++++++
> 2 files changed, 15 insertions(+)

[...]

> +static inline bool is_kernel_in_hyp_mode(void)
> +{
> + u64 el;
> +
> + asm("mrs %0, CurrentEL" : "=r" (el));
> + return el == CurrentEL_EL2;

Missing mask?

Will

2015-08-05 17:57:58

by Catalin Marinas

[permalink] [raw]
Subject: Re: [PATCH 07/13] arm64: KVM: VHE: Patch out use of HVC

On Wed, Jul 08, 2015 at 05:19:10PM +0100, Marc Zyngier wrote:
> --- /dev/null
> +++ b/arch/arm64/kvm/vhe-macros.h
> @@ -0,0 +1,36 @@
> +/*
> + * Copyright (C) 2015 - ARM Ltd
> + * Author: Marc Zyngier <[email protected]>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef __ARM64_VHE_MACROS_H__
> +#define __ARM64_VHE_MACROS_H__
> +
> +#include <asm/alternative.h>
> +#include <asm/cpufeature.h>
> +
> +#ifdef __ASSEMBLY__
> +
> +/* Hack to allow stringification of macros... */
> +#define __S__(a,args...) __stringify(a, ##args)
> +#define _S_(a,args...) __S__(a, args)
> +
> +.macro ifnvhe nonvhe vhe
> + alternative_insn "\nonvhe", "\vhe", ARM64_HAS_VIRT_HOST_EXTN
> +.endm

I always found the alternative_insn tricks hard to read but with
Daniel's patch queued in -next it gets slightly better. If you are not
targeting 4.3, could you do something like (untested):

.macro vhe_if_not
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
.endm

.macro vhe_else
alternative_else CONFIG_ARM64_VHE
.endm

.macro vhe_endif
alternative_endif CONFIG_ARM64_VHE
.endm


The kvm_call_hyp, for example, would become:

ENTRY(kvm_call_hyp)
vhe_if_not
hvc #0
ret
vhe_else
nop
push lr, xzr
vhe_endif


Or you can even ignore defining new vhe_* macros altogether and just use
"alternative_if_not ARM64_HAS_VIRT_HOST_EXTN" directly (more to type)
but you may be able to avoid a vhe-macros.h file.

--
Catalin

2015-08-06 14:08:35

by Catalin Marinas

[permalink] [raw]
Subject: Re: [PATCH 11/13] arm64: Panic when VHE and non VHE CPUs coexist

On Wed, Jul 08, 2015 at 05:19:14PM +0100, Marc Zyngier wrote:
> Having both VHE and non-VHE capable CPUs in the same system
> is likely to be a recipe for disaster.
>
> If the boot CPU has VHE, but a secondary is not, we won't be
> able to downgrade and run the kernel at EL1. Add CPU hotplug
> to the mix, and this produces a terrifying mess.
>
> Let's solve the problem once and for all. If you mix VHE and
> non-VHE CPUs in the same system, you deserve to lose, and this
> patch makes sure you don't get a chance.
>
> This is implemented by storing the kernel execution level in
> a global variable. Secondaries will park themselves in a
> WFI loop if they observe a mismatch. Also, the primary CPU
> will detect that the secondary CPU has died on a mismatched
> execution level. Panic will follow.

We don't have such system yet but we could try it on the model. Do you
know how far the secondary CPU goes? If it gets to the cpuid sanity
checking, we could do the check together with the rest of the features
(we could add a new CHECK_FATAL macro to make this even more obvious):

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 8e797b2fcc01..36cc81c9b9df 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -29,6 +29,7 @@ struct cpuinfo_arm64 {
u32 reg_cntfrq;
u32 reg_dczid;
u32 reg_midr;
+ u32 reg_currentel;

u64 reg_id_aa64dfr0;
u64 reg_id_aa64dfr1;
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index ee6403df9fe4..de3e8903b5a8 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -118,6 +118,14 @@ static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) ||
(ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1);
}
+
+static inline unsigned int read_currentel(void)
+{
+ u64 el;
+ asm("mrs %0, CurrentEL" : "=r" (el));
+ return el;
+}
+
#endif /* __ASSEMBLY__ */

#endif
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 75d5a867e7fb..c0269210fd54 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -132,6 +132,9 @@ static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
/* If different, timekeeping will be broken (especially with KVM) */
diff |= CHECK(cntfrq, boot, cur, cpu);

+ /* Same EL for all CPUs */
+ diff |= CHECK(currentel, boot, cur, cpu);
+
/*
* The kernel uses self-hosted debug features and expects CPUs to
* support identical debug features. We presently need CTX_CMPs, WRPs,
@@ -205,6 +208,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_ctr = read_cpuid_cachetype();
info->reg_dczid = read_cpuid(DCZID_EL0);
info->reg_midr = read_cpuid_id();
+ info->reg_currentel = read_currentel();

info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1);
info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);

2015-08-06 17:43:30

by Catalin Marinas

[permalink] [raw]
Subject: Re: [PATCH 00/13] arm64: Virtualization Host Extension support

On Wed, Jul 08, 2015 at 05:19:03PM +0100, Marc Zyngier wrote:
> Marc Zyngier (13):
> arm/arm64: Add new is_kernel_in_hyp_mode predicate
> arm64: Allow the arch timer to use the HYP timer
> arm64: Add ARM64_HAS_VIRT_HOST_EXTN feature
> arm64: KVM: skip HYP setup when already running in HYP
> arm64: KVM: VHE: macroize VTCR_EL2 setup
> arm64: KVM: VHE: Patch out kern_hyp_va
> arm64: KVM: VHE: Patch out use of HVC
> arm64: KVM: VHE: Preserve VHE config in world switch
> arm64: KVM: VHE: Add alternatives for VHE-enabled world-switch
> arm64: Add support for running Linux in EL2 mode
> arm64: Panic when VHE and non VHE CPUs coexist
> arm64: KVM: Split sysreg save/restore
> arm64: KVM: VHE: Early interrupt handling

Do we need to do anything with the debug code? Do we have any
hardware breakpoints/watchpoints targeting kernel space (kgdb doesn't
seem to support this)?

If a breakpoint's target is EL1, I don't think it will trigger when running
in EL2/VHE mode, in which case we need a different
DBGBCR.{HMC,SSC,PMC} combination - {1,11,00}.

Another random untested patch below but we need to get Will to remember
the code he wrote (and the VHE implications):

diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 52b484b6aa1a..197af39a5ffb 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -34,8 +34,12 @@ struct arch_hw_breakpoint {

static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl)
{
- return (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
+ u32 reg = (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
ctrl.enabled;
+ /* set HMC and SSC when debug target is EL2 */
+ if (ctrl.privilege == AARCH64_BREAKPOINT_EL2)
+ reg |= (3 << 14) | (1 << 13);
+ return reg;
}

static inline void decode_ctrl_reg(u32 reg,
@@ -59,6 +63,7 @@ static inline void decode_ctrl_reg(u32 reg,
#define AARCH64_ESR_ACCESS_MASK (1 << 6)

/* Privilege Levels */
+#define AARCH64_BREAKPOINT_EL2 0
#define AARCH64_BREAKPOINT_EL1 1
#define AARCH64_BREAKPOINT_EL0 2

diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 7a1a5da6c8c1..77866839d1e8 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -162,6 +162,7 @@ static enum debug_el debug_exception_level(int privilege)
case AARCH64_BREAKPOINT_EL0:
return DBG_ACTIVE_EL0;
case AARCH64_BREAKPOINT_EL1:
+ case AARCH64_BREAKPOINT_EL2:
return DBG_ACTIVE_EL1;
default:
pr_warning("invalid breakpoint privilege level %d\n", privilege);
@@ -456,7 +457,8 @@ static int arch_build_bp_info(struct perf_event *bp)
* that would complicate the stepping code.
*/
if (arch_check_bp_in_kernelspace(bp))
- info->ctrl.privilege = AARCH64_BREAKPOINT_EL1;
+ info->ctrl.privilege = is_kernel_in_hyp_mode() ?
+ AARCH64_BREAKPOINT_EL2 : AARCH64_BREAKPOINT_EL1;
else
info->ctrl.privilege = AARCH64_BREAKPOINT_EL0;

@@ -526,7 +528,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* Disallow per-task kernel breakpoints since these would
* complicate the stepping code.
*/
- if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target)
+ if (info->ctrl.privilege != AARCH64_BREAKPOINT_EL0 && bp->hw.target)
return -EINVAL;

return 0;
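
For reference, and to make the magic numbers above a bit less magic,
the relevant DBGBCR_EL1 field positions could be spelled out as
constants (illustrative names only, not something the kernel defines):

/* DBGBCR_EL1 field positions */
#define DBGBCR_PMC_SHIFT	1	/* PMC: bits [2:1] */
#define DBGBCR_HMC_SHIFT	13	/* HMC: bit [13] */
#define DBGBCR_SSC_SHIFT	14	/* SSC: bits [15:14] */

/*
 * (3 << 14) | (1 << 13) in the hunk above sets SSC = 0b11 and HMC = 1,
 * which together with PMC = 0b00 gives the {HMC,SSC,PMC} = {1,11,00}
 * combination mentioned earlier.
 */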

--
Catalin

2015-08-26 09:12:54

by Antonios Motakis

[permalink] [raw]
Subject: Re: [PATCH 00/13] arm64: Virtualization Host Extension support

Hello Marc,

On 08-Jul-15 18:19, Marc Zyngier wrote:
> ARMv8.1 comes with the "Virtualization Host Extension" (VHE for
> short), which enables simpler support of Type-2 hypervisors.
>
> This extension allows the kernel to directly run at EL2, and
> significantly reduces the number of system registers shared between
> host and guest, reducing the overhead of virtualization.
>
> In order to have the same kernel binary running on all versions of the
> architecture, this series makes heavy use of runtime code patching.
>
> The first ten patches massage the KVM code to deal with VHE and enable
> Linux to run at EL2.

I am currently working on getting the Jailhouse hypervisor to work on AArch64.

I've been looking at your patches, trying to figure out the implications for Jailhouse. It seems there are a few :)

Jailhouse likes to be loaded by Linux into memory, and then to inject itself at a higher level than Linux (demoting Linux into being the "root cell"). This works on x86 and ARM (AArch32, and eventually AArch64 without VHE). What this means on ARM is that Jailhouse hooks into the HVC stub exposed by Linux, and happily installs itself in EL2.

With Linux running in EL2 though, that won't be as straightforward. It looks like we can't just demote Linux to EL1 without breaking something. Obviously it's OK for us that KVM won't work, but it looks like at least the timer code will break horribly if we try to do something like that.

Any comments on this? One workaround would be to just remap the incoming interrupt from the timer, so Linux never really realizes it's not running in EL2 anymore. Then we would also have to deal with the intricacies of removing and re-adding vCPUs to the Linux root cell, so we would have to maintain the illusion of running in EL2 for each one of them.

Cheers,
Antonios

>
> The next patch catches an ugly case when VHE capable CPUs are paired
> with some of their less capable siblings. This should never happen,
> but hey...
>
> The last two patches add an optimisation allowing a physical interrupt
> to be serviced on the host without doing a full save/restore, leading
> to potential reduction in interrupt latency.
>
> This has been tested on the FVP_Base_SLV-V8-A model, and based on
> v4.2-rc1. I've put a branch out on:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git kvm-arm64/vhe
>
> Marc Zyngier (13):
> arm/arm64: Add new is_kernel_in_hyp_mode predicate
> arm64: Allow the arch timer to use the HYP timer
> arm64: Add ARM64_HAS_VIRT_HOST_EXTN feature
> arm64: KVM: skip HYP setup when already running in HYP
> arm64: KVM: VHE: macroize VTCR_EL2 setup
> arm64: KVM: VHE: Patch out kern_hyp_va
> arm64: KVM: VHE: Patch out use of HVC
> arm64: KVM: VHE: Preserve VHE config in world switch
> arm64: KVM: VHE: Add alternatives for VHE-enabled world-switch
> arm64: Add support for running Linux in EL2 mode
> arm64: Panic when VHE and non VHE CPUs coexist
> arm64: KVM: Split sysreg save/restore
> arm64: KVM: VHE: Early interrupt handling
>
> arch/arm/include/asm/virt.h | 5 +
> arch/arm/kvm/arm.c | 134 ++++++++-----
> arch/arm/kvm/mmu.c | 6 +
> arch/arm64/include/asm/cpufeature.h | 3 +-
> arch/arm64/include/asm/kvm_arm.h | 1 +
> arch/arm64/include/asm/kvm_asm.h | 40 +++-
> arch/arm64/include/asm/kvm_emulate.h | 2 +
> arch/arm64/include/asm/kvm_mmu.h | 24 ++-
> arch/arm64/include/asm/virt.h | 25 +++
> arch/arm64/kernel/cpufeature.c | 11 ++
> arch/arm64/kernel/head.S | 38 +++-
> arch/arm64/kernel/smp.c | 4 +
> arch/arm64/kvm/hyp-init.S | 9 +-
> arch/arm64/kvm/hyp.S | 363 ++++++++++++++++++++++++-----------
> arch/arm64/kvm/vgic-v2-switch.S | 19 +-
> arch/arm64/kvm/vgic-v3-switch.S | 33 ++--
> arch/arm64/kvm/vhe-macros.h | 54 ++++++
> drivers/clocksource/arm_arch_timer.c | 96 +++++----
> 18 files changed, 638 insertions(+), 229 deletions(-)
> create mode 100644 arch/arm64/kvm/vhe-macros.h
>

--
Antonios Motakis
Virtualization Engineer
Huawei Technologies Duesseldorf GmbH
European Research Center
Riesstrasse 25, 80992 München

2015-08-26 09:22:15

by Jan Kiszka

[permalink] [raw]
Subject: Re: [PATCH 00/13] arm64: Virtualization Host Extension support

On 2015-08-26 11:12, Antonios Motakis wrote:
> Hello Marc,
>
> On 08-Jul-15 18:19, Marc Zyngier wrote:
>> ARMv8.1 comes with the "Virtualization Host Extension" (VHE for
>> short), which enables simpler support of Type-2 hypervisors.
>>
>> This extension allows the kernel to directly run at EL2, and
>> significantly reduces the number of system registers shared between
>> host and guest, reducing the overhead of virtualization.
>>
>> In order to have the same kernel binary running on all versions of the
>> architecture, this series makes heavy use of runtime code patching.
>>
>> The first ten patches massage the KVM code to deal with VHE and enable
>> Linux to run at EL2.
>
> I am currently working on getting the Jailhouse hypervisor to work on AArch64.
>
> I've been looking at your patches, trying to figure out the implications for Jailhouse. It seems there are a few :)
>
> Jailhouse likes to be loaded by Linux into memory, and then to inject itself at a higher level than Linux (demoting Linux into being the "root cell"). This works on x86 and ARM (AArch32 and eventually AArch64 without VHE). What this means in ARM, is that Jailhouse hooks into the HVC stub exposed by Linux, and happily installs itself in EL2.
>
> With Linux running in EL2 though, that won't be as straightforward. It looks like we can't just demote Linux to EL1 without breaking something. Obviously it's OK for us that KVM won't work, but it looks like at least the timer code will break horribly if we try to do something like that.
>
> Any comments on this? One work around would be to just remap the incoming interrupt from the timer, so Linux never really realizes it's not running in EL2 anymore. Then we would also have to deal with the intricacies of removing and re-adding vCPUs to the Linux root cell, so we would have to maintain the illusion of running in EL2 for each one of them.

Without knowing any of the details, I would say there are two strategies
regarding this:

- Disable KVM support in the Linux kernel - then we shouldn't boot into
EL2 in the first place, should we?

- Emulate what Linux is missing after take-over by Jailhouse (we do
this on x86 with VT-d interrupt remapping which cannot be disabled
anymore for Linux once it started with it, and we cannot boot without
it when we want to use the x2APIC).

Jan

--
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux

2015-08-26 09:29:15

by Antonios Motakis

[permalink] [raw]
Subject: Re: [PATCH 00/13] arm64: Virtualization Host Extension support



On 26-Aug-15 11:21, Jan Kiszka wrote:
> On 2015-08-26 11:12, Antonios Motakis wrote:
>> Hello Marc,
>>
>> On 08-Jul-15 18:19, Marc Zyngier wrote:
>>> ARMv8.1 comes with the "Virtualization Host Extension" (VHE for
>>> short), which enables simpler support of Type-2 hypervisors.
>>>
>>> This extension allows the kernel to directly run at EL2, and
>>> significantly reduces the number of system registers shared between
>>> host and guest, reducing the overhead of virtualization.
>>>
>>> In order to have the same kernel binary running on all versions of the
>>> architecture, this series makes heavy use of runtime code patching.
>>>
>>> The first ten patches massage the KVM code to deal with VHE and enable
>>> Linux to run at EL2.
>>
>> I am currently working on getting the Jailhouse hypervisor to work on AArch64.
>>
>> I've been looking at your patches, trying to figure out the implications for Jailhouse. It seems there are a few :)
>>
>> Jailhouse likes to be loaded by Linux into memory, and then to inject itself at a higher level than Linux (demoting Linux into being the "root cell"). This works on x86 and ARM (AArch32 and eventually AArch64 without VHE). What this means in ARM, is that Jailhouse hooks into the HVC stub exposed by Linux, and happily installs itself in EL2.
>>
>> With Linux running in EL2 though, that won't be as straightforward. It looks like we can't just demote Linux to EL1 without breaking something. Obviously it's OK for us that KVM won't work, but it looks like at least the timer code will break horribly if we try to do something like that.
>>
>> Any comments on this? One work around would be to just remap the incoming interrupt from the timer, so Linux never really realizes it's not running in EL2 anymore. Then we would also have to deal with the intricacies of removing and re-adding vCPUs to the Linux root cell, so we would have to maintain the illusion of running in EL2 for each one of them.
>
> Without knowing any of the details, I would say there are two strategies
> regarding this:
>
> - Disable KVM support in the Linux kernel - then we shouldn't boot into
> EL2 in the first place, should we?

We would have to ask the user to patch the kernel to ignore VHE and keep all the hyp stub magic that we rely on currently. It is an option, of course.

>
> - Emulate what Linux is missing after take-over by Jailhouse (we do
> this on x86 with VT-d interrupt remapping which cannot be disabled
> anymore for Linux once it started with it, and we cannot boot without
> it when we want to use the x2APIC).

Essentially what I described above; let's call it nested virtualization without the virtualization parts? :)

>
> Jan
>

--
Antonios Motakis
Virtualization Engineer
Huawei Technologies Duesseldorf GmbH
European Research Center
Riesstrasse 25, 80992 München

2015-08-26 09:55:15

by Jan Kiszka

[permalink] [raw]
Subject: Re: [PATCH 00/13] arm64: Virtualization Host Extension support

On 2015-08-26 11:28, Antonios Motakis wrote:
>
>
> On 26-Aug-15 11:21, Jan Kiszka wrote:
>> On 2015-08-26 11:12, Antonios Motakis wrote:
>>> Hello Marc,
>>>
>>> On 08-Jul-15 18:19, Marc Zyngier wrote:
>>>> ARMv8.1 comes with the "Virtualization Host Extension" (VHE for
>>>> short), which enables simpler support of Type-2 hypervisors.
>>>>
>>>> This extension allows the kernel to directly run at EL2, and
>>>> significantly reduces the number of system registers shared between
>>>> host and guest, reducing the overhead of virtualization.
>>>>
>>>> In order to have the same kernel binary running on all versions of the
>>>> architecture, this series makes heavy use of runtime code patching.
>>>>
>>>> The first ten patches massage the KVM code to deal with VHE and enable
>>>> Linux to run at EL2.
>>>
>>> I am currently working on getting the Jailhouse hypervisor to work on AArch64.
>>>
>>> I've been looking at your patches, trying to figure out the implications for Jailhouse. It seems there are a few :)
>>>
>>> Jailhouse likes to be loaded by Linux into memory, and then to inject itself at a higher level than Linux (demoting Linux into being the "root cell"). This works on x86 and ARM (AArch32 and eventually AArch64 without VHE). What this means in ARM, is that Jailhouse hooks into the HVC stub exposed by Linux, and happily installs itself in EL2.
>>>
>>> With Linux running in EL2 though, that won't be as straightforward. It looks like we can't just demote Linux to EL1 without breaking something. Obviously it's OK for us that KVM won't work, but it looks like at least the timer code will break horribly if we try to do something like that.
>>>
>>> Any comments on this? One work around would be to just remap the incoming interrupt from the timer, so Linux never really realizes it's not running in EL2 anymore. Then we would also have to deal with the intricacies of removing and re-adding vCPUs to the Linux root cell, so we would have to maintain the illusion of running in EL2 for each one of them.
>>
>> Without knowing any of the details, I would say there are two strategies
>> regarding this:
>>
>> - Disable KVM support in the Linux kernel - then we shouldn't boot into
>> EL2 in the first place, should we?
>
> We would have to ask the user to patch the kernel, to ignore VHE and keep all the hyp stub magic that we rely on currently. It is an option of course.

Patch or reconfigure? CONFIG_KVM isn't mandatory for arm64, is it?

Jan

>
>>
>> - Emulate what Linux is missing after take-over by Jailhouse (we do
>> this on x86 with VT-d interrupt remapping which cannot be disabled
>> anymore for Linux once it started with it, and we cannot boot without
>> it when we want to use the x2APIC).
>
> Essentially what I described above; let's call it nested virtualization without the virtualization parts? :)
>
>>
>> Jan
>>
>

--
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux

2015-08-26 09:59:16

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH 00/13] arm64: Virtualization Host Extension support

On 26/08/15 10:21, Jan Kiszka wrote:
> On 2015-08-26 11:12, Antonios Motakis wrote:
>> Hello Marc,
>>
>> On 08-Jul-15 18:19, Marc Zyngier wrote:
>>> ARMv8.1 comes with the "Virtualization Host Extension" (VHE for
>>> short), which enables simpler support of Type-2 hypervisors.
>>>
>>> This extension allows the kernel to directly run at EL2, and
>>> significantly reduces the number of system registers shared
>>> between host and guest, reducing the overhead of virtualization.
>>>
>>> In order to have the same kernel binary running on all versions
>>> of the architecture, this series makes heavy use of runtime code
>>> patching.
>>>
>>> The first ten patches massage the KVM code to deal with VHE and
>>> enable Linux to run at EL2.
>>
>> I am currently working on getting the Jailhouse hypervisor to work
>> on AArch64.
>>
>> I've been looking at your patches, trying to figure out the
>> implications for Jailhouse. It seems there are a few :)
>>
>> Jailhouse likes to be loaded by Linux into memory, and then to
>> inject itself at a higher level than Linux (demoting Linux into
>> being the "root cell"). This works on x86 and ARM (AArch32 and
>> eventually AArch64 without VHE). What this means in ARM, is that
>> Jailhouse hooks into the HVC stub exposed by Linux, and happily
>> installs itself in EL2.
>>
>> With Linux running in EL2 though, that won't be as straightforward.
>> It looks like we can't just demote Linux to EL1 without breaking
>> something. Obviously it's OK for us that KVM won't work, but it
>> looks like at least the timer code will break horribly if we try to
>> do something like that.
>>
>> Any comments on this? One work around would be to just remap the
>> incoming interrupt from the timer, so Linux never really realizes
>> it's not running in EL2 anymore. Then we would also have to deal
>> with the intricacies of removing and re-adding vCPUs to the Linux
>> root cell, so we would have to maintain the illusion of running in
>> EL2 for each one of them.

Unfortunately, there is more to downgrading to EL1 than just interrupts.
You need to migrate the whole VM context from EL2 to EL1 in an atomic
fashion, clear the HCR_EL2.E2H and HCR_EL2.TGE bits while running at EL2
(which is a bit like pulling the rug from under your own feet so you
need to transition via a low mapping or an idmap), reinstall the EL2
stub and do an exception return into EL1.

And that's only for the CPU. Downgrading to EL1 has other fun
consequences at the system level (SMMUs listening to TLB traffic would
need to be reconfigured on the fly - it's a joke, don't even think of
it).

>
> Without knowing any of the details, I would say there are two
> strategies regarding this:
>
> - Disable KVM support in the Linux kernel - then we shouldn't boot
> into EL2 in the first place, should we?

Disabling KVM support won't drop the kernel to EL1. At least that's not
what the current code does (we stay at EL2 if we detect VHE), and that's
way too early to be able to parse a command line option.

> - Emulate what Linux is missing after take-over by Jailhouse (we do
> this on x86 with VT-d interrupt remapping which cannot be disabled
> anymore for Linux once it started with it, and we cannot boot
> without it when we want to use the x2APIC).

I can't really see what you want to emulate (I'm afraid I'm lacking the
x86-specific background).

As far as I can see, the only practical solution to this is to have a
VHE config option, and have Jailhouse conflict with it (depends
on !VHE).

Thanks,

M.
--
Jazz is not dead. It just smells funny...

2015-08-26 11:17:13

by Antonios Motakis

[permalink] [raw]
Subject: Re: [PATCH 00/13] arm64: Virtualization Host Extension support



On 26-Aug-15 11:59, Marc Zyngier wrote:
> On 26/08/15 10:21, Jan Kiszka wrote:
>> On 2015-08-26 11:12, Antonios Motakis wrote:
>>> Hello Marc,
>>>
>>> On 08-Jul-15 18:19, Marc Zyngier wrote:
>>>> ARMv8.1 comes with the "Virtualization Host Extension" (VHE for
>>>> short), which enables simpler support of Type-2 hypervisors.
>>>>
>>>> This extension allows the kernel to directly run at EL2, and
>>>> significantly reduces the number of system registers shared
>>>> between host and guest, reducing the overhead of virtualization.
>>>>
>>>> In order to have the same kernel binary running on all versions
>>>> of the architecture, this series makes heavy use of runtime code
>>>> patching.
>>>>
>>>> The first ten patches massage the KVM code to deal with VHE and
>>>> enable Linux to run at EL2.
>>>
>>> I am currently working on getting the Jailhouse hypervisor to work
>>> on AArch64.
>>>
>>> I've been looking at your patches, trying to figure out the
>>> implications for Jailhouse. It seems there are a few :)
>>>
>>> Jailhouse likes to be loaded by Linux into memory, and then to
>>> inject itself at a higher level than Linux (demoting Linux into
>>> being the "root cell"). This works on x86 and ARM (AArch32 and
>>> eventually AArch64 without VHE). What this means in ARM, is that
>>> Jailhouse hooks into the HVC stub exposed by Linux, and happily
>>> installs itself in EL2.
>>>
>>> With Linux running in EL2 though, that won't be as straightforward.
>>> It looks like we can't just demote Linux to EL1 without breaking
>>> something. Obviously it's OK for us that KVM won't work, but it
>>> looks like at least the timer code will break horribly if we try to
>>> do something like that.
>>>
>>> Any comments on this? One work around would be to just remap the
>>> incoming interrupt from the timer, so Linux never really realizes
>>> it's not running in EL2 anymore. Then we would also have to deal
>>> with the intricacies of removing and re-adding vCPUs to the Linux
>>> root cell, so we would have to maintain the illusion of running in
>>> EL2 for each one of them.
>
> Unfortunately, there is more to downgrading to EL1 than just interrupts.
> You need to migrate the whole VM context from EL2 to EL1 in an atomic
> fashion, clear the HCR_EL2.E2H and HCR_EL2.TGE bits while running at EL2
> (which is a bit like pulling the rug from under your own feet so you
> need to transition via a low mapping or an idmap), reinstall the EL2
> stub and do an exception return into EL1.

When enabling Jailhouse, we already do most of that. We already use identity mapping, since we need to switch on the MMU for EL2, switch the exception level, etc. Jailhouse entry looks a lot like initializing a new kernel; we just save the state of what was running before it and restore it as the "root cell".

So I think we could handle the CPU context switch with changes only in the Jailhouse entry code. But then of course, Linux would expect to be in EL2 while it is actually running in EL1, so we would have to emulate the differences in behavior. But...

>
> And that's only for the CPU. Downgrading to EL1 has other fun
> consequences at the system level (SMMUs listening to TLB traffic would
> need to be reconfigured on the flight - it's a joke, don't even think of
> it).

...but then there's that.

Hm... even if the kernel is running in EL2, it will still be configuring stage 1 on the SMMU, no? I wonder if this could still be handled somehow... The root cell would be restored with identity mapping, too... Just thinking out loud :)

>
>>
>> Without knowing any of the details, I would say there are two
>> strategies regarding this:
>>
>> - Disable KVM support in the Linux kernel - then we shouldn't boot
>> into EL2 in the first place, should we?
>
> Disabling KVM support won't drop the kernel to EL1. At least that's not
> what the current code does (we stay at EL2 if we detect VHE), and that's
> way too early to be able to parse a command line option.
>
>> - Emulate what Linux is missing after take-over by Jailhouse (we do
>> this on x86 with VT-d interrupt remapping which cannot be disabled
>> anymore for Linux once it started with it, and we cannot boot
>> without it when we want to use the x2APIC).
>
> I can't really see what you want to emulate (I'm afraid I'm lacking the
> x86-specific background).
>
> As far as I can see, the only practical solution to this is to have a
> VHE config option, and Jailhouse that can be set to conflict it (depends
> on !VHE).

Having a toggle to turn VHE off at build time would definitely be the easy way out. Then we can just tell the user that we only support kernels built without it (the Jailhouse driver is out of tree atm).

I don't have access to a VHE model though. Are you considering adding
a config option for VHE in the next version of your patches?
In any case thanks for your thoughts,

--
Antonios Motakis
Virtualization Engineer
Huawei Technologies Duesseldorf GmbH
European Research Center
Riesstrasse 25, 80992 München

2015-08-28 07:04:35

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH 00/13] arm64: Virtualization Host Extension support

On Wed, 26 Aug 2015 13:16:52 +0200
Antonios Motakis <[email protected]> wrote:
> On 26-Aug-15 11:59, Marc Zyngier wrote:

[...]

> > Unfortunately, there is more to downgrading to EL1 than just interrupts.
> > You need to migrate the whole VM context from EL2 to EL1 in an atomic
> > fashion, clear the HCR_EL2.E2H and HCR_EL2.TGE bits while running at EL2
> > (which is a bit like pulling the rug from under your own feet so you
> > need to transition via a low mapping or an idmap), reinstall the EL2
> > stub and do an exception return into EL1.
>
> When enabling Jailhouse, we already do most of that. We already use
> identity mapping, since we need to switch on the MMU for EL2, switch
> the exception level, etc. Jailhouse entry looks a lot like
> initializing a new kernel; we just save the state of what was running
> before it and restore it as the "root cell".
>
> So I think we could handle the cpu context switch, with changes only
> in the Jailhouse entry code. But then of course, Linux would be
> expecting to be in EL2, while it is running in EL1, so we would have
> to emulate the differences in behavior. But...

There would be (almost) no difference in behaviour - VHE is designed
for the kernel to be unchanged, and the only difference is the timer
interrupt as you noticed.

What is really tricky is to perform the downgrade, because you're
completely changing the way the code is executed *while running it*.
This is not just about changing the memory map, but also changing the
effect of most system registers.

>
> >
> > And that's only for the CPU. Downgrading to EL1 has other fun
> > consequences at the system level (SMMUs listening to TLB traffic
> > would need to be reconfigured on the flight - it's a joke, don't
> > even think of it).
>
> ...but then there's that.
>
> Hm... even if the kernel is running in EL2, it will still be
> configuring stage 1 on the SMMU, no? I wonder if this could still be
> handled somehow... The root cell would be restored with identity
> mapping, too... Just thinking out loud :)

Stage-1 and EL2 are two vastly unrelated concepts. The main issue is
that your SMMU most likely knows about VHE as well (it listens to
EL2-VHE DVM messages), and would need to be reconfigured too. Good luck
with that.

[...]

> > As far as I can see, the only practical solution to this is to have
> > a VHE config option, and Jailhouse that can be set to conflict it
> > (depends on !VHE).
>
> Having a toggle to turn VHE off at build time would definitely be the
> easy way out. Then we can just tell the user that we only support
> kernels built without it (the Jailhouse driver is out of tree atm).
>
> I don't have access to a VHE model though. Are you considering to add
> a config option for VHE in the next version of your patches?

Yes, that's the plan.

Thanks,

M.
--
Jazz is not dead. It just smells funny.

2015-08-31 18:45:05

by Christoffer Dall

[permalink] [raw]
Subject: Re: [PATCH 09/13] arm64: KVM: VHE: Add alternatives for VHE-enabled world-switch

On Wed, Jul 08, 2015 at 05:19:12PM +0100, Marc Zyngier wrote:
> In order to switch between host and guest, a VHE-enabled kernel
> must use different accessors for certain system registers.
>
> This patch uses runtime patching to use the right instruction
> when required...

So am I reading this patch correctly: we basically still switch all
the EL1 state for which the host has separate EL2 state, on every
exit?

Wouldn't that defeat the entire purpose of VHE, more or less, where you
should only be saving/restoring all this system register state on
vcpu_put?
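
(Purely to illustrate what I mean - the two helpers below are invented,
not existing code: with VHE the host lives in the _EL2 registers, so
the guest's EL1 sysregs could in principle stay loaded across exits and
only be switched when the vcpu is actually scheduled out/in:

static void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
{
	/* hypothetical helper: restore guest EL1 sysregs once per vcpu_load() */
	if (is_kernel_in_hyp_mode())
		__restore_guest_el1_sysregs(vcpu);
}

static void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
{
	/* hypothetical helper: save guest EL1 sysregs once per vcpu_put() */
	if (is_kernel_in_hyp_mode())
		__save_guest_el1_sysregs(vcpu);
}

leaving the world-switch itself to only deal with the truly shared
state.)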

Or was it a conscious design decision to not introduce anything for
performance in this series, and just make it functionally correct for
now? If so, I think the series deserves a big fat comment explaining
this in the cover letter.

I am a bit concerned overall that this will become hugely difficult to
read and maintain, especially if we add a bunch more optimizations to
the code.

Have you given any thought to simply splitting up the logic between VHE
and non-VHE KVM on arm64 for the low-level stuff?

-Christoffer

>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> arch/arm64/include/asm/kvm_asm.h | 40 ++++++--
> arch/arm64/kvm/hyp.S | 210 ++++++++++++++++++++++++++-------------
> arch/arm64/kvm/vhe-macros.h | 18 ++++
> 3 files changed, 191 insertions(+), 77 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index 3c5fe68..a932be0 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -18,6 +18,7 @@
> #ifndef __ARM_KVM_ASM_H__
> #define __ARM_KVM_ASM_H__
>
> +#include <asm/sysreg.h>
> #include <asm/virt.h>
>
> /*
> @@ -27,7 +28,7 @@
> #define MPIDR_EL1 1 /* MultiProcessor Affinity Register */
> #define CSSELR_EL1 2 /* Cache Size Selection Register */
> #define SCTLR_EL1 3 /* System Control Register */
> -#define ACTLR_EL1 4 /* Auxiliary Control Register */
> +#define AMAIR_EL1 4 /* Aux Memory Attribute Indirection Register */
> #define CPACR_EL1 5 /* Coprocessor Access Control */
> #define TTBR0_EL1 6 /* Translation Table Base Register 0 */
> #define TTBR1_EL1 7 /* Translation Table Base Register 1 */
> @@ -39,13 +40,13 @@
> #define MAIR_EL1 13 /* Memory Attribute Indirection Register */
> #define VBAR_EL1 14 /* Vector Base Address Register */
> #define CONTEXTIDR_EL1 15 /* Context ID Register */
> -#define TPIDR_EL0 16 /* Thread ID, User R/W */
> -#define TPIDRRO_EL0 17 /* Thread ID, User R/O */
> -#define TPIDR_EL1 18 /* Thread ID, Privileged */
> -#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */
> -#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */
> -#define PAR_EL1 21 /* Physical Address Register */
> -#define MDSCR_EL1 22 /* Monitor Debug System Control Register */
> +#define CNTKCTL_EL1 16 /* Timer Control Register (EL1) */
> +#define PAR_EL1 17 /* Physical Address Register */
> +#define MDSCR_EL1 18 /* Monitor Debug System Control Register */
> +#define TPIDR_EL0 19 /* Thread ID, User R/W */
> +#define TPIDRRO_EL0 20 /* Thread ID, User R/O */
> +#define TPIDR_EL1 21 /* Thread ID, Privileged */
> +#define ACTLR_EL1 22 /* Auxiliary Control Register */
> #define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */
> #define DBGBCR15_EL1 38
> #define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */
> @@ -106,6 +107,29 @@
>
> #define NR_COPRO_REGS (NR_SYS_REGS * 2)
>
> +#define sctlr_EL12 sys_reg(3, 5, 1, 0, 0)
> +#define cpacr_EL12 sys_reg(3, 5, 1, 0, 2)
> +#define ttbr0_EL12 sys_reg(3, 5, 2, 0, 0)
> +#define ttbr1_EL12 sys_reg(3, 5, 2, 0, 1)
> +#define tcr_EL12 sys_reg(3, 5, 2, 0, 2)
> +#define afsr0_EL12 sys_reg(3, 5, 5, 1, 0)
> +#define afsr1_EL12 sys_reg(3, 5, 5, 1, 1)
> +#define esr_EL12 sys_reg(3, 5, 5, 2, 0)
> +#define far_EL12 sys_reg(3, 5, 6, 0, 0)
> +#define mair_EL12 sys_reg(3, 5, 10, 2, 0)
> +#define amair_EL12 sys_reg(3, 5, 10, 3, 0)
> +#define vbar_EL12 sys_reg(3, 5, 12, 0, 0)
> +#define contextidr_EL12 sys_reg(3, 5, 13, 0, 1)
> +#define cntkctl_EL12 sys_reg(3, 5, 14, 1, 0)
> +#define cntp_tval_EL02 sys_reg(3, 5, 14, 2, 0)
> +#define cntp_ctl_EL02 sys_reg(3, 5, 14, 2, 1)
> +#define cntp_cval_EL02 sys_reg(3, 5, 14, 2, 2)
> +#define cntv_tval_EL02 sys_reg(3, 5, 14, 3, 0)
> +#define cntv_ctl_EL02 sys_reg(3, 5, 14, 3, 1)
> +#define cntv_cval_EL02 sys_reg(3, 5, 14, 3, 2)
> +#define spsr_EL12 sys_reg(3, 5, 4, 0, 0)
> +#define elr_EL12 sys_reg(3, 5, 4, 0, 1)
> +
> #define ARM_EXCEPTION_IRQ 0
> #define ARM_EXCEPTION_TRAP 1
>
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index ad5821b..b61591b 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (C) 2012,2013 - ARM Ltd
> + * Copyright (C) 2012-2015 - ARM Ltd
> * Author: Marc Zyngier <[email protected]>
> *
> * This program is free software; you can redistribute it and/or modify
> @@ -67,40 +67,52 @@
> stp x29, lr, [x3, #80]
>
> mrs x19, sp_el0
> - mrs x20, elr_el2 // pc before entering el2
> - mrs x21, spsr_el2 // pstate before entering el2
> + str x19, [x3, #96]
> +.endm
>
> - stp x19, x20, [x3, #96]
> - str x21, [x3, #112]
> +.macro save_el1_state
> + mrs_hyp(x20, ELR) // pc before entering el2
> + mrs_hyp(x21, SPSR) // pstate before entering el2
>
> mrs x22, sp_el1
> - mrs x23, elr_el1
> - mrs x24, spsr_el1
> +
> + mrs_el1(x23, elr)
> + mrs_el1(x24, spsr)
> +
> + add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
> + stp x20, x21, [x3, #8] // HACK: Store to the regs after SP_EL0
>
> str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
> str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
> str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
> .endm
>
> -.macro restore_common_regs
> +.macro restore_el1_state
> // x2: base address for cpu context
> // x3: tmp register
>
> + add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
> + ldp x20, x21, [x3, #8] // Same hack again, get guest PC and pstate
> +
> ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
> ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
> ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
>
> + msr_hyp(ELR, x20) // pc on return from el2
> + msr_hyp(SPSR, x21) // pstate on return from el2
> +
> msr sp_el1, x22
> - msr elr_el1, x23
> - msr spsr_el1, x24
>
> - add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
> - ldp x19, x20, [x3]
> - ldr x21, [x3, #16]
> + msr_el1(elr, x23)
> + msr_el1(spsr, x24)
> +.endm
>
> +.macro restore_common_regs
> + // x2: base address for cpu context
> + // x3: tmp register
> +
> + ldr x19, [x2, #CPU_XREG_OFFSET(31)] // SP_EL0
> msr sp_el0, x19
> - msr elr_el2, x20 // pc on return from el2
> - msr spsr_el2, x21 // pstate on return from el2
>
> add x3, x2, #CPU_XREG_OFFSET(19)
> ldp x19, x20, [x3]
> @@ -113,9 +125,15 @@
>
> .macro save_host_regs
> save_common_regs
> +ifnvhe nop, "b skip_el1_save"
> + save_el1_state
> +skip_el1_save:
> .endm
>
> .macro restore_host_regs
> +ifnvhe nop, "b skip_el1_restore"
> + restore_el1_state
> +skip_el1_restore:
> restore_common_regs
> .endm
>
> @@ -159,6 +177,7 @@
> stp x6, x7, [x3, #16]
>
> save_common_regs
> + save_el1_state
> .endm
>
> .macro restore_guest_regs
> @@ -184,6 +203,7 @@
> ldr x18, [x3, #144]
>
> // x19-x29, lr, sp*, elr*, spsr*
> + restore_el1_state
> restore_common_regs
>
> // Last bits of the 64bit state
> @@ -203,6 +223,38 @@
> * In other words, don't touch any of these unless you know what
> * you are doing.
> */
> +
> +.macro save_shared_sysregs
> + // x2: base address for cpu context
> + // x3: tmp register
> +
> + add x3, x2, #CPU_SYSREG_OFFSET(TPIDR_EL0)
> +
> + mrs x4, tpidr_el0
> + mrs x5, tpidrro_el0
> + mrs x6, tpidr_el1
> + mrs x7, actlr_el1
> +
> + stp x4, x5, [x3]
> + stp x6, x7, [x3, #16]
> +.endm
> +
> +.macro restore_shared_sysregs
> + // x2: base address for cpu context
> + // x3: tmp register
> +
> + add x3, x2, #CPU_SYSREG_OFFSET(TPIDR_EL0)
> +
> + ldp x4, x5, [x3]
> + ldp x6, x7, [x3, #16]
> +
> + msr tpidr_el0, x4
> + msr tpidrro_el0, x5
> + msr tpidr_el1, x6
> + msr actlr_el1, x7
> +.endm
> +
> +
> .macro save_sysregs
> // x2: base address for cpu context
> // x3: tmp register
> @@ -211,26 +263,27 @@
>
> mrs x4, vmpidr_el2
> mrs x5, csselr_el1
> - mrs x6, sctlr_el1
> - mrs x7, actlr_el1
> - mrs x8, cpacr_el1
> - mrs x9, ttbr0_el1
> - mrs x10, ttbr1_el1
> - mrs x11, tcr_el1
> - mrs x12, esr_el1
> - mrs x13, afsr0_el1
> - mrs x14, afsr1_el1
> - mrs x15, far_el1
> - mrs x16, mair_el1
> - mrs x17, vbar_el1
> - mrs x18, contextidr_el1
> - mrs x19, tpidr_el0
> - mrs x20, tpidrro_el0
> - mrs x21, tpidr_el1
> - mrs x22, amair_el1
> - mrs x23, cntkctl_el1
> - mrs x24, par_el1
> - mrs x25, mdscr_el1
> + mrs_el1(x6, sctlr)
> + mrs_el1(x7, amair)
> + mrs_el1(x8, cpacr)
> + mrs_el1(x9, ttbr0)
> + mrs_el1(x10, ttbr1)
> + mrs_el1(x11, tcr)
> + mrs_el1(x12, esr)
> + mrs_el1(x13, afsr0)
> + mrs_el1(x14, afsr1)
> + mrs_el1(x15, far)
> + mrs_el1(x16, mair)
> + mrs_el1(x17, vbar)
> + mrs_el1(x18, contextidr)
> + mrs_el1(x19, cntkctl)
> + mrs x20, par_el1
> + mrs x21, mdscr_el1
> +
> + mrs x22, tpidr_el0
> + mrs x23, tpidrro_el0
> + mrs x24, tpidr_el1
> + mrs x25, actlr_el1
>
> stp x4, x5, [x3]
> stp x6, x7, [x3, #16]
> @@ -460,26 +513,27 @@
>
> msr vmpidr_el2, x4
> msr csselr_el1, x5
> - msr sctlr_el1, x6
> - msr actlr_el1, x7
> - msr cpacr_el1, x8
> - msr ttbr0_el1, x9
> - msr ttbr1_el1, x10
> - msr tcr_el1, x11
> - msr esr_el1, x12
> - msr afsr0_el1, x13
> - msr afsr1_el1, x14
> - msr far_el1, x15
> - msr mair_el1, x16
> - msr vbar_el1, x17
> - msr contextidr_el1, x18
> - msr tpidr_el0, x19
> - msr tpidrro_el0, x20
> - msr tpidr_el1, x21
> - msr amair_el1, x22
> - msr cntkctl_el1, x23
> - msr par_el1, x24
> - msr mdscr_el1, x25
> + msr_el1(sctlr, x6)
> + msr_el1(amair, x7)
> + msr_el1(cpacr, x8)
> + msr_el1(ttbr0, x9)
> + msr_el1(ttbr1, x10)
> + msr_el1(tcr, x11)
> + msr_el1(esr, x12)
> + msr_el1(afsr0, x13)
> + msr_el1(afsr1, x14)
> + msr_el1(far, x15)
> + msr_el1(mair, x16)
> + msr_el1(vbar, x17)
> + msr_el1(contextidr, x18)
> + msr_el1(cntkctl, x19)
> + msr par_el1, x20
> + msr mdscr_el1, x21
> +
> + msr tpidr_el0, x22
> + msr tpidrro_el0, x23
> + msr tpidr_el1, x24
> + msr actlr_el1, x25
> .endm
>
> .macro restore_debug
> @@ -779,8 +833,11 @@
> .macro activate_traps
> ldr x2, [x0, #VCPU_HCR_EL2]
> msr hcr_el2, x2
> - mov x2, #CPTR_EL2_TTA
> - msr cptr_el2, x2
> + adr x3, __kvm_hyp_vector
> +ifnvhe nop, "msr vbar_el1, x3"
> +ifnvhe nop, "mrs x2, cpacr_el1"
> +ifnvhe _S_(ldr x2, =(CPTR_EL2_TTA)), "orr x2, x2, #(1 << 28)"
> +ifnvhe "msr cptr_el2, x2", "msr cpacr_el1, x2"
>
> mov x2, #(1 << 15) // Trap CP15 Cr=15
> msr hstr_el2, x2
> @@ -803,12 +860,20 @@
> ifnvhe _S_(mov x2, #HCR_RW), _S_(mov x2, #HCR_RW|HCR_TGE)
> ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
> msr hcr_el2, x2
> - msr cptr_el2, xzr
> +
> +ifnvhe nop, "mrs x2, cpacr_el1"
> +ifnvhe nop, "movn x3, #(1 << 12), lsl #16"
> +ifnvhe nop, "and x2, x2, x3"
> +ifnvhe "msr cptr_el2, xzr", "msr cpacr_el1, x2"
> msr hstr_el2, xzr
>
> mrs x2, mdcr_el2
> and x2, x2, #MDCR_EL2_HPMN_MASK
> msr mdcr_el2, x2
> +
> + adrp x2, vectors
> + add x2, x2, #:lo12:vectors
> +ifnvhe nop, "msr vbar_el1, x2"
> .endm
>
> .macro activate_vm
> @@ -853,15 +918,15 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
> ldr w3, [x2, #KVM_TIMER_ENABLED]
> cbz w3, 1f
>
> - mrs x3, cntv_ctl_el0
> + mrs_el0(x3, cntv_ctl)
> and x3, x3, #3
> str w3, [x0, #VCPU_TIMER_CNTV_CTL]
> bic x3, x3, #1 // Clear Enable
> - msr cntv_ctl_el0, x3
> + msr_el0(cntv_ctl, x3)
>
> isb
>
> - mrs x3, cntv_cval_el0
> + mrs_el0(x3, cntv_cval)
> str x3, [x0, #VCPU_TIMER_CNTV_CVAL]
>
> 1:
> @@ -871,7 +936,7 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
> msr cnthctl_el2, x2
>
> // Clear cntvoff for the host
> - msr cntvoff_el2, xzr
> +ifnvhe "msr cntvoff_el2, xzr", nop
> .endm
>
> .macro restore_timer_state
> @@ -891,12 +956,12 @@ ifnvhe nop, _S_(orr x2, x2, #HCR_E2H)
> ldr x3, [x2, #KVM_TIMER_CNTVOFF]
> msr cntvoff_el2, x3
> ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL]
> - msr cntv_cval_el0, x2
> + msr_el0(cntv_cval, x2)
> isb
>
> ldr w2, [x0, #VCPU_TIMER_CNTV_CTL]
> and x2, x2, #3
> - msr cntv_ctl_el0, x2
> + msr_el0(cntv_ctl, x2)
> 1:
> .endm
>
> @@ -945,8 +1010,10 @@ ENTRY(__kvm_vcpu_run)
>
> save_host_regs
> bl __save_fpsimd
> - bl __save_sysregs
> -
> +ifnvhe "bl __save_sysregs", nop
> +ifnvhe "b 1f", nop
> + save_shared_sysregs
> +1:
> compute_debug_state 1f
> bl __save_debug
> 1:
> @@ -997,7 +1064,10 @@ __kvm_vcpu_return:
> ldr x2, [x0, #VCPU_HOST_CONTEXT]
> kern_hyp_va x2
>
> - bl __restore_sysregs
> +ifnvhe "bl __restore_sysregs", nop
> +ifnvhe "b 1f", nop
> + restore_shared_sysregs
> +1:
> bl __restore_fpsimd
>
> skip_debug_state x3, 1f
> @@ -1104,6 +1174,8 @@ __kvm_hyp_panic:
> mrs x6, par_el1
> mrs x7, tpidr_el2
>
> +ifnvhe nop, "b panic"
> +
> mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
> PSR_MODE_EL1h)
> msr spsr_el2, lr
> @@ -1248,7 +1320,7 @@ el1_trap:
> * As such, we can use the EL1 translation regime, and don't have
> * to distinguish between EL0 and EL1 access.
> */
> - mrs x2, far_el2
> +ifnvhe "mrs x2, far_el2", "mrs x2, far_el1"
> at s1e1r, x2
> isb
>
> @@ -1262,7 +1334,7 @@ el1_trap:
> b 2f
>
> 1: mrs x3, hpfar_el2
> - mrs x2, far_el2
> +ifnvhe "mrs x2, far_el2", "mrs x2, far_el1"
>
> 2: mrs x0, tpidr_el2
> str w1, [x0, #VCPU_ESR_EL2]
> diff --git a/arch/arm64/kvm/vhe-macros.h b/arch/arm64/kvm/vhe-macros.h
> index da7f9da..1e94235 100644
> --- a/arch/arm64/kvm/vhe-macros.h
> +++ b/arch/arm64/kvm/vhe-macros.h
> @@ -31,6 +31,24 @@
> alternative_insn "\nonvhe", "\vhe", ARM64_HAS_VIRT_HOST_EXTN
> .endm
>
> +#define mrs_el0(reg, sysreg) \
> + ifnvhe _S_(mrs reg, sysreg##_EL0), _S_(mrs_s reg, sysreg##_EL02)
> +
> +#define msr_el0(sysreg, reg) \
> + ifnvhe _S_(msr sysreg##_EL0, reg), _S_(msr_s sysreg##_EL02, reg)
> +
> +#define mrs_el1(reg, sysreg) \
> + ifnvhe _S_(mrs reg, sysreg##_EL1), _S_(mrs_s reg, sysreg##_EL12)
> +
> +#define msr_el1(sysreg, reg) \
> + ifnvhe _S_(msr sysreg##_EL1, reg), _S_(msr_s sysreg##_EL12, reg)
> +
> +#define mrs_hyp(reg, sysreg) \
> + ifnvhe _S_(mrs reg, sysreg##_EL2), _S_(mrs reg, sysreg##_EL1)
> +
> +#define msr_hyp(sysreg, reg) \
> + ifnvhe _S_(msr sysreg##_EL2, reg), _S_(msr sysreg##_EL1, reg)
> +
> #endif
>
> #endif /*__ARM64_VHE_MACROS_H__ */
> --
> 2.1.4
>

2015-08-31 18:47:57

by Christoffer Dall

[permalink] [raw]
Subject: Re: [PATCH 12/13] arm64: KVM: Split sysreg save/restore

On Wed, Jul 08, 2015 at 05:19:15PM +0100, Marc Zyngier wrote:
> As we're starting to get different requirements for non-VHE and VHE
> code paths, use a slightly more fine-grained approach:
>
> - __save/restore_sysregs: save/restore non-shared sysregs
> - __save/restore_shared_sysregs: save/restore only shared sysregs
>
> Of course, non-VHE always requires both.
>

I'm sorry, I don't really understand what this patch does.

What is a 'shared sysreg' exactly? One that is shared between the host
and the guest?

Or one that is shared between EL1 and EL2? Or is that the same thing?

> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> arch/arm64/kvm/hyp.S | 91 +++++++++++++++++++++++++---------------------------
> 1 file changed, 44 insertions(+), 47 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index b61591b..3cbd2c4 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -280,11 +280,6 @@ skip_el1_restore:
> mrs x20, par_el1
> mrs x21, mdscr_el1
>
> - mrs x22, tpidr_el0
> - mrs x23, tpidrro_el0
> - mrs x24, tpidr_el1
> - mrs x25, actlr_el1
> -
> stp x4, x5, [x3]
> stp x6, x7, [x3, #16]
> stp x8, x9, [x3, #32]
> @@ -294,8 +289,6 @@ skip_el1_restore:
> stp x16, x17, [x3, #96]
> stp x18, x19, [x3, #112]
> stp x20, x21, [x3, #128]
> - stp x22, x23, [x3, #144]
> - stp x24, x25, [x3, #160]
> .endm
>
> .macro save_debug
> @@ -508,8 +501,6 @@ skip_el1_restore:
> ldp x16, x17, [x3, #96]
> ldp x18, x19, [x3, #112]
> ldp x20, x21, [x3, #128]
> - ldp x22, x23, [x3, #144]
> - ldp x24, x25, [x3, #160]
>
> msr vmpidr_el2, x4
> msr csselr_el1, x5
> @@ -529,11 +520,6 @@ skip_el1_restore:
> msr_el1(cntkctl, x19)
> msr par_el1, x20
> msr mdscr_el1, x21
> -
> - msr tpidr_el0, x22
> - msr tpidrro_el0, x23
> - msr tpidr_el1, x24
> - msr actlr_el1, x25
> .endm
>
> .macro restore_debug
> @@ -913,10 +899,12 @@ ifnvhe nop, "msr vbar_el1, x2"
>
> .macro save_timer_state
> // x0: vcpu pointer
> - ldr x2, [x0, #VCPU_KVM]
> - kern_hyp_va x2
> - ldr w3, [x2, #KVM_TIMER_ENABLED]
> - cbz w3, 1f
> + // x1: return code
> + // x2: vcpu context
> + ldr x3, [x0, #VCPU_KVM]
> + kern_hyp_va x3
> + ldr w4, [x3, #KVM_TIMER_ENABLED]
> + cbz w4, 1f
>
> mrs_el0(x3, cntv_ctl)
> and x3, x3, #3
> @@ -931,9 +919,9 @@ ifnvhe nop, "msr vbar_el1, x2"
>
> 1:
> // Allow physical timer/counter access for the host
> - mrs x2, cnthctl_el2
> - orr x2, x2, #3
> - msr cnthctl_el2, x2
> + mrs x3, cnthctl_el2
> + orr x3, x3, #3
> + msr cnthctl_el2, x3
>
> // Clear cntvoff for the host
> ifnvhe "msr cntvoff_el2, xzr", nop
> @@ -941,34 +929,43 @@ ifnvhe "msr cntvoff_el2, xzr", nop
>
> .macro restore_timer_state
> // x0: vcpu pointer
> + // x2: vcpu context
> // Disallow physical timer access for the guest
> // Physical counter access is allowed
> - mrs x2, cnthctl_el2
> - orr x2, x2, #1
> - bic x2, x2, #2
> - msr cnthctl_el2, x2
> -
> - ldr x2, [x0, #VCPU_KVM]
> - kern_hyp_va x2
> - ldr w3, [x2, #KVM_TIMER_ENABLED]
> - cbz w3, 1f
> -
> - ldr x3, [x2, #KVM_TIMER_CNTVOFF]
> - msr cntvoff_el2, x3
> - ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL]
> - msr_el0(cntv_cval, x2)
> + mrs x3, cnthctl_el2
> + orr x3, x3, #1
> + bic x3, x3, #2
> + msr cnthctl_el2, x3
> +
> + ldr x3, [x0, #VCPU_KVM]
> + kern_hyp_va x3
> + ldr w4, [x3, #KVM_TIMER_ENABLED]
> + cbz w4, 1f
> +
> + ldr x4, [x3, #KVM_TIMER_CNTVOFF]
> + msr cntvoff_el2, x4
> + ldr x4, [x0, #VCPU_TIMER_CNTV_CVAL]
> + msr_el0(cntv_cval, x4)
> isb
>
> - ldr w2, [x0, #VCPU_TIMER_CNTV_CTL]
> - and x2, x2, #3
> - msr_el0(cntv_ctl, x2)
> + ldr w4, [x0, #VCPU_TIMER_CNTV_CTL]
> + and x4, x4, #3
> + msr_el0(cntv_ctl, x4)
> 1:
> .endm
>
> +__save_shared_sysregs:
> + save_shared_sysregs
> + ret
> +
> __save_sysregs:
> save_sysregs
> ret
>
> +__restore_shared_sysregs:
> + restore_shared_sysregs
> + ret
> +
> __restore_sysregs:
> restore_sysregs
> ret
> @@ -1010,10 +1007,9 @@ ENTRY(__kvm_vcpu_run)
>
> save_host_regs
> bl __save_fpsimd
> -ifnvhe "bl __save_sysregs", nop
> -ifnvhe "b 1f", nop
> - save_shared_sysregs
> -1:
> +ifnvhe "bl __save_sysregs", "nop"
> + bl __save_shared_sysregs
> +
> compute_debug_state 1f
> bl __save_debug
> 1:
> @@ -1027,6 +1023,7 @@ ifnvhe "b 1f", nop
> add x2, x0, #VCPU_CONTEXT
>
> bl __restore_sysregs
> + bl __restore_shared_sysregs
> bl __restore_fpsimd
>
> skip_debug_state x3, 1f
> @@ -1048,6 +1045,7 @@ __kvm_vcpu_return:
> save_guest_regs
> bl __save_fpsimd
> bl __save_sysregs
> + bl __save_shared_sysregs
>
> skip_debug_state x3, 1f
> bl __save_debug
> @@ -1064,10 +1062,8 @@ __kvm_vcpu_return:
> ldr x2, [x0, #VCPU_HOST_CONTEXT]
> kern_hyp_va x2
>
> -ifnvhe "bl __restore_sysregs", nop
> -ifnvhe "b 1f", nop
> - restore_shared_sysregs
> -1:
> +ifnvhe "bl __restore_sysregs", "nop"
> + bl __restore_shared_sysregs
> bl __restore_fpsimd
>
> skip_debug_state x3, 1f
> @@ -1159,7 +1155,8 @@ __kvm_hyp_panic:
> ldr x2, [x0, #VCPU_HOST_CONTEXT]
> kern_hyp_va x2
>
> - bl __restore_sysregs
> +ifnvhe "bl __restore_sysregs", "nop"
> + bl __restore_shared_sysregs
>
> 1: adr x0, __hyp_panic_str
> adr x1, 2f
> --
> 2.1.4
>

2015-08-31 18:51:08

by Christoffer Dall

[permalink] [raw]
Subject: Re: [PATCH 13/13] arm64: KVM: VHE: Early interrupt handling

On Wed, Jul 08, 2015 at 05:19:16PM +0100, Marc Zyngier wrote:
> With VHE enabled, it is possible to let the kernel handle an interrupt
> without saving the full guest context, and without restoring the full
> host context either. This reduces the latency of handling an interrupt.
>
> When an interrupt fires we can:
> - save the guest's general purpose registers, shared system registers
> and timer state
> - switch to a host context (setting vectors, deactivating traps and
> stage-2 translation)
> - restore the host's shared sysregs
>
> At that stage, we're able to jump to the interrupt handler.
>
> On return from the handler, we can finish the switch (save/restore
> whatever is missing from the above).

This feels like a specific optimization, and again it looks to me like
we're just going to see more of this.

For example, sending a virtual IPI should ideally be done without having
to do all the crazy save/restore stuff.

Maybe I'm missing something overall here, but I feel the whole design of
this solution requires some justification in the cover letter.

Thanks,
-Christoffer

>
> Signed-off-by: Marc Zyngier <[email protected]>
> ---
> arch/arm/kvm/arm.c | 3 +++
> arch/arm64/kvm/hyp.S | 56 ++++++++++++++++++++++++++++++++++++++---
> arch/arm64/kvm/vgic-v2-switch.S | 19 +++++++++++---
> arch/arm64/kvm/vgic-v3-switch.S | 33 +++++++++++++-----------
> 4 files changed, 90 insertions(+), 21 deletions(-)
>
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index ca3c662..ab9333f 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -573,6 +573,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
> * disabled. Enabling the interrupts now will have
> * the effect of taking the interrupt again, in SVC
> * mode this time.
> + *
> + * With VHE, the interrupt is likely to have been
> + * taken already.
> */
> local_irq_enable();
>
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index 3cbd2c4..ac95ba3 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -714,8 +714,10 @@ skip_el1_restore:
> .endm
>
> .macro skip_32bit_state tmp, target
> - // Skip 32bit state if not needed
> - mrs \tmp, hcr_el2
> + // Skip 32bit state if not needed.
> + // With VHE, we may have cleared the RW bit in HCR_EL2 early,
> + // and have to rely on the memory version instead.
> +ifnvhe "mrs \tmp, hcr_el2", _S_(ldr \tmp, [x0, #VCPU_HCR_EL2])
> tbnz \tmp, #HCR_RW_SHIFT, \target
> .endm
>
> @@ -1053,11 +1055,18 @@ __kvm_vcpu_return:
> save_guest_32bit_state
>
> save_timer_state
> +ifnvhe "b 1f", nop
> + alternative_insn "bl __vgic_v2_disable", \
> + "bl __vgic_v3_disable", \
> + ARM64_HAS_SYSREG_GIC_CPUIF
> +1:
> save_vgic_state
>
> deactivate_traps
> deactivate_vm
>
> +__kvm_vcpu_return_irq:
> +
> // Host context
> ldr x2, [x0, #VCPU_HOST_CONTEXT]
> kern_hyp_va x2
> @@ -1355,8 +1364,47 @@ el1_irq:
> push x0, x1
> push x2, x3
> mrs x0, tpidr_el2
> - mov x1, #ARM_EXCEPTION_IRQ
> - b __kvm_vcpu_return
> +ifnvhe _S_(mov x1, #ARM_EXCEPTION_IRQ), nop
> +ifnvhe "b __kvm_vcpu_return", nop
> +
> + // Fallthrough for VHE
> + add x2, x0, #VCPU_CONTEXT
> +
> + save_guest_regs
> + bl __save_shared_sysregs
> + save_timer_state
> + alternative_insn "bl __vgic_v2_disable", \
> + "bl __vgic_v3_disable", \
> + ARM64_HAS_SYSREG_GIC_CPUIF
> + deactivate_traps
> + deactivate_vm
> +
> + isb // Needed to ensure TGE is on and vectors have been switched
> +
> + ldr x2, [x0, #VCPU_HOST_CONTEXT]
> + restore_shared_sysregs
> +
> + mov x0, x2
> + adrp x1, handle_arch_irq
> + add x1, x1, #:lo12:handle_arch_irq
> + ldr x1, [x1]
> + blr x1
> +
> + // Back from the interrupt handler, finalize the world switch
> + mrs x0, tpidr_el2
> + add x2, x0, #VCPU_CONTEXT
> +
> + bl __save_fpsimd
> + bl __save_sysregs // We've already saved the shared regs
> + save_guest_32bit_state
> +
> + skip_debug_state x3, 1f
> + bl __save_debug
> +1:
> + save_vgic_state
> +
> + mov x1, #ARM_EXCEPTION_IRQ
> + b __kvm_vcpu_return_irq
>
> .ltorg
>
> diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S
> index 3f00071..c8864b4 100644
> --- a/arch/arm64/kvm/vgic-v2-switch.S
> +++ b/arch/arm64/kvm/vgic-v2-switch.S
> @@ -26,16 +26,30 @@
> #include <asm/kvm_arm.h>
> #include <asm/kvm_mmu.h>
>
> +#include "vhe-macros.h"
> +
> .text
> .pushsection .hyp.text, "ax"
>
> +ENTRY(__vgic_v2_disable)
> + /* Get VGIC VCTRL base into x2 */
> + ldr x2, [x0, #VCPU_KVM]
> + kern_hyp_va x2
> + ldr x2, [x2, #KVM_VGIC_VCTRL]
> + kern_hyp_va x2
> + cbz x2, 1f // disabled
> +
> + /* Clear GICH_HCR */
> + str wzr, [x2, #GICH_HCR]
> +1: ret
> +ENDPROC(__vgic_v2_disable)
> +
> /*
> * Save the VGIC CPU state into memory
> * x0: Register pointing to VCPU struct
> * Do not corrupt x1!!!
> */
> ENTRY(__save_vgic_v2_state)
> -__save_vgic_v2_state:
> /* Get VGIC VCTRL base into x2 */
> ldr x2, [x0, #VCPU_KVM]
> kern_hyp_va x2
> @@ -75,7 +89,7 @@ CPU_BE( str w10, [x3, #VGIC_V2_CPU_ELRSR] )
> str w11, [x3, #VGIC_V2_CPU_APR]
>
> /* Clear GICH_HCR */
> - str wzr, [x2, #GICH_HCR]
> +ifnvhe _S_(str wzr, [x2, #GICH_HCR]), nop
>
> /* Save list registers */
> add x2, x2, #GICH_LR0
> @@ -95,7 +109,6 @@ ENDPROC(__save_vgic_v2_state)
> * x0: Register pointing to VCPU struct
> */
> ENTRY(__restore_vgic_v2_state)
> -__restore_vgic_v2_state:
> /* Get VGIC VCTRL base into x2 */
> ldr x2, [x0, #VCPU_KVM]
> kern_hyp_va x2
> diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
> index 3c20730..b9b45e3 100644
> --- a/arch/arm64/kvm/vgic-v3-switch.S
> +++ b/arch/arm64/kvm/vgic-v3-switch.S
> @@ -25,6 +25,8 @@
> #include <asm/kvm_asm.h>
> #include <asm/kvm_arm.h>
>
> +#include "vhe-macros.h"
> +
> .text
> .pushsection .hyp.text, "ax"
>
> @@ -35,11 +37,21 @@
> #define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8)
>
> /*
> + * Disable the vcpu interface, preventing interrupts from
> + * being delivered.
> + */
> +ENTRY(__vgic_v3_disable)
> + // Nuke the HCR register.
> + msr_s ICH_HCR_EL2, xzr
> + ret
> +ENDPROC(__vgic_v3_disable)
> +
> +/*
> * Save the VGIC CPU state into memory
> * x0: Register pointing to VCPU struct
> * Do not corrupt x1!!!
> */
> -.macro save_vgic_v3_state
> +ENTRY(__save_vgic_v3_state)
> // Compute the address of struct vgic_cpu
> add x3, x0, #VCPU_VGIC_CPU
>
> @@ -58,7 +70,7 @@
> str w7, [x3, #VGIC_V3_CPU_EISR]
> str w8, [x3, #VGIC_V3_CPU_ELRSR]
>
> - msr_s ICH_HCR_EL2, xzr
> +ifnvhe _S_(msr_s ICH_HCR_EL2, xzr), nop
>
> mrs_s x21, ICH_VTR_EL2
> mvn w22, w21
> @@ -137,15 +149,17 @@
> orr x5, x5, #ICC_SRE_EL2_ENABLE
> msr_s ICC_SRE_EL2, x5
> isb
> - mov x5, #1
> + // If using VHE, we don't care about EL1 and can early out.
> +ifnvhe "mov x5, #1", ret
> msr_s ICC_SRE_EL1, x5
> -.endm
> + ret
> +ENDPROC(__save_vgic_v3_state)
>
> /*
> * Restore the VGIC CPU state from memory
> * x0: Register pointing to VCPU struct
> */
> -.macro restore_vgic_v3_state
> +ENTRY(__restore_vgic_v3_state)
> // Compute the address of struct vgic_cpu
> add x3, x0, #VCPU_VGIC_CPU
>
> @@ -249,15 +263,6 @@
> and x5, x5, #~ICC_SRE_EL2_ENABLE
> msr_s ICC_SRE_EL2, x5
> 1:
> -.endm
> -
> -ENTRY(__save_vgic_v3_state)
> - save_vgic_v3_state
> - ret
> -ENDPROC(__save_vgic_v3_state)
> -
> -ENTRY(__restore_vgic_v3_state)
> - restore_vgic_v3_state
> ret
> ENDPROC(__restore_vgic_v3_state)
>
> --
> 2.1.4
>