2024-04-30 18:16:17

by Colton Lewis

[permalink] [raw]
Subject: [PATCH v5] KVM: arm64: Add early_param to control WFx trapping

Add early_params to control WFI and WFE trapping. This is to
control the degree to which guests can wait for interrupts on their own without
being trapped by KVM. Options for each param are trap and notrap. trap
enables the trap. notrap disables the trap. Absent an explicitly set
policy, default to current behavior: disabling the trap if only a
single task is running and enabling otherwise.

Signed-off-by: Colton Lewis <[email protected]>
---
v5:

* Move trap configuration to vcpu_reset_hcr(). This required moving
kvm_emulate.h:vcpu_reset_hcr() to arm.c:kvm_vcpu_reset_hcr() to avoid needing
to pull scheduler headers and my enums into kvm_emulate.h. I thought the
function looked too bulky for that header anyway.
* Delete vcpu_{set,clear}_wfx_traps helpers that are no longer used anywhere.
* Remove documentation of explicit option for default behavior to avoid any
implicit suggestion default behavior will stay that way.

v4:
https://lore.kernel.org/kvmarm/[email protected]/

v3:
https://lore.kernel.org/kvmarm/[email protected]/

v2:
https://lore.kernel.org/kvmarm/[email protected]/

v1:
https://lore.kernel.org/kvmarm/[email protected]/

.../admin-guide/kernel-parameters.txt | 16 +++
arch/arm64/include/asm/kvm_emulate.h | 53 ---------
arch/arm64/include/asm/kvm_host.h | 7 ++
arch/arm64/kvm/arm.c | 110 +++++++++++++++++-
4 files changed, 127 insertions(+), 59 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 31b3a25680d0..a4d94d9abbe4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2653,6 +2653,22 @@
[KVM,ARM] Allow use of GICv4 for direct injection of
LPIs.

+ kvm-arm.wfe_trap_policy=
+ [KVM,ARM] Control when to set WFE instruction trap for
+ KVM VMs.
+
+ trap: set WFE instruction trap
+
+ notrap: clear WFE instruction trap
+
+ kvm-arm.wfi_trap_policy=
+ [KVM,ARM] Control when to set WFI instruction trap for
+ KVM VMs.
+
+ trap: set WFI instruction trap
+
+ notrap: clear WFI instruction trap
+
kvm_cma_resv_ratio=n [PPC]
Reserves given percentage from system memory area for
contiguous memory allocation for KVM hash pagetable
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index b804fe832184..c2a9a409ebfe 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -67,64 +67,11 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
}
#endif

-static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
- if (has_vhe() || has_hvhe())
- vcpu->arch.hcr_el2 |= HCR_E2H;
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
- /* route synchronous external abort exceptions to EL2 */
- vcpu->arch.hcr_el2 |= HCR_TEA;
- /* trap error record accesses */
- vcpu->arch.hcr_el2 |= HCR_TERR;
- }
-
- if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) {
- vcpu->arch.hcr_el2 |= HCR_FWB;
- } else {
- /*
- * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
- * get set in SCTLR_EL1 such that we can detect when the guest
- * MMU gets turned on and do the necessary cache maintenance
- * then.
- */
- vcpu->arch.hcr_el2 |= HCR_TVM;
- }
-
- if (cpus_have_final_cap(ARM64_HAS_EVT) &&
- !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
- vcpu->arch.hcr_el2 |= HCR_TID4;
- else
- vcpu->arch.hcr_el2 |= HCR_TID2;
-
- if (vcpu_el1_is_32bit(vcpu))
- vcpu->arch.hcr_el2 &= ~HCR_RW;
-
- if (kvm_has_mte(vcpu->kvm))
- vcpu->arch.hcr_el2 |= HCR_ATA;
-}
-
static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
{
return (unsigned long *)&vcpu->arch.hcr_el2;
}

-static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr_el2 &= ~HCR_TWE;
- if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
- vcpu->kvm->arch.vgic.nassgireq)
- vcpu->arch.hcr_el2 &= ~HCR_TWI;
- else
- vcpu->arch.hcr_el2 |= HCR_TWI;
-}
-
-static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr_el2 |= HCR_TWE;
- vcpu->arch.hcr_el2 |= HCR_TWI;
-}
-
static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 21c57b812569..315ee7bfc1cb 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -67,6 +67,13 @@ enum kvm_mode {
KVM_MODE_NV,
KVM_MODE_NONE,
};
+
+enum kvm_wfx_trap_policy {
+ KVM_WFX_NOTRAP_SINGLE_TASK, /* Default option */
+ KVM_WFX_NOTRAP,
+ KVM_WFX_TRAP,
+};
+
#ifdef CONFIG_KVM
enum kvm_mode kvm_get_mode(void);
#else
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a25265aca432..5ec52333e042 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -46,6 +46,8 @@
#include <kvm/arm_psci.h>

static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
+static enum kvm_wfx_trap_policy kvm_wfi_trap_policy = KVM_WFX_NOTRAP_SINGLE_TASK;
+static enum kvm_wfx_trap_policy kvm_wfe_trap_policy = KVM_WFX_NOTRAP_SINGLE_TASK;

DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);

@@ -456,11 +458,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);

- if (single_task_running())
- vcpu_clear_wfx_traps(vcpu);
- else
- vcpu_set_wfx_traps(vcpu);
-
if (vcpu_has_ptrauth(vcpu))
vcpu_ptrauth_disable(vcpu);
kvm_arch_vcpu_load_debug_state_flags(vcpu);
@@ -1391,6 +1388,72 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
return 0;
}

+static bool kvm_vcpu_should_clear_twi(struct kvm_vcpu *vcpu)
+{
+ if (likely(kvm_wfi_trap_policy == KVM_WFX_NOTRAP_SINGLE_TASK))
+ return single_task_running() &&
+ (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
+ vcpu->kvm->arch.vgic.nassgireq);
+
+ return kvm_wfi_trap_policy == KVM_WFX_NOTRAP;
+}
+
+static bool kvm_vcpu_should_clear_twe(struct kvm_vcpu *vcpu)
+{
+ if (likely(kvm_wfe_trap_policy == KVM_WFX_NOTRAP_SINGLE_TASK))
+ return single_task_running();
+
+ return kvm_wfe_trap_policy == KVM_WFX_NOTRAP;
+}
+
+static inline void kvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+ if (has_vhe() || has_hvhe())
+ vcpu->arch.hcr_el2 |= HCR_E2H;
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
+ /* route synchronous external abort exceptions to EL2 */
+ vcpu->arch.hcr_el2 |= HCR_TEA;
+ /* trap error record accesses */
+ vcpu->arch.hcr_el2 |= HCR_TERR;
+ }
+
+ if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) {
+ vcpu->arch.hcr_el2 |= HCR_FWB;
+ } else {
+ /*
+ * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
+ * get set in SCTLR_EL1 such that we can detect when the guest
+ * MMU gets turned on and do the necessary cache maintenance
+ * then.
+ */
+ vcpu->arch.hcr_el2 |= HCR_TVM;
+ }
+
+ if (cpus_have_final_cap(ARM64_HAS_EVT) &&
+ !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
+ vcpu->arch.hcr_el2 |= HCR_TID4;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TID2;
+
+ if (vcpu_el1_is_32bit(vcpu))
+ vcpu->arch.hcr_el2 &= ~HCR_RW;
+
+ if (kvm_has_mte(vcpu->kvm))
+ vcpu->arch.hcr_el2 |= HCR_ATA;
+
+
+ if (kvm_vcpu_should_clear_twe(vcpu))
+ vcpu->arch.hcr_el2 &= ~HCR_TWE;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TWE;
+
+ if (kvm_vcpu_should_clear_twi(vcpu))
+ vcpu->arch.hcr_el2 &= ~HCR_TWI;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TWI;
+}
+
static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
struct kvm_vcpu_init *init)
{
@@ -1427,7 +1490,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
icache_inval_all_pou();
}

- vcpu_reset_hcr(vcpu);
+ kvm_vcpu_reset_hcr(vcpu);
vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu);

/*
@@ -2654,6 +2717,41 @@ static int __init early_kvm_mode_cfg(char *arg)
}
early_param("kvm-arm.mode", early_kvm_mode_cfg);

+static int __init early_kvm_wfx_trap_policy_cfg(char *arg, enum kvm_wfx_trap_policy *p)
+{
+ if (!arg)
+ return -EINVAL;
+
+ if (strcmp(arg, "trap") == 0) {
+ *p = KVM_WFX_TRAP;
+ return 0;
+ }
+
+ if (strcmp(arg, "notrap") == 0) {
+ *p = KVM_WFX_NOTRAP;
+ return 0;
+ }
+
+ if (strcmp(arg, "default") == 0) {
+ *p = KVM_WFX_NOTRAP_SINGLE_TASK;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int __init early_kvm_wfi_trap_policy_cfg(char *arg)
+{
+ return early_kvm_wfx_trap_policy_cfg(arg, &kvm_wfi_trap_policy);
+}
+early_param("kvm-arm.wfi_trap_policy", early_kvm_wfi_trap_policy_cfg);
+
+static int __init early_kvm_wfe_trap_policy_cfg(char *arg)
+{
+ return early_kvm_wfx_trap_policy_cfg(arg, &kvm_wfe_trap_policy);
+}
+early_param("kvm-arm.wfe_trap_policy", early_kvm_wfe_trap_policy_cfg);
+
enum kvm_mode kvm_get_mode(void)
{
return kvm_mode;
--
2.45.0.rc0.197.gbae5840b3b-goog


2024-05-10 14:26:51

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH v5] KVM: arm64: Add early_param to control WFx trapping

On Tue, 30 Apr 2024 19:14:44 +0100,
Colton Lewis <[email protected]> wrote:
>
> Add early_params to control WFI and WFE trapping. This is to
> control the degree to which guests can wait for interrupts on their own without
> being trapped by KVM. Options for each param are trap and notrap. trap
> enables the trap. notrap disables the trap. Absent an explicitly set
> policy, default to current behavior: disabling the trap if only a
> single task is running and enabling otherwise.
>
> Signed-off-by: Colton Lewis <[email protected]>
> ---
> v5:
>
> * Move trap configuration to vcpu_reset_hcr(). This required moving
> kvm_emulate.h:vcpu_reset_hcr() to arm.c:kvm_vcpu_reset_hcr() to avoid needing
> to pull scheduler headers and my enums into kvm_emulate.h. I thought the
> function looked too bulky for that header anyway.
> * Delete vcpu_{set,clear}_wfx_traps helpers that are no longer used anywhere.
> * Remove documentation of explicit option for default behavior to avoid any
> implicit suggestion default behavior will stay that way.
>
> v4:
> https://lore.kernel.org/kvmarm/[email protected]/
>
> v3:
> https://lore.kernel.org/kvmarm/[email protected]/
>
> v2:
> https://lore.kernel.org/kvmarm/[email protected]/
>
> v1:
> https://lore.kernel.org/kvmarm/[email protected]/
>
> .../admin-guide/kernel-parameters.txt | 16 +++
> arch/arm64/include/asm/kvm_emulate.h | 53 ---------
> arch/arm64/include/asm/kvm_host.h | 7 ++
> arch/arm64/kvm/arm.c | 110 +++++++++++++++++-
> 4 files changed, 127 insertions(+), 59 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 31b3a25680d0..a4d94d9abbe4 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -2653,6 +2653,22 @@
> [KVM,ARM] Allow use of GICv4 for direct injection of
> LPIs.
>
> + kvm-arm.wfe_trap_policy=
> + [KVM,ARM] Control when to set WFE instruction trap for
> + KVM VMs.
> +
> + trap: set WFE instruction trap
> +
> + notrap: clear WFE instruction trap
> +
> + kvm-arm.wfi_trap_policy=
> + [KVM,ARM] Control when to set WFI instruction trap for
> + KVM VMs.
> +
> + trap: set WFI instruction trap
> +
> + notrap: clear WFI instruction trap
> +

Please make it clear that neither trap is guaranteed. The
architecture *allows* an implementation to trap when no events (resp.
interrupts) are pending, but nothing more. An implementation is
perfectly allowed to ignore these bits.

> kvm_cma_resv_ratio=n [PPC]
> Reserves given percentage from system memory area for
> contiguous memory allocation for KVM hash pagetable
> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> index b804fe832184..c2a9a409ebfe 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -67,64 +67,11 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
> }
> #endif
>
> -static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
> -{
> - vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
> - if (has_vhe() || has_hvhe())
> - vcpu->arch.hcr_el2 |= HCR_E2H;
> - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
> - /* route synchronous external abort exceptions to EL2 */
> - vcpu->arch.hcr_el2 |= HCR_TEA;
> - /* trap error record accesses */
> - vcpu->arch.hcr_el2 |= HCR_TERR;
> - }
> -
> - if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) {
> - vcpu->arch.hcr_el2 |= HCR_FWB;
> - } else {
> - /*
> - * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
> - * get set in SCTLR_EL1 such that we can detect when the guest
> - * MMU gets turned on and do the necessary cache maintenance
> - * then.
> - */
> - vcpu->arch.hcr_el2 |= HCR_TVM;
> - }
> -
> - if (cpus_have_final_cap(ARM64_HAS_EVT) &&
> - !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
> - vcpu->arch.hcr_el2 |= HCR_TID4;
> - else
> - vcpu->arch.hcr_el2 |= HCR_TID2;
> -
> - if (vcpu_el1_is_32bit(vcpu))
> - vcpu->arch.hcr_el2 &= ~HCR_RW;
> -
> - if (kvm_has_mte(vcpu->kvm))
> - vcpu->arch.hcr_el2 |= HCR_ATA;
> -}
> -
> static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
> {
> return (unsigned long *)&vcpu->arch.hcr_el2;
> }
>
> -static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
> -{
> - vcpu->arch.hcr_el2 &= ~HCR_TWE;
> - if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
> - vcpu->kvm->arch.vgic.nassgireq)
> - vcpu->arch.hcr_el2 &= ~HCR_TWI;
> - else
> - vcpu->arch.hcr_el2 |= HCR_TWI;
> -}
> -
> -static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
> -{
> - vcpu->arch.hcr_el2 |= HCR_TWE;
> - vcpu->arch.hcr_el2 |= HCR_TWI;
> -}
> -
> static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
> {
> vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 21c57b812569..315ee7bfc1cb 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -67,6 +67,13 @@ enum kvm_mode {
> KVM_MODE_NV,
> KVM_MODE_NONE,
> };
> +
> +enum kvm_wfx_trap_policy {
> + KVM_WFX_NOTRAP_SINGLE_TASK, /* Default option */
> + KVM_WFX_NOTRAP,
> + KVM_WFX_TRAP,
> +};

Since this is only ever used in arm.c, it really doesn't need to be
exposed anywhere else.

> +
> #ifdef CONFIG_KVM
> enum kvm_mode kvm_get_mode(void);
> #else
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index a25265aca432..5ec52333e042 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -46,6 +46,8 @@
> #include <kvm/arm_psci.h>
>
> static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
> +static enum kvm_wfx_trap_policy kvm_wfi_trap_policy = KVM_WFX_NOTRAP_SINGLE_TASK;
> +static enum kvm_wfx_trap_policy kvm_wfe_trap_policy = KVM_WFX_NOTRAP_SINGLE_TASK;

It would be worth declaring those as __read_mostly.

>
> DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
>
> @@ -456,11 +458,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
> kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
>
> - if (single_task_running())
> - vcpu_clear_wfx_traps(vcpu);
> - else
> - vcpu_set_wfx_traps(vcpu);
> -
> if (vcpu_has_ptrauth(vcpu))
> vcpu_ptrauth_disable(vcpu);
> kvm_arch_vcpu_load_debug_state_flags(vcpu);
> @@ -1391,6 +1388,72 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
> return 0;
> }
>
> +static bool kvm_vcpu_should_clear_twi(struct kvm_vcpu *vcpu)
> +{
> + if (likely(kvm_wfi_trap_policy == KVM_WFX_NOTRAP_SINGLE_TASK))
> + return single_task_running() &&
> + (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
> + vcpu->kvm->arch.vgic.nassgireq);

So you are evaluating a runtime condition (scheduler queue length,
number of LPIs)...

> +
> + return kvm_wfi_trap_policy == KVM_WFX_NOTRAP;
> +}
> +
> +static bool kvm_vcpu_should_clear_twe(struct kvm_vcpu *vcpu)
> +{
> + if (likely(kvm_wfe_trap_policy == KVM_WFX_NOTRAP_SINGLE_TASK))
> + return single_task_running();
> +
> + return kvm_wfe_trap_policy == KVM_WFX_NOTRAP;
> +}
> +
> +static inline void kvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)

Why the inline?

> +{
> + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
> + if (has_vhe() || has_hvhe())
> + vcpu->arch.hcr_el2 |= HCR_E2H;
> + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
> + /* route synchronous external abort exceptions to EL2 */
> + vcpu->arch.hcr_el2 |= HCR_TEA;
> + /* trap error record accesses */
> + vcpu->arch.hcr_el2 |= HCR_TERR;
> + }
> +
> + if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) {
> + vcpu->arch.hcr_el2 |= HCR_FWB;
> + } else {
> + /*
> + * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
> + * get set in SCTLR_EL1 such that we can detect when the guest
> + * MMU gets turned on and do the necessary cache maintenance
> + * then.
> + */
> + vcpu->arch.hcr_el2 |= HCR_TVM;
> + }
> +
> + if (cpus_have_final_cap(ARM64_HAS_EVT) &&
> + !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
> + vcpu->arch.hcr_el2 |= HCR_TID4;
> + else
> + vcpu->arch.hcr_el2 |= HCR_TID2;
> +
> + if (vcpu_el1_is_32bit(vcpu))
> + vcpu->arch.hcr_el2 &= ~HCR_RW;
> +
> + if (kvm_has_mte(vcpu->kvm))
> + vcpu->arch.hcr_el2 |= HCR_ATA;
> +
> +
> + if (kvm_vcpu_should_clear_twe(vcpu))
> + vcpu->arch.hcr_el2 &= ~HCR_TWE;
> + else
> + vcpu->arch.hcr_el2 |= HCR_TWE;
> +
> + if (kvm_vcpu_should_clear_twi(vcpu))
> + vcpu->arch.hcr_el2 &= ~HCR_TWI;
> + else
> + vcpu->arch.hcr_el2 |= HCR_TWI;

.. and from the above runtime conditions you make it a forever
decision, for a vcpu that still hasn't executed a single instruction.
What could possibly go wrong?

> +}
> +
> static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
> struct kvm_vcpu_init *init)
> {
> @@ -1427,7 +1490,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
> icache_inval_all_pou();
> }
>
> - vcpu_reset_hcr(vcpu);
> + kvm_vcpu_reset_hcr(vcpu);
> vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu);
>
> /*
> @@ -2654,6 +2717,41 @@ static int __init early_kvm_mode_cfg(char *arg)
> }
> early_param("kvm-arm.mode", early_kvm_mode_cfg);
>
> +static int __init early_kvm_wfx_trap_policy_cfg(char *arg, enum kvm_wfx_trap_policy *p)
> +{
> + if (!arg)
> + return -EINVAL;
> +
> + if (strcmp(arg, "trap") == 0) {
> + *p = KVM_WFX_TRAP;
> + return 0;
> + }
> +
> + if (strcmp(arg, "notrap") == 0) {
> + *p = KVM_WFX_NOTRAP;
> + return 0;
> + }
> +
> + if (strcmp(arg, "default") == 0) {
> + *p = KVM_WFX_NOTRAP_SINGLE_TASK;
> + return 0;
> + }

Where is this "default" coming from? It's not documented.

M.

--
Without deviation from the norm, progress is not possible.

2024-05-16 16:03:16

by Colton Lewis

[permalink] [raw]
Subject: Re: [PATCH v5] KVM: arm64: Add early_param to control WFx trapping

Hi Marc. Thanks for the review.

Marc Zyngier <[email protected]> writes:

> On Tue, 30 Apr 2024 19:14:44 +0100,
> Colton Lewis <[email protected]> wrote:
>> diff --git a/Documentation/admin-guide/kernel-parameters.txt
>> b/Documentation/admin-guide/kernel-parameters.txt
>> index 31b3a25680d0..a4d94d9abbe4 100644
>> --- a/Documentation/admin-guide/kernel-parameters.txt
>> +++ b/Documentation/admin-guide/kernel-parameters.txt
>> @@ -2653,6 +2653,22 @@
>> [KVM,ARM] Allow use of GICv4 for direct injection of
>> LPIs.

>> + kvm-arm.wfe_trap_policy=
>> + [KVM,ARM] Control when to set WFE instruction trap for
>> + KVM VMs.
>> +
>> + trap: set WFE instruction trap
>> +
>> + notrap: clear WFE instruction trap
>> +
>> + kvm-arm.wfi_trap_policy=
>> + [KVM,ARM] Control when to set WFI instruction trap for
>> + KVM VMs.
>> +
>> + trap: set WFI instruction trap
>> +
>> + notrap: clear WFI instruction trap
>> +

> Please make it clear that neither trap is guaranteed. The
> architecture *allows* an implementation to trap when no events (resp.
> interrupts) are pending, but nothing more. An implementation is
> perfectly allowed to ignore these bits.

Will do. I'll just add an additional sentence stating "Traps are allowed
but not guaranteed by the CPU architecture"

>> diff --git a/arch/arm64/include/asm/kvm_host.h
>> b/arch/arm64/include/asm/kvm_host.h
>> index 21c57b812569..315ee7bfc1cb 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -67,6 +67,13 @@ enum kvm_mode {
>> KVM_MODE_NV,
>> KVM_MODE_NONE,
>> };
>> +
>> +enum kvm_wfx_trap_policy {
>> + KVM_WFX_NOTRAP_SINGLE_TASK, /* Default option */
>> + KVM_WFX_NOTRAP,
>> + KVM_WFX_TRAP,
>> +};

> Since this is only ever used in arm.c, it really doesn't need to be
> exposed anywhere else.

I can move it to there.

>> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
>> index a25265aca432..5ec52333e042 100644
>> --- a/arch/arm64/kvm/arm.c
>> +++ b/arch/arm64/kvm/arm.c
>> @@ -46,6 +46,8 @@
>> #include <kvm/arm_psci.h>

>> static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
>> +static enum kvm_wfx_trap_policy kvm_wfi_trap_policy =
>> KVM_WFX_NOTRAP_SINGLE_TASK;
>> +static enum kvm_wfx_trap_policy kvm_wfe_trap_policy =
>> KVM_WFX_NOTRAP_SINGLE_TASK;

> It would be worth declaring those as __read_mostly.

Will do.

>> +static bool kvm_vcpu_should_clear_twi(struct kvm_vcpu *vcpu)
>> +{
>> + if (likely(kvm_wfi_trap_policy == KVM_WFX_NOTRAP_SINGLE_TASK))
>> + return single_task_running() &&
>> + (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
>> + vcpu->kvm->arch.vgic.nassgireq);

> So you are evaluating a runtime condition (scheduler queue length,
> number of LPIs)...

Yes. Only in the case of default behavior when no option is given, which
should be equivalent to what the code was doing before.

>> +
>> + return kvm_wfi_trap_policy == KVM_WFX_NOTRAP;
>> +}
>> +
>> +static bool kvm_vcpu_should_clear_twe(struct kvm_vcpu *vcpu)
>> +{
>> + if (likely(kvm_wfe_trap_policy == KVM_WFX_NOTRAP_SINGLE_TASK))
>> + return single_task_running();
>> +
>> + return kvm_wfe_trap_policy == KVM_WFX_NOTRAP;
>> +}
>> +
>> +static inline void kvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)

> Why the inline?

Because I moved it from the kvm_emulate.h header with no
modification. It doesn't have to be.

>> +{
>> + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
>> + if (has_vhe() || has_hvhe())
>> + vcpu->arch.hcr_el2 |= HCR_E2H;
>> + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
>> + /* route synchronous external abort exceptions to EL2 */
>> + vcpu->arch.hcr_el2 |= HCR_TEA;
>> + /* trap error record accesses */
>> + vcpu->arch.hcr_el2 |= HCR_TERR;
>> + }
>> +
>> + if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) {
>> + vcpu->arch.hcr_el2 |= HCR_FWB;
>> + } else {
>> + /*
>> + * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
>> + * get set in SCTLR_EL1 such that we can detect when the guest
>> + * MMU gets turned on and do the necessary cache maintenance
>> + * then.
>> + */
>> + vcpu->arch.hcr_el2 |= HCR_TVM;
>> + }
>> +
>> + if (cpus_have_final_cap(ARM64_HAS_EVT) &&
>> + !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
>> + vcpu->arch.hcr_el2 |= HCR_TID4;
>> + else
>> + vcpu->arch.hcr_el2 |= HCR_TID2;
>> +
>> + if (vcpu_el1_is_32bit(vcpu))
>> + vcpu->arch.hcr_el2 &= ~HCR_RW;
>> +
>> + if (kvm_has_mte(vcpu->kvm))
>> + vcpu->arch.hcr_el2 |= HCR_ATA;
>> +
>> +
>> + if (kvm_vcpu_should_clear_twe(vcpu))
>> + vcpu->arch.hcr_el2 &= ~HCR_TWE;
>> + else
>> + vcpu->arch.hcr_el2 |= HCR_TWE;
>> +
>> + if (kvm_vcpu_should_clear_twi(vcpu))
>> + vcpu->arch.hcr_el2 &= ~HCR_TWI;
>> + else
>> + vcpu->arch.hcr_el2 |= HCR_TWI;

> ... and from the above runtime conditions you make it a forever
> decision, for a vcpu that still hasn't executed a single instruction.
> What could possibly go wrong?

Oh I see. kvm_arch_vcpu_ioctl_vcpu_init only executes once when the vcpu
is created (makes sense given the name), thereby making the decision
permanent for the life of the vcpu. I misunderstood that fact before.

I will move the decision back to when the vcpu is loaded as it was in
earlier versions of this series.

>> +static int __init early_kvm_wfx_trap_policy_cfg(char *arg, enum
>> kvm_wfx_trap_policy *p)
>> +{
>> + if (!arg)
>> + return -EINVAL;
>> +
>> + if (strcmp(arg, "trap") == 0) {
>> + *p = KVM_WFX_TRAP;
>> + return 0;
>> + }
>> +
>> + if (strcmp(arg, "notrap") == 0) {
>> + *p = KVM_WFX_NOTRAP;
>> + return 0;
>> + }
>> +
>> + if (strcmp(arg, "default") == 0) {
>> + *p = KVM_WFX_NOTRAP_SINGLE_TASK;
>> + return 0;
>> + }

> Where is this "default" coming from? It's not documented.

It was explicitly documented on earlier patch versions, but then I was
told we didn't want people to rely on the default behavior so we have
flexibility to change it in the future.

There isn't much use for it if it isn't documented, so I'll take it out.