2021-09-03 13:41:59

by Juergen Gross

Subject: [PATCH v2 3/6] x86/kvm: introduce per cpu vcpu masks

In order to support high vcpu counts per guest, don't use on-stack
vcpu bitmasks. As none of the bitmasks currently in use occurs in
functions subject to recursion, it is fairly easy to replace them
with per-cpu bitmasks.

Disable preemption (via a local lock) while such a bitmask is being
used, in order to avoid double usage in case we are switched to
another cpu.

Signed-off-by: Juergen Gross <[email protected]>
---
V2:
- use local_lock() instead of preempt_disable() (Paolo Bonzini)
---
 arch/x86/include/asm/kvm_host.h | 10 ++++++++++
 arch/x86/kvm/hyperv.c           | 25 ++++++++++++++++++-------
 arch/x86/kvm/irq_comm.c         |  9 +++++++--
 arch/x86/kvm/x86.c              | 22 +++++++++++++++++++++-
 4 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3513edee8e22..a809a9e4fa5c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -15,6 +15,7 @@
 #include <linux/cpumask.h>
 #include <linux/irq_work.h>
 #include <linux/irq.h>
+#include <linux/local_lock.h>
 
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
@@ -1591,6 +1592,15 @@ extern bool kvm_has_bus_lock_exit;
 /* maximum vcpu-id */
 unsigned int kvm_max_vcpu_id(void);
 
+/* per cpu vcpu bitmasks, protected by kvm_pcpu_mask_lock */
+DECLARE_PER_CPU(local_lock_t, kvm_pcpu_mask_lock);
+extern unsigned long __percpu *kvm_pcpu_vcpu_mask;
+#define KVM_VCPU_MASK_SZ \
+	(sizeof(*kvm_pcpu_vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS))
+extern u64 __percpu *kvm_hv_vp_bitmap;
+#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
+#define KVM_HV_VPMAP_SZ (sizeof(u64) * KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
+
 extern u64 kvm_mce_cap_supported;
 
 /*
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 41d2a53c5dea..680743e43c5b 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -41,7 +41,7 @@
 /* "Hv#1" signature */
 #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
 
-#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
+u64 __percpu *kvm_hv_vp_bitmap;
 
 static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
 				bool vcpu_kick);
@@ -1701,8 +1701,7 @@ static __always_inline unsigned long *sparse_set_to_vcpu_mask(
 	struct kvm_vcpu *vcpu;
 	int i, bank, sbank = 0;
 
-	memset(vp_bitmap, 0,
-	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
+	memset(vp_bitmap, 0, KVM_HV_VPMAP_SZ);
 	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
 			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
 		vp_bitmap[bank] = sparse_banks[sbank++];
@@ -1740,8 +1739,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 	struct hv_tlb_flush_ex flush_ex;
 	struct hv_tlb_flush flush;
-	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
-	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+	u64 *vp_bitmap;
+	unsigned long *vcpu_bitmap;
 	unsigned long *vcpu_mask;
 	u64 valid_bank_mask;
 	u64 sparse_banks[64];
@@ -1821,6 +1820,10 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 
 	cpumask_clear(&hv_vcpu->tlb_flush);
 
+	local_lock(&kvm_pcpu_mask_lock);
+	vcpu_bitmap = this_cpu_ptr(kvm_pcpu_vcpu_mask);
+	vp_bitmap = this_cpu_ptr(kvm_hv_vp_bitmap);
+
 	vcpu_mask = all_cpus ? NULL :
 		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
 					vp_bitmap, vcpu_bitmap);
@@ -1832,6 +1835,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
 				    NULL, vcpu_mask, &hv_vcpu->tlb_flush);
 
+	local_unlock(&kvm_pcpu_mask_lock);
+
 ret_success:
 	/* We always do full TLB flush, set 'Reps completed' = 'Rep Count' */
 	return (u64)HV_STATUS_SUCCESS |
@@ -1862,8 +1867,8 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	struct kvm *kvm = vcpu->kvm;
 	struct hv_send_ipi_ex send_ipi_ex;
 	struct hv_send_ipi send_ipi;
-	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
-	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+	u64 *vp_bitmap;
+	unsigned long *vcpu_bitmap;
 	unsigned long *vcpu_mask;
 	unsigned long valid_bank_mask;
 	u64 sparse_banks[64];
@@ -1920,12 +1925,18 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
 		return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
+	local_lock(&kvm_pcpu_mask_lock);
+	vcpu_bitmap = this_cpu_ptr(kvm_pcpu_vcpu_mask);
+	vp_bitmap = this_cpu_ptr(kvm_hv_vp_bitmap);
+
 	vcpu_mask = all_cpus ? NULL :
 		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
 					vp_bitmap, vcpu_bitmap);
 
 	kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
 
+	local_unlock(&kvm_pcpu_mask_lock);
+
 ret_success:
 	return HV_STATUS_SUCCESS;
 }
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index d5b72a08e566..c331204de007 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -47,7 +47,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 {
 	int i, r = -1;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
-	unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+	unsigned long *dest_vcpu_bitmap;
 	unsigned int dest_vcpus = 0;
 
 	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
@@ -59,7 +59,10 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		irq->delivery_mode = APIC_DM_FIXED;
 	}
 
-	memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+	local_lock(&kvm_pcpu_mask_lock);
+	dest_vcpu_bitmap = this_cpu_ptr(kvm_pcpu_vcpu_mask);
+
+	memset(dest_vcpu_bitmap, 0, KVM_VCPU_MASK_SZ);
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (!kvm_apic_present(vcpu))
@@ -93,6 +96,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		lowest = kvm_get_vcpu(kvm, idx);
 	}
 
+	local_unlock(&kvm_pcpu_mask_lock);
+
 	if (lowest)
 		r = kvm_apic_set_irq(lowest, irq, dest_map);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b6f38f0b617..fd19b72a5733 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -213,6 +213,10 @@ unsigned int kvm_max_vcpu_id(void)
 }
 EXPORT_SYMBOL_GPL(kvm_max_vcpu_id);
 
+DEFINE_PER_CPU(local_lock_t, kvm_pcpu_mask_lock) =
+	INIT_LOCAL_LOCK(kvm_pcpu_mask_lock);
+unsigned long __percpu *kvm_pcpu_vcpu_mask;
+
 /*
  * Restoring the host value for MSRs that are only consumed when running in
  * usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
@@ -11029,9 +11033,18 @@ int kvm_arch_hardware_setup(void *opaque)
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		rdmsrl(MSR_IA32_XSS, host_xss);
 
+	kvm_pcpu_vcpu_mask = __alloc_percpu(KVM_VCPU_MASK_SZ,
+					    sizeof(unsigned long));
+	kvm_hv_vp_bitmap = __alloc_percpu(KVM_HV_VPMAP_SZ, sizeof(u64));
+
+	if (!kvm_pcpu_vcpu_mask || !kvm_hv_vp_bitmap) {
+		r = -ENOMEM;
+		goto err;
+	}
+
 	r = ops->hardware_setup();
 	if (r != 0)
-		return r;
+		goto err;
 
 	memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
 	kvm_ops_static_call_update();
@@ -11059,11 +11072,18 @@ int kvm_arch_hardware_setup(void *opaque)
 
 	kvm_init_msr_list();
 	return 0;
+
+ err:
+	free_percpu(kvm_pcpu_vcpu_mask);
+	free_percpu(kvm_hv_vp_bitmap);
+	return r;
 }
 
 void kvm_arch_hardware_unsetup(void)
 {
 	static_call(kvm_x86_hardware_unsetup)();
+	free_percpu(kvm_pcpu_vcpu_mask);
+	free_percpu(kvm_hv_vp_bitmap);
 }
 
 int kvm_arch_check_processor_compat(void *opaque)
--
2.26.2
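
[ A minimal sketch of the local_lock()-protected per-cpu scratch buffer
pattern the patch uses, for readers unfamiliar with it. The demo_* names
are invented for illustration; local_lock(), this_cpu_ptr(),
__alloc_percpu() and free_percpu() are the real kernel APIs used above. ]

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/local_lock.h>
#include <linux/percpu.h>

/* One lock instance per cpu, guarding that cpu's copy of the buffer. */
static DEFINE_PER_CPU(local_lock_t, demo_lock) = INIT_LOCAL_LOCK(demo_lock);
static unsigned long __percpu *demo_mask;

static int demo_init(void)
{
	/* One bitmask per cpu, sized and aligned like kvm_pcpu_vcpu_mask. */
	demo_mask = __alloc_percpu(BITS_TO_LONGS(4096) * sizeof(unsigned long),
				   sizeof(unsigned long));
	return demo_mask ? 0 : -ENOMEM;
}

static void demo_use(void)
{
	unsigned long *mask;

	/*
	 * On non-RT kernels local_lock() disables preemption; on
	 * PREEMPT_RT it takes a per-cpu lock instead. Either way this
	 * cpu's buffer cannot be claimed twice.
	 */
	local_lock(&demo_lock);
	mask = this_cpu_ptr(demo_mask);
	/* ... fill and consume the bitmask ... */
	local_unlock(&demo_lock);
}

static void demo_exit(void)
{
	free_percpu(demo_mask);
}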


2021-09-03 16:07:10

by Eduardo Habkost

Subject: Re: [PATCH v2 3/6] x86/kvm: introduce per cpu vcpu masks

On Fri, Sep 03, 2021 at 03:08:04PM +0200, Juergen Gross wrote:
> In order to support high vcpu counts per guest, don't use on-stack
> vcpu bitmasks. As none of the bitmasks currently in use occurs in
> functions subject to recursion, it is fairly easy to replace them
> with per-cpu bitmasks.
>
> Disable preemption (via a local lock) while such a bitmask is being
> used, in order to avoid double usage in case we are switched to
> another cpu.
>
> Signed-off-by: Juergen Gross <[email protected]>

Note that there's another patch in flight that will introduce one
more KVM_MAX_VCPUS-sized bitmap variable on the stack:
https://lore.kernel.org/lkml/[email protected]/

Considering that the patch is a bug fix, should this series be
rebased on top of that?

--
Eduardo

2021-09-06 04:37:25

by Juergen Gross

Subject: Re: [PATCH v2 3/6] x86/kvm: introduce per cpu vcpu masks

On 03.09.21 18:05, Eduardo Habkost wrote:
> On Fri, Sep 03, 2021 at 03:08:04PM +0200, Juergen Gross wrote:
>> In order to support high vcpu counts per guest, don't use on-stack
>> vcpu bitmasks. As none of the bitmasks currently in use occurs in
>> functions subject to recursion, it is fairly easy to replace them
>> with per-cpu bitmasks.
>>
>> Disable preemption (via a local lock) while such a bitmask is being
>> used, in order to avoid double usage in case we are switched to
>> another cpu.
>>
>> Signed-off-by: Juergen Gross <[email protected]>
>
> Note that there's another patch in flight that will introduce one
> more KVM_MAX_VCPUS-sized bitmap variable on the stack:
> https://lore.kernel.org/lkml/[email protected]/
>
> Considering that the patch is a bug fix, should this series be
> rebased on top of that?
>

Yes, I can do that.


Juergen



2021-09-07 18:59:10

by Eduardo Habkost

Subject: Re: [PATCH v2 3/6] x86/kvm: introduce per cpu vcpu masks

On Fri, Sep 03, 2021 at 03:08:04PM +0200, Juergen Gross wrote:
> In order to support high vcpu counts per guest, don't use on-stack
> vcpu bitmasks. As none of the bitmasks currently in use occurs in
> functions subject to recursion, it is fairly easy to replace them
> with per-cpu bitmasks.
>
> Disable preemption (via a local lock) while such a bitmask is being
> used, in order to avoid double usage in case we are switched to
> another cpu.
>
> Signed-off-by: Juergen Gross <[email protected]>
> ---
> V2:
> - use local_lock() instead of preempt_disable() (Paolo Bonzini)
> ---
>  arch/x86/include/asm/kvm_host.h | 10 ++++++++++
>  arch/x86/kvm/hyperv.c           | 25 ++++++++++++++++++-------
>  arch/x86/kvm/irq_comm.c         |  9 +++++++--
>  arch/x86/kvm/x86.c              | 22 +++++++++++++++++++++-
>  4 files changed, 56 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3513edee8e22..a809a9e4fa5c 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -15,6 +15,7 @@
>  #include <linux/cpumask.h>
>  #include <linux/irq_work.h>
>  #include <linux/irq.h>
> +#include <linux/local_lock.h>
>
>  #include <linux/kvm.h>
>  #include <linux/kvm_para.h>
> @@ -1591,6 +1592,15 @@ extern bool kvm_has_bus_lock_exit;
>  /* maximum vcpu-id */
>  unsigned int kvm_max_vcpu_id(void);
>
> +/* per cpu vcpu bitmasks, protected by kvm_pcpu_mask_lock */
> +DECLARE_PER_CPU(local_lock_t, kvm_pcpu_mask_lock);
> +extern unsigned long __percpu *kvm_pcpu_vcpu_mask;
> +#define KVM_VCPU_MASK_SZ \
> +	(sizeof(*kvm_pcpu_vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS))
> +extern u64 __percpu *kvm_hv_vp_bitmap;
> +#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
> +#define KVM_HV_VPMAP_SZ (sizeof(u64) * KVM_HV_MAX_SPARSE_VCPU_SET_BITS)

I have just realized that the Hyper-V sparse bitmap format can
support only up to 4096 CPUs, and the current implementation of
sparse_set_to_vcpu_mask() won't even work correctly if
KVM_MAX_VCPUS is larger than 4096.

This means vp_bitmap can't and will never be larger than 512
bytes. Isn't a per-CPU variable for vp_bitmap overkill in this
case?

> [...]

--
Eduardo
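
[ Background on the 4096 limit: in the sparse set format, valid_bank_mask
is a single u64, so at most 64 banks can be marked valid, and each bank
covers 64 VP indices; 64 * 64 = 4096 vCPUs, i.e. vp_bitmap is at most
4096 bits = 512 bytes. A rough sketch of the bank expansion, with
invented demo_* names mirroring sparse_set_to_vcpu_mask() above: ]

#include <linux/bits.h>
#include <linux/string.h>
#include <linux/types.h>

#define DEMO_MAX_BANKS 64	/* valid_bank_mask is a single u64 */

/* Expand the sparse banks into a flat VP-index bitmap. */
static void demo_expand_sparse_set(u64 valid_bank_mask,
				   const u64 *sparse_banks,
				   u64 vp_bitmap[DEMO_MAX_BANKS])
{
	int bank, sbank = 0;

	memset(vp_bitmap, 0, DEMO_MAX_BANKS * sizeof(*vp_bitmap));
	for (bank = 0; bank < DEMO_MAX_BANKS; bank++)
		if (valid_bank_mask & BIT_ULL(bank))
			vp_bitmap[bank] = sparse_banks[sbank++];
	/* Upper bound: 64 banks * 64 bits = 4096 VP indices (512 bytes). */
}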

2021-09-08 09:35:35

by Vitaly Kuznetsov

Subject: Re: [PATCH v2 3/6] x86/kvm: introduce per cpu vcpu masks

Eduardo Habkost <[email protected]> writes:

> On Fri, Sep 03, 2021 at 03:08:04PM +0200, Juergen Gross wrote:
>> In order to support high vcpu counts per guest, don't use on-stack
>> vcpu bitmasks. As none of the bitmasks currently in use occurs in
>> functions subject to recursion, it is fairly easy to replace them
>> with per-cpu bitmasks.
>>
>> Disable preemption (via a local lock) while such a bitmask is being
>> used, in order to avoid double usage in case we are switched to
>> another cpu.
>>
>> Signed-off-by: Juergen Gross <[email protected]>
>> ---
>> V2:
>> - use local_lock() instead of preempt_disable() (Paolo Bonzini)
>> ---
>>  arch/x86/include/asm/kvm_host.h | 10 ++++++++++
>>  arch/x86/kvm/hyperv.c           | 25 ++++++++++++++++++-------
>>  arch/x86/kvm/irq_comm.c         |  9 +++++++--
>>  arch/x86/kvm/x86.c              | 22 +++++++++++++++++++++-
>>  4 files changed, 56 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 3513edee8e22..a809a9e4fa5c 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -15,6 +15,7 @@
>>  #include <linux/cpumask.h>
>>  #include <linux/irq_work.h>
>>  #include <linux/irq.h>
>> +#include <linux/local_lock.h>
>>
>>  #include <linux/kvm.h>
>>  #include <linux/kvm_para.h>
>> @@ -1591,6 +1592,15 @@ extern bool kvm_has_bus_lock_exit;
>>  /* maximum vcpu-id */
>>  unsigned int kvm_max_vcpu_id(void);
>>
>> +/* per cpu vcpu bitmasks, protected by kvm_pcpu_mask_lock */
>> +DECLARE_PER_CPU(local_lock_t, kvm_pcpu_mask_lock);
>> +extern unsigned long __percpu *kvm_pcpu_vcpu_mask;
>> +#define KVM_VCPU_MASK_SZ \
>> +	(sizeof(*kvm_pcpu_vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS))
>> +extern u64 __percpu *kvm_hv_vp_bitmap;
>> +#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
>> +#define KVM_HV_VPMAP_SZ (sizeof(u64) * KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
>
> I have just realized that the Hyper-V sparse bitmap format can
> support only up to 4096 CPUs, and the current implementation of
> sparse_set_to_vcpu_mask() won't even work correctly if
> KVM_MAX_VCPUS is larger than 4096.

Nice catch! Indeed, we can only encode 64 'banks' of 64 vCPUs each. We
need to enforce the limit somehow. As a big hammer, I can suggest
failing kvm_hv_vcpu_init(), and writes to HV_X64_MSR_VP_INDEX, for
vCPUs above 4095 for now (I seriously doubt there's a real need to run
such big Windows guests anywhere, but who knows).

>
> This means vp_bitmap can't and will never be larger than 512
> bytes. Isn't a per-CPU variable for vp_bitmap overkill in this
> case?

Yes, it's OK to allocate 512 bytes on the stack.

--
Vitaly
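
[ A minimal sketch of the "big hammer" suggested above, assuming the
check is made against the vCPU index at Hyper-V init time. The constant
and helper names are invented; the real kvm_hv_vcpu_init() and the
HV_X64_MSR_VP_INDEX write handler would need equivalent checks. ]

#include <linux/errno.h>
#include <linux/kvm_host.h>

/* The sparse set format cannot address VP indices >= 64 * 64. */
#define DEMO_HV_MAX_VP_INDEX	4096

static int demo_hv_vcpu_init_check(struct kvm_vcpu *vcpu)
{
	/* Refuse Hyper-V emulation for vCPUs the format can't address. */
	if (vcpu->vcpu_idx >= DEMO_HV_MAX_VP_INDEX)
		return -EINVAL;
	return 0;
}

/*
 * The HV_X64_MSR_VP_INDEX write handler would likewise reject
 * new VP index values >= DEMO_HV_MAX_VP_INDEX.
 */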