Currently, KVM allocates 8 pages in advance for the Posted Interrupt Descriptor
pointer (PID-pointer) table to accommodate vCPUs with APIC IDs up to
KVM_MAX_VCPU_IDS - 1. This policy wastes memory, because most VMs have
fewer than 512 vCPUs and therefore need only one page.
If the user hypervisor specifies the maximum practical vCPU ID prior to vCPU
creation, IPIv can allocate only the essential memory for the PID-pointer
table and reduce the memory footprint of VMs.
Suggested-by: Sean Christopherson <[email protected]>
Signed-off-by: Zeng Guang <[email protected]>
---
arch/x86/kvm/vmx/vmx.c | 45 ++++++++++++++++++++++++++++--------------
1 file changed, 30 insertions(+), 15 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0cb141c277ef..22bfb4953289 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -230,9 +230,6 @@ static const struct {
};
#define L1D_CACHE_ORDER 4
-
-/* PID(Posted-Interrupt Descriptor)-pointer table entry is 64-bit long */
-#define MAX_PID_TABLE_ORDER get_order(KVM_MAX_VCPU_IDS * sizeof(u64))
#define PID_TABLE_ENTRY_VALID 1
static void *vmx_l1d_flush_pages;
@@ -4434,6 +4431,24 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
return exec_control;
}
+static int vmx_alloc_pid_table(struct kvm_vmx *kvm_vmx)
+{
+ struct page *pages;
+
+ if(kvm_vmx->pid_table)
+ return 0;
+
+ pages = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(kvm_vmx->kvm.arch.max_vcpu_id * sizeof(u64)));
+
+ if (!pages)
+ return -ENOMEM;
+
+ kvm_vmx->pid_table = (void *)page_address(pages);
+ kvm_vmx->pid_last_index = kvm_vmx->kvm.arch.max_vcpu_id - 1;
+ return 0;
+}
+
#define VMX_XSS_EXIT_BITMAP 0
static void init_vmcs(struct vcpu_vmx *vmx)
@@ -7159,6 +7174,16 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
goto free_vmcs;
}
+ if (enable_ipiv && kvm_vcpu_apicv_active(vcpu)) {
+ struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
+
+ mutex_lock(&vcpu->kvm->lock);
+ err = vmx_alloc_pid_table(kvm_vmx);
+ mutex_unlock(&vcpu->kvm->lock);
+ if (err)
+ goto free_vmcs;
+ }
+
return 0;
free_vmcs:
@@ -7202,17 +7227,6 @@ static int vmx_vm_init(struct kvm *kvm)
}
}
- if (enable_ipiv) {
- struct page *pages;
-
- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PID_TABLE_ORDER);
- if (!pages)
- return -ENOMEM;
-
- to_kvm_vmx(kvm)->pid_table = (void *)page_address(pages);
- to_kvm_vmx(kvm)->pid_last_index = KVM_MAX_VCPU_IDS - 1;
- }
-
return 0;
}
@@ -7809,7 +7823,8 @@ static void vmx_vm_destroy(struct kvm *kvm)
struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
if (kvm_vmx->pid_table)
- free_pages((unsigned long)kvm_vmx->pid_table, MAX_PID_TABLE_ORDER);
+ free_pages((unsigned long)kvm_vmx->pid_table,
+ get_order((kvm_vmx->pid_last_index + 1) * sizeof(u64)));
}
static struct kvm_x86_ops vmx_x86_ops __initdata = {
--
2.27.0
On Fri, 2022-02-25 at 16:22 +0800, Zeng Guang wrote:
> Current kvm allocates 8 pages in advance for Posted Interrupt Descriptor
> pointer (PID-pointer) table to accommodate vCPUs with APIC ID up to
> KVM_MAX_VCPU_IDS - 1. This policy wastes some memory because most of
> VMs have less than 512 vCPUs and then just need one page.
>
> If user hypervisor specify max practical vcpu id prior to vCPU creation,
> IPIv can allocate only essential memory for PID-pointer table and reduce
> the memory footprint of VMs.
>
> Suggested-by: Sean Christopherson <[email protected]>
> Signed-off-by: Zeng Guang <[email protected]>
> ---
> arch/x86/kvm/vmx/vmx.c | 45 ++++++++++++++++++++++++++++--------------
> 1 file changed, 30 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 0cb141c277ef..22bfb4953289 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -230,9 +230,6 @@ static const struct {
> };
>
> #define L1D_CACHE_ORDER 4
> -
> -/* PID(Posted-Interrupt Descriptor)-pointer table entry is 64-bit long */
> -#define MAX_PID_TABLE_ORDER get_order(KVM_MAX_VCPU_IDS * sizeof(u64))
> #define PID_TABLE_ENTRY_VALID 1
>
> static void *vmx_l1d_flush_pages;
> @@ -4434,6 +4431,24 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
> return exec_control;
> }
>
> +static int vmx_alloc_pid_table(struct kvm_vmx *kvm_vmx)
> +{
> + struct page *pages;
> +
> + if(kvm_vmx->pid_table)
> + return 0;
> +
> + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO,
> + get_order(kvm_vmx->kvm.arch.max_vcpu_id * sizeof(u64)));
> +
> + if (!pages)
> + return -ENOMEM;
> +
> + kvm_vmx->pid_table = (void *)page_address(pages);
> + kvm_vmx->pid_last_index = kvm_vmx->kvm.arch.max_vcpu_id - 1;
> + return 0;
> +}
> +
> #define VMX_XSS_EXIT_BITMAP 0
>
> static void init_vmcs(struct vcpu_vmx *vmx)
> @@ -7159,6 +7174,16 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
> goto free_vmcs;
> }
>
> + if (enable_ipiv && kvm_vcpu_apicv_active(vcpu)) {
> + struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
> +
> + mutex_lock(&vcpu->kvm->lock);
> + err = vmx_alloc_pid_table(kvm_vmx);
> + mutex_unlock(&vcpu->kvm->lock);
> + if (err)
> + goto free_vmcs;
> + }
This could be dangerous. If APICv is temporarily inhibited,
this code won't run and we will end up without a PID table.
I think that kvm_vcpu_apicv_active should simply be dropped
from this condition.
Best regards,
Maxim Levitsky
> +
> return 0;
>
> free_vmcs:
> @@ -7202,17 +7227,6 @@ static int vmx_vm_init(struct kvm *kvm)
> }
> }
>
> - if (enable_ipiv) {
> - struct page *pages;
> -
> - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PID_TABLE_ORDER);
> - if (!pages)
> - return -ENOMEM;
> -
> - to_kvm_vmx(kvm)->pid_table = (void *)page_address(pages);
> - to_kvm_vmx(kvm)->pid_last_index = KVM_MAX_VCPU_IDS - 1;
> - }
> -
> return 0;
> }
>
> @@ -7809,7 +7823,8 @@ static void vmx_vm_destroy(struct kvm *kvm)
> struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
>
> if (kvm_vmx->pid_table)
> - free_pages((unsigned long)kvm_vmx->pid_table, MAX_PID_TABLE_ORDER);
> + free_pages((unsigned long)kvm_vmx->pid_table,
> + get_order((kvm_vmx->pid_last_index + 1) * sizeof(u64)));
> }
>
> static struct kvm_x86_ops vmx_x86_ops __initdata = {
On Fri, Feb 25, 2022 at 07:29:39PM +0200, Maxim Levitsky wrote:
>On Fri, 2022-02-25 at 16:22 +0800, Zeng Guang wrote:
>> Current kvm allocates 8 pages in advance for Posted Interrupt Descriptor
>> pointer (PID-pointer) table to accommodate vCPUs with APIC ID up to
>> KVM_MAX_VCPU_IDS - 1. This policy wastes some memory because most of
>> VMs have less than 512 vCPUs and then just need one page.
>>
>> If user hypervisor specify max practical vcpu id prior to vCPU creation,
>> IPIv can allocate only essential memory for PID-pointer table and reduce
>> the memory footprint of VMs.
>>
>> Suggested-by: Sean Christopherson <[email protected]>
>> Signed-off-by: Zeng Guang <[email protected]>
>> ---
>> arch/x86/kvm/vmx/vmx.c | 45 ++++++++++++++++++++++++++++--------------
>> 1 file changed, 30 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index 0cb141c277ef..22bfb4953289 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -230,9 +230,6 @@ static const struct {
>> };
>>
>> #define L1D_CACHE_ORDER 4
>> -
>> -/* PID(Posted-Interrupt Descriptor)-pointer table entry is 64-bit long */
>> -#define MAX_PID_TABLE_ORDER get_order(KVM_MAX_VCPU_IDS * sizeof(u64))
>> #define PID_TABLE_ENTRY_VALID 1
>>
>> static void *vmx_l1d_flush_pages;
>> @@ -4434,6 +4431,24 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
>> return exec_control;
>> }
>>
>> +static int vmx_alloc_pid_table(struct kvm_vmx *kvm_vmx)
>> +{
>> + struct page *pages;
>> +
>> + if(kvm_vmx->pid_table)
>> + return 0;
>> +
>> + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO,
>> + get_order(kvm_vmx->kvm.arch.max_vcpu_id * sizeof(u64)));
>> +
>> + if (!pages)
>> + return -ENOMEM;
>> +
>> + kvm_vmx->pid_table = (void *)page_address(pages);
>> + kvm_vmx->pid_last_index = kvm_vmx->kvm.arch.max_vcpu_id - 1;
>> + return 0;
>> +}
>> +
>> #define VMX_XSS_EXIT_BITMAP 0
>>
>> static void init_vmcs(struct vcpu_vmx *vmx)
>> @@ -7159,6 +7174,16 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
>> goto free_vmcs;
>> }
>>
>> + if (enable_ipiv && kvm_vcpu_apicv_active(vcpu)) {
>> + struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
>> +
>> + mutex_lock(&vcpu->kvm->lock);
>> + err = vmx_alloc_pid_table(kvm_vmx);
>> + mutex_unlock(&vcpu->kvm->lock);
>> + if (err)
>> + goto free_vmcs;
>> + }
>
>This could be dangerous. If APICv is temporary inhibited,
>this code won't run and we will end up without PID table.
>
>I think that kvm_vcpu_apicv_active should be just dropped
>from this condition.
Agreed. Will fix it.