2020-09-03 02:00:24

by Haiwei Li

[permalink] [raw]
Subject: [PATCH v2] KVM: Check the allocation of pv cpu mask

From: Haiwei Li <[email protected]>

Check the allocation of the per-cpu __pv_cpu_mask. Initialize the ops only
when the allocation succeeds.

Signed-off-by: Haiwei Li <[email protected]>
---
arch/x86/kernel/kvm.c | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 08320b0b2b27..d3c062e551d7 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -555,7 +555,6 @@ static void kvm_send_ipi_mask_allbutself(const
struct cpumask *mask, int vector)
static void kvm_setup_pv_ipi(void)
{
apic->send_IPI_mask = kvm_send_ipi_mask;
- apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
pr_info("setup PV IPIs\n");
}

@@ -654,7 +653,6 @@ static void __init kvm_guest_init(void)
}

if (pv_tlb_flush_supported()) {
- pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
pv_ops.mmu.tlb_remove_table = tlb_remove_table;
pr_info("KVM setup pv remote TLB flush\n");
}
@@ -767,6 +765,14 @@ static __init int activate_jump_labels(void)
}
arch_initcall(activate_jump_labels);

+static void kvm_free_pv_cpu_mask(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu)
+ free_cpumask_var(per_cpu(__pv_cpu_mask, cpu));
+}
+
static __init int kvm_alloc_cpumask(void)
{
int cpu;
@@ -785,11 +791,21 @@ static __init int kvm_alloc_cpumask(void)

if (alloc)
for_each_possible_cpu(cpu) {
- zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
- GFP_KERNEL, cpu_to_node(cpu));
+ if (!zalloc_cpumask_var_node(
+ per_cpu_ptr(&__pv_cpu_mask, cpu),
+ GFP_KERNEL, cpu_to_node(cpu)))
+ goto zalloc_cpumask_fail;
}

+#if defined(CONFIG_SMP)
+ apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
+#endif
+ pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
return 0;
+
+zalloc_cpumask_fail:
+ kvm_free_pv_cpu_mask();
+ return -ENOMEM;
}
arch_initcall(kvm_alloc_cpumask);

--
2.18.4


2020-09-03 10:42:11

by Vitaly Kuznetsov

[permalink] [raw]
Subject: Re: [PATCH v2] KVM: Check the allocation of pv cpu mask

Haiwei Li <[email protected]> writes:

> From: Haiwei Li <[email protected]>
>
> check the allocation of per-cpu __pv_cpu_mask. Initialize ops only when
> successful.
>
> Signed-off-by: Haiwei Li <[email protected]>
> ---
> arch/x86/kernel/kvm.c | 24 ++++++++++++++++++++----
> 1 file changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 08320b0b2b27..d3c062e551d7 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -555,7 +555,6 @@ static void kvm_send_ipi_mask_allbutself(const
> struct cpumask *mask, int vector)
> static void kvm_setup_pv_ipi(void)
> {
> apic->send_IPI_mask = kvm_send_ipi_mask;
> - apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
> pr_info("setup PV IPIs\n");
> }
>
> @@ -654,7 +653,6 @@ static void __init kvm_guest_init(void)
> }
>
> if (pv_tlb_flush_supported()) {
> - pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
> pv_ops.mmu.tlb_remove_table = tlb_remove_table;
> pr_info("KVM setup pv remote TLB flush\n");
> }
> @@ -767,6 +765,14 @@ static __init int activate_jump_labels(void)
> }
> arch_initcall(activate_jump_labels);
>
> +static void kvm_free_pv_cpu_mask(void)
> +{
> + unsigned int cpu;
> +
> + for_each_possible_cpu(cpu)
> + free_cpumask_var(per_cpu(__pv_cpu_mask, cpu));
> +}
> +
> static __init int kvm_alloc_cpumask(void)
> {
> int cpu;
> @@ -785,11 +791,21 @@ static __init int kvm_alloc_cpumask(void)
>
> if (alloc)
> for_each_possible_cpu(cpu) {
> - zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
> - GFP_KERNEL, cpu_to_node(cpu));
> + if (!zalloc_cpumask_var_node(
> + per_cpu_ptr(&__pv_cpu_mask, cpu),
> + GFP_KERNEL, cpu_to_node(cpu)))
> + goto zalloc_cpumask_fail;
> }
>
> +#if defined(CONFIG_SMP)
> + apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
> +#endif
> + pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;

This is too late, I'm afraid. If I'm not mistaken, PV patching happens
earlier, so .init.guest_late_init (kvm_guest_init()) is good and
arch_initcall() is bad.

Have you checked that with this patch kvm_flush_tlb_others() is still
being called?

Actually, there is no need to assign kvm_flush_tlb_others() so late. We
can always check whether __pv_cpu_mask was allocated and fall back to the
architectural path if not.

> return 0;
> +
> +zalloc_cpumask_fail:
> + kvm_free_pv_cpu_mask();
> + return -ENOMEM;
> }
> arch_initcall(kvm_alloc_cpumask);
>
> --
> 2.18.4
>

--
Vitaly

2020-09-04 01:03:30

by Haiwei Li

[permalink] [raw]
Subject: Re: [PATCH v2] KVM: Check the allocation of pv cpu mask



On 20/9/3 18:39, Vitaly Kuznetsov wrote:
> Haiwei Li <[email protected]> writes:
>
>> From: Haiwei Li <[email protected]>
>>
>> check the allocation of per-cpu __pv_cpu_mask. Initialize ops only when
>> successful.
>>
>> Signed-off-by: Haiwei Li <[email protected]>
>> ---
>> arch/x86/kernel/kvm.c | 24 ++++++++++++++++++++----
>> 1 file changed, 20 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>> index 08320b0b2b27..d3c062e551d7 100644
>> --- a/arch/x86/kernel/kvm.c
>> +++ b/arch/x86/kernel/kvm.c
>> @@ -555,7 +555,6 @@ static void kvm_send_ipi_mask_allbutself(const
>> struct cpumask *mask, int vector)
>> static void kvm_setup_pv_ipi(void)
>> {
>> apic->send_IPI_mask = kvm_send_ipi_mask;
>> - apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
>> pr_info("setup PV IPIs\n");
>> }
>>
>> @@ -654,7 +653,6 @@ static void __init kvm_guest_init(void)
>> }
>>
>> if (pv_tlb_flush_supported()) {
>> - pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
>> pv_ops.mmu.tlb_remove_table = tlb_remove_table;
>> pr_info("KVM setup pv remote TLB flush\n");
>> }
>> @@ -767,6 +765,14 @@ static __init int activate_jump_labels(void)
>> }
>> arch_initcall(activate_jump_labels);
>>
>> +static void kvm_free_pv_cpu_mask(void)
>> +{
>> + unsigned int cpu;
>> +
>> + for_each_possible_cpu(cpu)
>> + free_cpumask_var(per_cpu(__pv_cpu_mask, cpu));
>> +}
>> +
>> static __init int kvm_alloc_cpumask(void)
>> {
>> int cpu;
>> @@ -785,11 +791,21 @@ static __init int kvm_alloc_cpumask(void)
>>
>> if (alloc)
>> for_each_possible_cpu(cpu) {
>> - zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
>> - GFP_KERNEL, cpu_to_node(cpu));
>> + if (!zalloc_cpumask_var_node(
>> + per_cpu_ptr(&__pv_cpu_mask, cpu),
>> + GFP_KERNEL, cpu_to_node(cpu)))
>> + goto zalloc_cpumask_fail;
>> }
>>
>> +#if defined(CONFIG_SMP)
>> + apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
>> +#endif
>> + pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
>
> This is too late I'm afraid. If I'm not mistaken PV patching happens
> earlier, so .init.guest_late_init (kvm_guest_init()) is good and
> arch_initcall() is bad.

.init.guest_late_init (kvm_guest_init()) is called before
arch_initcall(), and kvm_flush_tlb_others() and kvm_send_ipi_mask_allbutself()
rely on __pv_cpu_mask. So, I cannot put this assignment in kvm_guest_init().

>
> Have you checked that with this patch kvm_flush_tlb_others() is still
> being called?

Yes. I added a printk() and I got the log output.

>
> Actually, there is no need to assign kvm_flush_tlb_others() so late. We
> can always check if __pv_cpu_mask was allocated and revert back to the
> architectural path if not.
I am sorry, I don't really understand. Can you explain in more detail? Thanks.

>
>> return 0;
>> +
>> +zalloc_cpumask_fail:
>> + kvm_free_pv_cpu_mask();
>> + return -ENOMEM;
>> }
>> arch_initcall(kvm_alloc_cpumask);
>>
>> --
>> 2.18.4
>>
>

2020-09-04 09:54:52

by Vitaly Kuznetsov

[permalink] [raw]
Subject: Re: [PATCH v2] KVM: Check the allocation of pv cpu mask

Haiwei Li <[email protected]> writes:

> On 20/9/3 18:39, Vitaly Kuznetsov wrote:
>> Haiwei Li <[email protected]> writes:
>>
>>> From: Haiwei Li <[email protected]>
>>>
>>> check the allocation of per-cpu __pv_cpu_mask. Initialize ops only when
>>> successful.
>>>
>>> Signed-off-by: Haiwei Li <[email protected]>
>>> ---
>>> arch/x86/kernel/kvm.c | 24 ++++++++++++++++++++----
>>> 1 file changed, 20 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>>> index 08320b0b2b27..d3c062e551d7 100644
>>> --- a/arch/x86/kernel/kvm.c
>>> +++ b/arch/x86/kernel/kvm.c
>>> @@ -555,7 +555,6 @@ static void kvm_send_ipi_mask_allbutself(const
>>> struct cpumask *mask, int vector)
>>> static void kvm_setup_pv_ipi(void)
>>> {
>>> apic->send_IPI_mask = kvm_send_ipi_mask;
>>> - apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
>>> pr_info("setup PV IPIs\n");
>>> }
>>>
>>> @@ -654,7 +653,6 @@ static void __init kvm_guest_init(void)
>>> }
>>>
>>> if (pv_tlb_flush_supported()) {
>>> - pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
>>> pv_ops.mmu.tlb_remove_table = tlb_remove_table;
>>> pr_info("KVM setup pv remote TLB flush\n");
>>> }
>>> @@ -767,6 +765,14 @@ static __init int activate_jump_labels(void)
>>> }
>>> arch_initcall(activate_jump_labels);
>>>
>>> +static void kvm_free_pv_cpu_mask(void)
>>> +{
>>> + unsigned int cpu;
>>> +
>>> + for_each_possible_cpu(cpu)
>>> + free_cpumask_var(per_cpu(__pv_cpu_mask, cpu));
>>> +}
>>> +
>>> static __init int kvm_alloc_cpumask(void)
>>> {
>>> int cpu;
>>> @@ -785,11 +791,21 @@ static __init int kvm_alloc_cpumask(void)
>>>
>>> if (alloc)
>>> for_each_possible_cpu(cpu) {
>>> - zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
>>> - GFP_KERNEL, cpu_to_node(cpu));
>>> + if (!zalloc_cpumask_var_node(
>>> + per_cpu_ptr(&__pv_cpu_mask, cpu),
>>> + GFP_KERNEL, cpu_to_node(cpu)))
>>> + goto zalloc_cpumask_fail;
>>> }
>>>
>>> +#if defined(CONFIG_SMP)
>>> + apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
>>> +#endif
>>> + pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
>>
>> This is too late I'm afraid. If I'm not mistaken PV patching happens
>> earlier, so .init.guest_late_init (kvm_guest_init()) is good and
>> arch_initcall() is bad.
>
> .init.guest_late_init (kvm_guest_init()) is called before
> arch_initcall() and kvm_flush_tlb_others && kvm_send_ipi_mask_allbutself
> rely on __pv_cpu_mask. So, i can not put this assign in kvm_guest_init().
>
>>
>> Have you checked that with this patch kvm_flush_tlb_others() is still
>> being called?
>
> yes. I add a printk and i get the log.
>

This is weird. I do the following on top of your patch:

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d3c062e551d7..f441209ff0a4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -620,6 +620,8 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
struct kvm_steal_time *src;
struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);

+ trace_printk("PV TLB flush %d CPUs\n", cpumask_weight(cpumask));
+
cpumask_copy(flushmask, cpumask);
/*
* We have to call flush only on online vCPUs. And

With your patch I don't see any calls:

# grep -c -v '^#' /sys/kernel/debug/tracing/trace
0

with your patch reverted I see them:

# grep -c -v '^#' /sys/kernel/debug/tracing/trace
4571


>>
>> Actually, there is no need to assign kvm_flush_tlb_others() so late. We
>> can always check if __pv_cpu_mask was allocated and revert back to the
>> architectural path if not.
> I am sorry i don't really understand. Can you explain in more detail? Thx.
>

I mean we can always call e.g. kvm_flush_tlb_others(), even if (very
unlikely) the mask wasn't allocated. We just need to check for
that. Something like (completely untested):

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d3c062e551d7..e3676cdee6a2 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -620,6 +620,11 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
struct kvm_steal_time *src;
struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);

+ if (unlikely(!flushmask)) {
+ flushmask = cpumask;
+ goto do_native;
+ }
+
cpumask_copy(flushmask, cpumask);
/*
* We have to call flush only on online vCPUs. And
@@ -635,6 +640,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
}
}

+do_native:
native_flush_tlb_others(flushmask, info);
}


>>
>>> return 0;
>>> +
>>> +zalloc_cpumask_fail:
>>> + kvm_free_pv_cpu_mask();
>>> + return -ENOMEM;
>>> }
>>> arch_initcall(kvm_alloc_cpumask);
>>>
>>> --
>>> 2.18.4
>>>
>>
>

--
Vitaly

2020-09-04 12:23:24

by Haiwei Li

[permalink] [raw]
Subject: Re: [PATCH v2] KVM: Check the allocation of pv cpu mask



On 20/9/4 17:53, Vitaly Kuznetsov wrote:
> Haiwei Li <[email protected]> writes:
>
>> On 20/9/3 18:39, Vitaly Kuznetsov wrote:
>>> Haiwei Li <[email protected]> writes:
>>>
>>>> From: Haiwei Li <[email protected]>
>>>>
>>>> check the allocation of per-cpu __pv_cpu_mask. Initialize ops only when
>>>> successful.
>>>>
>>>> Signed-off-by: Haiwei Li <[email protected]>
>>>> ---
>>>> arch/x86/kernel/kvm.c | 24 ++++++++++++++++++++----
>>>> 1 file changed, 20 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>>>> index 08320b0b2b27..d3c062e551d7 100644
>>>> --- a/arch/x86/kernel/kvm.c
>>>> +++ b/arch/x86/kernel/kvm.c
>>>> @@ -555,7 +555,6 @@ static void kvm_send_ipi_mask_allbutself(const
>>>> struct cpumask *mask, int vector)
>>>> static void kvm_setup_pv_ipi(void)
>>>> {
>>>> apic->send_IPI_mask = kvm_send_ipi_mask;
>>>> - apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
>>>> pr_info("setup PV IPIs\n");
>>>> }
>>>>
>>>> @@ -654,7 +653,6 @@ static void __init kvm_guest_init(void)
>>>> }
>>>>
>>>> if (pv_tlb_flush_supported()) {
>>>> - pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
>>>> pv_ops.mmu.tlb_remove_table = tlb_remove_table;
>>>> pr_info("KVM setup pv remote TLB flush\n");
>>>> }
>>>> @@ -767,6 +765,14 @@ static __init int activate_jump_labels(void)
>>>> }
>>>> arch_initcall(activate_jump_labels);
>>>>
>>>> +static void kvm_free_pv_cpu_mask(void)
>>>> +{
>>>> + unsigned int cpu;
>>>> +
>>>> + for_each_possible_cpu(cpu)
>>>> + free_cpumask_var(per_cpu(__pv_cpu_mask, cpu));
>>>> +}
>>>> +
>>>> static __init int kvm_alloc_cpumask(void)
>>>> {
>>>> int cpu;
>>>> @@ -785,11 +791,21 @@ static __init int kvm_alloc_cpumask(void)
>>>>
>>>> if (alloc)
>>>> for_each_possible_cpu(cpu) {
>>>> - zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
>>>> - GFP_KERNEL, cpu_to_node(cpu));
>>>> + if (!zalloc_cpumask_var_node(
>>>> + per_cpu_ptr(&__pv_cpu_mask, cpu),
>>>> + GFP_KERNEL, cpu_to_node(cpu)))
>>>> + goto zalloc_cpumask_fail;
>>>> }
>>>>
>>>> +#if defined(CONFIG_SMP)
>>>> + apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
>>>> +#endif
>>>> + pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
>>>
>>> This is too late I'm afraid. If I'm not mistaken PV patching happens
>>> earlier, so .init.guest_late_init (kvm_guest_init()) is good and
>>> arch_initcall() is bad.
>>
>> .init.guest_late_init (kvm_guest_init()) is called before
>> arch_initcall() and kvm_flush_tlb_others && kvm_send_ipi_mask_allbutself
>> rely on __pv_cpu_mask. So, i can not put this assign in kvm_guest_init().
>>
>>>
>>> Have you checked that with this patch kvm_flush_tlb_others() is still
>>> being called?
>>
>> yes. I add a printk and i get the log.
>>
>
> This is weird. I do the following on top of your patch:
>
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index d3c062e551d7..f441209ff0a4 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -620,6 +620,8 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
> struct kvm_steal_time *src;
> struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
>
> + trace_printk("PV TLB flush %d CPUs\n", cpumask_weight(cpumask));
> +
> cpumask_copy(flushmask, cpumask);
> /*
> * We have to call flush only on online vCPUs. And
>
> With your patch I don't see any calls:
>
> # grep -c -v '^#' /sys/kernel/debug/tracing/trace
> 0
>
> with your patch reverted I see them:
>
> # grep -c -v '^#' /sys/kernel/debug/tracing/trace
> 4571

I just retested. You are right. I'm sorry.

>
>
>>>
>>> Actually, there is no need to assign kvm_flush_tlb_others() so late. We
>>> can always check if __pv_cpu_mask was allocated and revert back to the
>>> architectural path if not.
>> I am sorry i don't really understand. Can you explain in more detail? Thx.
>>
>
> I mean we can always call e.g. kvm_flush_tlb_others(), even if (very
> unlikely) the mask wasn't allocated. We just need to check for
> that. Something like (completely untested):
>
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index d3c062e551d7..e3676cdee6a2 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -620,6 +620,11 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
> struct kvm_steal_time *src;
> struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
>
> + if (unlikely(!flushmask)) {
> + flushmask = cpumask;
> + goto do_native;
> + }
> +

I see. I appreciate your patience and kindness.

I will send a new version.

> cpumask_copy(flushmask, cpumask);
> /*
> * We have to call flush only on online vCPUs. And
> @@ -635,6 +640,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
> }
> }
>
> +do_native:
> native_flush_tlb_others(flushmask, info);
> }
>
>
>>>
>>>> return 0;
>>>> +
>>>> +zalloc_cpumask_fail:
>>>> + kvm_free_pv_cpu_mask();
>>>> + return -ENOMEM;
>>>> }
>>>> arch_initcall(kvm_alloc_cpumask);
>>>>
>>>> --
>>>> 2.18.4
>>>>
>>>
>>
>