2018-06-15 16:32:23

by Vitaly Kuznetsov

Subject: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible

While working on Hyper-V style PV TLB flush support in KVM I noticed that
real Windows guests use the TLB flush hypercalls in a somewhat smarter way:
when the flush needs to be performed on a subset of the first 64 vCPUs, or
on all present vCPUs, Windows avoids the more expensive hypercalls which
support sparse CPU sets and uses their 'cheap' counterparts. This means
that the name HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED is actually a misnomer:
the EX hypercalls (which support sparse CPU sets) are "available", not
"recommended". This makes sense, as they are actually harder to parse.

Nothing stops us from being equally 'smart' in Linux too. Switch to
doing the cheaper hypercalls whenever possible.

Signed-off-by: Vitaly Kuznetsov <[email protected]>
---
arch/x86/hyperv/mmu.c | 68 ++++++++++++++++++++-------------------------------
1 file changed, 27 insertions(+), 41 deletions(-)

diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index de27615c51ea..7519a44b462e 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -16,6 +16,8 @@
/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)

+static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
+ const struct flush_tlb_info *info);

/*
* Fills in gva_list starting from offset. Returns the number of items added.
@@ -93,10 +95,19 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
if (cpumask_equal(cpus, cpu_present_mask)) {
flush->flags |= HV_FLUSH_ALL_PROCESSORS;
} else {
+ /*
+ * It is highly likely that VP ids are in ascending order
+ * matching Linux CPU ids; Check VP index for the highest CPU
+ * in the supplied set to see if EX hypercall is required.
+ * This is just a best guess but should work most of the time.
+ */
+ if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64)
+ goto do_ex_hypercall;
+
for_each_cpu(cpu, cpus) {
vcpu = hv_cpu_number_to_vp_number(cpu);
if (vcpu >= 64)
- goto do_native;
+ goto do_ex_hypercall;

__set_bit(vcpu, (unsigned long *)
&flush->processor_mask);
@@ -123,7 +134,12 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
gva_n, 0, flush, NULL);
}
+ goto check_status;
+
+do_ex_hypercall:
+ status = hyperv_flush_tlb_others_ex(cpus, info);

+check_status:
local_irq_restore(flags);

if (!(status & HV_HYPERCALL_RESULT_MASK))
@@ -132,35 +148,22 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
native_flush_tlb_others(cpus, info);
}

-static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
- const struct flush_tlb_info *info)
+static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
+ const struct flush_tlb_info *info)
{
int nr_bank = 0, max_gvas, gva_n;
struct hv_tlb_flush_ex **flush_pcpu;
struct hv_tlb_flush_ex *flush;
- u64 status = U64_MAX;
- unsigned long flags;
-
- trace_hyperv_mmu_flush_tlb_others(cpus, info);
+ u64 status;

- if (!hv_hypercall_pg)
- goto do_native;
-
- if (cpumask_empty(cpus))
- return;
-
- local_irq_save(flags);
+ if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+ return U64_MAX;

flush_pcpu = (struct hv_tlb_flush_ex **)
this_cpu_ptr(hyperv_pcpu_input_arg);

flush = *flush_pcpu;

- if (unlikely(!flush)) {
- local_irq_restore(flags);
- goto do_native;
- }
-
if (info->mm) {
/*
* AddressSpace argument must match the CR3 with PCID bits
@@ -176,15 +179,8 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,

flush->hv_vp_set.valid_bank_mask = 0;

- if (!cpumask_equal(cpus, cpu_present_mask)) {
- flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
- }
-
- if (!nr_bank) {
- flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
- flush->flags |= HV_FLUSH_ALL_PROCESSORS;
- }
+ flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);

/*
* We can flush not more than max_gvas with one hypercall. Flush the
@@ -213,12 +209,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
gva_n, nr_bank, flush, NULL);
}

- local_irq_restore(flags);
-
- if (!(status & HV_HYPERCALL_RESULT_MASK))
- return;
-do_native:
- native_flush_tlb_others(cpus, info);
+ return status;
}

void hyperv_setup_mmu_ops(void)
@@ -226,11 +217,6 @@ void hyperv_setup_mmu_ops(void)
if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
return;

- if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
- pr_info("Using hypercall for remote TLB flush\n");
- pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
- } else {
- pr_info("Using ext hypercall for remote TLB flush\n");
- pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
- }
+ pr_info("Using hypercall for remote TLB flush\n");
+ pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
}
--
2.14.4



2018-06-19 12:06:06

by Thomas Gleixner

Subject: Re: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible

On Fri, 15 Jun 2018, Vitaly Kuznetsov wrote:
> * Fills in gva_list starting from offset. Returns the number of items added.
> @@ -93,10 +95,19 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
> if (cpumask_equal(cpus, cpu_present_mask)) {
> flush->flags |= HV_FLUSH_ALL_PROCESSORS;
> } else {
> + /*
> + * It is highly likely that VP ids are in ascending order
> + * matching Linux CPU ids; Check VP index for the highest CPU
> + * in the supplied set to see if EX hypercall is required.
> + * This is just a best guess but should work most of the time.

TLB flushing based on 'best guess' and 'should work most of the time' is
not a brilliant approach.

Thanks,

tglx

2018-06-19 13:00:33

by Vitaly Kuznetsov

Subject: Re: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible

Thomas Gleixner <[email protected]> writes:

> On Fri, 15 Jun 2018, Vitaly Kuznetsov wrote:
>> * Fills in gva_list starting from offset. Returns the number of items added.
>> @@ -93,10 +95,19 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
>> if (cpumask_equal(cpus, cpu_present_mask)) {
>> flush->flags |= HV_FLUSH_ALL_PROCESSORS;
>> } else {
>> + /*
>> + * It is highly likely that VP ids are in ascending order
>> + * matching Linux CPU ids; Check VP index for the highest CPU
>> + * in the supplied set to see if EX hypercall is required.
>> + * This is just a best guess but should work most of the time.
>
> TLB flushing based on 'best guess' and 'should work most of the time' is
> not a brilliant approach.
>

Oh no no no, that's not what I meant :-)

We have the following problem: from the supplied CPU set we need to
figure out if we can get away with the cheaper
HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls or if we need the
more expensive HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}_EX ones. The
dividing line is the highest VP_INDEX in the supplied CPU set: if it is
< 64, the cheaper hypercalls are OK. Now how do we check that? In the
patch I have the following approach:

1) Check the VP number for the highest CPU in the supplied set. If it
is >= 64 we for sure need the more expensive hypercalls. This is the
"guess" which works most of the time because Linux CPU ids usually
match VP_INDEXes.

2) If the answer to the previous question was negative we start
preparing input for the cheaper hypercall. However, if while walking
the CPU set we meet a CPU with a VP_INDEX of 64 or above, we discard
the prepared input and switch to the more expensive hypercall.

That said, the 'guess' here is just an optimization to avoid walking
the whole CPU set when we can find the required answer quickly by
looking at the highest bit. This will help big systems with hundreds
of CPUs.
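
In (simplified) code, the fast path looks roughly like this -- a
sketch of the patch, not the exact hunk:

	/* 1) Quick check: the highest CPU in the supplied set. */
	if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64)
		goto do_ex_hypercall;

	/* 2) Build the flat 64-bit mask; bail out if the guess was wrong. */
	for_each_cpu(cpu, cpus) {
		vcpu = hv_cpu_number_to_vp_number(cpu);
		if (vcpu >= 64)
			goto do_ex_hypercall;
		__set_bit(vcpu, (unsigned long *)&flush->processor_mask);
	}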

--
Vitaly

2018-06-19 13:07:01

by Thomas Gleixner

Subject: Re: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible

On Tue, 19 Jun 2018, Vitaly Kuznetsov wrote:
> Thomas Gleixner <[email protected]> writes:
>
> > TLB flushing based on 'best guess' and 'should work most of the time' is
> > not a brilliant approach.
>
> [...]
>
> That said, the 'guess' here is just an optimization to avoid walking
> the whole CPU set when we can find the required answer quickly by
> looking at the highest bit. This will help big systems with hundreds
> of CPUs.

Care to fix the comment to avoid the offending words?

Thanks,

tglx

2018-06-19 13:20:57

by Vitaly Kuznetsov

Subject: Re: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible

Thomas Gleixner <[email protected]> writes:

> On Tue, 19 Jun 2018, Vitaly Kuznetsov wrote:
>> [...]
>>
>> That said, the 'guess' here is just an optimization to avoid walking
>> the whole CPU set when we can find the required answer quickly by
>> looking at the highest bit. This will help big systems with hundreds
>> of CPUs.
>
> Care to fix the comment to avoid the offending words?
>

Sure, will re-word in v2.

--
Vitaly

2018-06-19 17:58:48

by Michael Kelley (EOSG)

Subject: RE: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible

> -----Original Message-----
> From: [email protected] <[email protected]> On Behalf Of Vitaly Kuznetsov
> Sent: Friday, June 15, 2018 9:30 AM
> Subject: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible
>
> While working on Hyper-V style PV TLB flush support in KVM I noticed that
> real Windows guests use the TLB flush hypercalls in a somewhat smarter way:
> [...]
>
> Nothing stops us from being equally 'smart' in Linux too. Switch to
> doing the cheaper hypercalls whenever possible.

This is a good idea. We should probably do the same with the hypercalls for sending
IPIs -- try the simpler version first and move to the more complex _EX version only
if necessary.
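
In rough code, the idea would look something like this -- a sketch
only, mirroring the TLB flush pattern; HVCALL_SEND_IPI is the TLFS
hypercall code, but the struct layout and the __send_ipi_mask*
helper names here are illustrative, not actual in-flight code:

	static bool __send_ipi_mask(const struct cpumask *mask, int vector)
	{
		/* Illustrative input layout: u32 vector; u32 reserved; u64 cpu_mask; */
		struct hv_send_ipi *ipi_arg;
		unsigned long flags;
		int cpu, vcpu;
		u64 status;

		local_irq_save(flags);
		ipi_arg = *(struct hv_send_ipi **)this_cpu_ptr(hyperv_pcpu_input_arg);

		ipi_arg->vector = vector;
		ipi_arg->reserved = 0;
		ipi_arg->cpu_mask = 0;

		for_each_cpu(cpu, mask) {
			vcpu = hv_cpu_number_to_vp_number(cpu);
			if (vcpu >= 64) {
				/* Sparse set: fall back to the _EX flavor. */
				local_irq_restore(flags);
				return __send_ipi_mask_ex(mask, vector);
			}
			__set_bit(vcpu, (unsigned long *)&ipi_arg->cpu_mask);
		}

		status = hv_do_hypercall(HVCALL_SEND_IPI, ipi_arg, NULL);
		local_irq_restore(flags);

		return !(status & HV_HYPERCALL_RESULT_MASK);
	}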

A complication: We've recently found a problem with the code for doing IPI
hypercalls, and the bug affects the TLB flush code as well. As secondary CPUs
are started, there's a window of time where the hv_vp_index entry for a
secondary CPU is uninitialized. We are seeing IPIs happening in that window, and
the IPI hypercall code uses the uninitialized hv_vp_index entry. Same thing could
happen with the TLB flush hypercall code. I didn't actually see any occurrences of
the TLB case in my tracing, but we should fix it anyway in case a TLB flush gets
added at some point in the future.

KY has a patch coming. In the patch, hv_cpu_number_to_vp_number()
and cpumask_to_vpset() can both return U32_MAX if they encounter an
uninitialized hv_vp_index entry, and the code needs to be able to bail out to
the native functions for that particular IPI or TLB flush operation. Once the
initialization of secondary CPUs is complete, the uninitialized situation won't
happen again, and the hypercall path will always be used.
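
In code terms, each caller would need a bail-out along these lines (a
sketch based on the description above; the exact form of KY's patch
may differ):

	vcpu = hv_cpu_number_to_vp_number(cpu);
	if (vcpu == U32_MAX)
		goto do_native;	/* hv_vp_index not initialized yet */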

We'll need to coordinate on these patches. Be aware that the IPI flavor of the
bug is currently causing random failures when booting 4.18 RC1 on Hyper-V VMs
with large vCPU counts.

Reviewed-by: Michael Kelley <[email protected]>

2018-06-19 18:22:08

by KY Srinivasan

Subject: RE: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible



> -----Original Message-----
> From: Michael Kelley (EOSG)
> Sent: Tuesday, June 19, 2018 10:57 AM
> Subject: RE: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible
>
> > While working on Hyper-V style PV TLB flush support in KVM I noticed that
> > real Windows guests use the TLB flush hypercalls in a somewhat smarter way:
> > [...]
> >
> > Nothing stops us from being equally 'smart' in Linux too. Switch to
> > doing the cheaper hypercalls whenever possible.
>
> This is a good idea. We should probably do the same with the hypercalls
> for sending IPIs -- try the simpler version first and move to the more
> complex _EX version only if necessary.

I am not sure if this would work correctly. When I was developing the IPI
enlightenment, what I remember was that the guest is expected to use the
API recommended by the Hypervisor.

K. Y

2018-06-20 08:29:00

by Vitaly Kuznetsov

Subject: Re: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible

KY Srinivasan <[email protected]> writes:

>> -----Original Message-----
>> From: Michael Kelley (EOSG)
>> Sent: Tuesday, June 19, 2018 10:57 AM
>> Subject: RE: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible
>>
>> ...
>> This is a good idea. We should probably do the same with the hypercalls
>> for sending IPIs -- try the simpler version first and move to the more
>> complex _EX version only if necessary.
> I am not sure if this would work correctly. When I was developing the IPI enlightenment,
> what I remember was that the guest is expected to use the API recommended by the Hypervisor.
>

I was under the same impression when I implemented PV TLB flush. It
turns out HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED is a misnomer, or at
least Windows treats it as HV_X64_EX_PROCESSOR_MASKS_AVAILABLE, using
the EX hypercalls only when needed.

My guess would be that the situation with IPI is the same. In any case I
can try to implement Hyper-V style PV IPIs for Windows in KVM and we'll
see how Windows uses these hypercalls :-)

--
Vitaly

2018-06-20 08:35:36

by Vitaly Kuznetsov

Subject: Re: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible

"Michael Kelley (EOSG)" <[email protected]> writes:

>> -----Original Message-----
>> From: [email protected] <[email protected]> On Behalf Of Vitaly Kuznetsov
>> Sent: Friday, June 15, 2018 9:30 AM
>> Subject: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible
>>
>> While working on Hyper-V style PV TLB flush support in KVM I noticed that
>> real Windows guests use the TLB flush hypercalls in a somewhat smarter way:
>> [...]
>>
>> Nothing stops us from being equally 'smart' in Linux too. Switch to
>> doing the cheaper hypercalls whenever possible.
>
> This is a good idea. We should probably do the same with the hypercalls for sending
> IPIs -- try the simpler version first and move to the more complex _EX version only
> if necessary.
>
> A complication: We've recently found a problem with the code for doing IPI
> hypercalls, and the bug affects the TLB flush code as well. As secondary CPUs
> are started, there's a window of time where the hv_vp_index entry for a
> secondary CPU is uninitialized. We are seeing IPIs happening in that window, and
> the IPI hypercall code uses the uninitialized hv_vp_index entry. Same thing could
> happen with the TLB flush hypercall code. I didn't actually see any occurrences of
> the TLB case in my tracing, but we should fix it anyway in case a TLB flush gets
> added at some point in the future.
>
> KY has a patch coming. In the patch, hv_cpu_number_to_vp_number()
> and cpumask_to_vpset() can both return U32_MAX if they encounter an
> uninitialized hv_vp_index entry, and the code needs to be able to bail out to
> the native functions for that particular IPI or TLB flush operation. Once the
> initialization of secondary CPUs is complete, the uninitialized situation won't
> happen again, and the hypercall path will always be used.

Sure,

with TLB flush we can always fall back to doing it natively (by sending
IPIs).

>
> We'll need to coordinate on these patches. Be aware that the IPI flavor of the
> bug is currently causing random failures when booting 4.18 RC1 on Hyper-V VMs
> with large vCPU counts.

Thanks for the heads up! This particular patch is just an optimization,
so there's no rush; the IPI fix is definitely more important.

>
> Reviewed-by: Michael Kelley <[email protected]>

Thanks!

--
Vitaly

2018-06-20 17:56:53

by KY Srinivasan

Subject: RE: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible



> -----Original Message-----
> From: Vitaly Kuznetsov <[email protected]>
> Sent: Wednesday, June 20, 2018 1:24 AM
> Subject: Re: [PATCH] x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} hypercalls when possible
>
> "Michael Kelley (EOSG)" <[email protected]> writes:
>
> > A complication: We've recently found a problem with the code for doing IPI
> > hypercalls, and the bug affects the TLB flush code as well. As secondary CPUs
> > are started, there's a window of time where the hv_vp_index entry for a
> > secondary CPU is uninitialized. [...]
> >
> > KY has a patch coming. [...]
>
> Sure,

I am surprised that we have not seen this issue in the TLB flush
enlightenments.

K. Y