LinuxLists.cc - [PATCH 0/4] x86/hyper-v: optimize PV IPIs

2018-06-22 17:09:54

Subject: [PATCH 0/4] x86/hyper-v: optimize PV IPIs

When reviewing my "x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_
{LIST,SPACE} hypercalls when possible" patch Michael suggested to apply the
same idea to PV IPIs. Here we go!

Despite what Hyper-V TLFS says about HVCALL_SEND_IPI hypercall, it can
actually be 'fast' (passing parameters through registers). Use that too.

This series can collide with my "KVM: x86: hyperv: PV IPI support for
Windows guests" series as I rename ipi_arg_non_ex/ipi_arg_ex structures
there. Depending on which one gets in first we may need to do tiny
adjustments.

Vitaly Kuznetsov (4):
x86/hyper-v: implement hv_do_fast_hypercall16
x86/hyper-v: use 'fast' hypercall for HVCALL_SEND_IPI
x86/hyper-v: use cheaper HVCALL_SEND_IPI hypercall when possible
x86/hyper-v: trace PV IPI send

arch/x86/hyperv/hv_apic.c | 57 ++++++++++++++++++++-----------------
arch/x86/include/asm/mshyperv.h | 34 ++++++++++++++++++++++
arch/x86/include/asm/trace/hyperv.h | 15 ++++++++++
3 files changed, 80 insertions(+), 26 deletions(-)

--
2.14.4

2018-06-22 17:08:08

by Vitaly Kuznetsov

[permalink] [raw]

Subject: [PATCH 3/4] x86/hyper-v: use cheaper HVCALL_SEND_IPI hypercall when possible

When there is no need to send an IPI to a CPU with VP number >= 64
we can do the job with the fast HVCALL_SEND_IPI hypercall.

Signed-off-by: Vitaly Kuznetsov <[email protected]>
---
arch/x86/hyperv/hv_apic.c | 29 ++++++++++++++++++++---------
1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 90055f89223b..ee962784d25b 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -99,6 +99,9 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
int nr_bank = 0;
int ret = 1;

+ if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+ return false;
+
local_irq_save(flags);
arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);

@@ -140,8 +143,18 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
return false;

- if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
- return __send_ipi_mask_ex(mask, vector);
+ /*
+ * From the supplied CPU set we need to figure out if we can get away
+ * with cheaper HVCALL_SEND_IPI hypercall. This is possible when the
+ * highest VP number in the set is < 64. As VP numbers are usually in
+ * ascending order and match Linux CPU ids, here is an optimization:
+ * we check the VP number for the highest bit in the supplied set first
+ * so we can quickly find out if using HVCALL_SEND_IPI_EX hypercall is
+ * a must. We will also check all VP numbers when walking the supplied
+ * CPU set to remain correct in all cases.
+ */
+ if (hv_cpu_number_to_vp_number(cpumask_last(mask)) >= 64)
+ goto do_ex_hypercall;

ipi_arg.vector = vector;
ipi_arg.cpu_mask = 0;
@@ -153,16 +166,17 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
* only target upto 64 CPUs.
*/
if (vcpu >= 64)
- goto ipi_mask_done;
+ goto do_ex_hypercall;

__set_bit(vcpu, (unsigned long *)&ipi_arg.cpu_mask);
}

ret = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
ipi_arg.cpu_mask);
-
-ipi_mask_done:
return ((ret == 0) ? true : false);
+
+do_ex_hypercall:
+ return __send_ipi_mask_ex(mask, vector);
}

static bool __send_ipi_one(int cpu, int vector)
@@ -218,10 +232,7 @@ static void hv_send_ipi_self(int vector)
void __init hv_apic_init(void)
{
if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) {
- if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
- pr_info("Hyper-V: Using ext hypercalls for IPI\n");
- else
- pr_info("Hyper-V: Using IPI hypercalls\n");
+ pr_info("Hyper-V: Using IPI hypercalls\n");
/*
* Set the IPI entry points.
*/
--
2.14.4

2018-06-22 17:08:29

by Vitaly Kuznetsov

[permalink] [raw]

Subject: [PATCH 4/4] x86/hyper-v: trace PV IPI send

Trace Hyper-V PV IPIs the same way we do PV TLB flush.

Signed-off-by: Vitaly Kuznetsov <[email protected]>
---
arch/x86/hyperv/hv_apic.c | 4 ++++
arch/x86/include/asm/trace/hyperv.h | 15 +++++++++++++++
2 files changed, 19 insertions(+)

diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index ee962784d25b..657a2b8c738a 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -31,6 +31,8 @@
#include <asm/mshyperv.h>
#include <asm/apic.h>

+#include <asm/trace/hyperv.h>
+
static struct apic orig_apic;

static u64 hv_apic_icr_read(void)
@@ -134,6 +136,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
struct ipi_arg_non_ex ipi_arg;
int ret = 1;

+ trace_hyperv_send_ipi_mask(mask, vector);
+
if (cpumask_empty(mask))
return true;

diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index 4253bca99989..9c0d4b588e3f 100644
--- a/arch/x86/include/asm/trace/hyperv.h
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -28,6 +28,21 @@ TRACE_EVENT(hyperv_mmu_flush_tlb_others,
__entry->addr, __entry->end)
);

+TRACE_EVENT(hyperv_send_ipi_mask,
+ TP_PROTO(const struct cpumask *cpus,
+ int vector),
+ TP_ARGS(cpus, vector),
+ TP_STRUCT__entry(
+ __field(unsigned int, ncpus)
+ __field(int, vector)
+ ),
+ TP_fast_assign(__entry->ncpus = cpumask_weight(cpus);
+ __entry->vector = vector;
+ ),
+ TP_printk("ncpus %d vector %x",
+ __entry->ncpus, __entry->vector)
+ );
+
#endif /* CONFIG_HYPERV */

#undef TRACE_INCLUDE_PATH
--
2.14.4

2018-06-22 17:09:24

by Vitaly Kuznetsov

[permalink] [raw]

Subject: [PATCH 2/4] x86/hyper-v: use 'fast' hypercall for HVCALL_SEND_IPI

Current Hyper-V TLFS (v5.0b) claims that the HvCallSendSyntheticClusterIpi
hypercall can't be 'fast' (passing parameters through registers), but
apparently this is not true: Windows always uses the 'fast' version. We can
do the same in Linux.

Signed-off-by: Vitaly Kuznetsov <[email protected]>
---
arch/x86/hyperv/hv_apic.c | 22 ++++++----------------
1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index f68855499391..90055f89223b 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -128,10 +128,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
static bool __send_ipi_mask(const struct cpumask *mask, int vector)
{
int cur_cpu, vcpu;
- struct ipi_arg_non_ex **arg;
- struct ipi_arg_non_ex *ipi_arg;
+ struct ipi_arg_non_ex ipi_arg;
int ret = 1;
- unsigned long flags;

if (cpumask_empty(mask))
return true;
@@ -145,16 +143,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
return __send_ipi_mask_ex(mask, vector);

- local_irq_save(flags);
- arg = (struct ipi_arg_non_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
-
- ipi_arg = *arg;
- if (unlikely(!ipi_arg))
- goto ipi_mask_done;
-
- ipi_arg->vector = vector;
- ipi_arg->reserved = 0;
- ipi_arg->cpu_mask = 0;
+ ipi_arg.vector = vector;
+ ipi_arg.cpu_mask = 0;

for_each_cpu(cur_cpu, mask) {
vcpu = hv_cpu_number_to_vp_number(cur_cpu);
@@ -165,13 +155,13 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
if (vcpu >= 64)
goto ipi_mask_done;

- __set_bit(vcpu, (unsigned long *)&ipi_arg->cpu_mask);
+ __set_bit(vcpu, (unsigned long *)&ipi_arg.cpu_mask);
}

- ret = hv_do_hypercall(HVCALL_SEND_IPI, ipi_arg, NULL);
+ ret = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
+ ipi_arg.cpu_mask);

ipi_mask_done:
- local_irq_restore(flags);
return ((ret == 0) ? true : false);
}

--
2.14.4

2018-06-22 17:10:13

by Vitaly Kuznetsov

[permalink] [raw]

Subject: [PATCH 1/4] x86/hyper-v: implement hv_do_fast_hypercall16

Implement a 'Fast' hypercall with two 64-bit input parameters. This is
going to be used for the HvCallSendSyntheticClusterIpi hypercall.

Signed-off-by: Vitaly Kuznetsov <[email protected]>
---
arch/x86/include/asm/mshyperv.h | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)

diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 3cd14311edfa..da25642940d3 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -193,6 +193,40 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
return hv_status;
}

+/* Fast hypercall with 16 bytes of input */
+static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+#ifdef CONFIG_X86_64
+ {
+ __asm__ __volatile__("mov %4, %%r8\n"
+ CALL_NOSPEC
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+ "+c" (control), "+d" (input1)
+ : "r" (input2),
+ THUNK_TARGET(hv_hypercall_pg)
+ : "cc", "r8", "r9", "r10", "r11");
+ }
+#else
+ {
+ u32 input1_hi = upper_32_bits(input1);
+ u32 input1_lo = lower_32_bits(input1);
+ u32 input2_hi = upper_32_bits(input2);
+ u32 input2_lo = lower_32_bits(input2);
+
+ __asm__ __volatile__ (CALL_NOSPEC
+ : "=A"(hv_status),
+ "+c"(input1_lo), ASM_CALL_CONSTRAINT
+ : "A" (control), "b" (input1_hi),
+ "D"(input2_hi), "S"(input2_lo),
+ THUNK_TARGET(hv_hypercall_pg)
+ : "cc");
+ }
+#endif
+ return hv_status;
+}
+
/*
* Rep hypercalls. Callers of this functions are supposed to ensure that
* rep_count and varhead_size comply with Hyper-V hypercall definition.
--
2.14.4

2018-06-25 23:56:28

by Michael Kelley (EOSG)

[permalink] [raw]

Subject: RE: [PATCH 2/4] x86/hyper-v: use 'fast' hypercall for HVCALL_SEND_IPI

> -----Original Message-----
> From: Vitaly Kuznetsov <[email protected]>
> Sent: Friday, June 22, 2018 10:06 AM
> To: [email protected]
> Cc: [email protected]; [email protected]; KY Srinivasan
> <[email protected]>; Haiyang Zhang <[email protected]>; Stephen Hemminger
> <[email protected]>; Thomas Gleixner <[email protected]>; Ingo Molnar
> <[email protected]>; H. Peter Anvin <[email protected]>; Tianyu Lan
> <[email protected]>; Michael Kelley (EOSG) <[email protected]>
> Subject: [PATCH 2/4] x86/hyper-v: use 'fast' hypercall for HVCALL_SEND_IPI
>
> Current Hyper-V TLFS (v5.0b) claims that HvCallSendSyntheticClusterIpi
> hypercall can't be 'fast' (passing parameters through registers) but
> apparently this is not true, Windows always uses 'fast' version. We can
> do the same in Linux too.
>
> Signed-off-by: Vitaly Kuznetsov <[email protected]>

Reviewed-by: Michael Kelley <[email protected]>

2018-06-26 00:04:54

by Michael Kelley (EOSG)

[permalink] [raw]

Subject: RE: [PATCH 3/4] x86/hyper-v: use cheaper HVCALL_SEND_IPI hypercall when possible

> -----Original Message-----
> From: Vitaly Kuznetsov <[email protected]>
> Sent: Friday, June 22, 2018 10:06 AM
> To: [email protected]
> Cc: [email protected]; [email protected]; KY Srinivasan
> <[email protected]>; Haiyang Zhang <[email protected]>; Stephen Hemminger
> <[email protected]>; Thomas Gleixner <[email protected]>; Ingo Molnar
> <[email protected]>; H. Peter Anvin <[email protected]>; Tianyu Lan
> <[email protected]>; Michael Kelley (EOSG) <[email protected]>
> Subject: [PATCH 3/4] x86/hyper-v: use cheaper HVCALL_SEND_IPI hypercall when possible
>
> When there is no need to send an IPI to a CPU with VP number > 64
> we can do the job with fast HVCALL_SEND_IPI hypercall.
>
> Signed-off-by: Vitaly Kuznetsov <[email protected]>

Reviewed-by: Michael Kelley <[email protected]>

2018-06-27 02:30:49

by Wanpeng Li

[permalink] [raw]

Subject: Re: [PATCH 0/4] x86/hyper-v: optimize PV IPIs

Hi Vitaly, (fix my reply mess this time)
On Sat, 23 Jun 2018 at 01:09, Vitaly Kuznetsov <[email protected]> wrote:
>
> When reviewing my "x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_
> {LIST,SPACE} hypercalls when possible" patch Michael suggested to apply the
> same idea to PV IPIs. Here we go!
>
> Despite what Hyper-V TLFS says about HVCALL_SEND_IPI hypercall, it can
> actually be 'fast' (passing parameters through registers). Use that too.
>
> This series can collide with my "KVM: x86: hyperv: PV IPI support for
> Windows guests" series as I rename ipi_arg_non_ex/ipi_arg_ex structures
> there. Depending on which one gets in first we may need to do tiny
> adjustments.

As Hyper-V PV TLB flush has already been merged, are there any other
obvious multicast IPI scenarios? QEMU has supported interrupt remapping
for two years now, and I think a Windows guest can switch to cluster mode
after entering x2APIC, thus sending IPIs per cluster. In addition, you
could also post benchmark results for this PV IPI optimization,
although it also fixes the bug which you mentioned above.

I can post one variant for Linux guest PV IPI if it also makes sense. :)

Regards,
Wanpeng Li

2018-06-27 10:28:51

by Wanpeng Li

[permalink] [raw]

Subject: Re: [PATCH 0/4] x86/hyper-v: optimize PV IPIs

On Wed, 27 Jun 2018 at 17:25, Vitaly Kuznetsov <[email protected]> wrote:
>
> Wanpeng Li <[email protected]> writes:
>
> > Hi Vitaly, (fix my reply mess this time)
> > On Sat, 23 Jun 2018 at 01:09, Vitaly Kuznetsov <[email protected]> wrote:
> >>
> >> When reviewing my "x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_
> >> {LIST,SPACE} hypercalls when possible" patch Michael suggested to apply the
> >> same idea to PV IPIs. Here we go!
> >>
> >> Despite what Hyper-V TLFS says about HVCALL_SEND_IPI hypercall, it can
> >> actually be 'fast' (passing parameters through registers). Use that too.
> >>
> >> This series can collide with my "KVM: x86: hyperv: PV IPI support for
> >> Windows guests" series as I rename ipi_arg_non_ex/ipi_arg_ex structures
> >> there. Depending on which one gets in first we may need to do tiny
> >> adjustments.
> >
> > As hyperv PV TLB flush has already been merged, is there any other
> > obvious multicast IPIs scenarios? qemu supports interrupt remapping
> > since two years ago, I think windows guest can switch to cluster mode
> > after entering x2APIC, so sending IPI per cluster.
>
> I got confused, which of my patch series are you actually looking at?
> :-)

Yeah, I originally wanted to reply to the thread you sent to the kvm
mailing list, "KVM: x86: hyperv: PV IPI support for Windows guests",
but mistakenly replied to this one because the subjects are similar.

> When we manifest ourselves as Hyper-V Windows 'forgets' about x2apic
> mode: Hyper-V has a concept of 'Synthetic interrupt controller' - an
> xapic extension which we also support in KVM. I don't really know any
> obvious scenarios for mass IPIs in Windows besides TLB flush but I'm
> worried they may exist. Without PV IPIs any such attempt will likely
> lead to a crash.
>
> In general, I do care more about completeness and correctness of our
> Hyper-V emulation at this point: Windows is only being tested on 'real'
> Hyper-Vs so when we emulate a subset of enlightenments we're on our own
> when something is not working. It is also very helpful for
> Linux-on-Hyper-V development as we can see how Windows-on-Hyper-V
> behaves :-)
>
> > In addition, you
> > can also post the benchmark result for this PV IPI optimization,
> > although it also fixes the bug which you mentioned above.
>
> I'd love to get to know how to trigger mass IPIs in Windows so a
> benchmark can be performed...

I'm also not sure about Windows. I used
https://lkml.org/lkml/2017/12/19/141 as a Linux kernel module to
evaluate broadcast IPI performance in a Linux guest last year. :)

>
> > I can post one variant for Linux guest PV IPI if it also makes
> > sense. :)
>
> With x2apic support I'm actually not sure. Maybe configurations with
> a very large number of vCPUs and IPIs going to > 256 vCPUs can benefit
> from a 'single hypercall' solution.

Each cluster in x2APIC cluster mode can support only 16 unique logical
IDs, so I think a Linux guest can also benefit as long as the VM has
more than 16 vCPUs. I will cook up patches to evaluate it. :)

Regards,
Wanpeng Li

2018-06-27 11:08:22

by Vitaly Kuznetsov

[permalink] [raw]

Subject: Re: [PATCH 0/4] x86/hyper-v: optimize PV IPIs

Wanpeng Li <[email protected]> writes:

> Hi Vitaly, (fix my reply mess this time)
> On Sat, 23 Jun 2018 at 01:09, Vitaly Kuznetsov <[email protected]> wrote:
>>
>> When reviewing my "x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_
>> {LIST,SPACE} hypercalls when possible" patch Michael suggested to apply the
>> same idea to PV IPIs. Here we go!
>>
>> Despite what Hyper-V TLFS says about HVCALL_SEND_IPI hypercall, it can
>> actually be 'fast' (passing parameters through registers). Use that too.
>>
>> This series can collide with my "KVM: x86: hyperv: PV IPI support for
>> Windows guests" series as I rename ipi_arg_non_ex/ipi_arg_ex structures
>> there. Depending on which one gets in first we may need to do tiny
>> adjustments.
>
> As hyperv PV TLB flush has already been merged, is there any other
> obvious multicast IPIs scenarios? qemu supports interrupt remapping
> since two years ago, I think windows guest can switch to cluster mode
> after entering x2APIC, so sending IPI per cluster. In addition, you
> can also post the benchmark result for this PV IPI optimization,
> although it also fixes the bug which you mentioned above.

I got confused, which of my patch series are you actually looking at?
:-)

This particular one ("x86/hyper-v: optimize PV IPIs") is not about
KVM/qemu, it is for Linux running on top of a real Hyper-V server. We
already support PV IPIs and here I'm just trying to optimize the way
we send them by switching to a cheaper hypercall (and using the 'fast'
version of it) when possible. I don't actually have a good benchmark
(and I don't remember seeing one when K.Y. posted PV IPI support) but
this can be arranged I guess: I can write a dumb 'IPI sender' in the kernel
and send e.g. 1000 IPIs.

--
Vitaly

2018-06-27 11:39:18

by Vitaly Kuznetsov

[permalink] [raw]

Subject: Re: [PATCH 0/4] x86/hyper-v: optimize PV IPIs

Wanpeng Li <[email protected]> writes:

> Hi Vitaly, (fix my reply mess this time)
> On Sat, 23 Jun 2018 at 01:09, Vitaly Kuznetsov <[email protected]> wrote:
>>
>> When reviewing my "x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_
>> {LIST,SPACE} hypercalls when possible" patch Michael suggested to apply the
>> same idea to PV IPIs. Here we go!
>>
>> Despite what Hyper-V TLFS says about HVCALL_SEND_IPI hypercall, it can
>> actually be 'fast' (passing parameters through registers). Use that too.
>>
>> This series can collide with my "KVM: x86: hyperv: PV IPI support for
>> Windows guests" series as I rename ipi_arg_non_ex/ipi_arg_ex structures
>> there. Depending on which one gets in first we may need to do tiny
>> adjustments.
>
> As hyperv PV TLB flush has already been merged, is there any other
> obvious multicast IPIs scenarios? qemu supports interrupt remapping
> since two years ago, I think windows guest can switch to cluster mode
> after entering x2APIC, so sending IPI per cluster.

When we manifest ourselves as Hyper-V, Windows 'forgets' about x2apic
mode: Hyper-V has a concept of 'Synthetic interrupt controller' - an
xapic extension which we also support in KVM. I don't really know any
obvious scenarios for mass IPIs in Windows besides TLB flush but I'm
worried they may exist. Without PV IPIs any such attempt will likely
lead to a crash.

In general, I do care more about completeness and correctness of our
Hyper-V emulation at this point: Windows is only being tested on 'real'
Hyper-Vs so when we emulate a subset of enlightenments we're on our own
when something is not working. It is also very helpful for
Linux-on-Hyper-V development as we can see how Windows-on-Hyper-V
behaves :-)

> In addition, you
> can also post the benchmark result for this PV IPI optimization,
> although it also fixes the bug which you mentioned above.

I'd love to get to know how to trigger mass IPIs in Windows so a
benchmark can be performed...

> I can post one variant for Linux guest PV IPI if it also makes
> sense. :)

With x2apic support I'm actually not sure. Maybe configurations with
a very large number of vCPUs and IPIs going to > 256 vCPUs can benefit
from a 'single hypercall' solution.

--
Vitaly

2018-06-28 19:11:35

by Vitaly Kuznetsov

[permalink] [raw]

Subject: Re: [PATCH 0/4] x86/hyper-v: optimize PV IPIs

Vitaly Kuznetsov <[email protected]> writes:

> Wanpeng Li <[email protected]> writes:
>
>> Hi Vitaly, (fix my reply mess this time)
>> On Sat, 23 Jun 2018 at 01:09, Vitaly Kuznetsov <[email protected]> wrote:
>>>
>>> When reviewing my "x86/hyper-v: use cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_
>>> {LIST,SPACE} hypercalls when possible" patch Michael suggested to apply the
>>> same idea to PV IPIs. Here we go!
>>>
>>> Despite what Hyper-V TLFS says about HVCALL_SEND_IPI hypercall, it can
>>> actually be 'fast' (passing parameters through registers). Use that too.
>>>
>>> This series can collide with my "KVM: x86: hyperv: PV IPI support for
>>> Windows guests" series as I rename ipi_arg_non_ex/ipi_arg_ex structures
>>> there. Depending on which one gets in first we may need to do tiny
>>> adjustments.
>>
>> As hyperv PV TLB flush has already been merged, is there any other
>> obvious multicast IPIs scenarios? qemu supports interrupt remapping
>> since two years ago, I think windows guest can switch to cluster mode
>> after entering x2APIC, so sending IPI per cluster. In addition, you
>> can also post the benchmark result for this PV IPI optimization,
>> although it also fixes the bug which you mentioned above.
>
> I got confused, which of my patch series are you actually looking at?
> :-)
>
> This particular one ("x86/hyper-v: optimize PV IPIs") is not about
> KVM/qemu, it is for Linux running on top on real Hyper-V server. We
> already support PV IPIs and here I'm just trying to optimize the way how
> we send them by switching to a cheaper hypercall (and using 'fast'
> version of it) when possible. I don't actually have a good benchmark
> (and I don't remember seeing one when K.Y. posted PV IPI support) but
> this can be arranged I guess: I can write a dump 'IPI sender' in kernel
> and send e.g. 1000 IPIs.

So I used the IPI benchmark (https://lkml.org/lkml/2017/12/19/141,
thanks for the tip!) on this series. On a 16 vCPU guest (WS2016) I'm
getting the following:

Before:
Dry-run: 0 203110
Self-IPI: 6167430 11645550
Normal IPI: 380479300 475881820
Broadcast IPI: 0 2557371420

After:
Dry-run: 0 214280 (not interesting)
Self-IPI: 5706210 10697640 (- 8%)
Normal IPI: 379330010 450158830 (- 5%)
Broadcast IPI: 0 2340427160 (- 8%)

--
Vitaly

2018-07-03 07:05:57

by tip-bot for Vitaly Kuznetsov

[permalink] [raw]

Subject: [tip:x86/hyperv] x86/hyper-v: Use 'fast' hypercall for HVCALL_SEND_IPI

Commit-ID: d8e6b232cfdd5d141c03e40a14c1c781480ea05e
Gitweb: https://git.kernel.org/tip/d8e6b232cfdd5d141c03e40a14c1c781480ea05e
Author: Vitaly Kuznetsov <[email protected]>
AuthorDate: Fri, 22 Jun 2018 19:06:23 +0200
Committer: Thomas Gleixner <[email protected]>
CommitDate: Tue, 3 Jul 2018 09:00:33 +0200

x86/hyper-v: Use 'fast' hypercall for HVCALL_SEND_IPI

Current Hyper-V TLFS (v5.0b) claims that the HvCallSendSyntheticClusterIpi
hypercall can't be 'fast' (passing parameters through registers), but
apparently this is not true: Windows always uses the 'fast' version. We can
do the same in Linux.

Signed-off-by: Vitaly Kuznetsov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Michael Kelley <[email protected]>
Cc: [email protected]
Cc: "K. Y. Srinivasan" <[email protected]>
Cc: Haiyang Zhang <[email protected]>
Cc: Stephen Hemminger <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Tianyu Lan <[email protected]>
Cc: "Michael Kelley (EOSG)" <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/hyperv/hv_apic.c | 22 ++++++----------------
1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index f68855499391..90055f89223b 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -128,10 +128,8 @@ ipi_mask_ex_done:
static bool __send_ipi_mask(const struct cpumask *mask, int vector)
{
int cur_cpu, vcpu;
- struct ipi_arg_non_ex **arg;
- struct ipi_arg_non_ex *ipi_arg;
+ struct ipi_arg_non_ex ipi_arg;
int ret = 1;
- unsigned long flags;

if (cpumask_empty(mask))
return true;
@@ -145,16 +143,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
return __send_ipi_mask_ex(mask, vector);

- local_irq_save(flags);
- arg = (struct ipi_arg_non_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
-
- ipi_arg = *arg;
- if (unlikely(!ipi_arg))
- goto ipi_mask_done;
-
- ipi_arg->vector = vector;
- ipi_arg->reserved = 0;
- ipi_arg->cpu_mask = 0;
+ ipi_arg.vector = vector;
+ ipi_arg.cpu_mask = 0;

for_each_cpu(cur_cpu, mask) {
vcpu = hv_cpu_number_to_vp_number(cur_cpu);
@@ -165,13 +155,13 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
if (vcpu >= 64)
goto ipi_mask_done;

- __set_bit(vcpu, (unsigned long *)&ipi_arg->cpu_mask);
+ __set_bit(vcpu, (unsigned long *)&ipi_arg.cpu_mask);
}

- ret = hv_do_hypercall(HVCALL_SEND_IPI, ipi_arg, NULL);
+ ret = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
+ ipi_arg.cpu_mask);

ipi_mask_done:
- local_irq_restore(flags);
return ((ret == 0) ? true : false);
}

2018-07-03 07:06:23

by tip-bot for Vitaly Kuznetsov

[permalink] [raw]

Subject: [tip:x86/hyperv] x86/hyper-v: Use cheaper HVCALL_SEND_IPI hypercall when possible

Commit-ID: 4bd06060762bc7e4834ecf9daeb78834f7a29582
Gitweb: https://git.kernel.org/tip/4bd06060762bc7e4834ecf9daeb78834f7a29582
Author: Vitaly Kuznetsov <[email protected]>
AuthorDate: Fri, 22 Jun 2018 19:06:24 +0200
Committer: Thomas Gleixner <[email protected]>
CommitDate: Tue, 3 Jul 2018 09:00:34 +0200

x86/hyper-v: Use cheaper HVCALL_SEND_IPI hypercall when possible

When there is no need to send an IPI to a CPU with VP number >= 64
we can do the job with the fast HVCALL_SEND_IPI hypercall.

Signed-off-by: Vitaly Kuznetsov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Michael Kelley <[email protected]>
Cc: [email protected]
Cc: "K. Y. Srinivasan" <[email protected]>
Cc: Haiyang Zhang <[email protected]>
Cc: Stephen Hemminger <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Tianyu Lan <[email protected]>
Cc: "Michael Kelley (EOSG)" <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/hyperv/hv_apic.c | 29 ++++++++++++++++++++---------
1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 90055f89223b..ee962784d25b 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -99,6 +99,9 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
int nr_bank = 0;
int ret = 1;

+ if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+ return false;
+
local_irq_save(flags);
arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);

@@ -140,8 +143,18 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
return false;

- if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
- return __send_ipi_mask_ex(mask, vector);
+ /*
+ * From the supplied CPU set we need to figure out if we can get away
+ * with cheaper HVCALL_SEND_IPI hypercall. This is possible when the
+ * highest VP number in the set is < 64. As VP numbers are usually in
+ * ascending order and match Linux CPU ids, here is an optimization:
+ * we check the VP number for the highest bit in the supplied set first
+ * so we can quickly find out if using HVCALL_SEND_IPI_EX hypercall is
+ * a must. We will also check all VP numbers when walking the supplied
+ * CPU set to remain correct in all cases.
+ */
+ if (hv_cpu_number_to_vp_number(cpumask_last(mask)) >= 64)
+ goto do_ex_hypercall;

ipi_arg.vector = vector;
ipi_arg.cpu_mask = 0;
@@ -153,16 +166,17 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
* only target upto 64 CPUs.
*/
if (vcpu >= 64)
- goto ipi_mask_done;
+ goto do_ex_hypercall;

__set_bit(vcpu, (unsigned long *)&ipi_arg.cpu_mask);
}

ret = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
ipi_arg.cpu_mask);
-
-ipi_mask_done:
return ((ret == 0) ? true : false);
+
+do_ex_hypercall:
+ return __send_ipi_mask_ex(mask, vector);
}

static bool __send_ipi_one(int cpu, int vector)
@@ -218,10 +232,7 @@ static void hv_send_ipi_self(int vector)
void __init hv_apic_init(void)
{
if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) {
- if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
- pr_info("Hyper-V: Using ext hypercalls for IPI\n");
- else
- pr_info("Hyper-V: Using IPI hypercalls\n");
+ pr_info("Hyper-V: Using IPI hypercalls\n");
/*
* Set the IPI entry points.
*/

2018-07-03 07:07:39

by tip-bot for Vitaly Kuznetsov

[permalink] [raw]

Subject: [tip:x86/hyperv] x86/hyper-v: Trace PV IPI send

Commit-ID: 58ec5e9c9044bd7e1c0bcc6ad822b2e909f49732
Gitweb: https://git.kernel.org/tip/58ec5e9c9044bd7e1c0bcc6ad822b2e909f49732
Author: Vitaly Kuznetsov <[email protected]>
AuthorDate: Fri, 22 Jun 2018 19:06:25 +0200
Committer: Thomas Gleixner <[email protected]>
CommitDate: Tue, 3 Jul 2018 09:00:34 +0200

x86/hyper-v: Trace PV IPI send

Trace Hyper-V PV IPIs the same way we do PV TLB flush.

Signed-off-by: Vitaly Kuznetsov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: "K. Y. Srinivasan" <[email protected]>
Cc: Haiyang Zhang <[email protected]>
Cc: Stephen Hemminger <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Tianyu Lan <[email protected]>
Cc: "Michael Kelley (EOSG)" <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/hyperv/hv_apic.c | 4 ++++
arch/x86/include/asm/trace/hyperv.h | 15 +++++++++++++++
2 files changed, 19 insertions(+)

diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index ee962784d25b..657a2b8c738a 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -31,6 +31,8 @@
#include <asm/mshyperv.h>
#include <asm/apic.h>

+#include <asm/trace/hyperv.h>
+
static struct apic orig_apic;

static u64 hv_apic_icr_read(void)
@@ -134,6 +136,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
struct ipi_arg_non_ex ipi_arg;
int ret = 1;

+ trace_hyperv_send_ipi_mask(mask, vector);
+
if (cpumask_empty(mask))
return true;

diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index 4253bca99989..9c0d4b588e3f 100644
--- a/arch/x86/include/asm/trace/hyperv.h
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -28,6 +28,21 @@ TRACE_EVENT(hyperv_mmu_flush_tlb_others,
__entry->addr, __entry->end)
);

+TRACE_EVENT(hyperv_send_ipi_mask,
+ TP_PROTO(const struct cpumask *cpus,
+ int vector),
+ TP_ARGS(cpus, vector),
+ TP_STRUCT__entry(
+ __field(unsigned int, ncpus)
+ __field(int, vector)
+ ),
+ TP_fast_assign(__entry->ncpus = cpumask_weight(cpus);
+ __entry->vector = vector;
+ ),
+ TP_printk("ncpus %d vector %x",
+ __entry->ncpus, __entry->vector)
+ );
+
#endif /* CONFIG_HYPERV */

#undef TRACE_INCLUDE_PATH

2018-07-03 07:08:14

by tip-bot for Vitaly Kuznetsov

[permalink] [raw]

Subject: [tip:x86/hyperv] x86/hyper-v: Implement hv_do_fast_hypercall16

Commit-ID: 53e52966901a5b14caa2a7c77428a693fe71f734
Gitweb: https://git.kernel.org/tip/53e52966901a5b14caa2a7c77428a693fe71f734
Author: Vitaly Kuznetsov <[email protected]>
AuthorDate: Fri, 22 Jun 2018 19:06:22 +0200
Committer: Thomas Gleixner <[email protected]>
CommitDate: Tue, 3 Jul 2018 09:00:33 +0200

x86/hyper-v: Implement hv_do_fast_hypercall16

Implement a 'Fast' hypercall with two 64-bit input parameters. This is
going to be used for the HvCallSendSyntheticClusterIpi hypercall.

Signed-off-by: Vitaly Kuznetsov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: "K. Y. Srinivasan" <[email protected]>
Cc: Haiyang Zhang <[email protected]>
Cc: Stephen Hemminger <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Tianyu Lan <[email protected]>
Cc: "Michael Kelley (EOSG)" <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
arch/x86/include/asm/mshyperv.h | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)

diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 3cd14311edfa..da25642940d3 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -193,6 +193,40 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
return hv_status;
}

+/* Fast hypercall with 16 bytes of input */
+static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+#ifdef CONFIG_X86_64
+ {
+ __asm__ __volatile__("mov %4, %%r8\n"
+ CALL_NOSPEC
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+ "+c" (control), "+d" (input1)
+ : "r" (input2),
+ THUNK_TARGET(hv_hypercall_pg)
+ : "cc", "r8", "r9", "r10", "r11");
+ }
+#else
+ {
+ u32 input1_hi = upper_32_bits(input1);
+ u32 input1_lo = lower_32_bits(input1);
+ u32 input2_hi = upper_32_bits(input2);
+ u32 input2_lo = lower_32_bits(input2);
+
+ __asm__ __volatile__ (CALL_NOSPEC
+ : "=A"(hv_status),
+ "+c"(input1_lo), ASM_CALL_CONSTRAINT
+ : "A" (control), "b" (input1_hi),
+ "D"(input2_hi), "S"(input2_lo),
+ THUNK_TARGET(hv_hypercall_pg)
+ : "cc");
+ }
+#endif
+ return hv_status;
+}
+
/*
* Rep hypercalls. Callers of this functions are supposed to ensure that
* rep_count and varhead_size comply with Hyper-V hypercall definition.