2021-04-13 05:58:17

by Siddharth Chandrasekaran

Subject: [PATCH v2 3/4] KVM: x86: kvm_hv_flush_tlb use inputs from XMM registers

Hyper-V supports the use of XMM registers to perform fast hypercalls.
This allows guests to take advantage of the improved performance of the
fast hypercall interface even though a hypercall may require more than
(the current maximum of) two input registers.

The XMM fast hypercall interface uses six additional XMM registers (XMM0
to XMM5) to allow the guest to pass an input parameter block of up to
112 bytes. Hyper-V can also return data back to the guest in the
remaining XMM registers that are not used by the current hypercall.
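
For reference, the 112-byte figure follows from the register widths: the
two GP input registers (RDX, R8) carry 2 x 8 = 16 bytes, and XMM0 to XMM5
carry 6 x 16 = 96 bytes. A back-of-the-envelope sketch of that arithmetic
(illustrative only, not part of this patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        size_t gp_bytes  = 2 * sizeof(uint64_t); /* RDX, R8    -> 16 */
        size_t xmm_bytes = 6 * 16;               /* XMM0..XMM5 -> 96 */

        /* Maximum fast hypercall input parameter block. */
        printf("%zu bytes\n", gp_bytes + xmm_bytes); /* prints "112 bytes" */
        return 0;
    }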

Add framework to read/write to XMM registers in kvm_hv_hypercall() and
use the additional hypercall inputs from XMM registers in
kvm_hv_flush_tlb() when possible.

Cc: Alexander Graf <[email protected]>
Co-developed-by: Evgeny Iakovlev <[email protected]>
Signed-off-by: Evgeny Iakovlev <[email protected]>
Signed-off-by: Siddharth Chandrasekaran <[email protected]>
---
arch/x86/kvm/hyperv.c | 109 ++++++++++++++++++++++++++++++++++--------
1 file changed, 90 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 8f6babd1ea0d..1f9959aba70d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -36,6 +36,7 @@

#include "trace.h"
#include "irq.h"
+#include "fpu.h"

/* "Hv#1" signature */
#define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
@@ -1623,6 +1624,8 @@ static __always_inline unsigned long *sparse_set_to_vcpu_mask(
return vcpu_bitmap;
}

+#define KVM_HV_HYPERCALL_MAX_XMM_REGISTERS 6
+
struct kvm_hv_hcall {
u64 param;
u64 ingpa;
@@ -1632,10 +1635,14 @@ struct kvm_hv_hcall {
u16 rep_idx;
bool fast;
bool rep;
+ sse128_t xmm[KVM_HV_HYPERCALL_MAX_XMM_REGISTERS];
+ bool xmm_dirty;
};

static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex)
{
+ int i, j;
+ gpa_t gpa;
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
struct hv_tlb_flush_ex flush_ex;
@@ -1649,8 +1656,15 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
bool all_cpus;

if (!ex) {
- if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush, sizeof(flush))))
- return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ if (hc->fast) {
+ flush.address_space = hc->ingpa;
+ flush.flags = hc->outgpa;
+ flush.processor_mask = sse128_lo(hc->xmm[0]);
+ } else {
+ if (unlikely(kvm_read_guest(kvm, hc->ingpa,
+ &flush, sizeof(flush))))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ }

trace_kvm_hv_flush_tlb(flush.processor_mask,
flush.address_space, flush.flags);
@@ -1668,9 +1682,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
flush.processor_mask == 0;
} else {
- if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
- sizeof(flush_ex))))
- return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ if (hc->fast) {
+ flush_ex.address_space = hc->ingpa;
+ flush_ex.flags = hc->outgpa;
+ memcpy(&flush_ex.hv_vp_set,
+ &hc->xmm[0], sizeof(hc->xmm[0]));
+ } else {
+ if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
+ sizeof(flush_ex))))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ }

trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
flush_ex.hv_vp_set.format,
@@ -1681,20 +1702,29 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
all_cpus = flush_ex.hv_vp_set.format !=
HV_GENERIC_SET_SPARSE_4K;

- sparse_banks_len =
- bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
- sizeof(sparse_banks[0]);
+ sparse_banks_len = bitmap_weight((unsigned long *)&valid_bank_mask, 64);

if (!sparse_banks_len && !all_cpus)
goto ret_success;

- if (!all_cpus &&
- kvm_read_guest(kvm,
- hc->ingpa + offsetof(struct hv_tlb_flush_ex,
- hv_vp_set.bank_contents),
- sparse_banks,
- sparse_banks_len))
- return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ if (!all_cpus) {
+ if (hc->fast) {
+ if (sparse_banks_len > KVM_HV_HYPERCALL_MAX_XMM_REGISTERS - 1)
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ for (i = 0, j = 1; i < sparse_banks_len; i += 2, j++) {
+ sparse_banks[i + 0] = sse128_lo(hc->xmm[j]);
+ sparse_banks[i + 1] = sse128_hi(hc->xmm[j]);
+ }
+ } else {
+ gpa = hc->ingpa;
+ gpa += offsetof(struct hv_tlb_flush_ex,
+ hv_vp_set.bank_contents);
+ if (unlikely(kvm_read_guest(kvm, gpa, sparse_banks,
+ sparse_banks_len *
+ sizeof(sparse_banks[0]))))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ }
+ }
}

cpumask_clear(&hv_vcpu->tlb_flush);
@@ -1890,6 +1920,41 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *h
return HV_STATUS_SUCCESS;
}

+static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc)
+{
+ switch (hc->code) {
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
+ return true;
+ }
+
+ return false;
+}
+
+static inline void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)
+{
+ int reg;
+
+ kvm_fpu_get();
+ for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
+ _kvm_read_sse_reg(reg, &hc->xmm[reg]);
+ kvm_fpu_put();
+ hc->xmm_dirty = false;
+}
+
+static inline void kvm_hv_hypercall_write_xmm(struct kvm_hv_hcall *hc)
+{
+ int reg;
+
+ kvm_fpu_get();
+ for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
+ _kvm_write_sse_reg(reg, &hc->xmm[reg]);
+ kvm_fpu_put();
+ hc->xmm_dirty = false;
+}
+
int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
struct kvm_hv_hcall hc;
@@ -1926,6 +1991,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
hc.rep = !!(hc.rep_cnt || hc.rep_idx);

+ if (hc.fast && is_xmm_fast_hypercall(&hc))
+ kvm_hv_hypercall_read_xmm(&hc);
+
trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
hc.ingpa, hc.outgpa);

@@ -1961,28 +2029,28 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
kvm_hv_hypercall_complete_userspace;
return 0;
case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
- if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) {
+ if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
}
ret = kvm_hv_flush_tlb(vcpu, &hc, false);
break;
case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
- if (unlikely(hc.fast || hc.rep)) {
+ if (unlikely(hc.rep)) {
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
}
ret = kvm_hv_flush_tlb(vcpu, &hc, false);
break;
case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
- if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) {
+ if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
}
ret = kvm_hv_flush_tlb(vcpu, &hc, true);
break;
case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
- if (unlikely(hc.fast || hc.rep)) {
+ if (unlikely(hc.rep)) {
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
}
@@ -2035,6 +2103,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
break;
}

+ if (hc.xmm_dirty)
+ kvm_hv_hypercall_write_xmm(&hc);
+
return kvm_hv_hypercall_complete(vcpu, ret);
}

--
2.17.1








2021-04-13 07:02:55

by Wei Liu

Subject: Re: [PATCH v2 3/4] KVM: x86: kvm_hv_flush_tlb use inputs from XMM registers

On Mon, Apr 12, 2021 at 07:00:16PM +0200, Siddharth Chandrasekaran wrote:
> +
> +static inline void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)

Do you really need inline here? The compiler should be smart enough to
inline this function if necessary.

> +{
> + int reg;
> +
> + kvm_fpu_get();
> + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
> + _kvm_read_sse_reg(reg, &hc->xmm[reg]);
> + kvm_fpu_put();
> + hc->xmm_dirty = false;

There is no code that sets xmm_dirty to true? What am I missing? I guess
that's because you haven't implemented the hypercalls that need writing
back to the guest?

Wei.

2021-04-13 09:45:46

by Siddharth Chandrasekaran

Subject: Re: [PATCH v2 3/4] KVM: x86: kvm_hv_flush_tlb use inputs from XMM registers

On Mon, Apr 12, 2021 at 08:13:19PM +0000, Wei Liu wrote:
> On Mon, Apr 12, 2021 at 07:00:16PM +0200, Siddharth Chandrasekaran wrote:
> > +
> > +static inline void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)
>
> Do you really need inline here? The compiler should be smart enough to
> inline this function if necessary.

Removed.

> > +{
> > + int reg;
> > +
> > + kvm_fpu_get();
> > + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
> > + _kvm_read_sse_reg(reg, &hc->xmm[reg]);
> > + kvm_fpu_put();
> > + hc->xmm_dirty = false;
>
> There is no code that sets xmm_dirty to true? What am I missing? I guess
> that's because you haven't implemented the hypercalls that need writing
> back to the guest?

Yes, when a hypercall wants to return data via XMM registers, it should
update hc->xmm[] and set hc->xmm_dirty (I plan to use this in a future
patch). The reason I didn't defer this change to the patch that actually
needs it is that it pairs nicely with the XMM read/write helpers called
from kvm_hv_hypercall().

~ Sid.






2021-04-13 18:30:49

by Vitaly Kuznetsov

Subject: Re: [PATCH v2 3/4] KVM: x86: kvm_hv_flush_tlb use inputs from XMM registers

Siddharth Chandrasekaran <[email protected]> writes:

> Hyper-V supports the use of XMM registers to perform fast hypercalls.
> This allows guests to take advantage of the improved performance of the
> fast hypercall interface even though a hypercall may require more than
> (the current maximum of) two input registers.
>
> The XMM fast hypercall interface uses six additional XMM registers (XMM0
> to XMM5) to allow the guest to pass an input parameter block of up to
> 112 bytes. Hyper-V can also return data back to the guest in the
> remaining XMM registers that are not used by the current hypercall.
>
> Add framework to read/write to XMM registers in kvm_hv_hypercall() and
> use the additional hypercall inputs from XMM registers in
> kvm_hv_flush_tlb() when possible.
>
> Cc: Alexander Graf <[email protected]>
> Co-developed-by: Evgeny Iakovlev <[email protected]>
> Signed-off-by: Evgeny Iakovlev <[email protected]>
> Signed-off-by: Siddharth Chandrasekaran <[email protected]>
> ---
> arch/x86/kvm/hyperv.c | 109 ++++++++++++++++++++++++++++++++++--------
> 1 file changed, 90 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 8f6babd1ea0d..1f9959aba70d 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -36,6 +36,7 @@
>
> #include "trace.h"
> #include "irq.h"
> +#include "fpu.h"
>
> /* "Hv#1" signature */
> #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
> @@ -1623,6 +1624,8 @@ static __always_inline unsigned long *sparse_set_to_vcpu_mask(
> return vcpu_bitmap;
> }
>
> +#define KVM_HV_HYPERCALL_MAX_XMM_REGISTERS 6

Nitpick: this is not KVM-specific so could probably go to arch/x86/include/asm/hyperv-tlfs.h

> +
> struct kvm_hv_hcall {
> u64 param;
> u64 ingpa;
> @@ -1632,10 +1635,14 @@ struct kvm_hv_hcall {
> u16 rep_idx;
> bool fast;
> bool rep;
> + sse128_t xmm[KVM_HV_HYPERCALL_MAX_XMM_REGISTERS];
> + bool xmm_dirty;
> };
>
> static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex)
> {
> + int i, j;
> + gpa_t gpa;
> struct kvm *kvm = vcpu->kvm;
> struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> struct hv_tlb_flush_ex flush_ex;
> @@ -1649,8 +1656,15 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
> bool all_cpus;
>
> if (!ex) {
> - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush, sizeof(flush))))
> - return HV_STATUS_INVALID_HYPERCALL_INPUT;
> + if (hc->fast) {
> + flush.address_space = hc->ingpa;
> + flush.flags = hc->outgpa;
> + flush.processor_mask = sse128_lo(hc->xmm[0]);
> + } else {
> + if (unlikely(kvm_read_guest(kvm, hc->ingpa,
> + &flush, sizeof(flush))))
> + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> + }
>
> trace_kvm_hv_flush_tlb(flush.processor_mask,
> flush.address_space, flush.flags);
> @@ -1668,9 +1682,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
> all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
> flush.processor_mask == 0;
> } else {
> - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
> - sizeof(flush_ex))))
> - return HV_STATUS_INVALID_HYPERCALL_INPUT;
> + if (hc->fast) {
> + flush_ex.address_space = hc->ingpa;
> + flush_ex.flags = hc->outgpa;
> + memcpy(&flush_ex.hv_vp_set,
> + &hc->xmm[0], sizeof(hc->xmm[0]));
> + } else {
> + if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
> + sizeof(flush_ex))))
> + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> + }
>
> trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
> flush_ex.hv_vp_set.format,
> @@ -1681,20 +1702,29 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
> all_cpus = flush_ex.hv_vp_set.format !=
> HV_GENERIC_SET_SPARSE_4K;
>
> - sparse_banks_len =
> - bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
> - sizeof(sparse_banks[0]);
> + sparse_banks_len = bitmap_weight((unsigned long *)&valid_bank_mask, 64);
>
> if (!sparse_banks_len && !all_cpus)
> goto ret_success;
>
> - if (!all_cpus &&
> - kvm_read_guest(kvm,
> - hc->ingpa + offsetof(struct hv_tlb_flush_ex,
> - hv_vp_set.bank_contents),
> - sparse_banks,
> - sparse_banks_len))
> - return HV_STATUS_INVALID_HYPERCALL_INPUT;
> + if (!all_cpus) {
> + if (hc->fast) {
> + if (sparse_banks_len > KVM_HV_HYPERCALL_MAX_XMM_REGISTERS - 1)
> + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> + for (i = 0, j = 1; i < sparse_banks_len; i += 2, j++) {

Nitpick: you don't really need 'j' here as 'j == i/2 + 1', right?
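
I.e. (untested) something like:

    for (i = 0; i < sparse_banks_len; i += 2) {
        sparse_banks[i + 0] = sse128_lo(hc->xmm[i / 2 + 1]);
        sparse_banks[i + 1] = sse128_hi(hc->xmm[i / 2 + 1]);
    }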

> + sparse_banks[i + 0] = sse128_lo(hc->xmm[j]);

Using ' + 0' for indentation is ... unusual :-) I'm not opposed to it
here though.

> + sparse_banks[i + 1] = sse128_hi(hc->xmm[j]);
> + }
> + } else {
> + gpa = hc->ingpa;
> + gpa += offsetof(struct hv_tlb_flush_ex,
> + hv_vp_set.bank_contents);

Nitpick: if splitting these into two lines is only done to fit into 80
chars, then I'd say that requirement is no more, so we can be a bit wider.

gpa = hc->ingpa + offsetof(...)

> + if (unlikely(kvm_read_guest(kvm, gpa, sparse_banks,
> + sparse_banks_len *
> + sizeof(sparse_banks[0]))))
> + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> + }
> + }
> }
>
> cpumask_clear(&hv_vcpu->tlb_flush);
> @@ -1890,6 +1920,41 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *h
> return HV_STATUS_SUCCESS;
> }
>
> +static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc)
> +{
> + switch (hc->code) {
> + case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
> + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
> + case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
> + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
> + return true;
> + }
> +
> + return false;
> +}
> +
> +static inline void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)
> +{
> + int reg;
> +
> + kvm_fpu_get();
> + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
> + _kvm_read_sse_reg(reg, &hc->xmm[reg]);
> + kvm_fpu_put();
> + hc->xmm_dirty = false;
> +}
> +
> +static inline void kvm_hv_hypercall_write_xmm(struct kvm_hv_hcall *hc)
> +{
> + int reg;
> +
> + kvm_fpu_get();
> + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
> + _kvm_write_sse_reg(reg, &hc->xmm[reg]);
> + kvm_fpu_put();
> + hc->xmm_dirty = false;
> +}
> +
> int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
> {
> struct kvm_hv_hcall hc;
> @@ -1926,6 +1991,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
> hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
> hc.rep = !!(hc.rep_cnt || hc.rep_idx);
>
> + if (hc.fast && is_xmm_fast_hypercall(&hc))
> + kvm_hv_hypercall_read_xmm(&hc);
> +
> trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
> hc.ingpa, hc.outgpa);
>
> @@ -1961,28 +2029,28 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
> kvm_hv_hypercall_complete_userspace;
> return 0;
> case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
> - if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) {
> + if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
> ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
> break;
> }
> ret = kvm_hv_flush_tlb(vcpu, &hc, false);
> break;
> case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
> - if (unlikely(hc.fast || hc.rep)) {
> + if (unlikely(hc.rep)) {
> ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
> break;
> }
> ret = kvm_hv_flush_tlb(vcpu, &hc, false);
> break;
> case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
> - if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) {
> + if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
> ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
> break;
> }
> ret = kvm_hv_flush_tlb(vcpu, &hc, true);
> break;
> case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
> - if (unlikely(hc.fast || hc.rep)) {
> + if (unlikely(hc.rep)) {
> ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
> break;
> }
> @@ -2035,6 +2103,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
> break;
> }
>
> + if (hc.xmm_dirty)
> + kvm_hv_hypercall_write_xmm(&hc);
> +

Wei already mentioned that, but as 'xmm_dirty' is not being used in this
patch I'd suggest we move it out too.

> return kvm_hv_hypercall_complete(vcpu, ret);
> }

--
Vitaly

2021-04-14 08:06:22

by Siddharth Chandrasekaran

Subject: Re: [PATCH v2 3/4] KVM: x86: kvm_hv_flush_tlb use inputs from XMM registers

On Tue, Apr 13, 2021 at 04:09:48PM +0200, Vitaly Kuznetsov wrote:
> Siddharth Chandrasekaran <[email protected]> writes:
> > Hyper-V supports the use of XMM registers to perform fast hypercalls.
> > This allows guests to take advantage of the improved performance of the
> > fast hypercall interface even though a hypercall may require more than
> > (the current maximum of) two input registers.
> >
> > The XMM fast hypercall interface uses six additional XMM registers (XMM0
> > to XMM5) to allow the guest to pass an input parameter block of up to
> > 112 bytes. Hyper-V can also return data back to the guest in the
> > remaining XMM registers that are not used by the current hypercall.
> >
> > Add framework to read/write to XMM registers in kvm_hv_hypercall() and
> > use the additional hypercall inputs from XMM registers in
> > kvm_hv_flush_tlb() when possible.
> >
> > Cc: Alexander Graf <[email protected]>
> > Co-developed-by: Evgeny Iakovlev <[email protected]>
> > Signed-off-by: Evgeny Iakovlev <[email protected]>
> > Signed-off-by: Siddharth Chandrasekaran <[email protected]>
> > ---
> > arch/x86/kvm/hyperv.c | 109 ++++++++++++++++++++++++++++++++++--------
> > 1 file changed, 90 insertions(+), 19 deletions(-)
> >
> > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> > index 8f6babd1ea0d..1f9959aba70d 100644
> > --- a/arch/x86/kvm/hyperv.c
> > +++ b/arch/x86/kvm/hyperv.c
> > @@ -36,6 +36,7 @@
> >
> > #include "trace.h"
> > #include "irq.h"
> > +#include "fpu.h"
> >
> > /* "Hv#1" signature */
> > #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
> > @@ -1623,6 +1624,8 @@ static __always_inline unsigned long *sparse_set_to_vcpu_mask(
> > return vcpu_bitmap;
> > }
> >
> > +#define KVM_HV_HYPERCALL_MAX_XMM_REGISTERS 6
>
> Nitpick: this is not KVM-specific so could probably go to arch/x86/include/asm/hyperv-tlfs.h

Ack.

> > +
> > struct kvm_hv_hcall {
> > u64 param;
> > u64 ingpa;
> > @@ -1632,10 +1635,14 @@ struct kvm_hv_hcall {
> > u16 rep_idx;
> > bool fast;
> > bool rep;
> > + sse128_t xmm[KVM_HV_HYPERCALL_MAX_XMM_REGISTERS];
> > + bool xmm_dirty;
> > };
> >
> > static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex)
> > {
> > + int i, j;
> > + gpa_t gpa;
> > struct kvm *kvm = vcpu->kvm;
> > struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> > struct hv_tlb_flush_ex flush_ex;
> > @@ -1649,8 +1656,15 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
> > bool all_cpus;
> >
> > if (!ex) {
> > - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush, sizeof(flush))))
> > - return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + if (hc->fast) {
> > + flush.address_space = hc->ingpa;
> > + flush.flags = hc->outgpa;
> > + flush.processor_mask = sse128_lo(hc->xmm[0]);
> > + } else {
> > + if (unlikely(kvm_read_guest(kvm, hc->ingpa,
> > + &flush, sizeof(flush))))
> > + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + }
> >
> > trace_kvm_hv_flush_tlb(flush.processor_mask,
> > flush.address_space, flush.flags);
> > @@ -1668,9 +1682,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
> > all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
> > flush.processor_mask == 0;
> > } else {
> > - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
> > - sizeof(flush_ex))))
> > - return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + if (hc->fast) {
> > + flush_ex.address_space = hc->ingpa;
> > + flush_ex.flags = hc->outgpa;
> > + memcpy(&flush_ex.hv_vp_set,
> > + &hc->xmm[0], sizeof(hc->xmm[0]));
> > + } else {
> > + if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
> > + sizeof(flush_ex))))
> > + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + }
> >
> > trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
> > flush_ex.hv_vp_set.format,
> > @@ -1681,20 +1702,29 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
> > all_cpus = flush_ex.hv_vp_set.format !=
> > HV_GENERIC_SET_SPARSE_4K;
> >
> > - sparse_banks_len =
> > - bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
> > - sizeof(sparse_banks[0]);
> > + sparse_banks_len = bitmap_weight((unsigned long *)&valid_bank_mask, 64);
> >
> > if (!sparse_banks_len && !all_cpus)
> > goto ret_success;
> >
> > - if (!all_cpus &&
> > - kvm_read_guest(kvm,
> > - hc->ingpa + offsetof(struct hv_tlb_flush_ex,
> > - hv_vp_set.bank_contents),
> > - sparse_banks,
> > - sparse_banks_len))
> > - return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + if (!all_cpus) {
> > + if (hc->fast) {
> > + if (sparse_banks_len > KVM_HV_HYPERCALL_MAX_XMM_REGISTERS - 1)
> > + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + for (i = 0, j = 1; i < sparse_banks_len; i += 2, j++) {
>
> Nitpick: you don't really need 'j' here as 'j == i/2 + 1', right?

Yes, you are right. Changed.

> > + sparse_banks[i + 0] = sse128_lo(hc->xmm[j]);
>
> Using ' + 0' for indentation is ... unusual :-) I'm not opposed to it
> here though.

Old habit :-).

> > + sparse_banks[i + 1] = sse128_hi(hc->xmm[j]);
> > + }
> > + } else {
> > + gpa = hc->ingpa;
> > + gpa += offsetof(struct hv_tlb_flush_ex,
> > + hv_vp_set.bank_contents);
>
> Nitpick: if splitting these into two lines is only done to fit into 80
> chars, then I'd say that requirement is no more, so we can be a bit wider.
>
> gpa = hc->ingpa + offsetof(...)

Ack.

> > + if (unlikely(kvm_read_guest(kvm, gpa, sparse_banks,
> > + sparse_banks_len *
> > + sizeof(sparse_banks[0]))))
> > + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + }
> > + }
> > }
> >
> > cpumask_clear(&hv_vcpu->tlb_flush);
> > @@ -1890,6 +1920,41 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *h
> > return HV_STATUS_SUCCESS;
> > }
> >
> > +static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc)
> > +{
> > + switch (hc->code) {
> > + case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
> > + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
> > + case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
> > + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
> > + return true;
> > + }
> > +
> > + return false;
> > +}
> > +
> > +static inline void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)
> > +{
> > + int reg;
> > +
> > + kvm_fpu_get();
> > + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
> > + _kvm_read_sse_reg(reg, &hc->xmm[reg]);
> > + kvm_fpu_put();
> > + hc->xmm_dirty = false;
> > +}
> > +
> > +static inline void kvm_hv_hypercall_write_xmm(struct kvm_hv_hcall *hc)
> > +{
> > + int reg;
> > +
> > + kvm_fpu_get();
> > + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
> > + _kvm_write_sse_reg(reg, &hc->xmm[reg]);
> > + kvm_fpu_put();
> > + hc->xmm_dirty = false;
> > +}
> > +
> > int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
> > {
> > struct kvm_hv_hcall hc;
> > @@ -1926,6 +1991,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
> > hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
> > hc.rep = !!(hc.rep_cnt || hc.rep_idx);
> >
> > + if (hc.fast && is_xmm_fast_hypercall(&hc))
> > + kvm_hv_hypercall_read_xmm(&hc);
> > +
> > trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
> > hc.ingpa, hc.outgpa);
> >
> > @@ -1961,28 +2029,28 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
> > kvm_hv_hypercall_complete_userspace;
> > return 0;
> > case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
> > - if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) {
> > + if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
> > ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
> > break;
> > }
> > ret = kvm_hv_flush_tlb(vcpu, &hc, false);
> > break;
> > case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
> > - if (unlikely(hc.fast || hc.rep)) {
> > + if (unlikely(hc.rep)) {
> > ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
> > break;
> > }
> > ret = kvm_hv_flush_tlb(vcpu, &hc, false);
> > break;
> > case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
> > - if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) {
> > + if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
> > ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
> > break;
> > }
> > ret = kvm_hv_flush_tlb(vcpu, &hc, true);
> > break;
> > case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
> > - if (unlikely(hc.fast || hc.rep)) {
> > + if (unlikely(hc.rep)) {
> > ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
> > break;
> > }
> > @@ -2035,6 +2103,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
> > break;
> > }
> >
> > + if (hc.xmm_dirty)
> > + kvm_hv_hypercall_write_xmm(&hc);
> > +
>
> Wei already mentioned that, but as 'xmm_dirty' is not being used in this
> patch I'd suggest we move it out too.

Okay, I should remove HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE for now then.

~ Sid.


