LinuxLists.cc - [PATCH v2 0/2] Add vector-hashing support for lowest-priority interrupts delivery

[permalink] [raw]

Subject: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

Use vector-hashing to deliver lowest-priority interrupts. As an
example, modern Intel CPUs in server platforms use this method to
handle lowest-priority interrupts.

Signed-off-by: Feng Wu <[email protected]>
---
arch/x86/kvm/irq_comm.c | 27 ++++++++++++++++++-----
arch/x86/kvm/lapic.c | 57 ++++++++++++++++++++++++++++++++++++++++---------
arch/x86/kvm/lapic.h | 2 ++
arch/x86/kvm/x86.c | 9 ++++++++
arch/x86/kvm/x86.h | 1 +
5 files changed, 81 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 84b96d3..c8c5f61 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -32,6 +32,7 @@
#include "ioapic.h"

#include "lapic.h"
+#include "x86.h"

static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
@@ -53,8 +54,10 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, unsigned long *dest_map)
{
- int i, r = -1;
+ int i, r = -1, idx = 0;
struct kvm_vcpu *vcpu, *lowest = NULL;
+ unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ unsigned int dest_vcpus = 0;

if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
kvm_lowest_prio_delivery(irq)) {
@@ -65,6 +68,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
return r;

+ memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!kvm_apic_present(vcpu))
continue;
@@ -78,13 +83,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
r = 0;
r += kvm_apic_set_irq(vcpu, irq, dest_map);
} else if (kvm_lapic_enabled(vcpu)) {
- if (!lowest)
- lowest = vcpu;
- else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
- lowest = vcpu;
+ if (!kvm_vector_hashing_enabled()) {
+ if (!lowest)
+ lowest = vcpu;
+ else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
+ lowest = vcpu;
+ } else {
+ __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
+ dest_vcpus++;
+ }
}
}

+ if (dest_vcpus != 0) {
+ idx = kvm_vector_2_index(irq->vector, dest_vcpus,
+ dest_vcpu_bitmap, KVM_MAX_VCPUS);
+
+ lowest = kvm_get_vcpu(kvm, idx - 1);
+ }
+
if (lowest)
r = kvm_apic_set_irq(lowest, irq, dest_map);

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ecd4ea1..e29001f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -678,6 +678,22 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
}
}

+int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
+ const unsigned long *bitmap, u32 bitmap_size)
+{
+ u32 mod;
+ int i, idx = 0;
+
+ mod = vector % dest_vcpus;
+
+ for (i = 0; i <= mod; i++) {
+ idx = find_next_bit(bitmap, bitmap_size, idx) + 1;
+ BUG_ON(idx > bitmap_size);
+ }
+
+ return idx;
+}
+
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
{
@@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
dst = map->logical_map[cid];

if (kvm_lowest_prio_delivery(irq)) {
- int l = -1;
- for_each_set_bit(i, &bitmap, 16) {
- if (!dst[i])
- continue;
- if (l < 0)
- l = i;
- else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
- l = i;
+ if (!kvm_vector_hashing_enabled()) {
+ int l = -1;
+ for_each_set_bit(i, &bitmap, 16) {
+ if (!dst[i])
+ continue;
+ if (l < 0)
+ l = i;
+ else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
+ l = i;
+ }
+ bitmap = (l >= 0) ? 1 << l : 0;
+ } else {
+ int idx = 0;
+ unsigned int dest_vcpus = 0;
+
+ for_each_set_bit(i, &bitmap, 16) {
+ if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
+ __clear_bit(i, &bitmap);
+ continue;
+ }
+ }
+
+ dest_vcpus = hweight16(bitmap);
+
+ if (dest_vcpus != 0) {
+ idx = kvm_vector_2_index(irq->vector,
+ dest_vcpus, &bitmap, 16);
+
+ bitmap = 0;
+ __set_bit(idx-1, &bitmap);
+ }
}
-
- bitmap = (l >= 0) ? 1 << l : 0;
}
}

diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index fde8e35d..6890ef0 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -170,4 +170,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu);

bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
+int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
+ const unsigned long *bitmap, u32 bitmap_size);
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4a6eff1..fb47730 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -118,6 +118,9 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
unsigned int lapic_timer_advance_ns = 0;
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);

+bool __read_mostly enable_vector_hashing = 1;
+module_param(enable_vector_hashing, bool, S_IRUGO);
+
static bool backwards_tsc_observed = false;

#define KVM_NR_SHARED_MSRS 16
@@ -8165,6 +8168,12 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
}

+bool kvm_vector_hashing_enabled(void)
+{
+ return enable_vector_hashing;
+}
+EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index f2afa5f..04bd0f9 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -179,6 +179,7 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
int page_num);
+bool kvm_vector_hashing_enabled(void);

#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
--
2.1.0

2015-12-16 01:55:48

[permalink] [raw]

Subject: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

Use vector-hashing to deliver lowest-priority interrupts for
VT-d posted-interrupts.

Signed-off-by: Feng Wu <[email protected]>
---
arch/x86/kvm/lapic.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/lapic.h | 2 ++
arch/x86/kvm/vmx.c | 12 ++++++++--
3 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e29001f..d4f2c8f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -854,6 +854,73 @@ out:
}

/*
+ * This routine handles lowest-priority interrupts using vector-hashing
+ * mechanism. As an example, modern Intel CPUs use this method to handle
+ * lowest-priority interrupts.
+ *
+ * Here is the details about the vector-hashing mechanism:
+ * 1. For lowest-priority interrupts, store all the possible destination
+ * vCPUs in an array.
+ * 2. Use "guest vector % max number of destination vCPUs" to find the right
+ * destination vCPU in the array for the lowest-priority interrupt.
+ */
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+ struct kvm_lapic_irq *irq)
+{
+ struct kvm_apic_map *map;
+ struct kvm_vcpu *vcpu = NULL;
+
+ if (irq->shorthand)
+ return NULL;
+
+ rcu_read_lock();
+ map = rcu_dereference(kvm->arch.apic_map);
+
+ if (!map)
+ goto out;
+
+ if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
+ kvm_lowest_prio_delivery(irq)) {
+ u16 cid;
+ int i, idx = 0;
+ unsigned long bitmap = 1;
+ unsigned int dest_vcpus = 0;
+ struct kvm_lapic **dst = NULL;
+
+
+ if (!kvm_apic_logical_map_valid(map))
+ goto out;
+
+ apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+
+ if (cid >= ARRAY_SIZE(map->logical_map))
+ goto out;
+
+ dst = map->logical_map[cid];
+
+ for_each_set_bit(i, &bitmap, 16) {
+ if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
+ clear_bit(i, &bitmap);
+ continue;
+ }
+ }
+
+ dest_vcpus = hweight16(bitmap);
+
+ if (dest_vcpus != 0) {
+ idx = kvm_vector_2_index(irq->vector, dest_vcpus,
+ &bitmap, 16);
+ vcpu = dst[idx-1]->vcpu;
+ }
+ }
+
+out:
+ rcu_read_unlock();
+ return vcpu;
+}
+EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
+
+/*
* Add a pending IRQ into lapic.
* Return 1 if successfully added and 0 if discarded.
*/
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6890ef0..52bffce 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
const unsigned long *bitmap, u32 bitmap_size);
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+ struct kvm_lapic_irq *irq);
#endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5eb56ed..3f89189 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
*/

kvm_set_msi_irq(e, &irq);
- if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
- continue;
+
+ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+ if (!kvm_vector_hashing_enabled() ||
+ irq.delivery_mode != APIC_DM_LOWEST)
+ continue;
+
+ vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
+ if (!vcpu)
+ continue;
+ }

vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
vcpu_info.vector = irq.vector;
--
2.1.0

2015-12-21 01:46:37

[permalink] [raw]

Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

On 2015/12/16 9:37, Feng Wu wrote:
> Use vector-hashing to deliver lowest-priority interrupts, As an
> example, modern Intel CPUs in server platform use this method to
> handle lowest-priority interrupts.
>
> Signed-off-by: Feng Wu <[email protected]>
> ---
> arch/x86/kvm/irq_comm.c | 27 ++++++++++++++++++-----
> arch/x86/kvm/lapic.c | 57 ++++++++++++++++++++++++++++++++++++++++---------
> arch/x86/kvm/lapic.h | 2 ++
> arch/x86/kvm/x86.c | 9 ++++++++
> arch/x86/kvm/x86.h | 1 +
> 5 files changed, 81 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 84b96d3..c8c5f61 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -32,6 +32,7 @@
> #include "ioapic.h"
>
> #include "lapic.h"
> +#include "x86.h"
>
> static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
> struct kvm *kvm, int irq_source_id, int level,
> @@ -53,8 +54,10 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
> int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
> struct kvm_lapic_irq *irq, unsigned long *dest_map)
> {
> - int i, r = -1;
> + int i, r = -1, idx = 0;
> struct kvm_vcpu *vcpu, *lowest = NULL;
> + unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
> + unsigned int dest_vcpus = 0;
>
> if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
> kvm_lowest_prio_delivery(irq)) {
> @@ -65,6 +68,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
> if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
> return r;
>
> + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
> +
> kvm_for_each_vcpu(i, vcpu, kvm) {
> if (!kvm_apic_present(vcpu))
> continue;
> @@ -78,13 +83,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
> r = 0;
> r += kvm_apic_set_irq(vcpu, irq, dest_map);
> } else if (kvm_lapic_enabled(vcpu)) {
> - if (!lowest)
> - lowest = vcpu;
> - else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
> - lowest = vcpu;
> + if (!kvm_vector_hashing_enabled()) {
> + if (!lowest)
> + lowest = vcpu;
> + else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
> + lowest = vcpu;
> + } else {
> + __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
> + dest_vcpus++;
> + }
> }
> }
>
> + if (dest_vcpus != 0) {
> + idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> + dest_vcpu_bitmap, KVM_MAX_VCPUS);
> +
> + lowest = kvm_get_vcpu(kvm, idx - 1);
> + }
> +
> if (lowest)
> r = kvm_apic_set_irq(lowest, irq, dest_map);
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index ecd4ea1..e29001f 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -678,6 +678,22 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
> }
> }
>
> +int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
> + const unsigned long *bitmap, u32 bitmap_size)
> +{
> + u32 mod;
> + int i, idx = 0;
> +
> + mod = vector % dest_vcpus;
> +
> + for (i = 0; i <= mod; i++) {
> + idx = find_next_bit(bitmap, bitmap_size, idx) + 1;
> + BUG_ON(idx > bitmap_size);
> + }
> +
> + return idx;
> +}
> +
> bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
> {
> @@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> dst = map->logical_map[cid];
>
> if (kvm_lowest_prio_delivery(irq)) {
> - int l = -1;
> - for_each_set_bit(i, &bitmap, 16) {
> - if (!dst[i])
> - continue;
> - if (l < 0)
> - l = i;
> - else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
> - l = i;
> + if (!kvm_vector_hashing_enabled()) {
> + int l = -1;
> + for_each_set_bit(i, &bitmap, 16) {
> + if (!dst[i])
> + continue;
> + if (l < 0)
> + l = i;
> + else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
> + l = i;
> + }
> + bitmap = (l >= 0) ? 1 << l : 0;
> + } else {
> + int idx = 0;
> + unsigned int dest_vcpus = 0;
> +
> + for_each_set_bit(i, &bitmap, 16) {
> + if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {

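It should be OR (||), not AND (&&): with &&, a NULL dst[i] is still
dereferenced by dst[i]->vcpu, and a non-NULL dst[i] never has its bit cleared.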

> + __clear_bit(i, &bitmap);
> + continue;
> + }
> + }
> +
> + dest_vcpus = hweight16(bitmap);
> +
> + if (dest_vcpus != 0) {
> + idx = kvm_vector_2_index(irq->vector,
> + dest_vcpus, &bitmap, 16);
> +
> + bitmap = 0;
> + __set_bit(idx-1, &bitmap);
> + }
> }
> -
> - bitmap = (l >= 0) ? 1 << l : 0;
> }
> }
>
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index fde8e35d..6890ef0 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -170,4 +170,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu);
>
> bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
> struct kvm_vcpu **dest_vcpu);
> +int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
> + const unsigned long *bitmap, u32 bitmap_size);
> #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 4a6eff1..fb47730 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -118,6 +118,9 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
> unsigned int lapic_timer_advance_ns = 0;
> module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
>
> +bool __read_mostly enable_vector_hashing = 1;
> +module_param(enable_vector_hashing, bool, S_IRUGO);
> +
> static bool backwards_tsc_observed = false;
>
> #define KVM_NR_SHARED_MSRS 16
> @@ -8165,6 +8168,12 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
> return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
> }
>
> +bool kvm_vector_hashing_enabled(void)
> +{
> + return enable_vector_hashing;
> +}
> +EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
> +
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index f2afa5f..04bd0f9 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -179,6 +179,7 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
> int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
> bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
> int page_num);
> +bool kvm_vector_hashing_enabled(void);
>
> #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
> | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
>

--
best regards
yang

2015-12-21 01:50:29

[permalink] [raw]

Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

On 2015/12/16 9:37, Feng Wu wrote:
> Use vector-hashing to deliver lowest-priority interrupts for
> VT-d posted-interrupts.
>
> Signed-off-by: Feng Wu <[email protected]>
> ---
> arch/x86/kvm/lapic.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> arch/x86/kvm/lapic.h | 2 ++
> arch/x86/kvm/vmx.c | 12 ++++++++--
> 3 files changed, 79 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index e29001f..d4f2c8f 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -854,6 +854,73 @@ out:
> }
>
> /*
> + * This routine handles lowest-priority interrupts using vector-hashing
> + * mechanism. As an example, modern Intel CPUs use this method to handle
> + * lowest-priority interrupts.
> + *
> + * Here is the details about the vector-hashing mechanism:
> + * 1. For lowest-priority interrupts, store all the possible destination
> + * vCPUs in an array.
> + * 2. Use "guest vector % max number of destination vCPUs" to find the right
> + * destination vCPU in the array for the lowest-priority interrupt.
> + */
> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> + struct kvm_lapic_irq *irq)
> +{
> + struct kvm_apic_map *map;
> + struct kvm_vcpu *vcpu = NULL;
> +
> + if (irq->shorthand)
> + return NULL;
> +
> + rcu_read_lock();
> + map = rcu_dereference(kvm->arch.apic_map);
> +
> + if (!map)
> + goto out;
> +
> + if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
> + kvm_lowest_prio_delivery(irq)) {
> + u16 cid;
> + int i, idx = 0;
> + unsigned long bitmap = 1;
> + unsigned int dest_vcpus = 0;
> + struct kvm_lapic **dst = NULL;
> +
> +
> + if (!kvm_apic_logical_map_valid(map))
> + goto out;
> +
> + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
> +
> + if (cid >= ARRAY_SIZE(map->logical_map))
> + goto out;
> +
> + dst = map->logical_map[cid];
> +
> + for_each_set_bit(i, &bitmap, 16) {
> + if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
> + clear_bit(i, &bitmap);
> + continue;
> + }
> + }
> +
> + dest_vcpus = hweight16(bitmap);
> +
> + if (dest_vcpus != 0) {
> + idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> + &bitmap, 16);
> + vcpu = dst[idx-1]->vcpu;
> + }
> + }
> +
> +out:
> + rcu_read_unlock();
> + return vcpu;
> +}
> +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
> +
> +/*
> * Add a pending IRQ into lapic.
> * Return 1 if successfully added and 0 if discarded.
> */
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 6890ef0..52bffce 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
> struct kvm_vcpu **dest_vcpu);
> int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
> const unsigned long *bitmap, u32 bitmap_size);
> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> + struct kvm_lapic_irq *irq);
> #endif
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 5eb56ed..3f89189 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
> */
>
> kvm_set_msi_irq(e, &irq);
> - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
> - continue;
> +
> + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
> + if (!kvm_vector_hashing_enabled() ||
> + irq.delivery_mode != APIC_DM_LOWEST)
> + continue;
> +
> + vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
> + if (!vcpu)
> + continue;
> + }

I am a little confused by the 'continue'. If the destination is not a
single vCPU, shouldn't we fall back to non-PI mode?

>
> vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
> vcpu_info.vector = irq.vector;
>

--
best regards
yang

2015-12-21 01:51:00

[permalink] [raw]

Subject: RE: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

> -----Original Message-----
> From: Yang Zhang [mailto:[email protected]]
> Sent: Monday, December 21, 2015 9:46 AM
> To: Wu, Feng <[email protected]>; [email protected];
> [email protected]
> Cc: [email protected]; [email protected]
> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts
>
> On 2015/12/16 9:37, Feng Wu wrote:
> > Use vector-hashing to deliver lowest-priority interrupts, As an
> > example, modern Intel CPUs in server platform use this method to
> > handle lowest-priority interrupts.
> >
> > Signed-off-by: Feng Wu <[email protected]>
> > ---
> > arch/x86/kvm/irq_comm.c | 27 ++++++++++++++++++-----
> > arch/x86/kvm/lapic.c | 57
> ++++++++++++++++++++++++++++++++++++++++---------
> > arch/x86/kvm/lapic.h | 2 ++
> > arch/x86/kvm/x86.c | 9 ++++++++
> > arch/x86/kvm/x86.h | 1 +
> > 5 files changed, 81 insertions(+), 15 deletions(-)
> >
> > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> > index 84b96d3..c8c5f61 100644
> > --- a/arch/x86/kvm/irq_comm.c
> > +++ b/arch/x86/kvm/irq_comm.c
> > @@ -32,6 +32,7 @@
> > #include "ioapic.h"
> >
> > #include "lapic.h"
> > +#include "x86.h"
> >
> > static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
> > struct kvm *kvm, int irq_source_id, int level,
> > @@ -53,8 +54,10 @@ static int kvm_set_ioapic_irq(struct
> kvm_kernel_irq_routing_entry *e,
> > int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
> > struct kvm_lapic_irq *irq, unsigned long *dest_map)
> > {
> > - int i, r = -1;
> > + int i, r = -1, idx = 0;
> > struct kvm_vcpu *vcpu, *lowest = NULL;
> > + unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
> > + unsigned int dest_vcpus = 0;
> >
> > if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
> > kvm_lowest_prio_delivery(irq)) {
> > @@ -65,6 +68,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct
> kvm_lapic *src,
> > if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
> > return r;
> >
> > + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
> > +
> > kvm_for_each_vcpu(i, vcpu, kvm) {
> > if (!kvm_apic_present(vcpu))
> > continue;
> > @@ -78,13 +83,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct
> kvm_lapic *src,
> > r = 0;
> > r += kvm_apic_set_irq(vcpu, irq, dest_map);
> > } else if (kvm_lapic_enabled(vcpu)) {
> > - if (!lowest)
> > - lowest = vcpu;
> > - else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
> > - lowest = vcpu;
> > + if (!kvm_vector_hashing_enabled()) {
> > + if (!lowest)
> > + lowest = vcpu;
> > + else if (kvm_apic_compare_prio(vcpu, lowest) <
> 0)
> > + lowest = vcpu;
> > + } else {
> > + __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
> > + dest_vcpus++;
> > + }
> > }
> > }
> >
> > + if (dest_vcpus != 0) {
> > + idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> > + dest_vcpu_bitmap, KVM_MAX_VCPUS);
> > +
> > + lowest = kvm_get_vcpu(kvm, idx - 1);
> > + }
> > +
> > if (lowest)
> > r = kvm_apic_set_irq(lowest, irq, dest_map);
> >
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index ecd4ea1..e29001f 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -678,6 +678,22 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu,
> struct kvm_lapic *source,
> > }
> > }
> >
> > +int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
> > + const unsigned long *bitmap, u32 bitmap_size)
> > +{
> > + u32 mod;
> > + int i, idx = 0;
> > +
> > + mod = vector % dest_vcpus;
> > +
> > + for (i = 0; i <= mod; i++) {
> > + idx = find_next_bit(bitmap, bitmap_size, idx) + 1;
> > + BUG_ON(idx > bitmap_size);
> > + }
> > +
> > + return idx;
> > +}
> > +
> > bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> > struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
> > {
> > @@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
> *kvm, struct kvm_lapic *src,
> > dst = map->logical_map[cid];
> >
> > if (kvm_lowest_prio_delivery(irq)) {
> > - int l = -1;
> > - for_each_set_bit(i, &bitmap, 16) {
> > - if (!dst[i])
> > - continue;
> > - if (l < 0)
> > - l = i;
> > - else if (kvm_apic_compare_prio(dst[i]->vcpu,
> dst[l]->vcpu) < 0)
> > - l = i;
> > + if (!kvm_vector_hashing_enabled()) {
> > + int l = -1;
> > + for_each_set_bit(i, &bitmap, 16) {
> > + if (!dst[i])
> > + continue;
> > + if (l < 0)
> > + l = i;
> > + else if (kvm_apic_compare_prio(dst[i]-
> >vcpu, dst[l]->vcpu) < 0)
> > + l = i;
> > + }
> > + bitmap = (l >= 0) ? 1 << l : 0;
> > + } else {
> > + int idx = 0;
> > + unsigned int dest_vcpus = 0;
> > +
> > + for_each_set_bit(i, &bitmap, 16) {
> > + if (!dst[i]
> && !kvm_lapic_enabled(dst[i]->vcpu)) {
>
> It should be or(||) not and (&&).

Oh, you are right! My negligence! Thanks for pointing this out, Yang!

Thanks,
Feng

2015-12-21 01:55:47

[permalink] [raw]

Subject: RE: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

> -----Original Message-----
> From: [email protected] [mailto:linux-kernel-
> [email protected]] On Behalf Of Yang Zhang
> Sent: Monday, December 21, 2015 9:50 AM
> To: Wu, Feng <[email protected]>; [email protected];
> [email protected]
> Cc: [email protected]; [email protected]
> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
> posted-interrupts
>
> On 2015/12/16 9:37, Feng Wu wrote:
> > Use vector-hashing to deliver lowest-priority interrupts for
> > VT-d posted-interrupts.
> >
> > Signed-off-by: Feng Wu <[email protected]>
> > ---
> > arch/x86/kvm/lapic.c | 67
> ++++++++++++++++++++++++++++++++++++++++++++++++++++
> > arch/x86/kvm/lapic.h | 2 ++
> > arch/x86/kvm/vmx.c | 12 ++++++++--
> > 3 files changed, 79 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index e29001f..d4f2c8f 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -854,6 +854,73 @@ out:
> > }
> >
> > /*
> > + * This routine handles lowest-priority interrupts using vector-hashing
> > + * mechanism. As an example, modern Intel CPUs use this method to handle
> > + * lowest-priority interrupts.
> > + *
> > + * Here is the details about the vector-hashing mechanism:
> > + * 1. For lowest-priority interrupts, store all the possible destination
> > + * vCPUs in an array.
> > + * 2. Use "guest vector % max number of destination vCPUs" to find the right
> > + * destination vCPU in the array for the lowest-priority interrupt.
> > + */
> > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> > + struct kvm_lapic_irq *irq)
> > +{
> > + struct kvm_apic_map *map;
> > + struct kvm_vcpu *vcpu = NULL;
> > +
> > + if (irq->shorthand)
> > + return NULL;
> > +
> > + rcu_read_lock();
> > + map = rcu_dereference(kvm->arch.apic_map);
> > +
> > + if (!map)
> > + goto out;
> > +
> > + if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
> > + kvm_lowest_prio_delivery(irq)) {
> > + u16 cid;
> > + int i, idx = 0;
> > + unsigned long bitmap = 1;
> > + unsigned int dest_vcpus = 0;
> > + struct kvm_lapic **dst = NULL;
> > +
> > +
> > + if (!kvm_apic_logical_map_valid(map))
> > + goto out;
> > +
> > + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
> > +
> > + if (cid >= ARRAY_SIZE(map->logical_map))
> > + goto out;
> > +
> > + dst = map->logical_map[cid];
> > +
> > + for_each_set_bit(i, &bitmap, 16) {
> > + if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
> > + clear_bit(i, &bitmap);
> > + continue;
> > + }
> > + }
> > +
> > + dest_vcpus = hweight16(bitmap);
> > +
> > + if (dest_vcpus != 0) {
> > + idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> > + &bitmap, 16);
> > + vcpu = dst[idx-1]->vcpu;
> > + }
> > + }
> > +
> > +out:
> > + rcu_read_unlock();
> > + return vcpu;
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
> > +
> > +/*
> > * Add a pending IRQ into lapic.
> > * Return 1 if successfully added and 0 if discarded.
> > */
> > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > index 6890ef0..52bffce 100644
> > --- a/arch/x86/kvm/lapic.h
> > +++ b/arch/x86/kvm/lapic.h
> > @@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
> struct kvm_lapic_irq *irq,
> > struct kvm_vcpu **dest_vcpu);
> > int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
> > const unsigned long *bitmap, u32 bitmap_size);
> > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> > + struct kvm_lapic_irq *irq);
> > #endif
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 5eb56ed..3f89189 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm *kvm,
> unsigned int host_irq,
> > */
> >
> > kvm_set_msi_irq(e, &irq);
> > - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
> > - continue;
> > +
> > + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
> > + if (!kvm_vector_hashing_enabled() ||
> > + irq.delivery_mode !=
> APIC_DM_LOWEST)
> > + continue;
> > +
> > + vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
> > + if (!vcpu)
> > + continue;
> > + }
>
> I am a little confused with the 'continue'. If the destination is not
> single vcpu, shouldn't we rollback to use non-PI mode?

Here is the logic:
- If it is a single destination, we will use PI regardless of whether it is fixed or lowest-priority.
- If it is not a single destination:
a) If it is fixed, we will use non-PI.
b) If it is lowest-priority and vector-hashing is enabled, we will use PI.
c) Otherwise, we will use non-PI.

Thanks,
Feng

>
> >
> > vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
> > vcpu_info.vector = irq.vector;
> >
>
>
> --
> best regards
> yang
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2015-12-21 02:01:23

[permalink] [raw]

Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

On 2015/12/21 9:55, Wu, Feng wrote:
>
>
>> -----Original Message-----
>> From: [email protected] [mailto:linux-kernel-
>> [email protected]] On Behalf Of Yang Zhang
>> Sent: Monday, December 21, 2015 9:50 AM
>> To: Wu, Feng <[email protected]>; [email protected];
>> [email protected]
>> Cc: [email protected]; [email protected]
>> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
>> posted-interrupts
>>
>> On 2015/12/16 9:37, Feng Wu wrote:
>>> Use vector-hashing to deliver lowest-priority interrupts for
>>> VT-d posted-interrupts.
>>>
>>> Signed-off-by: Feng Wu <[email protected]>
>>> ---
>>> arch/x86/kvm/lapic.c | 67
>> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>> arch/x86/kvm/lapic.h | 2 ++
>>> arch/x86/kvm/vmx.c | 12 ++++++++--
>>> 3 files changed, 79 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>> index e29001f..d4f2c8f 100644
>>> --- a/arch/x86/kvm/lapic.c
>>> +++ b/arch/x86/kvm/lapic.c
>>> @@ -854,6 +854,73 @@ out:
>>> }
>>>
>>> /*
>>> + * This routine handles lowest-priority interrupts using vector-hashing
>>> + * mechanism. As an example, modern Intel CPUs use this method to handle
>>> + * lowest-priority interrupts.
>>> + *
>>> + * Here is the details about the vector-hashing mechanism:
>>> + * 1. For lowest-priority interrupts, store all the possible destination
>>> + * vCPUs in an array.
>>> + * 2. Use "guest vector % max number of destination vCPUs" to find the right
>>> + * destination vCPU in the array for the lowest-priority interrupt.
>>> + */
>>> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
>>> + struct kvm_lapic_irq *irq)
>>> +{
>>> + struct kvm_apic_map *map;
>>> + struct kvm_vcpu *vcpu = NULL;
>>> +
>>> + if (irq->shorthand)
>>> + return NULL;
>>> +
>>> + rcu_read_lock();
>>> + map = rcu_dereference(kvm->arch.apic_map);
>>> +
>>> + if (!map)
>>> + goto out;
>>> +
>>> + if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
>>> + kvm_lowest_prio_delivery(irq)) {
>>> + u16 cid;
>>> + int i, idx = 0;
>>> + unsigned long bitmap = 1;
>>> + unsigned int dest_vcpus = 0;
>>> + struct kvm_lapic **dst = NULL;
>>> +
>>> +
>>> + if (!kvm_apic_logical_map_valid(map))
>>> + goto out;
>>> +
>>> + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
>>> +
>>> + if (cid >= ARRAY_SIZE(map->logical_map))
>>> + goto out;
>>> +
>>> + dst = map->logical_map[cid];
>>> +
>>> + for_each_set_bit(i, &bitmap, 16) {
>>> + if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
>>> + clear_bit(i, &bitmap);
>>> + continue;
>>> + }
>>> + }
>>> +
>>> + dest_vcpus = hweight16(bitmap);
>>> +
>>> + if (dest_vcpus != 0) {
>>> + idx = kvm_vector_2_index(irq->vector, dest_vcpus,
>>> + &bitmap, 16);
>>> + vcpu = dst[idx-1]->vcpu;
>>> + }
>>> + }
>>> +
>>> +out:
>>> + rcu_read_unlock();
>>> + return vcpu;
>>> +}
>>> +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
>>> +
>>> +/*
>>> * Add a pending IRQ into lapic.
>>> * Return 1 if successfully added and 0 if discarded.
>>> */
>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>>> index 6890ef0..52bffce 100644
>>> --- a/arch/x86/kvm/lapic.h
>>> +++ b/arch/x86/kvm/lapic.h
>>> @@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
>> struct kvm_lapic_irq *irq,
>>> struct kvm_vcpu **dest_vcpu);
>>> int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
>>> const unsigned long *bitmap, u32 bitmap_size);
>>> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
>>> + struct kvm_lapic_irq *irq);
>>> #endif
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index 5eb56ed..3f89189 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm *kvm,
>> unsigned int host_irq,
>>> */
>>>
>>> kvm_set_msi_irq(e, &irq);
>>> - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
>>> - continue;
>>> +
>>> + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
>>> + if (!kvm_vector_hashing_enabled() ||
>>> + irq.delivery_mode !=
>> APIC_DM_LOWEST)
>>> + continue;
>>> +
>>> + vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
>>> + if (!vcpu)
>>> + continue;
>>> + }
>>
>> I am a little confused with the 'continue'. If the destination is not
>> single vcpu, shouldn't we rollback to use non-PI mode?
>
> Here is the logic:
> - If it is single destination, we will use PI no matter it is fixed or lowest-priority.
> - If it is not single destination:
> a) It is fixed, we will use non-PI
> b) It is lowest-priority and vector-hashing is enabled, we will use PI
> c) otherwise, use non-PI

If it was a single destination previously and then changes to non-single mode,
can the current code cover this case?

>
> Thanks,
> Feng
>
>>
>>>
>>> vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
>>> vcpu_info.vector = irq.vector;
>>>
>>
>>
>> --
>> best regards
>> yang
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at http://www.tux.org/lkml/

--
best regards
yang

2015-12-21 02:06:28

[permalink] [raw]

Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

On 2015/12/21 9:50, Wu, Feng wrote:
>
>
>> -----Original Message-----
>> From: Yang Zhang [mailto:[email protected]]
>> Sent: Monday, December 21, 2015 9:46 AM
>> To: Wu, Feng <[email protected]>; [email protected];
>> [email protected]
>> Cc: [email protected]; [email protected]
>> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
>> priority interrupts
>>
>> On 2015/12/16 9:37, Feng Wu wrote:
>>> Use vector-hashing to deliver lowest-priority interrupts, As an
>>> example, modern Intel CPUs in server platform use this method to
>>> handle lowest-priority interrupts.
>>>
>>> Signed-off-by: Feng Wu <[email protected]>
>>> ---
>>> arch/x86/kvm/irq_comm.c | 27 ++++++++++++++++++-----
>>> arch/x86/kvm/lapic.c | 57
>> ++++++++++++++++++++++++++++++++++++++++---------
>>> arch/x86/kvm/lapic.h | 2 ++
>>> arch/x86/kvm/x86.c | 9 ++++++++
>>> arch/x86/kvm/x86.h | 1 +
>>> 5 files changed, 81 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
>>> index 84b96d3..c8c5f61 100644
>>> --- a/arch/x86/kvm/irq_comm.c
>>> +++ b/arch/x86/kvm/irq_comm.c
>>> @@ -32,6 +32,7 @@
>>> #include "ioapic.h"
>>>
>>> #include "lapic.h"
>>> +#include "x86.h"
>>>
>>> static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
>>> struct kvm *kvm, int irq_source_id, int level,
>>> @@ -53,8 +54,10 @@ static int kvm_set_ioapic_irq(struct
>> kvm_kernel_irq_routing_entry *e,
>>> int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>>> struct kvm_lapic_irq *irq, unsigned long *dest_map)
>>> {
>>> - int i, r = -1;
>>> + int i, r = -1, idx = 0;
>>> struct kvm_vcpu *vcpu, *lowest = NULL;
>>> + unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
>>> + unsigned int dest_vcpus = 0;
>>>
>>> if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
>>> kvm_lowest_prio_delivery(irq)) {
>>> @@ -65,6 +68,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct
>> kvm_lapic *src,
>>> if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
>>> return r;
>>>
>>> + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
>>> +
>>> kvm_for_each_vcpu(i, vcpu, kvm) {
>>> if (!kvm_apic_present(vcpu))
>>> continue;
>>> @@ -78,13 +83,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct
>> kvm_lapic *src,
>>> r = 0;
>>> r += kvm_apic_set_irq(vcpu, irq, dest_map);
>>> } else if (kvm_lapic_enabled(vcpu)) {
>>> - if (!lowest)
>>> - lowest = vcpu;
>>> - else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
>>> - lowest = vcpu;
>>> + if (!kvm_vector_hashing_enabled()) {
>>> + if (!lowest)
>>> + lowest = vcpu;
>>> + else if (kvm_apic_compare_prio(vcpu, lowest) <
>> 0)
>>> + lowest = vcpu;
>>> + } else {
>>> + __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
>>> + dest_vcpus++;
>>> + }
>>> }
>>> }
>>>
>>> + if (dest_vcpus != 0) {
>>> + idx = kvm_vector_2_index(irq->vector, dest_vcpus,
>>> + dest_vcpu_bitmap, KVM_MAX_VCPUS);
>>> +
>>> + lowest = kvm_get_vcpu(kvm, idx - 1);
>>> + }
>>> +
>>> if (lowest)
>>> r = kvm_apic_set_irq(lowest, irq, dest_map);
>>>
>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>> index ecd4ea1..e29001f 100644
>>> --- a/arch/x86/kvm/lapic.c
>>> +++ b/arch/x86/kvm/lapic.c
>>> @@ -678,6 +678,22 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu,
>> struct kvm_lapic *source,
>>> }
>>> }
>>>
>>> +int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
>>> + const unsigned long *bitmap, u32 bitmap_size)
>>> +{
>>> + u32 mod;
>>> + int i, idx = 0;
>>> +
>>> + mod = vector % dest_vcpus;
>>> +
>>> + for (i = 0; i <= mod; i++) {
>>> + idx = find_next_bit(bitmap, bitmap_size, idx) + 1;
>>> + BUG_ON(idx > bitmap_size);
>>> + }
>>> +
>>> + return idx;
>>> +}
>>> +
>>> bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
>>> struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
>>> {
>>> @@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
>> *kvm, struct kvm_lapic *src,
>>> dst = map->logical_map[cid];
>>>
>>> if (kvm_lowest_prio_delivery(irq)) {
>>> - int l = -1;
>>> - for_each_set_bit(i, &bitmap, 16) {
>>> - if (!dst[i])
>>> - continue;
>>> - if (l < 0)
>>> - l = i;
>>> - else if (kvm_apic_compare_prio(dst[i]->vcpu,
>> dst[l]->vcpu) < 0)
>>> - l = i;
>>> + if (!kvm_vector_hashing_enabled()) {
>>> + int l = -1;
>>> + for_each_set_bit(i, &bitmap, 16) {
>>> + if (!dst[i])
>>> + continue;
>>> + if (l < 0)
>>> + l = i;
>>> + else if (kvm_apic_compare_prio(dst[i]-
>>> vcpu, dst[l]->vcpu) < 0)
>>> + l = i;
>>> + }
>>> + bitmap = (l >= 0) ? 1 << l : 0;
>>> + } else {
>>> + int idx = 0;
>>> + unsigned int dest_vcpus = 0;
>>> +
>>> + for_each_set_bit(i, &bitmap, 16) {
>>> + if (!dst[i]
>> && !kvm_lapic_enabled(dst[i]->vcpu)) {
>>
>> It should be or(||) not and (&&).
>
> Oh, you are right! My negligence! Thanks for pointing this out, Yang!

BTW, I think the kvm_lapic_enabled check is wrong here. Why is it needed here?

>
> Thanks,
> Feng
>

--
best regards
yang

2015-12-22 04:36:38

[permalink] [raw]

Subject: RE: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

> -----Original Message-----
> From: Yang Zhang [mailto:[email protected]]
> Sent: Monday, December 21, 2015 10:01 AM
> To: Wu, Feng <[email protected]>; [email protected];
> [email protected]
> Cc: [email protected]; [email protected]
> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
> posted-interrupts
>
> On 2015/12/21 9:55, Wu, Feng wrote:
> >
> >
> >> -----Original Message-----
> >> From: [email protected] [mailto:linux-kernel-
> >> [email protected]] On Behalf Of Yang Zhang
> >> Sent: Monday, December 21, 2015 9:50 AM
> >> To: Wu, Feng <[email protected]>; [email protected];
> >> [email protected]
> >> Cc: [email protected]; [email protected]
> >> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
> >> posted-interrupts
> >>
> >> On 2015/12/16 9:37, Feng Wu wrote:
> >>> Use vector-hashing to deliver lowest-priority interrupts for
> >>> VT-d posted-interrupts.
> >>>
> >>> Signed-off-by: Feng Wu <[email protected]>
> >>> ---
> >>> arch/x86/kvm/lapic.c | 67
> >> ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>> arch/x86/kvm/lapic.h | 2 ++
> >>> arch/x86/kvm/vmx.c | 12 ++++++++--
> >>> 3 files changed, 79 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >>> index e29001f..d4f2c8f 100644
> >>> --- a/arch/x86/kvm/lapic.c
> >>> +++ b/arch/x86/kvm/lapic.c
> >>> @@ -854,6 +854,73 @@ out:
> >>> }
> >>>
> >>> /*
> >>> + * This routine handles lowest-priority interrupts using vector-hashing
> >>> + * mechanism. As an example, modern Intel CPUs use this method to
> handle
> >>> + * lowest-priority interrupts.
> >>> + *
> >>> + * Here is the details about the vector-hashing mechanism:
> >>> + * 1. For lowest-priority interrupts, store all the possible destination
> >>> + * vCPUs in an array.
> >>> + * 2. Use "guest vector % max number of destination vCPUs" to find the
> right
> >>> + * destination vCPU in the array for the lowest-priority interrupt.
> >>> + */
> >>> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> >>> + struct kvm_lapic_irq *irq)
> >>> +{
> >>> + struct kvm_apic_map *map;
> >>> + struct kvm_vcpu *vcpu = NULL;
> >>> +
> >>> + if (irq->shorthand)
> >>> + return NULL;
> >>> +
> >>> + rcu_read_lock();
> >>> + map = rcu_dereference(kvm->arch.apic_map);
> >>> +
> >>> + if (!map)
> >>> + goto out;
> >>> +
> >>> + if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
> >>> + kvm_lowest_prio_delivery(irq)) {
> >>> + u16 cid;
> >>> + int i, idx = 0;
> >>> + unsigned long bitmap = 1;
> >>> + unsigned int dest_vcpus = 0;
> >>> + struct kvm_lapic **dst = NULL;
> >>> +
> >>> +
> >>> + if (!kvm_apic_logical_map_valid(map))
> >>> + goto out;
> >>> +
> >>> + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
> >>> +
> >>> + if (cid >= ARRAY_SIZE(map->logical_map))
> >>> + goto out;
> >>> +
> >>> + dst = map->logical_map[cid];
> >>> +
> >>> + for_each_set_bit(i, &bitmap, 16) {
> >>> + if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
> >>> + clear_bit(i, &bitmap);
> >>> + continue;
> >>> + }
> >>> + }
> >>> +
> >>> + dest_vcpus = hweight16(bitmap);
> >>> +
> >>> + if (dest_vcpus != 0) {
> >>> + idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> >>> + &bitmap, 16);
> >>> + vcpu = dst[idx-1]->vcpu;
> >>> + }
> >>> + }
> >>> +
> >>> +out:
> >>> + rcu_read_unlock();
> >>> + return vcpu;
> >>> +}
> >>> +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
> >>> +
> >>> +/*
> >>> * Add a pending IRQ into lapic.
> >>> * Return 1 if successfully added and 0 if discarded.
> >>> */
> >>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> >>> index 6890ef0..52bffce 100644
> >>> --- a/arch/x86/kvm/lapic.h
> >>> +++ b/arch/x86/kvm/lapic.h
> >>> @@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
> >> struct kvm_lapic_irq *irq,
> >>> struct kvm_vcpu **dest_vcpu);
> >>> int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
> >>> const unsigned long *bitmap, u32 bitmap_size);
> >>> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> >>> + struct kvm_lapic_irq *irq);
> >>> #endif
> >>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >>> index 5eb56ed..3f89189 100644
> >>> --- a/arch/x86/kvm/vmx.c
> >>> +++ b/arch/x86/kvm/vmx.c
> >>> @@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm
> *kvm,
> >> unsigned int host_irq,
> >>> */
> >>>
> >>> kvm_set_msi_irq(e, &irq);
> >>> - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
> >>> - continue;
> >>> +
> >>> + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
> >>> + if (!kvm_vector_hashing_enabled() ||
> >>> + irq.delivery_mode !=
> >> APIC_DM_LOWEST)
> >>> + continue;
> >>> +
> >>> + vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
> >>> + if (!vcpu)
> >>> + continue;
> >>> + }
> >>
> >> I am a little confused with the 'continue'. If the destination is not
> >> single vcpu, shouldn't we rollback to use non-PI mode?
> >
> > Here is the logic:
> > - If it is single destination, we will use PI no matter it is fixed or lowest-priority.
> > - If it is not single destination:
> > a) It is fixed, we will use non-PI
> > b) It is lowest-priority and vector-hashing is enabled, we will use PI
> > c) otherwise, use non-PI
>
> If it is single destination previously, then change to no-single mode.
> Can current code cover this case?

In my test, before setting the irq affinity (changing from single vcpu to non-single
vcpu in this case), the guest masks the interrupt first, so before getting here, the IRTE
has already been changed back to remapped mode (when the guest masks the MSIx,
we change back to remapped mode), hence nothing is needed here.

Digging into the linux code (guest) a bit more, I found that if interrupt remapping
is not enabled in the guest (IR is not supported for guest anyway), it will always
mask the MSI/MSIx before setting the irq affinity. So the code should work
well currently.

However, for robustness, I think explicitly changing IRTE back to remapped
mode for the 'continue' case should be a good idea.

Radim, Paolo, what are your opinions on this? Any comments are
appreciated! Thanks a lot!

Thanks,
Feng

2015-12-22 04:37:32

[permalink] [raw]

Subject: RE: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

> -----Original Message-----
> From: [email protected] [mailto:linux-kernel-
> [email protected]] On Behalf Of Yang Zhang
> Sent: Monday, December 21, 2015 10:06 AM
> To: Wu, Feng <[email protected]>; [email protected];
> [email protected]
> Cc: [email protected]; [email protected]
> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts
>
> On 2015/12/21 9:50, Wu, Feng wrote:
> >
> >
> >> -----Original Message-----
> >> From: Yang Zhang [mailto:[email protected]]
> >> Sent: Monday, December 21, 2015 9:46 AM
> >> To: Wu, Feng <[email protected]>; [email protected];
> >> [email protected]
> >> Cc: [email protected]; [email protected]
> >> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
> >> priority interrupts
> >>
> >> On 2015/12/16 9:37, Feng Wu wrote:
> >>> Use vector-hashing to deliver lowest-priority interrupts, As an
> >>> example, modern Intel CPUs in server platform use this method to
> >>> handle lowest-priority interrupts.
> >>>
> >>> Signed-off-by: Feng Wu <[email protected]>
> >>> ---
> >>> arch/x86/kvm/irq_comm.c | 27 ++++++++++++++++++-----
> >>> arch/x86/kvm/lapic.c | 57
> >> ++++++++++++++++++++++++++++++++++++++++---------
> >>> arch/x86/kvm/lapic.h | 2 ++
> >>> arch/x86/kvm/x86.c | 9 ++++++++
> >>> arch/x86/kvm/x86.h | 1 +
> >>> 5 files changed, 81 insertions(+), 15 deletions(-)
> >>>
> >>> bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic
> *src,
> >>> struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
> >>> {
> >>> @@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
> >> *kvm, struct kvm_lapic *src,
> >>> dst = map->logical_map[cid];
> >>>
> >>> if (kvm_lowest_prio_delivery(irq)) {
> >>> - int l = -1;
> >>> - for_each_set_bit(i, &bitmap, 16) {
> >>> - if (!dst[i])
> >>> - continue;
> >>> - if (l < 0)
> >>> - l = i;
> >>> - else if (kvm_apic_compare_prio(dst[i]->vcpu,
> >> dst[l]->vcpu) < 0)
> >>> - l = i;
> >>> + if (!kvm_vector_hashing_enabled()) {
> >>> + int l = -1;
> >>> + for_each_set_bit(i, &bitmap, 16) {
> >>> + if (!dst[i])
> >>> + continue;
> >>> + if (l < 0)
> >>> + l = i;
> >>> + else if (kvm_apic_compare_prio(dst[i]-
> >>> vcpu, dst[l]->vcpu) < 0)
> >>> + l = i;
> >>> + }
> >>> + bitmap = (l >= 0) ? 1 << l : 0;
> >>> + } else {
> >>> + int idx = 0;
> >>> + unsigned int dest_vcpus = 0;
> >>> +
> >>> + for_each_set_bit(i, &bitmap, 16) {
> >>> + if (!dst[i]
> >> && !kvm_lapic_enabled(dst[i]->vcpu)) {
> >>
> >> It should be or(||) not and (&&).
> >
> > Oh, you are right! My negligence! Thanks for pointing this out, Yang!
>
> btw, i think the kvm_lapic_enabled check is wrong here? Why need it here?

If the lapic is not enabled, I think we cannot treat it as a candidate, can we?
Maybe Radim can confirm this. Radim, what is your opinion?

Thanks,
Feng

2015-12-22 06:42:15

[permalink] [raw]

Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

On 2015/12/22 12:36, Wu, Feng wrote:
>
>
>> -----Original Message-----
>> From: Yang Zhang [mailto:[email protected]]
>> Sent: Monday, December 21, 2015 10:01 AM
>> To: Wu, Feng <[email protected]>; [email protected];
>> [email protected]
>> Cc: [email protected]; [email protected]
>> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
>> posted-interrupts
>>
>> On 2015/12/21 9:55, Wu, Feng wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: [email protected] [mailto:linux-kernel-
>>>> [email protected]] On Behalf Of Yang Zhang
>>>> Sent: Monday, December 21, 2015 9:50 AM
>>>> To: Wu, Feng <[email protected]>; [email protected];
>>>> [email protected]
>>>> Cc: [email protected]; [email protected]
>>>> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
>>>> posted-interrupts
>>>>
>>>> On 2015/12/16 9:37, Feng Wu wrote:
>>>>> Use vector-hashing to deliver lowest-priority interrupts for
>>>>> VT-d posted-interrupts.
>>>>>
>>>>> Signed-off-by: Feng Wu <[email protected]>
>>>>> ---
>>>>> arch/x86/kvm/lapic.c | 67
>>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>> arch/x86/kvm/lapic.h | 2 ++
>>>>> arch/x86/kvm/vmx.c | 12 ++++++++--
>>>>> 3 files changed, 79 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>>>> index e29001f..d4f2c8f 100644
>>>>> --- a/arch/x86/kvm/lapic.c
>>>>> +++ b/arch/x86/kvm/lapic.c
>>>>> @@ -854,6 +854,73 @@ out:
>>>>> }
>>>>>
>>>>> /*
>>>>> + * This routine handles lowest-priority interrupts using vector-hashing
>>>>> + * mechanism. As an example, modern Intel CPUs use this method to
>> handle
>>>>> + * lowest-priority interrupts.
>>>>> + *
>>>>> + * Here is the details about the vector-hashing mechanism:
>>>>> + * 1. For lowest-priority interrupts, store all the possible destination
>>>>> + * vCPUs in an array.
>>>>> + * 2. Use "guest vector % max number of destination vCPUs" to find the
>> right
>>>>> + * destination vCPU in the array for the lowest-priority interrupt.
>>>>> + */
>>>>> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
>>>>> + struct kvm_lapic_irq *irq)
>>>>> +{
>>>>> + struct kvm_apic_map *map;
>>>>> + struct kvm_vcpu *vcpu = NULL;
>>>>> +
>>>>> + if (irq->shorthand)
>>>>> + return NULL;
>>>>> +
>>>>> + rcu_read_lock();
>>>>> + map = rcu_dereference(kvm->arch.apic_map);
>>>>> +
>>>>> + if (!map)
>>>>> + goto out;
>>>>> +
>>>>> + if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
>>>>> + kvm_lowest_prio_delivery(irq)) {
>>>>> + u16 cid;
>>>>> + int i, idx = 0;
>>>>> + unsigned long bitmap = 1;
>>>>> + unsigned int dest_vcpus = 0;
>>>>> + struct kvm_lapic **dst = NULL;
>>>>> +
>>>>> +
>>>>> + if (!kvm_apic_logical_map_valid(map))
>>>>> + goto out;
>>>>> +
>>>>> + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
>>>>> +
>>>>> + if (cid >= ARRAY_SIZE(map->logical_map))
>>>>> + goto out;
>>>>> +
>>>>> + dst = map->logical_map[cid];
>>>>> +
>>>>> + for_each_set_bit(i, &bitmap, 16) {
>>>>> + if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
>>>>> + clear_bit(i, &bitmap);
>>>>> + continue;
>>>>> + }
>>>>> + }
>>>>> +
>>>>> + dest_vcpus = hweight16(bitmap);
>>>>> +
>>>>> + if (dest_vcpus != 0) {
>>>>> + idx = kvm_vector_2_index(irq->vector, dest_vcpus,
>>>>> + &bitmap, 16);
>>>>> + vcpu = dst[idx-1]->vcpu;
>>>>> + }
>>>>> + }
>>>>> +
>>>>> +out:
>>>>> + rcu_read_unlock();
>>>>> + return vcpu;
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
>>>>> +
>>>>> +/*
>>>>> * Add a pending IRQ into lapic.
>>>>> * Return 1 if successfully added and 0 if discarded.
>>>>> */
>>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>>>>> index 6890ef0..52bffce 100644
>>>>> --- a/arch/x86/kvm/lapic.h
>>>>> +++ b/arch/x86/kvm/lapic.h
>>>>> @@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
>>>> struct kvm_lapic_irq *irq,
>>>>> struct kvm_vcpu **dest_vcpu);
>>>>> int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
>>>>> const unsigned long *bitmap, u32 bitmap_size);
>>>>> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
>>>>> + struct kvm_lapic_irq *irq);
>>>>> #endif
>>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>>>> index 5eb56ed..3f89189 100644
>>>>> --- a/arch/x86/kvm/vmx.c
>>>>> +++ b/arch/x86/kvm/vmx.c
>>>>> @@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm
>> *kvm,
>>>> unsigned int host_irq,
>>>>> */
>>>>>
>>>>> kvm_set_msi_irq(e, &irq);
>>>>> - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
>>>>> - continue;
>>>>> +
>>>>> + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
>>>>> + if (!kvm_vector_hashing_enabled() ||
>>>>> + irq.delivery_mode !=
>>>> APIC_DM_LOWEST)
>>>>> + continue;
>>>>> +
>>>>> + vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
>>>>> + if (!vcpu)
>>>>> + continue;
>>>>> + }
>>>>
>>>> I am a little confused with the 'continue'. If the destination is not
>>>> single vcpu, shouldn't we rollback to use non-PI mode?
>>>
>>> Here is the logic:
>>> - If it is single destination, we will use PI no matter it is fixed or lowest-priority.
>>> - If it is not single destination:
>>> a) It is fixed, we will use non-PI
>>> b) It is lowest-priority and vector-hashing is enabled, we will use PI
>>> c) otherwise, use non-PI
>>
>> If it is single destination previously, then change to no-single mode.
>> Can current code cover this case?
>
> In my test, before setting irq affinity (change single vcpu to non-single vcpu
> in this case), the guest will mask the interrupt first, so before getting here, IRTE
> has been changed back to remapped mode already(when guest masks the MSIx,
> we will change back to remapped mode), hence nothing needed here.
>
> Digging into the linux code (guest) a bit more, I found that if interrupt remapping
> is not enabled in the guest (IR is not supported for guest anyway), it will always
> mask the MSI/MSIx before setting the irq affinity. So the code should work
> well currently.

We should not rely on the guest's behavior. At the code level, it needs to be fixed.

>
> However, for robustness, I think explicitly changing IRTE back to remapped
> mode for the 'continue' case should be a good idea.

This is what I am looking for.

>
> Radim, Paolo, what are your guys' options about this? Any comments are
> appreciated! Thanks a lot!
>
> Thanks,
> Feng
>

--
best regards
yang

2015-12-22 06:49:20

[permalink] [raw]

Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

On 2015/12/22 12:37, Wu, Feng wrote:
>
>
>> -----Original Message-----
>> From: [email protected] [mailto:linux-kernel-
>> [email protected]] On Behalf Of Yang Zhang
>> Sent: Monday, December 21, 2015 10:06 AM
>> To: Wu, Feng <[email protected]>; [email protected];
>> [email protected]
>> Cc: [email protected]; [email protected]
>> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
>> priority interrupts
>>
>> On 2015/12/21 9:50, Wu, Feng wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Yang Zhang [mailto:[email protected]]
>>>> Sent: Monday, December 21, 2015 9:46 AM
>>>> To: Wu, Feng <[email protected]>; [email protected];
>>>> [email protected]
>>>> Cc: [email protected]; [email protected]
>>>> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
>>>> priority interrupts
>>>>
>>>> On 2015/12/16 9:37, Feng Wu wrote:
>>>>> Use vector-hashing to deliver lowest-priority interrupts, As an
>>>>> example, modern Intel CPUs in server platform use this method to
>>>>> handle lowest-priority interrupts.
>>>>>
>>>>> Signed-off-by: Feng Wu <[email protected]>
>>>>> ---
>>>>> arch/x86/kvm/irq_comm.c | 27 ++++++++++++++++++-----
>>>>> arch/x86/kvm/lapic.c | 57
>>>> ++++++++++++++++++++++++++++++++++++++++---------
>>>>> arch/x86/kvm/lapic.h | 2 ++
>>>>> arch/x86/kvm/x86.c | 9 ++++++++
>>>>> arch/x86/kvm/x86.h | 1 +
>>>>> 5 files changed, 81 insertions(+), 15 deletions(-)
>>>>>
>>>>> bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic
>> *src,
>>>>> struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
>>>>> {
>>>>> @@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
>>>> *kvm, struct kvm_lapic *src,
>>>>> dst = map->logical_map[cid];
>>>>>
>>>>> if (kvm_lowest_prio_delivery(irq)) {
>>>>> - int l = -1;
>>>>> - for_each_set_bit(i, &bitmap, 16) {
>>>>> - if (!dst[i])
>>>>> - continue;
>>>>> - if (l < 0)
>>>>> - l = i;
>>>>> - else if (kvm_apic_compare_prio(dst[i]->vcpu,
>>>> dst[l]->vcpu) < 0)
>>>>> - l = i;
>>>>> + if (!kvm_vector_hashing_enabled()) {
>>>>> + int l = -1;
>>>>> + for_each_set_bit(i, &bitmap, 16) {
>>>>> + if (!dst[i])
>>>>> + continue;
>>>>> + if (l < 0)
>>>>> + l = i;
>>>>> + else if (kvm_apic_compare_prio(dst[i]-
>>>>> vcpu, dst[l]->vcpu) < 0)
>>>>> + l = i;
>>>>> + }
>>>>> + bitmap = (l >= 0) ? 1 << l : 0;
>>>>> + } else {
>>>>> + int idx = 0;
>>>>> + unsigned int dest_vcpus = 0;
>>>>> +
>>>>> + for_each_set_bit(i, &bitmap, 16) {
>>>>> + if (!dst[i]
>>>> && !kvm_lapic_enabled(dst[i]->vcpu)) {
>>>>
>>>> It should be or(||) not and (&&).
>>>
>>> Oh, you are right! My negligence! Thanks for pointing this out, Yang!
>>
>> btw, i think the kvm_lapic_enabled check is wrong here? Why need it here?
>
> If the lapic is not enabled, I think we cannot recognize it as a candidate, can we?
> Maybe Radim can confirm this, Radim, what is your option?

The lapic can be disabled by hw or sw. Here, checking the hw-disabled case is
enough, and that is already covered when injecting the interrupt into the
guest. I remember we (Glab, Macelo and me) discussed it some time ago,
but I cannot find the mail thread.

>
> Thanks,
> Feng
>

--
best regards
yang

2015-12-22 06:59:42

[permalink] [raw]

Subject: RE: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

> -----Original Message-----
> From: Yang Zhang [mailto:[email protected]]
> Sent: Tuesday, December 22, 2015 2:49 PM
> To: Wu, Feng <[email protected]>; [email protected];
> [email protected]
> Cc: [email protected]; [email protected]; Jiang Liu
> ([email protected]) <[email protected]>
> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts
>
> >>>>
> >>>> On 2015/12/16 9:37, Feng Wu wrote:
> >>>>> Use vector-hashing to deliver lowest-priority interrupts, As an
> >>>>> example, modern Intel CPUs in server platform use this method to
> >>>>> handle lowest-priority interrupts.
> >>>>>
> >>>>> Signed-off-by: Feng Wu <[email protected]>
> >>>>> ---
> >>>>> arch/x86/kvm/irq_comm.c | 27 ++++++++++++++++++-----
> >>>>> arch/x86/kvm/lapic.c | 57
> >>>> ++++++++++++++++++++++++++++++++++++++++---------
> >>>>> arch/x86/kvm/lapic.h | 2 ++
> >>>>> arch/x86/kvm/x86.c | 9 ++++++++
> >>>>> arch/x86/kvm/x86.h | 1 +
> >>>>> 5 files changed, 81 insertions(+), 15 deletions(-)
> >>>>>
> >>>>> bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic
> >> *src,
> >>>>> struct kvm_lapic_irq *irq, int *r, unsigned long
> *dest_map)
> >>>>> {
> >>>>> @@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
> >>>> *kvm, struct kvm_lapic *src,
> >>>>> dst = map->logical_map[cid];
> >>>>>
> >>>>> if (kvm_lowest_prio_delivery(irq)) {
> >>>>> - int l = -1;
> >>>>> - for_each_set_bit(i, &bitmap, 16) {
> >>>>> - if (!dst[i])
> >>>>> - continue;
> >>>>> - if (l < 0)
> >>>>> - l = i;
> >>>>> - else if (kvm_apic_compare_prio(dst[i]->vcpu,
> >>>> dst[l]->vcpu) < 0)
> >>>>> - l = i;
> >>>>> + if (!kvm_vector_hashing_enabled()) {
> >>>>> + int l = -1;
> >>>>> + for_each_set_bit(i, &bitmap, 16) {
> >>>>> + if (!dst[i])
> >>>>> + continue;
> >>>>> + if (l < 0)
> >>>>> + l = i;
> >>>>> + else if (kvm_apic_compare_prio(dst[i]-
> >>>>> vcpu, dst[l]->vcpu) < 0)
> >>>>> + l = i;
> >>>>> + }
> >>>>> + bitmap = (l >= 0) ? 1 << l : 0;
> >>>>> + } else {
> >>>>> + int idx = 0;
> >>>>> + unsigned int dest_vcpus = 0;
> >>>>> +
> >>>>> + for_each_set_bit(i, &bitmap, 16) {
> >>>>> + if (!dst[i]
> >>>> && !kvm_lapic_enabled(dst[i]->vcpu)) {
> >>>>
> >>>> It should be or(||) not and (&&).
> >>>
> >>> Oh, you are right! My negligence! Thanks for pointing this out, Yang!
> >>
> >> btw, i think the kvm_lapic_enabled check is wrong here? Why need it here?
> >
> > If the lapic is not enabled, I think we cannot recognize it as a candidate, can
> we?
> > Maybe Radim can confirm this, Radim, what is your option?
>
> Lapic can be disable by hw or sw. Here we only need to check the hw is
> enough which is already covered while injecting the interrupt into
> guest. I remember we(Glab, Macelo and me) have discussed it several ago,
> but i cannot find the mail thread.

But if the lapic is disabled by software, we still cannot inject interrupts into
it, can we?

Thanks,
Feng

2015-12-22 07:13:38

[permalink] [raw]

Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

On 2015/12/22 14:59, Wu, Feng wrote:
>
>
>> -----Original Message-----
>> From: Yang Zhang [mailto:[email protected]]
>> Sent: Tuesday, December 22, 2015 2:49 PM
>> To: Wu, Feng <[email protected]>; [email protected];
>> [email protected]
>> Cc: [email protected]; [email protected]; Jiang Liu
>> ([email protected]) <[email protected]>
>> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
>> priority interrupts
>>
>>>>>>
>>>>>> On 2015/12/16 9:37, Feng Wu wrote:
>>>>>>> Use vector-hashing to deliver lowest-priority interrupts, As an
>>>>>>> example, modern Intel CPUs in server platform use this method to
>>>>>>> handle lowest-priority interrupts.
>>>>>>>
>>>>>>> Signed-off-by: Feng Wu <[email protected]>
>>>>>>> ---
>>>>>>> arch/x86/kvm/irq_comm.c | 27 ++++++++++++++++++-----
>>>>>>> arch/x86/kvm/lapic.c | 57
>>>>>> ++++++++++++++++++++++++++++++++++++++++---------
>>>>>>> arch/x86/kvm/lapic.h | 2 ++
>>>>>>> arch/x86/kvm/x86.c | 9 ++++++++
>>>>>>> arch/x86/kvm/x86.h | 1 +
>>>>>>> 5 files changed, 81 insertions(+), 15 deletions(-)
>>>>>>>
>>>>>>> bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic
>>>> *src,
>>>>>>> struct kvm_lapic_irq *irq, int *r, unsigned long
>> *dest_map)
>>>>>>> {
>>>>>>> @@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
>>>>>> *kvm, struct kvm_lapic *src,
>>>>>>> dst = map->logical_map[cid];
>>>>>>>
>>>>>>> if (kvm_lowest_prio_delivery(irq)) {
>>>>>>> - int l = -1;
>>>>>>> - for_each_set_bit(i, &bitmap, 16) {
>>>>>>> - if (!dst[i])
>>>>>>> - continue;
>>>>>>> - if (l < 0)
>>>>>>> - l = i;
>>>>>>> - else if (kvm_apic_compare_prio(dst[i]->vcpu,
>>>>>> dst[l]->vcpu) < 0)
>>>>>>> - l = i;
>>>>>>> + if (!kvm_vector_hashing_enabled()) {
>>>>>>> + int l = -1;
>>>>>>> + for_each_set_bit(i, &bitmap, 16) {
>>>>>>> + if (!dst[i])
>>>>>>> + continue;
>>>>>>> + if (l < 0)
>>>>>>> + l = i;
>>>>>>> + else if (kvm_apic_compare_prio(dst[i]-
>>>>>>> vcpu, dst[l]->vcpu) < 0)
>>>>>>> + l = i;
>>>>>>> + }
>>>>>>> + bitmap = (l >= 0) ? 1 << l : 0;
>>>>>>> + } else {
>>>>>>> + int idx = 0;
>>>>>>> + unsigned int dest_vcpus = 0;
>>>>>>> +
>>>>>>> + for_each_set_bit(i, &bitmap, 16) {
>>>>>>> + if (!dst[i]
>>>>>> && !kvm_lapic_enabled(dst[i]->vcpu)) {
>>>>>>
>>>>>> It should be or(||) not and (&&).
>>>>>
>>>>> Oh, you are right! My negligence! Thanks for pointing this out, Yang!
>>>>
>>>> btw, i think the kvm_lapic_enabled check is wrong here? Why need it here?
>>>
>>> If the lapic is not enabled, I think we cannot recognize it as a candidate, can
>> we?
>>> Maybe Radim can confirm this, Radim, what is your option?
>>
>> Lapic can be disable by hw or sw. Here we only need to check the hw is
>> enough which is already covered while injecting the interrupt into
>> guest. I remember we(Glab, Macelo and me) have discussed it several ago,
>> but i cannot find the mail thread.
>
> But if the lapic is disabled by software, we cannot still inject interrupts to
> it, can we?

Yes, we cannot inject a normal interrupt. But this is already covered by
the current logic, and adding a check here seems meaningless. Conversely,
it may do bad things.

--
best regards
yang

2015-12-22 07:19:52

[permalink] [raw]

Subject: RE: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

Hi Radim/Paolo,

> -----Original Message-----
> From: Yang Zhang [mailto:[email protected]]
> Sent: Tuesday, December 22, 2015 3:14 PM
> To: Wu, Feng <[email protected]>; [email protected];
> [email protected]
> Cc: [email protected]; [email protected]; Jiang Liu
> ([email protected]) <[email protected]>
> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts
>
> On 2015/12/22 14:59, Wu, Feng wrote:
> >
> >
> >> -----Original Message-----
> >> From: Yang Zhang [mailto:[email protected]]
> >> Sent: Tuesday, December 22, 2015 2:49 PM
> >> To: Wu, Feng <[email protected]>; [email protected];
> >> [email protected]
> >> Cc: [email protected]; [email protected]; Jiang Liu
> >> ([email protected]) <[email protected]>
> >> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
> >> priority interrupts
> >>
> >>>>>>
> >>>>>> On 2015/12/16 9:37, Feng Wu wrote:
> >>>>>>> Use vector-hashing to deliver lowest-priority interrupts, As an
> >>>>>>> example, modern Intel CPUs in server platform use this method to
> >>>>>>> handle lowest-priority interrupts.
> >>>>>>>
> >>>>>>> Signed-off-by: Feng Wu <[email protected]>
> >>>>>>> ---
> >>>>>>> arch/x86/kvm/irq_comm.c | 27 ++++++++++++++++++-----
> >>>>>>> arch/x86/kvm/lapic.c | 57
> >>>>>> ++++++++++++++++++++++++++++++++++++++++---------
> >>>>>>> arch/x86/kvm/lapic.h | 2 ++
> >>>>>>> arch/x86/kvm/x86.c | 9 ++++++++
> >>>>>>> arch/x86/kvm/x86.h | 1 +
> >>>>>>> 5 files changed, 81 insertions(+), 15 deletions(-)
> >>>>>>>
> >>>>>>> bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct
> kvm_lapic
> >>>> *src,
> >>>>>>> struct kvm_lapic_irq *irq, int *r, unsigned long
> >> *dest_map)
> >>>>>>> {
> >>>>>>> @@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct
> kvm
> >>>>>> *kvm, struct kvm_lapic *src,
> >>>>>>> dst = map->logical_map[cid];
> >>>>>>>
> >>>>>>> if (kvm_lowest_prio_delivery(irq)) {
> >>>>>>> - int l = -1;
> >>>>>>> - for_each_set_bit(i, &bitmap, 16) {
> >>>>>>> - if (!dst[i])
> >>>>>>> - continue;
> >>>>>>> - if (l < 0)
> >>>>>>> - l = i;
> >>>>>>> - else if (kvm_apic_compare_prio(dst[i]-
> >vcpu,
> >>>>>> dst[l]->vcpu) < 0)
> >>>>>>> - l = i;
> >>>>>>> + if (!kvm_vector_hashing_enabled()) {
> >>>>>>> + int l = -1;
> >>>>>>> + for_each_set_bit(i, &bitmap, 16) {
> >>>>>>> + if (!dst[i])
> >>>>>>> + continue;
> >>>>>>> + if (l < 0)
> >>>>>>> + l = i;
> >>>>>>> + else if
> (kvm_apic_compare_prio(dst[i]-
> >>>>>>> vcpu, dst[l]->vcpu) < 0)
> >>>>>>> + l = i;
> >>>>>>> + }
> >>>>>>> + bitmap = (l >= 0) ? 1 << l : 0;
> >>>>>>> + } else {
> >>>>>>> + int idx = 0;
> >>>>>>> + unsigned int dest_vcpus = 0;
> >>>>>>> +
> >>>>>>> + for_each_set_bit(i, &bitmap, 16) {
> >>>>>>> + if (!dst[i]
> >>>>>> && !kvm_lapic_enabled(dst[i]->vcpu)) {
> >>>>>>
> >>>>>> It should be or(||) not and (&&).
> >>>>>
> >>>>> Oh, you are right! My negligence! Thanks for pointing this out, Yang!
> >>>>
> >>>> btw, i think the kvm_lapic_enabled check is wrong here? Why need it here?
> >>>
> >>> If the lapic is not enabled, I think we cannot recognize it as a candidate, can
> >> we?
> >>> Maybe Radim can confirm this, Radim, what is your option?
> >>
> >> Lapic can be disable by hw or sw. Here we only need to check the hw is
> >> enough which is already covered while injecting the interrupt into
> >> guest. I remember we(Glab, Macelo and me) have discussed it several ago,
> >> but i cannot find the mail thread.
> >
> > But if the lapic is disabled by software, we cannot still inject interrupts to
> > it, can we?
>
> Yes, We cannot inject the normal interrupt. But this already covered by
> current logic and add a check here seems meaningless. Conversely, it may
> do bad thing..
>

Let's wait for Radim/Paolo's opinions about this.

Thanks,
Feng

2015-12-22 19:53:05

[permalink] [raw]

Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

2015-12-22 07:19+0000, Wu, Feng:
>> From: Yang Zhang [mailto:[email protected]]
>> On 2015/12/22 14:59, Wu, Feng wrote:
>> >> From: Yang Zhang [mailto:[email protected]]
>> >>>>>> On 2015/12/16 9:37, Feng Wu wrote:
>> >>>>>>> + for_each_set_bit(i, &bitmap, 16) {
>> >>>>>>> + if (!dst[i]
>> >>>>>> && !kvm_lapic_enabled(dst[i]->vcpu)) {
>> >>>>>>
>> >>>>>> It should be or(||) not and (&&).
>> >>>>>
>> >>>>> Oh, you are right! My negligence! Thanks for pointing this out, Yang!
>> >>>>
>> >>>> btw, i think the kvm_lapic_enabled check is wrong here? Why need it here?
>> >>>
>> >>> If the lapic is not enabled, I think we cannot recognize it as a candidate, can
>> >> we?
>> >>> Maybe Radim can confirm this, Radim, what is your option?

SDM 10.6.2.2 Logical Destination Mode:
For both configurations of logical destination mode, when combined
with lowest priority delivery mode, software is responsible for
ensuring that all of the local APICs included in or addressed by the
IPI or I/O subsystem interrupt are present and enabled to receive the
interrupt.

The case is undefined if some targeted LAPICs weren't hardware enabled
as no interrupts can be delivered to hardware disabled LAPIC, so we can
check for hardware enabled.

It's not obvious if "enabled to receive the interrupt" means hardware or
software enabled, but lowest priority cannot deliver NMI/INIT/..., so
checking for software enabled doesn't restrict any valid uses either.

so ... KVM only mustn't blow up when encountering this situation :)

The current code seems correct, but redundant. Just for reference, KVM
now does:
- check for software enabled LAPIC since patch aefd18f01ee8 ("KVM: x86:
In DM_LOWEST, only deliver interrupts to vcpus with enabled LAPIC's")
- check only for hardware enabled LAPIC in the fast path, since
1e08ec4a130e ("KVM: optimize apic interrupt delivery")

(v1 was arguably better; I pointed out the need for enabled LAPIC in v1 only
from looking at one KVM function, sorry.)

>> >> Lapic can be disable by hw or sw. Here we only need to check the hw is
>> >> enough which is already covered while injecting the interrupt into
>> >> guest. I remember we(Glab, Macelo and me) have discussed it several ago,
>> >> but i cannot find the mail thread.
>>
>> >
>> > But if the lapic is disabled by software, we cannot still inject interrupts to
>> > it, can we?
>>
>> Yes, We cannot inject the normal interrupt. But this already covered by
>> current logic and add a check here seems meaningless. Conversely, it may
>> do bad thing..
>>
>
> Let's wait for Radim/Paolo's opinions about this.

I'd pick whatever results in less code: this time it seems like checking
for hardware enabled LAPIC in both paths (implicitly in the fast path).
Maybe it can be done better, I haven't given it much thought.

We should revert aefd18f01ee8 at the same time, so our PI/non-PI slow
paths won't diverge -- I hope it wasn't fixing a bug :)

I'll review the series tomorrow, thanks for your patience.

2015-12-23 02:13:06

[permalink] [raw]

Subject: RE: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

> -----Original Message-----
> From: [email protected] [mailto:[email protected]]
> Sent: Wednesday, December 23, 2015 3:53 AM
> To: Wu, Feng <[email protected]>
> Cc: Yang Zhang <[email protected]>; [email protected];
> [email protected]; [email protected]; Jiang Liu
> ([email protected]) <[email protected]>
> Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts
>
> 2015-12-22 07:19+0000, Wu, Feng:
> >> From: Yang Zhang [mailto:[email protected]]
> >> On 2015/12/22 14:59, Wu, Feng wrote:
> >> >> From: Yang Zhang [mailto:[email protected]]
> >> >>>>>> On 2015/12/16 9:37, Feng Wu wrote:
> >> >>>>>>> + for_each_set_bit(i, &bitmap, 16) {
> >> >>>>>>> + if (!dst[i]
> >> >>>>>> && !kvm_lapic_enabled(dst[i]->vcpu)) {
> >> >>>>>>
> >> >>>>>> It should be or(||) not and (&&).
> >> >>>>>
> >> >>>>> Oh, you are right! My negligence! Thanks for pointing this out, Yang!
> >> >>>>
> >> >>>> btw, i think the kvm_lapic_enabled check is wrong here? Why need it
> here?
> >> >>>
> >> >>> If the lapic is not enabled, I think we cannot recognize it as a candidate,
> can
> >> >> we?
> >> >>> Maybe Radim can confirm this, Radim, what is your option?
>
> SDM 10.6.2.2 Logical Destination Mode:
> For both configurations of logical destination mode, when combined
> with lowest priority delivery mode, software is responsible for
> ensuring that all of the local APICs included in or addressed by the
> IPI or I/O subsystem interrupt are present and enabled to receive the
> interrupt.
>

Radim, thanks a lot for your feedback!

> The case is undefined if some targeted LAPICs weren't hardware enabled
> as no interrupts can be delivered to hardware disabled LAPIC, so we can
> check for hardware enabled.
>
> It's not obvious if "enabled to receive the interrupt" means hardware or
> software enabled, but lowest priority cannot deliver NMI/INIT/..., so
> checking for software enabled doesn't restrict any valid uses either.
>
> so ... KVM only musn't blow up when encountering this situation :)
>
> The current code seems correct, but redundant. Just for reference, KVM
> now does:
> - check for software enabled LAPIC since patch aefd18f01ee8 ("KVM: x86:
> In DM_LOWEST, only deliver interrupts to vcpus with enabled LAPIC's")
> - check only for hardware enabled LAPIC in the fast path, since
> 1e08ec4a130e ("KVM: optimize apic interrupt delivery"))

Software enabled LAPIC is also checked in patch 1e08ec4a130e
("KVM: optimize apic interrupt delivery"), however, it was removed
in patch 3b5a5ffa928a3f875b0d5dd284eeb7c322e1688a. Now I am
a little confused about the policy, when and where should we do
the software/hardware enabled check?

>
> (v1 was arguable better, I pointed the need for enabled LAPIC in v1 only
> from looking at one KVM function, sorry.)
>
> >> >> Lapic can be disable by hw or sw. Here we only need to check the hw is
> >> >> enough which is already covered while injecting the interrupt into
> >> >> guest. I remember we(Glab, Macelo and me) have discussed it several ago,
> >> >> but i cannot find the mail thread.
> >>
> >> >
> >> > But if the lapic is disabled by software, we cannot still inject interrupts to
> >> > it, can we?
> >>
> >> Yes, We cannot inject the normal interrupt. But this already covered by
> >> current logic and add a check here seems meaningless. Conversely, it may
> >> do bad thing..
> >>
> >
> > Let's wait for Radim/Paolo's opinions about this.
>
> I'd pick whatever results in less code: this time it seems like checking
> for hardware enabled LAPIC in both paths (implicitly in the fast path).
> Maybe it can be done better, I haven't given it much thought.
>
> We should revert aefd18f01ee8 at the same time, so our PI/non-PI slow
> paths won't diverge -- I hope it wasn't fixing a bug :)

From the change log, it seems to me this patch was fixing a bug.

Thanks,
Feng

2015-12-23 03:17:56

[permalink] [raw]

Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

On 2015/12/23 3:52, [email protected] wrote:
> 2015-12-22 07:19+0000, Wu, Feng:
>>> From: Yang Zhang [mailto:[email protected]]
>>> On 2015/12/22 14:59, Wu, Feng wrote:
>>>>> From: Yang Zhang [mailto:[email protected]]
>>>>>>>>> On 2015/12/16 9:37, Feng Wu wrote:
>>>>>>>>>> + for_each_set_bit(i, &bitmap, 16) {
>>>>>>>>>> + if (!dst[i]
>>>>>>>>> && !kvm_lapic_enabled(dst[i]->vcpu)) {
>>>>>>>>>
>>>>>>>>> It should be or(||) not and (&&).
>>>>>>>>
>>>>>>>> Oh, you are right! My negligence! Thanks for pointing this out, Yang!
>>>>>>>
>>>>>>> btw, i think the kvm_lapic_enabled check is wrong here? Why need it here?
>>>>>>
>>>>>> If the lapic is not enabled, I think we cannot recognize it as a candidate, can
>>>>> we?
>>>>>> Maybe Radim can confirm this, Radim, what is your option?
>
> SDM 10.6.2.2 Logical Destination Mode:
> For both configurations of logical destination mode, when combined
> with lowest priority delivery mode, software is responsible for
> ensuring that all of the local APICs included in or addressed by the
> IPI or I/O subsystem interrupt are present and enabled to receive the
> interrupt.
>
> The case is undefined if some targeted LAPICs weren't hardware enabled
> as no interrupts can be delivered to hardware disabled LAPIC, so we can
> check for hardware enabled.
>
> It's not obvious if "enabled to receive the interrupt" means hardware or
> software enabled, but lowest priority cannot deliver NMI/INIT/..., so
> checking for software enabled doesn't restrict any valid uses either.

Agreed. My understanding is that it is software's responsibility to
ensure this case does not happen. But as the hypervisor, we should not
check it on software's behalf. What we can do is just follow the SDM.

>
> so ... KVM only musn't blow up when encountering this situation :)
>
> The current code seems correct, but redundant. Just for reference, KVM
> now does:
> - check for software enabled LAPIC since patch aefd18f01ee8 ("KVM: x86:
> In DM_LOWEST, only deliver interrupts to vcpus with enabled LAPIC's")
> - check only for hardware enabled LAPIC in the fast path, since
> 1e08ec4a130e ("KVM: optimize apic interrupt delivery"))
>
> (v1 was arguable better, I pointed the need for enabled LAPIC in v1 only
> from looking at one KVM function, sorry.)
>
>>>>> Lapic can be disable by hw or sw. Here we only need to check the hw is
>>>>> enough which is already covered while injecting the interrupt into
>>>>> guest. I remember we(Glab, Macelo and me) have discussed it several ago,
>>>>> but i cannot find the mail thread.
>>>
>>>>
>>>> But if the lapic is disabled by software, we cannot still inject interrupts to
>>>> it, can we?
>>>
>>> Yes, We cannot inject the normal interrupt. But this already covered by
>>> current logic and add a check here seems meaningless. Conversely, it may
>>> do bad thing..
>>>
>>
>> Let's wait for Radim/Paolo's opinions about this.
>
> I'd pick whatever results in less code: this time it seems like checking
> for hardware enabled LAPIC in both paths (implicitly in the fast path).
> Maybe it can be done better, I haven't given it much thought.
>
> We should revert aefd18f01ee8 at the same time, so our PI/non-PI slow
> paths won't diverge -- I hope it wasn't fixing a bug :)
>
> I'll review the series tomorrow, thanks for your patience.

--
best regards
yang

2015-12-23 16:42:24

[permalink] [raw]

Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

2015-12-23 02:12+0000, Wu, Feng:
>> From: [email protected] [mailto:[email protected]]
>> 2015-12-22 07:19+0000, Wu, Feng:
>> >> From: Yang Zhang [mailto:[email protected]]
>> >> On 2015/12/22 14:59, Wu, Feng wrote:
>> >> >> From: Yang Zhang [mailto:[email protected]]
>> >> >>>>>> On 2015/12/16 9:37, Feng Wu wrote:
>> The case is undefined if some targeted LAPICs weren't hardware enabled
>> as no interrupts can be delivered to hardware disabled LAPIC, so we can
>> check for hardware enabled.
>>
>> It's not obvious if "enabled to receive the interrupt" means hardware or
>> software enabled, but lowest priority cannot deliver NMI/INIT/..., so
>> checking for software enabled doesn't restrict any valid uses either.
>>
>> so ... KVM only musn't blow up when encountering this situation :)
>>
>> The current code seems correct, but redundant. Just for reference, KVM
>> now does:
>> - check for software enabled LAPIC since patch aefd18f01ee8 ("KVM: x86:
>> In DM_LOWEST, only deliver interrupts to vcpus with enabled LAPIC's")
>> - check only for hardware enabled LAPIC in the fast path, since
>> 1e08ec4a130e ("KVM: optimize apic interrupt delivery"))
>
> Software enabled LAPIC is also checked in patch 1e08ec4a130e
> ("KVM: optimize apic interrupt delivery"), however, it was removed
> in patch 3b5a5ffa928a3f875b0d5dd284eeb7c322e1688a.

Right, thanks. (The software check was actually removed in 173beedc1601
("KVM: x86: Software disabled APIC should still deliver NMIs"), which
introduced a two pass mechanism that was later simplified.)

> Now I am
> a little confused about the policy, when and where should we do
> the software/hardware enabled check?

It's a mess, I think we'd like both checks to be done early and ideally
only in one place.

The fast path would like to precompute as much as possible, but only
hardware enabled affects all interrupts (like non-present LAPIC);
software disabled still needs an extra condition for every interrupt.

>> I'd pick whatever results in less code: this time it seems like checking
>> for hardware enabled LAPIC in both paths (implicitly in the fast path).
>> Maybe it can be done better, I haven't given it much thought.
>>
>> We should revert aefd18f01ee8 at the same time, so our PI/non-PI slow
>> paths won't diverge -- I hope it wasn't fixing a bug :)
>
> From the change log, It seems to me this patch was fixing a bug.

Yeah, I found the original discussion
RFC: http://www.spinics.net/lists/kvm/msg36190.html
v1: http://www.spinics.net/lists/kvm/msg36395.html
v2: http://www.spinics.net/lists/kvm/msg36651.html

that led to some explanation in bugzilla:
https://bugzilla.redhat.com/show_bug.cgi?id=596223 (a clone of
https://bugzilla.redhat.com/show_bug.cgi?id=505527)

It seems that kexec on VCPU != 0 did something with BSP APIC ID that
resulted in a wrong delivery -- I didn't look where the bug was, but the
solution we adopted is probably just a lucky workaround.
Makes sense to look deeper into it.

2015-12-23 16:50:19

[permalink] [raw]

Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-12-22 14:42+0800, Yang Zhang:
> On 2015/12/22 12:36, Wu, Feng wrote:
>>>From: Yang Zhang [mailto:[email protected]]
>>>On 2015/12/21 9:55, Wu, Feng wrote:
>>>>>From: [email protected] [mailto:linux-kernel-
>>>>>On 2015/12/16 9:37, Feng Wu wrote:
>>>>>>diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>>>>>@@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm
>>>*kvm,
>>>>>unsigned int host_irq,
>>>>>> */
>>>>>>
>>>>>> kvm_set_msi_irq(e, &irq);
>>>>>>- if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
>>>>>>- continue;
>>>>>>+
>>>>>>+ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
>>>>>>+ if (!kvm_vector_hashing_enabled() ||
>>>>>>+ irq.delivery_mode !=
>>>>>APIC_DM_LOWEST)
>>>>>>+ continue;
>>>>>>+
>>>>>>+ vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
>>>>>>+ if (!vcpu)
>>>>>>+ continue;
>>>>>>+ }
>>>>>
>>>>>I am a little confused with the 'continue'. If the destination is not
>>>>>single vcpu, shouldn't we rollback to use non-PI mode?
>>>>
>>>>Here is the logic:
>>>>- If it is single destination, we will use PI no matter it is fixed or lowest-priority.
>>>>- If it is not single destination:
>>>> a) It is fixed, we will use non-PI
>>>> b) It is lowest-priority and vector-hashing is enabled, we will use PI
>>>> c) otherwise, use non-PI
>>>
>>>If it is single destination previously, then change to no-single mode.
>>>Can current code cover this case?
>>
>>In my test, before setting irq affinity (change single vcpu to non-single vcpu
>>in this case), the guest will mask the interrupt first, so before getting here, IRTE
>>has been changed back to remapped mode already(when guest masks the MSIx,
>>we will change back to remapped mode), hence nothing needed here.
>>
>>Digging into the linux code (guest) a bit more, I found that if interrupt remapping
>>is not enabled in the guest (IR is not supported for guest anyway), it will always
>>mask the MSI/MSIx before setting the irq affinity. So the code should work
>>well currently.
>
> We should not rely on guest's behavior. From code level, it need be fixed.
>
>>However, for robustness, I think explicitly changing IRTE back to remapped
>>mode for the 'continue' case should be a good idea.
>
> This is what i am looking for.

I agree, that would be a nice addition.

IIRC, the masking is optional -- if the guest can handle interrupts that
are generated while the device is half-configured, it doesn't need to
disable MSIs.

2015-12-23 17:19:39

[permalink] [raw]

Subject: Re: [PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

2015-12-16 09:37+0800, Feng Wu:
> Use vector-hashing to deliver lowest-priority interrupts, As an
> example, modern Intel CPUs in server platform use this method to
> handle lowest-priority interrupts.
>
> Signed-off-by: Feng Wu <[email protected]>
> ---
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> @@ -78,13 +83,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
> r = 0;
> r += kvm_apic_set_irq(vcpu, irq, dest_map);
> } else if (kvm_lapic_enabled(vcpu)) {
> - if (!lowest)
> - lowest = vcpu;
> - else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
> - lowest = vcpu;
> + if (!kvm_vector_hashing_enabled()) {
> + if (!lowest)
> + lowest = vcpu;
> + else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
> + lowest = vcpu;
> + } else {
> + __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
> + dest_vcpus++;
> + }
> }
> }
>
> + if (dest_vcpus != 0) {
> + idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> + dest_vcpu_bitmap, KVM_MAX_VCPUS);
> +
> + lowest = kvm_get_vcpu(kvm, idx - 1);

This is going to fail with sparse topologies (e.g. 3 cores per socket).
vcpu_id = initial APIC ID and kvm_get_vcpu() uses a compressed array
that has kvm->online_vcpus elements, so we could overflow.

The 'i' in kvm_for_each_vcpu() could be used for the bitmap.
(kvm_get_vcpu_by_id() instead of kvm_get_vcpu() is slightly worse.)

> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> @@ -678,6 +678,22 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
> bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
> {
> @@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> + if (!kvm_vector_hashing_enabled()) {
| [...]
> + } else {
> + int idx = 0;
> + unsigned int dest_vcpus = 0;

Now that we don't need to check for present/enabled LAPICs, I think it
would be better to solve this by assuming that all selected LAPICs are
enabled, so the n-th target is decided only based on vector and
destination.

> + for_each_set_bit(i, &bitmap, 16) {
> + if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
> + __clear_bit(i, &bitmap);
> + continue;
> + }
> + }

=> we could skip this loop.

> +
> + dest_vcpus = hweight16(bitmap);
> +
> + if (dest_vcpus != 0) {
> + idx = kvm_vector_2_index(irq->vector,
> + dest_vcpus, &bitmap, 16);
> +
> + bitmap = 0;
> + __set_bit(idx-1, &bitmap);

And set just this bit.

The drawback is that buggy software that included hardware disabled
APICs to lowest priority destinations could stop working ...
Do you think it's too risky?

> + }
> }

(This is basically the same as converting the message to a fixed delivery
to n-th bit beforehand, so it might be reasonable to apply something
similar to simplify the slow path as well. Mixed flat/cluster/x2APIC
mode makes me suspect that it won't be reasonable.)

2015-12-23 17:21:45