Optimize the XICS emulation code in KVM by addressing the performance
TODOs listed in the comments of book3s_xics.c.
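
For reference, the TODO block in book3s_xics.c that this series
addresses reads:

 * TODO
 * ====
 *
 * - To speed up resends, keep a bitmap of "resend" set bits in the
 *   ICS
 *
 * - Speed up server# -> ICP lookup (array ? hash table ?)
 *
 * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
 *   locks array to improve scalability

Each of the three patches below addresses one of these items.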
Performance numbers:
1. Test case: pgbench run for 120 secs in a KVM on PowerVM guest
2. Time taken by arch_send_call_function_single_ipi() before this
series, measured with funclatency [1]:
$ ./funclatency.py -u arch_send_call_function_single_ipi
     usecs               : count     distribution
         0 -> 1          : 7        |                                        |
         2 -> 3          : 16       |                                        |
         4 -> 7          : 141      |                                        |
         8 -> 15         : 4455631  |****************************************|
        16 -> 31         : 437981   |***                                     |
        32 -> 63         : 5036     |                                        |
        64 -> 127        : 92       |                                        |
avg = 12 usecs, total: 60,532,481 usecs, count: 4,898,904
3. Time taken by arch_send_call_function_single_ipi() with the changes
in this series:
$ ./funclatency.py -u arch_send_call_function_single_ipi
     usecs               : count     distribution
         0 -> 1          : 15       |                                        |
         2 -> 3          : 7        |                                        |
         4 -> 7          : 3798     |                                        |
         8 -> 15         : 4569610  |****************************************|
        16 -> 31         : 339284   |**                                      |
        32 -> 63         : 4542     |                                        |
        64 -> 127        : 68       |                                        |
       128 -> 255        : 0        |                                        |
       256 -> 511        : 1        |                                        |
avg = 11 usecs, total: 57,720,612 usecs, count: 4,917,325
4. This patch series has also been tested with KVM on a Power8 CPU.
[1]: https://github.com/iovisor/bcc/blob/master/tools/funclatency.py
Changes v1 -> v1 resend:
1. Add Cedric to CC
Gautam Menghani (3):
arch/powerpc/kvm: Use bitmap to speed up resend of irqs in ICS
arch/powerpc/kvm: Optimize the server number -> ICP lookup
arch/powerpc/kvm: Reduce lock contention by moving spinlock from ics
to irq_state
arch/powerpc/kvm/book3s_hv_rm_xics.c | 8 ++--
arch/powerpc/kvm/book3s_xics.c | 70 ++++++++++++----------------
arch/powerpc/kvm/book3s_xics.h | 13 ++----
3 files changed, 39 insertions(+), 52 deletions(-)
--
2.44.0
When an irq needs to be resent, all 1024 irqs in an ICS are scanned and
those with the 'resend' flag set are resent. Optimize this path with a
per-ICS bitmap of pending resends, so that only the flagged sources are
visited.
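
The core of the change, as a minimal sketch (the real code is in the
diff below; resend_map is the per-ICS bitmap this patch introduces):

	/* Producer side: when a delivery is rejected and must be
	 * retried, record the source in the per-ICS bitmap alongside
	 * the existing per-ICP icsid bitmap. */
	set_bit(ics->icsid, icp->resend_map);
	set_bit(src, ics->resend_map);

	/* Consumer side: walk only the set bits instead of scanning
	 * all KVMPPC_XICS_IRQ_PER_ICS (1024) entries; the atomic
	 * test_and_clear_bit() makes sure each pending source is
	 * claimed exactly once if two ICPs resend concurrently. */
	for_each_set_bit(irq, ics->resend_map, KVMPPC_XICS_IRQ_PER_ICS) {
		if (test_and_clear_bit(irq, ics->resend_map))
			icp_deliver_irq(xics, icp,
					ics->irq_state[irq].number, true);
	}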
Signed-off-by: Gautam Menghani <[email protected]>
---
arch/powerpc/kvm/book3s_xics.c | 22 +++++++++++-----------
arch/powerpc/kvm/book3s_xics.h | 1 +
2 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 589a8f257120..12de526f04c4 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -47,9 +47,6 @@
* TODO
* ====
*
- * - To speed up resends, keep a bitmap of "resend" set bits in the
- * ICS
- *
* - Speed up server# -> ICP lookup (array ? hash table ?)
*
* - Make ICS lockless as well, or at least a per-interrupt lock or hashed
@@ -125,15 +122,17 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
struct kvmppc_icp *icp)
{
- int i;
+ u32 irq;
+ struct ics_irq_state *state;
- for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
- struct ics_irq_state *state = &ics->irq_state[i];
- if (state->resend) {
- XICS_DBG("resend %#x prio %#x\n", state->number,
- state->priority);
- icp_deliver_irq(xics, icp, state->number, true);
- }
+ for_each_set_bit(irq, ics->resend_map, KVMPPC_XICS_IRQ_PER_ICS) {
+ state = &ics->irq_state[irq];
+
+ if (!test_and_clear_bit(irq, ics->resend_map))
+ continue;
+ if (!state)
+ continue;
+ icp_deliver_irq(xics, icp, state->number, true);
}
}
@@ -489,6 +488,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
*/
smp_wmb();
set_bit(ics->icsid, icp->resend_map);
+ set_bit(src, ics->resend_map);
/*
* If the need_resend flag got cleared in the ICP some time
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index 08fb0843faf5..8fcb34ea47a4 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -98,6 +98,7 @@ struct kvmppc_ics {
arch_spinlock_t lock;
u16 icsid;
struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
+ DECLARE_BITMAP(resend_map, KVMPPC_XICS_IRQ_PER_ICS);
};
struct kvmppc_xics {
--
2.44.0
Given a server number, kvmppc_xics_find_server() does a linear search
over all the vcpus of a VM. Optimize this by maintaining the server
number -> ICP mapping in an array, turning the lookup into a single
indexed access.
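
In outline (a sketch only; the real change is in the diff below, with
icps[] as the new lookup array indexed by server number):

	/* Before: every lookup is an O(#vcpus) scan. */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
			return vcpu->arch.icp;
	}

	/* After: record the ICP once, when it is created ... */
	kvm->arch.xics->icps[server_num] = icp;

	/* ... so that the lookup becomes a single array access. */
	return kvm->arch.xics->icps[nr];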
Signed-off-by: Gautam Menghani <[email protected]>
---
arch/powerpc/kvm/book3s_xics.c | 4 ++--
arch/powerpc/kvm/book3s_xics.h | 10 ++--------
2 files changed, 4 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 12de526f04c4..1dc2f77571e7 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -47,8 +47,6 @@
* TODO
* ====
*
- * - Speed up server# -> ICP lookup (array ? hash table ?)
- *
* - Make ICS lockless as well, or at least a per-interrupt lock or hashed
* locks array to improve scalability
*/
@@ -1062,6 +1060,7 @@ static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
static int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
{
struct kvmppc_icp *icp;
+ struct kvm *kvm = vcpu->kvm;
if (!vcpu->kvm->arch.xics)
return -ENODEV;
@@ -1078,6 +1077,7 @@ static int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_nu
icp->state.mfrr = MASKED;
icp->state.pending_pri = MASKED;
vcpu->arch.icp = icp;
+ kvm->arch.xics->icps[server_num] = icp;
XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index 8fcb34ea47a4..feeb0897d555 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -111,19 +111,13 @@ struct kvmppc_xics {
u32 err_noics;
u32 err_noicp;
struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
+ DECLARE_FLEX_ARRAY(struct kvmppc_icp *, icps);
};
static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
u32 nr)
{
- struct kvm_vcpu *vcpu = NULL;
- unsigned long i;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
- return vcpu->arch.icp;
- }
- return NULL;
+ return kvm->arch.xics->icps[nr];
}
static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
--
2.44.0
Take a spinlock on the state of an individual IRQ instead of on the
entire ICS. This improves scalability by reducing lock contention
between unrelated interrupt sources.
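
The change follows the same pattern at every locking site (a minimal
sketch; the full set of conversions is in the diff below):

	/* Before: one arch_spinlock_t serializes all 1024 sources of
	 * an ICS, so two unrelated deliveries contend on it. */
	arch_spin_lock(&ics->lock);
	/* ... critical section over one source's state ... */
	arch_spin_unlock(&ics->lock);

	/* After: each ics_irq_state carries its own lock, so deliveries
	 * of different interrupts in the same ICS proceed in parallel. */
	arch_spin_lock(&state->lock);
	/* ... critical section over one source's state ... */
	arch_spin_unlock(&state->lock);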
Signed-off-by: Gautam Menghani <[email protected]>
---
arch/powerpc/kvm/book3s_hv_rm_xics.c | 8 ++---
arch/powerpc/kvm/book3s_xics.c | 44 ++++++++++++----------------
arch/powerpc/kvm/book3s_xics.h | 2 +-
3 files changed, 23 insertions(+), 31 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index e42984878503..178bc869b519 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -308,7 +308,7 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
state = &ics->irq_state[src];
/* Get a lock on the ICS */
- arch_spin_lock(&ics->lock);
+ arch_spin_lock(&state->lock);
/* Get our server */
if (!icp || state->server != icp->server_num) {
@@ -368,7 +368,7 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* Delivery was successful, did we reject somebody else ?
*/
if (reject && reject != XICS_IPI) {
- arch_spin_unlock(&ics->lock);
+ arch_spin_unlock(&state->lock);
icp->n_reject++;
new_irq = reject;
check_resend = 0;
@@ -397,13 +397,13 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
smp_mb();
if (!icp->state.need_resend) {
state->resend = 0;
- arch_spin_unlock(&ics->lock);
+ arch_spin_unlock(&state->lock);
check_resend = 0;
goto again;
}
}
out:
- arch_spin_unlock(&ics->lock);
+ arch_spin_unlock(&state->lock);
}
static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 1dc2f77571e7..466c92cf49fb 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -36,21 +36,13 @@
* LOCKING
* =======
*
- * Each ICS has a spin lock protecting the information about the IRQ
- * sources and avoiding simultaneous deliveries of the same interrupt.
+ * Each IRQ source has a spin lock protecting its state and avoiding
+ * simultaneous deliveries of the same interrupt.
*
* ICP operations are done via a single compare & swap transaction
* (most ICP state fits in the union kvmppc_icp_state)
*/
-/*
- * TODO
- * ====
- *
- * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
- * locks array to improve scalability
- */
-
/* -- ICS routines -- */
static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
@@ -142,7 +134,7 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
unsigned long flags;
local_irq_save(flags);
- arch_spin_lock(&ics->lock);
+ arch_spin_lock(&state->lock);
state->server = server;
state->priority = priority;
@@ -154,7 +146,7 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
deliver = true;
}
- arch_spin_unlock(&ics->lock);
+ arch_spin_unlock(&state->lock);
local_irq_restore(flags);
return deliver;
@@ -207,10 +199,10 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
state = &ics->irq_state[src];
local_irq_save(flags);
- arch_spin_lock(&ics->lock);
+ arch_spin_lock(&state->lock);
*server = state->server;
*priority = state->priority;
- arch_spin_unlock(&ics->lock);
+ arch_spin_unlock(&state->lock);
local_irq_restore(flags);
return 0;
@@ -406,7 +398,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
/* Get a lock on the ICS */
local_irq_save(flags);
- arch_spin_lock(&ics->lock);
+ arch_spin_lock(&state->lock);
/* Get our server */
if (!icp || state->server != icp->server_num) {
@@ -467,7 +459,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* Delivery was successful, did we reject somebody else ?
*/
if (reject && reject != XICS_IPI) {
- arch_spin_unlock(&ics->lock);
+ arch_spin_unlock(&state->lock);
local_irq_restore(flags);
new_irq = reject;
check_resend = false;
@@ -497,14 +489,14 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
smp_mb();
if (!icp->state.need_resend) {
state->resend = 0;
- arch_spin_unlock(&ics->lock);
+ arch_spin_unlock(&state->lock);
local_irq_restore(flags);
check_resend = false;
goto again;
}
}
out:
- arch_spin_unlock(&ics->lock);
+ arch_spin_unlock(&state->lock);
local_irq_restore(flags);
}
@@ -992,20 +984,20 @@ static int xics_debug_show(struct seq_file *m, void *private)
seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
icsid);
- local_irq_save(flags);
- arch_spin_lock(&ics->lock);
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
struct ics_irq_state *irq = &ics->irq_state[i];
+ local_irq_save(flags);
+ arch_spin_lock(&irq->lock);
seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x pq_state %d resend %d masked pending %d\n",
irq->number, irq->server, irq->priority,
irq->saved_priority, irq->pq_state,
irq->resend, irq->masked_pending);
+ arch_spin_unlock(&irq->lock);
+ local_irq_restore(flags);
}
- arch_spin_unlock(&ics->lock);
- local_irq_restore(flags);
}
return 0;
}
@@ -1189,7 +1181,7 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
irqp = &ics->irq_state[idx];
local_irq_save(flags);
- arch_spin_lock(&ics->lock);
+ arch_spin_lock(&irqp->lock);
ret = -ENOENT;
if (irqp->exists) {
val = irqp->server;
@@ -1214,7 +1206,7 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
ret = 0;
}
- arch_spin_unlock(&ics->lock);
+ arch_spin_unlock(&irqp->lock);
local_irq_restore(flags);
if (!ret && put_user(val, ubufp))
@@ -1254,7 +1246,7 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
return -EINVAL;
local_irq_save(flags);
- arch_spin_lock(&ics->lock);
+ arch_spin_lock(&irqp->lock);
irqp->server = server;
irqp->saved_priority = prio;
if (val & KVM_XICS_MASKED)
@@ -1272,7 +1264,7 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
if (val & KVM_XICS_QUEUED)
irqp->pq_state |= PQ_QUEUED;
irqp->exists = 1;
- arch_spin_unlock(&ics->lock);
+ arch_spin_unlock(&irqp->lock);
local_irq_restore(flags);
if (val & KVM_XICS_PENDING)
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index feeb0897d555..1ee62b7a8fdf 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -45,6 +45,7 @@ struct ics_irq_state {
u8 exists;
int intr_cpu;
u32 host_irq;
+ arch_spinlock_t lock;
};
/* Atomic ICP state, updated with a single compare & swap */
@@ -95,7 +96,6 @@ struct kvmppc_icp {
};
struct kvmppc_ics {
- arch_spinlock_t lock;
u16 icsid;
struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
DECLARE_BITMAP(resend_map, KVMPPC_XICS_IRQ_PER_ICS);
--
2.44.0
Hello,
Please review this series and let me know if any changes are needed.
Thanks,
Gautam