As discussed, here is a slimmed-down series for addressing lock
serialization in the vgic_get_irq() / vgic_put_irq() path for LPIs. The
bulk of it is using an xarray to represent LPIs and leveraging RCU to
avoid serializing readers of the LPI configuration state.
There's a lot of potential for clean-up, but this is intentionally
deferred until after we fix up the LPI translation cache.
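For illustration, the read-side pattern the series converges on looks
roughly like this (a sketch, not the literal patched code):

  rcu_read_lock();
  irq = xa_load(&dist->lpi_xa, intid);
  if (!vgic_try_get_irq_kref(irq))
          irq = NULL;
  rcu_read_unlock();

No lock serializes the lookup; lifetime is handled by RCU plus a kref
that can no longer be taken once it has dropped to zero.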
v2: https://lore.kernel.org/kvmarm/[email protected]/
v2 -> v3:
- Fix the stupid lock imbalance once and for all (Dan)
- Drop the tracepoints / stats I used for debugging my own crap (Marc)
Oliver Upton (10):
KVM: arm64: vgic: Store LPIs in an xarray
KVM: arm64: vgic: Use xarray to find LPI in vgic_get_lpi()
KVM: arm64: vgic-v3: Iterate the xarray to find pending LPIs
KVM: arm64: vgic-its: Walk the LPI xarray in vgic_copy_lpi_list()
KVM: arm64: vgic: Get rid of the LPI linked-list
KVM: arm64: vgic: Use atomics to count LPIs
KVM: arm64: vgic: Free LPI vgic_irq structs in an RCU-safe manner
KVM: arm64: vgic: Rely on RCU protection in vgic_get_lpi()
KVM: arm64: vgic: Ensure the irq refcount is nonzero when taking a ref
KVM: arm64: vgic: Don't acquire the lpi_list_lock in vgic_put_irq()
arch/arm64/kvm/vgic/vgic-debug.c | 2 +-
arch/arm64/kvm/vgic/vgic-init.c | 4 ++-
arch/arm64/kvm/vgic/vgic-its.c | 53 +++++++++++++++++++-----------
arch/arm64/kvm/vgic/vgic-v3.c | 3 +-
arch/arm64/kvm/vgic/vgic.c | 56 ++++++++++----------------------
arch/arm64/kvm/vgic/vgic.h | 15 ++++++---
include/kvm/arm_vgic.h | 9 ++---
7 files changed, 73 insertions(+), 69 deletions(-)
base-commit: 6613476e225e090cc9aad49be7fa504e290dd33d
--
2.44.0.rc0.258.g7320e95886-goog
Using a linked-list for LPIs is less than ideal as it of course requires
iterative searches to find a particular entry. An xarray is a better
data structure for this use case, as it provides faster searches and can
still handle a potentially sparse range of INTID allocations.
Start by storing LPIs in an xarray, punting usage of the xarray to a
subsequent change.
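For reference, the xarray calls relied upon below behave roughly as
follows (a sketch with error handling elided, not the patched code):

  int ret;

  xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);        /* xa_lock is IRQ-safe */
  ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL); /* preallocate the slot */
  ret = xa_err(xa_store(&dist->lpi_xa, intid, irq, 0));   /* publish the entry */
  irq = xa_load(&dist->lpi_xa, intid);                    /* lookup */
  xa_erase(&dist->lpi_xa, intid);                         /* remove */

Reserving the slot up front means the later xa_store() does not need to
allocate, which is why it can be called with a 0 gfp mask while holding
the lpi_list_lock.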
Signed-off-by: Oliver Upton <[email protected]>
---
arch/arm64/kvm/vgic/vgic-init.c | 3 +++
arch/arm64/kvm/vgic/vgic-its.c | 16 ++++++++++++++++
arch/arm64/kvm/vgic/vgic.c | 1 +
include/kvm/arm_vgic.h | 2 ++
4 files changed, 22 insertions(+)
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index e949e1d0fd9f..411719053107 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -56,6 +56,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
INIT_LIST_HEAD(&dist->lpi_list_head);
INIT_LIST_HEAD(&dist->lpi_translation_cache);
raw_spin_lock_init(&dist->lpi_list_lock);
+ xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
}
/* CREATION */
@@ -366,6 +367,8 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
if (vgic_supports_direct_msis(kvm))
vgic_v4_teardown(kvm);
+
+ xa_destroy(&dist->lpi_xa);
}
static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index e2764d0ffa9f..fb2d3c356984 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -52,6 +52,12 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
if (!irq)
return ERR_PTR(-ENOMEM);
+ ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
+ if (ret) {
+ kfree(irq);
+ return ERR_PTR(ret);
+ }
+
INIT_LIST_HEAD(&irq->lpi_list);
INIT_LIST_HEAD(&irq->ap_list);
raw_spin_lock_init(&irq->irq_lock);
@@ -86,12 +92,22 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
goto out_unlock;
}
+ ret = xa_err(xa_store(&dist->lpi_xa, intid, irq, 0));
+ if (ret) {
+ xa_release(&dist->lpi_xa, intid);
+ kfree(irq);
+ goto out_unlock;
+ }
+
list_add_tail(&irq->lpi_list, &dist->lpi_list_head);
dist->lpi_list_count++;
out_unlock:
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ if (ret)
+ return ERR_PTR(ret);
+
/*
* We "cache" the configuration table entries in our struct vgic_irq's.
* However we only have those structs for mapped IRQs, so we read in
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index db2a95762b1b..c126014f8395 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
return;
list_del(&irq->lpi_list);
+ xa_erase(&dist->lpi_xa, irq->intid);
dist->lpi_list_count--;
kfree(irq);
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 8cc38e836f54..795b35656b54 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -13,6 +13,7 @@
#include <linux/spinlock.h>
#include <linux/static_key.h>
#include <linux/types.h>
+#include <linux/xarray.h>
#include <kvm/iodev.h>
#include <linux/list.h>
#include <linux/jump_label.h>
@@ -275,6 +276,7 @@ struct vgic_dist {
/* Protects the lpi_list and the count value below. */
raw_spinlock_t lpi_list_lock;
+ struct xarray lpi_xa;
struct list_head lpi_list_head;
int lpi_list_count;
--
2.44.0.rc0.258.g7320e95886-goog
Iterating over the LPI linked-list is less than ideal when the desired
index is already known. Use the INTID to index the LPI xarray instead.
Signed-off-by: Oliver Upton <[email protected]>
---
arch/arm64/kvm/vgic/vgic.c | 19 +++++--------------
1 file changed, 5 insertions(+), 14 deletions(-)
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index c126014f8395..d90c42ff051d 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -54,8 +54,9 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
*/
/*
- * Iterate over the VM's list of mapped LPIs to find the one with a
- * matching interrupt ID and return a reference to the IRQ structure.
+ * Index the VM's xarray of mapped LPIs and return a reference to the IRQ
+ * structure. The caller is expected to call vgic_put_irq() later once it's
+ * finished with the IRQ.
*/
static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
{
@@ -65,20 +66,10 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
- list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
- if (irq->intid != intid)
- continue;
-
- /*
- * This increases the refcount, the caller is expected to
- * call vgic_put_irq() later once it's finished with the IRQ.
- */
+ irq = xa_load(&dist->lpi_xa, intid);
+ if (irq)
vgic_get_irq_kref(irq);
- goto out_unlock;
- }
- irq = NULL;
-out_unlock:
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
return irq;
--
2.44.0.rc0.258.g7320e95886-goog
Start walking the LPI xarray to find pending LPIs in preparation for
the removal of the LPI linked-list. Note that the 'basic' iterator
is chosen here because each iteration needs to drop the xarray read lock
(RCU), as reads/writes to guest memory can potentially block.
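For reference, the distinction is roughly the following (sketch, not
the patched code):

  unsigned long index;
  struct vgic_irq *irq;

  /*
   * 'basic' iterator: each lookup takes and drops the RCU read lock
   * internally, so the loop body is allowed to block.
   */
  xa_for_each(&dist->lpi_xa, index, irq) {
          /* e.g. kvm_read_guest() / kvm_write_guest() here */
  }

whereas the 'advanced' xas_for_each() iterator expects the caller to
hold the RCU read lock across the entire walk, so its body must not
block.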
Signed-off-by: Oliver Upton <[email protected]>
---
arch/arm64/kvm/vgic/vgic-v3.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 9465d3706ab9..4ea3340786b9 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -380,6 +380,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
struct vgic_irq *irq;
gpa_t last_ptr = ~(gpa_t)0;
bool vlpi_avail = false;
+ unsigned long index;
int ret = 0;
u8 val;
@@ -396,7 +397,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
vlpi_avail = true;
}
- list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
+ xa_for_each(&dist->lpi_xa, index, irq) {
int byte_offset, bit_nr;
struct kvm_vcpu *vcpu;
gpa_t pendbase, ptr;
--
2.44.0.rc0.258.g7320e95886-goog
Start iterating the LPI xarray in anticipation of removing the LPI
linked-list.
Signed-off-by: Oliver Upton <[email protected]>
---
arch/arm64/kvm/vgic/vgic-its.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index fb2d3c356984..9ce2edfadd11 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -335,6 +335,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
{
struct vgic_dist *dist = &kvm->arch.vgic;
+ XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET);
struct vgic_irq *irq;
unsigned long flags;
u32 *intids;
@@ -353,7 +354,9 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
return -ENOMEM;
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
- list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
+ rcu_read_lock();
+
+ xas_for_each(&xas, irq, INTERRUPT_ID_BITS_ITS) {
if (i == irq_count)
break;
/* We don't need to "get" the IRQ, as we hold the list lock. */
@@ -361,6 +364,8 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
continue;
intids[i++] = irq->intid;
}
+
+ rcu_read_unlock();
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
*intid_ptr = intids;
--
2.44.0.rc0.258.g7320e95886-goog
All readers of LPI configuration have been transitioned to use the LPI
xarray. Get rid of the linked-list altogether.
Signed-off-by: Oliver Upton <[email protected]>
---
arch/arm64/kvm/vgic/vgic-init.c | 1 -
arch/arm64/kvm/vgic/vgic-its.c | 8 ++------
arch/arm64/kvm/vgic/vgic.c | 1 -
include/kvm/arm_vgic.h | 2 --
4 files changed, 2 insertions(+), 10 deletions(-)
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 411719053107..e25672d6e846 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -53,7 +53,6 @@ void kvm_vgic_early_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- INIT_LIST_HEAD(&dist->lpi_list_head);
INIT_LIST_HEAD(&dist->lpi_translation_cache);
raw_spin_lock_init(&dist->lpi_list_lock);
xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 9ce2edfadd11..0d1cfc776f47 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -58,7 +58,6 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
return ERR_PTR(ret);
}
- INIT_LIST_HEAD(&irq->lpi_list);
INIT_LIST_HEAD(&irq->ap_list);
raw_spin_lock_init(&irq->irq_lock);
@@ -74,10 +73,8 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
* There could be a race with another vgic_add_lpi(), so we need to
* check that we don't add a second list entry with the same LPI.
*/
- list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) {
- if (oldirq->intid != intid)
- continue;
-
+ oldirq = xa_load(&dist->lpi_xa, intid);
+ if (oldirq) {
/* Someone was faster with adding this LPI, lets use that. */
kfree(irq);
irq = oldirq;
@@ -99,7 +96,6 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
goto out_unlock;
}
- list_add_tail(&irq->lpi_list, &dist->lpi_list_head);
dist->lpi_list_count++;
out_unlock:
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index d90c42ff051d..e58ce68e325c 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -121,7 +121,6 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
if (!kref_put(&irq->refcount, vgic_irq_release))
return;
- list_del(&irq->lpi_list);
xa_erase(&dist->lpi_xa, irq->intid);
dist->lpi_list_count--;
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 795b35656b54..aeff363e3ba6 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -117,7 +117,6 @@ struct irq_ops {
struct vgic_irq {
raw_spinlock_t irq_lock; /* Protects the content of the struct */
- struct list_head lpi_list; /* Used to link all LPIs together */
struct list_head ap_list;
struct kvm_vcpu *vcpu; /* SGIs and PPIs: The VCPU
@@ -277,7 +276,6 @@ struct vgic_dist {
/* Protects the lpi_list and the count value below. */
raw_spinlock_t lpi_list_lock;
struct xarray lpi_xa;
- struct list_head lpi_list_head;
int lpi_list_count;
/* LPI translation cache */
--
2.44.0.rc0.258.g7320e95886-goog
Switch to using atomics for LPI accounting, allowing vgic_irq references
to be dropped in parallel.
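The change to the put path boils down to the following (sketch):

  /* before: counter protected by the lpi_list_lock */
  raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
  dist->lpi_list_count--;
  raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);

  /* after: no shared lock needed just for the accounting */
  atomic_dec(&dist->lpi_count);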
Signed-off-by: Oliver Upton <[email protected]>
---
arch/arm64/kvm/vgic/vgic-debug.c | 2 +-
arch/arm64/kvm/vgic/vgic-its.c | 4 ++--
arch/arm64/kvm/vgic/vgic.c | 2 +-
include/kvm/arm_vgic.h | 4 ++--
4 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index 85606a531dc3..389025ce7749 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -149,7 +149,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2");
seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
if (v3)
- seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count);
+ seq_printf(s, "nr_lpis:\t%d\n", atomic_read(&dist->lpi_count));
seq_printf(s, "enabled:\t%d\n", dist->enabled);
seq_printf(s, "\n");
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 0d1cfc776f47..27a7451ad8b9 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -96,7 +96,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
goto out_unlock;
}
- dist->lpi_list_count++;
+ atomic_inc(&dist->lpi_count);
out_unlock:
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
@@ -344,7 +344,7 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
* command). If coming from another path (such as enabling LPIs),
* we must be careful not to overrun the array.
*/
- irq_count = READ_ONCE(dist->lpi_list_count);
+ irq_count = atomic_read(&dist->lpi_count);
intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT);
if (!intids)
return -ENOMEM;
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index e58ce68e325c..5988d162b765 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -122,7 +122,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
return;
xa_erase(&dist->lpi_xa, irq->intid);
- dist->lpi_list_count--;
+ atomic_dec(&dist->lpi_count);
kfree(irq);
}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index aeff363e3ba6..71e9d719533b 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -273,10 +273,10 @@ struct vgic_dist {
*/
u64 propbaser;
- /* Protects the lpi_list and the count value below. */
+ /* Protects the lpi_list. */
raw_spinlock_t lpi_list_lock;
struct xarray lpi_xa;
- int lpi_list_count;
+ atomic_t lpi_count;
/* LPI translation cache */
struct list_head lpi_translation_cache;
--
2.44.0.rc0.258.g7320e95886-goog
The LPI xarray's xa_lock is sufficient for synchronizing writers when
freeing a given LPI. Furthermore, readers can only take a new reference
on an IRQ whose refcount is already nonzero.
Stop taking the lpi_list_lock unnecessarily and get rid of
__vgic_put_lpi_locked().
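For context, xa_erase() already serializes removals on the xarray's
internal lock; it is implemented roughly as:

  void *xa_erase(struct xarray *xa, unsigned long index)
  {
          void *entry;

          xa_lock(xa);
          entry = __xa_erase(xa, index);
          xa_unlock(xa);

          return entry;
  }

so dropping the lpi_list_lock does not leave the erase unserialized.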
Signed-off-by: Oliver Upton <[email protected]>
---
arch/arm64/kvm/vgic/vgic-its.c | 4 ++--
arch/arm64/kvm/vgic/vgic.c | 21 ++++-----------------
arch/arm64/kvm/vgic/vgic.h | 1 -
3 files changed, 6 insertions(+), 20 deletions(-)
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index c3d1bca0b458..d84cb7618c59 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -647,7 +647,7 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
* was in the cache, and increment it on the new interrupt.
*/
if (cte->irq)
- __vgic_put_lpi_locked(kvm, cte->irq);
+ vgic_put_irq(kvm, cte->irq);
/*
* The irq refcount is guaranteed to be nonzero while holding the
@@ -684,7 +684,7 @@ void vgic_its_invalidate_cache(struct kvm *kvm)
if (!cte->irq)
break;
- __vgic_put_lpi_locked(kvm, cte->irq);
+ vgic_put_irq(kvm, cte->irq);
cte->irq = NULL;
}
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 2a288d6c0be7..c8208f81fdc8 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -110,13 +110,13 @@ static void vgic_irq_release(struct kref *ref)
{
}
-/*
- * Drop the refcount on the LPI. Must be called with lpi_list_lock held.
- */
-void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
+void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
struct vgic_dist *dist = &kvm->arch.vgic;
+ if (irq->intid < VGIC_MIN_LPI)
+ return;
+
if (!kref_put(&irq->refcount, vgic_irq_release))
return;
@@ -126,19 +126,6 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
kfree_rcu(irq, rcu);
}
-void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- unsigned long flags;
-
- if (irq->intid < VGIC_MIN_LPI)
- return;
-
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
- __vgic_put_lpi_locked(kvm, irq);
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
-}
-
void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index f874b9932c5a..0c2b82de8fa3 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -180,7 +180,6 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
gpa_t addr, int len);
struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
u32 intid);
-void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq);
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
bool vgic_get_phys_line_level(struct vgic_irq *irq);
void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
--
2.44.0.rc0.258.g7320e95886-goog
Free the vgic_irq structs in an RCU-safe manner to allow reads of the
LPI configuration data to happen in parallel with the release of LPIs.
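The release path becomes the usual RCU deferral (sketch):

  xa_erase(&dist->lpi_xa, irq->intid);  /* unpublish the entry */
  kfree_rcu(irq, rcu);                  /* free after a grace period */

Readers traversing the xarray under rcu_read_lock() can therefore never
see the struct freed out from under them.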
Signed-off-by: Oliver Upton <[email protected]>
---
arch/arm64/kvm/vgic/vgic.c | 2 +-
include/kvm/arm_vgic.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 5988d162b765..be3ed4c5e1fa 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -124,7 +124,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
xa_erase(&dist->lpi_xa, irq->intid);
atomic_dec(&dist->lpi_count);
- kfree(irq);
+ kfree_rcu(irq, rcu);
}
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 71e9d719533b..47035946648e 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -117,6 +117,7 @@ struct irq_ops {
struct vgic_irq {
raw_spinlock_t irq_lock; /* Protects the content of the struct */
+ struct rcu_head rcu;
struct list_head ap_list;
struct kvm_vcpu *vcpu; /* SGIs and PPIs: The VCPU
--
2.44.0.rc0.258.g7320e95886-goog
Stop acquiring the lpi_list_lock in favor of RCU for protecting
the read-side critical section in vgic_get_lpi(). In order for this to
be safe, we also need to be careful not to take a reference on an irq
with a refcount of 0, as it is about to be freed.
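A sketch of the resulting lookup, annotated with the race it defends
against:

  rcu_read_lock();
  irq = xa_load(&dist->lpi_xa, intid);
  /*
   * A concurrent vgic_put_irq() may already have dropped the last
   * reference and called kfree_rcu(); kref_get_unless_zero() then
   * fails and the LPI must be treated as not present. The struct
   * itself remains valid until rcu_read_unlock().
   */
  if (!irq || !kref_get_unless_zero(&irq->refcount))
          irq = NULL;
  rcu_read_unlock();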
Signed-off-by: Oliver Upton <[email protected]>
---
arch/arm64/kvm/vgic/vgic.c | 9 ++++-----
arch/arm64/kvm/vgic/vgic.h | 14 +++++++++++---
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index be3ed4c5e1fa..128ae53a0a55 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -62,15 +62,14 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct vgic_irq *irq = NULL;
- unsigned long flags;
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+ rcu_read_lock();
irq = xa_load(&dist->lpi_xa, intid);
- if (irq)
- vgic_get_irq_kref(irq);
+ if (!vgic_try_get_irq_kref(irq))
+ irq = NULL;
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ rcu_read_unlock();
return irq;
}
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 8d134569d0a1..f874b9932c5a 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -220,12 +220,20 @@ void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
void vgic_v2_save_state(struct kvm_vcpu *vcpu);
void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
-static inline void vgic_get_irq_kref(struct vgic_irq *irq)
+static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq)
{
+ if (!irq)
+ return false;
+
if (irq->intid < VGIC_MIN_LPI)
- return;
+ return true;
- kref_get(&irq->refcount);
+ return kref_get_unless_zero(&irq->refcount);
+}
+
+static inline void vgic_get_irq_kref(struct vgic_irq *irq)
+{
+ WARN_ON_ONCE(!vgic_try_get_irq_kref(irq));
}
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
--
2.44.0.rc0.258.g7320e95886-goog
It will soon be possible for get() and put() calls to happen in
parallel, which means in most cases we must ensure the refcount is
nonzero when taking a new reference. Switch to using
vgic_try_get_irq_kref() where necessary, and document the few conditions
where an IRQ's refcount is guaranteed to be nonzero.
Signed-off-by: Oliver Upton <[email protected]>
---
arch/arm64/kvm/vgic/vgic-its.c | 18 ++++++++----------
arch/arm64/kvm/vgic/vgic.c | 3 ++-
2 files changed, 10 insertions(+), 11 deletions(-)
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 27a7451ad8b9..c3d1bca0b458 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -74,18 +74,11 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
* check that we don't add a second list entry with the same LPI.
*/
oldirq = xa_load(&dist->lpi_xa, intid);
- if (oldirq) {
+ if (vgic_try_get_irq_kref(oldirq)) {
/* Someone was faster with adding this LPI, lets use that. */
kfree(irq);
irq = oldirq;
- /*
- * This increases the refcount, the caller is expected to
- * call vgic_put_irq() on the returned pointer once it's
- * finished with the IRQ.
- */
- vgic_get_irq_kref(irq);
-
goto out_unlock;
}
@@ -609,8 +602,8 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
irq = __vgic_its_check_cache(dist, db, devid, eventid);
- if (irq)
- vgic_get_irq_kref(irq);
+ if (!vgic_try_get_irq_kref(irq))
+ irq = NULL;
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
@@ -656,6 +649,11 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
if (cte->irq)
__vgic_put_lpi_locked(kvm, cte->irq);
+ /*
+ * The irq refcount is guaranteed to be nonzero while holding the
+ * its_lock, as the ITE (and the reference it holds) cannot be freed.
+ */
+ lockdep_assert_held(&its->its_lock);
vgic_get_irq_kref(irq);
cte->db = db;
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 128ae53a0a55..2a288d6c0be7 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -394,7 +394,8 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
/*
* Grab a reference to the irq to reflect the fact that it is
- * now in the ap_list.
+ * now in the ap_list. This is safe as the caller must already hold a
+ * reference on the irq.
*/
vgic_get_irq_kref(irq);
list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
--
2.44.0.rc0.258.g7320e95886-goog
On 2024/2/17 2:41, Oliver Upton wrote:
> Start iterating the LPI xarray in anticipation of removing the LPI
> linked-list.
>
> Signed-off-by: Oliver Upton <[email protected]>
> ---
> arch/arm64/kvm/vgic/vgic-its.c | 7 ++++++-
> 1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
> index fb2d3c356984..9ce2edfadd11 100644
> --- a/arch/arm64/kvm/vgic/vgic-its.c
> +++ b/arch/arm64/kvm/vgic/vgic-its.c
> @@ -335,6 +335,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
> int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
> {
> struct vgic_dist *dist = &kvm->arch.vgic;
> + XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET);
> struct vgic_irq *irq;
> unsigned long flags;
> u32 *intids;
> @@ -353,7 +354,9 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
> return -ENOMEM;
>
> raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
> - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
> + rcu_read_lock();
> +
> + xas_for_each(&xas, irq, INTERRUPT_ID_BITS_ITS) {
We should use '(1 << INTERRUPT_ID_BITS_ITS) - 1' to represent the maximum
LPI interrupt ID.
> if (i == irq_count)
> break;
> /* We don't need to "get" the IRQ, as we hold the list lock. */
> @@ -361,6 +364,8 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
> continue;
> intids[i++] = irq->intid;
> }
> +
> + rcu_read_unlock();
> raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
>
> *intid_ptr = intids;
Thanks,
Zenghui
On Sun, 18 Feb 2024 08:46:53 +0000,
Zenghui Yu <[email protected]> wrote:
>
> On 2024/2/17 2:41, Oliver Upton wrote:
> > Start iterating the LPI xarray in anticipation of removing the LPI
> > linked-list.
> >
> > Signed-off-by: Oliver Upton <[email protected]>
> > ---
> > arch/arm64/kvm/vgic/vgic-its.c | 7 ++++++-
> > 1 file changed, 6 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
> > index fb2d3c356984..9ce2edfadd11 100644
> > --- a/arch/arm64/kvm/vgic/vgic-its.c
> > +++ b/arch/arm64/kvm/vgic/vgic-its.c
> > @@ -335,6 +335,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
> > int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
> > {
> > struct vgic_dist *dist = &kvm->arch.vgic;
> > + XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET);
> > struct vgic_irq *irq;
> > unsigned long flags;
> > u32 *intids;
> > @@ -353,7 +354,9 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
> > return -ENOMEM;
> > raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
> > - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
> > + rcu_read_lock();
> > +
> > + xas_for_each(&xas, irq, INTERRUPT_ID_BITS_ITS) {
>
> We should use '(1 << INTERRUPT_ID_BITS_ITS) - 1' to represent the maximum
> LPI interrupt ID.
Huh, well caught! I'm not even sure how it works, as that's way
smaller than the start of the walk (8192). Probably doesn't.
An alternative would be to use max_lpis_propbaser(), but I'm not sure
we always have a valid PROPBASER value set when we start using this
function. Worth investigating though.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
On Sun, Feb 18, 2024 at 10:28:19AM +0000, Marc Zyngier wrote:
> On Sun, 18 Feb 2024 08:46:53 +0000,
> Zenghui Yu <[email protected]> wrote:
> >
> > On 2024/2/17 2:41, Oliver Upton wrote:
> > > Start iterating the LPI xarray in anticipation of removing the LPI
> > > linked-list.
> > >
> > > Signed-off-by: Oliver Upton <[email protected]>
> > > ---
> > > arch/arm64/kvm/vgic/vgic-its.c | 7 ++++++-
> > > 1 file changed, 6 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
> > > index fb2d3c356984..9ce2edfadd11 100644
> > > --- a/arch/arm64/kvm/vgic/vgic-its.c
> > > +++ b/arch/arm64/kvm/vgic/vgic-its.c
> > > @@ -335,6 +335,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
> > > int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
> > > {
> > > struct vgic_dist *dist = &kvm->arch.vgic;
> > > + XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET);
> > > struct vgic_irq *irq;
> > > unsigned long flags;
> > > u32 *intids;
> > > @@ -353,7 +354,9 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
> > > return -ENOMEM;
> > > raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
> > > - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
> > > + rcu_read_lock();
> > > +
> > > + xas_for_each(&xas, irq, INTERRUPT_ID_BITS_ITS) {
> >
> > We should use '(1 << INTERRUPT_ID_BITS_ITS) - 1' to represent the maximum
> > LPI interrupt ID.
/facepalm
Thanks Zenghui!
> Huh, well caught! I'm not even sure how it works, as that's way
> smaller than the start of the walk (8192). Probably doesn't.
>
> An alternative would be to use max_lpis_propbaser(), but I'm not sure
> we always have a valid PROPBASER value set when we start using this
> function. Worth investigating though.
Given the plans to eventually replace this with xarray marks, I'd vote
for doing the lazy thing and deciding this at compile time.
I can squash this in when I apply the series if the rest of it isn't
offensive, otherwise respin with the change.
--
Thanks,
Oliver
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index d84cb7618c59..f6025886071c 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -316,6 +316,8 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
return 0;
}
+#define GIC_LPI_MAX_INTID ((1 << INTERRUPT_ID_BITS_ITS) - 1)
+
/*
* Create a snapshot of the current LPIs targeting @vcpu, so that we can
* enumerate those LPIs without holding any lock.
@@ -345,7 +347,7 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
rcu_read_lock();
- xas_for_each(&xas, irq, INTERRUPT_ID_BITS_ITS) {
+ xas_for_each(&xas, irq, GIC_LPI_MAX_INTID) {
if (i == irq_count)
break;
/* We don't need to "get" the IRQ, as we hold the list lock. */
On 2024/2/17 02:41, Oliver Upton wrote:
> Using a linked-list for LPIs is less than ideal as it of course requires
> iterative searches to find a particular entry. An xarray is a better
> data structure for this use case, as it provides faster searches and can
> still handle a potentially sparse range of INTID allocations.
>
> Start by storing LPIs in an xarray, punting usage of the xarray to a
> subsequent change.
>
> Signed-off-by: Oliver Upton <[email protected]>
[..]
> diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
> index db2a95762b1b..c126014f8395 100644
> --- a/arch/arm64/kvm/vgic/vgic.c
> +++ b/arch/arm64/kvm/vgic/vgic.c
> @@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
> return;
>
> list_del(&irq->lpi_list);
> + xa_erase(&dist->lpi_xa, irq->intid);
We can get here *after* grabbing the vgic_cpu->ap_list_lock (e.g.,
vgic_flush_pending_lpis()/vgic_put_irq()). And according to vGIC's
"Locking order", we should disable interrupts before taking the xa_lock
in xa_erase(); otherwise we would see bad things like deadlock.
It's not a problem before patch #10, where we drop the lpi_list_lock and
start taking the xa_lock with interrupts enabled. Consider switching to
use xa_erase_irq() instead?
> dist->lpi_list_count--;
>
> kfree(irq);
Thanks,
Zenghui
Hi Zenghui,
On Wed, Feb 21, 2024 at 12:30:24AM +0800, Zenghui Yu wrote:
> On 2024/2/17 02:41, Oliver Upton wrote:
> > Using a linked-list for LPIs is less than ideal as it of course requires
> > iterative searches to find a particular entry. An xarray is a better
> > data structure for this use case, as it provides faster searches and can
> > still handle a potentially sparse range of INTID allocations.
> >
> > Start by storing LPIs in an xarray, punting usage of the xarray to a
> > subsequent change.
> >
> > Signed-off-by: Oliver Upton <[email protected]>
>
> [..]
>
> > diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
> > index db2a95762b1b..c126014f8395 100644
> > --- a/arch/arm64/kvm/vgic/vgic.c
> > +++ b/arch/arm64/kvm/vgic/vgic.c
> > @@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
> > return;
> > list_del(&irq->lpi_list);
> > + xa_erase(&dist->lpi_xa, irq->intid);
>
> We can get here *after* grabbing the vgic_cpu->ap_list_lock (e.g.,
> vgic_flush_pending_lpis()/vgic_put_irq()). And according to vGIC's
> "Locking order", we should disable interrupts before taking the xa_lock
> in xa_erase(); otherwise we would see bad things like deadlock.
Nice catch!
Yeah, the general intention was to disable interrupts outside of the
xa_lock, however:
> It's not a problem before patch #10, where we drop the lpi_list_lock and
> start taking the xa_lock with interrupts enabled. Consider switching to
> use xa_erase_irq() instead?
I don't think this change is safe until #10, as the implied xa_unlock_irq()
would re-enable interrupts before the lpi_list_lock is dropped. Or do I
have wires crossed?
--
Thanks,
Oliver
On Tue, 20 Feb 2024 16:30:24 +0000,
Zenghui Yu <[email protected]> wrote:
>
> On 2024/2/17 02:41, Oliver Upton wrote:
> > Using a linked-list for LPIs is less than ideal as it of course requires
> > iterative searches to find a particular entry. An xarray is a better
> > data structure for this use case, as it provides faster searches and can
> > still handle a potentially sparse range of INTID allocations.
> >
> > Start by storing LPIs in an xarray, punting usage of the xarray to a
> > subsequent change.
> >
> > Signed-off-by: Oliver Upton <[email protected]>
>
> [..]
>
> > diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
> > index db2a95762b1b..c126014f8395 100644
> > --- a/arch/arm64/kvm/vgic/vgic.c
> > +++ b/arch/arm64/kvm/vgic/vgic.c
> > @@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
> > return;
> > list_del(&irq->lpi_list);
> > + xa_erase(&dist->lpi_xa, irq->intid);
>
> We can get here *after* grabbing the vgic_cpu->ap_list_lock (e.g.,
> vgic_flush_pending_lpis()/vgic_put_irq()). And according to vGIC's
> "Locking order", we should disable interrupts before taking the xa_lock
> in xa_erase(); otherwise we would see bad things like deadlock.
>
> It's not a problem before patch #10, where we drop the lpi_list_lock and
> start taking the xa_lock with interrupts enabled. Consider switching to
> use xa_erase_irq() instead?
But does it actually work? xa_erase_irq() uses spin_lock_irq(),
followed by spin_unlock_irq(). So if we were already in interrupt
context, we would end up re-enabling interrupts. At least, this should
be the irqsave version.
The question is whether we manipulate LPIs (in the get/put sense) on
the back of an interrupt handler (like we do for the timer). It isn't
obvious to me that it is the case, but I haven't spent much time
staring at this code recently.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
On Tue, Feb 20, 2024 at 05:24:50PM +0000, Marc Zyngier wrote:
> On Tue, 20 Feb 2024 16:30:24 +0000,
> Zenghui Yu <[email protected]> wrote:
> >
> > On 2024/2/17 02:41, Oliver Upton wrote:
> > > Using a linked-list for LPIs is less than ideal as it of course requires
> > > iterative searches to find a particular entry. An xarray is a better
> > > data structure for this use case, as it provides faster searches and can
> > > still handle a potentially sparse range of INTID allocations.
> > >
> > > Start by storing LPIs in an xarray, punting usage of the xarray to a
> > > subsequent change.
> > >
> > > Signed-off-by: Oliver Upton <[email protected]>
> >
> > [..]
> >
> > > diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
> > > index db2a95762b1b..c126014f8395 100644
> > > --- a/arch/arm64/kvm/vgic/vgic.c
> > > +++ b/arch/arm64/kvm/vgic/vgic.c
> > > @@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
> > > return;
> > > list_del(&irq->lpi_list);
> > > + xa_erase(&dist->lpi_xa, irq->intid);
> >
> > We can get here *after* grabbing the vgic_cpu->ap_list_lock (e.g.,
> > vgic_flush_pending_lpis()/vgic_put_irq()). And according to vGIC's
> > "Locking order", we should disable interrupts before taking the xa_lock
> > in xa_erase(); otherwise we would see bad things like deadlock.
> >
> > It's not a problem before patch #10, where we drop the lpi_list_lock and
> > start taking the xa_lock with interrupts enabled. Consider switching to
> > use xa_erase_irq() instead?
>
> But does it actually work? xa_erase_irq() uses spin_lock_irq(),
> followed by spin_unlock_irq(). So if we were already in interrupt
> context, we would end up re-enabling interrupts. At least, this should
> be the irqsave version.
This is what I was planning to do, although I may kick it out to patch
10 to avoid churn.
> The question is whether we manipulate LPIs (in the get/put sense) on
> the back of an interrupt handler (like we do for the timer). It isn't
> obvious to me that it is the case, but I haven't spent much time
> staring at this code recently.
I think we can get into here both from contexts w/ interrupts disabled
or enabled. irqfd_wakeup() expects to be called w/ interrupts disabled.
All the more reason to use irqsave() / irqrestore() flavors of all of
this, and a reminder to go check all callsites that implicitly take the
xa_lock.
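Concretely, since there is no xa_erase_irqsave() helper, I think that
means open-coding the lock in the put path, something like (untested):

  unsigned long flags;

  xa_lock_irqsave(&dist->lpi_xa, flags);
  __xa_erase(&dist->lpi_xa, irq->intid);
  xa_unlock_irqrestore(&dist->lpi_xa, flags);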
--
Thanks,
Oliver
On Tue, 20 Feb 2024 17:43:03 +0000,
Oliver Upton <[email protected]> wrote:
>
> On Tue, Feb 20, 2024 at 05:24:50PM +0000, Marc Zyngier wrote:
> > On Tue, 20 Feb 2024 16:30:24 +0000,
> > Zenghui Yu <[email protected]> wrote:
> > >
> > > On 2024/2/17 02:41, Oliver Upton wrote:
> > > > Using a linked-list for LPIs is less than ideal as it of course requires
> > > > iterative searches to find a particular entry. An xarray is a better
> > > > data structure for this use case, as it provides faster searches and can
> > > > still handle a potentially sparse range of INTID allocations.
> > > >
> > > > Start by storing LPIs in an xarray, punting usage of the xarray to a
> > > > subsequent change.
> > > >
> > > > Signed-off-by: Oliver Upton <[email protected]>
> > >
> > > [..]
> > >
> > > > diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
> > > > index db2a95762b1b..c126014f8395 100644
> > > > --- a/arch/arm64/kvm/vgic/vgic.c
> > > > +++ b/arch/arm64/kvm/vgic/vgic.c
> > > > @@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
> > > > return;
> > > > list_del(&irq->lpi_list);
> > > > + xa_erase(&dist->lpi_xa, irq->intid);
> > >
> > > We can get here *after* grabbing the vgic_cpu->ap_list_lock (e.g.,
> > > vgic_flush_pending_lpis()/vgic_put_irq()). And according to vGIC's
> > > "Locking order", we should disable interrupts before taking the xa_lock
> > > in xa_erase(); otherwise we would see bad things like deadlock.
> > >
> > > It's not a problem before patch #10, where we drop the lpi_list_lock and
> > > start taking the xa_lock with interrupts enabled. Consider switching to
> > > use xa_erase_irq() instead?
> >
> > But does it actually work? xa_erase_irq() uses spin_lock_irq(),
> > followed by spin_unlock_irq(). So if we were already in interrupt
> > context, we would end up re-enabling interrupts. At least, this should
> > be the irqsave version.
>
> This is what I was planning to do, although I may kick it out to patch
> 10 to avoid churn.
>
> > The question is whether we manipulate LPIs (in the get/put sense) on
> > the back of an interrupt handler (like we do for the timer). It isn't
> > obvious to me that it is the case, but I haven't spent much time
> > staring at this code recently.
>
> I think we can get into here both from contexts w/ interrupts disabled
> or enabled. irqfd_wakeup() expects to be called w/ interrupts disabled.
>
> All the more reason to use irqsave() / irqrestore() flavors of all of
> this, and a reminder to go check all callsites that implicitly take the
> xa_lock.
Sounds good. Maybe you can also update the locking order
"documentation" to include the xa_lock? I expect that it will
ultimately replace lpi_list_lock.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
On Tue, Feb 20, 2024 at 05:53:55PM +0000, Marc Zyngier wrote:
> On Tue, 20 Feb 2024 17:43:03 +0000, Oliver Upton <[email protected]> wrote:
> > I think we can get into here both from contexts w/ interrupts disabled
> > or enabled. irqfd_wakeup() expects to be called w/ interrupts disabled.
> >
> > All the more reason to use irqsave() / irqrestore() flavors of all of
> > this, and a reminder to go check all callsites that implicitly take the
> > xa_lock.
>
> Sounds good. Maybe you can also update the locking order
> "documentation" to include the xa_lock? I expect that it will
> ultimately replace lpi_list_lock.
Yep, I got to the point of deleting the lpi_list_lock on the full
series, which is where I update the documentation. I really didn't want
people to know I'm adding yet another layer of locking in the interim...
Anyways, I think there's sufficient feedback to justify a respin. I'll
make sure the documentation is updated w/ the xa_lock for the stuff I'm
trying to land in 6.9.
--
Thanks,
Oliver
On 2024/2/21 1:15, Oliver Upton wrote:
> Hi Zenghui,
>
> On Wed, Feb 21, 2024 at 12:30:24AM +0800, Zenghui Yu wrote:
>> On 2024/2/17 02:41, Oliver Upton wrote:
>>> Using a linked-list for LPIs is less than ideal as it of course requires
>>> iterative searches to find a particular entry. An xarray is a better
>>> data structure for this use case, as it provides faster searches and can
>>> still handle a potentially sparse range of INTID allocations.
>>>
>>> Start by storing LPIs in an xarray, punting usage of the xarray to a
>>> subsequent change.
>>>
>>> Signed-off-by: Oliver Upton <[email protected]>
>>
>> [..]
>>
>>> diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
>>> index db2a95762b1b..c126014f8395 100644
>>> --- a/arch/arm64/kvm/vgic/vgic.c
>>> +++ b/arch/arm64/kvm/vgic/vgic.c
>>> @@ -131,6 +131,7 @@ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
>>> return;
>>> list_del(&irq->lpi_list);
>>> + xa_erase(&dist->lpi_xa, irq->intid);
>>
>> We can get here *after* grabbing the vgic_cpu->ap_list_lock (e.g.,
>> vgic_flush_pending_lpis()/vgic_put_irq()). And according to vGIC's
>> "Locking order", we should disable interrupts before taking the xa_lock
>> in xa_erase(); otherwise we would see bad things like deadlock.
>
> Nice catch!
>
> Yeah, the general intention was to disable interrupts outside of the
> xa_lock, however:
>
>> It's not a problem before patch #10, where we drop the lpi_list_lock and
>> start taking the xa_lock with interrupts enabled. Consider switching to
>> use xa_erase_irq() instead?
>
> I don't think this change is safe until #10, as the implied xa_unlock_irq()
> would re-enable interrupts before the lpi_list_lock is dropped. Or do I
> have wires crossed?
No, you're right. My intention was to fix it in patch #10. And as
you've both pointed out, using xa_erase_irq() can hardly be the correct
fix. My mistake :-(.
Thanks,
Zenghui
On Wed, Feb 21, 2024 at 01:11:02PM +0800, Zenghui Yu wrote:
> No, you're right. My intention was to fix it in patch #10. And as
> you've both pointed out, using xa_erase_irq() can hardly be the correct
> fix. My mistake :-( .
Still -- thank you for pointing out the very obvious bug at the end of
the series :)
--
Thanks,
Oliver