Legacy device assignment attempted to only do lightweight work when
injecting interrupts from atomic context. This will be important
if we let VFIO inject interrupts from a non-threaded interrupt handler.
This series lets irqfd ditinguish between atomic-context and generic
interrupt injection.
Patch 1 is the real change, everything else cleans up what's left behind.
Paolo
Paolo Bonzini (3):
KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic
KVM: device assignment: remove pointless #ifdefs
KVM: x86: move kvm_set_irq_inatomic to legacy device assignment
arch/x86/kvm/assigned-dev.c | 62 +++++++++++++++++++++++++++------------------
arch/x86/kvm/irq_comm.c | 44 +++++---------------------------
include/linux/kvm_host.h | 8 +++---
virt/kvm/eventfd.c | 11 +++-----
4 files changed, 50 insertions(+), 75 deletions(-)
--
1.8.3.1
We do not want to do too much work in atomic context, in particular
not walking all the VCPUs of the virtual machine. So we want
to distinguish the architecture-specific injection function for irqfd
from kvm_set_msi. Since it's still empty, reuse the newly added
kvm_arch_set_irq and rename it to kvm_arch_set_irq_inatomic.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/irq_comm.c | 14 ++++++++------
include/linux/kvm_host.h | 7 +++----
virt/kvm/eventfd.c | 11 ++++-------
3 files changed, 15 insertions(+), 17 deletions(-)
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8f4499c7ffc1..26d830c1b7af 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -124,12 +124,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
}
-static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
- struct kvm *kvm)
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level,
+ bool line_status)
{
struct kvm_lapic_irq irq;
int r;
+ if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
+ return -EWOULDBLOCK;
+
kvm_set_msi_irq(e, &irq);
if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
@@ -165,10 +169,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
idx = srcu_read_lock(&kvm->irq_srcu);
if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
e = &entries[0];
- if (likely(e->type == KVM_IRQ_ROUTING_MSI))
- ret = kvm_set_msi_inatomic(e, kvm);
- else
- ret = -EWOULDBLOCK;
+ ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
+ irq, level);
}
srcu_read_unlock(&kvm->irq_srcu, idx);
return ret;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index eba9caebc9c1..42fb9e089fc9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -828,10 +828,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
int irq_source_id, int level, bool line_status);
-
-int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm,
- int irq_source_id, int level, bool line_status);
-
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id,
+ int level, bool line_status);
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_notify_acked_gsi(struct kvm *kvm, int gsi);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index e29fd2640709..46dbc0a7dfc1 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -171,7 +171,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}
-int __attribute__((weak)) kvm_arch_set_irq(
+int __attribute__((weak)) kvm_arch_set_irq_inatomic(
struct kvm_kernel_irq_routing_entry *irq,
struct kvm *kvm, int irq_source_id,
int level,
@@ -201,12 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
irq = irqfd->irq_entry;
} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
/* An event has been signaled, inject an interrupt */
- if (irq.type == KVM_IRQ_ROUTING_MSI)
- kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
- false);
- else if (kvm_arch_set_irq(&irq, kvm,
- KVM_USERSPACE_IRQ_SOURCE_ID, 1,
- false) == -EWOULDBLOCK)
+ if (kvm_arch_set_irq_inatomic(&irq, kvm,
+ KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+ false) == -EWOULDBLOCK)
schedule_work(&irqfd->inject);
srcu_read_unlock(&kvm->irq_srcu, idx);
}
--
1.8.3.1
The symbols are always defined.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/assigned-dev.c | 25 -------------------------
1 file changed, 25 deletions(-)
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
index d090ecf08809..1c17ee807ef7 100644
--- a/arch/x86/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c
@@ -131,7 +131,6 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
return IRQ_HANDLED;
}
-#ifdef __KVM_HAVE_MSI
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
@@ -150,9 +149,7 @@ static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
return IRQ_HANDLED;
}
-#endif
-#ifdef __KVM_HAVE_MSIX
static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
{
struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
@@ -183,7 +180,6 @@ static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
return IRQ_HANDLED;
}
-#endif
/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
@@ -386,7 +382,6 @@ static int assigned_device_enable_host_intx(struct kvm *kvm,
return 0;
}
-#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_host_msi(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev)
{
@@ -408,9 +403,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
return 0;
}
-#endif
-#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_host_msix(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev)
{
@@ -443,8 +436,6 @@ err:
return r;
}
-#endif
-
static int assigned_device_enable_guest_intx(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
struct kvm_assigned_irq *irq)
@@ -454,7 +445,6 @@ static int assigned_device_enable_guest_intx(struct kvm *kvm,
return 0;
}
-#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_guest_msi(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
struct kvm_assigned_irq *irq)
@@ -463,9 +453,7 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm,
dev->ack_notifier.gsi = -1;
return 0;
}
-#endif
-#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_guest_msix(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
struct kvm_assigned_irq *irq)
@@ -474,7 +462,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm,
dev->ack_notifier.gsi = -1;
return 0;
}
-#endif
static int assign_host_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
@@ -492,16 +479,12 @@ static int assign_host_irq(struct kvm *kvm,
case KVM_DEV_IRQ_HOST_INTX:
r = assigned_device_enable_host_intx(kvm, dev);
break;
-#ifdef __KVM_HAVE_MSI
case KVM_DEV_IRQ_HOST_MSI:
r = assigned_device_enable_host_msi(kvm, dev);
break;
-#endif
-#ifdef __KVM_HAVE_MSIX
case KVM_DEV_IRQ_HOST_MSIX:
r = assigned_device_enable_host_msix(kvm, dev);
break;
-#endif
default:
r = -EINVAL;
}
@@ -534,16 +517,12 @@ static int assign_guest_irq(struct kvm *kvm,
case KVM_DEV_IRQ_GUEST_INTX:
r = assigned_device_enable_guest_intx(kvm, dev, irq);
break;
-#ifdef __KVM_HAVE_MSI
case KVM_DEV_IRQ_GUEST_MSI:
r = assigned_device_enable_guest_msi(kvm, dev, irq);
break;
-#endif
-#ifdef __KVM_HAVE_MSIX
case KVM_DEV_IRQ_GUEST_MSIX:
r = assigned_device_enable_guest_msix(kvm, dev, irq);
break;
-#endif
default:
r = -EINVAL;
}
@@ -826,7 +805,6 @@ out:
}
-#ifdef __KVM_HAVE_MSIX
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
struct kvm_assigned_msix_nr *entry_nr)
{
@@ -906,7 +884,6 @@ msix_entry_out:
return r;
}
-#endif
static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
@@ -1012,7 +989,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
goto out;
break;
}
-#ifdef __KVM_HAVE_MSIX
case KVM_ASSIGN_SET_MSIX_NR: {
struct kvm_assigned_msix_nr entry_nr;
r = -EFAULT;
@@ -1033,7 +1009,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
goto out;
break;
}
-#endif
case KVM_ASSIGN_SET_INTX_MASK: {
struct kvm_assigned_pci_dev assigned_dev;
--
1.8.3.1
The function is not used outside device assignment, and
kvm_arch_set_irq_inatomic has a different prototype. Move it here and
make it static to avoid confusion.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/assigned-dev.c | 37 +++++++++++++++++++++++++++++++++++++
arch/x86/kvm/irq_comm.c | 34 ----------------------------------
include/linux/kvm_host.h | 1 -
3 files changed, 37 insertions(+), 35 deletions(-)
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
index 1c17ee807ef7..9dc091acd5fb 100644
--- a/arch/x86/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c
@@ -21,6 +21,7 @@
#include <linux/fs.h>
#include "irq.h"
#include "assigned-dev.h"
+#include "trace/events/kvm.h"
struct kvm_assigned_dev_kernel {
struct kvm_irq_ack_notifier ack_notifier;
@@ -131,6 +132,42 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
return IRQ_HANDLED;
}
+/*
+ * Deliver an IRQ in an atomic context if we can, or return a failure,
+ * user can retry in a process context.
+ * Return value:
+ * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
+ * Other values - No need to retry.
+ */
+static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
+ int level)
+{
+ struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
+ struct kvm_kernel_irq_routing_entry *e;
+ int ret = -EINVAL;
+ int idx;
+
+ trace_kvm_set_irq(irq, level, irq_source_id);
+
+ /*
+ * Injection into either PIC or IOAPIC might need to scan all CPUs,
+ * which would need to be retried from thread context; when same GSI
+ * is connected to both PIC and IOAPIC, we'd have to report a
+ * partial failure here.
+ * Since there's no easy way to do this, we only support injecting MSI
+ * which is limited to 1:1 GSI mapping.
+ */
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
+ e = &entries[0];
+ ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
+ irq, level);
+ }
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+ return ret;
+}
+
+
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 26d830c1b7af..35b0dfb7f704 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -142,40 +142,6 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
return -EWOULDBLOCK;
}
-/*
- * Deliver an IRQ in an atomic context if we can, or return a failure,
- * user can retry in a process context.
- * Return value:
- * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
- * Other values - No need to retry.
- */
-int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
-{
- struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
- struct kvm_kernel_irq_routing_entry *e;
- int ret = -EINVAL;
- int idx;
-
- trace_kvm_set_irq(irq, level, irq_source_id);
-
- /*
- * Injection into either PIC or IOAPIC might need to scan all CPUs,
- * which would need to be retried from thread context; when same GSI
- * is connected to both PIC and IOAPIC, we'd have to report a
- * partial failure here.
- * Since there's no easy way to do this, we only support injecting MSI
- * which is limited to 1:1 GSI mapping.
- */
- idx = srcu_read_lock(&kvm->irq_srcu);
- if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
- e = &entries[0];
- ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
- irq, level);
- }
- srcu_read_unlock(&kvm->irq_srcu, idx);
- return ret;
-}
-
int kvm_request_irq_source_id(struct kvm *kvm)
{
unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 42fb9e089fc9..e5d517a9d922 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -825,7 +825,6 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status);
-int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
int irq_source_id, int level, bool line_status);
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
--
1.8.3.1
2015-11-02 13:21+0100, Paolo Bonzini:
> We do not want to do too much work in atomic context, in particular
> not walking all the VCPUs of the virtual machine. So we want
> to distinguish the architecture-specific injection function for irqfd
> from kvm_set_msi. Since it's still empty, reuse the newly added
> kvm_arch_set_irq and rename it to kvm_arch_set_irq_inatomic.
kvm/queue uses kvm_arch_set_irq since b7184313f4b9 ("kvm/x86: Hyper-V
synthetic interrupt controller").
Is synic going to be dropped before this patch is merged?
On 02/11/2015 15:59, Radim Krcmar wrote:
> > We do not want to do too much work in atomic context, in particular
> > not walking all the VCPUs of the virtual machine. So we want
> > to distinguish the architecture-specific injection function for irqfd
> > from kvm_set_msi. Since it's still empty, reuse the newly added
> > kvm_arch_set_irq and rename it to kvm_arch_set_irq_inatomic.
>
> kvm/queue uses kvm_arch_set_irq since b7184313f4b9 ("kvm/x86: Hyper-V
> synthetic interrupt controller").
>
> Is synic going to be dropped before this patch is merged?
Yes. Both because the Virtuozzo people confirmed that kvm_arch_set_irq
isn't needed for synic, and because synic is currently broken with APICv.
Paolo
2015-11-02 17:08+0100, Paolo Bonzini:
> On 02/11/2015 15:59, Radim Krcmar wrote:
>>> We do not want to do too much work in atomic context, in particular
>>> not walking all the VCPUs of the virtual machine. So we want
>>> to distinguish the architecture-specific injection function for irqfd
>>> from kvm_set_msi. Since it's still empty, reuse the newly added
>>> kvm_arch_set_irq and rename it to kvm_arch_set_irq_inatomic.
>>
>> kvm/queue uses kvm_arch_set_irq since b7184313f4b9 ("kvm/x86: Hyper-V
>> synthetic interrupt controller").
>>
>> Is synic going to be dropped before this patch is merged?
>
> Yes. Both because the Virtuozzo people confirmed that kvm_arch_set_irq
> isn't needed for synic, and because synic is currently broken with APICv.
Thanks.
(We can add direct delivery for |online vcpus| < X if performance with
low number of VCPUs happens to regress because of the schedule_work,)
Reviewed-by: Radim Krčmář <[email protected]>
[2/3] and [3/3] are good too.
On 02/11/2015 18:01, Radim Krcmar wrote:
>> > Yes. Both because the Virtuozzo people confirmed that kvm_arch_set_irq
>> > isn't needed for synic, and because synic is currently broken with APICv.
> Thanks.
>
> (We can add direct delivery for |online vcpus| < X if performance with
> low number of VCPUs happens to regress because of the schedule_work,)
Yeah, we will change the VFIO interrupt handler to non-threaded, which
will do more or less the same (what you lose now through schedule_work,
you will recoup by doing things directly in the ISR).
Paolo