As the current virtio-mmio only supports a single irq,
some advanced features such as vhost-net with irqfd
are not supported, and net performance is not
the best without vhost-net and irqfd support.
This patch makes virtio-mmio support requesting multiple
irqs like virtio-pci. With this patch and qemu assigning
multiple irqs to a virtio-mmio device, it is possible to use
vhost-net with irqfd on arm/arm64.
As arm doesn't support msi-x yet, we use GSIs for the
multiple irqs. In this patch we use "vm_try_to_find_vqs"
to check whether multiple irqs are supported, as
virtio-pci does.
Is this the right direction? Are there other ways to
make virtio-mmio support multiple irqs? Hope for feedback.
Thanks.
Signed-off-by: Shannon Zhao <[email protected]>
---
drivers/virtio/virtio_mmio.c | 234 ++++++++++++++++++++++++++++++++++++------
1 files changed, 203 insertions(+), 31 deletions(-)
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index c600ccf..2b7d935 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -122,6 +122,15 @@ struct virtio_mmio_device {
/* a list of queues so we can dispatch IRQs */
spinlock_t lock;
struct list_head virtqueues;
+
+ /* multiple irq support */
+ int single_irq_enabled;
+ /* Number of available irqs */
+ unsigned num_irqs;
+ /* Used number of irqs */
+ int used_irqs;
+ /* Name strings for interrupts. */
+ char (*vm_vq_names)[256];
};
struct virtio_mmio_vq_info {
@@ -229,33 +238,53 @@ static bool vm_notify(struct virtqueue *vq)
return true;
}
+/* Handle a configuration change: Tell driver if it wants to know. */
+static irqreturn_t vm_config_changed(int irq, void *opaque)
+{
+ struct virtio_mmio_device *vm_dev = opaque;
+ struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
+ struct virtio_driver, driver);
+
+ if (vdrv && vdrv->config_changed)
+ vdrv->config_changed(&vm_dev->vdev);
+ return IRQ_HANDLED;
+}
+
/* Notify all virtqueues on an interrupt. */
-static irqreturn_t vm_interrupt(int irq, void *opaque)
+static irqreturn_t vm_vring_interrupt(int irq, void *opaque)
{
struct virtio_mmio_device *vm_dev = opaque;
struct virtio_mmio_vq_info *info;
- struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
- struct virtio_driver, driver);
- unsigned long status;
+ irqreturn_t ret = IRQ_NONE;
unsigned long flags;
+
+ spin_lock_irqsave(&vm_dev->lock, flags);
+ list_for_each_entry(info, &vm_dev->virtqueues, node) {
+ if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
+ ret = IRQ_HANDLED;
+ }
+ spin_unlock_irqrestore(&vm_dev->lock, flags);
+
+ return ret;
+}
+
+/* Notify all virtqueues and handle a configuration
+ * change on an interrupt. */
+static irqreturn_t vm_interrupt(int irq, void *opaque)
+{
+ struct virtio_mmio_device *vm_dev = opaque;
+ unsigned long status;
irqreturn_t ret = IRQ_NONE;
/* Read and acknowledge interrupts */
status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS);
writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK);
- if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)
- && vdrv && vdrv->config_changed) {
- vdrv->config_changed(&vm_dev->vdev);
- ret = IRQ_HANDLED;
- }
+ if (unlikely(status & VIRTIO_MMIO_INT_CONFIG))
+ return vm_config_changed(irq, opaque);
- if (likely(status & VIRTIO_MMIO_INT_VRING)) {
- spin_lock_irqsave(&vm_dev->lock, flags);
- list_for_each_entry(info, &vm_dev->virtqueues, node)
- ret |= vring_interrupt(irq, info->vq);
- spin_unlock_irqrestore(&vm_dev->lock, flags);
- }
+ if (likely(status & VIRTIO_MMIO_INT_VRING))
+ return vm_vring_interrupt(irq, opaque);
return ret;
}
@@ -284,18 +313,98 @@ static void vm_del_vq(struct virtqueue *vq)
kfree(info);
}
-static void vm_del_vqs(struct virtio_device *vdev)
+static void vm_free_irqs(struct virtio_device *vdev)
{
+ int i;
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+
+ if (vm_dev->single_irq_enabled) {
+ free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
+ vm_dev->single_irq_enabled = 0;
+ }
+
+ for (i = 0; i < vm_dev->used_irqs; ++i)
+ free_irq(platform_get_irq(vm_dev->pdev, i), vm_dev);
+
+ vm_dev->num_irqs = 0;
+ vm_dev->used_irqs = 0;
+ kfree(vm_dev->vm_vq_names);
+ vm_dev->vm_vq_names = NULL;
+}
+
+static void vm_del_vqs(struct virtio_device *vdev)
+{
struct virtqueue *vq, *n;
list_for_each_entry_safe(vq, n, &vdev->vqs, list)
vm_del_vq(vq);
- free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
+ vm_free_irqs(vdev);
+}
+
+static int vm_request_multiple_irqs(struct virtio_device *vdev, int nirqs,
+ bool per_vq_irq)
+{
+ int err = -ENOMEM;
+ struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+ unsigned i, v;
+ int irq = 0;
+
+ vm_dev->num_irqs = nirqs;
+ vm_dev->used_irqs = 0;
+
+ vm_dev->vm_vq_names = kmalloc_array(nirqs, sizeof(*vm_dev->vm_vq_names),
+ GFP_KERNEL);
+ if (!vm_dev->vm_vq_names)
+ goto error;
+
+ for (i = 0; i < nirqs; i++) {
+ irq = platform_get_irq(vm_dev->pdev, i);
+ if (irq == -ENXIO)
+ goto error;
+ }
+
+ /* Set the irq used for configuration */
+ v = vm_dev->used_irqs;
+ snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
+ "%s-config", dev_name(&vdev->dev));
+ irq = platform_get_irq(vm_dev->pdev, v);
+ err = request_irq(irq, vm_config_changed, 0,
+ vm_dev->vm_vq_names[v], vm_dev);
+ ++vm_dev->used_irqs;
+ if (err)
+ goto error;
+
+ if (!per_vq_irq) {
+ /* Shared irq for all VQs */
+ v = vm_dev->used_irqs;
+ snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
+ "%s-virtqueues", dev_name(&vdev->dev));
+ irq = platform_get_irq(vm_dev->pdev, v);
+ err = request_irq(irq, vm_vring_interrupt, 0,
+ vm_dev->vm_vq_names[v], vm_dev);
+ if (err)
+ goto error;
+ ++vm_dev->used_irqs;
+ }
+ return 0;
+error:
+ vm_free_irqs(vdev);
+ return err;
}
+static int vm_request_single_irq(struct virtio_device *vdev)
+{
+ int err;
+ struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+ int irq = platform_get_irq(vm_dev->pdev, 0);
+ err = request_irq(irq, vm_interrupt, IRQF_SHARED,
+ dev_name(&vdev->dev), vm_dev);
+ if (!err)
+ vm_dev->single_irq_enabled = 1;
+ return err;
+}
static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
void (*callback)(struct virtqueue *vq),
@@ -392,29 +501,92 @@ error_available:
return ERR_PTR(err);
}
-static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
- struct virtqueue *vqs[],
- vq_callback_t *callbacks[],
- const char *names[])
+static int vm_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char *names[],
+ bool use_multiple_irq,
+ bool per_vq_irq)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
- unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
- int i, err;
+ int i, err, nirqs, irq;
+
+ if (!use_multiple_irq) {
+ /* Old style: one normal interrupt for change and all vqs. */
+ err = vm_request_single_irq(vdev);
+ if (err)
+ goto error_request;
+ } else {
+ if (per_vq_irq) {
+ /* Best option: one for change interrupt, one per vq. */
+ nirqs = 1;
+ for (i = 0; i < nvqs; ++i)
+ if (callbacks[i])
+ ++nirqs;
+ } else {
+ /* Second best: one for change, shared for all vqs. */
+ nirqs = 2;
+ }
- err = request_irq(irq, vm_interrupt, IRQF_SHARED,
- dev_name(&vdev->dev), vm_dev);
- if (err)
- return err;
+ err = vm_request_multiple_irqs(vdev, nirqs, per_vq_irq);
+ if (err)
+ goto error_request;
+ }
- for (i = 0; i < nvqs; ++i) {
+ for (i = 0; i < nvqs; i++) {
+ if (!names[i]) {
+ vqs[i] = NULL;
+ continue;
+ }
vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]);
if (IS_ERR(vqs[i])) {
- vm_del_vqs(vdev);
- return PTR_ERR(vqs[i]);
+ err = PTR_ERR(vqs[i]);
+ goto error_find;
+ }
+ if (!per_vq_irq || !callbacks[i])
+ continue;
+ /* allocate per-vq irq if available and necessary */
+ snprintf(vm_dev->vm_vq_names[vm_dev->used_irqs],
+ sizeof(*vm_dev->vm_vq_names),
+ "%s-%s",
+ dev_name(&vm_dev->vdev.dev), names[i]);
+ irq = platform_get_irq(vm_dev->pdev, vm_dev->used_irqs);
+ err = request_irq(irq, vring_interrupt, 0,
+ vm_dev->vm_vq_names[vm_dev->used_irqs], vqs[i]);
+ if (err) {
+ vm_del_vq(vqs[i]);
+ goto error_find;
}
+ ++vm_dev->used_irqs;
}
-
return 0;
+error_find:
+ vm_del_vqs(vdev);
+
+error_request:
+ return err;
+}
+
+static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char *names[])
+{
+ int err;
+
+ /* Try multiple irqs with one irq per queue. */
+ err = vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
+ if (!err)
+ return 0;
+ /* Fallback: multiple irqs with one irq for config,
+ * one shared for queues. */
+ err = vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
+ true, false);
+ if (!err)
+ return 0;
+ /* Finally fall back to regular single interrupts. */
+ return vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
+ false, false);
}
static const char *vm_bus_name(struct virtio_device *vdev)
--
1.7.1
On 2014/11/4 17:35, Shannon Zhao wrote:
> As the current virtio-mmio only support single irq,
> so some advanced features such as vhost-net with irqfd
> are not supported. And the net performance is not
> the best without vhost-net and irqfd supporting.
>
Hi Joel, Peter, Mst,
Some virtio-net with virtio-mmio performance data on ARM added as follows:
Type of backend bandwidth(GBytes/sec)
virtio-net 0.66
vhost-net 1.49
vhost-net with irqfd 2.01
Test cmd: ./iperf -c 192.168.0.2 -P 1 -i 10 -p 5001 -f G -t 60
From this test data, irqfd brings a great improvement (about 30%) in performance.
So maybe it's necessary to enable multiple irq support to make vhost-net
with virtio-mmio on ARM be able to use irqfd.
How do you guys think? Look forward for your feedback.
Thanks,
Shannon
> This patch support virtio-mmio to request multiple
> irqs like virtio-pci. With this patch and qemu assigning
> multiple irqs for virtio-mmio device, it's ok to use
> vhost-net with irqfd on arm/arm64.
>
Hi Shannon,
>Type of backend bandwith(GBytes/sec)
>virtio-net 0.66
>vhost-net 1.49
>vhost-net with irqfd 2.01
>
>Test cmd: ./iperf -c 192.168.0.2 -P 1 -i 10 -p 5001 -f G -t 60
Impressive results !
Could you please detail your setup ? which platform are you using and which GbE controller ?
As a reference, it would be good also to have result with an iperf to the HOST to see how far we are from a native configuration...
Also, I assume a pending Qemu patch is necessary to assign multiple irqs ? I'm correct ?
Thanks a lot,
Best regards
Rémy
-----Message d'origine-----
De : Shannon Zhao [mailto:[email protected]]
Envoyé : mercredi 5 novembre 2014 09:00
À : [email protected]
Cc : [email protected]; [email protected]; [email protected]; [email protected]; GAUGUEY Rémy 228890; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
Objet : Re: [RFC PATCH] virtio-mmio: support for multiple irqs
On 2014/11/4 17:35, Shannon Zhao wrote:
> As the current virtio-mmio only support single irq, so some advanced
> features such as vhost-net with irqfd are not supported. And the net
> performance is not the best without vhost-net and irqfd supporting.
>
Hi Joel, Peter, Mst,
Some virtio-net with virtio-mmio performance data on ARM added as followed:
Type of backend bandwith(GBytes/sec)
virtio-net 0.66
vhost-net 1.49
vhost-net with irqfd 2.01
Test cmd: ./iperf -c 192.168.0.2 -P 1 -i 10 -p 5001 -f G -t 60
>From this test data, irqfd has great improvement (about 30%) on performance.
So maybe it's necessary to enable multiple irq support to make vhost-net with virtio-mmio on ARM be able to use irqfd.
How do you guys think? Look forward for your feedback.
Thanks,
Shannon
> This patch support virtio-mmio to request multiple irqs like
> virtio-pci. With this patch and qemu assigning multiple irqs for
> virtio-mmio device, it's ok to use vhost-net with irqfd on arm/arm64.
>
Hi Rémy,
On 2014/11/5 16:26, GAUGUEY Rémy 228890 wrote:
> Hi Shannon,
>
>> Type of backend bandwith(GBytes/sec)
>> virtio-net 0.66
>> vhost-net 1.49
>> vhost-net with irqfd 2.01
>>
>> Test cmd: ./iperf -c 192.168.0.2 -P 1 -i 10 -p 5001 -f G -t 60
>
> Impressive results !
> Could you please detail your setup ? which platform are you using and which GbE controller ?
Sorry for not telling the test scenario. This test scenario is from Host to Guest. It just
compare the performance of different backends. I did this test on ARM64 platform.
The setup was based on:
1)on host kvm-arm should support ioeventfd and irqfd
The irqfd patch is from Eric "ARM: KVM: add irqfd support".
http://www.spinics.net/lists/kvm-arm/msg11014.html
The ioeventfd patch is reworked by me from Antonios.
http://www.spinics.net/lists/kvm-arm/msg08413.html
2)qemu should enable ioeventfd support for virtio-mmio
This patch is based on work by Ying-Shiuan Pan and reworked for the new qemu branch.
https://lists.gnu.org/archive/html/qemu-devel/2014-11/msg00594.html
3)qemu should enable multiple irqs for virtio-mmio
This patch hasn't been sent to the qemu mailing list yet, as we want to check whether this is the right direction.
If you want to test, I'll send it to you.
4)in guest should enable support virtio-mmio to request multiple irqs
This is what this patch do.
> As a reference, it would be good also to have result with an iperf to the HOST to see how far we are from a native configuration...
Agree!
>
> Also, I assume a pending Qemu patch is necessary to assign multiple irqs ? I'm correct ?
Yes, the patch is on it's way :)
>
> Thanks a lot,
> Best regards
> R?my
>
> -----Message d'origine-----
> De : Shannon Zhao [mailto:[email protected]]
> Envoy? : mercredi 5 novembre 2014 09:00
> ? : [email protected]
> Cc : [email protected]; [email protected]; [email protected]; [email protected]; GAUGUEY R?my 228890; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
> Objet : Re: [RFC PATCH] virtio-mmio: support for multiple irqs
>
>
> On 2014/11/4 17:35, Shannon Zhao wrote:
>> As the current virtio-mmio only support single irq, so some advanced
>> features such as vhost-net with irqfd are not supported. And the net
>> performance is not the best without vhost-net and irqfd supporting.
>>
> Hi Joel, Peter, Mst,
>
> Some virtio-net with virtio-mmio performance data on ARM added as followed:
>
> Type of backend bandwith(GBytes/sec)
> virtio-net 0.66
> vhost-net 1.49
> vhost-net with irqfd 2.01
>
> Test cmd: ./iperf -c 192.168.0.2 -P 1 -i 10 -p 5001 -f G -t 60
>
>>From this test data, irqfd has great improvement (about 30%) on performance.
> So maybe it's necessary to enable multiple irq support to make vhost-net with virtio-mmio on ARM be able to use irqfd.
>
> How do you guys think? Look forward for your feedback.
>
> Thanks,
> Shannon
>
>> This patch support virtio-mmio to request multiple irqs like
>> virtio-pci. With this patch and qemu assigning multiple irqs for
>> virtio-mmio device, it's ok to use vhost-net with irqfd on arm/arm64.
>>
> .
>
--
Shannon
On 11/05/2014 03:12 AM, Shannon Zhao wrote:
> Hi R?my,
>
> On 2014/11/5 16:26, GAUGUEY R?my 228890 wrote:
>> Hi Shannon,
>>
>>> Type of backend bandwith(GBytes/sec)
>>> virtio-net 0.66
>>> vhost-net 1.49
>>> vhost-net with irqfd 2.01
>>>
>>> Test cmd: ./iperf -c 192.168.0.2 -P 1 -i 10 -p 5001 -f G -t 60
>> Impressive results !
>> Could you please detail your setup ? which platform are you using and which GbE controller ?
> Sorry for not telling the test scenario. This test scenario is from Host to Guest. It just
> compare the performance of different backends. I did this test on ARM64 platform.
>
> The setup was based on:
> 1)on host kvm-arm should support ioeventfd and irqfd
> The irqfd patch is from Eric "ARM: KVM: add irqfd support".
> http://www.spinics.net/lists/kvm-arm/msg11014.html
>
> The ioeventfd patch is reworked by me from Antonios.
> http://www.spinics.net/lists/kvm-arm/msg08413.html
>
> 2)qemu should enable ioeventfd support for virtio-mmio
> This patch is refer to Ying-Shiuan Pan and reworked for new qemu branch.
> https://lists.gnu.org/archive/html/qemu-devel/2014-11/msg00594.html
>
> 3)qemu should enable multiple irqs for virtio-mmio
> This patch isn't sent to qemu maillist as we want to check whether this is the right direction.
> If you want to test, I'll send it to you.
I'm not a maintainer so my opinion isn't worth a lot here, but this
seems like the right direction to me. I'd like to see the qemu patch
(do mention the dependency on the kernel patch) on the qemu-devel
mailing list. I think these numbers also support some of the prereqs
listed above that have gone through several iterations getting queued up
for 3.19.
On 2014/11/5 23:27, Joel Schopp wrote:
>
> On 11/05/2014 03:12 AM, Shannon Zhao wrote:
>> Hi R?my,
>>
>> On 2014/11/5 16:26, GAUGUEY R?my 228890 wrote:
>>> Hi Shannon,
>>>
>>>> Type of backend bandwith(GBytes/sec)
>>>> virtio-net 0.66
>>>> vhost-net 1.49
>>>> vhost-net with irqfd 2.01
>>>>
>>>> Test cmd: ./iperf -c 192.168.0.2 -P 1 -i 10 -p 5001 -f G -t 60
>>> Impressive results !
>>> Could you please detail your setup ? which platform are you using and which GbE controller ?
>> Sorry for not telling the test scenario. This test scenario is from Host to Guest. It just
>> compare the performance of different backends. I did this test on ARM64 platform.
>>
>> The setup was based on:
>> 1)on host kvm-arm should support ioeventfd and irqfd
>> The irqfd patch is from Eric "ARM: KVM: add irqfd support".
>> http://www.spinics.net/lists/kvm-arm/msg11014.html
>>
>> The ioeventfd patch is reworked by me from Antonios.
>> http://www.spinics.net/lists/kvm-arm/msg08413.html
>>
>> 2)qemu should enable ioeventfd support for virtio-mmio
>> This patch is refer to Ying-Shiuan Pan and reworked for new qemu branch.
>> https://lists.gnu.org/archive/html/qemu-devel/2014-11/msg00594.html
>>
>> 3)qemu should enable multiple irqs for virtio-mmio
>> This patch isn't sent to qemu maillist as we want to check whether this is the right direction.
>> If you want to test, I'll send it to you.
> I'm not a maintainer so my opinion isn't worth a lot here, but this
> seems like the right direction to me. I'd like to see the qemu patch
> (do mention the dependency on the kernel patch) on the qemu-devel
> mailing list. I think these numbers also support some of the prereqs
> listed above that have gone through several iterations getting queued up
> for 3.19.
> .
>
Hi,
Thanks for your reply :)
If this patch get accepted by maintainers, I'll send the qemu patch to qemu-devel mailing list.
In addition, I think this is worthwhile on the arm32 platform, as arm32 with GICv2 doesn't support MSI.
So it can't use virtio-pci, if I'm not missing something. And it's also worthwhile on the arm64 platform,
as we can't be sure when PCI and MSI will be supported on arm. It gives users an alternative
choice for virtualized networking.
Rusty, MST, Peter Maydell, do you have any ideas about this? Waiting for your feedback :)
Thanks
--
Shannon
On Tue, Nov 04, 2014 at 05:35:12PM +0800, Shannon Zhao wrote:
> As the current virtio-mmio only support single irq,
> so some advanced features such as vhost-net with irqfd
> are not supported. And the net performance is not
> the best without vhost-net and irqfd supporting.
>
> This patch support virtio-mmio to request multiple
> irqs like virtio-pci. With this patch and qemu assigning
> multiple irqs for virtio-mmio device, it's ok to use
> vhost-net with irqfd on arm/arm64.
>
> As arm doesn't support msi-x now, we use GSI for
> multiple irq. In this patch we use "vm_try_to_find_vqs"
> to check whether multiple irqs are supported like
> virtio-pci.
>
> Is this the right direction? is there other ways to
> make virtio-mmio support multiple irq? Hope for feedback.
> Thanks.
>
> Signed-off-by: Shannon Zhao <[email protected]>
So how does guest discover whether host device supports multiple IRQs?
Could you please document the new interface?
E.g. send a patch for virtio spec.
I think this really should be controlled by hypervisor, per device.
I'm also tempted to make this a virtio 1.0 only feature.
> ---
> drivers/virtio/virtio_mmio.c | 234 ++++++++++++++++++++++++++++++++++++------
> 1 files changed, 203 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
> index c600ccf..2b7d935 100644
> --- a/drivers/virtio/virtio_mmio.c
> +++ b/drivers/virtio/virtio_mmio.c
> @@ -122,6 +122,15 @@ struct virtio_mmio_device {
> /* a list of queues so we can dispatch IRQs */
> spinlock_t lock;
> struct list_head virtqueues;
> +
> + /* multiple irq support */
> + int single_irq_enabled;
> + /* Number of available irqs */
> + unsigned num_irqs;
> + /* Used number of irqs */
> + int used_irqs;
> + /* Name strings for interrupts. */
> + char (*vm_vq_names)[256];
> };
>
> struct virtio_mmio_vq_info {
> @@ -229,33 +238,53 @@ static bool vm_notify(struct virtqueue *vq)
> return true;
> }
>
> +/* Handle a configuration change: Tell driver if it wants to know. */
> +static irqreturn_t vm_config_changed(int irq, void *opaque)
> +{
> + struct virtio_mmio_device *vm_dev = opaque;
> + struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
> + struct virtio_driver, driver);
> +
> + if (vdrv && vdrv->config_changed)
> + vdrv->config_changed(&vm_dev->vdev);
> + return IRQ_HANDLED;
> +}
> +
> /* Notify all virtqueues on an interrupt. */
> -static irqreturn_t vm_interrupt(int irq, void *opaque)
> +static irqreturn_t vm_vring_interrupt(int irq, void *opaque)
> {
> struct virtio_mmio_device *vm_dev = opaque;
> struct virtio_mmio_vq_info *info;
> - struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
> - struct virtio_driver, driver);
> - unsigned long status;
> + irqreturn_t ret = IRQ_NONE;
> unsigned long flags;
> +
> + spin_lock_irqsave(&vm_dev->lock, flags);
> + list_for_each_entry(info, &vm_dev->virtqueues, node) {
> + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
> + ret = IRQ_HANDLED;
> + }
> + spin_unlock_irqrestore(&vm_dev->lock, flags);
> +
> + return ret;
> +}
> +
> +/* Notify all virtqueues and handle a configuration
> + * change on an interrupt. */
> +static irqreturn_t vm_interrupt(int irq, void *opaque)
> +{
> + struct virtio_mmio_device *vm_dev = opaque;
> + unsigned long status;
> irqreturn_t ret = IRQ_NONE;
>
> /* Read and acknowledge interrupts */
> status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS);
> writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK);
>
> - if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)
> - && vdrv && vdrv->config_changed) {
> - vdrv->config_changed(&vm_dev->vdev);
> - ret = IRQ_HANDLED;
> - }
> + if (unlikely(status & VIRTIO_MMIO_INT_CONFIG))
> + return vm_config_changed(irq, opaque);
>
> - if (likely(status & VIRTIO_MMIO_INT_VRING)) {
> - spin_lock_irqsave(&vm_dev->lock, flags);
> - list_for_each_entry(info, &vm_dev->virtqueues, node)
> - ret |= vring_interrupt(irq, info->vq);
> - spin_unlock_irqrestore(&vm_dev->lock, flags);
> - }
> + if (likely(status & VIRTIO_MMIO_INT_VRING))
> + return vm_vring_interrupt(irq, opaque);
>
> return ret;
> }
> @@ -284,18 +313,98 @@ static void vm_del_vq(struct virtqueue *vq)
> kfree(info);
> }
>
> -static void vm_del_vqs(struct virtio_device *vdev)
> +static void vm_free_irqs(struct virtio_device *vdev)
> {
> + int i;
> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> +
> + if (vm_dev->single_irq_enabled) {
> + free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
> + vm_dev->single_irq_enabled = 0;
> + }
> +
> + for (i = 0; i < vm_dev->used_irqs; ++i)
> + free_irq(platform_get_irq(vm_dev->pdev, i), vm_dev);
> +
> + vm_dev->num_irqs = 0;
> + vm_dev->used_irqs = 0;
> + kfree(vm_dev->vm_vq_names);
> + vm_dev->vm_vq_names = NULL;
> +}
> +
> +static void vm_del_vqs(struct virtio_device *vdev)
> +{
> struct virtqueue *vq, *n;
>
> list_for_each_entry_safe(vq, n, &vdev->vqs, list)
> vm_del_vq(vq);
>
> - free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
> + vm_free_irqs(vdev);
> +}
> +
> +static int vm_request_multiple_irqs(struct virtio_device *vdev, int nirqs,
> + bool per_vq_irq)
> +{
> + int err = -ENOMEM;
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> + unsigned i, v;
> + int irq = 0;
> +
> + vm_dev->num_irqs = nirqs;
> + vm_dev->used_irqs = 0;
> +
> + vm_dev->vm_vq_names = kmalloc_array(nirqs, sizeof(*vm_dev->vm_vq_names),
> + GFP_KERNEL);
> + if (!vm_dev->vm_vq_names)
> + goto error;
> +
> + for (i = 0; i < nirqs; i++) {
> + irq = platform_get_irq(vm_dev->pdev, i);
> + if (irq == -ENXIO)
> + goto error;
> + }
> +
> + /* Set the irq used for configuration */
> + v = vm_dev->used_irqs;
> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
> + "%s-config", dev_name(&vdev->dev));
> + irq = platform_get_irq(vm_dev->pdev, v);
> + err = request_irq(irq, vm_config_changed, 0,
> + vm_dev->vm_vq_names[v], vm_dev);
> + ++vm_dev->used_irqs;
> + if (err)
> + goto error;
> +
> + if (!per_vq_irq) {
> + /* Shared irq for all VQs */
> + v = vm_dev->used_irqs;
> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
> + "%s-virtqueues", dev_name(&vdev->dev));
> + irq = platform_get_irq(vm_dev->pdev, v);
> + err = request_irq(irq, vm_vring_interrupt, 0,
> + vm_dev->vm_vq_names[v], vm_dev);
> + if (err)
> + goto error;
> + ++vm_dev->used_irqs;
> + }
> + return 0;
> +error:
> + vm_free_irqs(vdev);
> + return err;
> }
>
> +static int vm_request_single_irq(struct virtio_device *vdev)
> +{
> + int err;
> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> + int irq = platform_get_irq(vm_dev->pdev, 0);
>
> + err = request_irq(irq, vm_interrupt, IRQF_SHARED,
> + dev_name(&vdev->dev), vm_dev);
> + if (!err)
> + vm_dev->single_irq_enabled = 1;
> + return err;
> +}
>
> static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
> void (*callback)(struct virtqueue *vq),
> @@ -392,29 +501,92 @@ error_available:
> return ERR_PTR(err);
> }
>
> -static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> - struct virtqueue *vqs[],
> - vq_callback_t *callbacks[],
> - const char *names[])
> +static int vm_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> + struct virtqueue *vqs[],
> + vq_callback_t *callbacks[],
> + const char *names[],
> + bool use_multiple_irq,
> + bool per_vq_irq)
> {
> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> - unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
> - int i, err;
> + int i, err, nirqs, irq;
> +
> + if (!use_multiple_irq) {
> + /* Old style: one normal interrupt for change and all vqs. */
> + err = vm_request_single_irq(vdev);
> + if (err)
> + goto error_request;
> + } else {
> + if (per_vq_irq) {
> + /* Best option: one for change interrupt, one per vq. */
> + nirqs = 1;
> + for (i = 0; i < nvqs; ++i)
> + if (callbacks[i])
> + ++nirqs;
> + } else {
> + /* Second best: one for change, shared for all vqs. */
> + nirqs = 2;
> + }
>
> - err = request_irq(irq, vm_interrupt, IRQF_SHARED,
> - dev_name(&vdev->dev), vm_dev);
> - if (err)
> - return err;
> + err = vm_request_multiple_irqs(vdev, nirqs, per_vq_irq);
> + if (err)
> + goto error_request;
> + }
>
> - for (i = 0; i < nvqs; ++i) {
> + for (i = 0; i < nvqs; i++) {
> + if (!names[i]) {
> + vqs[i] = NULL;
> + continue;
> + }
> vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]);
> if (IS_ERR(vqs[i])) {
> - vm_del_vqs(vdev);
> - return PTR_ERR(vqs[i]);
> + err = PTR_ERR(vqs[i]);
> + goto error_find;
> + }
> + if (!per_vq_irq || !callbacks[i])
> + continue;
> + /* allocate per-vq irq if available and necessary */
> + snprintf(vm_dev->vm_vq_names[vm_dev->used_irqs],
> + sizeof(*vm_dev->vm_vq_names),
> + "%s-%s",
> + dev_name(&vm_dev->vdev.dev), names[i]);
> + irq = platform_get_irq(vm_dev->pdev, vm_dev->used_irqs);
> + err = request_irq(irq, vring_interrupt, 0,
> + vm_dev->vm_vq_names[vm_dev->used_irqs], vqs[i]);
> + if (err) {
> + vm_del_vq(vqs[i]);
> + goto error_find;
> }
> + ++vm_dev->used_irqs;
> }
> -
> return 0;
> +error_find:
> + vm_del_vqs(vdev);
> +
> +error_request:
> + return err;
> +}
> +
> +static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> + struct virtqueue *vqs[],
> + vq_callback_t *callbacks[],
> + const char *names[])
> +{
> + int err;
> +
> + /* Try multiple irqs with one irq per queue. */
> + err = vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
> + if (!err)
> + return 0;
> + /* Fallback: multiple irqs with one irq for config,
> + * one shared for queues. */
> + err = vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
> + true, false);
> + if (!err)
> + return 0;
> + /* Finally fall back to regular single interrupts. */
> + return vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
> + false, false);
> }
>
> static const char *vm_bus_name(struct virtio_device *vdev)
> --
> 1.7.1
On 2014/11/6 17:34, Michael S. Tsirkin wrote:
> On Tue, Nov 04, 2014 at 05:35:12PM +0800, Shannon Zhao wrote:
>> As the current virtio-mmio only support single irq,
>> so some advanced features such as vhost-net with irqfd
>> are not supported. And the net performance is not
>> the best without vhost-net and irqfd supporting.
>>
>> This patch support virtio-mmio to request multiple
>> irqs like virtio-pci. With this patch and qemu assigning
>> multiple irqs for virtio-mmio device, it's ok to use
>> vhost-net with irqfd on arm/arm64.
>>
>> As arm doesn't support msi-x now, we use GSI for
>> multiple irq. In this patch we use "vm_try_to_find_vqs"
>> to check whether multiple irqs are supported like
>> virtio-pci.
>>
>> Is this the right direction? is there other ways to
>> make virtio-mmio support multiple irq? Hope for feedback.
>> Thanks.
>>
>> Signed-off-by: Shannon Zhao <[email protected]>
>
>
> So how does guest discover whether host device supports multiple IRQs?
The guest uses vm_try_to_find_vqs to check whether it can get multiple IRQs,
just as virtio-pci uses vp_try_to_find_vqs. And within the function
vm_request_multiple_irqs, the guest checks whether the number of IRQs the host
device provides is equal to the number we want.
for (i = 0; i < nirqs; i++) {
irq = platform_get_irq(vm_dev->pdev, i);
if (irq == -ENXIO)
goto error;
}
If we can't get the expected number of IRQs, an error is returned and this try
fails. Then the guest will try two IRQs and then a single IRQ, like virtio-pci.
> Could you please document the new interface?
> E.g. send a patch for virtio spec.
Ok, I'll send it later. Thank you very much :)
Shannon
> I think this really should be controlled by hypervisor, per device.
> I'm also tempted to make this a virtio 1.0 only feature.
>
>
>
>> ---
>> drivers/virtio/virtio_mmio.c | 234 ++++++++++++++++++++++++++++++++++++------
>> 1 files changed, 203 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
>> index c600ccf..2b7d935 100644
>> --- a/drivers/virtio/virtio_mmio.c
>> +++ b/drivers/virtio/virtio_mmio.c
>> @@ -122,6 +122,15 @@ struct virtio_mmio_device {
>> /* a list of queues so we can dispatch IRQs */
>> spinlock_t lock;
>> struct list_head virtqueues;
>> +
>> + /* multiple irq support */
>> + int single_irq_enabled;
>> + /* Number of available irqs */
>> + unsigned num_irqs;
>> + /* Used number of irqs */
>> + int used_irqs;
>> + /* Name strings for interrupts. */
>> + char (*vm_vq_names)[256];
>> };
>>
>> struct virtio_mmio_vq_info {
>> @@ -229,33 +238,53 @@ static bool vm_notify(struct virtqueue *vq)
>> return true;
>> }
>>
>> +/* Handle a configuration change: Tell driver if it wants to know. */
>> +static irqreturn_t vm_config_changed(int irq, void *opaque)
>> +{
>> + struct virtio_mmio_device *vm_dev = opaque;
>> + struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
>> + struct virtio_driver, driver);
>> +
>> + if (vdrv && vdrv->config_changed)
>> + vdrv->config_changed(&vm_dev->vdev);
>> + return IRQ_HANDLED;
>> +}
>> +
>> /* Notify all virtqueues on an interrupt. */
>> -static irqreturn_t vm_interrupt(int irq, void *opaque)
>> +static irqreturn_t vm_vring_interrupt(int irq, void *opaque)
>> {
>> struct virtio_mmio_device *vm_dev = opaque;
>> struct virtio_mmio_vq_info *info;
>> - struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
>> - struct virtio_driver, driver);
>> - unsigned long status;
>> + irqreturn_t ret = IRQ_NONE;
>> unsigned long flags;
>> +
>> + spin_lock_irqsave(&vm_dev->lock, flags);
>> + list_for_each_entry(info, &vm_dev->virtqueues, node) {
>> + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
>> + ret = IRQ_HANDLED;
>> + }
>> + spin_unlock_irqrestore(&vm_dev->lock, flags);
>> +
>> + return ret;
>> +}
>> +
>> +/* Notify all virtqueues and handle a configuration
>> + * change on an interrupt. */
>> +static irqreturn_t vm_interrupt(int irq, void *opaque)
>> +{
>> + struct virtio_mmio_device *vm_dev = opaque;
>> + unsigned long status;
>> irqreturn_t ret = IRQ_NONE;
>>
>> /* Read and acknowledge interrupts */
>> status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS);
>> writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK);
>>
>> - if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)
>> - && vdrv && vdrv->config_changed) {
>> - vdrv->config_changed(&vm_dev->vdev);
>> - ret = IRQ_HANDLED;
>> - }
>> + if (unlikely(status & VIRTIO_MMIO_INT_CONFIG))
>> + return vm_config_changed(irq, opaque);
>>
>> - if (likely(status & VIRTIO_MMIO_INT_VRING)) {
>> - spin_lock_irqsave(&vm_dev->lock, flags);
>> - list_for_each_entry(info, &vm_dev->virtqueues, node)
>> - ret |= vring_interrupt(irq, info->vq);
>> - spin_unlock_irqrestore(&vm_dev->lock, flags);
>> - }
>> + if (likely(status & VIRTIO_MMIO_INT_VRING))
>> + return vm_vring_interrupt(irq, opaque);
>>
>> return ret;
>> }
>> @@ -284,18 +313,98 @@ static void vm_del_vq(struct virtqueue *vq)
>> kfree(info);
>> }
>>
>> -static void vm_del_vqs(struct virtio_device *vdev)
>> +static void vm_free_irqs(struct virtio_device *vdev)
>> {
>> + int i;
>> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
>> +
>> + if (vm_dev->single_irq_enabled) {
>> + free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
>> + vm_dev->single_irq_enabled = 0;
>> + }
>> +
>> + for (i = 0; i < vm_dev->used_irqs; ++i)
>> + free_irq(platform_get_irq(vm_dev->pdev, i), vm_dev);
>> +
>> + vm_dev->num_irqs = 0;
>> + vm_dev->used_irqs = 0;
>> + kfree(vm_dev->vm_vq_names);
>> + vm_dev->vm_vq_names = NULL;
>> +}
>> +
>> +static void vm_del_vqs(struct virtio_device *vdev)
>> +{
>> struct virtqueue *vq, *n;
>>
>> list_for_each_entry_safe(vq, n, &vdev->vqs, list)
>> vm_del_vq(vq);
>>
>> - free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
>> + vm_free_irqs(vdev);
>> +}
>> +
>> +static int vm_request_multiple_irqs(struct virtio_device *vdev, int nirqs,
>> + bool per_vq_irq)
>> +{
>> + int err = -ENOMEM;
>> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
>> + unsigned i, v;
>> + int irq = 0;
>> +
>> + vm_dev->num_irqs = nirqs;
>> + vm_dev->used_irqs = 0;
>> +
>> + vm_dev->vm_vq_names = kmalloc_array(nirqs, sizeof(*vm_dev->vm_vq_names),
>> + GFP_KERNEL);
>> + if (!vm_dev->vm_vq_names)
>> + goto error;
>> +
>> + for (i = 0; i < nirqs; i++) {
>> + irq = platform_get_irq(vm_dev->pdev, i);
>> + if (irq == -ENXIO)
>> + goto error;
>> + }
>> +
>> + /* Set the irq used for configuration */
>> + v = vm_dev->used_irqs;
>> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
>> + "%s-config", dev_name(&vdev->dev));
>> + irq = platform_get_irq(vm_dev->pdev, v);
>> + err = request_irq(irq, vm_config_changed, 0,
>> + vm_dev->vm_vq_names[v], vm_dev);
>> + ++vm_dev->used_irqs;
>> + if (err)
>> + goto error;
>> +
>> + if (!per_vq_irq) {
>> + /* Shared irq for all VQs */
>> + v = vm_dev->used_irqs;
>> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
>> + "%s-virtqueues", dev_name(&vdev->dev));
>> + irq = platform_get_irq(vm_dev->pdev, v);
>> + err = request_irq(irq, vm_vring_interrupt, 0,
>> + vm_dev->vm_vq_names[v], vm_dev);
>> + if (err)
>> + goto error;
>> + ++vm_dev->used_irqs;
>> + }
>> + return 0;
>> +error:
>> + vm_free_irqs(vdev);
>> + return err;
>> }
>>
>> +static int vm_request_single_irq(struct virtio_device *vdev)
>> +{
>> + int err;
>> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
>> + int irq = platform_get_irq(vm_dev->pdev, 0);
>>
>> + err = request_irq(irq, vm_interrupt, IRQF_SHARED,
>> + dev_name(&vdev->dev), vm_dev);
>> + if (!err)
>> + vm_dev->single_irq_enabled = 1;
>> + return err;
>> +}
>>
>> static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
>> void (*callback)(struct virtqueue *vq),
>> @@ -392,29 +501,92 @@ error_available:
>> return ERR_PTR(err);
>> }
>>
>> -static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
>> - struct virtqueue *vqs[],
>> - vq_callback_t *callbacks[],
>> - const char *names[])
>> +static int vm_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
>> + struct virtqueue *vqs[],
>> + vq_callback_t *callbacks[],
>> + const char *names[],
>> + bool use_multiple_irq,
>> + bool per_vq_irq)
>> {
>> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
>> - unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
>> - int i, err;
>> + int i, err, nirqs, irq;
>> +
>> + if (!use_multiple_irq) {
>> + /* Old style: one normal interrupt for change and all vqs. */
>> + err = vm_request_single_irq(vdev);
>> + if (err)
>> + goto error_request;
>> + } else {
>> + if (per_vq_irq) {
>> + /* Best option: one for change interrupt, one per vq. */
>> + nirqs = 1;
>> + for (i = 0; i < nvqs; ++i)
>> + if (callbacks[i])
>> + ++nirqs;
>> + } else {
>> + /* Second best: one for change, shared for all vqs. */
>> + nirqs = 2;
>> + }
>>
>> - err = request_irq(irq, vm_interrupt, IRQF_SHARED,
>> - dev_name(&vdev->dev), vm_dev);
>> - if (err)
>> - return err;
>> + err = vm_request_multiple_irqs(vdev, nirqs, per_vq_irq);
>> + if (err)
>> + goto error_request;
>> + }
>>
>> - for (i = 0; i < nvqs; ++i) {
>> + for (i = 0; i < nvqs; i++) {
>> + if (!names[i]) {
>> + vqs[i] = NULL;
>> + continue;
>> + }
>> vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]);
>> if (IS_ERR(vqs[i])) {
>> - vm_del_vqs(vdev);
>> - return PTR_ERR(vqs[i]);
>> + err = PTR_ERR(vqs[i]);
>> + goto error_find;
>> + }
>> + if (!per_vq_irq || !callbacks[i])
>> + continue;
>> + /* allocate per-vq irq if available and necessary */
>> + snprintf(vm_dev->vm_vq_names[vm_dev->used_irqs],
>> + sizeof(*vm_dev->vm_vq_names),
>> + "%s-%s",
>> + dev_name(&vm_dev->vdev.dev), names[i]);
>> + irq = platform_get_irq(vm_dev->pdev, vm_dev->used_irqs);
>> + err = request_irq(irq, vring_interrupt, 0,
>> + vm_dev->vm_vq_names[vm_dev->used_irqs], vqs[i]);
>> + if (err) {
>> + vm_del_vq(vqs[i]);
>> + goto error_find;
>> }
>> + ++vm_dev->used_irqs;
>> }
>> -
>> return 0;
>> +error_find:
>> + vm_del_vqs(vdev);
>> +
>> +error_request:
>> + return err;
>> +}
>> +
>> +static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
>> + struct virtqueue *vqs[],
>> + vq_callback_t *callbacks[],
>> + const char *names[])
>> +{
>> + int err;
>> +
>> + /* Try multiple irqs with one irq per queue. */
>> + err = vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
>> + if (!err)
>> + return 0;
>> + /* Fallback: multiple irqs with one irq for config,
>> + * one shared for queues. */
>> + err = vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
>> + true, false);
>> + if (!err)
>> + return 0;
>> + /* Finally fall back to regular single interrupts. */
>> + return vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
>> + false, false);
>> }
>>
>> static const char *vm_bus_name(struct virtio_device *vdev)
>> --
>> 1.7.1
>
> .
>
--
Shannon
On Thu, Nov 06, 2014 at 05:54:54PM +0800, Shannon Zhao wrote:
> On 2014/11/6 17:34, Michael S. Tsirkin wrote:
> > On Tue, Nov 04, 2014 at 05:35:12PM +0800, Shannon Zhao wrote:
> >> As the current virtio-mmio only support single irq,
> >> so some advanced features such as vhost-net with irqfd
> >> are not supported. And the net performance is not
> >> the best without vhost-net and irqfd supporting.
> >>
> >> This patch support virtio-mmio to request multiple
> >> irqs like virtio-pci. With this patch and qemu assigning
> >> multiple irqs for virtio-mmio device, it's ok to use
> >> vhost-net with irqfd on arm/arm64.
> >>
> >> As arm doesn't support msi-x now, we use GSI for
> >> multiple irq. In this patch we use "vm_try_to_find_vqs"
> >> to check whether multiple irqs are supported like
> >> virtio-pci.
> >>
> >> Is this the right direction? is there other ways to
> >> make virtio-mmio support multiple irq? Hope for feedback.
> >> Thanks.
> >>
> >> Signed-off-by: Shannon Zhao <[email protected]>
> >
> >
> > So how does guest discover whether host device supports multiple IRQs?
>
> Guest uses vm_try_to_find_vqs to check whether it can get multiple IRQs
> like virtio-pci uses vp_try_to_find_vqs. And within function
> vm_request_multiple_irqs, guest check whether the number of IRQs host
> device gives is equal to the number we want.
OK but how does host specify the number of IRQs for a device?
for pci this is done through the MSI-X capability register.
> for (i = 0; i < nirqs; i++) {
> irq = platform_get_irq(vm_dev->pdev, i);
> if (irq == -ENXIO)
> goto error;
> }
>
> If we can't get the expected number of IRQs, return error and this try
> fails. Then guest will try two IRQS and single IRQ like virtio-pci.
>
> > Could you please document the new interface?
> > E.g. send a patch for virtio spec.
>
> Ok, I'll send it later. Thank you very much :)
>
> Shannon
>
> > I think this really should be controlled by hypervisor, per device.
> > I'm also tempted to make this a virtio 1.0 only feature.
> >
> >
> >
> >> ---
> >> drivers/virtio/virtio_mmio.c | 234 ++++++++++++++++++++++++++++++++++++------
> >> 1 files changed, 203 insertions(+), 31 deletions(-)
> >>
> >> diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
> >> index c600ccf..2b7d935 100644
> >> --- a/drivers/virtio/virtio_mmio.c
> >> +++ b/drivers/virtio/virtio_mmio.c
> >> @@ -122,6 +122,15 @@ struct virtio_mmio_device {
> >> /* a list of queues so we can dispatch IRQs */
> >> spinlock_t lock;
> >> struct list_head virtqueues;
> >> +
> >> + /* multiple irq support */
> >> + int single_irq_enabled;
> >> + /* Number of available irqs */
> >> + unsigned num_irqs;
> >> + /* Used number of irqs */
> >> + int used_irqs;
> >> + /* Name strings for interrupts. */
> >> + char (*vm_vq_names)[256];
> >> };
> >>
> >> struct virtio_mmio_vq_info {
> >> @@ -229,33 +238,53 @@ static bool vm_notify(struct virtqueue *vq)
> >> return true;
> >> }
> >>
> >> +/* Handle a configuration change: Tell driver if it wants to know. */
> >> +static irqreturn_t vm_config_changed(int irq, void *opaque)
> >> +{
> >> + struct virtio_mmio_device *vm_dev = opaque;
> >> + struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
> >> + struct virtio_driver, driver);
> >> +
> >> + if (vdrv && vdrv->config_changed)
> >> + vdrv->config_changed(&vm_dev->vdev);
> >> + return IRQ_HANDLED;
> >> +}
> >> +
> >> /* Notify all virtqueues on an interrupt. */
> >> -static irqreturn_t vm_interrupt(int irq, void *opaque)
> >> +static irqreturn_t vm_vring_interrupt(int irq, void *opaque)
> >> {
> >> struct virtio_mmio_device *vm_dev = opaque;
> >> struct virtio_mmio_vq_info *info;
> >> - struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
> >> - struct virtio_driver, driver);
> >> - unsigned long status;
> >> + irqreturn_t ret = IRQ_NONE;
> >> unsigned long flags;
> >> +
> >> + spin_lock_irqsave(&vm_dev->lock, flags);
> >> + list_for_each_entry(info, &vm_dev->virtqueues, node) {
> >> + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
> >> + ret = IRQ_HANDLED;
> >> + }
> >> + spin_unlock_irqrestore(&vm_dev->lock, flags);
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +/* Notify all virtqueues and handle a configuration
> >> + * change on an interrupt. */
> >> +static irqreturn_t vm_interrupt(int irq, void *opaque)
> >> +{
> >> + struct virtio_mmio_device *vm_dev = opaque;
> >> + unsigned long status;
> >> irqreturn_t ret = IRQ_NONE;
> >>
> >> /* Read and acknowledge interrupts */
> >> status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS);
> >> writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK);
> >>
> >> - if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)
> >> - && vdrv && vdrv->config_changed) {
> >> - vdrv->config_changed(&vm_dev->vdev);
> >> - ret = IRQ_HANDLED;
> >> - }
> >> + if (unlikely(status & VIRTIO_MMIO_INT_CONFIG))
> >> + return vm_config_changed(irq, opaque);
> >>
> >> - if (likely(status & VIRTIO_MMIO_INT_VRING)) {
> >> - spin_lock_irqsave(&vm_dev->lock, flags);
> >> - list_for_each_entry(info, &vm_dev->virtqueues, node)
> >> - ret |= vring_interrupt(irq, info->vq);
> >> - spin_unlock_irqrestore(&vm_dev->lock, flags);
> >> - }
> >> + if (likely(status & VIRTIO_MMIO_INT_VRING))
> >> + return vm_vring_interrupt(irq, opaque);
> >>
> >> return ret;
> >> }
> >> @@ -284,18 +313,98 @@ static void vm_del_vq(struct virtqueue *vq)
> >> kfree(info);
> >> }
> >>
> >> -static void vm_del_vqs(struct virtio_device *vdev)
> >> +static void vm_free_irqs(struct virtio_device *vdev)
> >> {
> >> + int i;
> >> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> >> +
> >> + if (vm_dev->single_irq_enabled) {
> >> + free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
> >> + vm_dev->single_irq_enabled = 0;
> >> + }
> >> +
> >> + for (i = 0; i < vm_dev->used_irqs; ++i)
> >> + free_irq(platform_get_irq(vm_dev->pdev, i), vm_dev);
> >> +
> >> + vm_dev->num_irqs = 0;
> >> + vm_dev->used_irqs = 0;
> >> + kfree(vm_dev->vm_vq_names);
> >> + vm_dev->vm_vq_names = NULL;
> >> +}
> >> +
> >> +static void vm_del_vqs(struct virtio_device *vdev)
> >> +{
> >> struct virtqueue *vq, *n;
> >>
> >> list_for_each_entry_safe(vq, n, &vdev->vqs, list)
> >> vm_del_vq(vq);
> >>
> >> - free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
> >> + vm_free_irqs(vdev);
> >> +}
> >> +
> >> +static int vm_request_multiple_irqs(struct virtio_device *vdev, int nirqs,
> >> + bool per_vq_irq)
> >> +{
> >> + int err = -ENOMEM;
> >> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> >> + unsigned i, v;
> >> + int irq = 0;
> >> +
> >> + vm_dev->num_irqs = nirqs;
> >> + vm_dev->used_irqs = 0;
> >> +
> >> + vm_dev->vm_vq_names = kmalloc_array(nirqs, sizeof(*vm_dev->vm_vq_names),
> >> + GFP_KERNEL);
> >> + if (!vm_dev->vm_vq_names)
> >> + goto error;
> >> +
> >> + for (i = 0; i < nirqs; i++) {
> >> + irq = platform_get_irq(vm_dev->pdev, i);
> >> + if (irq == -ENXIO)
> >> + goto error;
> >> + }
> >> +
> >> + /* Set the irq used for configuration */
> >> + v = vm_dev->used_irqs;
> >> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
> >> + "%s-config", dev_name(&vdev->dev));
> >> + irq = platform_get_irq(vm_dev->pdev, v);
> >> + err = request_irq(irq, vm_config_changed, 0,
> >> + vm_dev->vm_vq_names[v], vm_dev);
> >> + ++vm_dev->used_irqs;
> >> + if (err)
> >> + goto error;
> >> +
> >> + if (!per_vq_irq) {
> >> + /* Shared irq for all VQs */
> >> + v = vm_dev->used_irqs;
> >> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
> >> + "%s-virtqueues", dev_name(&vdev->dev));
> >> + irq = platform_get_irq(vm_dev->pdev, v);
> >> + err = request_irq(irq, vm_vring_interrupt, 0,
> >> + vm_dev->vm_vq_names[v], vm_dev);
> >> + if (err)
> >> + goto error;
> >> + ++vm_dev->used_irqs;
> >> + }
> >> + return 0;
> >> +error:
> >> + vm_free_irqs(vdev);
> >> + return err;
> >> }
> >>
> >> +static int vm_request_single_irq(struct virtio_device *vdev)
> >> +{
> >> + int err;
> >> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> >> + int irq = platform_get_irq(vm_dev->pdev, 0);
> >>
> >> + err = request_irq(irq, vm_interrupt, IRQF_SHARED,
> >> + dev_name(&vdev->dev), vm_dev);
> >> + if (!err)
> >> + vm_dev->single_irq_enabled = 1;
> >> + return err;
> >> +}
> >>
> >> static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
> >> void (*callback)(struct virtqueue *vq),
> >> @@ -392,29 +501,92 @@ error_available:
> >> return ERR_PTR(err);
> >> }
> >>
> >> -static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> >> - struct virtqueue *vqs[],
> >> - vq_callback_t *callbacks[],
> >> - const char *names[])
> >> +static int vm_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> >> + struct virtqueue *vqs[],
> >> + vq_callback_t *callbacks[],
> >> + const char *names[],
> >> + bool use_multiple_irq,
> >> + bool per_vq_irq)
> >> {
> >> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
> >> - unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
> >> - int i, err;
> >> + int i, err, nirqs, irq;
> >> +
> >> + if (!use_multiple_irq) {
> >> + /* Old style: one normal interrupt for change and all vqs. */
> >> + err = vm_request_single_irq(vdev);
> >> + if (err)
> >> + goto error_request;
> >> + } else {
> >> + if (per_vq_irq) {
> >> + /* Best option: one for change interrupt, one per vq. */
> >> + nirqs = 1;
> >> + for (i = 0; i < nvqs; ++i)
> >> + if (callbacks[i])
> >> + ++nirqs;
> >> + } else {
> >> + /* Second best: one for change, shared for all vqs. */
> >> + nirqs = 2;
> >> + }
> >>
> >> - err = request_irq(irq, vm_interrupt, IRQF_SHARED,
> >> - dev_name(&vdev->dev), vm_dev);
> >> - if (err)
> >> - return err;
> >> + err = vm_request_multiple_irqs(vdev, nirqs, per_vq_irq);
> >> + if (err)
> >> + goto error_request;
> >> + }
> >>
> >> - for (i = 0; i < nvqs; ++i) {
> >> + for (i = 0; i < nvqs; i++) {
> >> + if (!names[i]) {
> >> + vqs[i] = NULL;
> >> + continue;
> >> + }
> >> vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]);
> >> if (IS_ERR(vqs[i])) {
> >> - vm_del_vqs(vdev);
> >> - return PTR_ERR(vqs[i]);
> >> + err = PTR_ERR(vqs[i]);
> >> + goto error_find;
> >> + }
> >> + if (!per_vq_irq || !callbacks[i])
> >> + continue;
> >> + /* allocate per-vq irq if available and necessary */
> >> + snprintf(vm_dev->vm_vq_names[vm_dev->used_irqs],
> >> + sizeof(*vm_dev->vm_vq_names),
> >> + "%s-%s",
> >> + dev_name(&vm_dev->vdev.dev), names[i]);
> >> + irq = platform_get_irq(vm_dev->pdev, vm_dev->used_irqs);
> >> + err = request_irq(irq, vring_interrupt, 0,
> >> + vm_dev->vm_vq_names[vm_dev->used_irqs], vqs[i]);
> >> + if (err) {
> >> + vm_del_vq(vqs[i]);
> >> + goto error_find;
> >> }
> >> + ++vm_dev->used_irqs;
> >> }
> >> -
> >> return 0;
> >> +error_find:
> >> + vm_del_vqs(vdev);
> >> +
> >> +error_request:
> >> + return err;
> >> +}
> >> +
> >> +static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> >> + struct virtqueue *vqs[],
> >> + vq_callback_t *callbacks[],
> >> + const char *names[])
> >> +{
> >> + int err;
> >> +
> >> + /* Try multiple irqs with one irq per queue. */
> >> + err = vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
> >> + if (!err)
> >> + return 0;
> >> + /* Fallback: multiple irqs with one irq for config,
> >> + * one shared for queues. */
> >> + err = vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
> >> + true, false);
> >> + if (!err)
> >> + return 0;
> >> + /* Finally fall back to regular single interrupts. */
> >> + return vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
> >> + false, false);
> >> }
> >>
> >> static const char *vm_bus_name(struct virtio_device *vdev)
> >> --
> >> 1.7.1
> >
> > .
> >
>
>
> --
> Shannon
On 2014/11/6 19:09, Michael S. Tsirkin wrote:
> On Thu, Nov 06, 2014 at 05:54:54PM +0800, Shannon Zhao wrote:
>> On 2014/11/6 17:34, Michael S. Tsirkin wrote:
>>> On Tue, Nov 04, 2014 at 05:35:12PM +0800, Shannon Zhao wrote:
>>>> As the current virtio-mmio only support single irq,
>>>> so some advanced features such as vhost-net with irqfd
>>>> are not supported. And the net performance is not
>>>> the best without vhost-net and irqfd supporting.
>>>>
>>>> This patch support virtio-mmio to request multiple
>>>> irqs like virtio-pci. With this patch and qemu assigning
>>>> multiple irqs for virtio-mmio device, it's ok to use
>>>> vhost-net with irqfd on arm/arm64.
>>>>
>>>> As arm doesn't support msi-x now, we use GSI for
>>>> multiple irq. In this patch we use "vm_try_to_find_vqs"
>>>> to check whether multiple irqs are supported like
>>>> virtio-pci.
>>>>
>>>> Is this the right direction? is there other ways to
>>>> make virtio-mmio support multiple irq? Hope for feedback.
>>>> Thanks.
>>>>
>>>> Signed-off-by: Shannon Zhao <[email protected]>
>>>
>>>
>>> So how does guest discover whether host device supports multiple IRQs?
>>
>> Guest uses vm_try_to_find_vqs to check whether it can get multiple IRQs
>> like virtio-pci uses vp_try_to_find_vqs. And within function
>> vm_request_multiple_irqs, guest check whether the number of IRQs host
>> device gives is equal to the number we want.
>
> OK but how does host specify the number of IRQs for a device?
> for pci this is done through the MSI-X capability register.
>
For example, the QEMU command line for a typical virtio-net device on arm is as follows:
-device virtio-net-device,netdev=net0,mac="52:54:00:12:34:55",vectors=3 \
-netdev type=tap,id=net0,script=/home/v2r1/qemu-ifup,downscript=no,vhost=on,queues=1 \
When QEMU creates a virtio-mmio device, it gets the number of IRQs through the "vectors"
property, and according to this QEMU will connect "vectors" IRQ lines between virtio-mmio and the GIC.
The patch showing how QEMU creates a virtio-mmio device with multiple IRQs is as follows:
driver = qemu_opt_get(opts, "driver");
if (strncmp(driver, "virtio-", 7) != 0) {
continue;
}
vectors = qemu_opt_get(opts, "vectors");
if (vectors == NULL) {
nvector = 1;
} else {
nvector = atoi(vectors);
}
hwaddr base = vbi->memmap[VIRT_MMIO].base + i * size;
dev = qdev_create(NULL, "virtio-mmio");
qdev_prop_set_uint32(dev, "nvector", nvector);
s = SYS_BUS_DEVICE(dev);
qdev_init_nofail(dev);
if (base != (hwaddr)-1) {
sysbus_mmio_map(s, 0, base);
}
/* This is the code that qemu connect multiple IRQs between virtio-mmio and GIC */
for (n = 0; n < nvector; n++) {
sysbus_connect_irq(s, n, pic[irq+n]);
}
char *nodename;
nodename = g_strdup_printf("/virtio_mmio@%" PRIx64, base);
qemu_fdt_add_subnode(vbi->fdt, nodename);
qemu_fdt_setprop_string(vbi->fdt, nodename,
"compatible", "virtio,mmio");
qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
2, base, 2, size);
int qdt_size = nvector * sizeof(uint32_t) * 3;
uint32_t *qdt_tmp = (uint32_t *)malloc(qdt_size);
/* This is the code that qemu prepare the dts for virtio-mmio with multiple IRQs */
for (n = 0; n < nvector; n++) {
*(qdt_tmp+n*3) = cpu_to_be32(GIC_FDT_IRQ_TYPE_SPI);
*(qdt_tmp+n*3+1) = cpu_to_be32(irq+n);
*(qdt_tmp+n*3+2) = cpu_to_be32(GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
}
qemu_fdt_setprop(vbi->fdt, nodename, "interrupts", qdt_tmp, qdt_size);
g_free(nodename);
>> for (i = 0; i < nirqs; i++) {
>> irq = platform_get_irq(vm_dev->pdev, i);
>> if (irq == -ENXIO)
>> goto error;
>> }
>>
>> If we can't get the expected number of IRQs, return error and this try
>> fails. Then guest will try two IRQS and single IRQ like virtio-pci.
>>
>>> Could you please document the new interface?
>>> E.g. send a patch for virtio spec.
>>
>> Ok, I'll send it later. Thank you very much :)
>>
>> Shannon
>>
>>> I think this really should be controlled by hypervisor, per device.
>>> I'm also tempted to make this a virtio 1.0 only feature.
>>>
>>>
>>>
>>>> ---
>>>> drivers/virtio/virtio_mmio.c | 234 ++++++++++++++++++++++++++++++++++++------
>>>> 1 files changed, 203 insertions(+), 31 deletions(-)
>>>>
>>>> diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
>>>> index c600ccf..2b7d935 100644
>>>> --- a/drivers/virtio/virtio_mmio.c
>>>> +++ b/drivers/virtio/virtio_mmio.c
>>>> @@ -122,6 +122,15 @@ struct virtio_mmio_device {
>>>> /* a list of queues so we can dispatch IRQs */
>>>> spinlock_t lock;
>>>> struct list_head virtqueues;
>>>> +
>>>> + /* multiple irq support */
>>>> + int single_irq_enabled;
>>>> + /* Number of available irqs */
>>>> + unsigned num_irqs;
>>>> + /* Used number of irqs */
>>>> + int used_irqs;
>>>> + /* Name strings for interrupts. */
>>>> + char (*vm_vq_names)[256];
>>>> };
>>>>
>>>> struct virtio_mmio_vq_info {
>>>> @@ -229,33 +238,53 @@ static bool vm_notify(struct virtqueue *vq)
>>>> return true;
>>>> }
>>>>
>>>> +/* Handle a configuration change: Tell driver if it wants to know. */
>>>> +static irqreturn_t vm_config_changed(int irq, void *opaque)
>>>> +{
>>>> + struct virtio_mmio_device *vm_dev = opaque;
>>>> + struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
>>>> + struct virtio_driver, driver);
>>>> +
>>>> + if (vdrv && vdrv->config_changed)
>>>> + vdrv->config_changed(&vm_dev->vdev);
>>>> + return IRQ_HANDLED;
>>>> +}
>>>> +
>>>> /* Notify all virtqueues on an interrupt. */
>>>> -static irqreturn_t vm_interrupt(int irq, void *opaque)
>>>> +static irqreturn_t vm_vring_interrupt(int irq, void *opaque)
>>>> {
>>>> struct virtio_mmio_device *vm_dev = opaque;
>>>> struct virtio_mmio_vq_info *info;
>>>> - struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
>>>> - struct virtio_driver, driver);
>>>> - unsigned long status;
>>>> + irqreturn_t ret = IRQ_NONE;
>>>> unsigned long flags;
>>>> +
>>>> + spin_lock_irqsave(&vm_dev->lock, flags);
>>>> + list_for_each_entry(info, &vm_dev->virtqueues, node) {
>>>> + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
>>>> + ret = IRQ_HANDLED;
>>>> + }
>>>> + spin_unlock_irqrestore(&vm_dev->lock, flags);
>>>> +
>>>> + return ret;
>>>> +}
>>>> +
>>>> +/* Notify all virtqueues and handle a configuration
>>>> + * change on an interrupt. */
>>>> +static irqreturn_t vm_interrupt(int irq, void *opaque)
>>>> +{
>>>> + struct virtio_mmio_device *vm_dev = opaque;
>>>> + unsigned long status;
>>>> irqreturn_t ret = IRQ_NONE;
>>>>
>>>> /* Read and acknowledge interrupts */
>>>> status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS);
>>>> writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK);
>>>>
>>>> - if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)
>>>> - && vdrv && vdrv->config_changed) {
>>>> - vdrv->config_changed(&vm_dev->vdev);
>>>> - ret = IRQ_HANDLED;
>>>> - }
>>>> + if (unlikely(status & VIRTIO_MMIO_INT_CONFIG))
>>>> + return vm_config_changed(irq, opaque);
>>>>
>>>> - if (likely(status & VIRTIO_MMIO_INT_VRING)) {
>>>> - spin_lock_irqsave(&vm_dev->lock, flags);
>>>> - list_for_each_entry(info, &vm_dev->virtqueues, node)
>>>> - ret |= vring_interrupt(irq, info->vq);
>>>> - spin_unlock_irqrestore(&vm_dev->lock, flags);
>>>> - }
>>>> + if (likely(status & VIRTIO_MMIO_INT_VRING))
>>>> + return vm_vring_interrupt(irq, opaque);
>>>>
>>>> return ret;
>>>> }
>>>> @@ -284,18 +313,98 @@ static void vm_del_vq(struct virtqueue *vq)
>>>> kfree(info);
>>>> }
>>>>
>>>> -static void vm_del_vqs(struct virtio_device *vdev)
>>>> +static void vm_free_irqs(struct virtio_device *vdev)
>>>> {
>>>> + int i;
>>>> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
>>>> +
>>>> + if (vm_dev->single_irq_enabled) {
>>>> + free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
>>>> + vm_dev->single_irq_enabled = 0;
>>>> + }
>>>> +
>>>> + for (i = 0; i < vm_dev->used_irqs; ++i)
>>>> + free_irq(platform_get_irq(vm_dev->pdev, i), vm_dev);
>>>> +
>>>> + vm_dev->num_irqs = 0;
>>>> + vm_dev->used_irqs = 0;
>>>> + kfree(vm_dev->vm_vq_names);
>>>> + vm_dev->vm_vq_names = NULL;
>>>> +}
>>>> +
>>>> +static void vm_del_vqs(struct virtio_device *vdev)
>>>> +{
>>>> struct virtqueue *vq, *n;
>>>>
>>>> list_for_each_entry_safe(vq, n, &vdev->vqs, list)
>>>> vm_del_vq(vq);
>>>>
>>>> - free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
>>>> + vm_free_irqs(vdev);
>>>> +}
>>>> +
>>>> +static int vm_request_multiple_irqs(struct virtio_device *vdev, int nirqs,
>>>> + bool per_vq_irq)
>>>> +{
>>>> + int err = -ENOMEM;
>>>> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
>>>> + unsigned i, v;
>>>> + int irq = 0;
>>>> +
>>>> + vm_dev->num_irqs = nirqs;
>>>> + vm_dev->used_irqs = 0;
>>>> +
>>>> + vm_dev->vm_vq_names = kmalloc_array(nirqs, sizeof(*vm_dev->vm_vq_names),
>>>> + GFP_KERNEL);
>>>> + if (!vm_dev->vm_vq_names)
>>>> + goto error;
>>>> +
>>>> + for (i = 0; i < nirqs; i++) {
>>>> + irq = platform_get_irq(vm_dev->pdev, i);
>>>> + if (irq == -ENXIO)
>>>> + goto error;
>>>> + }
>>>> +
>>>> + /* Set the irq used for configuration */
>>>> + v = vm_dev->used_irqs;
>>>> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
>>>> + "%s-config", dev_name(&vdev->dev));
>>>> + irq = platform_get_irq(vm_dev->pdev, v);
>>>> + err = request_irq(irq, vm_config_changed, 0,
>>>> + vm_dev->vm_vq_names[v], vm_dev);
>>>> + ++vm_dev->used_irqs;
>>>> + if (err)
>>>> + goto error;
>>>> +
>>>> + if (!per_vq_irq) {
>>>> + /* Shared irq for all VQs */
>>>> + v = vm_dev->used_irqs;
>>>> + snprintf(vm_dev->vm_vq_names[v], sizeof(*vm_dev->vm_vq_names),
>>>> + "%s-virtqueues", dev_name(&vdev->dev));
>>>> + irq = platform_get_irq(vm_dev->pdev, v);
>>>> + err = request_irq(irq, vm_vring_interrupt, 0,
>>>> + vm_dev->vm_vq_names[v], vm_dev);
>>>> + if (err)
>>>> + goto error;
>>>> + ++vm_dev->used_irqs;
>>>> + }
>>>> + return 0;
>>>> +error:
>>>> + vm_free_irqs(vdev);
>>>> + return err;
>>>> }
>>>>
>>>> +static int vm_request_single_irq(struct virtio_device *vdev)
>>>> +{
>>>> + int err;
>>>> + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
>>>> + int irq = platform_get_irq(vm_dev->pdev, 0);
>>>>
>>>> + err = request_irq(irq, vm_interrupt, IRQF_SHARED,
>>>> + dev_name(&vdev->dev), vm_dev);
>>>> + if (!err)
>>>> + vm_dev->single_irq_enabled = 1;
>>>> + return err;
>>>> +}
>>>>
>>>> static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
>>>> void (*callback)(struct virtqueue *vq),
>>>> @@ -392,29 +501,92 @@ error_available:
>>>> return ERR_PTR(err);
>>>> }
>>>>
>>>> -static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
>>>> - struct virtqueue *vqs[],
>>>> - vq_callback_t *callbacks[],
>>>> - const char *names[])
>>>> +static int vm_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
>>>> + struct virtqueue *vqs[],
>>>> + vq_callback_t *callbacks[],
>>>> + const char *names[],
>>>> + bool use_multiple_irq,
>>>> + bool per_vq_irq)
>>>> {
>>>> struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
>>>> - unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
>>>> - int i, err;
>>>> + int i, err, nirqs, irq;
>>>> +
>>>> + if (!use_multiple_irq) {
>>>> + /* Old style: one normal interrupt for change and all vqs. */
>>>> + err = vm_request_single_irq(vdev);
>>>> + if (err)
>>>> + goto error_request;
>>>> + } else {
>>>> + if (per_vq_irq) {
>>>> + /* Best option: one for change interrupt, one per vq. */
>>>> + nirqs = 1;
>>>> + for (i = 0; i < nvqs; ++i)
>>>> + if (callbacks[i])
>>>> + ++nirqs;
>>>> + } else {
>>>> + /* Second best: one for change, shared for all vqs. */
>>>> + nirqs = 2;
>>>> + }
>>>>
>>>> - err = request_irq(irq, vm_interrupt, IRQF_SHARED,
>>>> - dev_name(&vdev->dev), vm_dev);
>>>> - if (err)
>>>> - return err;
>>>> + err = vm_request_multiple_irqs(vdev, nirqs, per_vq_irq);
>>>> + if (err)
>>>> + goto error_request;
>>>> + }
>>>>
>>>> - for (i = 0; i < nvqs; ++i) {
>>>> + for (i = 0; i < nvqs; i++) {
>>>> + if (!names[i]) {
>>>> + vqs[i] = NULL;
>>>> + continue;
>>>> + }
>>>> vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]);
>>>> if (IS_ERR(vqs[i])) {
>>>> - vm_del_vqs(vdev);
>>>> - return PTR_ERR(vqs[i]);
>>>> + err = PTR_ERR(vqs[i]);
>>>> + goto error_find;
>>>> + }
>>>> + if (!per_vq_irq || !callbacks[i])
>>>> + continue;
>>>> + /* allocate per-vq irq if available and necessary */
>>>> + snprintf(vm_dev->vm_vq_names[vm_dev->used_irqs],
>>>> + sizeof(*vm_dev->vm_vq_names),
>>>> + "%s-%s",
>>>> + dev_name(&vm_dev->vdev.dev), names[i]);
>>>> + irq = platform_get_irq(vm_dev->pdev, vm_dev->used_irqs);
>>>> + err = request_irq(irq, vring_interrupt, 0,
>>>> + vm_dev->vm_vq_names[vm_dev->used_irqs], vqs[i]);
>>>> + if (err) {
>>>> + vm_del_vq(vqs[i]);
>>>> + goto error_find;
>>>> }
>>>> + ++vm_dev->used_irqs;
>>>> }
>>>> -
>>>> return 0;
>>>> +error_find:
>>>> + vm_del_vqs(vdev);
>>>> +
>>>> +error_request:
>>>> + return err;
>>>> +}
>>>> +
>>>> +static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
>>>> + struct virtqueue *vqs[],
>>>> + vq_callback_t *callbacks[],
>>>> + const char *names[])
>>>> +{
>>>> + int err;
>>>> +
>>>> + /* Try multiple irqs with one irq per queue. */
>>>> + err = vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
>>>> + if (!err)
>>>> + return 0;
>>>> + /* Fallback: multiple irqs with one irq for config,
>>>> + * one shared for queues. */
>>>> + err = vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
>>>> + true, false);
>>>> + if (!err)
>>>> + return 0;
>>>> + /* Finally fall back to regular single interrupts. */
>>>> + return vm_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
>>>> + false, false);
>>>> }
>>>>
>>>> static const char *vm_bus_name(struct virtio_device *vdev)
>>>> --
>>>> 1.7.1
>>>
>>> .
>>>
>>
>>
>> --
>> Shannon
>
> .
>
--
Shannon
On Tue, 2014-11-04 at 09:35 +0000, Shannon Zhao wrote:
> As the current virtio-mmio only support single irq,
> so some advanced features such as vhost-net with irqfd
> are not supported. And the net performance is not
> the best without vhost-net and irqfd supporting.
Could you please help me understand what the main issue is?
Is it about:
1. The fact that the existing implementation blindly kicks all queues,
instead only of the updated ones?
or:
2. Literally having a dedicated interrupt line (remember, we're talking
"real" interrupts here, not message signalled ones) per queue, so they
can be handled by different processors at the same time?
Now, if it's only about 1, the simplest solution would be to extend the
VIRTIO_MMIO_INTERRUPT_STATUS register to signal up to 30 queues
"readiness" in bits 2-31, still keeping bit 0 as a "combined"
VIRTIO_MMIO_INT_VRING. In case when VIRTIO_MMIO_INT_VRING is set and
none of the "individual" bits is (a device which doesn't support this
feature or one that has more than 30 queues and of of those is ready) we
would fall back to the original "kick all queues" approach. This could
be a useful (and pretty simple) extension. In the worst case scenario it
could be a post-1.0 standard addition, as it would provide backward
compatibility.
However, if it's about 2, we're talking larger changes here. From the
device perspective, we can define it as having per-queue (plus one for
config) interrupt output *and* a "combined" output, being simple logical
"or" of all the others. Then, the Device Tree bindings would be used to
express the implementation choices (I'd keep the kernel parameter
approach supporting the single interrupt case only). This is a very
popular and well understood approach for memory mapped peripherals (for
example, see the Device Tree bindings of many existing peripherals). It
allows the system integrator to make a decision
when it's coming to latency vs number of interrupt lines trade off. The
main issue is that we can't really impose a limit on a number of queues,
therefore on a number of interrupts. This would require adding a new
"interrupt acknowledge" register, which would take a number of the queue
(or a symbolic value for the config one) instead of a bit mask. And I
must say that I'm not enjoying the idea of such substantial change to
the specification that late in the process... (in other words: you'll
have to put extra effort into convincing me :-)
> This patch support virtio-mmio to request multiple
> irqs like virtio-pci. With this patch and qemu assigning
> multiple irqs for virtio-mmio device, it's ok to use
> vhost-net with irqfd on arm/arm64.
Could you please tell me how many queues (interrupts) are we talking
about in this case? 5? A dozen? Hundreds?
Disclaimer: I have no personal experience with virtio and network (due
to the fact how our Fast Models are implemented, I mostly us block
devices and 9p protocol over virtio and I get enough performance from
them :-).
> As arm doesn't support msi-x now,
To be precise: "ARM" does "support" MSI-X :-) (google for GICv2m)
The correct statement would be: "normal memory mapped devices have no
interface for message signalled interrupts (like MSI-X)"
> we use GSI for multiple irq.
I'm not sure what GSI stands for, but looking at the code I assume it's
just a "normal" peripheral interrupt.
> In this patch we use "vm_try_to_find_vqs"
> to check whether multiple irqs are supported like
> virtio-pci.
Yeah, I can see that you have followed virtio-pci quite literally. I'm
particularly not convinced to the one interrupt for config, one for all
queues option. Doesn't make any sense to me here.
> Is this the right direction? is there other ways to
> make virtio-mmio support multiple irq? Hope for feedback.
One point I'd like to make is that the device was intentionally designed
with simplicity in mind first, performance later (something about
"embedded" etc" :-). Changing this assumption is of course possible, but
- I must say - makes me slightly uncomfortable... The extensions we're
discussing here seem doable, but I've noticed your other patches doing
with a shared memory region and I didn't like them at all, sorry.
I see the subject has been already touched in the discussions, but let
me bring PCI to the surface again. We're getting more server-class SOCs
in the market, which obviously bring PCI with them to both arm and arm64
world, something unheard of in the "mobile past". I believe the PCI
patches for the arm64 have been already merged in the kernel.
Therefore: I'm not your boss so, obviously, I can't tell you what to do,
but could you consider redirecting your efforts into getting the "ARM
PCI" up and running in qemu so you can simply use the existing
infrastructure? This would save us a lot of work and pain in doing late
functional changes to the standard and will be probably more
future-proof from your perspective (PCI will happen, sooner or later -
you can make it sooner ;-)
Regards
Pawel
On 2014/11/11 23:11, Pawel Moll wrote:
> On Tue, 2014-11-04 at 09:35 +0000, Shannon Zhao wrote:
>> As the current virtio-mmio only support single irq,
>> so some advanced features such as vhost-net with irqfd
>> are not supported. And the net performance is not
>> the best without vhost-net and irqfd supporting.
>
> Could you, please, help understanding me where does the main issue is?
> Is it about:
>
> 1. The fact that the existing implementation blindly kicks all queues,
> instead only of the updated ones?
>
> or:
>
> 2. Literally having a dedicated interrupt line (remember, we're talking
> "real" interrupts here, not message signalled ones) per queue, so they
> can be handled by different processors at the same time?
>
The main issue is that the current virtio-mmio only supports one interrupt, which is shared by
config and queues. Therefore the virtio-mmio driver has to read
"VIRTIO_MMIO_INTERRUPT_STATUS" to get the interrupt reason and work out whom this interrupt is for.
If we use vhost-net, which uses irqfd to inject interrupts, vhost-net doesn't update
"VIRTIO_MMIO_INTERRUPT_STATUS", so the guest driver can't read the interrupt reason and
doesn't call a handler to process it.
So we can assign a dedicated interrupt line per queue for virtio-mmio, and then it can work with
irqfd.
> Now, if it's only about 1, the simplest solution would be to extend the
> VIRTIO_MMIO_INTERRUPT_STATUS register to signal up to 30 queues
> "readiness" in bits 2-31, still keeping bit 0 as a "combined"
> VIRTIO_MMIO_INT_VRING. In case when VIRTIO_MMIO_INT_VRING is set and
> none of the "individual" bits is (a device which doesn't support this
> feature or one that has more than 30 queues and one of those is ready) we
> would fall back to the original "kick all queues" approach. This could
> be a useful (and pretty simple) extension. In the worst case scenario it
> could be a post-1.0 standard addition, as it would provide backward
> compatibility.
>
> However, if it's about 2, we're talking larger changes here. From the
> device perspective, we can define it as having per-queue (plus one for
> config) interrupt output *and* a "combined" output, being simple logical
> "or" of all the others. Then, the Device Tree bindings would be used to
> express the implementation choices (I'd keep the kernel parameter
> approach supporting the single interrupt case only). This is a very
> popular and well understood approach for memory mapped peripherals (for
> example, see many existing memory mapped controllers). It allows the system
> integrator to make a decision when it comes to the latency vs number of
> interrupt lines trade-off. The
> main issue is that we can't really impose a limit on a number of queues,
> therefore on a number of interrupts. This would require adding a new
> "interrupt acknowledge" register, which would take a number of the queue
> (or a symbolic value for the config one) instead of a bit mask. And I
Yes, maybe we should add a new "interrupt acknowledge" register for the backend and frontend to
negotiate the number of queues.
> must say that I'm not enjoying the idea of such substantial change to
> the specification that late in the process... (in other words: you'll
> have to put extra effort into convincing me :-)
>
>> This patch support virtio-mmio to request multiple
>> irqs like virtio-pci. With this patch and qemu assigning
>> multiple irqs for virtio-mmio device, it's ok to use
>> vhost-net with irqfd on arm/arm64.
>
> Could you please tell me how many queues (interrupts) are we talking
> about in this case? 5? A dozen? Hundreds?
>
Theoretically the number of interrupts has no limit, but because of the limit on ARM interrupt
lines, the number should be less than the number of available ARM interrupt lines. In a real
situation, I think, the number is generally less than 17 (8 pairs of vring interrupts and one
config interrupt).
> Disclaimer: I have no personal experience with virtio and network (due
> to the fact how our Fast Models are implemented, I mostly us block
> devices and 9p protocol over virtio and I get enough performance from
> them :-).
>
>> As arm doesn't support msi-x now,
>
> To be precise: "ARM" does "support" MSI-X :-) (google for GICv2m)
Sorry, I mean ARM with GICv2.
>
> The correct statement would be: "normal memory mapped devices have no
> interface for message signalled interrupts (like MSI-X)"
>
Yes, that's right.
>> we use GSI for multiple irq.
>
> I'm not sure what GSI stands for, but looking at the code I assume it's
> just a "normal" peripheral interrupt.
>
>> In this patch we use "vm_try_to_find_vqs"
>> to check whether multiple irqs are supported like
>> virtio-pci.
>
> Yeah, I can see that you have followed virtio-pci quite literally. I'm
> particularly not convinced to the one interrupt for config, one for all
> queues option. Doesn't make any sense to me here.
>
About one interrupt for all queues: it's not a typical case, but it just offers
one more choice for users. Users should configure the number of interrupts
according to their situation.
>> Is this the right direction? is there other ways to
>> make virtio-mmio support multiple irq? Hope for feedback.
>
> One point I'd like to make is that the device was intentionally designed
> with simplicity in mind first, performance later (something about
> "embedded" etc" :-). Changing this assumption is of course possible, but
Ah, I think ARM is not only about embedded things. Maybe it could have a wider application,
such as micro servers. Just my personal opinion.
> - I must say - makes me slightly uncomfortable... The extensions we're
> discussing here seem doable, but I've noticed your other patches doing
> with a shared memory region and I didn't like them at all, sorry.
>
The approach with a shared memory region has been dropped, as you can see from the mailing list.
The approach of this patch gets a net performance improvement of about 30%.
This may make sense for platforms without MSI support (e.g. ARM with GICv2).
> I see the subject has been already touched in the discussions, but let
> me bring PCI to the surface again. We're getting more server-class SOCs
> in the market, which obviously bring PCI with them to both arm and arm64
> world, something unheard of in the "mobile past". I believe the PCI
> patches for the arm64 have been already merged in the kernel.
>
> Therefore: I'm not your boss so, obviously, I can't tell you what to do,
> but could you consider redirecting your efforts into getting the "ARM
> PCI" up and running in qemu so you can simply use the existing
> infrastructure? This would save us a lot of work and pain in doing late
> functional changes to the standard and will be probably more
> future-proof from your perspective (PCI will happen, sooner or later -
> you can make it sooner ;-)
>
> Regards
>
> Pawel
>
>
> .
>
--
Shannon
On Wed, 2014-11-12 at 08:32 +0000, Shannon Zhao wrote:
> On 2014/11/11 23:11, Pawel Moll wrote:
> > On Tue, 2014-11-04 at 09:35 +0000, Shannon Zhao wrote:
> >> As the current virtio-mmio only support single irq,
> >> so some advanced features such as vhost-net with irqfd
> >> are not supported. And the net performance is not
> >> the best without vhost-net and irqfd supporting.
> >
> > Could you, please, help understanding me where does the main issue is?
> > Is it about:
> >
> > 1. The fact that the existing implementation blindly kicks all queues,
> > instead only of the updated ones?
> >
> > or:
> >
> > 2. Literally having a dedicated interrupt line (remember, we're talking
> > "real" interrupts here, not message signalled ones) per queue, so they
> > can be handled by different processors at the same time?
> >
> The main issue is that current virtio-mmio only support one interrupt which is shared by
> config and queues.
Yes.
Additional question: is your device changing the configuration space
often? Other devices (for example block devices) change configuration
very rarely (eg. change of the media size, usually meaning
hot-un-plugging), so unless you tell me that the config space is
frequently changes, I'll consider the config event irrelevant from the
performance point of view.
> Therefore the virtio-mmio driver should read the
> "VIRTIO_MMIO_INTERRUPT_STATUS" to get the interrupt reason and check whom this interrupt is to.
Correct.
> If we use vhost-net which uses irqfd to inject interrupt, the
> vhost-net doesn't update "VIRTIO_MMIO_INTERRUPT_STATUS", then the
> guest driver can't read the interrupt reason and doesn't call a
> handler to process.
Ok, so this is the bit I don't understand. Explain, please how does this
whole thing work. And keep in mind that I've just looked up "irqfd" for
the first time in my life, so know nothing about its implementation. The
same applies to "vhost-net". I'm simply coming from a completely
different background, so bear with me on this.
Now, the virtio-mmio device *must* behave in a certain way, as specified
in both the old and the new version of the spec. If a Used Ring of *any*
of the queues has been updated the device *must* set bit 0 of the
interrupt status register, and - in effect - generate the interrupt.
But you are telling me that in some circumstances the interrupt is *not*
generated when a queue requires handling. Sounds like a bug to me, as it
is not following the requirements of the device specification.
It is quite likely that I simply don't see some facts which are obvious
for you, so please - help me understand.
> So we can assign a dedicated interrupt line per queue for virtio-mmio
> and it can work with irqfd.
>
If my reasoning above is correct, I'd say that you are just working
around a functional bug, not improving performance. And this is a wrong
way of solving the problem.
> Theoretically the number of interrupts has no limit, but as the limit
> of ARM interrupt line, the number should be less than ARM interrupt
> lines.
Let me just emphasize the fact that virtio-mmio is not related to ARM in
any way, it's just a memory mapped device with a generic interrupt
output. Any limitations of a particular hardware platform (I guess
you're referring to the maximum number of peripheral interrupts a GIC
can take) are irrelevant - if we go with multiple interrupts, it must
cater for as many interrupt lines as one wishes... :-)
> In the real situation, I think, the number
> is generally less than 17 (8 pairs of vring interrupts and one config
> interrupt).
... but of course we could optimize for the real scenarios. And I'm glad
to hear that the number you quoted is less than 30 :-)
> >> we use GSI for multiple irq.
> >
> > I'm not sure what GSI stands for, but looking at the code I assume it's
> > just a "normal" peripheral interrupt.
> >
> >> In this patch we use "vm_try_to_find_vqs"
> >> to check whether multiple irqs are supported like
> >> virtio-pci.
> >
> > Yeah, I can see that you have followed virtio-pci quite literally. I'm
> > particularly not convinced to the one interrupt for config, one for all
> > queues option. Doesn't make any sense to me here.
> >
> About one interrupt for all queues, it's not a typical case. But just offer
> one more choice for users. Users should configure the number of interrupts
> according to their situation.
> >> Is this the right direction? is there other ways to
> >> make virtio-mmio support multiple irq? Hope for feedback.
> >
> > One point I'd like to make is that the device was intentionally designed
> > with simplicity in mind first, performance later (something about
> > "embedded" etc" :-). Changing this assumption is of course possible, but
> Ah, I think ARM is not only about embedded things. Maybe it could has a wider application
> such as micro server. Just my personal opinion.
>
> > - I must say - makes me slightly uncomfortable... The extensions we're
> > discussing here seem doable, but I've noticed your other patches doing
> > with a shared memory region and I didn't like them at all, sorry.
> >
> The approach with a shared memory region is dropped as you can see from the mailing list.
Glad to hear that :-)
> The approach of this patch get a net performance improvement about 30%.
> This maybe makes sense to the paltform without MSI support(e.g ARM with GICv2).
I appreciate the improvement. I'm just cautions when it comes to
significant changes to the standard so late in the process.
Pawel
On 2014/11/13 2:33, Pawel Moll wrote:
> On Wed, 2014-11-12 at 08:32 +0000, Shannon Zhao wrote:
>> On 2014/11/11 23:11, Pawel Moll wrote:
>>> On Tue, 2014-11-04 at 09:35 +0000, Shannon Zhao wrote:
>>>> As the current virtio-mmio only support single irq,
>>>> so some advanced features such as vhost-net with irqfd
>>>> are not supported. And the net performance is not
>>>> the best without vhost-net and irqfd supporting.
>>>
>>> Could you, please, help understanding me where does the main issue is?
>>> Is it about:
>>>
>>> 1. The fact that the existing implementation blindly kicks all queues,
>>> instead only of the updated ones?
>>>
>>> or:
>>>
>>> 2. Literally having a dedicated interrupt line (remember, we're talking
>>> "real" interrupts here, not message signalled ones) per queue, so they
>>> can be handled by different processors at the same time?
>>>
>> The main issue is that current virtio-mmio only support one interrupt which is shared by
>> config and queues.
>
> Yes.
>
> Additional question: is your device changing the configuration space
> often? Other devices (for example block devices) change configuration
> very rarely (eg. change of the media size, usually meaning
> hot-un-plugging), so unless you tell me that the config space is
> frequently changes, I'll consider the config event irrelevant from the
> performance point of view.
>
No, the configuration space does not change often.
>> Therefore the virtio-mmio driver should read the
>> "VIRTIO_MMIO_INTERRUPT_STATUS" to get the interrupt reason and check whom this interrupt is to.
>
> Correct.
>
>> If we use vhost-net which uses irqfd to inject interrupt, the
>> vhost-net doesn't update "VIRTIO_MMIO_INTERRUPT_STATUS", then the
>> guest driver can't read the interrupt reason and doesn't call a
>> handler to process.
>
> Ok, so this is the bit I don't understand. Explain, please how does this
> whole thing work. And keep in mind that I've just looked up "irqfd" for
> the first time in my life, so know nothing about its implementation. The
> same applies to "vhost-net". I'm simply coming from a completely
> different background, so bear with me on this.
>
Ok, sorry. I ignored this.
When we use only virtio-mmio, or vhost-net without irqfd, the device uses qemu_set_irq (within qemu)
to inject the interrupt, and at the same time qemu updates "VIRTIO_MMIO_INTERRUPT_STATUS" to tell the
guest driver whom this interrupt is for. All these things are done by qemu. Though qemu_set_irq will
call kvm_set_irq to do the real interrupt injection, the trigger is qemu, so it can update the
"VIRTIO_MMIO_INTERRUPT_STATUS" register before injecting the interrupt.
But if we use vhost-net with irqfd, the device uses the ioeventfd mechanism to inject the interrupt.
When an interrupt happens, it doesn't go through qemu; instead the irqfd finally calls kvm_set_irq
to inject the interrupt directly. All these things are done in kvm, so it can't update the
"VIRTIO_MMIO_INTERRUPT_STATUS" register.
So if the guest driver still uses the old interrupt handler, it has to read the
"VIRTIO_MMIO_INTERRUPT_STATUS" register to get the interrupt reason and run different handlers
for different reasons. But the register contains nothing, so none of the handlers will be called.
Did I make myself clear? :-)
> Now, the virtio-mmio device *must* behave in a certain way, as specified
> in both the old and the new version of the spec. If a Used Ring of *any*
> of the queues has been updated the device *must* set bit 0 of the
> interrupt status register, and - in effect - generate the interrupt.
>
> But you are telling me that in some circumstances the interrupt is *not*
> generated when a queue requires handling. Sounds like a bug to me, as it
> is not following the requirements of the device specification.
>
> It is quite likely that I simply don't see some facts which are obvious
> for you, so please - help me understand.
>
>> So we can assign a dedicated interrupt line per queue for virtio-mmio
>> and it can work with irqfd.
>>
> If my reasoning above is correct, I'd say that you are just working
> around a functional bug, not improving performance. And this is a wrong
> way of solving the problem.
>
>> Theoretically the number of interrupts has no limit, but as the limit
>> of ARM interrupt line, the number should be less than ARM interrupt
>> lines.
>
> Let me just emphasize the fact that virtio-mmio is not related to ARM in
> any way, it's just a memory mapped device with a generic interrupt
> output. Any limitations of a particular hardware platform (I guess
> you're referring to the maximum number of peripheral interrupts a GIC
> can take) are irrelevant - if we go with multiple interrupts, it must
> cater for as many interrupt lines as one wishes... :-)
>
>> In the real situation, I think, the number
>> is generally less than 17 (8 pairs of vring interrupts and one config
>> interrupt).
>
> ... but of course we could optimize for the real scenarios. And I'm glad
> to hear that the number you quoted is less then 30 :-)
>
>>>> we use GSI for multiple irq.
>>>
>>> I'm not sure what GSI stands for, but looking at the code I assume it's
>>> just a "normal" peripheral interrupt.
>>>
>>>> In this patch we use "vm_try_to_find_vqs"
>>>> to check whether multiple irqs are supported like
>>>> virtio-pci.
>>>
>>> Yeah, I can see that you have followed virtio-pci quite literally. I'm
>>> particularly not convinced to the one interrupt for config, one for all
>>> queues option. Doesn't make any sense to me here.
>>>
>> About one interrupt for all queues, it's not a typical case. But just offer
>> one more choice for users. Users should configure the number of interrupts
>> according to their situation.
>
>
>>>> Is this the right direction? is there other ways to
>>>> make virtio-mmio support multiple irq? Hope for feedback.
>>>
>>> One point I'd like to make is that the device was intentionally designed
>>> with simplicity in mind first, performance later (something about
>>> "embedded" etc" :-). Changing this assumption is of course possible, but
>> Ah, I think ARM is not only about embedded things. Maybe it could has a wider application
>> such as micro server. Just my personal opinion.
>>
>>> - I must say - makes me slightly uncomfortable... The extensions we're
>>> discussing here seem doable, but I've noticed your other patches doing
>>> with a shared memory region and I didn't like them at all, sorry.
>>>
>> The approach with a shared memory region is dropped as you can see from the mailing list.
>
> Glad to hear that :-)
>
>> The approach of this patch get a net performance improvement about 30%.
>> This maybe makes sense to the paltform without MSI support(e.g ARM with GICv2).
>
> I appreciate the improvement. I'm just cautions when it comes to
> significant changes to the standard so late in the process.
>
Sorry, I didn't notice it's at the final phase of the process. It's the first time in my life
that I've made a change to the virtio spec — just like it was your first time seeing irqfd. :-)
In a word, the multi-IRQ support makes vhost-net with irqfd work on top of virtio-mmio, and the
irqfd reduces vm-exits and context switches between qemu and kvm. So it can bring a performance
improvement.
> Pawel
>
>
> .
>
--
Shannon
On Thu, 2014-11-13 at 09:39 +0000, Shannon Zhao wrote:
> When we use only virtio-mmio or vhost-net without irqfd, the device uses qemu_set_irq(within qemu)
> to inject interrupt and at the same time qemu update "VIRTIO_MMIO_INTERRUPT_STATUS" to tell guest
> driver whom this interrupt to. All these things are done by qemu. Though qemu_set_irq will call
> kvm_set_irq to do the real interrupt inject, but the trigger is qemu and it can update the
> "VIRTIO_MMIO_INTERRUPT_STATUS" register before injecting the interrupt.
>
> But if we use vhost-net with irqfd, the device uses ioeventfd mechanism to inject interrupt.
> When an interrupt happened, it doesn't transfer to qemu, while the irqfd finally call kvm_set_irq
> to inject the interrupt directly. All these things are done in kvm and it can't update the
> "VIRTIO_MMIO_INTERRUPT_STATUS" register.
> So if the guest driver still uses the old interrupt handler, it has to read the
> "VIRTIO_MMIO_INTERRUPT_STATUS" register to get the interrupt reason and run different handlers
> for different reasons. But the register has nothing and none of handlers will be called.
>
> I make myself clear? :-)
I see... (well, at least I believe I see ;-)
Of course this means that with irqfd, the device is simply non-compliant
with the spec. And that extending the status register doesn't help you
at all, so we can drop this idea.
Paradoxically, it's a good news (of a sort :-) because I don't think we
should be doing such a fundamental change in the spec at this date. I'll
discuss it with others in the TC and I'm open to be convinced otherwise,
but I believe the spec should stay as it is.
Having said that, I see no problem with experimenting with the device
model so the next version of the standard is extended. Two suggestions I
have would be:
1. Drop the virtio-pci like case with two interrupts (one for config
changes, one for all queues), as it doesn't bring anything new. Just
make it all interrupts individual.
2. Change the way the interrupts are acknowledge - instead of a bitmask,
have a register which takes a queue number to ack the queue's interrupt
and ~0 to ack the config interrupt.
3. Change the version of the device to (intially) 0x80000003 - I've just
made an executive decision :-) that non-standard compliant devices
should have the MSB of the version number set (I'll propose to reserve
this range of versions in future version of the spec). We'll consider it
a "prototype of the version 3". Then make the driver behave in the new
way when (and only when) such device version is observed.
Also, I remembered I haven't addressed one of your previous comments:
On Wed, 2014-11-12 at 08:32 +0000, Shannon Zhao wrote:
> > One point I'd like to make is that the device was intentionally designed
> > with simplicity in mind first, performance later (something about
> > "embedded" etc" :-). Changing this assumption is of course possible, but
> Ah, I think ARM is not only about embedded things. Maybe it could has
> a wider application > such as micro server. Just my personal opinion.
By all means, I couldn't agree more. But there's one thing you have to
keep in mind - it doesn't matter whether the real hardware has got PCI
or not, but what is emulated by qemu/KVM. Therefore, if only you can get
the PCI host controller working in the qemu virt machine (which should
be much simpler now, as we have generic support for PCI
controllers/bridges in the kernel now), you'll be able to forget the
issue of virtio-mmio and multiple interrupts and still enjoy your
performance gains :-)
Does it all make sense?
Pawel