2024-03-26 04:08:59

by Nipun Gupta

[permalink] [raw]
Subject: [PATCH v5 1/2] genirq/msi: add wrapper msi allocation API and export msi functions

MSI functions for allocation and free can be directly used by
the device drivers without any wrapper provided by bus drivers.
So export these MSI functions.

Also, add a wrapper API to allocate MSIs providing only the
number of interrupts rather than range for simpler driver usage.

Signed-off-by: Nipun Gupta <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
---

Changes in v4->v5:
- updated commit description as per the comments.
- Rebased on 6.9-rc1

Changes in v3->v4:
- No change

Changes in v3:
- New in this patch series. VFIO-CDX uses the new wrapper API
msi_domain_alloc_irqs and exported APIs. (This patch is moved
from CDX interrupt support to vfio-cdx patch, where these APIs
are used).

include/linux/msi.h | 6 ++++++
kernel/irq/msi.c | 2 ++
2 files changed, 8 insertions(+)

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 26d07e23052e..765a65581a66 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -676,6 +676,12 @@ int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nve
void platform_device_msi_free_irqs_all(struct device *dev);

bool msi_device_has_isolated_msi(struct device *dev);
+
+static inline int msi_domain_alloc_irqs(struct device *dev, unsigned int domid, int nirqs)
+{
+ return msi_domain_alloc_irqs_range(dev, domid, 0, nirqs - 1);
+}
+
#else /* CONFIG_GENERIC_MSI_IRQ */
static inline bool msi_device_has_isolated_msi(struct device *dev)
{
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index f90952ebc494..2024f89baea4 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -1434,6 +1434,7 @@ int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
msi_unlock_descs(dev);
return ret;
}
+EXPORT_SYMBOL_GPL(msi_domain_alloc_irqs_range);

/**
* msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain
@@ -1680,6 +1681,7 @@ void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
msi_domain_free_irqs_range_locked(dev, domid, first, last);
msi_unlock_descs(dev);
}
+EXPORT_SYMBOL_GPL(msi_domain_free_irqs_all);

/**
* msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain
--
2.34.1



2024-03-26 04:09:14

by Nipun Gupta

[permalink] [raw]
Subject: [PATCH v5 2/2] vfio/cdx: add interrupt support

Support the following ioctls for CDX devices:
- VFIO_DEVICE_GET_IRQ_INFO
- VFIO_DEVICE_SET_IRQS

This allows user to set an eventfd for cdx device interrupts and
trigger this interrupt eventfd from userspace.
All CDX device interrupts are MSIs. The MSIs are allocated from the
CDX-MSI domain.

Signed-off-by: Nipun Gupta <[email protected]>
Reviewed-by: Pieter Jansen van Vuuren <[email protected]>
---

Changes v4->v5:
- Rebased on 6.9-rc1

Changes v3->v4:
- Added VFIO_IRQ_INFO_NORESIZE flag

Changes v2->v3:
- Use generic MSI alloc/free APIs instead of CDX MSI APIs
- Rebased on Linux 6.8-rc6

Changes v1->v2:
- Rebased on Linux 6.7-rc1

drivers/vfio/cdx/Makefile | 2 +-
drivers/vfio/cdx/intr.c | 211 +++++++++++++++++++++++++++++++++++++
drivers/vfio/cdx/main.c | 60 ++++++++++-
drivers/vfio/cdx/private.h | 18 ++++
4 files changed, 289 insertions(+), 2 deletions(-)
create mode 100644 drivers/vfio/cdx/intr.c

diff --git a/drivers/vfio/cdx/Makefile b/drivers/vfio/cdx/Makefile
index cd4a2e6fe609..df92b320122a 100644
--- a/drivers/vfio/cdx/Makefile
+++ b/drivers/vfio/cdx/Makefile
@@ -5,4 +5,4 @@

obj-$(CONFIG_VFIO_CDX) += vfio-cdx.o

-vfio-cdx-objs := main.o
+vfio-cdx-objs := main.o intr.o
diff --git a/drivers/vfio/cdx/intr.c b/drivers/vfio/cdx/intr.c
new file mode 100644
index 000000000000..4637b57d0242
--- /dev/null
+++ b/drivers/vfio/cdx/intr.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
+ */
+
+#include <linux/vfio.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/eventfd.h>
+#include <linux/msi.h>
+#include <linux/interrupt.h>
+
+#include "linux/cdx/cdx_bus.h"
+#include "private.h"
+
+static irqreturn_t vfio_cdx_msihandler(int irq_no, void *arg)
+{
+ struct eventfd_ctx *trigger = arg;
+
+ eventfd_signal(trigger);
+ return IRQ_HANDLED;
+}
+
+static int vfio_cdx_msi_enable(struct vfio_cdx_device *vdev, int nvec)
+{
+ struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
+ struct device *dev = vdev->vdev.dev;
+ int msi_idx, ret;
+
+ vdev->cdx_irqs = kcalloc(nvec, sizeof(struct vfio_cdx_irq), GFP_KERNEL);
+ if (!vdev->cdx_irqs)
+ return -ENOMEM;
+
+ ret = cdx_enable_msi(cdx_dev);
+ if (ret) {
+ kfree(vdev->cdx_irqs);
+ return ret;
+ }
+
+ /* Allocate cdx MSIs */
+ ret = msi_domain_alloc_irqs(dev, MSI_DEFAULT_DOMAIN, nvec);
+ if (ret) {
+ cdx_disable_msi(cdx_dev);
+ kfree(vdev->cdx_irqs);
+ return ret;
+ }
+
+ for (msi_idx = 0; msi_idx < nvec; msi_idx++)
+ vdev->cdx_irqs[msi_idx].irq_no = msi_get_virq(dev, msi_idx);
+
+ vdev->irq_count = nvec;
+ vdev->config_msi = 1;
+
+ return 0;
+}
+
+static int vfio_cdx_msi_set_vector_signal(struct vfio_cdx_device *vdev,
+ int vector, int fd)
+{
+ struct eventfd_ctx *trigger;
+ int irq_no, ret;
+
+ if (vector < 0 || vector >= vdev->irq_count)
+ return -EINVAL;
+
+ irq_no = vdev->cdx_irqs[vector].irq_no;
+
+ if (vdev->cdx_irqs[vector].trigger) {
+ free_irq(irq_no, vdev->cdx_irqs[vector].trigger);
+ kfree(vdev->cdx_irqs[vector].name);
+ eventfd_ctx_put(vdev->cdx_irqs[vector].trigger);
+ vdev->cdx_irqs[vector].trigger = NULL;
+ }
+
+ if (fd < 0)
+ return 0;
+
+ vdev->cdx_irqs[vector].name = kasprintf(GFP_KERNEL, "vfio-msi[%d](%s)",
+ vector, dev_name(vdev->vdev.dev));
+ if (!vdev->cdx_irqs[vector].name)
+ return -ENOMEM;
+
+ trigger = eventfd_ctx_fdget(fd);
+ if (IS_ERR(trigger)) {
+ kfree(vdev->cdx_irqs[vector].name);
+ return PTR_ERR(trigger);
+ }
+
+ ret = request_irq(irq_no, vfio_cdx_msihandler, 0,
+ vdev->cdx_irqs[vector].name, trigger);
+ if (ret) {
+ kfree(vdev->cdx_irqs[vector].name);
+ eventfd_ctx_put(trigger);
+ return ret;
+ }
+
+ vdev->cdx_irqs[vector].trigger = trigger;
+
+ return 0;
+}
+
+static int vfio_cdx_msi_set_block(struct vfio_cdx_device *vdev,
+ unsigned int start, unsigned int count,
+ int32_t *fds)
+{
+ int i, j, ret = 0;
+
+ if (start >= vdev->irq_count || start + count > vdev->irq_count)
+ return -EINVAL;
+
+ for (i = 0, j = start; i < count && !ret; i++, j++) {
+ int fd = fds ? fds[i] : -1;
+
+ ret = vfio_cdx_msi_set_vector_signal(vdev, j, fd);
+ }
+
+ if (ret) {
+ for (--j; j >= (int)start; j--)
+ vfio_cdx_msi_set_vector_signal(vdev, j, -1);
+ }
+
+ return ret;
+}
+
+static void vfio_cdx_msi_disable(struct vfio_cdx_device *vdev)
+{
+ struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
+ struct device *dev = vdev->vdev.dev;
+
+ vfio_cdx_msi_set_block(vdev, 0, vdev->irq_count, NULL);
+
+ if (!vdev->config_msi)
+ return;
+
+ msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN);
+ cdx_disable_msi(cdx_dev);
+ kfree(vdev->cdx_irqs);
+
+ vdev->cdx_irqs = NULL;
+ vdev->irq_count = 0;
+ vdev->config_msi = 0;
+}
+
+static int vfio_cdx_set_msi_trigger(struct vfio_cdx_device *vdev,
+ unsigned int index, unsigned int start,
+ unsigned int count, u32 flags,
+ void *data)
+{
+ struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
+ int i;
+
+ if (start + count > cdx_dev->num_msi)
+ return -EINVAL;
+
+ if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
+ vfio_cdx_msi_disable(vdev);
+ return 0;
+ }
+
+ if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ s32 *fds = data;
+ int ret;
+
+ if (vdev->config_msi)
+ return vfio_cdx_msi_set_block(vdev, start, count,
+ fds);
+ ret = vfio_cdx_msi_enable(vdev, cdx_dev->num_msi);
+ if (ret)
+ return ret;
+
+ ret = vfio_cdx_msi_set_block(vdev, start, count, fds);
+ if (ret)
+ vfio_cdx_msi_disable(vdev);
+
+ return ret;
+ }
+
+ for (i = start; i < start + count; i++) {
+ if (!vdev->cdx_irqs[i].trigger)
+ continue;
+ if (flags & VFIO_IRQ_SET_DATA_NONE)
+ eventfd_signal(vdev->cdx_irqs[i].trigger);
+ }
+
+ return 0;
+}
+
+int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev,
+ u32 flags, unsigned int index,
+ unsigned int start, unsigned int count,
+ void *data)
+{
+ if (flags & VFIO_IRQ_SET_ACTION_TRIGGER)
+ return vfio_cdx_set_msi_trigger(vdev, index, start,
+ count, flags, data);
+ else
+ return -EINVAL;
+}
+
+/* Free All IRQs for the given device */
+void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev)
+{
+ /*
+ * Device does not support any interrupt or the interrupts
+ * were not configured
+ */
+ if (!vdev->cdx_irqs)
+ return;
+
+ vfio_cdx_set_msi_trigger(vdev, 1, 0, 0, VFIO_IRQ_SET_DATA_NONE, NULL);
+}
diff --git a/drivers/vfio/cdx/main.c b/drivers/vfio/cdx/main.c
index 9cff8d75789e..f0861a38ae10 100644
--- a/drivers/vfio/cdx/main.c
+++ b/drivers/vfio/cdx/main.c
@@ -61,6 +61,7 @@ static void vfio_cdx_close_device(struct vfio_device *core_vdev)

kfree(vdev->regions);
cdx_dev_reset(core_vdev->dev);
+ vfio_cdx_irqs_cleanup(vdev);
}

static int vfio_cdx_bm_ctrl(struct vfio_device *core_vdev, u32 flags,
@@ -123,7 +124,7 @@ static int vfio_cdx_ioctl_get_info(struct vfio_cdx_device *vdev,
info.flags |= VFIO_DEVICE_FLAGS_RESET;

info.num_regions = cdx_dev->res_count;
- info.num_irqs = 0;
+ info.num_irqs = cdx_dev->num_msi ? 1 : 0;

return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
}
@@ -152,6 +153,59 @@ static int vfio_cdx_ioctl_get_region_info(struct vfio_cdx_device *vdev,
return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
}

+static int vfio_cdx_ioctl_get_irq_info(struct vfio_cdx_device *vdev,
+ struct vfio_irq_info __user *arg)
+{
+ unsigned long minsz = offsetofend(struct vfio_irq_info, count);
+ struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
+ struct vfio_irq_info info;
+
+ if (copy_from_user(&info, arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ if (info.index >= 1)
+ return -EINVAL;
+
+ info.flags = VFIO_IRQ_INFO_EVENTFD | VFIO_IRQ_INFO_NORESIZE;
+ info.count = cdx_dev->num_msi;
+
+ return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
+}
+
+static int vfio_cdx_ioctl_set_irqs(struct vfio_cdx_device *vdev,
+ struct vfio_irq_set __user *arg)
+{
+ unsigned long minsz = offsetofend(struct vfio_irq_set, count);
+ struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
+ struct vfio_irq_set hdr;
+ size_t data_size = 0;
+ u8 *data = NULL;
+ int ret = 0;
+
+ if (copy_from_user(&hdr, arg, minsz))
+ return -EFAULT;
+
+ ret = vfio_set_irqs_validate_and_prepare(&hdr, cdx_dev->num_msi,
+ 1, &data_size);
+ if (ret)
+ return ret;
+
+ if (data_size) {
+ data = memdup_user(arg->data, data_size);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+ }
+
+ ret = vfio_cdx_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
+ hdr.start, hdr.count, data);
+ kfree(data);
+
+ return ret;
+}
+
static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
unsigned int cmd, unsigned long arg)
{
@@ -164,6 +218,10 @@ static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
return vfio_cdx_ioctl_get_info(vdev, uarg);
case VFIO_DEVICE_GET_REGION_INFO:
return vfio_cdx_ioctl_get_region_info(vdev, uarg);
+ case VFIO_DEVICE_GET_IRQ_INFO:
+ return vfio_cdx_ioctl_get_irq_info(vdev, uarg);
+ case VFIO_DEVICE_SET_IRQS:
+ return vfio_cdx_ioctl_set_irqs(vdev, uarg);
case VFIO_DEVICE_RESET:
return cdx_dev_reset(core_vdev->dev);
default:
diff --git a/drivers/vfio/cdx/private.h b/drivers/vfio/cdx/private.h
index 8e9d25913728..7a8477ae4652 100644
--- a/drivers/vfio/cdx/private.h
+++ b/drivers/vfio/cdx/private.h
@@ -13,6 +13,14 @@ static inline u64 vfio_cdx_index_to_offset(u32 index)
return ((u64)(index) << VFIO_CDX_OFFSET_SHIFT);
}

+struct vfio_cdx_irq {
+ u32 flags;
+ u32 count;
+ int irq_no;
+ struct eventfd_ctx *trigger;
+ char *name;
+};
+
struct vfio_cdx_region {
u32 flags;
u32 type;
@@ -25,6 +33,16 @@ struct vfio_cdx_device {
struct vfio_cdx_region *regions;
u32 flags;
#define BME_SUPPORT BIT(0)
+ struct vfio_cdx_irq *cdx_irqs;
+ u32 irq_count;
+ u32 config_msi;
};

+int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev,
+ u32 flags, unsigned int index,
+ unsigned int start, unsigned int count,
+ void *data);
+
+void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev);
+
#endif /* VFIO_CDX_PRIVATE_H */
--
2.34.1


2024-04-19 22:31:54

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH v5 2/2] vfio/cdx: add interrupt support

On Tue, 26 Mar 2024 09:38:04 +0530
Nipun Gupta <[email protected]> wrote:

> Support the following ioctls for CDX devices:
> - VFIO_DEVICE_GET_IRQ_INFO
> - VFIO_DEVICE_SET_IRQS
>
> This allows user to set an eventfd for cdx device interrupts and
> trigger this interrupt eventfd from userspace.
> All CDX device interrupts are MSIs. The MSIs are allocated from the
> CDX-MSI domain.
>
> Signed-off-by: Nipun Gupta <[email protected]>
> Reviewed-by: Pieter Jansen van Vuuren <[email protected]>
> ---
>
> Changes v4->v5:
> - Rebased on 6.9-rc1
>
> Changes v3->v4:
> - Added VFIO_IRQ_INFO_NORESIZE flag
>
> Changes v2->v3:
> - Use generic MSI alloc/free APIs instead of CDX MSI APIs
> - Rebased on Linux 6.8-rc6
>
> Changes v1->v2:
> - Rebased on Linux 6.7-rc1
>
> drivers/vfio/cdx/Makefile | 2 +-
> drivers/vfio/cdx/intr.c | 211 +++++++++++++++++++++++++++++++++++++
> drivers/vfio/cdx/main.c | 60 ++++++++++-
> drivers/vfio/cdx/private.h | 18 ++++
> 4 files changed, 289 insertions(+), 2 deletions(-)
> create mode 100644 drivers/vfio/cdx/intr.c
>
> diff --git a/drivers/vfio/cdx/Makefile b/drivers/vfio/cdx/Makefile
> index cd4a2e6fe609..df92b320122a 100644
> --- a/drivers/vfio/cdx/Makefile
> +++ b/drivers/vfio/cdx/Makefile
> @@ -5,4 +5,4 @@
>
> obj-$(CONFIG_VFIO_CDX) += vfio-cdx.o
>
> -vfio-cdx-objs := main.o
> +vfio-cdx-objs := main.o intr.o
> diff --git a/drivers/vfio/cdx/intr.c b/drivers/vfio/cdx/intr.c
> new file mode 100644
> index 000000000000..4637b57d0242
> --- /dev/null
> +++ b/drivers/vfio/cdx/intr.c
> @@ -0,0 +1,211 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
> + */
> +
> +#include <linux/vfio.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +#include <linux/eventfd.h>
> +#include <linux/msi.h>
> +#include <linux/interrupt.h>
> +
> +#include "linux/cdx/cdx_bus.h"
> +#include "private.h"
> +
> +static irqreturn_t vfio_cdx_msihandler(int irq_no, void *arg)
> +{
> + struct eventfd_ctx *trigger = arg;
> +
> + eventfd_signal(trigger);
> + return IRQ_HANDLED;
> +}
> +
> +static int vfio_cdx_msi_enable(struct vfio_cdx_device *vdev, int nvec)
> +{
> + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> + struct device *dev = vdev->vdev.dev;
> + int msi_idx, ret;
> +
> + vdev->cdx_irqs = kcalloc(nvec, sizeof(struct vfio_cdx_irq), GFP_KERNEL);
> + if (!vdev->cdx_irqs)
> + return -ENOMEM;
> +
> + ret = cdx_enable_msi(cdx_dev);
> + if (ret) {
> + kfree(vdev->cdx_irqs);
> + return ret;
> + }
> +
> + /* Allocate cdx MSIs */
> + ret = msi_domain_alloc_irqs(dev, MSI_DEFAULT_DOMAIN, nvec);
> + if (ret) {
> + cdx_disable_msi(cdx_dev);
> + kfree(vdev->cdx_irqs);
> + return ret;
> + }
> +
> + for (msi_idx = 0; msi_idx < nvec; msi_idx++)
> + vdev->cdx_irqs[msi_idx].irq_no = msi_get_virq(dev, msi_idx);
> +
> + vdev->irq_count = nvec;
> + vdev->config_msi = 1;
> +
> + return 0;
> +}
> +
> +static int vfio_cdx_msi_set_vector_signal(struct vfio_cdx_device *vdev,
> + int vector, int fd)
> +{
> + struct eventfd_ctx *trigger;
> + int irq_no, ret;
> +
> + if (vector < 0 || vector >= vdev->irq_count)
> + return -EINVAL;
> +
> + irq_no = vdev->cdx_irqs[vector].irq_no;
> +
> + if (vdev->cdx_irqs[vector].trigger) {
> + free_irq(irq_no, vdev->cdx_irqs[vector].trigger);
> + kfree(vdev->cdx_irqs[vector].name);
> + eventfd_ctx_put(vdev->cdx_irqs[vector].trigger);
> + vdev->cdx_irqs[vector].trigger = NULL;
> + }
> +
> + if (fd < 0)
> + return 0;
> +
> + vdev->cdx_irqs[vector].name = kasprintf(GFP_KERNEL, "vfio-msi[%d](%s)",
> + vector, dev_name(vdev->vdev.dev));
> + if (!vdev->cdx_irqs[vector].name)
> + return -ENOMEM;
> +
> + trigger = eventfd_ctx_fdget(fd);
> + if (IS_ERR(trigger)) {
> + kfree(vdev->cdx_irqs[vector].name);
> + return PTR_ERR(trigger);
> + }
> +
> + ret = request_irq(irq_no, vfio_cdx_msihandler, 0,
> + vdev->cdx_irqs[vector].name, trigger);
> + if (ret) {
> + kfree(vdev->cdx_irqs[vector].name);
> + eventfd_ctx_put(trigger);
> + return ret;
> + }
> +
> + vdev->cdx_irqs[vector].trigger = trigger;
> +
> + return 0;
> +}
> +
> +static int vfio_cdx_msi_set_block(struct vfio_cdx_device *vdev,
> + unsigned int start, unsigned int count,
> + int32_t *fds)
> +{
> + int i, j, ret = 0;
> +
> + if (start >= vdev->irq_count || start + count > vdev->irq_count)
> + return -EINVAL;
> +
> + for (i = 0, j = start; i < count && !ret; i++, j++) {
> + int fd = fds ? fds[i] : -1;
> +
> + ret = vfio_cdx_msi_set_vector_signal(vdev, j, fd);
> + }
> +
> + if (ret) {
> + for (--j; j >= (int)start; j--)
> + vfio_cdx_msi_set_vector_signal(vdev, j, -1);
> + }
> +
> + return ret;
> +}
> +
> +static void vfio_cdx_msi_disable(struct vfio_cdx_device *vdev)
> +{
> + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> + struct device *dev = vdev->vdev.dev;
> +
> + vfio_cdx_msi_set_block(vdev, 0, vdev->irq_count, NULL);
> +
> + if (!vdev->config_msi)
> + return;
> +
> + msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN);
> + cdx_disable_msi(cdx_dev);
> + kfree(vdev->cdx_irqs);
> +
> + vdev->cdx_irqs = NULL;
> + vdev->irq_count = 0;
> + vdev->config_msi = 0;
> +}
> +
> +static int vfio_cdx_set_msi_trigger(struct vfio_cdx_device *vdev,
> + unsigned int index, unsigned int start,
> + unsigned int count, u32 flags,
> + void *data)
> +{
> + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> + int i;
> +
> + if (start + count > cdx_dev->num_msi)
> + return -EINVAL;
> +
> + if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
> + vfio_cdx_msi_disable(vdev);
> + return 0;
> + }
> +
> + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
> + s32 *fds = data;
> + int ret;
> +
> + if (vdev->config_msi)
> + return vfio_cdx_msi_set_block(vdev, start, count,
> + fds);
> + ret = vfio_cdx_msi_enable(vdev, cdx_dev->num_msi);
> + if (ret)
> + return ret;
> +
> + ret = vfio_cdx_msi_set_block(vdev, start, count, fds);
> + if (ret)
> + vfio_cdx_msi_disable(vdev);
> +
> + return ret;
> + }
> +
> + for (i = start; i < start + count; i++) {
> + if (!vdev->cdx_irqs[i].trigger)
> + continue;
> + if (flags & VFIO_IRQ_SET_DATA_NONE)
> + eventfd_signal(vdev->cdx_irqs[i].trigger);

Typically DATA_BOOL support is also added here.

> + }
> +
> + return 0;
> +}
> +
> +int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev,
> + u32 flags, unsigned int index,
> + unsigned int start, unsigned int count,
> + void *data)
> +{
> + if (flags & VFIO_IRQ_SET_ACTION_TRIGGER)
> + return vfio_cdx_set_msi_trigger(vdev, index, start,
> + count, flags, data);
> + else
> + return -EINVAL;
> +}
> +
> +/* Free All IRQs for the given device */
> +void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev)
> +{
> + /*
> + * Device does not support any interrupt or the interrupts
> + * were not configured
> + */
> + if (!vdev->cdx_irqs)
> + return;
> +
> + vfio_cdx_set_msi_trigger(vdev, 1, 0, 0, VFIO_IRQ_SET_DATA_NONE, NULL);

@index is passed as 1 here. AFAICT only index zero is supported. The
SET_IRQS ioctl path catches this in
vfio_set_irqs_validate_and_prepare() but it might cause some strange
behavior here if another index were ever added.

> +}
> diff --git a/drivers/vfio/cdx/main.c b/drivers/vfio/cdx/main.c
> index 9cff8d75789e..f0861a38ae10 100644
> --- a/drivers/vfio/cdx/main.c
> +++ b/drivers/vfio/cdx/main.c
> @@ -61,6 +61,7 @@ static void vfio_cdx_close_device(struct vfio_device *core_vdev)
>
> kfree(vdev->regions);
> cdx_dev_reset(core_vdev->dev);
> + vfio_cdx_irqs_cleanup(vdev);
> }
>
> static int vfio_cdx_bm_ctrl(struct vfio_device *core_vdev, u32 flags,
> @@ -123,7 +124,7 @@ static int vfio_cdx_ioctl_get_info(struct vfio_cdx_device *vdev,
> info.flags |= VFIO_DEVICE_FLAGS_RESET;
>
> info.num_regions = cdx_dev->res_count;
> - info.num_irqs = 0;
> + info.num_irqs = cdx_dev->num_msi ? 1 : 0;
>
> return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
> }
> @@ -152,6 +153,59 @@ static int vfio_cdx_ioctl_get_region_info(struct vfio_cdx_device *vdev,
> return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
> }
>
> +static int vfio_cdx_ioctl_get_irq_info(struct vfio_cdx_device *vdev,
> + struct vfio_irq_info __user *arg)
> +{
> + unsigned long minsz = offsetofend(struct vfio_irq_info, count);
> + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> + struct vfio_irq_info info;
> +
> + if (copy_from_user(&info, arg, minsz))
> + return -EFAULT;
> +
> + if (info.argsz < minsz)
> + return -EINVAL;
> +
> + if (info.index >= 1)
> + return -EINVAL;
> +
> + info.flags = VFIO_IRQ_INFO_EVENTFD | VFIO_IRQ_INFO_NORESIZE;
> + info.count = cdx_dev->num_msi;

This should return -EINVAL if cdx_dev->num_msi is zero.

> +
> + return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
> +}
> +
> +static int vfio_cdx_ioctl_set_irqs(struct vfio_cdx_device *vdev,
> + struct vfio_irq_set __user *arg)
> +{
> + unsigned long minsz = offsetofend(struct vfio_irq_set, count);
> + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> + struct vfio_irq_set hdr;
> + size_t data_size = 0;
> + u8 *data = NULL;
> + int ret = 0;
> +
> + if (copy_from_user(&hdr, arg, minsz))
> + return -EFAULT;
> +
> + ret = vfio_set_irqs_validate_and_prepare(&hdr, cdx_dev->num_msi,
> + 1, &data_size);
> + if (ret)
> + return ret;
> +
> + if (data_size) {
> + data = memdup_user(arg->data, data_size);
> + if (IS_ERR(data))
> + return PTR_ERR(data);
> + }
> +
> + ret = vfio_cdx_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
> + hdr.start, hdr.count, data);
> + kfree(data);
> +
> + return ret;
> +}
> +
> static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
> unsigned int cmd, unsigned long arg)
> {
> @@ -164,6 +218,10 @@ static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
> return vfio_cdx_ioctl_get_info(vdev, uarg);
> case VFIO_DEVICE_GET_REGION_INFO:
> return vfio_cdx_ioctl_get_region_info(vdev, uarg);
> + case VFIO_DEVICE_GET_IRQ_INFO:
> + return vfio_cdx_ioctl_get_irq_info(vdev, uarg);
> + case VFIO_DEVICE_SET_IRQS:
> + return vfio_cdx_ioctl_set_irqs(vdev, uarg);
> case VFIO_DEVICE_RESET:
> return cdx_dev_reset(core_vdev->dev);
> default:
> diff --git a/drivers/vfio/cdx/private.h b/drivers/vfio/cdx/private.h
> index 8e9d25913728..7a8477ae4652 100644
> --- a/drivers/vfio/cdx/private.h
> +++ b/drivers/vfio/cdx/private.h
> @@ -13,6 +13,14 @@ static inline u64 vfio_cdx_index_to_offset(u32 index)
> return ((u64)(index) << VFIO_CDX_OFFSET_SHIFT);
> }
>
> +struct vfio_cdx_irq {
> + u32 flags;
> + u32 count;
> + int irq_no;
> + struct eventfd_ctx *trigger;
> + char *name;
> +};
> +
> struct vfio_cdx_region {
> u32 flags;
> u32 type;
> @@ -25,6 +33,16 @@ struct vfio_cdx_device {
> struct vfio_cdx_region *regions;
> u32 flags;
> #define BME_SUPPORT BIT(0)
> + struct vfio_cdx_irq *cdx_irqs;
> + u32 irq_count;
> + u32 config_msi;
> };

You might want to reorder these to avoid holes in the data structures.
vfio_cdx_irq will have a 4byte hole in the middle, vfio_cdx_device will
have a 4byte hole after flags. config_msi is used as a bool, I'm not
sure why it's defined as a u32. irq_count also holds a value up to 1,
so a u32 might be oversized.

There's clearly latent support here for devices with multiple MSI
vectors, is this just copying vfio-pci-core or will CDX devices have
multiple MSIs? Thanks,

Alex

>
> +int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev,
> + u32 flags, unsigned int index,
> + unsigned int start, unsigned int count,
> + void *data);
> +
> +void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev);
> +
> #endif /* VFIO_CDX_PRIVATE_H */


2024-04-22 12:56:08

by Nipun Gupta

[permalink] [raw]
Subject: Re: [PATCH v5 2/2] vfio/cdx: add interrupt support



On 4/20/2024 4:01 AM, Alex Williamson wrote:
> On Tue, 26 Mar 2024 09:38:04 +0530
> Nipun Gupta <[email protected]> wrote:
>
>> Support the following ioctls for CDX devices:
>> - VFIO_DEVICE_GET_IRQ_INFO
>> - VFIO_DEVICE_SET_IRQS
>>
>> This allows user to set an eventfd for cdx device interrupts and
>> trigger this interrupt eventfd from userspace.
>> All CDX device interrupts are MSIs. The MSIs are allocated from the
>> CDX-MSI domain.
>>
>> Signed-off-by: Nipun Gupta <[email protected]>
>> Reviewed-by: Pieter Jansen van Vuuren <[email protected]>
>> ---
>>
>> Changes v4->v5:
>> - Rebased on 6.9-rc1

<snip>

>> +
>> + for (i = start; i < start + count; i++) {
>> + if (!vdev->cdx_irqs[i].trigger)
>> + continue;
>> + if (flags & VFIO_IRQ_SET_DATA_NONE)
>> + eventfd_signal(vdev->cdx_irqs[i].trigger);
>
> Typically DATA_BOOL support is also added here.

Sure. will add it.

>
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev,
>> + u32 flags, unsigned int index,
>> + unsigned int start, unsigned int count,
>> + void *data)
>> +{
>> + if (flags & VFIO_IRQ_SET_ACTION_TRIGGER)
>> + return vfio_cdx_set_msi_trigger(vdev, index, start,
>> + count, flags, data);
>> + else
>> + return -EINVAL;
>> +}
>> +
>> +/* Free All IRQs for the given device */
>> +void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev)
>> +{
>> + /*
>> + * Device does not support any interrupt or the interrupts
>> + * were not configured
>> + */
>> + if (!vdev->cdx_irqs)
>> + return;
>> +
>> + vfio_cdx_set_msi_trigger(vdev, 1, 0, 0, VFIO_IRQ_SET_DATA_NONE, NULL);
>
> @index is passed as 1 here. AFAICT only index zero is supported. The
> SET_IRQS ioctl path catches this in
> vfio_set_irqs_validate_and_prepare() but it might cause some strange
> behavior here if another index were ever added.

Agree, index should be passed as 0 here.

>
>> +}
>> diff --git a/drivers/vfio/cdx/main.c b/drivers/vfio/cdx/main.c
>> index 9cff8d75789e..f0861a38ae10 100644
>> --- a/drivers/vfio/cdx/main.c
>> +++ b/drivers/vfio/cdx/main.c
>> @@ -61,6 +61,7 @@ static void vfio_cdx_close_device(struct vfio_device *core_vdev)
>>
>> kfree(vdev->regions);
>> cdx_dev_reset(core_vdev->dev);
>> + vfio_cdx_irqs_cleanup(vdev);
>> }
>>
>> static int vfio_cdx_bm_ctrl(struct vfio_device *core_vdev, u32 flags,
>> @@ -123,7 +124,7 @@ static int vfio_cdx_ioctl_get_info(struct vfio_cdx_device *vdev,
>> info.flags |= VFIO_DEVICE_FLAGS_RESET;
>>
>> info.num_regions = cdx_dev->res_count;
>> - info.num_irqs = 0;
>> + info.num_irqs = cdx_dev->num_msi ? 1 : 0;
>>
>> return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
>> }
>> @@ -152,6 +153,59 @@ static int vfio_cdx_ioctl_get_region_info(struct vfio_cdx_device *vdev,
>> return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
>> }
>>
>> +static int vfio_cdx_ioctl_get_irq_info(struct vfio_cdx_device *vdev,
>> + struct vfio_irq_info __user *arg)
>> +{
>> + unsigned long minsz = offsetofend(struct vfio_irq_info, count);
>> + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
>> + struct vfio_irq_info info;
>> +
>> + if (copy_from_user(&info, arg, minsz))
>> + return -EFAULT;
>> +
>> + if (info.argsz < minsz)
>> + return -EINVAL;
>> +
>> + if (info.index >= 1)
>> + return -EINVAL;
>> +
>> + info.flags = VFIO_IRQ_INFO_EVENTFD | VFIO_IRQ_INFO_NORESIZE;
>> + info.count = cdx_dev->num_msi;
>
> This should return -EINVAL if cdx_dev->num_msi is zero.

Agree. Will add this check.

>
>> +
>> + return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
>> +}
>> +
>> +static int vfio_cdx_ioctl_set_irqs(struct vfio_cdx_device *vdev,
>> + struct vfio_irq_set __user *arg)
>> +{
>> + unsigned long minsz = offsetofend(struct vfio_irq_set, count);
>> + struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
>> + struct vfio_irq_set hdr;
>> + size_t data_size = 0;
>> + u8 *data = NULL;
>> + int ret = 0;
>> +
>> + if (copy_from_user(&hdr, arg, minsz))
>> + return -EFAULT;
>> +
>> + ret = vfio_set_irqs_validate_and_prepare(&hdr, cdx_dev->num_msi,
>> + 1, &data_size);
>> + if (ret)
>> + return ret;
>> +
>> + if (data_size) {
>> + data = memdup_user(arg->data, data_size);
>> + if (IS_ERR(data))
>> + return PTR_ERR(data);
>> + }
>> +
>> + ret = vfio_cdx_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
>> + hdr.start, hdr.count, data);
>> + kfree(data);
>> +
>> + return ret;
>> +}
>> +
>> static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
>> unsigned int cmd, unsigned long arg)
>> {
>> @@ -164,6 +218,10 @@ static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
>> return vfio_cdx_ioctl_get_info(vdev, uarg);
>> case VFIO_DEVICE_GET_REGION_INFO:
>> return vfio_cdx_ioctl_get_region_info(vdev, uarg);
>> + case VFIO_DEVICE_GET_IRQ_INFO:
>> + return vfio_cdx_ioctl_get_irq_info(vdev, uarg);
>> + case VFIO_DEVICE_SET_IRQS:
>> + return vfio_cdx_ioctl_set_irqs(vdev, uarg);
>> case VFIO_DEVICE_RESET:
>> return cdx_dev_reset(core_vdev->dev);
>> default:
>> diff --git a/drivers/vfio/cdx/private.h b/drivers/vfio/cdx/private.h
>> index 8e9d25913728..7a8477ae4652 100644
>> --- a/drivers/vfio/cdx/private.h
>> +++ b/drivers/vfio/cdx/private.h
>> @@ -13,6 +13,14 @@ static inline u64 vfio_cdx_index_to_offset(u32 index)
>> return ((u64)(index) << VFIO_CDX_OFFSET_SHIFT);
>> }
>>
>> +struct vfio_cdx_irq {
>> + u32 flags;
>> + u32 count;
>> + int irq_no;
>> + struct eventfd_ctx *trigger;
>> + char *name;
>> +};
>> +
>> struct vfio_cdx_region {
>> u32 flags;
>> u32 type;
>> @@ -25,6 +33,16 @@ struct vfio_cdx_device {
>> struct vfio_cdx_region *regions;
>> u32 flags;
>> #define BME_SUPPORT BIT(0)
>> + struct vfio_cdx_irq *cdx_irqs;
>> + u32 irq_count;
>> + u32 config_msi;
>> };
>
> You might want to reorder these to avoid holes in the data structures.
> vfio_cdx_irq will have a 4byte hole in the middle, vfio_cdx_device will
> have a 4byte hole after flags. config_msi is used as a bool, I'm not
> sure why it's defined as a u32. irq_count also holds a value up to 1,
> so a u32 might be oversized.
>
> There's clearly latent support here for devices with multiple MSI
> vectors, is this just copying vfio-pci-core or will CDX devices have
> multiple MSIs? Thanks,

Will reorder structure fields to avoid holes. CDX support multiple MSIs
and 'irq_count' is actually number of MSIs which can be more than 1. We
can replace irq_count to msi_count to avoid confusion.

Thanks,
Nipun

>
> Alex