2023-02-28 09:42:45

by Yongji Xie

[permalink] [raw]
Subject: [PATCH v3 00/11] VDUSE: Improve performance

Hi all,

This series introduces some ways to improve VDUSE performance.

Patch 1 ~ 6 bring current interrupt affinity spreading mechanism
to vduse device and make it possible for the virtio-blk driver
to build the blk-mq queues based on it. This would be useful to
mitigate the virtqueue lock contention in virtio-blk driver. In
our test, with those patches, we could get ~50% improvement (600k
iops -> 900k iops) when using per-cpu virtqueue.

Patch 7 adds a sysfs interface for each vduse virtqueue to change
the affinity for IRQ callback. It would be helpful for performance
tuning when the affinity mask contains more than one CPU.

Patch 8 ~ 9 associate an eventfd to the vdpa callback so that
we can signal it directly during irq injection without scheduling
an additional workqueue thread to do that.

Patch 10, 11 add a sysfs interface to support specifying bounce
buffer size in virtio-vdpa case. The high throughput workloads
can benefit from it. And we can also use it to reduce the memory
overhead for small throughput workloads.

Please review, thanks!

V2 to V3:
- Rebased to newest kernel tree
- Export group_cpus_evenly() instead of irq_create_affinity_masks() [MST]
- Remove the sysfs for workqueue control [Jason]
- Associate an eventfd to the vdpa callback [Jason]
- Signal the eventfd directly in vhost-vdpa case [Jason]
- Use round-robin to spread IRQs between CPUs in the affinity mask [Jason]
- Handle the cpu hotplug case on IRQ injection [Jason]
- Remove effective IRQ affinity and balance mechanism for IRQ allocation

V1 to V2:
- Export irq_create_affinity_masks()
- Add set/get_vq_affinity and set_irq_affinity callbacks in vDPA
framework
- Add automatic irq callback affinity support in VDUSE driver [Jason]
- Add more backgrounds information in commit log [Jason]
- Only support changing effective affinity when the value is a subset
of the IRQ callback affinity mask

Xie Yongji (11):
lib/group_cpus: Export group_cpus_evenly()
vdpa: Add set/get_vq_affinity callbacks in vdpa_config_ops
vdpa: Add set_irq_affinity callback in vdpa_config_ops
vduse: Refactor allocation for vduse virtqueues
vduse: Support automatic irq callback affinity
vduse: Support set/get_vq_affinity callbacks
vduse: Add sysfs interface for irq callback affinity
vdpa: Add eventfd for the vdpa callback
vduse: Signal interrupt's eventfd directly in vhost-vdpa case
vduse: Delay iova domain creation
vduse: Support specifying bounce buffer size via sysfs

drivers/vdpa/vdpa_user/vduse_dev.c | 490 +++++++++++++++++++++++++----
drivers/vhost/vdpa.c | 2 +
drivers/virtio/virtio_vdpa.c | 33 ++
include/linux/vdpa.h | 25 ++
lib/group_cpus.c | 1 +
5 files changed, 488 insertions(+), 63 deletions(-)

--
2.20.1



2023-02-28 09:42:48

by Yongji Xie

[permalink] [raw]
Subject: [PATCH v3 07/11] vduse: Add sysfs interface for irq callback affinity

Add sysfs interface for each vduse virtqueue to
get/set the affinity for irq callback. This might
be useful for performance tuning when the irq callback
affinity mask contains more than one CPU.

Signed-off-by: Xie Yongji <[email protected]>
---
drivers/vdpa/vdpa_user/vduse_dev.c | 124 ++++++++++++++++++++++++++---
1 file changed, 113 insertions(+), 11 deletions(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index e2988a1476e4..869cc7860d82 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -63,6 +63,7 @@ struct vduse_virtqueue {
struct work_struct kick;
int irq_effective_cpu;
struct cpumask irq_affinity;
+ struct kobject kobj;
};

struct vduse_dev;
@@ -1466,6 +1467,96 @@ static const struct file_operations vduse_dev_fops = {
.llseek = noop_llseek,
};

+static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
+{
+ return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
+}
+
+static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
+ const char *buf, size_t count)
+{
+ cpumask_var_t new_value;
+ int ret;
+
+ if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = cpumask_parse(buf, new_value);
+ if (ret)
+ goto free_mask;
+
+ ret = -EINVAL;
+ if (!cpumask_intersects(new_value, cpu_online_mask))
+ goto free_mask;
+
+ cpumask_copy(&vq->irq_affinity, new_value);
+ ret = count;
+free_mask:
+ free_cpumask_var(new_value);
+ return ret;
+}
+
+struct vq_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
+ ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
+ size_t count);
+};
+
+static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
+
+static struct attribute *vq_attrs[] = {
+ &irq_cb_affinity_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(vq);
+
+static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct vduse_virtqueue *vq = container_of(kobj,
+ struct vduse_virtqueue, kobj);
+ struct vq_sysfs_entry *entry = container_of(attr,
+ struct vq_sysfs_entry, attr);
+
+ if (!entry->show)
+ return -EIO;
+
+ return entry->show(vq, buf);
+}
+
+static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct vduse_virtqueue *vq = container_of(kobj,
+ struct vduse_virtqueue, kobj);
+ struct vq_sysfs_entry *entry = container_of(attr,
+ struct vq_sysfs_entry, attr);
+
+ if (!entry->store)
+ return -EIO;
+
+ return entry->store(vq, buf, count);
+}
+
+static const struct sysfs_ops vq_sysfs_ops = {
+ .show = vq_attr_show,
+ .store = vq_attr_store,
+};
+
+static void vq_release(struct kobject *kobj)
+{
+ struct vduse_virtqueue *vq = container_of(kobj,
+ struct vduse_virtqueue, kobj);
+ kfree(vq);
+}
+
+static const struct kobj_type vq_type = {
+ .release = vq_release,
+ .sysfs_ops = &vq_sysfs_ops,
+ .default_groups = vq_groups,
+};
+
static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
{
int i;
@@ -1474,13 +1565,13 @@ static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
return;

for (i = 0; i < dev->vq_num; i++)
- kfree(dev->vqs[i]);
+ kobject_put(&dev->vqs[i]->kobj);
kfree(dev->vqs);
}

static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
{
- int i;
+ int ret, i;

dev->vq_align = vq_align;
dev->vq_num = vq_num;
@@ -1490,8 +1581,10 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)

for (i = 0; i < vq_num; i++) {
dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
- if (!dev->vqs[i])
+ if (!dev->vqs[i]) {
+ ret = -ENOMEM;
goto err;
+ }

dev->vqs[i]->index = i;
dev->vqs[i]->irq_effective_cpu = -1;
@@ -1500,15 +1593,23 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
spin_lock_init(&dev->vqs[i]->kick_lock);
spin_lock_init(&dev->vqs[i]->irq_lock);
cpumask_setall(&dev->vqs[i]->irq_affinity);
+
+ kobject_init(&dev->vqs[i]->kobj, &vq_type);
+ ret = kobject_add(&dev->vqs[i]->kobj,
+ &dev->dev->kobj, "vq%d", i);
+ if (ret) {
+ kfree(dev->vqs[i]);
+ goto err;
+ }
}

return 0;
err:
while (i--)
- kfree(dev->vqs[i]);
+ kobject_put(&dev->vqs[i]->kobj);
kfree(dev->vqs);
dev->vqs = NULL;
- return -ENOMEM;
+ return ret;
}

static struct vduse_dev *vduse_dev_create(void)
@@ -1686,10 +1787,6 @@ static int vduse_create_dev(struct vduse_dev_config *config,
dev->config = config_buf;
dev->config_size = config->config_size;

- ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
- if (ret)
- goto err_vqs;
-
ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
if (ret < 0)
goto err_idr;
@@ -1703,14 +1800,19 @@ static int vduse_create_dev(struct vduse_dev_config *config,
ret = PTR_ERR(dev->dev);
goto err_dev;
}
+
+ ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
+ if (ret)
+ goto err_vqs;
+
__module_get(THIS_MODULE);

return 0;
+err_vqs:
+ device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
err_dev:
idr_remove(&vduse_idr, dev->minor);
err_idr:
- vduse_dev_deinit_vqs(dev);
-err_vqs:
vduse_domain_destroy(dev->domain);
err_domain:
kfree(dev->name);
--
2.20.1


2023-02-28 09:42:52

by Yongji Xie

[permalink] [raw]
Subject: [PATCH v3 02/11] vdpa: Add set/get_vq_affinity callbacks in vdpa_config_ops

This introduces set/get_vq_affinity callbacks in
vdpa_config_ops to support interrupt affinity
management for vdpa device drivers.

Signed-off-by: Xie Yongji <[email protected]>
Acked-by: Jason Wang <[email protected]>
---
drivers/virtio/virtio_vdpa.c | 28 ++++++++++++++++++++++++++++
include/linux/vdpa.h | 13 +++++++++++++
2 files changed, 41 insertions(+)

diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index d7f5af62ddaa..f72696b4c1c2 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -337,6 +337,32 @@ static const char *virtio_vdpa_bus_name(struct virtio_device *vdev)
return dev_name(&vdpa->dev);
}

+static int virtio_vdpa_set_vq_affinity(struct virtqueue *vq,
+ const struct cpumask *cpu_mask)
+{
+ struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vq->vdev);
+ struct vdpa_device *vdpa = vd_dev->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+ unsigned int index = vq->index;
+
+ if (ops->set_vq_affinity)
+ return ops->set_vq_affinity(vdpa, index, cpu_mask);
+
+ return 0;
+}
+
+static const struct cpumask *
+virtio_vdpa_get_vq_affinity(struct virtio_device *vdev, int index)
+{
+ struct vdpa_device *vdpa = vd_get_vdpa(vdev);
+ const struct vdpa_config_ops *ops = vdpa->config;
+
+ if (ops->get_vq_affinity)
+ return ops->get_vq_affinity(vdpa, index);
+
+ return NULL;
+}
+
static const struct virtio_config_ops virtio_vdpa_config_ops = {
.get = virtio_vdpa_get,
.set = virtio_vdpa_set,
@@ -349,6 +375,8 @@ static const struct virtio_config_ops virtio_vdpa_config_ops = {
.get_features = virtio_vdpa_get_features,
.finalize_features = virtio_vdpa_finalize_features,
.bus_name = virtio_vdpa_bus_name,
+ .set_vq_affinity = virtio_vdpa_set_vq_affinity,
+ .get_vq_affinity = virtio_vdpa_get_vq_affinity,
};

static void virtio_vdpa_release_dev(struct device *_d)
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 43f59ef10cc9..d61f369f9cd6 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -250,6 +250,15 @@ struct vdpa_map_file {
* @vdev: vdpa device
* Returns the iova range supported by
* the device.
+ * @set_vq_affinity: Set the irq affinity of virtqueue (optional)
+ * @vdev: vdpa device
+ * @idx: virtqueue index
+ * @cpu_mask: irq affinity mask
+ * Returns integer: success (0) or error (< 0)
+ * @get_vq_affinity: Get the irq affinity of virtqueue (optional)
+ * @vdev: vdpa device
+ * @idx: virtqueue index
+ * Returns the irq affinity mask
* @set_group_asid: Set address space identifier for a
* virtqueue group (optional)
* @vdev: vdpa device
@@ -340,6 +349,10 @@ struct vdpa_config_ops {
const void *buf, unsigned int len);
u32 (*get_generation)(struct vdpa_device *vdev);
struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev);
+ int (*set_vq_affinity)(struct vdpa_device *vdev, u16 idx,
+ const struct cpumask *cpu_mask);
+ const struct cpumask *(*get_vq_affinity)(struct vdpa_device *vdev,
+ u16 idx);

/* DMA ops */
int (*set_map)(struct vdpa_device *vdev, unsigned int asid,
--
2.20.1


2023-02-28 09:43:03

by Yongji Xie

[permalink] [raw]
Subject: [PATCH v3 04/11] vduse: Refactor allocation for vduse virtqueues

Allocate memory for vduse virtqueues one by one instead of
doing one allocation for all of them.

This is a preparation for adding sysfs interface for virtqueues.

Signed-off-by: Xie Yongji <[email protected]>
Acked-by: Jason Wang <[email protected]>
---
drivers/vdpa/vdpa_user/vduse_dev.c | 98 ++++++++++++++++++++----------
1 file changed, 66 insertions(+), 32 deletions(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 0c3b48616a9f..98359d87a06f 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -76,7 +76,7 @@ struct vduse_umem {
struct vduse_dev {
struct vduse_vdpa *vdev;
struct device *dev;
- struct vduse_virtqueue *vqs;
+ struct vduse_virtqueue **vqs;
struct vduse_iova_domain *domain;
char *name;
struct mutex lock;
@@ -434,7 +434,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
flush_work(&dev->inject);

for (i = 0; i < dev->vq_num; i++) {
- struct vduse_virtqueue *vq = &dev->vqs[i];
+ struct vduse_virtqueue *vq = dev->vqs[i];

vq->ready = false;
vq->desc_addr = 0;
@@ -466,7 +466,7 @@ static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
u64 device_area)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];

vq->desc_addr = desc_area;
vq->driver_addr = driver_area;
@@ -500,7 +500,7 @@ static void vduse_vq_kick_work(struct work_struct *work)
static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];

if (!eventfd_signal_allowed()) {
schedule_work(&vq->kick);
@@ -513,7 +513,7 @@ static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
struct vdpa_callback *cb)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];

spin_lock(&vq->irq_lock);
vq->cb.callback = cb->callback;
@@ -524,7 +524,7 @@ static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];

vq->num = num;
}
@@ -533,7 +533,7 @@ static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
u16 idx, bool ready)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];

vq->ready = ready;
}
@@ -541,7 +541,7 @@ static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];

return vq->ready;
}
@@ -550,7 +550,7 @@ static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
const struct vdpa_vq_state *state)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];

if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
vq->state.packed.last_avail_counter =
@@ -569,7 +569,7 @@ static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
struct vdpa_vq_state *state)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];

if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
@@ -624,8 +624,8 @@ static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
int i;

for (i = 0; i < dev->vq_num; i++)
- if (num_max < dev->vqs[i].num_max)
- num_max = dev->vqs[i].num_max;
+ if (num_max < dev->vqs[i]->num_max)
+ num_max = dev->vqs[i]->num_max;

return num_max;
}
@@ -863,7 +863,7 @@ static int vduse_kickfd_setup(struct vduse_dev *dev,
return -EINVAL;

index = array_index_nospec(eventfd->index, dev->vq_num);
- vq = &dev->vqs[index];
+ vq = dev->vqs[index];
if (eventfd->fd >= 0) {
ctx = eventfd_ctx_fdget(eventfd->fd);
if (IS_ERR(ctx))
@@ -889,7 +889,7 @@ static bool vduse_dev_is_ready(struct vduse_dev *dev)
int i;

for (i = 0; i < dev->vq_num; i++)
- if (!dev->vqs[i].num_max)
+ if (!dev->vqs[i]->num_max)
return false;

return true;
@@ -1130,7 +1130,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;

index = array_index_nospec(config.index, dev->vq_num);
- dev->vqs[index].num_max = config.max_size;
+ dev->vqs[index]->num_max = config.max_size;
ret = 0;
break;
}
@@ -1148,7 +1148,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;

index = array_index_nospec(vq_info.index, dev->vq_num);
- vq = &dev->vqs[index];
+ vq = dev->vqs[index];
vq_info.desc_addr = vq->desc_addr;
vq_info.driver_addr = vq->driver_addr;
vq_info.device_addr = vq->device_addr;
@@ -1198,7 +1198,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;

index = array_index_nospec(index, dev->vq_num);
- ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
+ ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject);
break;
}
case VDUSE_IOTLB_REG_UMEM: {
@@ -1339,6 +1339,49 @@ static const struct file_operations vduse_dev_fops = {
.llseek = noop_llseek,
};

+static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
+{
+ int i;
+
+ if (!dev->vqs)
+ return;
+
+ for (i = 0; i < dev->vq_num; i++)
+ kfree(dev->vqs[i]);
+ kfree(dev->vqs);
+}
+
+static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
+{
+ int i;
+
+ dev->vq_align = vq_align;
+ dev->vq_num = vq_num;
+ dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
+ if (!dev->vqs)
+ return -ENOMEM;
+
+ for (i = 0; i < vq_num; i++) {
+ dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
+ if (!dev->vqs[i])
+ goto err;
+
+ dev->vqs[i]->index = i;
+ INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
+ INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
+ spin_lock_init(&dev->vqs[i]->kick_lock);
+ spin_lock_init(&dev->vqs[i]->irq_lock);
+ }
+
+ return 0;
+err:
+ while (i--)
+ kfree(dev->vqs[i]);
+ kfree(dev->vqs);
+ dev->vqs = NULL;
+ return -ENOMEM;
+}
+
static struct vduse_dev *vduse_dev_create(void)
{
struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
@@ -1396,7 +1439,7 @@ static int vduse_destroy_dev(char *name)
device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
idr_remove(&vduse_idr, dev->minor);
kvfree(dev->config);
- kfree(dev->vqs);
+ vduse_dev_deinit_vqs(dev);
vduse_domain_destroy(dev->domain);
kfree(dev->name);
vduse_dev_destroy(dev);
@@ -1486,7 +1529,7 @@ ATTRIBUTE_GROUPS(vduse_dev);
static int vduse_create_dev(struct vduse_dev_config *config,
void *config_buf, u64 api_version)
{
- int i, ret;
+ int ret;
struct vduse_dev *dev;

ret = -EEXIST;
@@ -1513,19 +1556,10 @@ static int vduse_create_dev(struct vduse_dev_config *config,

dev->config = config_buf;
dev->config_size = config->config_size;
- dev->vq_align = config->vq_align;
- dev->vq_num = config->vq_num;
- dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
- if (!dev->vqs)
- goto err_vqs;

- for (i = 0; i < dev->vq_num; i++) {
- dev->vqs[i].index = i;
- INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
- INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
- spin_lock_init(&dev->vqs[i].kick_lock);
- spin_lock_init(&dev->vqs[i].irq_lock);
- }
+ ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
+ if (ret)
+ goto err_vqs;

ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
if (ret < 0)
@@ -1546,7 +1580,7 @@ static int vduse_create_dev(struct vduse_dev_config *config,
err_dev:
idr_remove(&vduse_idr, dev->minor);
err_idr:
- kfree(dev->vqs);
+ vduse_dev_deinit_vqs(dev);
err_vqs:
vduse_domain_destroy(dev->domain);
err_domain:
--
2.20.1


2023-02-28 09:43:06

by Yongji Xie

[permalink] [raw]
Subject: [PATCH v3 10/11] vduse: Delay iova domain creation

Delay creating iova domain until the vduse device is
registered to vdpa bus.

This is a preparation for adding sysfs interface to
support specifying bounce buffer size for the iova
domain.

Signed-off-by: Xie Yongji <[email protected]>
Acked-by: Jason Wang <[email protected]>
---
drivers/vdpa/vdpa_user/vduse_dev.c | 75 +++++++++++++++++++++---------
1 file changed, 53 insertions(+), 22 deletions(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 56f3c2480c2a..1702565efc82 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -113,6 +113,8 @@ struct vduse_dev {
u32 vq_align;
struct vduse_umem *umem;
struct mutex mem_lock;
+ unsigned int bounce_size;
+ struct mutex domain_lock;
};

struct vduse_dev_msg {
@@ -427,7 +429,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
struct vduse_iova_domain *domain = dev->domain;

/* The coherent mappings are handled in vduse_dev_free_coherent() */
- if (domain->bounce_map)
+ if (domain && domain->bounce_map)
vduse_domain_reset_bounce_map(domain);

down_write(&dev->rwsem);
@@ -1069,6 +1071,9 @@ static int vduse_dev_dereg_umem(struct vduse_dev *dev,
goto unlock;

ret = -EINVAL;
+ if (!dev->domain)
+ goto unlock;
+
if (dev->umem->iova != iova || size != dev->domain->bounce_size)
goto unlock;

@@ -1095,7 +1100,7 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
unsigned long npages, lock_limit;
int ret;

- if (!dev->domain->bounce_map ||
+ if (!dev->domain || !dev->domain->bounce_map ||
size != dev->domain->bounce_size ||
iova != 0 || uaddr & ~PAGE_MASK)
return -EINVAL;
@@ -1185,7 +1190,6 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
struct vduse_iotlb_entry entry;
struct vhost_iotlb_map *map;
struct vdpa_map_file *map_file;
- struct vduse_iova_domain *domain = dev->domain;
struct file *f = NULL;

ret = -EFAULT;
@@ -1196,8 +1200,13 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
if (entry.start > entry.last)
break;

- spin_lock(&domain->iotlb_lock);
- map = vhost_iotlb_itree_first(domain->iotlb,
+ mutex_lock(&dev->domain_lock);
+ if (!dev->domain) {
+ mutex_unlock(&dev->domain_lock);
+ break;
+ }
+ spin_lock(&dev->domain->iotlb_lock);
+ map = vhost_iotlb_itree_first(dev->domain->iotlb,
entry.start, entry.last);
if (map) {
map_file = (struct vdpa_map_file *)map->opaque;
@@ -1207,7 +1216,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
entry.last = map->last;
entry.perm = map->perm;
}
- spin_unlock(&domain->iotlb_lock);
+ spin_unlock(&dev->domain->iotlb_lock);
+ mutex_unlock(&dev->domain_lock);
ret = -EINVAL;
if (!f)
break;
@@ -1360,8 +1370,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
sizeof(umem.reserved)))
break;

+ mutex_lock(&dev->domain_lock);
ret = vduse_dev_reg_umem(dev, umem.iova,
umem.uaddr, umem.size);
+ mutex_unlock(&dev->domain_lock);
break;
}
case VDUSE_IOTLB_DEREG_UMEM: {
@@ -1375,15 +1387,15 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
if (!is_mem_zero((const char *)umem.reserved,
sizeof(umem.reserved)))
break;
-
+ mutex_lock(&dev->domain_lock);
ret = vduse_dev_dereg_umem(dev, umem.iova,
umem.size);
+ mutex_unlock(&dev->domain_lock);
break;
}
case VDUSE_IOTLB_GET_INFO: {
struct vduse_iova_info info;
struct vhost_iotlb_map *map;
- struct vduse_iova_domain *domain = dev->domain;

ret = -EFAULT;
if (copy_from_user(&info, argp, sizeof(info)))
@@ -1397,18 +1409,24 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
sizeof(info.reserved)))
break;

- spin_lock(&domain->iotlb_lock);
- map = vhost_iotlb_itree_first(domain->iotlb,
+ mutex_lock(&dev->domain_lock);
+ if (!dev->domain) {
+ mutex_unlock(&dev->domain_lock);
+ break;
+ }
+ spin_lock(&dev->domain->iotlb_lock);
+ map = vhost_iotlb_itree_first(dev->domain->iotlb,
info.start, info.last);
if (map) {
info.start = map->start;
info.last = map->last;
info.capability = 0;
- if (domain->bounce_map && map->start == 0 &&
- map->last == domain->bounce_size - 1)
+ if (dev->domain->bounce_map && map->start == 0 &&
+ map->last == dev->domain->bounce_size - 1)
info.capability |= VDUSE_IOVA_CAP_UMEM;
}
- spin_unlock(&domain->iotlb_lock);
+ spin_unlock(&dev->domain->iotlb_lock);
+ mutex_unlock(&dev->domain_lock);
if (!map)
break;

@@ -1431,7 +1449,10 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
{
struct vduse_dev *dev = file->private_data;

- vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
+ mutex_lock(&dev->domain_lock);
+ if (dev->domain)
+ vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
+ mutex_unlock(&dev->domain_lock);
spin_lock(&dev->msg_lock);
/* Make sure the inflight messages can processed after reconncection */
list_splice_init(&dev->recv_list, &dev->send_list);
@@ -1640,6 +1661,7 @@ static struct vduse_dev *vduse_dev_create(void)

mutex_init(&dev->lock);
mutex_init(&dev->mem_lock);
+ mutex_init(&dev->domain_lock);
spin_lock_init(&dev->msg_lock);
INIT_LIST_HEAD(&dev->send_list);
INIT_LIST_HEAD(&dev->recv_list);
@@ -1689,7 +1711,8 @@ static int vduse_destroy_dev(char *name)
idr_remove(&vduse_idr, dev->minor);
kvfree(dev->config);
vduse_dev_deinit_vqs(dev);
- vduse_domain_destroy(dev->domain);
+ if (dev->domain)
+ vduse_domain_destroy(dev->domain);
kfree(dev->name);
vduse_dev_destroy(dev);
module_put(THIS_MODULE);
@@ -1798,11 +1821,7 @@ static int vduse_create_dev(struct vduse_dev_config *config,
if (!dev->name)
goto err_str;

- dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
- VDUSE_BOUNCE_SIZE);
- if (!dev->domain)
- goto err_domain;
-
+ dev->bounce_size = VDUSE_BOUNCE_SIZE;
dev->config = config_buf;
dev->config_size = config->config_size;

@@ -1832,8 +1851,6 @@ static int vduse_create_dev(struct vduse_dev_config *config,
err_dev:
idr_remove(&vduse_idr, dev->minor);
err_idr:
- vduse_domain_destroy(dev->domain);
-err_domain:
kfree(dev->name);
err_str:
vduse_dev_destroy(dev);
@@ -2000,9 +2017,23 @@ static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
if (ret)
return ret;

+ mutex_lock(&dev->domain_lock);
+ if (!dev->domain)
+ dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
+ dev->bounce_size);
+ mutex_unlock(&dev->domain_lock);
+ if (!dev->domain) {
+ put_device(&dev->vdev->vdpa.dev);
+ return -ENOMEM;
+ }
+
ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
if (ret) {
put_device(&dev->vdev->vdpa.dev);
+ mutex_lock(&dev->domain_lock);
+ vduse_domain_destroy(dev->domain);
+ dev->domain = NULL;
+ mutex_unlock(&dev->domain_lock);
return ret;
}

--
2.20.1


2023-02-28 09:43:19

by Yongji Xie

[permalink] [raw]
Subject: [PATCH v3 06/11] vduse: Support set/get_vq_affinity callbacks

Since we already support irq callback affinity management,
let's implement the set/get_vq_affinity callbacks to make
it possible for the virtio device driver to change or be
aware of the affinity. This would make it possible for the
virtio-blk driver to build the blk-mq queues based on the
irq callback affinity.

Signed-off-by: Xie Yongji <[email protected]>
---
drivers/vdpa/vdpa_user/vduse_dev.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index bde28a8692d5..e2988a1476e4 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -775,6 +775,23 @@ create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
return masks;
}

+static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
+ const struct cpumask *cpu_mask)
+{
+ struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+ cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
+ return 0;
+}
+
+static const struct cpumask *
+vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
+{
+ struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+ return &dev->vqs[idx]->irq_affinity;
+}
+
static void vduse_vdpa_set_irq_affinity(struct vdpa_device *vdpa,
struct irq_affinity *desc)
{
@@ -841,6 +858,8 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
.get_config = vduse_vdpa_get_config,
.set_config = vduse_vdpa_set_config,
.get_generation = vduse_vdpa_get_generation,
+ .set_vq_affinity = vduse_vdpa_set_vq_affinity,
+ .get_vq_affinity = vduse_vdpa_get_vq_affinity,
.set_irq_affinity = vduse_vdpa_set_irq_affinity,
.reset = vduse_vdpa_reset,
.set_map = vduse_vdpa_set_map,
--
2.20.1


2023-02-28 09:43:23

by Yongji Xie

[permalink] [raw]
Subject: [PATCH v3 08/11] vdpa: Add eventfd for the vdpa callback

Add eventfd for the vdpa callback so that user
can signal it directly instead of running the
callback. It will be used for vhost-vdpa case.

Signed-off-by: Xie Yongji <[email protected]>
---
drivers/vhost/vdpa.c | 2 ++
drivers/virtio/virtio_vdpa.c | 1 +
include/linux/vdpa.h | 3 +++
3 files changed, 6 insertions(+)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index dc12dbd5b43b..ae89c0ccc2bb 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -599,9 +599,11 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
if (vq->call_ctx.ctx) {
cb.callback = vhost_vdpa_virtqueue_cb;
cb.private = vq;
+ cb.irq_ctx = vq->call_ctx.ctx;
} else {
cb.callback = NULL;
cb.private = NULL;
+ cb.irq_ctx = NULL;
}
ops->set_vq_cb(vdpa, idx, &cb);
vhost_vdpa_setup_vq_irq(v, idx);
diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index 9eee8afabda8..a5cecafbc2d1 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -195,6 +195,7 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
/* Setup virtqueue callback */
cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL;
cb.private = info;
+ cb.irq_ctx = NULL;
ops->set_vq_cb(vdpa, index, &cb);
ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq));

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 10bd22387276..94a7ec49583a 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -13,10 +13,13 @@
* struct vdpa_calllback - vDPA callback definition.
* @callback: interrupt callback function
* @private: the data passed to the callback function
+ * @irq_ctx: the eventfd for the callback, user can signal
+ * it directly instead of running the callback
*/
struct vdpa_callback {
irqreturn_t (*callback)(void *data);
void *private;
+ struct eventfd_ctx *irq_ctx;
};

/**
--
2.20.1


2023-02-28 09:43:33

by Yongji Xie

[permalink] [raw]
Subject: [PATCH v3 09/11] vduse: Signal interrupt's eventfd directly in vhost-vdpa case

Now the vdpa callback will associate an eventfd in
vhost-vdpa case. For performance reasons, VDUSE can
signal it directly during irq injection.

Signed-off-by: Xie Yongji <[email protected]>
---
drivers/vdpa/vdpa_user/vduse_dev.c | 27 +++++++++++++++++++++++----
1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 869cc7860d82..56f3c2480c2a 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -461,6 +461,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
spin_lock(&vq->irq_lock);
vq->cb.callback = NULL;
vq->cb.private = NULL;
+ vq->cb.irq_ctx = NULL;
spin_unlock(&vq->irq_lock);
flush_work(&vq->inject);
flush_work(&vq->kick);
@@ -526,6 +527,7 @@ static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
spin_lock(&vq->irq_lock);
vq->cb.callback = cb->callback;
vq->cb.private = cb->private;
+ vq->cb.irq_ctx = cb->irq_ctx;
spin_unlock(&vq->irq_lock);
}

@@ -1020,6 +1022,20 @@ static void vduse_vq_irq_inject(struct work_struct *work)
spin_unlock_irq(&vq->irq_lock);
}

+static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
+{
+ bool signal = false;
+
+ spin_lock_irq(&vq->irq_lock);
+ if (vq->ready && vq->cb.irq_ctx) {
+ eventfd_signal(vq->cb.irq_ctx, 1);
+ signal = true;
+ }
+ spin_unlock_irq(&vq->irq_lock);
+
+ return signal;
+}
+
static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
struct work_struct *irq_work,
int irq_effective_cpu)
@@ -1322,11 +1338,14 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
if (index >= dev->vq_num)
break;

+ ret = 0;
index = array_index_nospec(index, dev->vq_num);
-
- vduse_vq_update_effective_cpu(dev->vqs[index]);
- ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
- dev->vqs[index]->irq_effective_cpu);
+ if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
+ vduse_vq_update_effective_cpu(dev->vqs[index]);
+ ret = vduse_dev_queue_irq_work(dev,
+ &dev->vqs[index]->inject,
+ dev->vqs[index]->irq_effective_cpu);
+ }
break;
}
case VDUSE_IOTLB_REG_UMEM: {
--
2.20.1


2023-02-28 09:43:39

by Yongji Xie

[permalink] [raw]
Subject: [PATCH v3 11/11] vduse: Support specifying bounce buffer size via sysfs

As discussed in [1], this adds sysfs interface to support
specifying bounce buffer size in virtio-vdpa case. It would
be a performance tuning parameter for high throughput workloads.

[1] https://lore.kernel.org/netdev/[email protected]/

Signed-off-by: Xie Yongji <[email protected]>
Acked-by: Jason Wang <[email protected]>
---
drivers/vdpa/vdpa_user/vduse_dev.c | 45 +++++++++++++++++++++++++++++-
1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 1702565efc82..a0f796d20027 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -39,8 +39,11 @@
#define DRV_LICENSE "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
+#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
+#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
-#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
+/* 128 MB reserved for virtqueue creation */
+#define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

#define IRQ_UNBOUND -1
@@ -1791,8 +1794,48 @@ static ssize_t msg_timeout_store(struct device *device,

static DEVICE_ATTR_RW(msg_timeout);

+static ssize_t bounce_size_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct vduse_dev *dev = dev_get_drvdata(device);
+
+ return sysfs_emit(buf, "%u\n", dev->bounce_size);
+}
+
+static ssize_t bounce_size_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct vduse_dev *dev = dev_get_drvdata(device);
+ unsigned int bounce_size;
+ int ret;
+
+ ret = -EPERM;
+ mutex_lock(&dev->domain_lock);
+ if (dev->domain)
+ goto unlock;
+
+ ret = kstrtouint(buf, 10, &bounce_size);
+ if (ret < 0)
+ goto unlock;
+
+ ret = -EINVAL;
+ if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
+ bounce_size < VDUSE_MIN_BOUNCE_SIZE)
+ goto unlock;
+
+ dev->bounce_size = bounce_size & PAGE_MASK;
+ ret = count;
+unlock:
+ mutex_unlock(&dev->domain_lock);
+ return ret;
+}
+
+static DEVICE_ATTR_RW(bounce_size);
+
static struct attribute *vduse_dev_attrs[] = {
&dev_attr_msg_timeout.attr,
+ &dev_attr_bounce_size.attr,
NULL
};

--
2.20.1


2023-03-10 08:53:10

by Michael S. Tsirkin

[permalink] [raw]
Subject: Re: [PATCH v3 00/11] VDUSE: Improve performance

On Tue, Feb 28, 2023 at 05:40:59PM +0800, Xie Yongji wrote:
> Hi all,
>
> This series introduces some ways to improve VDUSE performance.


Pls fix warnings reported by 0-day infra, dropping this for now.


> Patch 1 ~ 6 bring current interrupt affinity spreading mechanism
> to vduse device and make it possible for the virtio-blk driver
> to build the blk-mq queues based on it. This would be useful to
> mitigate the virtqueue lock contention in virtio-blk driver. In
> our test, with those patches, we could get ~50% improvement (600k
> iops -> 900k iops) when using per-cpu virtqueue.
>
> Patch 7 adds a sysfs interface for each vduse virtqueue to change
> the affinity for IRQ callback. It would be helpful for performance
> tuning when the affinity mask contains more than one CPU.
>
> Patch 8 ~ 9 associate an eventfd to the vdpa callback so that
> we can signal it directly during irq injection without scheduling
> an additional workqueue thread to do that.
>
> Patch 10, 11 add a sysfs interface to support specifying bounce
> buffer size in virtio-vdpa case. The high throughput workloads
> can benefit from it. And we can also use it to reduce the memory
> overhead for small throughput workloads.
>
> Please review, thanks!
>
> V2 to V3:
> - Rebased to newest kernel tree
> - Export group_cpus_evenly() instead of irq_create_affinity_masks() [MST]
> - Remove the sysfs for workqueue control [Jason]
> - Associate an eventfd to the vdpa callback [Jason]
> - Signal the eventfd directly in vhost-vdpa case [Jason]
> - Use round-robin to spread IRQs between CPUs in the affinity mask [Jason]
> - Handle the cpu hotplug case on IRQ injection [Jason]
> - Remove effective IRQ affinity and balance mechanism for IRQ allocation
>
> V1 to V2:
> - Export irq_create_affinity_masks()
> - Add set/get_vq_affinity and set_irq_affinity callbacks in vDPA
> framework
> - Add automatic irq callback affinity support in VDUSE driver [Jason]
> - Add more backgrounds information in commit log [Jason]
> - Only support changing effective affinity when the value is a subset
> of the IRQ callback affinity mask
>
> Xie Yongji (11):
> lib/group_cpus: Export group_cpus_evenly()
> vdpa: Add set/get_vq_affinity callbacks in vdpa_config_ops
> vdpa: Add set_irq_affinity callback in vdpa_config_ops
> vduse: Refactor allocation for vduse virtqueues
> vduse: Support automatic irq callback affinity
> vduse: Support set/get_vq_affinity callbacks
> vduse: Add sysfs interface for irq callback affinity
> vdpa: Add eventfd for the vdpa callback
> vduse: Signal interrupt's eventfd directly in vhost-vdpa case
> vduse: Delay iova domain creation
> vduse: Support specifying bounce buffer size via sysfs
>
> drivers/vdpa/vdpa_user/vduse_dev.c | 490 +++++++++++++++++++++++++----
> drivers/vhost/vdpa.c | 2 +
> drivers/virtio/virtio_vdpa.c | 33 ++
> include/linux/vdpa.h | 25 ++
> lib/group_cpus.c | 1 +
> 5 files changed, 488 insertions(+), 63 deletions(-)
>
> --
> 2.20.1


2023-03-10 09:44:08

by Jason Wang

[permalink] [raw]
Subject: Re: [PATCH v3 00/11] VDUSE: Improve performance

On Fri, Mar 10, 2023 at 4:50 PM Michael S. Tsirkin <[email protected]> wrote:
>
> On Tue, Feb 28, 2023 at 05:40:59PM +0800, Xie Yongji wrote:
> > Hi all,
> >
> > This series introduces some ways to improve VDUSE performance.
>
>
> Pls fix warnings reported by 0-day infra, dropping this for now.

Note that I plan to review this next week.

Thanks

>
>
> > Patch 1 ~ 6 bring current interrupt affinity spreading mechanism
> > to vduse device and make it possible for the virtio-blk driver
> > to build the blk-mq queues based on it. This would be useful to
> > mitigate the virtqueue lock contention in virtio-blk driver. In
> > our test, with those patches, we could get ~50% improvement (600k
> > iops -> 900k iops) when using per-cpu virtqueue.
> >
> > Patch 7 adds a sysfs interface for each vduse virtqueue to change
> > the affinity for IRQ callback. It would be helpful for performance
> > tuning when the affinity mask contains more than one CPU.
> >
> > Patch 8 ~ 9 associate an eventfd to the vdpa callback so that
> > we can signal it directly during irq injection without scheduling
> > an additional workqueue thread to do that.
> >
> > Patch 10, 11 add a sysfs interface to support specifying bounce
> > buffer size in virtio-vdpa case. The high throughput workloads
> > can benefit from it. And we can also use it to reduce the memory
> > overhead for small throughput workloads.
> >
> > Please review, thanks!
> >
> > V2 to V3:
> > - Rebased to newest kernel tree
> > - Export group_cpus_evenly() instead of irq_create_affinity_masks() [MST]
> > - Remove the sysfs for workqueue control [Jason]
> > - Associate an eventfd to the vdpa callback [Jason]
> > - Signal the eventfd directly in vhost-vdpa case [Jason]
> > - Use round-robin to spread IRQs between CPUs in the affinity mask [Jason]
> > - Handle the cpu hotplug case on IRQ injection [Jason]
> > - Remove effective IRQ affinity and balance mechanism for IRQ allocation
> >
> > V1 to V2:
> > - Export irq_create_affinity_masks()
> > - Add set/get_vq_affinity and set_irq_affinity callbacks in vDPA
> > framework
> > - Add automatic irq callback affinity support in VDUSE driver [Jason]
> > - Add more backgrounds information in commit log [Jason]
> > - Only support changing effective affinity when the value is a subset
> > of the IRQ callback affinity mask
> >
> > Xie Yongji (11):
> > lib/group_cpus: Export group_cpus_evenly()
> > vdpa: Add set/get_vq_affinity callbacks in vdpa_config_ops
> > vdpa: Add set_irq_affinity callback in vdpa_config_ops
> > vduse: Refactor allocation for vduse virtqueues
> > vduse: Support automatic irq callback affinity
> > vduse: Support set/get_vq_affinity callbacks
> > vduse: Add sysfs interface for irq callback affinity
> > vdpa: Add eventfd for the vdpa callback
> > vduse: Signal interrupt's eventfd directly in vhost-vdpa case
> > vduse: Delay iova domain creation
> > vduse: Support specifying bounce buffer size via sysfs
> >
> > drivers/vdpa/vdpa_user/vduse_dev.c | 490 +++++++++++++++++++++++++----
> > drivers/vhost/vdpa.c | 2 +
> > drivers/virtio/virtio_vdpa.c | 33 ++
> > include/linux/vdpa.h | 25 ++
> > lib/group_cpus.c | 1 +
> > 5 files changed, 488 insertions(+), 63 deletions(-)
> >
> > --
> > 2.20.1
>


2023-03-16 03:30:17

by Jason Wang

[permalink] [raw]
Subject: Re: [PATCH v3 02/11] vdpa: Add set/get_vq_affinity callbacks in vdpa_config_ops


在 2023/2/28 17:41, Xie Yongji 写道:
> This introduces set/get_vq_affinity callbacks in
> vdpa_config_ops to support interrupt affinity
> management for vdpa device drivers.
>
> Signed-off-by: Xie Yongji <[email protected]>
> Acked-by: Jason Wang <[email protected]>
> ---
> drivers/virtio/virtio_vdpa.c | 28 ++++++++++++++++++++++++++++
> include/linux/vdpa.h | 13 +++++++++++++
> 2 files changed, 41 insertions(+)
>
> diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
> index d7f5af62ddaa..f72696b4c1c2 100644
> --- a/drivers/virtio/virtio_vdpa.c
> +++ b/drivers/virtio/virtio_vdpa.c
> @@ -337,6 +337,32 @@ static const char *virtio_vdpa_bus_name(struct virtio_device *vdev)
> return dev_name(&vdpa->dev);
> }
>
> +static int virtio_vdpa_set_vq_affinity(struct virtqueue *vq,
> + const struct cpumask *cpu_mask)
> +{
> + struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vq->vdev);
> + struct vdpa_device *vdpa = vd_dev->vdpa;
> + const struct vdpa_config_ops *ops = vdpa->config;
> + unsigned int index = vq->index;
> +
> + if (ops->set_vq_affinity)
> + return ops->set_vq_affinity(vdpa, index, cpu_mask);
> +
> + return 0;
> +}
> +
> +static const struct cpumask *
> +virtio_vdpa_get_vq_affinity(struct virtio_device *vdev, int index)
> +{
> + struct vdpa_device *vdpa = vd_get_vdpa(vdev);
> + const struct vdpa_config_ops *ops = vdpa->config;
> +
> + if (ops->get_vq_affinity)
> + return ops->get_vq_affinity(vdpa, index);
> +
> + return NULL;
> +}
> +
> static const struct virtio_config_ops virtio_vdpa_config_ops = {
> .get = virtio_vdpa_get,
> .set = virtio_vdpa_set,
> @@ -349,6 +375,8 @@ static const struct virtio_config_ops virtio_vdpa_config_ops = {
> .get_features = virtio_vdpa_get_features,
> .finalize_features = virtio_vdpa_finalize_features,
> .bus_name = virtio_vdpa_bus_name,
> + .set_vq_affinity = virtio_vdpa_set_vq_affinity,
> + .get_vq_affinity = virtio_vdpa_get_vq_affinity,
> };
>
> static void virtio_vdpa_release_dev(struct device *_d)
> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> index 43f59ef10cc9..d61f369f9cd6 100644
> --- a/include/linux/vdpa.h
> +++ b/include/linux/vdpa.h
> @@ -250,6 +250,15 @@ struct vdpa_map_file {
> * @vdev: vdpa device
> * Returns the iova range supported by
> * the device.
> + * @set_vq_affinity: Set the irq affinity of virtqueue (optional)


Nit: it's better not mention IRQ here because the virtqueue notification
is not necessarily backed on IRQ.

Thanks


> + * @vdev: vdpa device
> + * @idx: virtqueue index
> + * @cpu_mask: irq affinity mask
> + * Returns integer: success (0) or error (< 0)
> + * @get_vq_affinity: Get the irq affinity of virtqueue (optional)
> + * @vdev: vdpa device
> + * @idx: virtqueue index
> + * Returns the irq affinity mask
> * @set_group_asid: Set address space identifier for a
> * virtqueue group (optional)
> * @vdev: vdpa device
> @@ -340,6 +349,10 @@ struct vdpa_config_ops {
> const void *buf, unsigned int len);
> u32 (*get_generation)(struct vdpa_device *vdev);
> struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev);
> + int (*set_vq_affinity)(struct vdpa_device *vdev, u16 idx,
> + const struct cpumask *cpu_mask);
> + const struct cpumask *(*get_vq_affinity)(struct vdpa_device *vdev,
> + u16 idx);
>
> /* DMA ops */
> int (*set_map)(struct vdpa_device *vdev, unsigned int asid,


2023-03-16 09:26:39

by Jason Wang

[permalink] [raw]
Subject: Re: [PATCH v3 08/11] vdpa: Add eventfd for the vdpa callback


在 2023/2/28 17:41, Xie Yongji 写道:
> Add eventfd for the vdpa callback so that user
> can signal it directly instead of running the
> callback. It will be used for vhost-vdpa case.
>
> Signed-off-by: Xie Yongji <[email protected]>
> ---
> drivers/vhost/vdpa.c | 2 ++
> drivers/virtio/virtio_vdpa.c | 1 +
> include/linux/vdpa.h | 3 +++
> 3 files changed, 6 insertions(+)
>
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index dc12dbd5b43b..ae89c0ccc2bb 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -599,9 +599,11 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> if (vq->call_ctx.ctx) {
> cb.callback = vhost_vdpa_virtqueue_cb;
> cb.private = vq;
> + cb.irq_ctx = vq->call_ctx.ctx;
> } else {
> cb.callback = NULL;
> cb.private = NULL;
> + cb.irq_ctx = NULL;
> }
> ops->set_vq_cb(vdpa, idx, &cb);
> vhost_vdpa_setup_vq_irq(v, idx);
> diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
> index 9eee8afabda8..a5cecafbc2d1 100644
> --- a/drivers/virtio/virtio_vdpa.c
> +++ b/drivers/virtio/virtio_vdpa.c
> @@ -195,6 +195,7 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
> /* Setup virtqueue callback */
> cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL;
> cb.private = info;
> + cb.irq_ctx = NULL;
> ops->set_vq_cb(vdpa, index, &cb);
> ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq));
>
> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> index 10bd22387276..94a7ec49583a 100644
> --- a/include/linux/vdpa.h
> +++ b/include/linux/vdpa.h
> @@ -13,10 +13,13 @@
> * struct vdpa_calllback - vDPA callback definition.
> * @callback: interrupt callback function
> * @private: the data passed to the callback function
> + * @irq_ctx: the eventfd for the callback, user can signal
> + * it directly instead of running the callback


I'd suggest to do more tweaks to mention:

1) irq_ctx is optional
2) that when the irq_ctx is set, the vDPA driver must guarantee that
signaling it is functional equivalent to triggering the callback. When
set, vDPA parent can signal it directly instead of triggering the callback.

> */
> struct vdpa_callback {
> irqreturn_t (*callback)(void *data);
> void *private;
> + struct eventfd_ctx *irq_ctx;


There's no IRQ concept at the virtual vDPA bus level, so it's probably
better to rename it as "trigger".

Btw, should we select EVENTFD for vDPA?

Thanks


> };
>
> /**


2023-03-16 09:31:38

by Jason Wang

[permalink] [raw]
Subject: Re: [PATCH v3 09/11] vduse: Signal interrupt's eventfd directly in vhost-vdpa case

On Tue, Feb 28, 2023 at 5:42 PM Xie Yongji <[email protected]> wrote:
>
> Now the vdpa callback will associate an eventfd in
> vhost-vdpa case.

I'd suggest avoiding mentioning drivers since vDPA parents should not
know which vDPA driver is bound.

We could say "signal vq trigger eventfd directly if possible"?

With those tweaked.

Acked-by: Jason Wang <[email protected]>

Thanks

> For performance reasons, VDUSE can
> signal it directly during irq injection.
>
> Signed-off-by: Xie Yongji <[email protected]>
> ---
> drivers/vdpa/vdpa_user/vduse_dev.c | 27 +++++++++++++++++++++++----
> 1 file changed, 23 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> index 869cc7860d82..56f3c2480c2a 100644
> --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> @@ -461,6 +461,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
> spin_lock(&vq->irq_lock);
> vq->cb.callback = NULL;
> vq->cb.private = NULL;
> + vq->cb.irq_ctx = NULL;
> spin_unlock(&vq->irq_lock);
> flush_work(&vq->inject);
> flush_work(&vq->kick);
> @@ -526,6 +527,7 @@ static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
> spin_lock(&vq->irq_lock);
> vq->cb.callback = cb->callback;
> vq->cb.private = cb->private;
> + vq->cb.irq_ctx = cb->irq_ctx;
> spin_unlock(&vq->irq_lock);
> }
>
> @@ -1020,6 +1022,20 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> spin_unlock_irq(&vq->irq_lock);
> }
>
> +static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
> +{
> + bool signal = false;
> +
> + spin_lock_irq(&vq->irq_lock);
> + if (vq->ready && vq->cb.irq_ctx) {
> + eventfd_signal(vq->cb.irq_ctx, 1);
> + signal = true;
> + }
> + spin_unlock_irq(&vq->irq_lock);
> +
> + return signal;
> +}
> +
> static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> struct work_struct *irq_work,
> int irq_effective_cpu)
> @@ -1322,11 +1338,14 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> if (index >= dev->vq_num)
> break;
>
> + ret = 0;
> index = array_index_nospec(index, dev->vq_num);
> -
> - vduse_vq_update_effective_cpu(dev->vqs[index]);
> - ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
> - dev->vqs[index]->irq_effective_cpu);
> + if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
> + vduse_vq_update_effective_cpu(dev->vqs[index]);
> + ret = vduse_dev_queue_irq_work(dev,
> + &dev->vqs[index]->inject,
> + dev->vqs[index]->irq_effective_cpu);
> + }
> break;
> }
> case VDUSE_IOTLB_REG_UMEM: {
> --
> 2.20.1
>


2023-03-16 09:41:37

by Jason Wang

[permalink] [raw]
Subject: Re: [PATCH v3 08/11] vdpa: Add eventfd for the vdpa callback

On Thu, Mar 16, 2023 at 5:25 PM Jason Wang <[email protected]> wrote:
>
>
> 在 2023/2/28 17:41, Xie Yongji 写道:
> > Add eventfd for the vdpa callback so that user
> > can signal it directly instead of running the
> > callback. It will be used for vhost-vdpa case.
> >
> > Signed-off-by: Xie Yongji <[email protected]>
> > ---
> > drivers/vhost/vdpa.c | 2 ++
> > drivers/virtio/virtio_vdpa.c | 1 +
> > include/linux/vdpa.h | 3 +++
> > 3 files changed, 6 insertions(+)
> >
> > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > index dc12dbd5b43b..ae89c0ccc2bb 100644
> > --- a/drivers/vhost/vdpa.c
> > +++ b/drivers/vhost/vdpa.c
> > @@ -599,9 +599,11 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> > if (vq->call_ctx.ctx) {
> > cb.callback = vhost_vdpa_virtqueue_cb;
> > cb.private = vq;
> > + cb.irq_ctx = vq->call_ctx.ctx;
> > } else {
> > cb.callback = NULL;
> > cb.private = NULL;
> > + cb.irq_ctx = NULL;
> > }
> > ops->set_vq_cb(vdpa, idx, &cb);
> > vhost_vdpa_setup_vq_irq(v, idx);
> > diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
> > index 9eee8afabda8..a5cecafbc2d1 100644
> > --- a/drivers/virtio/virtio_vdpa.c
> > +++ b/drivers/virtio/virtio_vdpa.c
> > @@ -195,6 +195,7 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
> > /* Setup virtqueue callback */
> > cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL;
> > cb.private = info;
> > + cb.irq_ctx = NULL;
> > ops->set_vq_cb(vdpa, index, &cb);
> > ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq));
> >
> > diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> > index 10bd22387276..94a7ec49583a 100644
> > --- a/include/linux/vdpa.h
> > +++ b/include/linux/vdpa.h
> > @@ -13,10 +13,13 @@
> > * struct vdpa_calllback - vDPA callback definition.
> > * @callback: interrupt callback function
> > * @private: the data passed to the callback function
> > + * @irq_ctx: the eventfd for the callback, user can signal
> > + * it directly instead of running the callback
>
>
> I'd suggest to do more tweaks to mention:
>
> 1) irq_ctx is optional
> 2) that when the irq_ctx is set, the vDPA driver must guarantee that
> signaling it is functional equivalent to triggering the callback. When
> set, vDPA parent can signal it directly instead of triggering the callback.
>
> > */
> > struct vdpa_callback {
> > irqreturn_t (*callback)(void *data);
> > void *private;
> > + struct eventfd_ctx *irq_ctx;
>
>
> There's no IRQ concept at the virtual vDPA bus level, so it's probably
> better to rename it as "trigger".
>
> Btw, should we select EVENTFD for vDPA?

Looks like we are fine here since we only use the pointer to the eventfd_ctx.

Thanks

>
> Thanks
>
>
> > };
> >
> > /**


2023-03-17 06:57:42

by Yongji Xie

[permalink] [raw]
Subject: Re: [PATCH v3 08/11] vdpa: Add eventfd for the vdpa callback

On Thu, Mar 16, 2023 at 5:26 PM Jason Wang <[email protected]> wrote:
>
>
> 在 2023/2/28 17:41, Xie Yongji 写道:
> > Add eventfd for the vdpa callback so that user
> > can signal it directly instead of running the
> > callback. It will be used for vhost-vdpa case.
> >
> > Signed-off-by: Xie Yongji <[email protected]>
> > ---
> > drivers/vhost/vdpa.c | 2 ++
> > drivers/virtio/virtio_vdpa.c | 1 +
> > include/linux/vdpa.h | 3 +++
> > 3 files changed, 6 insertions(+)
> >
> > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > index dc12dbd5b43b..ae89c0ccc2bb 100644
> > --- a/drivers/vhost/vdpa.c
> > +++ b/drivers/vhost/vdpa.c
> > @@ -599,9 +599,11 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> > if (vq->call_ctx.ctx) {
> > cb.callback = vhost_vdpa_virtqueue_cb;
> > cb.private = vq;
> > + cb.irq_ctx = vq->call_ctx.ctx;
> > } else {
> > cb.callback = NULL;
> > cb.private = NULL;
> > + cb.irq_ctx = NULL;
> > }
> > ops->set_vq_cb(vdpa, idx, &cb);
> > vhost_vdpa_setup_vq_irq(v, idx);
> > diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
> > index 9eee8afabda8..a5cecafbc2d1 100644
> > --- a/drivers/virtio/virtio_vdpa.c
> > +++ b/drivers/virtio/virtio_vdpa.c
> > @@ -195,6 +195,7 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
> > /* Setup virtqueue callback */
> > cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL;
> > cb.private = info;
> > + cb.irq_ctx = NULL;
> > ops->set_vq_cb(vdpa, index, &cb);
> > ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq));
> >
> > diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> > index 10bd22387276..94a7ec49583a 100644
> > --- a/include/linux/vdpa.h
> > +++ b/include/linux/vdpa.h
> > @@ -13,10 +13,13 @@
> > * struct vdpa_calllback - vDPA callback definition.
> > * @callback: interrupt callback function
> > * @private: the data passed to the callback function
> > + * @irq_ctx: the eventfd for the callback, user can signal
> > + * it directly instead of running the callback
>
>
> I'd suggest to do more tweaks to mention:
>
> 1) irq_ctx is optional
> 2) that when the irq_ctx is set, the vDPA driver must guarantee that
> signaling it is functional equivalent to triggering the callback. When
> set, vDPA parent can signal it directly instead of triggering the callback.
>

OK, I will add more comments for it.

> > */
> > struct vdpa_callback {
> > irqreturn_t (*callback)(void *data);
> > void *private;
> > + struct eventfd_ctx *irq_ctx;
>
>
> There's no IRQ concept at the virtual vDPA bus level, so it's probably
> better to rename it as "trigger".
>

LGTM.

Thanks,
Yongji

2023-03-17 07:01:51

by Yongji Xie

[permalink] [raw]
Subject: Re: [PATCH v3 09/11] vduse: Signal interrupt's eventfd directly in vhost-vdpa case

On Thu, Mar 16, 2023 at 5:31 PM Jason Wang <[email protected]> wrote:
>
> On Tue, Feb 28, 2023 at 5:42 PM Xie Yongji <[email protected]> wrote:
> >
> > Now the vdpa callback will associate an eventfd in
> > vhost-vdpa case.
>
> I'd suggest avoiding mentioning drivers since vDPA parents should not
> know which vDPA driver is bound.
>
> We could say "signal vq trigger eventfd directly if possible"?
>

It makes sense to me.

Thanks,
Yongji

2023-03-17 07:10:25

by Yongji Xie

[permalink] [raw]
Subject: Re: [PATCH v3 02/11] vdpa: Add set/get_vq_affinity callbacks in vdpa_config_ops

On Thu, Mar 16, 2023 at 11:27 AM Jason Wang <[email protected]> wrote:
>
>
> 在 2023/2/28 17:41, Xie Yongji 写道:
> > This introduces set/get_vq_affinity callbacks in
> > vdpa_config_ops to support interrupt affinity
> > management for vdpa device drivers.
> >
> > Signed-off-by: Xie Yongji <[email protected]>
> > Acked-by: Jason Wang <[email protected]>
> > ---
> > drivers/virtio/virtio_vdpa.c | 28 ++++++++++++++++++++++++++++
> > include/linux/vdpa.h | 13 +++++++++++++
> > 2 files changed, 41 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
> > index d7f5af62ddaa..f72696b4c1c2 100644
> > --- a/drivers/virtio/virtio_vdpa.c
> > +++ b/drivers/virtio/virtio_vdpa.c
> > @@ -337,6 +337,32 @@ static const char *virtio_vdpa_bus_name(struct virtio_device *vdev)
> > return dev_name(&vdpa->dev);
> > }
> >
> > +static int virtio_vdpa_set_vq_affinity(struct virtqueue *vq,
> > + const struct cpumask *cpu_mask)
> > +{
> > + struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vq->vdev);
> > + struct vdpa_device *vdpa = vd_dev->vdpa;
> > + const struct vdpa_config_ops *ops = vdpa->config;
> > + unsigned int index = vq->index;
> > +
> > + if (ops->set_vq_affinity)
> > + return ops->set_vq_affinity(vdpa, index, cpu_mask);
> > +
> > + return 0;
> > +}
> > +
> > +static const struct cpumask *
> > +virtio_vdpa_get_vq_affinity(struct virtio_device *vdev, int index)
> > +{
> > + struct vdpa_device *vdpa = vd_get_vdpa(vdev);
> > + const struct vdpa_config_ops *ops = vdpa->config;
> > +
> > + if (ops->get_vq_affinity)
> > + return ops->get_vq_affinity(vdpa, index);
> > +
> > + return NULL;
> > +}
> > +
> > static const struct virtio_config_ops virtio_vdpa_config_ops = {
> > .get = virtio_vdpa_get,
> > .set = virtio_vdpa_set,
> > @@ -349,6 +375,8 @@ static const struct virtio_config_ops virtio_vdpa_config_ops = {
> > .get_features = virtio_vdpa_get_features,
> > .finalize_features = virtio_vdpa_finalize_features,
> > .bus_name = virtio_vdpa_bus_name,
> > + .set_vq_affinity = virtio_vdpa_set_vq_affinity,
> > + .get_vq_affinity = virtio_vdpa_get_vq_affinity,
> > };
> >
> > static void virtio_vdpa_release_dev(struct device *_d)
> > diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> > index 43f59ef10cc9..d61f369f9cd6 100644
> > --- a/include/linux/vdpa.h
> > +++ b/include/linux/vdpa.h
> > @@ -250,6 +250,15 @@ struct vdpa_map_file {
> > * @vdev: vdpa device
> > * Returns the iova range supported by
> > * the device.
> > + * @set_vq_affinity: Set the irq affinity of virtqueue (optional)
>
>
> Nit: it's better not mention IRQ here because the virtqueue notification
> is not necessarily backed on IRQ.
>

OK.

Thanks,
Yongji