2022-04-24 08:51:34

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 00/16] virtio_ring: resize support re-use the old buffers

This patch set is based on "virtio pci support VIRTIO_F_RING_RESET". It is
posted as an RFC because that series has not been merged yet: "virtio pci
support VIRTIO_F_RING_RESET" is currently waiting for Jason Wang's "rework on
the IRQ hardening of virtio".

This patch set implements the reuse of buffers submitted before a resize.
They are resubmitted to the new vq in the order of the original submission.

A core idea is to detach the original vring after the new vring is allocated.
Then, the buffers are sequentially obtained from the old vring and submitted to
the new vq.

Please review.

Xuan Zhuo (16):
virtio_ring: split: vring_unmap_one_split() get extra by arg
virtio_ring: split: introduce vring_virtqueue_detach_split()
virtio_ring: split: extract virtqueue_update_split()
virtio_ring: split: extract detach_from_vring_split()
virtio_ring: split: support copy from vring
virtio_ring: split: introduce vring_reuse_bufs_split()
virtio_ring: split: resize support re-use buffers
virtio_ring: packed: extract next_idx()
virtio_ring: packed: always update desc_extra
virtio_ring: packed: introduce vring_virtqueue_detach_packed()
virtio_ring: packed: extract virtqueue_update_packed()
virtio_ring: packed: extract detach_from_vring_packed()
virtio_ring: packed: support copy from vring
virtio_ring: packed: introduce vring_reuse_bufs_packed()
virtio_ring: packed: resize support re-use buffers
virtio_ring: virtqueue_resize() no longer call recycle() directly

drivers/virtio/virtio_ring.c | 685 ++++++++++++++++++++++++++---------
1 file changed, 518 insertions(+), 167 deletions(-)

--
2.31.0


2022-04-24 08:53:16

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 02/16] virtio_ring: split: introduce vring_virtqueue_detach_split()

The function vring_virtqueue_detach_split() is introduced to detach the
vring of the current vq.

num_left records how many buffers are still outstanding, which can be used
to check that the recovery of the buffers has completed.

Signed-off-by: Xuan Zhuo <[email protected]>
---
drivers/virtio/virtio_ring.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 0a2c58557027..f3ad9322b512 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -112,6 +112,8 @@ struct vring_virtqueue_split {
*/
u32 vring_align;
bool may_reduce_num;
+
+ u32 num_left;
};

struct vring_virtqueue_packed {
@@ -982,6 +984,21 @@ static void virtqueue_reinit_split(struct vring_virtqueue *vq)
virtqueue_vring_init_split(vq);
}

+static void virtqueue_vring_detach_split(struct vring_virtqueue *vq,
+ struct vring_virtqueue_split *vring)
+{
+ vring->vring = vq->split.vring;
+
+ vring->queue_dma_addr = vq->split.queue_dma_addr;
+
+ vring->queue_size_in_bytes = vq->split.queue_size_in_bytes;
+
+ vring->desc_extra = vq->split.desc_extra;
+ vring->desc_state = vq->split.desc_state;
+
+ vring->num_left = vring->vring.num - vq->vq.num_free;
+}
+
static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
struct vring_virtqueue_split *vring)
{
--
2.31.0

2022-04-24 10:48:55

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 03/16] virtio_ring: split: extract virtqueue_update_split()

Separate the logic for updating the vq state from virtqueue_add_split().

In this way, when a subsequent patch implements the logic of reusing the
buffers during a resize, this function can be shared.

Signed-off-by: Xuan Zhuo <[email protected]>
---
drivers/virtio/virtio_ring.c | 95 ++++++++++++++++++++----------------
1 file changed, 54 insertions(+), 41 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index f3ad9322b512..6fd45c9a3517 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -507,6 +507,51 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
return next;
}

+static inline void virtqueue_update_split(struct vring_virtqueue *vq,
+ u32 descs_used,
+ u32 next,
+ struct vring_desc *desc,
+ void *data)
+{
+ struct virtqueue *_vq = &vq->vq;
+ u32 avail, head;
+
+ head = vq->free_head;
+
+ /* We're using some buffers from the free list. */
+ vq->vq.num_free -= descs_used;
+
+ /* Update free pointer */
+ vq->free_head = next;
+
+ /* Store token and indirect buffer state. */
+ vq->split.desc_state[head].data = data;
+ vq->split.desc_state[head].indir_desc = desc;
+
+ /* Put entry in available array (but don't update avail->idx until they
+ * do sync).
+ */
+ avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
+ vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
+
+ /* Descriptors and available array need to be set before we expose the
+ * new available array entries.
+ */
+ virtio_wmb(vq->weak_barriers);
+ vq->split.avail_idx_shadow++;
+ vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
+ vq->split.avail_idx_shadow);
+ vq->num_added++;
+
+ pr_debug("Added buffer head %i to %p\n", head, vq);
+
+ /* This is very unlikely, but theoretically possible. Kick
+ * just in case.
+ */
+ if (unlikely(vq->num_added == (1 << 16) - 1))
+ virtqueue_kick(_vq);
+}
+
static inline int virtqueue_add_split(struct virtqueue *_vq,
struct scatterlist *sgs[],
unsigned int total_sg,
@@ -519,7 +564,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
struct vring_virtqueue *vq = to_vvq(_vq);
struct scatterlist *sg;
struct vring_desc *desc;
- unsigned int i, n, avail, descs_used, prev, err_idx;
+ unsigned int i, n, descs_used, prev, err_idx;
int head;
bool indirect;

@@ -619,50 +664,18 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
if (vring_mapping_error(vq, addr))
goto unmap_release;

- virtqueue_add_desc_split(_vq, vq->split.vring.desc,
- head, addr,
- total_sg * sizeof(struct vring_desc),
- VRING_DESC_F_INDIRECT,
- false);
+ i = virtqueue_add_desc_split(_vq, vq->split.vring.desc,
+ head, addr,
+ total_sg * sizeof(struct vring_desc),
+ VRING_DESC_F_INDIRECT,
+ false);
+ } else {
+ desc = ctx;
}

- /* We're using some buffers from the free list. */
- vq->vq.num_free -= descs_used;
-
- /* Update free pointer */
- if (indirect)
- vq->free_head = vq->split.desc_extra[head].next;
- else
- vq->free_head = i;
-
- /* Store token and indirect buffer state. */
- vq->split.desc_state[head].data = data;
- if (indirect)
- vq->split.desc_state[head].indir_desc = desc;
- else
- vq->split.desc_state[head].indir_desc = ctx;
-
- /* Put entry in available array (but don't update avail->idx until they
- * do sync). */
- avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
- vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
-
- /* Descriptors and available array need to be set before we expose the
- * new available array entries. */
- virtio_wmb(vq->weak_barriers);
- vq->split.avail_idx_shadow++;
- vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
- vq->split.avail_idx_shadow);
- vq->num_added++;
-
- pr_debug("Added buffer head %i to %p\n", head, vq);
+ virtqueue_update_split(vq, descs_used, i, desc, data);
END_USE(vq);

- /* This is very unlikely, but theoretically possible. Kick
- * just in case. */
- if (unlikely(vq->num_added == (1 << 16) - 1))
- virtqueue_kick(_vq);
-
return 0;

unmap_release:
--
2.31.0

2022-04-24 11:47:54

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 01/16] virtio_ring: split: vring_unmap_one_split() get extra by arg

vring_unmap_one_split() is changed to take the extra as an argument instead
of getting it from the vq. The purpose of this is that the function can later
be used to unmap an extra belonging to a detached vring.

Signed-off-by: Xuan Zhuo <[email protected]>
---
drivers/virtio/virtio_ring.c | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 0516fdbd070e..0a2c58557027 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -426,32 +426,31 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
}

static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
- unsigned int i)
+ struct vring_desc_extra *extra)
{
- struct vring_desc_extra *extra = vq->split.desc_extra;
u16 flags;

if (!vq->use_dma_api)
goto out;

- flags = extra[i].flags;
+ flags = extra->flags;

if (flags & VRING_DESC_F_INDIRECT) {
dma_unmap_single(vring_dma_dev(vq),
- extra[i].addr,
- extra[i].len,
+ extra->addr,
+ extra->len,
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
} else {
dma_unmap_page(vring_dma_dev(vq),
- extra[i].addr,
- extra[i].len,
+ extra->addr,
+ extra->len,
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
}

out:
- return extra[i].next;
+ return extra->next;
}

static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
@@ -679,7 +678,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
vring_unmap_one_split_indirect(vq, &desc[i]);
i = virtio16_to_cpu(_vq->vdev, desc[i].next);
} else
- i = vring_unmap_one_split(vq, i);
+ i = vring_unmap_one_split(vq, &vq->split.desc_extra[i]);
}

if (indirect)
@@ -733,12 +732,12 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
i = head;

while (vq->split.vring.desc[i].flags & nextflag) {
- vring_unmap_one_split(vq, i);
+ vring_unmap_one_split(vq, &vq->split.desc_extra[i]);
i = vq->split.desc_extra[i].next;
vq->vq.num_free++;
}

- vring_unmap_one_split(vq, i);
+ vring_unmap_one_split(vq, &vq->split.desc_extra[i]);
vq->split.desc_extra[i].next = vq->free_head;
vq->free_head = head;

--
2.31.0

2022-04-24 12:11:15

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 14/16] virtio_ring: packed: introduce vring_reuse_bufs_packed()

vring_reuse_bufs_packed() walks vring.desc forward starting from
last_used_idx. The resulting buffers are in the order in which they were
originally submitted.

Preserving the order of the buffers during reuse is beneficial. For example,
with virtio-net, if the order is not guaranteed, it may lead to out-of-order
TCP streams.

Signed-off-by: Xuan Zhuo <[email protected]>
---
drivers/virtio/virtio_ring.c | 45 ++++++++++++++++++++++++++++++++++++
1 file changed, 45 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 8ca9985ffb4b..66f71e22ece0 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2041,6 +2041,51 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
return true;
}

+static void vring_reuse_bufs_packed(struct vring_virtqueue *vq,
+ struct vring_virtqueue_packed *vring,
+ void (*recycle)(struct virtqueue *vq, void *buf))
+{
+ struct vring_desc_state_packed *state;
+ u32 last_used, id, desc_num = 0;
+ int err = 0;
+ void *buf;
+
+ last_used = vring->last_used_idx;
+
+ do {
+ id = le16_to_cpu(vring->vring.desc[last_used].id);
+
+ state = &vring->desc_state[id];
+
+ if (!state->data) {
+ last_used++;
+ goto next;
+ }
+
+ /* once add to vq fail, no more try add to vq. */
+ if (err >= 0) {
+ err = vring_copy_to_vq_packed(vq, vring, id);
+ if (err >= 0)
+ goto ok;
+ }
+
+ buf = state->data;
+ detach_buf_from_vring_packed(vring, vq, id, 0, NULL);
+ recycle(&vq->vq, buf);
+
+ok:
+ last_used += state->num;
+ desc_num += state->num;
+
+next:
+ if (unlikely(last_used >= vring->vring.num))
+ last_used -= vring->vring.num;
+
+ } while (last_used != vring->last_used_idx);
+
+ WARN_ON(vring->num_left != desc_num);
+}
+
static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
--
2.31.0

2022-04-24 14:31:22

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 13/16] virtio_ring: packed: support copy from vring

To support reusing old buffers during resize.

This patch implements copying a buffer from the detached vring to the vq
where the new vring is attached.

This process is similar to virtqueue_add_packed(), but skips DMA. Use
the function virtqueue_update_packed() provided by the previous patch to
update the state of the vq.

Signed-off-by: Xuan Zhuo <[email protected]>
---
drivers/virtio/virtio_ring.c | 76 ++++++++++++++++++++++++++++++++++++
1 file changed, 76 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 1efb47b88b40..8ca9985ffb4b 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1654,6 +1654,82 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
return -EIO;
}

+static u32 vring_copy_desc_packed(struct vring_virtqueue *vq,
+ u32 idx,
+ u16 curr,
+ __le16 *head_flags,
+ struct vring_virtqueue_packed *vring,
+ u32 src)
+{
+ u16 old_flags = vring->desc_extra[src].flags;
+ u16 flags = vq->packed.avail_used_flags;
+ struct vring_packed_desc *desc;
+ struct vring_desc_extra *extra;
+
+ if (old_flags & VRING_DESC_F_NEXT)
+ flags |= VRING_DESC_F_NEXT;
+
+ if (old_flags & VRING_DESC_F_WRITE)
+ flags |= VRING_DESC_F_WRITE;
+
+ if (old_flags & VRING_DESC_F_INDIRECT)
+ flags |= VRING_DESC_F_INDIRECT;
+
+ desc = vq->packed.vring.desc + idx;
+ extra = vq->packed.desc_extra + curr;
+
+ if (head_flags)
+ *head_flags = cpu_to_le16(flags);
+ else
+ desc->flags = cpu_to_le16(flags);
+
+ desc->addr = cpu_to_le64(vring->desc_extra[src].addr);
+ desc->len = cpu_to_le32(vring->desc_extra[src].len);
+ desc->id = cpu_to_le16(vq->free_head);
+
+ extra->addr = vring->desc_extra[src].addr;
+ extra->len = vring->desc_extra[src].len;
+ extra->flags = vring->desc_extra[src].flags;
+
+ return vq->packed.desc_extra[curr].next;
+}
+
+static int vring_copy_to_vq_packed(struct vring_virtqueue *vq,
+ struct vring_virtqueue_packed *vring,
+ u32 old_id)
+{
+ struct vring_desc_state_packed *state;
+ __le16 head_flags;
+ u16 prev, curr;
+ u32 i, n;
+
+ state = &vring->desc_state[old_id];
+
+ if (state->num > vq->vq.num_free)
+ return -ENOSPC;
+
+ i = vq->packed.next_avail_idx;
+ curr = vq->free_head;
+
+ for (n = 0; n < state->num; n++) {
+ prev = curr;
+ curr = vring_copy_desc_packed(vq, i, curr,
+ n ? NULL : &head_flags,
+ vring, old_id);
+
+ old_id = vring->desc_extra[old_id].next;
+
+ i = next_idx(vq, i);
+ }
+
+ virtqueue_update_packed(vq, state->num, curr, prev, i, head_flags,
+ state->indir_desc, state->data);
+
+ state->data = NULL;
+
+ return state->num;
+}
+
static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
--
2.31.0

2022-04-24 17:28:42

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 07/16] virtio_ring: split: resize support re-use buffers

Split vring resize supports reusing the original buffer.

The split vring resize support implemented earlier let the upper layer
recycle all the buffers. This commit instead first tries to re-add the
buffers to the new vring in the order in which they were submitted to the
old vring. The recycle callback is then called to release the remaining
buffers that cannot be submitted to the new vring.

Signed-off-by: Xuan Zhuo <[email protected]>
---
drivers/virtio/virtio_ring.c | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 02d4ffcc0a3b..fa4270e8c009 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1281,11 +1281,13 @@ static struct virtqueue *vring_create_virtqueue_split(
return vq;
}

-static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
+static int virtqueue_resize_split(struct virtqueue *_vq, u32 num,
+ void (*recycle)(struct virtqueue *vq, void *buf))
{
+ struct vring_virtqueue_split vring = {}, vring_old = {};
struct vring_virtqueue *vq = to_vvq(_vq);
- struct vring_virtqueue_split vring = {};
struct virtio_device *vdev = _vq->vdev;
+ void *buf;
int err;

err = vring_alloc_queue_split(&vring, vdev, num, vq->split.vring_align,
@@ -1299,15 +1301,26 @@ static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
goto err;
}

- vring_free(&vq->vq);
+ virtqueue_vring_detach_split(vq, &vring_old);

virtqueue_init(vq, vring.vring.num);
virtqueue_vring_attach_split(vq, &vring);
virtqueue_vring_init_split(vq);

+ vring_reuse_bufs_split(vq, &vring_old, recycle);
+ vring_free_split(&vring_old, vdev);
+
return 0;

err:
+ /*
+ * In the case of failure to create vring, do not try to reuse the
+ * original buffer. Because the probability of this situation is not
+ * high, but we have to introduce new logic.
+ */
+ while ((buf = virtqueue_detach_unused_buf(&vq->vq)))
+ recycle(&vq->vq, buf);
+
virtqueue_reinit_split(vq);
return -ENOMEM;
}
@@ -2747,7 +2760,7 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
if (packed)
err = virtqueue_resize_packed(_vq, num);
else
- err = virtqueue_resize_split(_vq, num);
+ err = virtqueue_resize_split(_vq, num, recycle);

if (vdev->config->enable_reset_vq(_vq))
return -EBUSY;
--
2.31.0

2022-04-24 21:56:42

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 15/16] virtio_ring: packed: resize support re-use buffers

Packed vring resize supports reusing the original buffer.

The packed vring resize support implemented earlier let the upper layer
recycle all the buffers. This commit instead first tries to re-add the
buffers to the new vring in the order in which they were submitted to the
old vring. The recycle callback is then called to release the remaining
buffers that cannot be submitted to the new vring.

Signed-off-by: Xuan Zhuo <[email protected]>
---
drivers/virtio/virtio_ring.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 66f71e22ece0..730c8dded4c7 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -217,7 +217,6 @@ struct vring_virtqueue {
};

static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
-static void vring_free(struct virtqueue *_vq);

/*
* Helpers.
@@ -2357,11 +2356,13 @@ static struct virtqueue *vring_create_virtqueue_packed(
return NULL;
}

-static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
+static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num,
+ void (*recycle)(struct virtqueue *vq, void *buf))
{
- struct vring_virtqueue_packed vring = {};
+ struct vring_virtqueue_packed vring = {}, vring_old = {};
struct vring_virtqueue *vq = to_vvq(_vq);
struct virtio_device *vdev = _vq->vdev;
+ void *buf;
int err;

if (vring_alloc_queue_packed(&vring, vdev, num))
@@ -2371,17 +2372,28 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
if (err)
goto err_state_extra;

- vring_free(&vq->vq);
+ vring_virtqueue_detach_packed(vq, &vring_old);

virtqueue_init(vq, vring.vring.num);
virtqueue_vring_attach_packed(vq, &vring);
virtqueue_vring_init_packed(vq);

+ vring_reuse_bufs_packed(vq, &vring_old, recycle);
+ vring_free_packed(&vring_old, vdev);
+
return 0;

err_state_extra:
vring_free_packed(&vring, vdev);
err_ring:
+ /*
+ * In the case of failure to create vring, do not try to reuse the
+ * original buffer. Because the probability of this situation is not
+ * high, but we have to introduce new logic.
+ */
+ while ((buf = virtqueue_detach_unused_buf(&vq->vq)))
+ recycle(&vq->vq, buf);
+
virtqueue_reinit_packed(vq);
return -ENOMEM;
}
@@ -2914,7 +2926,7 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
recycle(_vq, buf);

if (packed)
- err = virtqueue_resize_packed(_vq, num);
+ err = virtqueue_resize_packed(_vq, num, recycle);
else
err = virtqueue_resize_split(_vq, num, recycle);

--
2.31.0

2022-04-25 07:17:07

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 04/16] virtio_ring: split: extract detach_from_vring_split()

To handle freeing buffers from a detached vring, split
detach_buf_split() into two functions.

The split-out function detach_buf_from_vring_split() releases a buffer from
the vring; the vq passed in is read-only, and all modifications are made to
the vring.

In this way, detach_buf_from_vring_split() becomes a general function,
which can be used for detach_buf_split() and also for handling detached
vrings.

Signed-off-by: Xuan Zhuo <[email protected]>
---
drivers/virtio/virtio_ring.c | 54 +++++++++++++++++++++++-------------
1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 6fd45c9a3517..aa85058978cb 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -734,54 +734,68 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
return needs_kick;
}

-static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
- void **ctx)
+static int detach_buf_from_vring_split(struct vring_virtqueue_split *vring,
+ struct vring_virtqueue const *vq,
+ unsigned int head,
+ unsigned int free_head,
+ void **ctx)
{
- unsigned int i, j;
+ unsigned int i, j, num = 0;
__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);

/* Clear data ptr. */
- vq->split.desc_state[head].data = NULL;
+ vring->desc_state[head].data = NULL;

/* Put back on free list: unmap first-level descriptors and find end */
i = head;

- while (vq->split.vring.desc[i].flags & nextflag) {
- vring_unmap_one_split(vq, &vq->split.desc_extra[i]);
- i = vq->split.desc_extra[i].next;
- vq->vq.num_free++;
+ while (vring->vring.desc[i].flags & nextflag) {
+ vring_unmap_one_split(vq, &vring->desc_extra[i]);
+ i = vring->desc_extra[i].next;
+ ++num;
}

- vring_unmap_one_split(vq, &vq->split.desc_extra[i]);
- vq->split.desc_extra[i].next = vq->free_head;
- vq->free_head = head;
+ vring_unmap_one_split(vq, &vring->desc_extra[i]);
+ vring->desc_extra[i].next = free_head;

- /* Plus final descriptor */
- vq->vq.num_free++;
+ ++num;

if (vq->indirect) {
struct vring_desc *indir_desc =
- vq->split.desc_state[head].indir_desc;
+ vring->desc_state[head].indir_desc;
u32 len;

/* Free the indirect table, if any, now that it's unmapped. */
if (!indir_desc)
- return;
+ return num;

- len = vq->split.desc_extra[head].len;
+ len = vring->desc_extra[head].len;

- BUG_ON(!(vq->split.desc_extra[head].flags &
- VRING_DESC_F_INDIRECT));
+ BUG_ON(!(vring->desc_extra[head].flags & VRING_DESC_F_INDIRECT));
BUG_ON(len == 0 || len % sizeof(struct vring_desc));

for (j = 0; j < len / sizeof(struct vring_desc); j++)
vring_unmap_one_split_indirect(vq, &indir_desc[j]);

kfree(indir_desc);
- vq->split.desc_state[head].indir_desc = NULL;
+ vring->desc_state[head].indir_desc = NULL;
} else if (ctx) {
- *ctx = vq->split.desc_state[head].indir_desc;
+ *ctx = vring->desc_state[head].indir_desc;
}
+
+ return num;
+}
+
+static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
+ void **ctx)
+{
+ int num;
+
+ num = detach_buf_from_vring_split(&vq->split, vq, head, vq->free_head,
+ ctx);
+
+ vq->vq.num_free += num;
+ vq->free_head = head;
}

static inline bool more_used_split(const struct vring_virtqueue *vq)
--
2.31.0

2022-04-25 07:38:55

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 08/16] virtio_ring: packed: extract next_idx()

Separate the logic for advancing the packed ring index into a function so
that it can be shared in multiple locations. Subsequent patches will also
use this logic.

Signed-off-by: Xuan Zhuo <[email protected]>
---
drivers/virtio/virtio_ring.c | 30 +++++++++++++++++-------------
1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index fa4270e8c009..e3525d92f646 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1330,6 +1330,18 @@ static int virtqueue_resize_split(struct virtqueue *_vq, u32 num,
* Packed ring specific functions - *_packed().
*/

+static u32 next_idx(struct vring_virtqueue *vq, u32 idx)
+{
+ if ((unlikely(++idx >= vq->packed.vring.num))) {
+ idx = 0;
+ vq->packed.avail_used_flags ^=
+ 1 << VRING_PACKED_DESC_F_AVAIL |
+ 1 << VRING_PACKED_DESC_F_USED;
+ }
+
+ return idx;
+}
+
static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
struct vring_desc_extra *extra)
{
@@ -1465,14 +1477,11 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
vq->vq.num_free -= 1;

/* Update free pointer */
- n = head + 1;
- if (n >= vq->packed.vring.num) {
- n = 0;
+ n = next_idx(vq, head);
+
+ if (n < head)
vq->packed.avail_wrap_counter ^= 1;
- vq->packed.avail_used_flags ^=
- 1 << VRING_PACKED_DESC_F_AVAIL |
- 1 << VRING_PACKED_DESC_F_USED;
- }
+
vq->packed.next_avail_idx = n;
vq->free_head = vq->packed.desc_extra[id].next;

@@ -1592,12 +1601,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
prev = curr;
curr = vq->packed.desc_extra[curr].next;

- if ((unlikely(++i >= vq->packed.vring.num))) {
- i = 0;
- vq->packed.avail_used_flags ^=
- 1 << VRING_PACKED_DESC_F_AVAIL |
- 1 << VRING_PACKED_DESC_F_USED;
- }
+ i = next_idx(vq, i);
}
}

--
2.31.0

2022-04-25 08:04:27

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 06/16] virtio_ring: split: introduce vring_reuse_bufs_split()

This patch will resubmit the buffers to the new vq in the order in which
they were submitted.

In order to get these buffers in order, the patch will get the buffers
from the avail ring. We can know the current position of the avail ring
from vring.avail->idx.

First, scan backward from idx. If a state appears repeatedly, it means that
the buffer corresponding to this state has been consumed by the device and
resubmitted, so the later occurrence is removed from the queue. Then,
scanning forward from where the backward pass ended, the buffers encountered
are in the order in which they were submitted.

Preserving the order of the buffers during reuse is beneficial. For example,
with virtio-net, if the order is not guaranteed, it may lead to out-of-order
TCP streams.

Signed-off-by: Xuan Zhuo <[email protected]>
---
drivers/virtio/virtio_ring.c | 65 ++++++++++++++++++++++++++++++++++++
1 file changed, 65 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 167442cfdb2a..02d4ffcc0a3b 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -69,6 +69,7 @@
struct vring_desc_state_split {
void *data; /* Data for callback. */
struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
+ bool checked;
};

struct vring_desc_state_packed {
@@ -1007,6 +1008,70 @@ static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
return true;
}

+static void vring_reuse_bufs_split(struct vring_virtqueue *vq,
+ struct vring_virtqueue_split *vring,
+ void (*recycle)(struct virtqueue *vq, void *buf))
+{
+ u32 head, num, idx, oidx, i, desc_num = 0;
+ u16 null, *p;
+ int err = 0;
+ void *buf;
+
+ num = vring->vring.num;
+
+ oidx = le16_to_cpu(vring->vring.avail->idx) - 1;
+ null = vring->vring.avail->ring[oidx & (num - 1)];
+
+ /*
+ * Check in the opposite direction in the avail ring. If a state appears
+ * repeatedly, it means that the state has been used and rejoined the
+ * avail ring.
+ */
+ for (i = 0, idx = oidx; i < num; ++i, --idx) {
+ p = &vring->vring.avail->ring[idx & (num - 1)];
+
+ head = virtio32_to_cpu(vq->vq.vdev, *p);
+
+ if (vring->desc_state[head].checked) {
+ *p = null;
+ continue;
+ }
+
+ vring->desc_state[head].checked = true;
+ }
+
+ /*
+ * Checking the avail ring forward, the non-null states encountered are
+ * the order in which they were added to the avail ring.
+ */
+ for (i = 0, ++idx; i < num; ++i, ++idx) {
+ p = &vring->vring.avail->ring[idx & (num - 1)];
+ if (*p == null && idx != oidx)
+ continue;
+
+ head = virtio32_to_cpu(vq->vq.vdev, *p);
+
+ if (!vring->desc_state[head].data)
+ continue;
+
+ /* once add to vq fail, no more try add to vq. */
+ if (err >= 0) {
+ err = vring_copy_to_vq_split(vq, vring, head);
+ if (err >= 0) {
+ desc_num += err;
+ continue;
+ }
+ }
+
+ buf = vring->desc_state[head].data;
+ desc_num += detach_buf_from_vring_split(vring, vq, head, 0,
+ NULL);
+ recycle(&vq->vq, buf);
+ }
+
+ WARN_ON(vring->num_left != desc_num);
+}
+
static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
--
2.31.0

2022-04-25 08:28:46

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 11/16] virtio_ring: packed: extract virtqueue_update_packed()

Separate the logic for updating the vq state from virtqueue_add_packed()
and virtqueue_add_indirect_packed().

In this way, when a subsequent patch implements the logic of reusing the
buffers during a resize, this function can be shared.

Signed-off-by: Xuan Zhuo <[email protected]>
---
drivers/virtio/virtio_ring.c | 96 ++++++++++++++++++------------------
1 file changed, 47 insertions(+), 49 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 219e008a4633..5e6bd9a4e648 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1403,6 +1403,47 @@ static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
return desc;
}

+static inline void virtqueue_update_packed(struct vring_virtqueue *vq,
+ u32 descs_used,
+ u16 curr,
+ u16 prev,
+ u32 idx,
+ __le16 head_flags,
+ struct vring_packed_desc *desc,
+ void *data)
+{
+ u16 head, id;
+
+ id = vq->free_head;
+ head = vq->packed.next_avail_idx;
+
+ if (idx < head)
+ vq->packed.avail_wrap_counter ^= 1;
+
+ /* We're using some buffers from the free list. */
+ vq->vq.num_free -= descs_used;
+
+ /* Update free pointer */
+ vq->packed.next_avail_idx = idx;
+ vq->free_head = curr;
+
+ /* Store token. */
+ vq->packed.desc_state[id].num = descs_used;
+ vq->packed.desc_state[id].data = data;
+ vq->packed.desc_state[id].indir_desc = desc;
+ vq->packed.desc_state[id].last = prev;
+
+ /*
+ * A driver MUST NOT make the first descriptor in the list
+ * available before all subsequent descriptors comprising
+ * the list are made available.
+ */
+ virtio_wmb(vq->weak_barriers);
+ vq->packed.vring.desc[head].flags = head_flags;
+ vq->num_added += descs_used;
+
+}
+
static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
struct scatterlist *sgs[],
unsigned int total_sg,
@@ -1414,6 +1455,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
struct vring_packed_desc *desc;
struct scatterlist *sg;
unsigned int i, n, err_idx;
+ __le16 head_flags;
u16 head, id;
dma_addr_t addr;

@@ -1466,34 +1508,13 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
vq->packed.avail_used_flags;

- /*
- * A driver MUST NOT make the first descriptor in the list
- * available before all subsequent descriptors comprising
- * the list are made available.
- */
- virtio_wmb(vq->weak_barriers);
- vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
- vq->packed.avail_used_flags);
-
- /* We're using some buffers from the free list. */
- vq->vq.num_free -= 1;
+ head_flags = cpu_to_le16(VRING_DESC_F_INDIRECT | vq->packed.avail_used_flags);

/* Update free pointer */
n = next_idx(vq, head);

- if (n < head)
- vq->packed.avail_wrap_counter ^= 1;
-
- vq->packed.next_avail_idx = n;
- vq->free_head = vq->packed.desc_extra[id].next;
-
- /* Store token and indirect buffer state. */
- vq->packed.desc_state[id].num = 1;
- vq->packed.desc_state[id].data = data;
- vq->packed.desc_state[id].indir_desc = desc;
- vq->packed.desc_state[id].last = id;
-
- vq->num_added += 1;
+ virtqueue_update_packed(vq, 1, vq->packed.desc_extra[id].next, id, n,
+ head_flags, desc, data);

pr_debug("Added buffer head %i to %p\n", head, vq);
END_USE(vq);
@@ -1605,31 +1626,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
}
}

- if (i < head)
- vq->packed.avail_wrap_counter ^= 1;
-
- /* We're using some buffers from the free list. */
- vq->vq.num_free -= descs_used;
-
- /* Update free pointer */
- vq->packed.next_avail_idx = i;
- vq->free_head = curr;
-
- /* Store token. */
- vq->packed.desc_state[id].num = descs_used;
- vq->packed.desc_state[id].data = data;
- vq->packed.desc_state[id].indir_desc = ctx;
- vq->packed.desc_state[id].last = prev;
-
- /*
- * A driver MUST NOT make the first descriptor in the list
- * available before all subsequent descriptors comprising
- * the list are made available.
- */
- virtio_wmb(vq->weak_barriers);
- vq->packed.vring.desc[head].flags = head_flags;
- vq->num_added += descs_used;
-
+ virtqueue_update_packed(vq, descs_used, curr, prev, i, head_flags,
+ ctx, data);
pr_debug("Added buffer head %i to %p\n", head, vq);
END_USE(vq);

--
2.31.0

2022-04-25 08:29:44

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 16/16] virtio_ring: virtqueue_resize() no longer call recycle() directly

virtqueue_resize() no longer calls the recycle callback itself to release
the buffers. The buffers are first reused by virtqueue_resize_*(), and any
buffers that cannot be reused are released there via the recycle callback.

Signed-off-by: Xuan Zhuo <[email protected]>
---
drivers/virtio/virtio_ring.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 730c8dded4c7..51dab35a54c9 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2895,7 +2895,6 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
struct vring_virtqueue *vq = to_vvq(_vq);
struct virtio_device *vdev = vq->vq.vdev;
bool packed;
- void *buf;
int err;

if (!vq->we_own_ring)
@@ -2922,16 +2921,19 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
if (err)
return err;

- while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
- recycle(_vq, buf);
-
if (packed)
err = virtqueue_resize_packed(_vq, num, recycle);
else
err = virtqueue_resize_split(_vq, num, recycle);

- if (vdev->config->enable_reset_vq(_vq))
+ if (vdev->config->enable_reset_vq(_vq)) {
return -EBUSY;
+ } else if (!err) {
+ num = packed ? vq->packed.vring.num : vq->split.vring.num;
+
+ if (num != vq->vq.num_free)
+ virtqueue_kick(_vq);
+ }

return err;
}
--
2.31.0

2022-04-25 17:52:03

by Xuan Zhuo

[permalink] [raw]
Subject: [RFC PATCH 09/16] virtio_ring: packed: always update desc_extra

Always update desc_extra instead of deciding whether to do so based on
use_dma_api.

Because desc may be modified by the device, when reusing buffers during a
resize, len and flags can only be obtained from desc_extra.

Signed-off-by: Xuan Zhuo <[email protected]>
---
drivers/virtio/virtio_ring.c | 22 +++++++++-------------
1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index e3525d92f646..436b18184dfe 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1456,13 +1456,11 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
sizeof(struct vring_packed_desc));
vq->packed.vring.desc[head].id = cpu_to_le16(id);

- if (vq->use_dma_api) {
- vq->packed.desc_extra[id].addr = addr;
- vq->packed.desc_extra[id].len = total_sg *
- sizeof(struct vring_packed_desc);
- vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
- vq->packed.avail_used_flags;
- }
+ vq->packed.desc_extra[id].addr = addr;
+ vq->packed.desc_extra[id].len = total_sg *
+ sizeof(struct vring_packed_desc);
+ vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
+ vq->packed.avail_used_flags;

/*
* A driver MUST NOT make the first descriptor in the list
@@ -1592,12 +1590,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
desc[i].len = cpu_to_le32(sg->length);
desc[i].id = cpu_to_le16(id);

- if (unlikely(vq->use_dma_api)) {
- vq->packed.desc_extra[curr].addr = addr;
- vq->packed.desc_extra[curr].len = sg->length;
- vq->packed.desc_extra[curr].flags =
- le16_to_cpu(flags);
- }
+ vq->packed.desc_extra[curr].addr = addr;
+ vq->packed.desc_extra[curr].len = sg->length;
+ vq->packed.desc_extra[curr].flags = le16_to_cpu(flags);
+
prev = curr;
curr = vq->packed.desc_extra[curr].next;

--
2.31.0