2022-09-01 06:14:47

by Guo Zhi

[permalink] [raw]
Subject: [RFC v3 0/7] In order support for virtio_ring, vhost and vsock.

In virtio-spec 1.1, a new feature bit, VIRTIO_F_IN_ORDER, was introduced.
When this feature has been negotiated, the virtio driver will use
descriptors in ring order: starting from offset 0 in the table, and
wrapping around at the end of the table. Vhost devices will always use
descriptors in the same order in which they have been made available.
This can reduce virtio accesses to the used ring.

Based on the updated virtio-spec, this series implements an IN_ORDER prototype in
the virtio driver and vhost. Currently, the devices that support the IN_ORDER
feature are *vhost_test* and *vsock* in vhost, and virtio-net in QEMU. The
IN_ORDER feature works well in combination with the INDIRECT feature in this
patch series.

Virtio driver in_order support for packed vq has not been implemented in this
patch series yet.

Guo Zhi (7):
vhost: expose used buffers
vhost_test: batch used buffer
vsock: batch buffers in tx
vsock: announce VIRTIO_F_IN_ORDER in vsock
virtio: unmask F_NEXT flag in desc_extra
virtio: in order support for virtio_ring
virtio: announce VIRTIO_F_IN_ORDER support

drivers/vhost/test.c | 16 ++++++--
drivers/vhost/vhost.c | 16 ++++++--
drivers/vhost/vsock.c | 13 +++++-
drivers/virtio/virtio_ring.c | 79 +++++++++++++++++++++++++++++++-----
4 files changed, 104 insertions(+), 20 deletions(-)

--
2.17.1


2022-09-01 06:36:31

by Guo Zhi

[permalink] [raw]
Subject: [RFC v3 3/7] vsock: batch buffers in tx

Vsock uses buffers in order, and for tx the driver doesn't have to
know the length of the buffer. So we can batch buffers for vsock if
in order is negotiated, writing only one used ring entry for a batch of buffers.

Signed-off-by: Guo Zhi <[email protected]>
---
drivers/vhost/vsock.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 368330417bde..e08fbbb5439e 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -497,7 +497,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
dev);
struct virtio_vsock_pkt *pkt;
- int head, pkts = 0, total_len = 0;
+ int head, pkts = 0, total_len = 0, add = 0;
unsigned int out, in;
bool added = false;

@@ -551,10 +551,18 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
else
virtio_transport_free_pkt(pkt);

- vhost_add_used(vq, head, 0);
+ if (!vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
+ vhost_add_used(vq, head, 0);
+ } else {
+ vq->heads[add].id = head;
+ vq->heads[add++].len = 0;
+ }
added = true;
} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));

+ /* If in order feature negotiaged, we can do a batch to increase performance */
+ if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER) && added)
+ vhost_add_used_n(vq, vq->heads, add);
no_more_replies:
if (added)
vhost_signal(&vsock->dev, vq);
--
2.17.1

2022-09-01 06:37:54

by Guo Zhi

[permalink] [raw]
Subject: [RFC v3 6/7] virtio: in order support for virtio_ring

If the in order feature is negotiated, we can skip the used ring and get
each buffer's desc id sequentially. For skipped buffers in the batch, the
used ring doesn't contain the buffer length; actually there is no need
to get the skipped buffers' length as they are tx buffers.

Signed-off-by: Guo Zhi <[email protected]>
---
drivers/virtio/virtio_ring.c | 74 +++++++++++++++++++++++++++++++-----
1 file changed, 64 insertions(+), 10 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 00aa4b7a49c2..d52624179b43 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -103,6 +103,9 @@ struct vring_virtqueue {
/* Host supports indirect buffers */
bool indirect;

+ /* Host supports in order feature */
+ bool in_order;
+
/* Host publishes avail event idx */
bool event;

@@ -144,6 +147,19 @@ struct vring_virtqueue {
/* DMA address and size information */
dma_addr_t queue_dma_addr;
size_t queue_size_in_bytes;
+
+ /* If in_order feature is negotiated, here is the next head to consume */
+ u16 next_desc_begin;
+ /*
+ * If in_order feature is negotiated,
+ * here is the last descriptor's id in the batch
+ */
+ u16 last_desc_in_batch;
+ /*
+ * If in_order feature is negotiated,
+ * buffers except last buffer in the batch are skipped buffer
+ */
+ bool is_skipped_buffer;
} split;

/* Available for packed ring */
@@ -584,8 +600,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
total_sg * sizeof(struct vring_desc),
VRING_DESC_F_INDIRECT,
false);
- vq->split.desc_extra[head & (vq->split.vring.num - 1)].flags &=
- ~VRING_DESC_F_NEXT;
}

/* We're using some buffers from the free list. */
@@ -701,8 +715,16 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
}

vring_unmap_one_split(vq, i);
- vq->split.desc_extra[i].next = vq->free_head;
- vq->free_head = head;
+ /*
+ * If in_order feature is negotiated,
+ * the descriptors are made available in order.
+ * Since the free_head is already a circular list,
+ * it must consume it sequentially.
+ */
+ if (!vq->in_order) {
+ vq->split.desc_extra[i].next = vq->free_head;
+ vq->free_head = head;
+ }

/* Plus final descriptor */
vq->vq.num_free++;
@@ -744,7 +766,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
{
struct vring_virtqueue *vq = to_vvq(_vq);
void *ret;
- unsigned int i;
+ unsigned int i, j;
u16 last_used;

START_USE(vq);
@@ -763,11 +785,38 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
/* Only get used array entries after they have been exposed by host. */
virtio_rmb(vq->weak_barriers);

- last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
- i = virtio32_to_cpu(_vq->vdev,
- vq->split.vring.used->ring[last_used].id);
- *len = virtio32_to_cpu(_vq->vdev,
- vq->split.vring.used->ring[last_used].len);
+ if (vq->in_order) {
+ last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
+ if (!vq->split.is_skipped_buffer) {
+ vq->split.last_desc_in_batch =
+ virtio32_to_cpu(_vq->vdev,
+ vq->split.vring.used->ring[last_used].id);
+ vq->split.is_skipped_buffer = true;
+ }
+ /* For skipped buffers in batch, we can ignore the len info, simply set len as 0 */
+ if (vq->split.next_desc_begin != vq->split.last_desc_in_batch) {
+ *len = 0;
+ } else {
+ *len = virtio32_to_cpu(_vq->vdev,
+ vq->split.vring.used->ring[last_used].len);
+ vq->split.is_skipped_buffer = false;
+ }
+ i = vq->split.next_desc_begin;
+ j = i;
+ /* Indirect only takes one descriptor in descriptor table */
+ while (!vq->indirect && (vq->split.desc_extra[j].flags & VRING_DESC_F_NEXT))
+ j = (j + 1) & (vq->split.vring.num - 1);
+ /* move to next */
+ j = (j + 1) % vq->split.vring.num;
+ /* Next buffer will use this descriptor in order */
+ vq->split.next_desc_begin = j;
+ } else {
+ last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
+ i = virtio32_to_cpu(_vq->vdev,
+ vq->split.vring.used->ring[last_used].id);
+ *len = virtio32_to_cpu(_vq->vdev,
+ vq->split.vring.used->ring[last_used].len);
+ }

if (unlikely(i >= vq->split.vring.num)) {
BAD_RING(vq, "id %u out of range\n", i);
@@ -2223,6 +2272,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,

vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
!context;
+ vq->in_order = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER);
vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
@@ -2235,6 +2285,10 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
vq->split.avail_flags_shadow = 0;
vq->split.avail_idx_shadow = 0;

+ vq->split.next_desc_begin = 0;
+ vq->split.last_desc_in_batch = 0;
+ vq->split.is_skipped_buffer = false;
+
/* No callback? Tell other side not to bother us. */
if (!callback) {
vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
--
2.17.1

2022-09-01 06:39:52

by Xuan Zhuo

[permalink] [raw]
Subject: Re: [RFC v3 6/7] virtio: in order support for virtio_ring

On Thu, 1 Sep 2022 13:54:33 +0800, Guo Zhi <[email protected]> wrote:
> If in order feature negotiated, we can skip the used ring to get
> buffer's desc id sequentially. For skipped buffers in the batch, the
> used ring doesn't contain the buffer length, actually there is not need
> to get skipped buffers' length as they are tx buffer.

As far as I know, virtio-net currently uses the buffer's length here for
statistics. So does virtio-net also need to make some changes?

Thanks.

>
> Signed-off-by: Guo Zhi <[email protected]>
> ---
> drivers/virtio/virtio_ring.c | 74 +++++++++++++++++++++++++++++++-----
> 1 file changed, 64 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 00aa4b7a49c2..d52624179b43 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -103,6 +103,9 @@ struct vring_virtqueue {
> /* Host supports indirect buffers */
> bool indirect;
>
> + /* Host supports in order feature */
> + bool in_order;
> +
> /* Host publishes avail event idx */
> bool event;
>
> @@ -144,6 +147,19 @@ struct vring_virtqueue {
> /* DMA address and size information */
> dma_addr_t queue_dma_addr;
> size_t queue_size_in_bytes;
> +
> + /* If in_order feature is negotiated, here is the next head to consume */
> + u16 next_desc_begin;
> + /*
> + * If in_order feature is negotiated,
> + * here is the last descriptor's id in the batch
> + */
> + u16 last_desc_in_batch;
> + /*
> + * If in_order feature is negotiated,
> + * buffers except last buffer in the batch are skipped buffer
> + */
> + bool is_skipped_buffer;
> } split;
>
> /* Available for packed ring */
> @@ -584,8 +600,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> total_sg * sizeof(struct vring_desc),
> VRING_DESC_F_INDIRECT,
> false);
> - vq->split.desc_extra[head & (vq->split.vring.num - 1)].flags &=
> - ~VRING_DESC_F_NEXT;
> }
>
> /* We're using some buffers from the free list. */
> @@ -701,8 +715,16 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> }
>
> vring_unmap_one_split(vq, i);
> - vq->split.desc_extra[i].next = vq->free_head;
> - vq->free_head = head;
> + /*
> + * If in_order feature is negotiated,
> + * the descriptors are made available in order.
> + * Since the free_head is already a circular list,
> + * it must consume it sequentially.
> + */
> + if (!vq->in_order) {
> + vq->split.desc_extra[i].next = vq->free_head;
> + vq->free_head = head;
> + }
>
> /* Plus final descriptor */
> vq->vq.num_free++;
> @@ -744,7 +766,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
> void *ret;
> - unsigned int i;
> + unsigned int i, j;
> u16 last_used;
>
> START_USE(vq);
> @@ -763,11 +785,38 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> /* Only get used array entries after they have been exposed by host. */
> virtio_rmb(vq->weak_barriers);
>
> - last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
> - i = virtio32_to_cpu(_vq->vdev,
> - vq->split.vring.used->ring[last_used].id);
> - *len = virtio32_to_cpu(_vq->vdev,
> - vq->split.vring.used->ring[last_used].len);
> + if (vq->in_order) {
> + last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
> + if (!vq->split.is_skipped_buffer) {
> + vq->split.last_desc_in_batch =
> + virtio32_to_cpu(_vq->vdev,
> + vq->split.vring.used->ring[last_used].id);
> + vq->split.is_skipped_buffer = true;
> + }
> + /* For skipped buffers in batch, we can ignore the len info, simply set len as 0 */
> + if (vq->split.next_desc_begin != vq->split.last_desc_in_batch) {
> + *len = 0;
> + } else {
> + *len = virtio32_to_cpu(_vq->vdev,
> + vq->split.vring.used->ring[last_used].len);
> + vq->split.is_skipped_buffer = false;
> + }
> + i = vq->split.next_desc_begin;
> + j = i;
> + /* Indirect only takes one descriptor in descriptor table */
> + while (!vq->indirect && (vq->split.desc_extra[j].flags & VRING_DESC_F_NEXT))
> + j = (j + 1) & (vq->split.vring.num - 1);
> + /* move to next */
> + j = (j + 1) % vq->split.vring.num;
> + /* Next buffer will use this descriptor in order */
> + vq->split.next_desc_begin = j;
> + } else {
> + last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
> + i = virtio32_to_cpu(_vq->vdev,
> + vq->split.vring.used->ring[last_used].id);
> + *len = virtio32_to_cpu(_vq->vdev,
> + vq->split.vring.used->ring[last_used].len);
> + }
>
> if (unlikely(i >= vq->split.vring.num)) {
> BAD_RING(vq, "id %u out of range\n", i);
> @@ -2223,6 +2272,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
>
> vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
> !context;
> + vq->in_order = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER);
> vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
>
> if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> @@ -2235,6 +2285,10 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
> vq->split.avail_flags_shadow = 0;
> vq->split.avail_idx_shadow = 0;
>
> + vq->split.next_desc_begin = 0;
> + vq->split.last_desc_in_batch = 0;
> + vq->split.is_skipped_buffer = false;
> +
> /* No callback? Tell other side not to bother us. */
> if (!callback) {
> vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> --
> 2.17.1
>

2022-09-01 06:40:58

by Guo Zhi

[permalink] [raw]
Subject: [RFC v3 5/7] virtio: unmask F_NEXT flag in desc_extra

We didn't unmask the F_NEXT flag in desc_extra at the end of a chain;
unmask it so that we can access desc_extra to get the next information.

Signed-off-by: Guo Zhi <[email protected]>
---
drivers/virtio/virtio_ring.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a5ec724c01d8..00aa4b7a49c2 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -567,7 +567,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
}
/* Last one doesn't continue. */
desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
- if (!indirect && vq->use_dma_api)
+ if (!indirect)
vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
~VRING_DESC_F_NEXT;

@@ -584,6 +584,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
total_sg * sizeof(struct vring_desc),
VRING_DESC_F_INDIRECT,
false);
+ vq->split.desc_extra[head & (vq->split.vring.num - 1)].flags &=
+ ~VRING_DESC_F_NEXT;
}

/* We're using some buffers from the free list. */
@@ -685,7 +687,6 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
void **ctx)
{
unsigned int i, j;
- __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);

/* Clear data ptr. */
vq->split.desc_state[head].data = NULL;
@@ -693,7 +694,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
/* Put back on free list: unmap first-level descriptors and find end */
i = head;

- while (vq->split.vring.desc[i].flags & nextflag) {
+ while (vq->split.desc_extra[i].flags & VRING_DESC_F_NEXT) {
vring_unmap_one_split(vq, i);
i = vq->split.desc_extra[i].next;
vq->vq.num_free++;
--
2.17.1

2022-09-01 06:41:12

by Guo Zhi

[permalink] [raw]
Subject: [RFC v3 1/7] vhost: expose used buffers

Following the VIRTIO 1.1 spec, write out only a single used ring entry for a
batch of descriptors.

Signed-off-by: Guo Zhi <[email protected]>
---
drivers/vhost/vhost.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 40097826cff0..26862c8bf751 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2376,10 +2376,20 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
vring_used_elem_t __user *used;
u16 old, new;
int start;
+ int copy_n = count;

+ /**
+ * If in order feature negotiated, devices can notify the use of a batch of buffers to
+ * the driver by only writing out a single used ring entry with the id corresponding
+ * to the head entry of the descriptor chain describing the last buffer in the batch.
+ */
+ if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
+ copy_n = 1;
+ heads = &heads[count - 1];
+ }
start = vq->last_used_idx & (vq->num - 1);
used = vq->used->ring + start;
- if (vhost_put_used(vq, heads, start, count)) {
+ if (vhost_put_used(vq, heads, start, copy_n)) {
vq_err(vq, "Failed to write used");
return -EFAULT;
}
@@ -2388,7 +2398,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
smp_wmb();
/* Log used ring entry write. */
log_used(vq, ((void __user *)used - (void __user *)vq->used),
- count * sizeof *used);
+ copy_n * sizeof(*used));
}
old = vq->last_used_idx;
new = (vq->last_used_idx += count);
@@ -2410,7 +2420,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,

start = vq->last_used_idx & (vq->num - 1);
n = vq->num - start;
- if (n < count) {
+ if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
r = __vhost_add_used_n(vq, heads, n);
if (r < 0)
return r;
--
2.17.1

2022-09-01 06:51:48

by Guo Zhi

[permalink] [raw]
Subject: [RFC v3 7/7] virtio: announce VIRTIO_F_IN_ORDER support

In order feature is supported by default in virtio.

Signed-off-by: Guo Zhi <[email protected]>
---
drivers/virtio/virtio_ring.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index d52624179b43..63a6381913a5 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2432,6 +2432,8 @@ void vring_transport_features(struct virtio_device *vdev)
break;
case VIRTIO_F_ORDER_PLATFORM:
break;
+ case VIRTIO_F_IN_ORDER:
+ break;
default:
/* We don't understand this bit. */
__virtio_clear_bit(vdev, i);
--
2.17.1

2022-09-01 09:36:49

by Eugenio Perez Martin

[permalink] [raw]
Subject: Re: [RFC v3 3/7] vsock: batch buffers in tx

On Thu, Sep 1, 2022 at 7:55 AM Guo Zhi <[email protected]> wrote:
>
> Vsock uses buffers in order, and for tx driver doesn't have to
> know the length of the buffer. So we can do a batch for vsock if
> in order negotiated, only write one used ring for a batch of buffers
>
> Signed-off-by: Guo Zhi <[email protected]>
> ---
> drivers/vhost/vsock.c | 12 ++++++++++--
> 1 file changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index 368330417bde..e08fbbb5439e 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -497,7 +497,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
> struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
> dev);
> struct virtio_vsock_pkt *pkt;
> - int head, pkts = 0, total_len = 0;
> + int head, pkts = 0, total_len = 0, add = 0;
> unsigned int out, in;
> bool added = false;
>
> @@ -551,10 +551,18 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
> else
> virtio_transport_free_pkt(pkt);
>
> - vhost_add_used(vq, head, 0);
> + if (!vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
> + vhost_add_used(vq, head, 0);
> + } else {
> + vq->heads[add].id = head;
> + vq->heads[add++].len = 0;

Knowing that the descriptors are used in order we can save a few
memory writes to the vq->heads[] array. vhost.c is checking for the
feature in_order anyway.

Thanks!

> + }
> added = true;
> } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
>
> + /* If in order feature negotiaged, we can do a batch to increase performance */
> + if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER) && added)
> + vhost_add_used_n(vq, vq->heads, add);
> no_more_replies:
> if (added)
> vhost_signal(&vsock->dev, vq);
> --
> 2.17.1
>

2022-09-01 09:39:59

by Eugenio Perez Martin

[permalink] [raw]
Subject: Re: [RFC v3 1/7] vhost: expose used buffers

On Thu, Sep 1, 2022 at 7:55 AM Guo Zhi <[email protected]> wrote:
>
> Follow VIRTIO 1.1 spec, only writing out a single used ring for a batch
> of descriptors.
>
> Signed-off-by: Guo Zhi <[email protected]>
> ---
> drivers/vhost/vhost.c | 16 +++++++++++++---
> 1 file changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 40097826cff0..26862c8bf751 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -2376,10 +2376,20 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
> vring_used_elem_t __user *used;
> u16 old, new;
> int start;
> + int copy_n = count;
>
> + /**
> + * If in order feature negotiated, devices can notify the use of a batch of buffers to
> + * the driver by only writing out a single used ring entry with the id corresponding
> + * to the head entry of the descriptor chain describing the last buffer in the batch.
> + */
> + if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
> + copy_n = 1;
> + heads = &heads[count - 1];
> + }
> start = vq->last_used_idx & (vq->num - 1);
> used = vq->used->ring + start;
> - if (vhost_put_used(vq, heads, start, count)) {
> + if (vhost_put_used(vq, heads, start, copy_n)) {
> vq_err(vq, "Failed to write used");
> return -EFAULT;
> }
> @@ -2388,7 +2398,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
> smp_wmb();
> /* Log used ring entry write. */
> log_used(vq, ((void __user *)used - (void __user *)vq->used),
> - count * sizeof *used);
> + copy_n * sizeof(*used));

log_used reports to the VMM the modified memory by the device. It
iterates over used descriptors translating them to do so.

We need to either report here all the descriptors or to modify
log_used so it reports all the batch with in_order feature. The latter
has an extra advantage: no need to report these non-existent writes to
the used ring of the skipped buffers. Although it probably does not
make a difference in performance.

With the current code, we could iterate the heads[] array too, calling
log_used() for each entry. However, I think it would be a waste. More on that later.

Thanks!

> }
> old = vq->last_used_idx;
> new = (vq->last_used_idx += count);
> @@ -2410,7 +2420,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
>
> start = vq->last_used_idx & (vq->num - 1);
> n = vq->num - start;
> - if (n < count) {
> + if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
> r = __vhost_add_used_n(vq, heads, n);
> if (r < 0)
> return r;
> --
> 2.17.1
>

2022-09-07 04:28:59

by Jason Wang

[permalink] [raw]
Subject: Re: [RFC v3 1/7] vhost: expose used buffers


在 2022/9/1 13:54, Guo Zhi 写道:
> Follow VIRTIO 1.1 spec, only writing out a single used ring for a batch
> of descriptors.
>
> Signed-off-by: Guo Zhi <[email protected]>
> ---
> drivers/vhost/vhost.c | 16 +++++++++++++---
> 1 file changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 40097826cff0..26862c8bf751 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -2376,10 +2376,20 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
> vring_used_elem_t __user *used;
> u16 old, new;
> int start;
> + int copy_n = count;
>
> + /**
> + * If in order feature negotiated, devices can notify the use of a batch of buffers to
> + * the driver by only writing out a single used ring entry with the id corresponding
> + * to the head entry of the descriptor chain describing the last buffer in the batch.
> + */
> + if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
> + copy_n = 1;
> + heads = &heads[count - 1];
> + }


Would it be better to have a dedicated helper like
vhost_add_used_in_order() here?


> start = vq->last_used_idx & (vq->num - 1);
> used = vq->used->ring + start;
> - if (vhost_put_used(vq, heads, start, count)) {
> + if (vhost_put_used(vq, heads, start, copy_n)) {
> vq_err(vq, "Failed to write used");
> return -EFAULT;
> }
> @@ -2388,7 +2398,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
> smp_wmb();
> /* Log used ring entry write. */
> log_used(vq, ((void __user *)used - (void __user *)vq->used),
> - count * sizeof *used);
> + copy_n * sizeof(*used));
> }
> old = vq->last_used_idx;
> new = (vq->last_used_idx += count);
> @@ -2410,7 +2420,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
>
> start = vq->last_used_idx & (vq->num - 1);
> n = vq->num - start;
> - if (n < count) {
> + if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {


This seems strange; is there any reason for this? (Actually, if we support
in-order we only need one used slot, which fits the case here.)

Thanks


> r = __vhost_add_used_n(vq, heads, n);
> if (r < 0)
> return r;

2022-09-07 04:39:17

by Jason Wang

[permalink] [raw]
Subject: Re: [RFC v3 3/7] vsock: batch buffers in tx


在 2022/9/1 13:54, Guo Zhi 写道:
> Vsock uses buffers in order, and for tx driver doesn't have to
> know the length of the buffer. So we can do a batch for vsock if
> in order negotiated, only write one used ring for a batch of buffers
>
> Signed-off-by: Guo Zhi <[email protected]>
> ---
> drivers/vhost/vsock.c | 12 ++++++++++--
> 1 file changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index 368330417bde..e08fbbb5439e 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -497,7 +497,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
> struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
> dev);
> struct virtio_vsock_pkt *pkt;
> - int head, pkts = 0, total_len = 0;
> + int head, pkts = 0, total_len = 0, add = 0;
> unsigned int out, in;
> bool added = false;
>
> @@ -551,10 +551,18 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
> else
> virtio_transport_free_pkt(pkt);
>
> - vhost_add_used(vq, head, 0);
> + if (!vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
> + vhost_add_used(vq, head, 0);


I'd do this step by step.

1) switch to use vhost_add_used_n() for vsock, less copy_to_user()
better performance
2) do in-order on top


> + } else {
> + vq->heads[add].id = head;
> + vq->heads[add++].len = 0;


How can we guarantee that we stay within the bounds of the heads array?

Btw in the case of in-order we don't need to record the heads, instead
we just need to know the head of the last buffer, it reduces the stress
on the cache.

Thanks


> + }
> added = true;
> } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
>
> + /* If in order feature negotiaged, we can do a batch to increase performance */
> + if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER) && added)
> + vhost_add_used_n(vq, vq->heads, add);
> no_more_replies:
> if (added)
> vhost_signal(&vsock->dev, vq);

2022-09-07 04:40:13

by Jason Wang

[permalink] [raw]
Subject: Re: [RFC v3 0/7] In order support for virtio_ring, vhost and vsock.


在 2022/9/1 13:54, Guo Zhi 写道:
> In virtio-spec 1.1, new feature bit VIRTIO_F_IN_ORDER was introduced.
> When this feature has been negotiated, virtio driver will use
> descriptors in ring order: starting from offset 0 in the table, and
> wrapping around at the end of the table. Vhost devices will always use
> descriptors in the same order in which they have been made available.
> This can reduce virtio accesses to used ring.
>
> Based on updated virtio-spec, this series realized IN_ORDER prototype in virtio
> driver and vhost. Currently IN_ORDER feature supported devices are *vhost_test*
> and *vsock* in vhost and virtio-net in QEMU. IN_ORDER feature works well
> combined with INDIRECT feature in this patch series.


As stated in the previous versions, I'd like to see performance numbers.
We need to prove that the feature actually helps performance.

And it would be even better if we do the in-order in this order (vhost
side):

1) enable in-order but without batching used
2) enable in-order with batching used

Then we can see how:

1) in-order helps
2) batching helps

Thanks


>
> Virtio driver in_order support for packed vq hasn't been done in this patch
> series now.
>
> Guo Zhi (7):
> vhost: expose used buffers
> vhost_test: batch used buffer
> vsock: batch buffers in tx
> vsock: announce VIRTIO_F_IN_ORDER in vsock
> virtio: unmask F_NEXT flag in desc_extra
> virtio: in order support for virtio_ring
> virtio: announce VIRTIO_F_IN_ORDER support
>
> drivers/vhost/test.c | 16 ++++++--
> drivers/vhost/vhost.c | 16 ++++++--
> drivers/vhost/vsock.c | 13 +++++-
> drivers/virtio/virtio_ring.c | 79 +++++++++++++++++++++++++++++++-----
> 4 files changed, 104 insertions(+), 20 deletions(-)
>

2022-09-07 05:46:38

by Jason Wang

[permalink] [raw]
Subject: Re: [RFC v3 6/7] virtio: in order support for virtio_ring


在 2022/9/1 13:54, Guo Zhi 写道:
> If in order feature negotiated, we can skip the used ring to get
> buffer's desc id sequentially. For skipped buffers in the batch, the
> used ring doesn't contain the buffer length, actually there is not need
> to get skipped buffers' length as they are tx buffer.
>
> Signed-off-by: Guo Zhi <[email protected]>
> ---
> drivers/virtio/virtio_ring.c | 74 +++++++++++++++++++++++++++++++-----
> 1 file changed, 64 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 00aa4b7a49c2..d52624179b43 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -103,6 +103,9 @@ struct vring_virtqueue {
> /* Host supports indirect buffers */
> bool indirect;
>
> + /* Host supports in order feature */
> + bool in_order;
> +
> /* Host publishes avail event idx */
> bool event;
>
> @@ -144,6 +147,19 @@ struct vring_virtqueue {
> /* DMA address and size information */
> dma_addr_t queue_dma_addr;
> size_t queue_size_in_bytes;
> +
> + /* If in_order feature is negotiated, here is the next head to consume */
> + u16 next_desc_begin;
> + /*
> + * If in_order feature is negotiated,
> + * here is the last descriptor's id in the batch
> + */
> + u16 last_desc_in_batch;
> + /*
> + * If in_order feature is negotiated,
> + * buffers except last buffer in the batch are skipped buffer
> + */
> + bool is_skipped_buffer;
> } split;
>
> /* Available for packed ring */
> @@ -584,8 +600,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> total_sg * sizeof(struct vring_desc),
> VRING_DESC_F_INDIRECT,
> false);
> - vq->split.desc_extra[head & (vq->split.vring.num - 1)].flags &=
> - ~VRING_DESC_F_NEXT;


This seems irrelevant.


> }
>
> /* We're using some buffers from the free list. */
> @@ -701,8 +715,16 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> }
>
> vring_unmap_one_split(vq, i);
> - vq->split.desc_extra[i].next = vq->free_head;
> - vq->free_head = head;
> + /*
> + * If in_order feature is negotiated,
> + * the descriptors are made available in order.
> + * Since the free_head is already a circular list,
> + * it must consume it sequentially.
> + */
> + if (!vq->in_order) {
> + vq->split.desc_extra[i].next = vq->free_head;
> + vq->free_head = head;
> + }
>
> /* Plus final descriptor */
> vq->vq.num_free++;
> @@ -744,7 +766,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
> void *ret;
> - unsigned int i;
> + unsigned int i, j;
> u16 last_used;
>
> START_USE(vq);
> @@ -763,11 +785,38 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> /* Only get used array entries after they have been exposed by host. */
> virtio_rmb(vq->weak_barriers);
>
> - last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
> - i = virtio32_to_cpu(_vq->vdev,
> - vq->split.vring.used->ring[last_used].id);
> - *len = virtio32_to_cpu(_vq->vdev,
> - vq->split.vring.used->ring[last_used].len);
> + if (vq->in_order) {
> + last_used = (vq->last_used_idx & (vq->split.vring.num - 1));


Let's move this beyond the in_order check.


> + if (!vq->split.is_skipped_buffer) {
> + vq->split.last_desc_in_batch =
> + virtio32_to_cpu(_vq->vdev,
> + vq->split.vring.used->ring[last_used].id);
> + vq->split.is_skipped_buffer = true;
> + }
> + /* For skipped buffers in batch, we can ignore the len info, simply set len as 0 */


This seems to break the caller that depends on a correct len.


> + if (vq->split.next_desc_begin != vq->split.last_desc_in_batch) {
> + *len = 0;
> + } else {
> + *len = virtio32_to_cpu(_vq->vdev,
> + vq->split.vring.used->ring[last_used].len);
> + vq->split.is_skipped_buffer = false;
> + }
> + i = vq->split.next_desc_begin;
> + j = i;
> + /* Indirect only takes one descriptor in descriptor table */
> + while (!vq->indirect && (vq->split.desc_extra[j].flags & VRING_DESC_F_NEXT))
> + j = (j + 1) & (vq->split.vring.num - 1);


Any reason indirect descriptors can't be chained?


> + /* move to next */
> + j = (j + 1) % vq->split.vring.num;
> + /* Next buffer will use this descriptor in order */
> + vq->split.next_desc_begin = j;


Is it more efficient to poke the available ring?

Thanks


> + } else {
> + last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
> + i = virtio32_to_cpu(_vq->vdev,
> + vq->split.vring.used->ring[last_used].id);
> + *len = virtio32_to_cpu(_vq->vdev,
> + vq->split.vring.used->ring[last_used].len);
> + }
>
> if (unlikely(i >= vq->split.vring.num)) {
> BAD_RING(vq, "id %u out of range\n", i);
> @@ -2223,6 +2272,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
>
> vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
> !context;
> + vq->in_order = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER);
> vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
>
> if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> @@ -2235,6 +2285,10 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
> vq->split.avail_flags_shadow = 0;
> vq->split.avail_idx_shadow = 0;
>
> + vq->split.next_desc_begin = 0;
> + vq->split.last_desc_in_batch = 0;
> + vq->split.is_skipped_buffer = false;
> +
> /* No callback? Tell other side not to bother us. */
> if (!callback) {
> vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;

2022-09-08 09:08:28

by Guo Zhi

[permalink] [raw]
Subject: Re: [RFC v3 3/7] vsock: batch buffers in tx



----- Original Message -----
> From: "jasowang" <[email protected]>
> To: "Guo Zhi" <[email protected]>, "eperezma" <[email protected]>, "sgarzare" <[email protected]>, "Michael
> Tsirkin" <[email protected]>
> Cc: "netdev" <[email protected]>, "linux-kernel" <[email protected]>, "kvm list" <[email protected]>,
> "virtualization" <[email protected]>
> Sent: Wednesday, September 7, 2022 12:27:40 PM
> Subject: Re: [RFC v3 3/7] vsock: batch buffers in tx

> 在 2022/9/1 13:54, Guo Zhi 写道:
>> Vsock uses buffers in order, and for tx driver doesn't have to
>> know the length of the buffer. So we can do a batch for vsock if
>> in order negotiated, only write one used ring for a batch of buffers
>>
>> Signed-off-by: Guo Zhi <[email protected]>
>> ---
>> drivers/vhost/vsock.c | 12 ++++++++++--
>> 1 file changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>> index 368330417bde..e08fbbb5439e 100644
>> --- a/drivers/vhost/vsock.c
>> +++ b/drivers/vhost/vsock.c
>> @@ -497,7 +497,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work
>> *work)
>> struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
>> dev);
>> struct virtio_vsock_pkt *pkt;
>> - int head, pkts = 0, total_len = 0;
>> + int head, pkts = 0, total_len = 0, add = 0;
>> unsigned int out, in;
>> bool added = false;
>>
>> @@ -551,10 +551,18 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work
>> *work)
>> else
>> virtio_transport_free_pkt(pkt);
>>
>> - vhost_add_used(vq, head, 0);
>> + if (!vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>> + vhost_add_used(vq, head, 0);
>
>
> I'd do this step by step.
>
> 1) switch to use vhost_add_used_n() for vsock, less copy_to_user()
> better performance
> 2) do in-order on top
>
>
LGTM! I think this is the correct way to do it.

>> + } else {
>> + vq->heads[add].id = head;
>> + vq->heads[add++].len = 0;
>
>
> How can we guarantee that we are in the boundary of the heads array?
>
> Btw in the case of in-order we don't need to record the heads, instead
> we just need to know the head of the last buffer, it reduces the stress
> on the cache.
>
> Thanks
>
Yeah, I will change this and only copy the last head for the in-order feature.

Thanks
>
>> + }
>> added = true;
>> } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
>>
>> + /* If in order feature negotiaged, we can do a batch to increase performance
>> */
>> + if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER) && added)
>> + vhost_add_used_n(vq, vq->heads, add);
>> no_more_replies:
>> if (added)
>> vhost_signal(&vsock->dev, vq);

2022-09-08 09:15:42

by Guo Zhi

[permalink] [raw]
Subject: Re: [RFC v3 6/7] virtio: in order support for virtio_ring



----- Original Message -----
> From: "jasowang" <[email protected]>
> To: "Guo Zhi" <[email protected]>, "eperezma" <[email protected]>, "sgarzare" <[email protected]>, "Michael
> Tsirkin" <[email protected]>
> Cc: "netdev" <[email protected]>, "linux-kernel" <[email protected]>, "kvm list" <[email protected]>,
> "virtualization" <[email protected]>
> Sent: Wednesday, September 7, 2022 1:38:03 PM
> Subject: Re: [RFC v3 6/7] virtio: in order support for virtio_ring

> 在 2022/9/1 13:54, Guo Zhi 写道:
>> If in order feature negotiated, we can skip the used ring to get
>> buffer's desc id sequentially. For skipped buffers in the batch, the
>> used ring doesn't contain the buffer length, actually there is not need
>> to get skipped buffers' length as they are tx buffer.
>>
>> Signed-off-by: Guo Zhi <[email protected]>
>> ---
>> drivers/virtio/virtio_ring.c | 74 +++++++++++++++++++++++++++++++-----
>> 1 file changed, 64 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>> index 00aa4b7a49c2..d52624179b43 100644
>> --- a/drivers/virtio/virtio_ring.c
>> +++ b/drivers/virtio/virtio_ring.c
>> @@ -103,6 +103,9 @@ struct vring_virtqueue {
>> /* Host supports indirect buffers */
>> bool indirect;
>>
>> + /* Host supports in order feature */
>> + bool in_order;
>> +
>> /* Host publishes avail event idx */
>> bool event;
>>
>> @@ -144,6 +147,19 @@ struct vring_virtqueue {
>> /* DMA address and size information */
>> dma_addr_t queue_dma_addr;
>> size_t queue_size_in_bytes;
>> +
>> + /* If in_order feature is negotiated, here is the next head to consume */
>> + u16 next_desc_begin;
>> + /*
>> + * If in_order feature is negotiated,
>> + * here is the last descriptor's id in the batch
>> + */
>> + u16 last_desc_in_batch;
>> + /*
>> + * If in_order feature is negotiated,
>> + * buffers except last buffer in the batch are skipped buffer
>> + */
>> + bool is_skipped_buffer;
>> } split;
>>
>> /* Available for packed ring */
>> @@ -584,8 +600,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
>> total_sg * sizeof(struct vring_desc),
>> VRING_DESC_F_INDIRECT,
>> false);
>> - vq->split.desc_extra[head & (vq->split.vring.num - 1)].flags &=
>> - ~VRING_DESC_F_NEXT;
>
>
> This seems irrelevant.
>
We have to unmask VRING_DESC_F_NEXT, so that we can calculate the length of a descriptor chain
in get_buf_ctx_split.
Thanks.
>
>> }
>>
>> /* We're using some buffers from the free list. */
>> @@ -701,8 +715,16 @@ static void detach_buf_split(struct vring_virtqueue *vq,
>> unsigned int head,
>> }
>>
>> vring_unmap_one_split(vq, i);
>> - vq->split.desc_extra[i].next = vq->free_head;
>> - vq->free_head = head;
>> + /*
>> + * If in_order feature is negotiated,
>> + * the descriptors are made available in order.
>> + * Since the free_head is already a circular list,
>> + * it must consume it sequentially.
>> + */
>> + if (!vq->in_order) {
>> + vq->split.desc_extra[i].next = vq->free_head;
>> + vq->free_head = head;
>> + }
>>
>> /* Plus final descriptor */
>> vq->vq.num_free++;
>> @@ -744,7 +766,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
>> *_vq,
>> {
>> struct vring_virtqueue *vq = to_vvq(_vq);
>> void *ret;
>> - unsigned int i;
>> + unsigned int i, j;
>> u16 last_used;
>>
>> START_USE(vq);
>> @@ -763,11 +785,38 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
>> *_vq,
>> /* Only get used array entries after they have been exposed by host. */
>> virtio_rmb(vq->weak_barriers);
>>
>> - last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> - i = virtio32_to_cpu(_vq->vdev,
>> - vq->split.vring.used->ring[last_used].id);
>> - *len = virtio32_to_cpu(_vq->vdev,
>> - vq->split.vring.used->ring[last_used].len);
>> + if (vq->in_order) {
>> + last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>
>
> Let's move this beyond the in_order check.
>
Sorry for my mistake.
>
>> + if (!vq->split.is_skipped_buffer) {
>> + vq->split.last_desc_in_batch =
>> + virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].id);
>> + vq->split.is_skipped_buffer = true;
>> + }
>> + /* For skipped buffers in batch, we can ignore the len info, simply set len
>> as 0 */
>
>
> This seems to break the caller that depends on a correct len.
>

IMHO, we can do this because the device will only batch skipped buffers, which are tx buffers.

>
>> + if (vq->split.next_desc_begin != vq->split.last_desc_in_batch) {
>> + *len = 0;
>> + } else {
>> + *len = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].len);
>> + vq->split.is_skipped_buffer = false;
>> + }
>> + i = vq->split.next_desc_begin;
>> + j = i;
>> + /* Indirect only takes one descriptor in descriptor table */
>> + while (!vq->indirect && (vq->split.desc_extra[j].flags & VRING_DESC_F_NEXT))
>> + j = (j + 1) & (vq->split.vring.num - 1);
>
>
> Any reason indirect descriptors can't be chained?
>
>
>> + /* move to next */
>> + j = (j + 1) % vq->split.vring.num;
>> + /* Next buffer will use this descriptor in order */
>> + vq->split.next_desc_begin = j;
>
>
> Is it more efficient to poke the available ring?
>
> Thanks
>
>
>> + } else {
>> + last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> + i = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].id);
>> + *len = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].len);
>> + }
>>
>> if (unlikely(i >= vq->split.vring.num)) {
>> BAD_RING(vq, "id %u out of range\n", i);
>> @@ -2223,6 +2272,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
>> index,
>>
>> vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
>> !context;
>> + vq->in_order = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER);
>> vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
>>
>> if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>> @@ -2235,6 +2285,10 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
>> index,
>> vq->split.avail_flags_shadow = 0;
>> vq->split.avail_idx_shadow = 0;
>>
>> + vq->split.next_desc_begin = 0;
>> + vq->split.last_desc_in_batch = 0;
>> + vq->split.is_skipped_buffer = false;
>> +
>> /* No callback? Tell other side not to bother us. */
>> if (!callback) {
>> vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;

2022-09-08 09:20:08

by Guo Zhi

[permalink] [raw]
Subject: Re: [RFC v3 1/7] vhost: expose used buffers



----- Original Message -----
> From: "jasowang" <[email protected]>
> To: "Guo Zhi" <[email protected]>, "eperezma" <[email protected]>, "sgarzare" <[email protected]>, "Michael
> Tsirkin" <[email protected]>
> Cc: "netdev" <[email protected]>, "linux-kernel" <[email protected]>, "kvm list" <[email protected]>,
> "virtualization" <[email protected]>
> Sent: Wednesday, September 7, 2022 12:21:06 PM
> Subject: Re: [RFC v3 1/7] vhost: expose used buffers

> 在 2022/9/1 13:54, Guo Zhi 写道:
>> Follow VIRTIO 1.1 spec, only writing out a single used ring for a batch
>> of descriptors.
>>
>> Signed-off-by: Guo Zhi <[email protected]>
>> ---
>> drivers/vhost/vhost.c | 16 +++++++++++++---
>> 1 file changed, 13 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 40097826cff0..26862c8bf751 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -2376,10 +2376,20 @@ static int __vhost_add_used_n(struct vhost_virtqueue
>> *vq,
>> vring_used_elem_t __user *used;
>> u16 old, new;
>> int start;
>> + int copy_n = count;
>>
>> + /**
>> + * If in order feature negotiated, devices can notify the use of a batch of
>> buffers to
>> + * the driver by only writing out a single used ring entry with the id
>> corresponding
>> + * to the head entry of the descriptor chain describing the last buffer in the
>> batch.
>> + */
>> + if (vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>> + copy_n = 1;
>> + heads = &heads[count - 1];
>> + }
>
>
> Would it better to have a dedicated helper like
> vhost_add_used_in_order() here?
>
That would be much more convenient and clearer to implement.
I think having a dedicated function for the in-order feature in vhost is better.

>
>
>> start = vq->last_used_idx & (vq->num - 1);
>> used = vq->used->ring + start;
>> - if (vhost_put_used(vq, heads, start, count)) {
>> + if (vhost_put_used(vq, heads, start, copy_n)) {
>> vq_err(vq, "Failed to write used");
>> return -EFAULT;
>> }
>> @@ -2388,7 +2398,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
>> smp_wmb();
>> /* Log used ring entry write. */
>> log_used(vq, ((void __user *)used - (void __user *)vq->used),
>> - count * sizeof *used);
>> + copy_n * sizeof(*used));
>> }
>> old = vq->last_used_idx;
>> new = (vq->last_used_idx += count);
>> @@ -2410,7 +2420,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct
>> vring_used_elem *heads,
>>
>> start = vq->last_used_idx & (vq->num - 1);
>> n = vq->num - start;
>> - if (n < count) {
>> + if (n < count && !vhost_has_feature(vq, VIRTIO_F_IN_ORDER)) {
>
>
> This seems strange, any reason for this? (Actually if we support
> in-order we only need one used slot which fit for the case here)
>
> Thanks
>
If the in-order feature is negotiated, then even if count is larger than n,
we don't need to call __vhost_add_used_n again, because in-order
only uses one slot.

Thanks
>
>> r = __vhost_add_used_n(vq, heads, n);
>> if (r < 0)
>> return r;

2022-09-08 10:09:57

by Guo Zhi

[permalink] [raw]
Subject: Re: [RFC v3 6/7] virtio: in order support for virtio_ring



----- Original Message -----
> From: "jasowang" <[email protected]>
> To: "Guo Zhi" <[email protected]>, "eperezma" <[email protected]>, "sgarzare" <[email protected]>, "Michael
> Tsirkin" <[email protected]>
> Cc: "netdev" <[email protected]>, "linux-kernel" <[email protected]>, "kvm list" <[email protected]>,
> "virtualization" <[email protected]>
> Sent: Wednesday, September 7, 2022 1:38:03 PM
> Subject: Re: [RFC v3 6/7] virtio: in order support for virtio_ring

> 在 2022/9/1 13:54, Guo Zhi 写道:
>> If in order feature negotiated, we can skip the used ring to get
>> buffer's desc id sequentially. For skipped buffers in the batch, the
>> used ring doesn't contain the buffer length, actually there is not need
>> to get skipped buffers' length as they are tx buffer.
>>
>> Signed-off-by: Guo Zhi <[email protected]>
>> ---
>> drivers/virtio/virtio_ring.c | 74 +++++++++++++++++++++++++++++++-----
>> 1 file changed, 64 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>> index 00aa4b7a49c2..d52624179b43 100644
>> --- a/drivers/virtio/virtio_ring.c
>> +++ b/drivers/virtio/virtio_ring.c
>> @@ -103,6 +103,9 @@ struct vring_virtqueue {
>> /* Host supports indirect buffers */
>> bool indirect;
>>
>> + /* Host supports in order feature */
>> + bool in_order;
>> +
>> /* Host publishes avail event idx */
>> bool event;
>>
>> @@ -144,6 +147,19 @@ struct vring_virtqueue {
>> /* DMA address and size information */
>> dma_addr_t queue_dma_addr;
>> size_t queue_size_in_bytes;
>> +
>> + /* If in_order feature is negotiated, here is the next head to consume */
>> + u16 next_desc_begin;
>> + /*
>> + * If in_order feature is negotiated,
>> + * here is the last descriptor's id in the batch
>> + */
>> + u16 last_desc_in_batch;
>> + /*
>> + * If in_order feature is negotiated,
>> + * buffers except last buffer in the batch are skipped buffer
>> + */
>> + bool is_skipped_buffer;
>> } split;
>>
>> /* Available for packed ring */
>> @@ -584,8 +600,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
>> total_sg * sizeof(struct vring_desc),
>> VRING_DESC_F_INDIRECT,
>> false);
>> - vq->split.desc_extra[head & (vq->split.vring.num - 1)].flags &=
>> - ~VRING_DESC_F_NEXT;
>
>
> This seems irrelevant.
>
>
I will put this change in a separate commit; it ended up here because of a git rebase mistake on my part.
Thanks.

>> }
>>
>> /* We're using some buffers from the free list. */
>> @@ -701,8 +715,16 @@ static void detach_buf_split(struct vring_virtqueue *vq,
>> unsigned int head,
>> }
>>
>> vring_unmap_one_split(vq, i);
>> - vq->split.desc_extra[i].next = vq->free_head;
>> - vq->free_head = head;
>> + /*
>> + * If in_order feature is negotiated,
>> + * the descriptors are made available in order.
>> + * Since the free_head is already a circular list,
>> + * it must consume it sequentially.
>> + */
>> + if (!vq->in_order) {
>> + vq->split.desc_extra[i].next = vq->free_head;
>> + vq->free_head = head;
>> + }
>>
>> /* Plus final descriptor */
>> vq->vq.num_free++;
>> @@ -744,7 +766,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
>> *_vq,
>> {
>> struct vring_virtqueue *vq = to_vvq(_vq);
>> void *ret;
>> - unsigned int i;
>> + unsigned int i, j;
>> u16 last_used;
>>
>> START_USE(vq);
>> @@ -763,11 +785,38 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
>> *_vq,
>> /* Only get used array entries after they have been exposed by host. */
>> virtio_rmb(vq->weak_barriers);
>>
>> - last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> - i = virtio32_to_cpu(_vq->vdev,
>> - vq->split.vring.used->ring[last_used].id);
>> - *len = virtio32_to_cpu(_vq->vdev,
>> - vq->split.vring.used->ring[last_used].len);
>> + if (vq->in_order) {
>> + last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>
>
> Let's move this beyond the in_order check.
>
>
>> + if (!vq->split.is_skipped_buffer) {
>> + vq->split.last_desc_in_batch =
>> + virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].id);
>> + vq->split.is_skipped_buffer = true;
>> + }
>> + /* For skipped buffers in batch, we can ignore the len info, simply set len
>> as 0 */
>
>
> This seems to break the caller that depends on a correct len.
>
>
>> + if (vq->split.next_desc_begin != vq->split.last_desc_in_batch) {
>> + *len = 0;
>> + } else {
>> + *len = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].len);
>> + vq->split.is_skipped_buffer = false;
>> + }
>> + i = vq->split.next_desc_begin;
>> + j = i;
>> + /* Indirect only takes one descriptor in descriptor table */
>> + while (!vq->indirect && (vq->split.desc_extra[j].flags & VRING_DESC_F_NEXT))
>> + j = (j + 1) & (vq->split.vring.num - 1);
>
>
> Any reason indirect descriptors can't be chained?
>
>
>> + /* move to next */
>> + j = (j + 1) % vq->split.vring.num;
>> + /* Next buffer will use this descriptor in order */
>> + vq->split.next_desc_begin = j;
>
>
> Is it more efficient to poke the available ring?
>
> Thanks
>
>
>> + } else {
>> + last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> + i = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].id);
>> + *len = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].len);
>> + }
>>
>> if (unlikely(i >= vq->split.vring.num)) {
>> BAD_RING(vq, "id %u out of range\n", i);
>> @@ -2223,6 +2272,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
>> index,
>>
>> vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
>> !context;
>> + vq->in_order = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER);
>> vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
>>
>> if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>> @@ -2235,6 +2285,10 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
>> index,
>> vq->split.avail_flags_shadow = 0;
>> vq->split.avail_idx_shadow = 0;
>>
>> + vq->split.next_desc_begin = 0;
>> + vq->split.last_desc_in_batch = 0;
>> + vq->split.is_skipped_buffer = false;
>> +
>> /* No callback? Tell other side not to bother us. */
>> if (!callback) {
>> vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;

2022-10-03 11:53:41

by Guo Zhi

[permalink] [raw]
Subject: Re: [RFC v3 6/7] virtio: in order support for virtio_ring



----- Original Message -----
> From: "jasowang" <[email protected]>
> To: "Guo Zhi" <[email protected]>, "eperezma" <[email protected]>, "sgarzare" <[email protected]>, "Michael
> Tsirkin" <[email protected]>
> Cc: "netdev" <[email protected]>, "linux-kernel" <[email protected]>, "kvm list" <[email protected]>,
> "virtualization" <[email protected]>
> Sent: Wednesday, September 7, 2022 1:38:03 PM
> Subject: Re: [RFC v3 6/7] virtio: in order support for virtio_ring

> 在 2022/9/1 13:54, Guo Zhi 写道:
>> If in order feature negotiated, we can skip the used ring to get
>> buffer's desc id sequentially. For skipped buffers in the batch, the
>> used ring doesn't contain the buffer length, actually there is not need
>> to get skipped buffers' length as they are tx buffer.
>>
>> Signed-off-by: Guo Zhi <[email protected]>
>> ---
>> drivers/virtio/virtio_ring.c | 74 +++++++++++++++++++++++++++++++-----
>> 1 file changed, 64 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>> index 00aa4b7a49c2..d52624179b43 100644
>> --- a/drivers/virtio/virtio_ring.c
>> +++ b/drivers/virtio/virtio_ring.c
>> @@ -103,6 +103,9 @@ struct vring_virtqueue {
>> /* Host supports indirect buffers */
>> bool indirect;
>>
>> + /* Host supports in order feature */
>> + bool in_order;
>> +
>> /* Host publishes avail event idx */
>> bool event;
>>
>> @@ -144,6 +147,19 @@ struct vring_virtqueue {
>> /* DMA address and size information */
>> dma_addr_t queue_dma_addr;
>> size_t queue_size_in_bytes;
>> +
>> + /* If in_order feature is negotiated, here is the next head to consume */
>> + u16 next_desc_begin;
>> + /*
>> + * If in_order feature is negotiated,
>> + * here is the last descriptor's id in the batch
>> + */
>> + u16 last_desc_in_batch;
>> + /*
>> + * If in_order feature is negotiated,
>> + * buffers except last buffer in the batch are skipped buffer
>> + */
>> + bool is_skipped_buffer;
>> } split;
>>
>> /* Available for packed ring */
>> @@ -584,8 +600,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
>> total_sg * sizeof(struct vring_desc),
>> VRING_DESC_F_INDIRECT,
>> false);
>> - vq->split.desc_extra[head & (vq->split.vring.num - 1)].flags &=
>> - ~VRING_DESC_F_NEXT;
>
>
> This seems irrelevant.
>
>
>> }
>>
>> /* We're using some buffers from the free list. */
>> @@ -701,8 +715,16 @@ static void detach_buf_split(struct vring_virtqueue *vq,
>> unsigned int head,
>> }
>>
>> vring_unmap_one_split(vq, i);
>> - vq->split.desc_extra[i].next = vq->free_head;
>> - vq->free_head = head;
>> + /*
>> + * If in_order feature is negotiated,
>> + * the descriptors are made available in order.
>> + * Since the free_head is already a circular list,
>> + * it must consume it sequentially.
>> + */
>> + if (!vq->in_order) {
>> + vq->split.desc_extra[i].next = vq->free_head;
>> + vq->free_head = head;
>> + }
>>
>> /* Plus final descriptor */
>> vq->vq.num_free++;
>> @@ -744,7 +766,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
>> *_vq,
>> {
>> struct vring_virtqueue *vq = to_vvq(_vq);
>> void *ret;
>> - unsigned int i;
>> + unsigned int i, j;
>> u16 last_used;
>>
>> START_USE(vq);
>> @@ -763,11 +785,38 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
>> *_vq,
>> /* Only get used array entries after they have been exposed by host. */
>> virtio_rmb(vq->weak_barriers);
>>
>> - last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> - i = virtio32_to_cpu(_vq->vdev,
>> - vq->split.vring.used->ring[last_used].id);
>> - *len = virtio32_to_cpu(_vq->vdev,
>> - vq->split.vring.used->ring[last_used].len);
>> + if (vq->in_order) {
>> + last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>
>
> Let's move this beyond the in_order check.
>
>
>> + if (!vq->split.is_skipped_buffer) {
>> + vq->split.last_desc_in_batch =
>> + virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].id);
>> + vq->split.is_skipped_buffer = true;
>> + }
>> + /* For skipped buffers in batch, we can ignore the len info, simply set len
>> as 0 */
>
>
> This seems to break the caller that depends on a correct len.
>
>
>> + if (vq->split.next_desc_begin != vq->split.last_desc_in_batch) {
>> + *len = 0;
>> + } else {
>> + *len = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].len);
>> + vq->split.is_skipped_buffer = false;
>> + }
>> + i = vq->split.next_desc_begin;
>> + j = i;
>> + /* Indirect only takes one descriptor in descriptor table */
>> + while (!vq->indirect && (vq->split.desc_extra[j].flags & VRING_DESC_F_NEXT))
>> + j = (j + 1) & (vq->split.vring.num - 1);
>
>
> Any reason indirect descriptors can't be chained?
>

If the descriptor is indirect, it takes only one slot in the descriptor ring; we are just calculating the number of ring entries here.
Even if the indirect descriptors are chained, they still fill only one slot in the vring.

Thanks

>
>> + /* move to next */
>> + j = (j + 1) % vq->split.vring.num;
>> + /* Next buffer will use this descriptor in order */
>> + vq->split.next_desc_begin = j;
>
>
> Is it more efficient to poke the available ring?
>
> Thanks
>
The available ring is under the guest's control, and the guest may modify it.
Accessing the avail ring would also add another indirection.

Thanks
>
>> + } else {
>> + last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> + i = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].id);
>> + *len = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].len);
>> + }
>>
>> if (unlikely(i >= vq->split.vring.num)) {
>> BAD_RING(vq, "id %u out of range\n", i);
>> @@ -2223,6 +2272,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
>> index,
>>
>> vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
>> !context;
>> + vq->in_order = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER);
>> vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
>>
>> if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
>> @@ -2235,6 +2285,10 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
>> index,
>> vq->split.avail_flags_shadow = 0;
>> vq->split.avail_idx_shadow = 0;
>>
>> + vq->split.next_desc_begin = 0;
>> + vq->split.last_desc_in_batch = 0;
>> + vq->split.is_skipped_buffer = false;
>> +
>> /* No callback? Tell other side not to bother us. */
>> if (!callback) {
>> vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;