If the in order feature has been negotiated, we can skip the used ring and
get the buffer's desc id sequentially.
Signed-off-by: Guo Zhi <[email protected]>
---
drivers/virtio/virtio_ring.c | 37 ++++++++++++++++++++++++++++--------
1 file changed, 29 insertions(+), 8 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a5ec724c0..4d57a4edc 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -144,6 +144,9 @@ struct vring_virtqueue {
/* DMA address and size information */
dma_addr_t queue_dma_addr;
size_t queue_size_in_bytes;
+
+ /* In order feature batch begin here */
+ u16 next_batch_desc_begin;
} split;
/* Available for packed ring */
@@ -700,8 +703,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
}
vring_unmap_one_split(vq, i);
- vq->split.desc_extra[i].next = vq->free_head;
- vq->free_head = head;
+ if (!virtio_has_feature(vq->vq.vdev, VIRTIO_F_IN_ORDER)) {
+ vq->split.desc_extra[i].next = vq->free_head;
+ vq->free_head = head;
+ }
/* Plus final descriptor */
vq->vq.num_free++;
@@ -743,7 +748,8 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
{
struct vring_virtqueue *vq = to_vvq(_vq);
void *ret;
- unsigned int i;
+ __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
+ unsigned int i, j;
u16 last_used;
START_USE(vq);
@@ -762,11 +768,24 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
/* Only get used array entries after they have been exposed by host. */
virtio_rmb(vq->weak_barriers);
- last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
- i = virtio32_to_cpu(_vq->vdev,
- vq->split.vring.used->ring[last_used].id);
- *len = virtio32_to_cpu(_vq->vdev,
- vq->split.vring.used->ring[last_used].len);
+ if (virtio_has_feature(_vq->vdev, VIRTIO_F_IN_ORDER)) {
+ /* Skip used ring and get used desc in order*/
+ i = vq->split.next_batch_desc_begin;
+ j = i;
+ while (vq->split.vring.desc[j].flags & nextflag)
+ j = (j + 1) % vq->split.vring.num;
+ /* move to next */
+ j = (j + 1) % vq->split.vring.num;
+ vq->split.next_batch_desc_begin = j;
+
+ /* TODO: len of buffer */
+ } else {
+ last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
+ i = virtio32_to_cpu(_vq->vdev,
+ vq->split.vring.used->ring[last_used].id);
+ *len = virtio32_to_cpu(_vq->vdev,
+ vq->split.vring.used->ring[last_used].len);
+ }
if (unlikely(i >= vq->split.vring.num)) {
BAD_RING(vq, "id %u out of range\n", i);
@@ -2234,6 +2253,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
vq->split.avail_flags_shadow = 0;
vq->split.avail_idx_shadow = 0;
+ vq->split.next_batch_desc_begin = 0;
+
/* No callback? Tell other side not to bother us. */
if (!callback) {
vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
--
2.17.1
On 2022/7/21 16:43, Guo Zhi wrote:
> If in order feature negotiated, we can skip the used ring to get
> buffer's desc id sequentially.
Let's rename the patch to something like "in order support for virtio_ring"
>
> Signed-off-by: Guo Zhi <[email protected]>
> ---
> drivers/virtio/virtio_ring.c | 37 ++++++++++++++++++++++++++++--------
> 1 file changed, 29 insertions(+), 8 deletions(-)
I don't see packed support in this patch, we need to implement that.
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index a5ec724c0..4d57a4edc 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -144,6 +144,9 @@ struct vring_virtqueue {
> /* DMA address and size information */
> dma_addr_t queue_dma_addr;
> size_t queue_size_in_bytes;
> +
> + /* In order feature batch begin here */
> + u16 next_batch_desc_begin;
> } split;
>
> /* Available for packed ring */
> @@ -700,8 +703,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> }
>
> vring_unmap_one_split(vq, i);
> - vq->split.desc_extra[i].next = vq->free_head;
> - vq->free_head = head;
> + if (!virtio_has_feature(vq->vq.vdev, VIRTIO_F_IN_ORDER)) {
> + vq->split.desc_extra[i].next = vq->free_head;
> + vq->free_head = head;
> + }
Let's add a comment to explain why we don't need to do anything if in order
is negotiated.
>
> /* Plus final descriptor */
> vq->vq.num_free++;
> @@ -743,7 +748,8 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
> void *ret;
> - unsigned int i;
> + __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> + unsigned int i, j;
> u16 last_used;
>
> START_USE(vq);
> @@ -762,11 +768,24 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> /* Only get used array entries after they have been exposed by host. */
> virtio_rmb(vq->weak_barriers);
>
> - last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
> - i = virtio32_to_cpu(_vq->vdev,
> - vq->split.vring.used->ring[last_used].id);
> - *len = virtio32_to_cpu(_vq->vdev,
> - vq->split.vring.used->ring[last_used].len);
> + if (virtio_has_feature(_vq->vdev, VIRTIO_F_IN_ORDER)) {
> + /* Skip used ring and get used desc in order*/
> + i = vq->split.next_batch_desc_begin;
> + j = i;
> + while (vq->split.vring.desc[j].flags & nextflag)
Let's not depend on the descriptor ring, which is under the control of a
possibly malicious hypervisor.
Let's use desc_extra, which is not visible to the hypervisor. More can be
seen in this commit:
72b5e8958738 ("virtio-ring: store DMA metadata in desc_extra for split
virtqueue")
> + j = (j + 1) % vq->split.vring.num;
> + /* move to next */
> + j = (j + 1) % vq->split.vring.num;
> + vq->split.next_batch_desc_begin = j;
I'm not sure I get the logic here; basically, I think we should check the
buffer instead of the descriptor here.
So if vring.used->ring[last_used].id != last_used, we know all
[last_used, vring.used->ring[last_used].id] have been used in a batch?
> +
> + /* TODO: len of buffer */
So spec said:
"
The skipped buffers (for which no used ring entry was written) are
assumed to have been used (read or written) by the device completely.
"
Thanks
> + } else {
> + last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
> + i = virtio32_to_cpu(_vq->vdev,
> + vq->split.vring.used->ring[last_used].id);
> + *len = virtio32_to_cpu(_vq->vdev,
> + vq->split.vring.used->ring[last_used].len);
> + }
>
> if (unlikely(i >= vq->split.vring.num)) {
> BAD_RING(vq, "id %u out of range\n", i);
> @@ -2234,6 +2253,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
> vq->split.avail_flags_shadow = 0;
> vq->split.avail_idx_shadow = 0;
>
> + vq->split.next_batch_desc_begin = 0;
> +
> /* No callback? Tell other side not to bother us. */
> if (!callback) {
> vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
On 2022/7/26 16:07, Jason Wang wrote:
>
> On 2022/7/21 16:43, Guo Zhi wrote:
>> If in order feature negotiated, we can skip the used ring to get
>> buffer's desc id sequentially.
>
>
> Let's rename the patch to something like "in order support for
> virtio_ring"
>
>
>>
>> Signed-off-by: Guo Zhi <[email protected]>
>> ---
>> drivers/virtio/virtio_ring.c | 37 ++++++++++++++++++++++++++++--------
>> 1 file changed, 29 insertions(+), 8 deletions(-)
>
>
> I don't see packed support in this patch, we need to implement that.
>
It will be implemented later.
>
>>
>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>> index a5ec724c0..4d57a4edc 100644
>> --- a/drivers/virtio/virtio_ring.c
>> +++ b/drivers/virtio/virtio_ring.c
>> @@ -144,6 +144,9 @@ struct vring_virtqueue {
>> /* DMA address and size information */
>> dma_addr_t queue_dma_addr;
>> size_t queue_size_in_bytes;
>> +
>> + /* In order feature batch begin here */
>> + u16 next_batch_desc_begin;
>> } split;
>> /* Available for packed ring */
>> @@ -700,8 +703,10 @@ static void detach_buf_split(struct
>> vring_virtqueue *vq, unsigned int head,
>> }
>> vring_unmap_one_split(vq, i);
>> - vq->split.desc_extra[i].next = vq->free_head;
>> - vq->free_head = head;
>> + if (!virtio_has_feature(vq->vq.vdev, VIRTIO_F_IN_ORDER)) {
>> + vq->split.desc_extra[i].next = vq->free_head;
>> + vq->free_head = head;
>> + }
>
>
> Let's add a comment to explain why we don't need anything if in order
> is negotiated.
>
LGTM.
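For example, the check could carry a comment along these lines (just a sketch
of the wording; it assumes the next version keeps handing out descriptors
sequentially when in order is negotiated):

	/*
	 * VIRTIO_F_IN_ORDER: the device uses descriptors in the same order
	 * they were made available, so there is no need to relink the free
	 * list here; descriptors are recycled in ring order and the driver
	 * can simply keep allocating them sequentially.
	 */
	if (!virtio_has_feature(vq->vq.vdev, VIRTIO_F_IN_ORDER)) {
		vq->split.desc_extra[i].next = vq->free_head;
		vq->free_head = head;
	}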
>
>> /* Plus final descriptor */
>> vq->vq.num_free++;
>> @@ -743,7 +748,8 @@ static void *virtqueue_get_buf_ctx_split(struct
>> virtqueue *_vq,
>> {
>> struct vring_virtqueue *vq = to_vvq(_vq);
>> void *ret;
>> - unsigned int i;
>> + __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev,
>> VRING_DESC_F_NEXT);
>> + unsigned int i, j;
>> u16 last_used;
>> START_USE(vq);
>> @@ -762,11 +768,24 @@ static void *virtqueue_get_buf_ctx_split(struct
>> virtqueue *_vq,
>> /* Only get used array entries after they have been exposed by
>> host. */
>> virtio_rmb(vq->weak_barriers);
>> - last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> - i = virtio32_to_cpu(_vq->vdev,
>> - vq->split.vring.used->ring[last_used].id);
>> - *len = virtio32_to_cpu(_vq->vdev,
>> - vq->split.vring.used->ring[last_used].len);
>> + if (virtio_has_feature(_vq->vdev, VIRTIO_F_IN_ORDER)) {
>> + /* Skip used ring and get used desc in order*/
>> + i = vq->split.next_batch_desc_begin;
>> + j = i;
>> + while (vq->split.vring.desc[j].flags & nextflag)
>
>
> Let's don't depend on the descriptor ring which is under the control
> of the malicious hypervisor.
>
> Let's use desc_extra that is not visible by the hypervisor. More can
> be seen in this commit:
>
> 72b5e8958738 ("virtio-ring: store DMA metadata in desc_extra for split
> virtqueue")
>
LGTM, I will use desc_extra in the new version of the patch.
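A rough, untested sketch of what the walk could look like on top of
desc_extra (it assumes the flags cached in desc_extra are in native
endianness, as done since commit 72b5e8958738, and that a chain occupies
consecutive ring entries when in order is negotiated):

	/* Walk the chain using driver-private metadata only. */
	i = vq->split.next_batch_desc_begin;
	j = i;
	while (vq->split.desc_extra[j].flags & VRING_DESC_F_NEXT)
		j = (j + 1) % vq->split.vring.num;
	/* Step past the tail descriptor of the chain. */
	j = (j + 1) % vq->split.vring.num;
	vq->split.next_batch_desc_begin = j;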
>
>> + j = (j + 1) % vq->split.vring.num;
>> + /* move to next */
>> + j = (j + 1) % vq->split.vring.num;
>> + vq->split.next_batch_desc_begin = j;
>
>
> I'm not sure I get the logic here, basically I think we should check
> buffer instead of descriptor here.
Because vq->last_used_idx != vq->split.vring.used->idx, the virtio driver
knows there is at least one used descriptor chain, and because of in order
its head id is vq->split.next_batch_desc_begin. So we have to traverse that
descriptor chain and point vq->split.next_batch_desc_begin at the descriptor
that will be used next.
Thanks.
>
> So if vring.used->ring[last_used].id != last_used, we know all
> [last_used, vring.used->ring[last_used].id] have been used in a batch?
>
>
>> +
>> + /* TODO: len of buffer */
>
>
> So spec said:
>
> "
>
> The skipped buffers (for which no used ring entry was written) are
> assumed to have been used (read or written) by the device completely.
>
>
> "
>
> Thanks
>
The driver still needs the len field of the used ring to learn the buffer
size. However, with in order the device does not write a len for every
buffer in the used ring, so I will try to pass the len of the buffer in the
device header.
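Until that is settled, one possible stopgap (only a sketch, leaning on the
spec text quoted above that skipped buffers are assumed to be used
completely; the last buffer of a batch still gets a real used ring entry and
should keep using that len) would be to report the whole chain length
recorded in desc_extra:

	/* Skipped buffers are assumed fully used: report the chain length. */
	*len = 0;
	j = i;
	for (;;) {
		*len += vq->split.desc_extra[j].len;
		if (!(vq->split.desc_extra[j].flags & VRING_DESC_F_NEXT))
			break;
		j = vq->split.desc_extra[j].next;
	}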
>
>> + } else {
>> + last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> + i = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].id);
>> + *len = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].len);
>> + }
>> if (unlikely(i >= vq->split.vring.num)) {
>> BAD_RING(vq, "id %u out of range\n", i);
>> @@ -2234,6 +2253,8 @@ struct virtqueue
>> *__vring_new_virtqueue(unsigned int index,
>> vq->split.avail_flags_shadow = 0;
>> vq->split.avail_idx_shadow = 0;
>> + vq->split.next_batch_desc_begin = 0;
>> +
>> /* No callback? Tell other side not to bother us. */
>> if (!callback) {
>> vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
>
----- Original Message -----
> From: "jasowang" <[email protected]>
> To: "Guo Zhi" <[email protected]>, "eperezma" <[email protected]>, "sgarzare" <[email protected]>, "Michael
> Tsirkin" <[email protected]>
> Cc: "netdev" <[email protected]>, "linux-kernel" <[email protected]>, "kvm list" <[email protected]>,
> "virtualization" <[email protected]>
> Sent: Tuesday, July 26, 2022 4:07:46 PM
> Subject: Re: [RFC 4/5] virtio: get desc id in order
> On 2022/7/21 16:43, Guo Zhi wrote:
>> If in order feature negotiated, we can skip the used ring to get
>> buffer's desc id sequentially.
>
>
> Let's rename the patch to something like "in order support for virtio_ring"
>
>
>>
>> Signed-off-by: Guo Zhi <[email protected]>
>> ---
>> drivers/virtio/virtio_ring.c | 37 ++++++++++++++++++++++++++++--------
>> 1 file changed, 29 insertions(+), 8 deletions(-)
>
>
> I don't see packed support in this patch, we need to implement that.
>
>
>>
>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>> index a5ec724c0..4d57a4edc 100644
>> --- a/drivers/virtio/virtio_ring.c
>> +++ b/drivers/virtio/virtio_ring.c
>> @@ -144,6 +144,9 @@ struct vring_virtqueue {
>> /* DMA address and size information */
>> dma_addr_t queue_dma_addr;
>> size_t queue_size_in_bytes;
>> +
>> + /* In order feature batch begin here */
>> + u16 next_batch_desc_begin;
>> } split;
>>
>> /* Available for packed ring */
>> @@ -700,8 +703,10 @@ static void detach_buf_split(struct vring_virtqueue *vq,
>> unsigned int head,
>> }
>>
>> vring_unmap_one_split(vq, i);
>> - vq->split.desc_extra[i].next = vq->free_head;
>> - vq->free_head = head;
>> + if (!virtio_has_feature(vq->vq.vdev, VIRTIO_F_IN_ORDER)) {
>> + vq->split.desc_extra[i].next = vq->free_head;
>> + vq->free_head = head;
>> + }
>
>
> Let's add a comment to explain why we don't need anything if in order is
> negotiated.
>
>
>>
>> /* Plus final descriptor */
>> vq->vq.num_free++;
>> @@ -743,7 +748,8 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
>> *_vq,
>> {
>> struct vring_virtqueue *vq = to_vvq(_vq);
>> void *ret;
>> - unsigned int i;
>> + __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
>> + unsigned int i, j;
>> u16 last_used;
>>
>> START_USE(vq);
>> @@ -762,11 +768,24 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
>> *_vq,
>> /* Only get used array entries after they have been exposed by host. */
>> virtio_rmb(vq->weak_barriers);
>>
>> - last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> - i = virtio32_to_cpu(_vq->vdev,
>> - vq->split.vring.used->ring[last_used].id);
>> - *len = virtio32_to_cpu(_vq->vdev,
>> - vq->split.vring.used->ring[last_used].len);
>> + if (virtio_has_feature(_vq->vdev, VIRTIO_F_IN_ORDER)) {
>> + /* Skip used ring and get used desc in order*/
>> + i = vq->split.next_batch_desc_begin;
>> + j = i;
>> + while (vq->split.vring.desc[j].flags & nextflag)
>
>
> Let's don't depend on the descriptor ring which is under the control of
> the malicious hypervisor.
>
> Let's use desc_extra that is not visible by the hypervisor. More can be
> seen in this commit:
>
> 72b5e8958738 ("virtio-ring: store DMA metadata in desc_extra for split
> virtqueue")
>
>
>> + j = (j + 1) % vq->split.vring.num;
>> + /* move to next */
>> + j = (j + 1) % vq->split.vring.num;
>> + vq->split.next_batch_desc_begin = j;
>
>
> I'm not sure I get the logic here, basically I think we should check
> buffer instead of descriptor here.
I'm sorry, I don't understand this comment.
In order means the device uses descriptors in the same order in which they
were made available. So we should iterate over the descriptor table and
calculate the next desc which will be used, because we don't use the used
ring now.
>
> So if vring.used->ring[last_used].id != last_used, we know all
> [last_used, vring.used->ring[last_used].id] have been used in a batch?
>
We don't use the used ring for the in order feature.
N descriptors in the descriptor table, starting from
vq->split.next_batch_desc_begin, have been used, where N is
vq->split.vring.used->idx - vq->last_used_idx (ignoring index wrap-around
for brevity).
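Roughly, in code (just a sketch; the u16 arithmetic takes care of the index
wrap-around that is glossed over above):

	/* Number of used entries the device has exposed since we last looked. */
	u16 used_idx = virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx);
	u16 n = used_idx - vq->last_used_idx;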
>
>> +
>> + /* TODO: len of buffer */
>
>
> So spec said:
>
> "
>
> The skipped buffers (for which no used ring entry was written) are
> assumed to have been used (read or written) by the device completely.
>
>
> "
>
> Thanks
>
>
>> + } else {
>> + last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
>> + i = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].id);
>> + *len = virtio32_to_cpu(_vq->vdev,
>> + vq->split.vring.used->ring[last_used].len);
>> + }
>>
>> if (unlikely(i >= vq->split.vring.num)) {
>> BAD_RING(vq, "id %u out of range\n", i);
>> @@ -2234,6 +2253,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
>> index,
>> vq->split.avail_flags_shadow = 0;
>> vq->split.avail_idx_shadow = 0;
>>
>> + vq->split.next_batch_desc_begin = 0;
>> +
>> /* No callback? Tell other side not to bother us. */
>> if (!callback) {
>> vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;