Hello everyone,
This RFC implements packed ring support in virtio driver.
Some simple functional tests have been done with Jason's
packed ring implementation in vhost:
https://lkml.org/lkml/2018/4/23/12
Both ping and netperf worked as expected (with EVENT_IDX
disabled). But there are the following known issues:
1. Reloading the guest driver will break the Tx/Rx;
2. Zeroing the flags when detaching a used desc will
break the guest -> host path.
Some simple functional tests have also been done with
Wei's packed ring implementation in QEMU:
http://lists.nongnu.org/archive/html/qemu-devel/2018-04/msg00342.html
Both ping and netperf worked as expected (with EVENT_IDX
disabled). Reloading the guest driver also worked as expected.
TODO:
- Refinements (for code and commit log) and bug fixes;
- Discuss/fix/test EVENT_IDX support;
- Test devices other than net;
RFC v2 -> RFC v3:
- Split into small patches (Jason);
- Add helper virtqueue_use_indirect() (Jason);
- Just set id for the last descriptor of a list (Jason);
- Calculate the prev in virtqueue_add_packed() (Jason);
- Fix/improve desc suppression code (Jason/MST);
- Refine the code layout for XXX_split/packed and wrappers (MST);
- Fix the comments and API in uapi (MST);
- Remove the BUG_ON() for indirect (Jason);
- Some other refinements and bug fixes;
RFC v1 -> RFC v2:
- Add indirect descriptor support - compile test only;
- Add event suppression support - compile test only;
- Move vring_packed_init() out of uapi (Jason, MST);
- Merge two loops into one in virtqueue_add_packed() (Jason);
- Split vring_unmap_one() for packed ring and split ring (Jason);
- Avoid using '%' operator (Jason);
- Rename free_head -> next_avail_idx (Jason);
- Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
- Some other refinements and bug fixes;
Thanks!
Tiwei Bie (5):
virtio: add packed ring definitions
virtio_ring: support creating packed ring
virtio_ring: add packed ring support
virtio_ring: add event idx support in packed ring
virtio_ring: enable packed ring
drivers/virtio/virtio_ring.c | 1271 ++++++++++++++++++++++++++++--------
include/linux/virtio_ring.h | 8 +-
include/uapi/linux/virtio_config.h | 12 +-
include/uapi/linux/virtio_ring.h | 36 +
4 files changed, 1049 insertions(+), 278 deletions(-)
--
2.11.0
This commit introduces the event idx support in packed
ring. This feature is temporarily disabled, because the
implementation in this patch may not work as expected,
and some further discussions on the implementation are
needed, e.g. do we have to check the wrap counter when
checking whether a kick is needed?
Signed-off-by: Tiwei Bie <[email protected]>
---
drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++++++----
1 file changed, 49 insertions(+), 4 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 0181e93897be..b1039c2985b9 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -986,7 +986,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- u16 flags;
+ u16 new, old, off_wrap, flags;
bool needs_kick;
u32 snapshot;
@@ -995,7 +995,12 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
* suppressions. */
virtio_mb(vq->weak_barriers);
+ old = vq->next_avail_idx - vq->num_added;
+ new = vq->next_avail_idx;
+ vq->num_added = 0;
+
snapshot = *(u32 *)vq->vring_packed.device;
+ off_wrap = virtio16_to_cpu(_vq->vdev, snapshot & 0xffff);
flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
#ifdef DEBUG
@@ -1006,7 +1011,10 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
vq->last_add_time_valid = false;
#endif
- needs_kick = (flags != VRING_EVENT_F_DISABLE);
+ if (flags == VRING_EVENT_F_DESC)
+ needs_kick = vring_need_event(off_wrap & ~(1<<15), new, old);
+ else
+ needs_kick = (flags != VRING_EVENT_F_DISABLE);
END_USE(vq);
return needs_kick;
}
@@ -1116,6 +1124,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
if (vq->last_used_idx >= vq->vring_packed.num)
vq->last_used_idx -= vq->vring_packed.num;
+ /* If we expect an interrupt for the next entry, tell host
+ * by writing event index and flush out the write before
+ * the read in the next get_buf call. */
+ if (vq->event_flags_shadow == VRING_EVENT_F_DESC)
+ virtio_store_mb(vq->weak_barriers,
+ &vq->vring_packed.driver->off_wrap,
+ cpu_to_virtio16(_vq->vdev, vq->last_used_idx |
+ (vq->wrap_counter << 15)));
+
#ifdef DEBUG
vq->last_add_time_valid = false;
#endif
@@ -1143,10 +1160,17 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
/* We optimistically turn back on interrupts, then check if there was
* more to do. */
+ /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
+ * either clear the flags bit or point the event index at the next
+ * entry. Always update the event index to keep code simple. */
+
+ vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
+ vq->last_used_idx | (vq->wrap_counter << 15));
if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
virtio_wmb(vq->weak_barriers);
- vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
+ vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
+ VRING_EVENT_F_ENABLE;
vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
vq->event_flags_shadow);
}
@@ -1172,15 +1196,34 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
+ u16 bufs, used_idx, wrap_counter;
START_USE(vq);
/* We optimistically turn back on interrupts, then check if there was
* more to do. */
+ /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
+ * either clear the flags bit or point the event index at the next
+ * entry. Always update the event index to keep code simple. */
+
+ /* TODO: tune this threshold */
+ bufs = (u16)(vq->next_avail_idx - vq->last_used_idx) * 3 / 4;
+
+ used_idx = vq->last_used_idx + bufs;
+ wrap_counter = vq->wrap_counter;
+
+ if (used_idx >= vq->vring_packed.num) {
+ used_idx -= vq->vring_packed.num;
+ wrap_counter ^= 1;
+ }
+
+ vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
+ used_idx | (wrap_counter << 15));
if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
virtio_wmb(vq->weak_barriers);
- vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
+ vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
+ VRING_EVENT_F_ENABLE;
vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
vq->event_flags_shadow);
}
@@ -1822,8 +1865,10 @@ void vring_transport_features(struct virtio_device *vdev)
switch (i) {
case VIRTIO_RING_F_INDIRECT_DESC:
break;
+#if 0
case VIRTIO_RING_F_EVENT_IDX:
break;
+#endif
case VIRTIO_F_VERSION_1:
break;
case VIRTIO_F_IOMMU_PLATFORM:
--
2.11.0
This commit introduces the basic support (without EVENT_IDX)
for packed ring.
Signed-off-by: Tiwei Bie <[email protected]>
---
drivers/virtio/virtio_ring.c | 444 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 434 insertions(+), 10 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index e164822ca66e..0181e93897be 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -58,7 +58,8 @@
struct vring_desc_state {
void *data; /* Data for callback. */
- struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
+ void *indir_desc; /* Indirect descriptor, if any. */
+ int num; /* Descriptor list length. */
};
struct vring_virtqueue {
@@ -142,6 +143,16 @@ struct vring_virtqueue {
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
+static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
+ unsigned int total_sg)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ /* If the host supports indirect descriptor tables, and we have multiple
+ * buffers, then go indirect. FIXME: tune this threshold */
+ return (vq->indirect && total_sg > 1 && vq->vq.num_free);
+}
+
/*
* Modern virtio devices have feature bits to specify whether they need a
* quirk and bypass the IOMMU. If not there, just use the DMA API.
@@ -327,9 +338,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
head = vq->free_head;
- /* If the host supports indirect descriptor tables, and we have multiple
- * buffers, then go indirect. FIXME: tune this threshold */
- if (vq->indirect && total_sg > 1 && vq->vq.num_free)
+ if (virtqueue_use_indirect(_vq, total_sg))
desc = alloc_indirect_split(_vq, total_sg, gfp);
else {
desc = NULL;
@@ -741,6 +750,49 @@ static inline unsigned vring_size_packed(unsigned int num, unsigned long align)
& ~(align - 1)) + sizeof(struct vring_packed_desc_event) * 2;
}
+static void vring_unmap_one_packed(const struct vring_virtqueue *vq,
+ struct vring_packed_desc *desc)
+{
+ u16 flags;
+
+ if (!vring_use_dma_api(vq->vq.vdev))
+ return;
+
+ flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
+
+ if (flags & VRING_DESC_F_INDIRECT) {
+ dma_unmap_single(vring_dma_dev(vq),
+ virtio64_to_cpu(vq->vq.vdev, desc->addr),
+ virtio32_to_cpu(vq->vq.vdev, desc->len),
+ (flags & VRING_DESC_F_WRITE) ?
+ DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ } else {
+ dma_unmap_page(vring_dma_dev(vq),
+ virtio64_to_cpu(vq->vq.vdev, desc->addr),
+ virtio32_to_cpu(vq->vq.vdev, desc->len),
+ (flags & VRING_DESC_F_WRITE) ?
+ DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ }
+}
+
+static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue *_vq,
+ unsigned int total_sg,
+ gfp_t gfp)
+{
+ struct vring_packed_desc *desc;
+
+ /*
+ * We require lowmem mappings for the descriptors because
+ * otherwise virt_to_phys will give us bogus addresses in the
+ * virtqueue.
+ */
+ gfp &= ~__GFP_HIGHMEM;
+
+ desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);
+
+ return desc;
+}
+
static inline int virtqueue_add_packed(struct virtqueue *_vq,
struct scatterlist *sgs[],
unsigned int total_sg,
@@ -750,47 +802,419 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
void *ctx,
gfp_t gfp)
{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct vring_packed_desc *desc;
+ struct scatterlist *sg;
+ unsigned int i, n, descs_used, uninitialized_var(prev), err_idx;
+ __virtio16 uninitialized_var(head_flags), flags;
+ int head, wrap_counter;
+ bool indirect;
+
+ START_USE(vq);
+
+ BUG_ON(data == NULL);
+ BUG_ON(ctx && vq->indirect);
+
+ if (unlikely(vq->broken)) {
+ END_USE(vq);
+ return -EIO;
+ }
+
+#ifdef DEBUG
+ {
+ ktime_t now = ktime_get();
+
+ /* No kick or get, with .1 second between? Warn. */
+ if (vq->last_add_time_valid)
+ WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
+ > 100);
+ vq->last_add_time = now;
+ vq->last_add_time_valid = true;
+ }
+#endif
+
+ BUG_ON(total_sg == 0);
+
+ head = vq->next_avail_idx;
+ wrap_counter = vq->wrap_counter;
+
+ if (virtqueue_use_indirect(_vq, total_sg))
+ desc = alloc_indirect_packed(_vq, total_sg, gfp);
+ else {
+ desc = NULL;
+ WARN_ON_ONCE(total_sg > vq->vring_packed.num && !vq->indirect);
+ }
+
+ if (desc) {
+ /* Use a single buffer which doesn't continue */
+ indirect = true;
+ /* Set up rest to use this indirect table. */
+ i = 0;
+ descs_used = 1;
+ } else {
+ indirect = false;
+ desc = vq->vring_packed.desc;
+ i = head;
+ descs_used = total_sg;
+ }
+
+ if (vq->vq.num_free < descs_used) {
+ pr_debug("Can't add buf len %i - avail = %i\n",
+ descs_used, vq->vq.num_free);
+ /* FIXME: for historical reasons, we force a notify here if
+ * there are outgoing parts to the buffer. Presumably the
+ * host should service the ring ASAP. */
+ if (out_sgs)
+ vq->notify(&vq->vq);
+ if (indirect)
+ kfree(desc);
+ END_USE(vq);
+ return -ENOSPC;
+ }
+
+ for (n = 0; n < out_sgs + in_sgs; n++) {
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ if (vring_mapping_error(vq, addr))
+ goto unmap_release;
+
+ flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT |
+ (n < out_sgs ? 0 : VRING_DESC_F_WRITE) |
+ VRING_DESC_F_AVAIL(vq->wrap_counter) |
+ VRING_DESC_F_USED(!vq->wrap_counter));
+ if (!indirect && i == head)
+ head_flags = flags;
+ else
+ desc[i].flags = flags;
+
+ desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
+ desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
+ i++;
+ if (!indirect && i >= vq->vring_packed.num) {
+ i = 0;
+ vq->wrap_counter ^= 1;
+ }
+ }
+ }
+
+ prev = (i > 0 ? i : vq->vring_packed.num) - 1;
+ desc[prev].id = cpu_to_virtio32(_vq->vdev, head);
+
+ /* Last one doesn't continue. */
+ if (total_sg == 1)
+ head_flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
+ else
+ desc[prev].flags &= cpu_to_virtio16(_vq->vdev,
+ ~VRING_DESC_F_NEXT);
+
+ if (indirect) {
+ /* Now that the indirect table is filled in, map it. */
+ dma_addr_t addr = vring_map_single(
+ vq, desc, total_sg * sizeof(struct vring_packed_desc),
+ DMA_TO_DEVICE);
+ if (vring_mapping_error(vq, addr))
+ goto unmap_release;
+
+ head_flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT |
+ VRING_DESC_F_AVAIL(wrap_counter) |
+ VRING_DESC_F_USED(!wrap_counter));
+ vq->vring_packed.desc[head].addr = cpu_to_virtio64(_vq->vdev,
+ addr);
+ vq->vring_packed.desc[head].len = cpu_to_virtio32(_vq->vdev,
+ total_sg * sizeof(struct vring_packed_desc));
+ vq->vring_packed.desc[head].id = cpu_to_virtio32(_vq->vdev,
+ head);
+ }
+
+ /* We're using some buffers from the free list. */
+ vq->vq.num_free -= descs_used;
+
+ /* Update free pointer */
+ if (indirect) {
+ n = head + 1;
+ if (n >= vq->vring_packed.num) {
+ n = 0;
+ vq->wrap_counter ^= 1;
+ }
+ vq->next_avail_idx = n;
+ } else
+ vq->next_avail_idx = i;
+
+ /* Store token and indirect buffer state. */
+ vq->desc_state[head].num = descs_used;
+ vq->desc_state[head].data = data;
+ if (indirect)
+ vq->desc_state[head].indir_desc = desc;
+ else
+ vq->desc_state[head].indir_desc = ctx;
+
+ /* A driver MUST NOT make the first descriptor in the list
+ * available before all subsequent descriptors comprising
+ * the list are made available. */
+ virtio_wmb(vq->weak_barriers);
+ vq->vring_packed.desc[head].flags = head_flags;
+ vq->num_added += descs_used;
+
+ pr_debug("Added buffer head %i to %p\n", head, vq);
+ END_USE(vq);
+
+ return 0;
+
+unmap_release:
+ err_idx = i;
+ i = head;
+
+ for (n = 0; n < total_sg; n++) {
+ if (i == err_idx)
+ break;
+ vring_unmap_one_packed(vq, &desc[i]);
+ i++;
+ if (!indirect && i >= vq->vring_packed.num)
+ i = 0;
+ }
+
+ vq->wrap_counter = wrap_counter;
+
+ if (indirect)
+ kfree(desc);
+
+ END_USE(vq);
return -EIO;
}
static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
{
- return false;
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ u16 flags;
+ bool needs_kick;
+ u32 snapshot;
+
+ START_USE(vq);
+ /* We need to expose the new flags value before checking notification
+ * suppressions. */
+ virtio_mb(vq->weak_barriers);
+
+ snapshot = *(u32 *)vq->vring_packed.device;
+ flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
+
+#ifdef DEBUG
+ if (vq->last_add_time_valid) {
+ WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
+ vq->last_add_time)) > 100);
+ }
+ vq->last_add_time_valid = false;
+#endif
+
+ needs_kick = (flags != VRING_EVENT_F_DISABLE);
+ END_USE(vq);
+ return needs_kick;
+}
+
+static void detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
+ void **ctx)
+{
+ struct vring_packed_desc *desc;
+ unsigned int i, j;
+
+ /* Clear data ptr. */
+ vq->desc_state[head].data = NULL;
+
+ i = head;
+
+ for (j = 0; j < vq->desc_state[head].num; j++) {
+ desc = &vq->vring_packed.desc[i];
+ vring_unmap_one_packed(vq, desc);
+ i++;
+ if (i >= vq->vring_packed.num)
+ i = 0;
+ }
+
+ vq->vq.num_free += vq->desc_state[head].num;
+
+ if (vq->indirect) {
+ u32 len;
+
+ /* Free the indirect table, if any, now that it's unmapped. */
+ desc = vq->desc_state[head].indir_desc;
+ if (!desc)
+ return;
+
+ len = virtio32_to_cpu(vq->vq.vdev,
+ vq->vring_packed.desc[head].len);
+
+ for (j = 0; j < len / sizeof(struct vring_packed_desc); j++)
+ vring_unmap_one_packed(vq, &desc[j]);
+
+ kfree(desc);
+ vq->desc_state[head].indir_desc = NULL;
+ } else if (ctx) {
+ *ctx = vq->desc_state[head].indir_desc;
+ }
}
static inline bool more_used_packed(const struct vring_virtqueue *vq)
{
- return false;
+ u16 last_used, flags;
+ bool avail, used;
+
+ if (vq->vq.num_free == vq->vring_packed.num)
+ return false;
+
+ last_used = vq->last_used_idx;
+ flags = virtio16_to_cpu(vq->vq.vdev,
+ vq->vring_packed.desc[last_used].flags);
+ avail = flags & VRING_DESC_F_AVAIL(1);
+ used = flags & VRING_DESC_F_USED(1);
+
+ return avail == used;
}
static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
unsigned int *len,
void **ctx)
{
- return NULL;
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ void *ret;
+ unsigned int i;
+ u16 last_used;
+
+ START_USE(vq);
+
+ if (unlikely(vq->broken)) {
+ END_USE(vq);
+ return NULL;
+ }
+
+ if (!more_used_packed(vq)) {
+ pr_debug("No more buffers in queue\n");
+ END_USE(vq);
+ return NULL;
+ }
+
+ /* Only get used elements after they have been exposed by host. */
+ virtio_rmb(vq->weak_barriers);
+
+ last_used = vq->last_used_idx;
+ i = virtio32_to_cpu(_vq->vdev, vq->vring_packed.desc[last_used].id);
+ *len = virtio32_to_cpu(_vq->vdev, vq->vring_packed.desc[last_used].len);
+
+ if (unlikely(i >= vq->vring_packed.num)) {
+ BAD_RING(vq, "id %u out of range\n", i);
+ return NULL;
+ }
+ if (unlikely(!vq->desc_state[i].data)) {
+ BAD_RING(vq, "id %u is not a head!\n", i);
+ return NULL;
+ }
+
+ /* detach_buf_packed clears data, so grab it now. */
+ ret = vq->desc_state[i].data;
+ detach_buf_packed(vq, i, ctx);
+
+ vq->last_used_idx += vq->desc_state[i].num;
+ if (vq->last_used_idx >= vq->vring_packed.num)
+ vq->last_used_idx -= vq->vring_packed.num;
+
+#ifdef DEBUG
+ vq->last_add_time_valid = false;
+#endif
+
+ END_USE(vq);
+ return ret;
}
static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ if (vq->event_flags_shadow != VRING_EVENT_F_DISABLE) {
+ vq->event_flags_shadow = VRING_EVENT_F_DISABLE;
+ vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
+ vq->event_flags_shadow);
+ }
}
static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
{
- return 0;
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ START_USE(vq);
+
+ /* We optimistically turn back on interrupts, then check if there was
+ * more to do. */
+
+ if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
+ virtio_wmb(vq->weak_barriers);
+ vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
+ vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
+ vq->event_flags_shadow);
+ }
+
+ END_USE(vq);
+ return vq->last_used_idx;
}
static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
{
- return false;
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ bool avail, used;
+ u16 flags;
+
+ virtio_mb(vq->weak_barriers);
+ flags = virtio16_to_cpu(vq->vq.vdev,
+ vq->vring_packed.desc[last_used_idx].flags);
+ avail = flags & VRING_DESC_F_AVAIL(1);
+ used = flags & VRING_DESC_F_USED(1);
+ return avail == used;
}
static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
{
- return false;
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ START_USE(vq);
+
+ /* We optimistically turn back on interrupts, then check if there was
+ * more to do. */
+
+ if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
+ virtio_wmb(vq->weak_barriers);
+ vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
+ vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
+ vq->event_flags_shadow);
+ }
+
+ if (more_used_packed(vq)) {
+ END_USE(vq);
+ return false;
+ }
+
+ END_USE(vq);
+ return true;
}
static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ unsigned int i;
+ void *buf;
+
+ START_USE(vq);
+
+ for (i = 0; i < vq->vring_packed.num; i++) {
+ if (!vq->desc_state[i].data)
+ continue;
+ /* detach_buf clears data, so grab it now. */
+ buf = vq->desc_state[i].data;
+ detach_buf_packed(vq, i, NULL);
+ END_USE(vq);
+ return buf;
+ }
+ /* That should have freed everything. */
+ BUG_ON(vq->vq.num_free != vq->vring_packed.num);
+
+ END_USE(vq);
return NULL;
}
--
2.11.0
This commit introduces the support for creating packed ring.
All split ring specific functions are added _split suffix.
Some necessary stubs for packed ring are also added.
Signed-off-by: Tiwei Bie <[email protected]>
---
drivers/virtio/virtio_ring.c | 764 ++++++++++++++++++++++++++++---------------
include/linux/virtio_ring.h | 8 +-
2 files changed, 513 insertions(+), 259 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 71458f493cf8..e164822ca66e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -64,8 +64,8 @@ struct vring_desc_state {
struct vring_virtqueue {
struct virtqueue vq;
- /* Actual memory layout for this queue */
- struct vring vring;
+ /* Is this a packed ring? */
+ bool packed;
/* Can we use weak barriers? */
bool weak_barriers;
@@ -79,19 +79,45 @@ struct vring_virtqueue {
/* Host publishes avail event idx */
bool event;
- /* Head of free buffer list. */
- unsigned int free_head;
/* Number we've added since last sync. */
unsigned int num_added;
/* Last used index we've seen. */
u16 last_used_idx;
- /* Last written value to avail->flags */
- u16 avail_flags_shadow;
+ union {
+ /* Available for split ring */
+ struct {
+ /* Actual memory layout for this queue. */
+ struct vring vring;
- /* Last written value to avail->idx in guest byte order */
- u16 avail_idx_shadow;
+ /* Head of free buffer list. */
+ unsigned int free_head;
+
+ /* Last written value to avail->flags */
+ u16 avail_flags_shadow;
+
+ /* Last written value to avail->idx in
+ * guest byte order. */
+ u16 avail_idx_shadow;
+ };
+
+ /* Available for packed ring */
+ struct {
+ /* Actual memory layout for this queue. */
+ struct vring_packed vring_packed;
+
+ /* Driver ring wrap counter. */
+ u8 wrap_counter;
+
+ /* Index of the next avail descriptor. */
+ unsigned int next_avail_idx;
+
+ /* Last written value to driver->flags in
+ * guest byte order. */
+ u16 event_flags_shadow;
+ };
+ };
/* How to notify other side. FIXME: commonalize hcalls! */
bool (*notify)(struct virtqueue *vq);
@@ -201,8 +227,17 @@ static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
cpu_addr, size, direction);
}
-static void vring_unmap_one(const struct vring_virtqueue *vq,
- struct vring_desc *desc)
+static int vring_mapping_error(const struct vring_virtqueue *vq,
+ dma_addr_t addr)
+{
+ if (!vring_use_dma_api(vq->vq.vdev))
+ return 0;
+
+ return dma_mapping_error(vring_dma_dev(vq), addr);
+}
+
+static void vring_unmap_one_split(const struct vring_virtqueue *vq,
+ struct vring_desc *desc)
{
u16 flags;
@@ -226,17 +261,9 @@ static void vring_unmap_one(const struct vring_virtqueue *vq,
}
}
-static int vring_mapping_error(const struct vring_virtqueue *vq,
- dma_addr_t addr)
-{
- if (!vring_use_dma_api(vq->vq.vdev))
- return 0;
-
- return dma_mapping_error(vring_dma_dev(vq), addr);
-}
-
-static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
- unsigned int total_sg, gfp_t gfp)
+static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
+ unsigned int total_sg,
+ gfp_t gfp)
{
struct vring_desc *desc;
unsigned int i;
@@ -257,14 +284,14 @@ static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
return desc;
}
-static inline int virtqueue_add(struct virtqueue *_vq,
- struct scatterlist *sgs[],
- unsigned int total_sg,
- unsigned int out_sgs,
- unsigned int in_sgs,
- void *data,
- void *ctx,
- gfp_t gfp)
+static inline int virtqueue_add_split(struct virtqueue *_vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs,
+ void *data,
+ void *ctx,
+ gfp_t gfp)
{
struct vring_virtqueue *vq = to_vvq(_vq);
struct scatterlist *sg;
@@ -303,7 +330,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
/* If the host supports indirect descriptor tables, and we have multiple
* buffers, then go indirect. FIXME: tune this threshold */
if (vq->indirect && total_sg > 1 && vq->vq.num_free)
- desc = alloc_indirect(_vq, total_sg, gfp);
+ desc = alloc_indirect_split(_vq, total_sg, gfp);
else {
desc = NULL;
WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect);
@@ -424,7 +451,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
for (n = 0; n < total_sg; n++) {
if (i == err_idx)
break;
- vring_unmap_one(vq, &desc[i]);
+ vring_unmap_one_split(vq, &desc[i]);
i = virtio16_to_cpu(_vq->vdev, vq->vring.desc[i].next);
}
@@ -435,6 +462,355 @@ static inline int virtqueue_add(struct virtqueue *_vq,
return -EIO;
}
+static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ u16 new, old;
+ bool needs_kick;
+
+ START_USE(vq);
+ /* We need to expose available array entries before checking avail
+ * event. */
+ virtio_mb(vq->weak_barriers);
+
+ old = vq->avail_idx_shadow - vq->num_added;
+ new = vq->avail_idx_shadow;
+ vq->num_added = 0;
+
+#ifdef DEBUG
+ if (vq->last_add_time_valid) {
+ WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
+ vq->last_add_time)) > 100);
+ }
+ vq->last_add_time_valid = false;
+#endif
+
+ if (vq->event) {
+ needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, vring_avail_event(&vq->vring)),
+ new, old);
+ } else {
+ needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY));
+ }
+ END_USE(vq);
+ return needs_kick;
+}
+
+static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
+ void **ctx)
+{
+ unsigned int i, j;
+ __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
+
+ /* Clear data ptr. */
+ vq->desc_state[head].data = NULL;
+
+ /* Put back on free list: unmap first-level descriptors and find end */
+ i = head;
+
+ while (vq->vring.desc[i].flags & nextflag) {
+ vring_unmap_one_split(vq, &vq->vring.desc[i]);
+ i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
+ vq->vq.num_free++;
+ }
+
+ vring_unmap_one_split(vq, &vq->vring.desc[i]);
+ vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
+ vq->free_head = head;
+
+ /* Plus final descriptor */
+ vq->vq.num_free++;
+
+ if (vq->indirect) {
+ struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
+ u32 len;
+
+ /* Free the indirect table, if any, now that it's unmapped. */
+ if (!indir_desc)
+ return;
+
+ len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
+
+ BUG_ON(!(vq->vring.desc[head].flags &
+ cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
+ BUG_ON(len == 0 || len % sizeof(struct vring_desc));
+
+ for (j = 0; j < len / sizeof(struct vring_desc); j++)
+ vring_unmap_one_split(vq, &indir_desc[j]);
+
+ kfree(indir_desc);
+ vq->desc_state[head].indir_desc = NULL;
+ } else if (ctx) {
+ *ctx = vq->desc_state[head].indir_desc;
+ }
+}
+
+static inline bool more_used_split(const struct vring_virtqueue *vq)
+{
+ return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx);
+}
+
+static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
+ unsigned int *len,
+ void **ctx)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ void *ret;
+ unsigned int i;
+ u16 last_used;
+
+ START_USE(vq);
+
+ if (unlikely(vq->broken)) {
+ END_USE(vq);
+ return NULL;
+ }
+
+ if (!more_used_split(vq)) {
+ pr_debug("No more buffers in queue\n");
+ END_USE(vq);
+ return NULL;
+ }
+
+ /* Only get used array entries after they have been exposed by host. */
+ virtio_rmb(vq->weak_barriers);
+
+ last_used = (vq->last_used_idx & (vq->vring.num - 1));
+ i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id);
+ *len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len);
+
+ if (unlikely(i >= vq->vring.num)) {
+ BAD_RING(vq, "id %u out of range\n", i);
+ return NULL;
+ }
+ if (unlikely(!vq->desc_state[i].data)) {
+ BAD_RING(vq, "id %u is not a head!\n", i);
+ return NULL;
+ }
+
+ /* detach_buf_split clears data, so grab it now. */
+ ret = vq->desc_state[i].data;
+ detach_buf_split(vq, i, ctx);
+ vq->last_used_idx++;
+ /* If we expect an interrupt for the next entry, tell host
+ * by writing event index and flush out the write before
+ * the read in the next get_buf call. */
+ if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
+ virtio_store_mb(vq->weak_barriers,
+ &vring_used_event(&vq->vring),
+ cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
+
+#ifdef DEBUG
+ vq->last_add_time_valid = false;
+#endif
+
+ END_USE(vq);
+ return ret;
+}
+
+static void virtqueue_disable_cb_split(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
+ vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+ if (!vq->event)
+ vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+ }
+}
+
+static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ u16 last_used_idx;
+
+ START_USE(vq);
+
+ /* We optimistically turn back on interrupts, then check if there was
+ * more to do. */
+ /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
+ * either clear the flags bit or point the event index at the next
+ * entry. Always do both to keep code simple. */
+ if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+ vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+ if (!vq->event)
+ vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+ }
+ vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
+ END_USE(vq);
+ return last_used_idx;
+}
+
+static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ virtio_mb(vq->weak_barriers);
+ return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx);
+}
+
+static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ u16 bufs;
+
+ START_USE(vq);
+
+ /* We optimistically turn back on interrupts, then check if there was
+ * more to do. */
+ /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
+ * either clear the flags bit or point the event index at the next
+ * entry. Always update the event index to keep code simple. */
+ if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+ vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+ if (!vq->event)
+ vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+ }
+ /* TODO: tune this threshold */
+ bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
+
+ virtio_store_mb(vq->weak_barriers,
+ &vring_used_event(&vq->vring),
+ cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
+
+ if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) {
+ END_USE(vq);
+ return false;
+ }
+
+ END_USE(vq);
+ return true;
+}
+
+static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ unsigned int i;
+ void *buf;
+
+ START_USE(vq);
+
+ for (i = 0; i < vq->vring.num; i++) {
+ if (!vq->desc_state[i].data)
+ continue;
+ /* detach_buf clears data, so grab it now. */
+ buf = vq->desc_state[i].data;
+ detach_buf_split(vq, i, NULL);
+ vq->avail_idx_shadow--;
+ vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
+ END_USE(vq);
+ return buf;
+ }
+ /* That should have freed everything. */
+ BUG_ON(vq->vq.num_free != vq->vring.num);
+
+ END_USE(vq);
+ return NULL;
+}
+
+/*
+ * The layout for the packed ring is a continuous chunk of memory
+ * which looks like this.
+ *
+ * struct vring_packed {
+ * // The actual descriptors (16 bytes each)
+ * struct vring_packed_desc desc[num];
+ *
+ * // Padding to the next align boundary.
+ * char pad[];
+ *
+ * // Driver Event Suppression
+ * struct vring_packed_desc_event driver;
+ *
+ * // Device Event Suppression
+ * struct vring_packed_desc_event device;
+ * };
+ */
+static inline void vring_init_packed(struct vring_packed *vr, unsigned int num,
+ void *p, unsigned long align)
+{
+ vr->num = num;
+ vr->desc = p;
+ vr->driver = (void *)(((uintptr_t)p + sizeof(struct vring_packed_desc)
+ * num + align - 1) & ~(align - 1));
+ vr->device = vr->driver + 1;
+}
+
+static inline unsigned vring_size_packed(unsigned int num, unsigned long align)
+{
+ return ((sizeof(struct vring_packed_desc) * num + align - 1)
+ & ~(align - 1)) + sizeof(struct vring_packed_desc_event) * 2;
+}
+
+static inline int virtqueue_add_packed(struct virtqueue *_vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs,
+ void *data,
+ void *ctx,
+ gfp_t gfp)
+{
+ return -EIO;
+}
+
+static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
+{
+ return false;
+}
+
+static inline bool more_used_packed(const struct vring_virtqueue *vq)
+{
+ return false;
+}
+
+static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
+ unsigned int *len,
+ void **ctx)
+{
+ return NULL;
+}
+
+static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
+{
+}
+
+static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
+{
+ return 0;
+}
+
+static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
+{
+ return false;
+}
+
+static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
+{
+ return false;
+}
+
+static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
+{
+ return NULL;
+}
+
+static inline int virtqueue_add(struct virtqueue *_vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs,
+ void *data,
+ void *ctx,
+ gfp_t gfp)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ return vq->packed ? virtqueue_add_packed(_vq, sgs, total_sg, out_sgs,
+ in_sgs, data, ctx, gfp) :
+ virtqueue_add_split(_vq, sgs, total_sg, out_sgs,
+ in_sgs, data, ctx, gfp);
+}
+
/**
* virtqueue_add_sgs - expose buffers to other end
* @vq: the struct virtqueue we're talking about.
@@ -551,34 +927,9 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
bool virtqueue_kick_prepare(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- u16 new, old;
- bool needs_kick;
- START_USE(vq);
- /* We need to expose available array entries before checking avail
- * event. */
- virtio_mb(vq->weak_barriers);
-
- old = vq->avail_idx_shadow - vq->num_added;
- new = vq->avail_idx_shadow;
- vq->num_added = 0;
-
-#ifdef DEBUG
- if (vq->last_add_time_valid) {
- WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
- vq->last_add_time)) > 100);
- }
- vq->last_add_time_valid = false;
-#endif
-
- if (vq->event) {
- needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, vring_avail_event(&vq->vring)),
- new, old);
- } else {
- needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY));
- }
- END_USE(vq);
- return needs_kick;
+ return vq->packed ? virtqueue_kick_prepare_packed(_vq) :
+ virtqueue_kick_prepare_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
@@ -626,58 +977,9 @@ bool virtqueue_kick(struct virtqueue *vq)
}
EXPORT_SYMBOL_GPL(virtqueue_kick);
-static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
- void **ctx)
-{
- unsigned int i, j;
- __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
-
- /* Clear data ptr. */
- vq->desc_state[head].data = NULL;
-
- /* Put back on free list: unmap first-level descriptors and find end */
- i = head;
-
- while (vq->vring.desc[i].flags & nextflag) {
- vring_unmap_one(vq, &vq->vring.desc[i]);
- i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
- vq->vq.num_free++;
- }
-
- vring_unmap_one(vq, &vq->vring.desc[i]);
- vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
- vq->free_head = head;
-
- /* Plus final descriptor */
- vq->vq.num_free++;
-
- if (vq->indirect) {
- struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
- u32 len;
-
- /* Free the indirect table, if any, now that it's unmapped. */
- if (!indir_desc)
- return;
-
- len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
-
- BUG_ON(!(vq->vring.desc[head].flags &
- cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
- BUG_ON(len == 0 || len % sizeof(struct vring_desc));
-
- for (j = 0; j < len / sizeof(struct vring_desc); j++)
- vring_unmap_one(vq, &indir_desc[j]);
-
- kfree(indir_desc);
- vq->desc_state[head].indir_desc = NULL;
- } else if (ctx) {
- *ctx = vq->desc_state[head].indir_desc;
- }
-}
-
static inline bool more_used(const struct vring_virtqueue *vq)
{
- return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx);
+ return vq->packed ? more_used_packed(vq) : more_used_split(vq);
}
/**
@@ -700,57 +1002,9 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
void **ctx)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- void *ret;
- unsigned int i;
- u16 last_used;
- START_USE(vq);
-
- if (unlikely(vq->broken)) {
- END_USE(vq);
- return NULL;
- }
-
- if (!more_used(vq)) {
- pr_debug("No more buffers in queue\n");
- END_USE(vq);
- return NULL;
- }
-
- /* Only get used array entries after they have been exposed by host. */
- virtio_rmb(vq->weak_barriers);
-
- last_used = (vq->last_used_idx & (vq->vring.num - 1));
- i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id);
- *len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len);
-
- if (unlikely(i >= vq->vring.num)) {
- BAD_RING(vq, "id %u out of range\n", i);
- return NULL;
- }
- if (unlikely(!vq->desc_state[i].data)) {
- BAD_RING(vq, "id %u is not a head!\n", i);
- return NULL;
- }
-
- /* detach_buf clears data, so grab it now. */
- ret = vq->desc_state[i].data;
- detach_buf(vq, i, ctx);
- vq->last_used_idx++;
- /* If we expect an interrupt for the next entry, tell host
- * by writing event index and flush out the write before
- * the read in the next get_buf call. */
- if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
- virtio_store_mb(vq->weak_barriers,
- &vring_used_event(&vq->vring),
- cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
-
-#ifdef DEBUG
- vq->last_add_time_valid = false;
-#endif
-
- END_USE(vq);
- return ret;
+ return vq->packed ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
+ virtqueue_get_buf_ctx_split(_vq, len, ctx);
}
EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
@@ -772,12 +1026,10 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
- vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
- if (!vq->event)
- vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
- }
-
+ if (vq->packed)
+ virtqueue_disable_cb_packed(_vq);
+ else
+ virtqueue_disable_cb_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
@@ -796,23 +1048,9 @@ EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- u16 last_used_idx;
- START_USE(vq);
-
- /* We optimistically turn back on interrupts, then check if there was
- * more to do. */
- /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
- * either clear the flags bit or point the event index at the next
- * entry. Always do both to keep code simple. */
- if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
- vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
- if (!vq->event)
- vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
- }
- vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
- END_USE(vq);
- return last_used_idx;
+ return vq->packed ? virtqueue_enable_cb_prepare_packed(_vq) :
+ virtqueue_enable_cb_prepare_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
@@ -829,8 +1067,8 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- virtio_mb(vq->weak_barriers);
- return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx);
+ return vq->packed ? virtqueue_poll_packed(_vq, last_used_idx) :
+ virtqueue_poll_split(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_poll);
@@ -868,34 +1106,9 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- u16 bufs;
- START_USE(vq);
-
- /* We optimistically turn back on interrupts, then check if there was
- * more to do. */
- /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
- * either clear the flags bit or point the event index at the next
- * entry. Always update the event index to keep code simple. */
- if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
- vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
- if (!vq->event)
- vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
- }
- /* TODO: tune this threshold */
- bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
-
- virtio_store_mb(vq->weak_barriers,
- &vring_used_event(&vq->vring),
- cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
-
- if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) {
- END_USE(vq);
- return false;
- }
-
- END_USE(vq);
- return true;
+ return vq->packed ? virtqueue_enable_cb_delayed_packed(_vq) :
+ virtqueue_enable_cb_delayed_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
@@ -910,27 +1123,9 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- unsigned int i;
- void *buf;
- START_USE(vq);
-
- for (i = 0; i < vq->vring.num; i++) {
- if (!vq->desc_state[i].data)
- continue;
- /* detach_buf clears data, so grab it now. */
- buf = vq->desc_state[i].data;
- detach_buf(vq, i, NULL);
- vq->avail_idx_shadow--;
- vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
- END_USE(vq);
- return buf;
- }
- /* That should have freed everything. */
- BUG_ON(vq->vq.num_free != vq->vring.num);
-
- END_USE(vq);
- return NULL;
+ return vq->packed ? virtqueue_detach_unused_buf_packed(_vq) :
+ virtqueue_detach_unused_buf_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
@@ -955,7 +1150,8 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
EXPORT_SYMBOL_GPL(vring_interrupt);
struct virtqueue *__vring_new_virtqueue(unsigned int index,
- struct vring vring,
+ union vring_union vring,
+ bool packed,
struct virtio_device *vdev,
bool weak_barriers,
bool context,
@@ -963,19 +1159,20 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
void (*callback)(struct virtqueue *),
const char *name)
{
- unsigned int i;
+ unsigned int num, i;
struct vring_virtqueue *vq;
- vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state),
+ num = packed ? vring.vring_packed.num : vring.vring_split.num;
+
+ vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state),
GFP_KERNEL);
if (!vq)
return NULL;
- vq->vring = vring;
vq->vq.callback = callback;
vq->vq.vdev = vdev;
vq->vq.name = name;
- vq->vq.num_free = vring.num;
+ vq->vq.num_free = num;
vq->vq.index = index;
vq->we_own_ring = false;
vq->queue_dma_addr = 0;
@@ -984,9 +1181,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
vq->weak_barriers = weak_barriers;
vq->broken = false;
vq->last_used_idx = 0;
- vq->avail_flags_shadow = 0;
- vq->avail_idx_shadow = 0;
vq->num_added = 0;
+ vq->packed = packed;
list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
vq->in_use = false;
@@ -997,18 +1193,37 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
!context;
vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
+ if (vq->packed) {
+ vq->vring_packed = vring.vring_packed;
+ vq->next_avail_idx = 0;
+ vq->wrap_counter = 1;
+ vq->event_flags_shadow = 0;
+ } else {
+ vq->vring = vring.vring_split;
+ vq->avail_flags_shadow = 0;
+ vq->avail_idx_shadow = 0;
+
+ /* Put everything in free lists. */
+ vq->free_head = 0;
+ for (i = 0; i < num-1; i++)
+ vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
+ }
+
/* No callback? Tell other side not to bother us. */
if (!callback) {
- vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
- if (!vq->event)
- vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
+ if (packed) {
+ vq->event_flags_shadow = VRING_EVENT_F_DISABLE;
+ vq->vring_packed.driver->flags = cpu_to_virtio16(vdev,
+ vq->event_flags_shadow);
+ } else {
+ vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+ if (!vq->event)
+ vq->vring.avail->flags = cpu_to_virtio16(vdev,
+ vq->avail_flags_shadow);
+ }
}
- /* Put everything in free lists. */
- vq->free_head = 0;
- for (i = 0; i < vring.num-1; i++)
- vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
- memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state));
+ memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state));
return &vq->vq;
}
@@ -1056,6 +1271,12 @@ static void vring_free_queue(struct virtio_device *vdev, size_t size,
}
}
+static inline int
+__vring_size(unsigned int num, unsigned long align, bool packed)
+{
+ return packed ? vring_size_packed(num, align) : vring_size(num, align);
+}
+
struct virtqueue *vring_create_virtqueue(
unsigned int index,
unsigned int num,
@@ -1072,7 +1293,8 @@ struct virtqueue *vring_create_virtqueue(
void *queue = NULL;
dma_addr_t dma_addr;
size_t queue_size_in_bytes;
- struct vring vring;
+ union vring_union vring;
+ bool packed;
/* We assume num is a power of 2. */
if (num & (num - 1)) {
@@ -1080,9 +1302,13 @@ struct virtqueue *vring_create_virtqueue(
return NULL;
}
+ packed = virtio_has_feature(vdev, VIRTIO_F_RING_PACKED);
+
/* TODO: allocate each queue chunk individually */
- for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
- queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
+ for (; num && __vring_size(num, vring_align, packed) > PAGE_SIZE;
+ num /= 2) {
+ queue = vring_alloc_queue(vdev, __vring_size(num, vring_align,
+ packed),
&dma_addr,
GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
if (queue)
@@ -1094,17 +1320,21 @@ struct virtqueue *vring_create_virtqueue(
if (!queue) {
/* Try to get a single page. You are my only hope! */
- queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
+ queue = vring_alloc_queue(vdev, __vring_size(num, vring_align,
+ packed),
&dma_addr, GFP_KERNEL|__GFP_ZERO);
}
if (!queue)
return NULL;
- queue_size_in_bytes = vring_size(num, vring_align);
- vring_init(&vring, num, queue, vring_align);
+ queue_size_in_bytes = __vring_size(num, vring_align, packed);
+ if (packed)
+ vring_init_packed(&vring.vring_packed, num, queue, vring_align);
+ else
+ vring_init(&vring.vring_split, num, queue, vring_align);
- vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
- notify, callback, name);
+ vq = __vring_new_virtqueue(index, vring, packed, vdev, weak_barriers,
+ context, notify, callback, name);
if (!vq) {
vring_free_queue(vdev, queue_size_in_bytes, queue,
dma_addr);
@@ -1130,10 +1360,17 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
void (*callback)(struct virtqueue *vq),
const char *name)
{
- struct vring vring;
- vring_init(&vring, num, pages, vring_align);
- return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
- notify, callback, name);
+ union vring_union vring;
+ bool packed;
+
+ packed = virtio_has_feature(vdev, VIRTIO_F_RING_PACKED);
+ if (packed)
+ vring_init_packed(&vring.vring_packed, num, pages, vring_align);
+ else
+ vring_init(&vring.vring_split, num, pages, vring_align);
+
+ return __vring_new_virtqueue(index, vring, packed, vdev, weak_barriers,
+ context, notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);
@@ -1143,7 +1380,9 @@ void vring_del_virtqueue(struct virtqueue *_vq)
if (vq->we_own_ring) {
vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
- vq->vring.desc, vq->queue_dma_addr);
+ vq->packed ? (void *)vq->vring_packed.desc :
+ (void *)vq->vring.desc,
+ vq->queue_dma_addr);
}
list_del(&_vq->list);
kfree(vq);
@@ -1185,7 +1424,7 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
struct vring_virtqueue *vq = to_vvq(_vq);
- return vq->vring.num;
+ return vq->packed ? vq->vring_packed.num : vq->vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
@@ -1228,6 +1467,10 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
BUG_ON(!vq->we_own_ring);
+ if (vq->packed)
+ return vq->queue_dma_addr + ((char *)vq->vring_packed.driver -
+ (char *)vq->vring_packed.desc);
+
return vq->queue_dma_addr +
((char *)vq->vring.avail - (char *)vq->vring.desc);
}
@@ -1239,11 +1482,16 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
BUG_ON(!vq->we_own_ring);
+ if (vq->packed)
+ return vq->queue_dma_addr + ((char *)vq->vring_packed.device -
+ (char *)vq->vring_packed.desc);
+
return vq->queue_dma_addr +
((char *)vq->vring.used - (char *)vq->vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
+/* Only available for split ring */
const struct vring *virtqueue_get_vring(struct virtqueue *vq)
{
return &to_vvq(vq)->vring;
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index bbf32524ab27..a0075894ad16 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -60,6 +60,11 @@ static inline void virtio_store_mb(bool weak_barriers,
struct virtio_device;
struct virtqueue;
+union vring_union {
+ struct vring vring_split;
+ struct vring_packed vring_packed;
+};
+
/*
* Creates a virtqueue and allocates the descriptor ring. If
* may_reduce_num is set, then this may allocate a smaller ring than
@@ -79,7 +84,8 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
/* Creates a virtqueue with a custom layout. */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
- struct vring vring,
+ union vring_union vring,
+ bool packed,
struct virtio_device *vdev,
bool weak_barriers,
bool ctx,
--
2.11.0
Signed-off-by: Tiwei Bie <[email protected]>
---
drivers/virtio/virtio_ring.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b1039c2985b9..9a3d13e1e2ba 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1873,6 +1873,8 @@ void vring_transport_features(struct virtio_device *vdev)
break;
case VIRTIO_F_IOMMU_PLATFORM:
break;
+ case VIRTIO_F_RING_PACKED:
+ break;
default:
/* We don't understand this bit. */
__virtio_clear_bit(vdev, i);
--
2.11.0
Signed-off-by: Tiwei Bie <[email protected]>
---
include/uapi/linux/virtio_config.h | 12 +++++++++++-
include/uapi/linux/virtio_ring.h | 36 ++++++++++++++++++++++++++++++++++++
2 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index 308e2096291f..a6e392325e3a 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -49,7 +49,7 @@
* transport being used (eg. virtio_ring), the rest are per-device feature
* bits. */
#define VIRTIO_TRANSPORT_F_START 28
-#define VIRTIO_TRANSPORT_F_END 34
+#define VIRTIO_TRANSPORT_F_END 36
#ifndef VIRTIO_CONFIG_NO_LEGACY
/* Do we get callbacks when the ring is completely used, even if we've
@@ -71,4 +71,14 @@
* this is for compatibility with legacy systems.
*/
#define VIRTIO_F_IOMMU_PLATFORM 33
+
+/* This feature indicates support for the packed virtqueue layout. */
+#define VIRTIO_F_RING_PACKED 34
+
+/*
+ * This feature indicates that all buffers are used by the device
+ * in the same order in which they have been made available.
+ */
+#define VIRTIO_F_IN_ORDER 35
+
#endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h
index 6d5d5faa989b..3932cb80c347 100644
--- a/include/uapi/linux/virtio_ring.h
+++ b/include/uapi/linux/virtio_ring.h
@@ -44,6 +44,9 @@
/* This means the buffer contains a list of buffer descriptors. */
#define VRING_DESC_F_INDIRECT 4
+#define VRING_DESC_F_AVAIL(b) ((b) << 7)
+#define VRING_DESC_F_USED(b) ((b) << 15)
+
/* The Host uses this in used->flags to advise the Guest: don't kick me when
* you add a buffer. It's unreliable, so it's simply an optimization. Guest
* will still kick if it's out of buffers. */
@@ -53,6 +56,10 @@
* optimization. */
#define VRING_AVAIL_F_NO_INTERRUPT 1
+#define VRING_EVENT_F_ENABLE 0x0
+#define VRING_EVENT_F_DISABLE 0x1
+#define VRING_EVENT_F_DESC 0x2
+
/* We support indirect buffer descriptors */
#define VIRTIO_RING_F_INDIRECT_DESC 28
@@ -171,4 +178,33 @@ static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
}
+struct vring_packed_desc_event {
+ /* __virtio16 off : 15; // Descriptor Event Offset
+ * __virtio16 wrap : 1; // Descriptor Event Wrap Counter */
+ __virtio16 off_wrap;
+ /* __virtio16 flags : 2; // Descriptor Event Flags */
+ __virtio16 flags;
+};
+
+struct vring_packed_desc {
+ /* Buffer Address. */
+ __virtio64 addr;
+ /* Buffer Length. */
+ __virtio32 len;
+ /* Buffer ID. */
+ __virtio16 id;
+ /* The flags depending on descriptor type. */
+ __virtio16 flags;
+};
+
+struct vring_packed {
+ unsigned int num;
+
+ struct vring_packed_desc *desc;
+
+ struct vring_packed_desc_event *driver;
+
+ struct vring_packed_desc_event *device;
+};
+
#endif /* _UAPI_LINUX_VIRTIO_RING_H */
--
2.11.0
On 2018年04月25日 13:15, Tiwei Bie wrote:
> Hello everyone,
>
> This RFC implements packed ring support in virtio driver.
>
> Some simple functional tests have been done with Jason's
> packed ring implementation in vhost:
>
> https://lkml.org/lkml/2018/4/23/12
>
> Both of ping and netperf worked as expected (with EVENT_IDX
> disabled). But there are below known issues:
>
> 1. Reloading the guest driver will break the Tx/Rx;
Will have a look at this issue.
> 2. Zeroing the flags when detaching a used desc will
> break the guest -> host path.
I still think zeroing flags is unnecessary or even a bug. At host, I
track last observed avail wrap counter and detect avail like (what is
suggested in the example code in the spec):
static bool desc_is_avail(struct vhost_virtqueue *vq, __virtio16 flags)
{
bool avail = flags & cpu_to_vhost16(vq, DESC_AVAIL);
return avail == vq->avail_wrap_counter;
}
So zeroing wrap can not work with this obviously.
Thanks
>
> Some simple functional tests have also been done with
> Wei's packed ring implementation in QEMU:
>
> http://lists.nongnu.org/archive/html/qemu-devel/2018-04/msg00342.html
>
> Both of ping and netperf worked as expected (with EVENT_IDX
> disabled). Reloading the guest driver also worked as expected.
>
> TODO:
> - Refinements (for code and commit log) and bug fixes;
> - Discuss/fix/test EVENT_IDX support;
> - Test devices other than net;
>
> RFC v2 -> RFC v3:
> - Split into small patches (Jason);
> - Add helper virtqueue_use_indirect() (Jason);
> - Just set id for the last descriptor of a list (Jason);
> - Calculate the prev in virtqueue_add_packed() (Jason);
> - Fix/improve desc suppression code (Jason/MST);
> - Refine the code layout for XXX_split/packed and wrappers (MST);
> - Fix the comments and API in uapi (MST);
> - Remove the BUG_ON() for indirect (Jason);
> - Some other refinements and bug fixes;
>
> RFC v1 -> RFC v2:
> - Add indirect descriptor support - compile test only;
> - Add event suppression support - compile test only;
> - Move vring_packed_init() out of uapi (Jason, MST);
> - Merge two loops into one in virtqueue_add_packed() (Jason);
> - Split vring_unmap_one() for packed ring and split ring (Jason);
> - Avoid using '%' operator (Jason);
> - Rename free_head -> next_avail_idx (Jason);
> - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> - Some other refinements and bug fixes;
>
> Thanks!
>
> Tiwei Bie (5):
> virtio: add packed ring definitions
> virtio_ring: support creating packed ring
> virtio_ring: add packed ring support
> virtio_ring: add event idx support in packed ring
> virtio_ring: enable packed ring
>
> drivers/virtio/virtio_ring.c | 1271 ++++++++++++++++++++++++++++--------
> include/linux/virtio_ring.h | 8 +-
> include/uapi/linux/virtio_config.h | 12 +-
> include/uapi/linux/virtio_ring.h | 36 +
> 4 files changed, 1049 insertions(+), 278 deletions(-)
>
On Fri, Apr 27, 2018 at 11:56:05AM +0800, Jason Wang wrote:
>
>
> On 2018年04月25日 13:15, Tiwei Bie wrote:
> > Hello everyone,
> >
> > This RFC implements packed ring support in virtio driver.
> >
> > Some simple functional tests have been done with Jason's
> > packed ring implementation in vhost:
> >
> > https://lkml.org/lkml/2018/4/23/12
> >
> > Both of ping and netperf worked as expected (with EVENT_IDX
> > disabled). But there are below known issues:
> >
> > 1. Reloading the guest driver will break the Tx/Rx;
>
> Will have a look at this issue.
>
> > 2. Zeroing the flags when detaching a used desc will
> > break the guest -> host path.
>
> I still think zeroing flags is unnecessary or even a bug. At host, I track
> last observed avail wrap counter and detect avail like (what is suggested in
> the example code in the spec):
>
> static bool desc_is_avail(struct vhost_virtqueue *vq, __virtio16 flags)
> {
> bool avail = flags & cpu_to_vhost16(vq, DESC_AVAIL);
>
> return avail == vq->avail_wrap_counter;
> }
>
> So zeroing wrap can not work with this obviously.
>
> Thanks
I agree. I think what one should do is flip the available bit.
> >
> > Some simple functional tests have also been done with
> > Wei's packed ring implementation in QEMU:
> >
> > http://lists.nongnu.org/archive/html/qemu-devel/2018-04/msg00342.html
> >
> > Both of ping and netperf worked as expected (with EVENT_IDX
> > disabled). Reloading the guest driver also worked as expected.
> >
> > TODO:
> > - Refinements (for code and commit log) and bug fixes;
> > - Discuss/fix/test EVENT_IDX support;
> > - Test devices other than net;
> >
> > RFC v2 -> RFC v3:
> > - Split into small patches (Jason);
> > - Add helper virtqueue_use_indirect() (Jason);
> > - Just set id for the last descriptor of a list (Jason);
> > - Calculate the prev in virtqueue_add_packed() (Jason);
> > - Fix/improve desc suppression code (Jason/MST);
> > - Refine the code layout for XXX_split/packed and wrappers (MST);
> > - Fix the comments and API in uapi (MST);
> > - Remove the BUG_ON() for indirect (Jason);
> > - Some other refinements and bug fixes;
> >
> > RFC v1 -> RFC v2:
> > - Add indirect descriptor support - compile test only;
> > - Add event suppression supprt - compile test only;
> > - Move vring_packed_init() out of uapi (Jason, MST);
> > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > - Avoid using '%' operator (Jason);
> > - Rename free_head -> next_avail_idx (Jason);
> > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > - Some other refinements and bug fixes;
> >
> > Thanks!
> >
> > Tiwei Bie (5):
> > virtio: add packed ring definitions
> > virtio_ring: support creating packed ring
> > virtio_ring: add packed ring support
> > virtio_ring: add event idx support in packed ring
> > virtio_ring: enable packed ring
> >
> > drivers/virtio/virtio_ring.c | 1271 ++++++++++++++++++++++++++++--------
> > include/linux/virtio_ring.h | 8 +-
> > include/uapi/linux/virtio_config.h | 12 +-
> > include/uapi/linux/virtio_ring.h | 36 +
> > 4 files changed, 1049 insertions(+), 278 deletions(-)
> >
On 2018年04月27日 12:18, Michael S. Tsirkin wrote:
> On Fri, Apr 27, 2018 at 11:56:05AM +0800, Jason Wang wrote:
>> On 2018年04月25日 13:15, Tiwei Bie wrote:
>>> Hello everyone,
>>>
>>> This RFC implements packed ring support in virtio driver.
>>>
>>> Some simple functional tests have been done with Jason's
>>> packed ring implementation in vhost:
>>>
>>> https://lkml.org/lkml/2018/4/23/12
>>>
>>> Both of ping and netperf worked as expected (with EVENT_IDX
>>> disabled). But there are below known issues:
>>>
>>> 1. Reloading the guest driver will break the Tx/Rx;
>> Will have a look at this issue.
>>
>>> 2. Zeroing the flags when detaching a used desc will
>>> break the guest -> host path.
>> I still think zeroing flags is unnecessary or even a bug. At host, I track
>> last observed avail wrap counter and detect avail like (what is suggested in
>> the example code in the spec):
>>
>> static bool desc_is_avail(struct vhost_virtqueue *vq, __virtio16 flags)
>> {
>> bool avail = flags & cpu_to_vhost16(vq, DESC_AVAIL);
>>
>> return avail == vq->avail_wrap_counter;
>> }
>>
>> So zeroing wrap can not work with this obviously.
>>
>> Thanks
> I agree. I think what one should do is flip the available bit.
>
But is this flipping a must?
Thanks
On Fri, Apr 27, 2018 at 02:17:51PM +0800, Jason Wang wrote:
> On 2018年04月27日 12:18, Michael S. Tsirkin wrote:
> > On Fri, Apr 27, 2018 at 11:56:05AM +0800, Jason Wang wrote:
> > > On 2018年04月25日 13:15, Tiwei Bie wrote:
> > > > Hello everyone,
> > > >
> > > > This RFC implements packed ring support in virtio driver.
> > > >
> > > > Some simple functional tests have been done with Jason's
> > > > packed ring implementation in vhost:
> > > >
> > > > https://lkml.org/lkml/2018/4/23/12
> > > >
> > > > Both of ping and netperf worked as expected (with EVENT_IDX
> > > > disabled). But there are below known issues:
> > > >
> > > > 1. Reloading the guest driver will break the Tx/Rx;
> > > Will have a look at this issue.
> > >
> > > > 2. Zeroing the flags when detaching a used desc will
> > > > break the guest -> host path.
> > > I still think zeroing flags is unnecessary or even a bug. At host, I track
> > > last observed avail wrap counter and detect avail like (what is suggested in
> > > the example code in the spec):
> > >
> > > static bool desc_is_avail(struct vhost_virtqueue *vq, __virtio16 flags)
> > > {
> > > bool avail = flags & cpu_to_vhost16(vq, DESC_AVAIL);
> > >
> > > return avail == vq->avail_wrap_counter;
> > > }
> > >
> > > So zeroing wrap can not work with this obviously.
> > >
> > > Thanks
> > I agree. I think what one should do is flip the available bit.
> >
>
> But is this flipping a must?
>
> Thanks
Yeah, that's my question too. It seems to be a requirement
for driver that, the only change to the desc status that a
driver can do during running is to mark the desc as avail,
and any other changes to the desc status are not allowed.
Similarly, the device can only mark the desc as used, and
any other changes to the desc status are also not allowed.
So the question is, are there such requirements?
Based on below contents in the spec:
"""
Thus VIRTQ_DESC_F_AVAIL and VIRTQ_DESC_F_USED bits are different
for an available descriptor and equal for a used descriptor.
Note that this observation is mostly useful for sanity-checking
as these are necessary but not sufficient conditions
"""
It seems that, it's necessary for devices to check whether
the AVAIL bit and USED bit are different.
Best regards,
Tiwei Bie
On 2018年04月27日 17:12, Tiwei Bie wrote:
> On Fri, Apr 27, 2018 at 02:17:51PM +0800, Jason Wang wrote:
>> On 2018年04月27日 12:18, Michael S. Tsirkin wrote:
>>> On Fri, Apr 27, 2018 at 11:56:05AM +0800, Jason Wang wrote:
>>>> On 2018年04月25日 13:15, Tiwei Bie wrote:
>>>>> Hello everyone,
>>>>>
>>>>> This RFC implements packed ring support in virtio driver.
>>>>>
>>>>> Some simple functional tests have been done with Jason's
>>>>> packed ring implementation in vhost:
>>>>>
>>>>> https://lkml.org/lkml/2018/4/23/12
>>>>>
>>>>> Both of ping and netperf worked as expected (with EVENT_IDX
>>>>> disabled). But there are below known issues:
>>>>>
>>>>> 1. Reloading the guest driver will break the Tx/Rx;
>>>> Will have a look at this issue.
>>>>
>>>>> 2. Zeroing the flags when detaching a used desc will
>>>>> break the guest -> host path.
>>>> I still think zeroing flags is unnecessary or even a bug. At host, I track
>>>> last observed avail wrap counter and detect avail like (what is suggested in
>>>> the example code in the spec):
>>>>
>>>> static bool desc_is_avail(struct vhost_virtqueue *vq, __virtio16 flags)
>>>> {
>>>> bool avail = flags & cpu_to_vhost16(vq, DESC_AVAIL);
>>>>
>>>> return avail == vq->avail_wrap_counter;
>>>> }
>>>>
>>>> So zeroing wrap can not work with this obviously.
>>>>
>>>> Thanks
>>> I agree. I think what one should do is flip the available bit.
>>>
>> But is this flipping a must?
>>
>> Thanks
> Yeah, that's my question too. It seems to be a requirement
> for driver that, the only change to the desc status that a
> driver can do during running is to mark the desc as avail,
> and any other changes to the desc status are not allowed.
> Similarly, the device can only mark the desc as used, and
> any other changes to the desc status are also not allowed.
> So the question is, are there such requirements?
Looks not, but I think we need clarify this in the spec.
Thanks
>
> Based on below contents in the spec:
>
> """
> Thus VIRTQ_DESC_F_AVAIL and VIRTQ_DESC_F_USED bits are different
> for an available descriptor and equal for a used descriptor.
>
> Note that this observation is mostly useful for sanity-checking
> as these are necessary but not sufficient conditions
> """
>
> It seems that, it's necessary for devices to check whether
> the AVAIL bit and USED bit are different.
>
> Best regards,
> Tiwei Bie
On 2018年04月25日 13:15, Tiwei Bie wrote:
> This commit introduces the event idx support in packed
> ring. This feature is temporarily disabled, because the
> implementation in this patch may not work as expected,
> and some further discussions on the implementation are
> needed, e.g. do we have to check the wrap counter when
> checking whether a kick is needed?
>
> Signed-off-by: Tiwei Bie <[email protected]>
> ---
> drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 49 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 0181e93897be..b1039c2985b9 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -986,7 +986,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
> - u16 flags;
> + u16 new, old, off_wrap, flags;
> bool needs_kick;
> u32 snapshot;
>
> @@ -995,7 +995,12 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> * suppressions. */
> virtio_mb(vq->weak_barriers);
>
> + old = vq->next_avail_idx - vq->num_added;
> + new = vq->next_avail_idx;
> + vq->num_added = 0;
> +
> snapshot = *(u32 *)vq->vring_packed.device;
> + off_wrap = virtio16_to_cpu(_vq->vdev, snapshot & 0xffff);
> flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
>
> #ifdef DEBUG
> @@ -1006,7 +1011,10 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> vq->last_add_time_valid = false;
> #endif
>
> - needs_kick = (flags != VRING_EVENT_F_DISABLE);
> + if (flags == VRING_EVENT_F_DESC)
> + needs_kick = vring_need_event(off_wrap & ~(1<<15), new, old);
I wonder whether or not the math is correct. Both new and event are in
the unit of descriptor ring size, but old looks not.
Thanks
> + else
> + needs_kick = (flags != VRING_EVENT_F_DISABLE);
> END_USE(vq);
> return needs_kick;
> }
> @@ -1116,6 +1124,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> if (vq->last_used_idx >= vq->vring_packed.num)
> vq->last_used_idx -= vq->vring_packed.num;
>
> + /* If we expect an interrupt for the next entry, tell host
> + * by writing event index and flush out the write before
> + * the read in the next get_buf call. */
> + if (vq->event_flags_shadow == VRING_EVENT_F_DESC)
> + virtio_store_mb(vq->weak_barriers,
> + &vq->vring_packed.driver->off_wrap,
> + cpu_to_virtio16(_vq->vdev, vq->last_used_idx |
> + (vq->wrap_counter << 15)));
> +
> #ifdef DEBUG
> vq->last_add_time_valid = false;
> #endif
> @@ -1143,10 +1160,17 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
>
> /* We optimistically turn back on interrupts, then check if there was
> * more to do. */
> + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> + * either clear the flags bit or point the event index at the next
> + * entry. Always update the event index to keep code simple. */
> +
> + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> + vq->last_used_idx | (vq->wrap_counter << 15));
>
> if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> virtio_wmb(vq->weak_barriers);
> - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> + VRING_EVENT_F_ENABLE;
> vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> vq->event_flags_shadow);
> }
> @@ -1172,15 +1196,34 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
> static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
> + u16 bufs, used_idx, wrap_counter;
>
> START_USE(vq);
>
> /* We optimistically turn back on interrupts, then check if there was
> * more to do. */
> + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> + * either clear the flags bit or point the event index at the next
> + * entry. Always update the event index to keep code simple. */
> +
> + /* TODO: tune this threshold */
> + bufs = (u16)(vq->next_avail_idx - vq->last_used_idx) * 3 / 4;
> +
> + used_idx = vq->last_used_idx + bufs;
> + wrap_counter = vq->wrap_counter;
> +
> + if (used_idx >= vq->vring_packed.num) {
> + used_idx -= vq->vring_packed.num;
> + wrap_counter ^= 1;
> + }
> +
> + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> + used_idx | (wrap_counter << 15));
>
> if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> virtio_wmb(vq->weak_barriers);
> - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> + VRING_EVENT_F_ENABLE;
> vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> vq->event_flags_shadow);
> }
> @@ -1822,8 +1865,10 @@ void vring_transport_features(struct virtio_device *vdev)
> switch (i) {
> case VIRTIO_RING_F_INDIRECT_DESC:
> break;
> +#if 0
> case VIRTIO_RING_F_EVENT_IDX:
> break;
> +#endif
> case VIRTIO_F_VERSION_1:
> break;
> case VIRTIO_F_IOMMU_PLATFORM:
On 2018年04月25日 13:15, Tiwei Bie wrote:
> Hello everyone,
>
> This RFC implements packed ring support in virtio driver.
>
> Some simple functional tests have been done with Jason's
> packed ring implementation in vhost:
>
> https://lkml.org/lkml/2018/4/23/12
>
> Both of ping and netperf worked as expected (with EVENT_IDX
> disabled). But there are below known issues:
>
> 1. Reloading the guest driver will break the Tx/Rx;
It looks like the reason is we don't sync wrap counter information
between host and qemu through VHOST_SET/GET_VRING_BASE. And both vhost
and qemu need to do this through encoding wrap counters to higher bits
of vhost_vring_state.num.
Thanks
On Wed, May 02, 2018 at 10:51:06AM +0800, Jason Wang wrote:
> On 2018年04月25日 13:15, Tiwei Bie wrote:
> > This commit introduces the event idx support in packed
> > ring. This feature is temporarily disabled, because the
> > implementation in this patch may not work as expected,
> > and some further discussions on the implementation are
> > needed, e.g. do we have to check the wrap counter when
> > checking whether a kick is needed?
> >
> > Signed-off-by: Tiwei Bie <[email protected]>
> > ---
> > drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++++++----
> > 1 file changed, 49 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 0181e93897be..b1039c2985b9 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -986,7 +986,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > - u16 flags;
> > + u16 new, old, off_wrap, flags;
> > bool needs_kick;
> > u32 snapshot;
> > @@ -995,7 +995,12 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > * suppressions. */
> > virtio_mb(vq->weak_barriers);
> > + old = vq->next_avail_idx - vq->num_added;
> > + new = vq->next_avail_idx;
> > + vq->num_added = 0;
> > +
> > snapshot = *(u32 *)vq->vring_packed.device;
> > + off_wrap = virtio16_to_cpu(_vq->vdev, snapshot & 0xffff);
> > flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
> > #ifdef DEBUG
> > @@ -1006,7 +1011,10 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > vq->last_add_time_valid = false;
> > #endif
> > - needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > + if (flags == VRING_EVENT_F_DESC)
> > + needs_kick = vring_need_event(off_wrap & ~(1<<15), new, old);
>
> I wonder whether or not the math is correct. Both new and event are in the
> unit of descriptor ring size, but old looks not.
What vring_need_event() cares about is the distance between
`new` and `old`, i.e. vq->num_added. So I think there
is nothing wrong with `old`. But the calculation of the
distance between `new` and `event_idx` isn't right when
`new` wraps. How do you think about the below code:
wrap_counter = off_wrap >> 15;
event_idx = off_wrap & ~(1<<15);
if (wrap_counter != vq->wrap_counter)
event_idx -= vq->vring_packed.num;
needs_kick = vring_need_event(event_idx, new, old);
Best regards,
Tiwei Bie
>
> Thanks
>
> > + else
> > + needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > END_USE(vq);
> > return needs_kick;
> > }
> > @@ -1116,6 +1124,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> > if (vq->last_used_idx >= vq->vring_packed.num)
> > vq->last_used_idx -= vq->vring_packed.num;
> > + /* If we expect an interrupt for the next entry, tell host
> > + * by writing event index and flush out the write before
> > + * the read in the next get_buf call. */
> > + if (vq->event_flags_shadow == VRING_EVENT_F_DESC)
> > + virtio_store_mb(vq->weak_barriers,
> > + &vq->vring_packed.driver->off_wrap,
> > + cpu_to_virtio16(_vq->vdev, vq->last_used_idx |
> > + (vq->wrap_counter << 15)));
> > +
> > #ifdef DEBUG
> > vq->last_add_time_valid = false;
> > #endif
> > @@ -1143,10 +1160,17 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> > /* We optimistically turn back on interrupts, then check if there was
> > * more to do. */
> > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > + * either clear the flags bit or point the event index at the next
> > + * entry. Always update the event index to keep code simple. */
> > +
> > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > + vq->last_used_idx | (vq->wrap_counter << 15));
> > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > virtio_wmb(vq->weak_barriers);
> > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > + VRING_EVENT_F_ENABLE;
> > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > vq->event_flags_shadow);
> > }
> > @@ -1172,15 +1196,34 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
> > static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > + u16 bufs, used_idx, wrap_counter;
> > START_USE(vq);
> > /* We optimistically turn back on interrupts, then check if there was
> > * more to do. */
> > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > + * either clear the flags bit or point the event index at the next
> > + * entry. Always update the event index to keep code simple. */
> > +
> > + /* TODO: tune this threshold */
> > + bufs = (u16)(vq->next_avail_idx - vq->last_used_idx) * 3 / 4;
> > +
> > + used_idx = vq->last_used_idx + bufs;
> > + wrap_counter = vq->wrap_counter;
> > +
> > + if (used_idx >= vq->vring_packed.num) {
> > + used_idx -= vq->vring_packed.num;
> > + wrap_counter ^= 1;
> > + }
> > +
> > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > + used_idx | (wrap_counter << 15));
> > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > virtio_wmb(vq->weak_barriers);
> > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > + VRING_EVENT_F_ENABLE;
> > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > vq->event_flags_shadow);
> > }
> > @@ -1822,8 +1865,10 @@ void vring_transport_features(struct virtio_device *vdev)
> > switch (i) {
> > case VIRTIO_RING_F_INDIRECT_DESC:
> > break;
> > +#if 0
> > case VIRTIO_RING_F_EVENT_IDX:
> > break;
> > +#endif
> > case VIRTIO_F_VERSION_1:
> > break;
> > case VIRTIO_F_IOMMU_PLATFORM:
>
On Wed, May 02, 2018 at 03:28:19PM +0800, Tiwei Bie wrote:
> On Wed, May 02, 2018 at 10:51:06AM +0800, Jason Wang wrote:
> > On 2018年04月25日 13:15, Tiwei Bie wrote:
> > > This commit introduces the event idx support in packed
> > > ring. This feature is temporarily disabled, because the
> > > implementation in this patch may not work as expected,
> > > and some further discussions on the implementation are
> > > needed, e.g. do we have to check the wrap counter when
> > > checking whether a kick is needed?
> > >
> > > Signed-off-by: Tiwei Bie <[email protected]>
> > > ---
> > > drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++++++----
> > > 1 file changed, 49 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 0181e93897be..b1039c2985b9 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -986,7 +986,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > > static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > {
> > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > - u16 flags;
> > > + u16 new, old, off_wrap, flags;
> > > bool needs_kick;
> > > u32 snapshot;
> > > @@ -995,7 +995,12 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > * suppressions. */
> > > virtio_mb(vq->weak_barriers);
> > > + old = vq->next_avail_idx - vq->num_added;
> > > + new = vq->next_avail_idx;
> > > + vq->num_added = 0;
> > > +
> > > snapshot = *(u32 *)vq->vring_packed.device;
> > > + off_wrap = virtio16_to_cpu(_vq->vdev, snapshot & 0xffff);
> > > flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
> > > #ifdef DEBUG
> > > @@ -1006,7 +1011,10 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > vq->last_add_time_valid = false;
> > > #endif
> > > - needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > > + if (flags == VRING_EVENT_F_DESC)
> > > + needs_kick = vring_need_event(off_wrap & ~(1<<15), new, old);
> >
> > I wonder whether or not the math is correct. Both new and event are in the
> > unit of descriptor ring size, but old looks not.
>
> What vring_need_event() cares is the distance between
> `new` and `old`, i.e. vq->num_added. So I think there
> is nothing wrong with `old`. But the calculation of the
> distance between `new` and `event_idx` isn't right when
> `new` wraps. How do you think about the below code:
>
> wrap_counter = off_wrap >> 15;
> event_idx = off_wrap & ~(1<<15);
> if (wrap_counter != vq->wrap_counter)
> event_idx -= vq->vring_packed.num;
>
> needs_kick = vring_need_event(event_idx, new, old);
I suspect this hack won't work for non power of 2 ring.
> Best regards,
> Tiwei Bie
>
>
> >
> > Thanks
> >
> > > + else
> > > + needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > > END_USE(vq);
> > > return needs_kick;
> > > }
> > > @@ -1116,6 +1124,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> > > if (vq->last_used_idx >= vq->vring_packed.num)
> > > vq->last_used_idx -= vq->vring_packed.num;
> > > + /* If we expect an interrupt for the next entry, tell host
> > > + * by writing event index and flush out the write before
> > > + * the read in the next get_buf call. */
> > > + if (vq->event_flags_shadow == VRING_EVENT_F_DESC)
> > > + virtio_store_mb(vq->weak_barriers,
> > > + &vq->vring_packed.driver->off_wrap,
> > > + cpu_to_virtio16(_vq->vdev, vq->last_used_idx |
> > > + (vq->wrap_counter << 15)));
> > > +
> > > #ifdef DEBUG
> > > vq->last_add_time_valid = false;
> > > #endif
> > > @@ -1143,10 +1160,17 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> > > /* We optimistically turn back on interrupts, then check if there was
> > > * more to do. */
> > > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > + * either clear the flags bit or point the event index at the next
> > > + * entry. Always update the event index to keep code simple. */
> > > +
> > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > + vq->last_used_idx | (vq->wrap_counter << 15));
> > > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > virtio_wmb(vq->weak_barriers);
> > > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > + VRING_EVENT_F_ENABLE;
> > > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > vq->event_flags_shadow);
> > > }
> > > @@ -1172,15 +1196,34 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
> > > static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> > > {
> > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > + u16 bufs, used_idx, wrap_counter;
> > > START_USE(vq);
> > > /* We optimistically turn back on interrupts, then check if there was
> > > * more to do. */
> > > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > + * either clear the flags bit or point the event index at the next
> > > + * entry. Always update the event index to keep code simple. */
> > > +
> > > + /* TODO: tune this threshold */
> > > + bufs = (u16)(vq->next_avail_idx - vq->last_used_idx) * 3 / 4;
> > > +
> > > + used_idx = vq->last_used_idx + bufs;
> > > + wrap_counter = vq->wrap_counter;
> > > +
> > > + if (used_idx >= vq->vring_packed.num) {
> > > + used_idx -= vq->vring_packed.num;
> > > + wrap_counter ^= 1;
> > > + }
> > > +
> > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > + used_idx | (wrap_counter << 15));
> > > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > virtio_wmb(vq->weak_barriers);
> > > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > + VRING_EVENT_F_ENABLE;
> > > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > vq->event_flags_shadow);
> > > }
> > > @@ -1822,8 +1865,10 @@ void vring_transport_features(struct virtio_device *vdev)
> > > switch (i) {
> > > case VIRTIO_RING_F_INDIRECT_DESC:
> > > break;
> > > +#if 0
> > > case VIRTIO_RING_F_EVENT_IDX:
> > > break;
> > > +#endif
> > > case VIRTIO_F_VERSION_1:
> > > break;
> > > case VIRTIO_F_IOMMU_PLATFORM:
> >
On Wed, May 02, 2018 at 04:51:01PM +0300, Michael S. Tsirkin wrote:
> On Wed, May 02, 2018 at 03:28:19PM +0800, Tiwei Bie wrote:
> > On Wed, May 02, 2018 at 10:51:06AM +0800, Jason Wang wrote:
> > > On 2018年04月25日 13:15, Tiwei Bie wrote:
> > > > This commit introduces the event idx support in packed
> > > > ring. This feature is temporarily disabled, because the
> > > > implementation in this patch may not work as expected,
> > > > and some further discussions on the implementation are
> > > > needed, e.g. do we have to check the wrap counter when
> > > > checking whether a kick is needed?
> > > >
> > > > Signed-off-by: Tiwei Bie <[email protected]>
> > > > ---
> > > > drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++++++----
> > > > 1 file changed, 49 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > index 0181e93897be..b1039c2985b9 100644
> > > > --- a/drivers/virtio/virtio_ring.c
> > > > +++ b/drivers/virtio/virtio_ring.c
> > > > @@ -986,7 +986,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > > > static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > {
> > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > - u16 flags;
> > > > + u16 new, old, off_wrap, flags;
> > > > bool needs_kick;
> > > > u32 snapshot;
> > > > @@ -995,7 +995,12 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > * suppressions. */
> > > > virtio_mb(vq->weak_barriers);
> > > > + old = vq->next_avail_idx - vq->num_added;
> > > > + new = vq->next_avail_idx;
> > > > + vq->num_added = 0;
> > > > +
> > > > snapshot = *(u32 *)vq->vring_packed.device;
> > > > + off_wrap = virtio16_to_cpu(_vq->vdev, snapshot & 0xffff);
> > > > flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
> > > > #ifdef DEBUG
> > > > @@ -1006,7 +1011,10 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > vq->last_add_time_valid = false;
> > > > #endif
> > > > - needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > > > + if (flags == VRING_EVENT_F_DESC)
> > > > + needs_kick = vring_need_event(off_wrap & ~(1<<15), new, old);
> > >
> > > I wonder whether or not the math is correct. Both new and event are in the
> > > unit of descriptor ring size, but old looks not.
> >
> > What vring_need_event() cares is the distance between
> > `new` and `old`, i.e. vq->num_added. So I think there
> > is nothing wrong with `old`. But the calculation of the
> > distance between `new` and `event_idx` isn't right when
> > `new` wraps. How do you think about the below code:
> >
> > wrap_counter = off_wrap >> 15;
> > event_idx = off_wrap & ~(1<<15);
> > if (wrap_counter != vq->wrap_counter)
> > event_idx -= vq->vring_packed.num;
> >
> > needs_kick = vring_need_event(event_idx, new, old);
>
> I suspect this hack won't work for non power of 2 ring.
Above code doesn't require the ring size to be a power of 2.
For (__u16)(new_idx - old), what we want to get is vq->num_added.
old = vq->next_avail_idx - vq->num_added;
new = vq->next_avail_idx;
When vq->next_avail_idx >= vq->num_added, it's obvious that,
(__u16)(new_idx - old) is vq->num_added.
And when vq->next_avail_idx < vq->num_added, new will be smaller
than old (old will be a big unsigned number), but (__u16)(new_idx
- old) is still vq->num_added.
For (__u16)(new_idx - event_idx - 1), when new wraps and event_idx
doesn't wrap, the most straightforward way to calculate it is:
(new + vq->vring_packed.num) - event_idx - 1.
But we can also calculate it in this way:
event_idx -= vq->vring_packed.num;
(event_idx will be a big unsigned number)
Then (__u16)(new_idx - event_idx - 1) will be the value we want.
Best regards,
Tiwei Bie
>
>
> > Best regards,
> > Tiwei Bie
> >
> >
> > >
> > > Thanks
> > >
> > > > + else
> > > > + needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > > > END_USE(vq);
> > > > return needs_kick;
> > > > }
> > > > @@ -1116,6 +1124,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> > > > if (vq->last_used_idx >= vq->vring_packed.num)
> > > > vq->last_used_idx -= vq->vring_packed.num;
> > > > + /* If we expect an interrupt for the next entry, tell host
> > > > + * by writing event index and flush out the write before
> > > > + * the read in the next get_buf call. */
> > > > + if (vq->event_flags_shadow == VRING_EVENT_F_DESC)
> > > > + virtio_store_mb(vq->weak_barriers,
> > > > + &vq->vring_packed.driver->off_wrap,
> > > > + cpu_to_virtio16(_vq->vdev, vq->last_used_idx |
> > > > + (vq->wrap_counter << 15)));
> > > > +
> > > > #ifdef DEBUG
> > > > vq->last_add_time_valid = false;
> > > > #endif
> > > > @@ -1143,10 +1160,17 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> > > > /* We optimistically turn back on interrupts, then check if there was
> > > > * more to do. */
> > > > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > > + * either clear the flags bit or point the event index at the next
> > > > + * entry. Always update the event index to keep code simple. */
> > > > +
> > > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > > + vq->last_used_idx | (vq->wrap_counter << 15));
> > > > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > > virtio_wmb(vq->weak_barriers);
> > > > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > > + VRING_EVENT_F_ENABLE;
> > > > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > > vq->event_flags_shadow);
> > > > }
> > > > @@ -1172,15 +1196,34 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
> > > > static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> > > > {
> > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > + u16 bufs, used_idx, wrap_counter;
> > > > START_USE(vq);
> > > > /* We optimistically turn back on interrupts, then check if there was
> > > > * more to do. */
> > > > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > > + * either clear the flags bit or point the event index at the next
> > > > + * entry. Always update the event index to keep code simple. */
> > > > +
> > > > + /* TODO: tune this threshold */
> > > > + bufs = (u16)(vq->next_avail_idx - vq->last_used_idx) * 3 / 4;
> > > > +
> > > > + used_idx = vq->last_used_idx + bufs;
> > > > + wrap_counter = vq->wrap_counter;
> > > > +
> > > > + if (used_idx >= vq->vring_packed.num) {
> > > > + used_idx -= vq->vring_packed.num;
> > > > + wrap_counter ^= 1;
> > > > + }
> > > > +
> > > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > > + used_idx | (wrap_counter << 15));
> > > > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > > virtio_wmb(vq->weak_barriers);
> > > > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > > + VRING_EVENT_F_ENABLE;
> > > > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > > vq->event_flags_shadow);
> > > > }
> > > > @@ -1822,8 +1865,10 @@ void vring_transport_features(struct virtio_device *vdev)
> > > > switch (i) {
> > > > case VIRTIO_RING_F_INDIRECT_DESC:
> > > > break;
> > > > +#if 0
> > > > case VIRTIO_RING_F_EVENT_IDX:
> > > > break;
> > > > +#endif
> > > > case VIRTIO_F_VERSION_1:
> > > > break;
> > > > case VIRTIO_F_IOMMU_PLATFORM:
> > >
On Wed, May 02, 2018 at 11:12:55PM +0800, Tiwei Bie wrote:
> On Wed, May 02, 2018 at 04:51:01PM +0300, Michael S. Tsirkin wrote:
> > On Wed, May 02, 2018 at 03:28:19PM +0800, Tiwei Bie wrote:
> > > On Wed, May 02, 2018 at 10:51:06AM +0800, Jason Wang wrote:
> > > > On 2018年04月25日 13:15, Tiwei Bie wrote:
> > > > > This commit introduces the event idx support in packed
> > > > > ring. This feature is temporarily disabled, because the
> > > > > implementation in this patch may not work as expected,
> > > > > and some further discussions on the implementation are
> > > > > needed, e.g. do we have to check the wrap counter when
> > > > > checking whether a kick is needed?
> > > > >
> > > > > Signed-off-by: Tiwei Bie <[email protected]>
> > > > > ---
> > > > > drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++++++----
> > > > > 1 file changed, 49 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > index 0181e93897be..b1039c2985b9 100644
> > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > @@ -986,7 +986,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > > > > static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > > {
> > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > - u16 flags;
> > > > > + u16 new, old, off_wrap, flags;
> > > > > bool needs_kick;
> > > > > u32 snapshot;
> > > > > @@ -995,7 +995,12 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > > * suppressions. */
> > > > > virtio_mb(vq->weak_barriers);
> > > > > + old = vq->next_avail_idx - vq->num_added;
> > > > > + new = vq->next_avail_idx;
> > > > > + vq->num_added = 0;
> > > > > +
> > > > > snapshot = *(u32 *)vq->vring_packed.device;
> > > > > + off_wrap = virtio16_to_cpu(_vq->vdev, snapshot & 0xffff);
> > > > > flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
> > > > > #ifdef DEBUG
> > > > > @@ -1006,7 +1011,10 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > > vq->last_add_time_valid = false;
> > > > > #endif
> > > > > - needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > > > > + if (flags == VRING_EVENT_F_DESC)
> > > > > + needs_kick = vring_need_event(off_wrap & ~(1<<15), new, old);
> > > >
> > > > I wonder whether or not the math is correct. Both new and event are in the
> > > > unit of descriptor ring size, but old looks not.
> > >
> > > What vring_need_event() cares is the distance between
> > > `new` and `old`, i.e. vq->num_added. So I think there
> > > is nothing wrong with `old`. But the calculation of the
> > > distance between `new` and `event_idx` isn't right when
> > > `new` wraps. How do you think about the below code:
> > >
> > > wrap_counter = off_wrap >> 15;
> > > event_idx = off_wrap & ~(1<<15);
> > > if (wrap_counter != vq->wrap_counter)
> > > event_idx -= vq->vring_packed.num;
> > >
> > > needs_kick = vring_need_event(event_idx, new, old);
> >
> > I suspect this hack won't work for non power of 2 ring.
>
> Above code doesn't require the ring size to be a power of 2.
>
> For (__u16)(new_idx - old), what we want to get is vq->num_added.
>
> old = vq->next_avail_idx - vq->num_added;
> new = vq->next_avail_idx;
>
> When vq->next_avail_idx >= vq->num_added, it's obvious that,
> (__u16)(new_idx - old) is vq->num_added.
>
> And when vq->next_avail_idx < vq->num_added, new will be smaller
> than old (old will be a big unsigned number), but (__u16)(new_idx
> - old) is still vq->num_added.
>
> For (__u16)(new_idx - event_idx - 1), when new wraps and event_idx
> doesn't wrap, the most straightforward way to calculate it is:
> (new + vq->vring_packed.num) - event_idx - 1.
So how about we use the straightforward way then?
> But we can also calculate it in this way:
>
> event_idx -= vq->vring_packed.num;
> (event_idx will be a big unsigned number)
>
> Then (__u16)(new_idx - event_idx - 1) will be the value we want.
>
> Best regards,
> Tiwei Bie
> >
> >
> > > Best regards,
> > > Tiwei Bie
> > >
> > >
> > > >
> > > > Thanks
> > > >
> > > > > + else
> > > > > + needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > > > > END_USE(vq);
> > > > > return needs_kick;
> > > > > }
> > > > > @@ -1116,6 +1124,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> > > > > if (vq->last_used_idx >= vq->vring_packed.num)
> > > > > vq->last_used_idx -= vq->vring_packed.num;
> > > > > + /* If we expect an interrupt for the next entry, tell host
> > > > > + * by writing event index and flush out the write before
> > > > > + * the read in the next get_buf call. */
> > > > > + if (vq->event_flags_shadow == VRING_EVENT_F_DESC)
> > > > > + virtio_store_mb(vq->weak_barriers,
> > > > > + &vq->vring_packed.driver->off_wrap,
> > > > > + cpu_to_virtio16(_vq->vdev, vq->last_used_idx |
> > > > > + (vq->wrap_counter << 15)));
> > > > > +
> > > > > #ifdef DEBUG
> > > > > vq->last_add_time_valid = false;
> > > > > #endif
> > > > > @@ -1143,10 +1160,17 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> > > > > /* We optimistically turn back on interrupts, then check if there was
> > > > > * more to do. */
> > > > > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > > > + * either clear the flags bit or point the event index at the next
> > > > > + * entry. Always update the event index to keep code simple. */
> > > > > +
> > > > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > > > + vq->last_used_idx | (vq->wrap_counter << 15));
> > > > > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > > > virtio_wmb(vq->weak_barriers);
> > > > > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > > > + VRING_EVENT_F_ENABLE;
> > > > > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > > > vq->event_flags_shadow);
> > > > > }
> > > > > @@ -1172,15 +1196,34 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
> > > > > static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> > > > > {
> > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > + u16 bufs, used_idx, wrap_counter;
> > > > > START_USE(vq);
> > > > > /* We optimistically turn back on interrupts, then check if there was
> > > > > * more to do. */
> > > > > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > > > + * either clear the flags bit or point the event index at the next
> > > > > + * entry. Always update the event index to keep code simple. */
> > > > > +
> > > > > + /* TODO: tune this threshold */
> > > > > + bufs = (u16)(vq->next_avail_idx - vq->last_used_idx) * 3 / 4;
> > > > > +
> > > > > + used_idx = vq->last_used_idx + bufs;
> > > > > + wrap_counter = vq->wrap_counter;
> > > > > +
> > > > > + if (used_idx >= vq->vring_packed.num) {
> > > > > + used_idx -= vq->vring_packed.num;
> > > > > + wrap_counter ^= 1;
> > > > > + }
> > > > > +
> > > > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > > > + used_idx | (wrap_counter << 15));
> > > > > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > > > virtio_wmb(vq->weak_barriers);
> > > > > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > > > + VRING_EVENT_F_ENABLE;
> > > > > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > > > vq->event_flags_shadow);
> > > > > }
> > > > > @@ -1822,8 +1865,10 @@ void vring_transport_features(struct virtio_device *vdev)
> > > > > switch (i) {
> > > > > case VIRTIO_RING_F_INDIRECT_DESC:
> > > > > break;
> > > > > +#if 0
> > > > > case VIRTIO_RING_F_EVENT_IDX:
> > > > > break;
> > > > > +#endif
> > > > > case VIRTIO_F_VERSION_1:
> > > > > break;
> > > > > case VIRTIO_F_IOMMU_PLATFORM:
> > > >
On Wed, May 02, 2018 at 06:42:57PM +0300, Michael S. Tsirkin wrote:
> On Wed, May 02, 2018 at 11:12:55PM +0800, Tiwei Bie wrote:
> > On Wed, May 02, 2018 at 04:51:01PM +0300, Michael S. Tsirkin wrote:
> > > On Wed, May 02, 2018 at 03:28:19PM +0800, Tiwei Bie wrote:
> > > > On Wed, May 02, 2018 at 10:51:06AM +0800, Jason Wang wrote:
> > > > > On 2018年04月25日 13:15, Tiwei Bie wrote:
> > > > > > This commit introduces the event idx support in packed
> > > > > > ring. This feature is temporarily disabled, because the
> > > > > > implementation in this patch may not work as expected,
> > > > > > and some further discussions on the implementation are
> > > > > > needed, e.g. do we have to check the wrap counter when
> > > > > > checking whether a kick is needed?
> > > > > >
> > > > > > Signed-off-by: Tiwei Bie <[email protected]>
> > > > > > ---
> > > > > > drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++++++----
> > > > > > 1 file changed, 49 insertions(+), 4 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > index 0181e93897be..b1039c2985b9 100644
> > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > @@ -986,7 +986,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > > > > > static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > > > {
> > > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > - u16 flags;
> > > > > > + u16 new, old, off_wrap, flags;
> > > > > > bool needs_kick;
> > > > > > u32 snapshot;
> > > > > > @@ -995,7 +995,12 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > > > * suppressions. */
> > > > > > virtio_mb(vq->weak_barriers);
> > > > > > + old = vq->next_avail_idx - vq->num_added;
> > > > > > + new = vq->next_avail_idx;
> > > > > > + vq->num_added = 0;
> > > > > > +
> > > > > > snapshot = *(u32 *)vq->vring_packed.device;
> > > > > > + off_wrap = virtio16_to_cpu(_vq->vdev, snapshot & 0xffff);
> > > > > > flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
> > > > > > #ifdef DEBUG
> > > > > > @@ -1006,7 +1011,10 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > > > vq->last_add_time_valid = false;
> > > > > > #endif
> > > > > > - needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > > > > > + if (flags == VRING_EVENT_F_DESC)
> > > > > > + needs_kick = vring_need_event(off_wrap & ~(1<<15), new, old);
> > > > >
> > > > > I wonder whether or not the math is correct. Both new and event are in the
> > > > > unit of descriptor ring size, but old looks not.
> > > >
> > > > What vring_need_event() cares is the distance between
> > > > `new` and `old`, i.e. vq->num_added. So I think there
> > > > is nothing wrong with `old`. But the calculation of the
> > > > distance between `new` and `event_idx` isn't right when
> > > > `new` wraps. How do you think about the below code:
> > > >
> > > > wrap_counter = off_wrap >> 15;
> > > > event_idx = off_wrap & ~(1<<15);
> > > > if (wrap_counter != vq->wrap_counter)
> > > > event_idx -= vq->vring_packed.num;
> > > >
> > > > needs_kick = vring_need_event(event_idx, new, old);
> > >
> > > I suspect this hack won't work for non power of 2 ring.
> >
> > Above code doesn't require the ring size to be a power of 2.
> >
> > For (__u16)(new_idx - old), what we want to get is vq->num_added.
> >
> > old = vq->next_avail_idx - vq->num_added;
> > new = vq->next_avail_idx;
> >
> > When vq->next_avail_idx >= vq->num_added, it's obvious that,
> > (__u16)(new_idx - old) is vq->num_added.
> >
> > And when vq->next_avail_idx < vq->num_added, new will be smaller
> > than old (old will be a big unsigned number), but (__u16)(new_idx
> > - old) is still vq->num_added.
> >
> > For (__u16)(new_idx - event_idx - 1), when new wraps and event_idx
> > doesn't wrap, the most straightforward way to calculate it is:
> > (new + vq->vring_packed.num) - event_idx - 1.
>
> So how about we use the straightforward way then?
You mean we do new += vq->vring_packed.num instead
of event_idx -= vq->vring_packed.num before calling
vring_need_event()?
The problem is that, the second param (new_idx) of
vring_need_event() will be used for:
(__u16)(new_idx - event_idx - 1)
(__u16)(new_idx - old)
So if we change new, we will need to change old too.
And that would be an ugly hack..
Best regards,
Tiwei Bie
>
> > But we can also calculate it in this way:
> >
> > event_idx -= vq->vring_packed.num;
> > (event_idx will be a big unsigned number)
> >
> > Then (__u16)(new_idx - event_idx - 1) will be the value we want.
> >
> > Best regards,
> > Tiwei Bie
>
>
> > >
> > >
> > > > Best regards,
> > > > Tiwei Bie
> > > >
> > > >
> > > > >
> > > > > Thanks
> > > > >
> > > > > > + else
> > > > > > + needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > > > > > END_USE(vq);
> > > > > > return needs_kick;
> > > > > > }
> > > > > > @@ -1116,6 +1124,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> > > > > > if (vq->last_used_idx >= vq->vring_packed.num)
> > > > > > vq->last_used_idx -= vq->vring_packed.num;
> > > > > > + /* If we expect an interrupt for the next entry, tell host
> > > > > > + * by writing event index and flush out the write before
> > > > > > + * the read in the next get_buf call. */
> > > > > > + if (vq->event_flags_shadow == VRING_EVENT_F_DESC)
> > > > > > + virtio_store_mb(vq->weak_barriers,
> > > > > > + &vq->vring_packed.driver->off_wrap,
> > > > > > + cpu_to_virtio16(_vq->vdev, vq->last_used_idx |
> > > > > > + (vq->wrap_counter << 15)));
> > > > > > +
> > > > > > #ifdef DEBUG
> > > > > > vq->last_add_time_valid = false;
> > > > > > #endif
> > > > > > @@ -1143,10 +1160,17 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> > > > > > /* We optimistically turn back on interrupts, then check if there was
> > > > > > * more to do. */
> > > > > > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > > > > + * either clear the flags bit or point the event index at the next
> > > > > > + * entry. Always update the event index to keep code simple. */
> > > > > > +
> > > > > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > > > > + vq->last_used_idx | (vq->wrap_counter << 15));
> > > > > > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > > > > virtio_wmb(vq->weak_barriers);
> > > > > > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > > > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > > > > + VRING_EVENT_F_ENABLE;
> > > > > > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > > > > vq->event_flags_shadow);
> > > > > > }
> > > > > > @@ -1172,15 +1196,34 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
> > > > > > static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> > > > > > {
> > > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > + u16 bufs, used_idx, wrap_counter;
> > > > > > START_USE(vq);
> > > > > > /* We optimistically turn back on interrupts, then check if there was
> > > > > > * more to do. */
> > > > > > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > > > > + * either clear the flags bit or point the event index at the next
> > > > > > + * entry. Always update the event index to keep code simple. */
> > > > > > +
> > > > > > + /* TODO: tune this threshold */
> > > > > > + bufs = (u16)(vq->next_avail_idx - vq->last_used_idx) * 3 / 4;
> > > > > > +
> > > > > > + used_idx = vq->last_used_idx + bufs;
> > > > > > + wrap_counter = vq->wrap_counter;
> > > > > > +
> > > > > > + if (used_idx >= vq->vring_packed.num) {
> > > > > > + used_idx -= vq->vring_packed.num;
> > > > > > + wrap_counter ^= 1;
> > > > > > + }
> > > > > > +
> > > > > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > > > > + used_idx | (wrap_counter << 15));
> > > > > > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > > > > virtio_wmb(vq->weak_barriers);
> > > > > > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > > > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > > > > + VRING_EVENT_F_ENABLE;
> > > > > > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > > > > vq->event_flags_shadow);
> > > > > > }
> > > > > > @@ -1822,8 +1865,10 @@ void vring_transport_features(struct virtio_device *vdev)
> > > > > > switch (i) {
> > > > > > case VIRTIO_RING_F_INDIRECT_DESC:
> > > > > > break;
> > > > > > +#if 0
> > > > > > case VIRTIO_RING_F_EVENT_IDX:
> > > > > > break;
> > > > > > +#endif
> > > > > > case VIRTIO_F_VERSION_1:
> > > > > > break;
> > > > > > case VIRTIO_F_IOMMU_PLATFORM:
> > > > >
On Thu, May 03, 2018 at 09:11:16AM +0800, Tiwei Bie wrote:
> On Wed, May 02, 2018 at 06:42:57PM +0300, Michael S. Tsirkin wrote:
> > On Wed, May 02, 2018 at 11:12:55PM +0800, Tiwei Bie wrote:
> > > On Wed, May 02, 2018 at 04:51:01PM +0300, Michael S. Tsirkin wrote:
> > > > On Wed, May 02, 2018 at 03:28:19PM +0800, Tiwei Bie wrote:
> > > > > On Wed, May 02, 2018 at 10:51:06AM +0800, Jason Wang wrote:
> > > > > > On 2018年04月25日 13:15, Tiwei Bie wrote:
> > > > > > > This commit introduces the event idx support in packed
> > > > > > > ring. This feature is temporarily disabled, because the
> > > > > > > implementation in this patch may not work as expected,
> > > > > > > and some further discussions on the implementation are
> > > > > > > needed, e.g. do we have to check the wrap counter when
> > > > > > > checking whether a kick is needed?
> > > > > > >
> > > > > > > Signed-off-by: Tiwei Bie <[email protected]>
> > > > > > > ---
> > > > > > > drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++++++----
> > > > > > > 1 file changed, 49 insertions(+), 4 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > index 0181e93897be..b1039c2985b9 100644
> > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > @@ -986,7 +986,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > > > > > > static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > > > > {
> > > > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > - u16 flags;
> > > > > > > + u16 new, old, off_wrap, flags;
> > > > > > > bool needs_kick;
> > > > > > > u32 snapshot;
> > > > > > > @@ -995,7 +995,12 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > > > > * suppressions. */
> > > > > > > virtio_mb(vq->weak_barriers);
> > > > > > > + old = vq->next_avail_idx - vq->num_added;
> > > > > > > + new = vq->next_avail_idx;
> > > > > > > + vq->num_added = 0;
> > > > > > > +
> > > > > > > snapshot = *(u32 *)vq->vring_packed.device;
> > > > > > > + off_wrap = virtio16_to_cpu(_vq->vdev, snapshot & 0xffff);
> > > > > > > flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
> > > > > > > #ifdef DEBUG
> > > > > > > @@ -1006,7 +1011,10 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > > > > vq->last_add_time_valid = false;
> > > > > > > #endif
> > > > > > > - needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > > > > > > + if (flags == VRING_EVENT_F_DESC)
> > > > > > > + needs_kick = vring_need_event(off_wrap & ~(1<<15), new, old);
> > > > > >
> > > > > > I wonder whether or not the math is correct. Both new and event are in the
> > > > > > unit of descriptor ring size, but old looks not.
> > > > >
> > > > > What vring_need_event() cares is the distance between
> > > > > `new` and `old`, i.e. vq->num_added. So I think there
> > > > > is nothing wrong with `old`. But the calculation of the
> > > > > distance between `new` and `event_idx` isn't right when
> > > > > `new` wraps. How do you think about the below code:
> > > > >
> > > > > wrap_counter = off_wrap >> 15;
> > > > > event_idx = off_wrap & ~(1<<15);
> > > > > if (wrap_counter != vq->wrap_counter)
> > > > > event_idx -= vq->vring_packed.num;
> > > > >
> > > > > needs_kick = vring_need_event(event_idx, new, old);
> > > >
> > > > I suspect this hack won't work for non power of 2 ring.
> > >
> > > Above code doesn't require the ring size to be a power of 2.
> > >
> > > For (__u16)(new_idx - old), what we want to get is vq->num_added.
> > >
> > > old = vq->next_avail_idx - vq->num_added;
> > > new = vq->next_avail_idx;
> > >
> > > When vq->next_avail_idx >= vq->num_added, it's obvious that,
> > > (__u16)(new_idx - old) is vq->num_added.
> > >
> > > And when vq->next_avail_idx < vq->num_added, new will be smaller
> > > than old (old will be a big unsigned number), but (__u16)(new_idx
> > > - old) is still vq->num_added.
> > >
> > > For (__u16)(new_idx - event_idx - 1), when new wraps and event_idx
> > > doesn't wrap, the most straightforward way to calculate it is:
> > > (new + vq->vring_packed.num) - event_idx - 1.
> >
> > So how about we use the straightforward way then?
>
> You mean we do new += vq->vring_packed.num instead
> of event_idx -= vq->vring_packed.num before calling
> vring_need_event()?
>
> The problem is that, the second param (new_idx) of
> vring_need_event() will be used for:
>
> (__u16)(new_idx - event_idx - 1)
> (__u16)(new_idx - old)
>
> So if we change new, we will need to change old too.
I think that since we have a branch there anyway,
we are better off just special-casing if (wrap_counter != vq->wrap_counter).
Treat it differently and avoid casts.
> And that would be an ugly hack..
>
> Best regards,
> Tiwei Bie
I consider casts and huge numbers with two's complement
games even uglier.
> >
> > > But we can also calculate it in this way:
> > >
> > > event_idx -= vq->vring_packed.num;
> > > (event_idx will be a big unsigned number)
> > >
> > > Then (__u16)(new_idx - event_idx - 1) will be the value we want.
> > >
> > > Best regards,
> > > Tiwei Bie
> >
> >
> > > >
> > > >
> > > > > Best regards,
> > > > > Tiwei Bie
> > > > >
> > > > >
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > > > + else
> > > > > > > + needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > > > > > > END_USE(vq);
> > > > > > > return needs_kick;
> > > > > > > }
> > > > > > > @@ -1116,6 +1124,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> > > > > > > if (vq->last_used_idx >= vq->vring_packed.num)
> > > > > > > vq->last_used_idx -= vq->vring_packed.num;
> > > > > > > + /* If we expect an interrupt for the next entry, tell host
> > > > > > > + * by writing event index and flush out the write before
> > > > > > > + * the read in the next get_buf call. */
> > > > > > > + if (vq->event_flags_shadow == VRING_EVENT_F_DESC)
> > > > > > > + virtio_store_mb(vq->weak_barriers,
> > > > > > > + &vq->vring_packed.driver->off_wrap,
> > > > > > > + cpu_to_virtio16(_vq->vdev, vq->last_used_idx |
> > > > > > > + (vq->wrap_counter << 15)));
> > > > > > > +
> > > > > > > #ifdef DEBUG
> > > > > > > vq->last_add_time_valid = false;
> > > > > > > #endif
> > > > > > > @@ -1143,10 +1160,17 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> > > > > > > /* We optimistically turn back on interrupts, then check if there was
> > > > > > > * more to do. */
> > > > > > > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > > > > > + * either clear the flags bit or point the event index at the next
> > > > > > > + * entry. Always update the event index to keep code simple. */
> > > > > > > +
> > > > > > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > > > > > + vq->last_used_idx | (vq->wrap_counter << 15));
> > > > > > > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > > > > > virtio_wmb(vq->weak_barriers);
> > > > > > > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > > > > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > > > > > + VRING_EVENT_F_ENABLE;
> > > > > > > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > > > > > vq->event_flags_shadow);
> > > > > > > }
> > > > > > > @@ -1172,15 +1196,34 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
> > > > > > > static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> > > > > > > {
> > > > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > + u16 bufs, used_idx, wrap_counter;
> > > > > > > START_USE(vq);
> > > > > > > /* We optimistically turn back on interrupts, then check if there was
> > > > > > > * more to do. */
> > > > > > > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > > > > > + * either clear the flags bit or point the event index at the next
> > > > > > > + * entry. Always update the event index to keep code simple. */
> > > > > > > +
> > > > > > > + /* TODO: tune this threshold */
> > > > > > > + bufs = (u16)(vq->next_avail_idx - vq->last_used_idx) * 3 / 4;
> > > > > > > +
> > > > > > > + used_idx = vq->last_used_idx + bufs;
> > > > > > > + wrap_counter = vq->wrap_counter;
> > > > > > > +
> > > > > > > + if (used_idx >= vq->vring_packed.num) {
> > > > > > > + used_idx -= vq->vring_packed.num;
> > > > > > > + wrap_counter ^= 1;
> > > > > > > + }
> > > > > > > +
> > > > > > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > > > > > + used_idx | (wrap_counter << 15));
> > > > > > > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > > > > > virtio_wmb(vq->weak_barriers);
> > > > > > > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > > > > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > > > > > + VRING_EVENT_F_ENABLE;
> > > > > > > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > > > > > vq->event_flags_shadow);
> > > > > > > }
> > > > > > > @@ -1822,8 +1865,10 @@ void vring_transport_features(struct virtio_device *vdev)
> > > > > > > switch (i) {
> > > > > > > case VIRTIO_RING_F_INDIRECT_DESC:
> > > > > > > break;
> > > > > > > +#if 0
> > > > > > > case VIRTIO_RING_F_EVENT_IDX:
> > > > > > > break;
> > > > > > > +#endif
> > > > > > > case VIRTIO_F_VERSION_1:
> > > > > > > break;
> > > > > > > case VIRTIO_F_IOMMU_PLATFORM:
> > > > > >
On Thu, May 03, 2018 at 04:44:39AM +0300, Michael S. Tsirkin wrote:
> On Thu, May 03, 2018 at 09:11:16AM +0800, Tiwei Bie wrote:
> > On Wed, May 02, 2018 at 06:42:57PM +0300, Michael S. Tsirkin wrote:
> > > On Wed, May 02, 2018 at 11:12:55PM +0800, Tiwei Bie wrote:
> > > > On Wed, May 02, 2018 at 04:51:01PM +0300, Michael S. Tsirkin wrote:
> > > > > On Wed, May 02, 2018 at 03:28:19PM +0800, Tiwei Bie wrote:
> > > > > > On Wed, May 02, 2018 at 10:51:06AM +0800, Jason Wang wrote:
> > > > > > > On 2018年04月25日 13:15, Tiwei Bie wrote:
> > > > > > > > This commit introduces the event idx support in packed
> > > > > > > > ring. This feature is temporarily disabled, because the
> > > > > > > > implementation in this patch may not work as expected,
> > > > > > > > and some further discussions on the implementation are
> > > > > > > > needed, e.g. do we have to check the wrap counter when
> > > > > > > > checking whether a kick is needed?
> > > > > > > >
> > > > > > > > Signed-off-by: Tiwei Bie <[email protected]>
> > > > > > > > ---
> > > > > > > > drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++++++----
> > > > > > > > 1 file changed, 49 insertions(+), 4 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > index 0181e93897be..b1039c2985b9 100644
> > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > @@ -986,7 +986,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > > > > > > > static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > > > > > {
> > > > > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > > - u16 flags;
> > > > > > > > + u16 new, old, off_wrap, flags;
> > > > > > > > bool needs_kick;
> > > > > > > > u32 snapshot;
> > > > > > > > @@ -995,7 +995,12 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > > > > > * suppressions. */
> > > > > > > > virtio_mb(vq->weak_barriers);
> > > > > > > > + old = vq->next_avail_idx - vq->num_added;
> > > > > > > > + new = vq->next_avail_idx;
> > > > > > > > + vq->num_added = 0;
> > > > > > > > +
> > > > > > > > snapshot = *(u32 *)vq->vring_packed.device;
> > > > > > > > + off_wrap = virtio16_to_cpu(_vq->vdev, snapshot & 0xffff);
> > > > > > > > flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
> > > > > > > > #ifdef DEBUG
> > > > > > > > @@ -1006,7 +1011,10 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > > > > > vq->last_add_time_valid = false;
> > > > > > > > #endif
> > > > > > > > - needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > > > > > > > + if (flags == VRING_EVENT_F_DESC)
> > > > > > > > + needs_kick = vring_need_event(off_wrap & ~(1<<15), new, old);
> > > > > > >
> > > > > > > I wonder whether or not the math is correct. Both new and event are in the
> > > > > > > unit of descriptor ring size, but old looks not.
> > > > > >
> > > > > > What vring_need_event() cares is the distance between
> > > > > > `new` and `old`, i.e. vq->num_added. So I think there
> > > > > > is nothing wrong with `old`. But the calculation of the
> > > > > > distance between `new` and `event_idx` isn't right when
> > > > > > `new` wraps. How do you think about the below code:
> > > > > >
> > > > > > wrap_counter = off_wrap >> 15;
> > > > > > event_idx = off_wrap & ~(1<<15);
> > > > > > if (wrap_counter != vq->wrap_counter)
> > > > > > event_idx -= vq->vring_packed.num;
> > > > > >
> > > > > > needs_kick = vring_need_event(event_idx, new, old);
> > > > >
> > > > > I suspect this hack won't work for non power of 2 ring.
> > > >
> > > > Above code doesn't require the ring size to be a power of 2.
> > > >
> > > > For (__u16)(new_idx - old), what we want to get is vq->num_added.
> > > >
> > > > old = vq->next_avail_idx - vq->num_added;
> > > > new = vq->next_avail_idx;
> > > >
> > > > When vq->next_avail_idx >= vq->num_added, it's obvious that,
> > > > (__u16)(new_idx - old) is vq->num_added.
> > > >
> > > > And when vq->next_avail_idx < vq->num_added, new will be smaller
> > > > than old (old will be a big unsigned number), but (__u16)(new_idx
> > > > - old) is still vq->num_added.
> > > >
> > > > For (__u16)(new_idx - event_idx - 1), when new wraps and event_idx
> > > > doesn't wrap, the most straightforward way to calculate it is:
> > > > (new + vq->vring_packed.num) - event_idx - 1.
> > >
> > > So how about we use the straightforward way then?
> >
> > You mean we do new += vq->vring_packed.num instead
> > of event_idx -= vq->vring_packed.num before calling
> > vring_need_event()?
> >
> > The problem is that, the second param (new_idx) of
> > vring_need_event() will be used for:
> >
> > (__u16)(new_idx - event_idx - 1)
> > (__u16)(new_idx - old)
> >
> > So if we change new, we will need to change old too.
>
> I think that since we have a branch there anyway,
> we are better off just special-casing if (wrap_counter != vq->wrap_counter).
> Treat it differently and avoid casts.
>
> > And that would be an ugly hack..
> >
> > Best regards,
> > Tiwei Bie
>
> I consider casts and huge numbers with two's complement
> games even uglier.
The dependency on two's complement game is introduced
since the split ring.
In packed ring, old is calculated via:
old = vq->next_avail_idx - vq->num_added;
In split ring, old is calculated via:
old = vq->avail_idx_shadow - vq->num_added;
In both cases, when vq->num_added is bigger, old will
be a big number.
Best regards,
Tiwei Bie
>
> > >
> > > > But we can also calculate it in this way:
> > > >
> > > > event_idx -= vq->vring_packed.num;
> > > > (event_idx will be a big unsigned number)
> > > >
> > > > Then (__u16)(new_idx - event_idx - 1) will be the value we want.
> > > >
> > > > Best regards,
> > > > Tiwei Bie
> > >
> > >
> > > > >
> > > > >
> > > > > > Best regards,
> > > > > > Tiwei Bie
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > > > + else
> > > > > > > > + needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > > > > > > > END_USE(vq);
> > > > > > > > return needs_kick;
> > > > > > > > }
> > > > > > > > @@ -1116,6 +1124,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> > > > > > > > if (vq->last_used_idx >= vq->vring_packed.num)
> > > > > > > > vq->last_used_idx -= vq->vring_packed.num;
> > > > > > > > + /* If we expect an interrupt for the next entry, tell host
> > > > > > > > + * by writing event index and flush out the write before
> > > > > > > > + * the read in the next get_buf call. */
> > > > > > > > + if (vq->event_flags_shadow == VRING_EVENT_F_DESC)
> > > > > > > > + virtio_store_mb(vq->weak_barriers,
> > > > > > > > + &vq->vring_packed.driver->off_wrap,
> > > > > > > > + cpu_to_virtio16(_vq->vdev, vq->last_used_idx |
> > > > > > > > + (vq->wrap_counter << 15)));
> > > > > > > > +
> > > > > > > > #ifdef DEBUG
> > > > > > > > vq->last_add_time_valid = false;
> > > > > > > > #endif
> > > > > > > > @@ -1143,10 +1160,17 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> > > > > > > > /* We optimistically turn back on interrupts, then check if there was
> > > > > > > > * more to do. */
> > > > > > > > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > > > > > > + * either clear the flags bit or point the event index at the next
> > > > > > > > + * entry. Always update the event index to keep code simple. */
> > > > > > > > +
> > > > > > > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > > > > > > + vq->last_used_idx | (vq->wrap_counter << 15));
> > > > > > > > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > > > > > > virtio_wmb(vq->weak_barriers);
> > > > > > > > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > > > > > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > > > > > > + VRING_EVENT_F_ENABLE;
> > > > > > > > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > > > > > > vq->event_flags_shadow);
> > > > > > > > }
> > > > > > > > @@ -1172,15 +1196,34 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
> > > > > > > > static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> > > > > > > > {
> > > > > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > > + u16 bufs, used_idx, wrap_counter;
> > > > > > > > START_USE(vq);
> > > > > > > > /* We optimistically turn back on interrupts, then check if there was
> > > > > > > > * more to do. */
> > > > > > > > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > > > > > > + * either clear the flags bit or point the event index at the next
> > > > > > > > + * entry. Always update the event index to keep code simple. */
> > > > > > > > +
> > > > > > > > + /* TODO: tune this threshold */
> > > > > > > > + bufs = (u16)(vq->next_avail_idx - vq->last_used_idx) * 3 / 4;
> > > > > > > > +
> > > > > > > > + used_idx = vq->last_used_idx + bufs;
> > > > > > > > + wrap_counter = vq->wrap_counter;
> > > > > > > > +
> > > > > > > > + if (used_idx >= vq->vring_packed.num) {
> > > > > > > > + used_idx -= vq->vring_packed.num;
> > > > > > > > + wrap_counter ^= 1;
> > > > > > > > + }
> > > > > > > > +
> > > > > > > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > > > > > > + used_idx | (wrap_counter << 15));
> > > > > > > > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > > > > > > virtio_wmb(vq->weak_barriers);
> > > > > > > > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > > > > > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > > > > > > + VRING_EVENT_F_ENABLE;
> > > > > > > > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > > > > > > vq->event_flags_shadow);
> > > > > > > > }
> > > > > > > > @@ -1822,8 +1865,10 @@ void vring_transport_features(struct virtio_device *vdev)
> > > > > > > > switch (i) {
> > > > > > > > case VIRTIO_RING_F_INDIRECT_DESC:
> > > > > > > > break;
> > > > > > > > +#if 0
> > > > > > > > case VIRTIO_RING_F_EVENT_IDX:
> > > > > > > > break;
> > > > > > > > +#endif
> > > > > > > > case VIRTIO_F_VERSION_1:
> > > > > > > > break;
> > > > > > > > case VIRTIO_F_IOMMU_PLATFORM:
> > > > > > >
On 2018年05月03日 10:09, Tiwei Bie wrote:
>>>> So how about we use the straightforward way then?
>>> You mean we do new += vq->vring_packed.num instead
>>> of event_idx -= vq->vring_packed.num before calling
>>> vring_need_event()?
>>>
>>> The problem is that, the second param (new_idx) of
>>> vring_need_event() will be used for:
>>>
>>> (__u16)(new_idx - event_idx - 1)
>>> (__u16)(new_idx - old)
>>>
>>> So if we change new, we will need to change old too.
>> I think that since we have a branch there anyway,
>> we are better off just special-casing if (wrap_counter != vq->wrap_counter).
>> Treat it differently and avoid casts.
>>
>>> And that would be an ugly hack..
>>>
>>> Best regards,
>>> Tiwei Bie
>> I consider casts and huge numbers with two's complement
>> games even uglier.
> The dependency on two's complement game is introduced
> since the split ring.
>
> In packed ring, old is calculated via:
>
> old = vq->next_avail_idx - vq->num_added;
>
> In split ring, old is calculated via:
>
> old = vq->avail_idx_shadow - vq->num_added;
>
> In both cases, when vq->num_added is bigger, old will
> be a big number.
>
> Best regards,
> Tiwei Bie
>
How about just do something like vhost:
static u16 vhost_idx_diff(struct vhost_virtqueue *vq, u16 old, u16 new)
{
if (new > old)
return new - old;
return (new + vq->num - old);
}
static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
__u16 event_off, __u16 new,
__u16 old)
{
return (__u16)(vhost_idx_diff(vq, new, event_off) - 1) <
(__u16)vhost_idx_diff(vq, new, old);
}
?
On Thu, May 03, 2018 at 03:25:29PM +0800, Jason Wang wrote:
> On 2018年05月03日 10:09, Tiwei Bie wrote:
> > > > > So how about we use the straightforward way then?
> > > > You mean we do new += vq->vring_packed.num instead
> > > > of event_idx -= vq->vring_packed.num before calling
> > > > vring_need_event()?
> > > >
> > > > The problem is that, the second param (new_idx) of
> > > > vring_need_event() will be used for:
> > > >
> > > > (__u16)(new_idx - event_idx - 1)
> > > > (__u16)(new_idx - old)
> > > >
> > > > So if we change new, we will need to change old too.
> > > I think that since we have a branch there anyway,
> > > we are better off just special-casing if (wrap_counter != vq->wrap_counter).
> > > Treat it differently and avoid casts.
> > >
> > > > And that would be an ugly hack..
> > > >
> > > > Best regards,
> > > > Tiwei Bie
> > > I consider casts and huge numbers with two's complement
> > > games even uglier.
> > The dependency on two's complement game is introduced
> > since the split ring.
> >
> > In packed ring, old is calculated via:
> >
> > old = vq->next_avail_idx - vq->num_added;
> >
> > In split ring, old is calculated via:
> >
> > old = vq->avail_idx_shadow - vq->num_added;
> >
> > In both cases, when vq->num_added is bigger, old will
> > be a big number.
> >
> > Best regards,
> > Tiwei Bie
> >
>
> How about just do something like vhost:
>
> static u16 vhost_idx_diff(struct vhost_virtqueue *vq, u16 old, u16 new)
> {
> if (new > old)
> return new - old;
> return (new + vq->num - old);
> }
>
> static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
> __u16 event_off, __u16 new,
> __u16 old)
> {
> return (__u16)(vhost_idx_diff(vq, new, event_off) - 1) <
> (__u16)vhost_idx_diff(vq, new, old);
> }
>
> ?
It seems that there is a typo in the above code. The second
param of vhost_idx_diff() is `old`, but when calling this
function in vhost_vring_packed_need_event(), `new` is
passed as the second param.
If we assume the second param of vhost_idx_diff() is new
and the third one is old, i.e.:
static u16 vhost_idx_diff(struct vhost_virtqueue *vq, u16 new, u16 old)
{
if (new > old)
return new - old;
return (new + vq->num - old);
}
I think it's still not right.
Because in virtqueue_enable_cb_delayed(), we may set an
event_off which is bigger than new and both of them have
wrapped. And in this case, although new is smaller than
event_off (i.e. the third param -- old), new shouldn't
add vq->num, and actually we are expecting a very big
idx diff.
Best regards,
Tiwei Bie
On 2018年05月03日 21:54, Tiwei Bie wrote:
> On Thu, May 03, 2018 at 03:25:29PM +0800, Jason Wang wrote:
>> On 2018年05月03日 10:09, Tiwei Bie wrote:
>>>>>> So how about we use the straightforward way then?
>>>>> You mean we do new += vq->vring_packed.num instead
>>>>> of event_idx -= vq->vring_packed.num before calling
>>>>> vring_need_event()?
>>>>>
>>>>> The problem is that, the second param (new_idx) of
>>>>> vring_need_event() will be used for:
>>>>>
>>>>> (__u16)(new_idx - event_idx - 1)
>>>>> (__u16)(new_idx - old)
>>>>>
>>>>> So if we change new, we will need to change old too.
>>>> I think that since we have a branch there anyway,
>>>> we are better off just special-casing if (wrap_counter != vq->wrap_counter).
>>>> Treat is differenty and avoid casts.
>>>>
>>>>> And that would be an ugly hack..
>>>>>
>>>>> Best regards,
>>>>> Tiwei Bie
>>>> I consider casts and huge numbers with two's complement
>>>> games even uglier.
>>> The dependency on two's complement game is introduced
>>> since the split ring.
>>>
>>> In packed ring, old is calculated via:
>>>
>>> old = vq->next_avail_idx - vq->num_added;
>>>
>>> In split ring, old is calculated via:
>>>
>>> old = vq->avail_idx_shadow - vq->num_added;
>>>
>>> In both cases, when vq->num_added is bigger, old will
>>> be a big number.
>>>
>>> Best regards,
>>> Tiwei Bie
>>>
>> How about just do something like vhost:
>>
>> static u16 vhost_idx_diff(struct vhost_virtqueue *vq, u16 old, u16 new)
>> {
>> if (new > old)
>> return new - old;
>> return (new + vq->num - old);
>> }
>>
>> static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
>> __u16 event_off, __u16 new,
>> __u16 old)
>> {
>> return (__u16)(vhost_idx_diff(vq, new, event_off) - 1) <
>> (__u16)vhost_idx_diff(vq, new, old);
>> }
>>
>> ?
> It seems that there is a typo in above code. The second
> param of vhost_idx_diff() is `old`, but when calling this
> function in vhost_vring_packed_need_event(), `new` is
> passed as the second param.
Right.
>
> If we assume the second param of vhost_idx_diff() is new
> and the third one is old, i.e.:
>
> static u16 vhost_idx_diff(struct vhost_virtqueue *vq, u16 new, u16 old)
> {
> if (new > old)
> return new - old;
> return (new + vq->num - old);
> }
>
> I think it's still not right.
>
> Because in virtqueue_enable_cb_delayed(), we may set an
> event_off which is bigger than new and both of them have
> wrapped. And in this case, although new is smaller than
> event_off (i.e. the third param -- old), new shouldn't
> add vq->num, and actually we are expecting a very big
> idx diff.
Yes, so to calculate the distance correctly between event and new, we just
need to compare the wrap counter and return false if it doesn't match,
without the need to try to add vq.num here.
Thanks
>
> Best regards,
> Tiwei Bie
On 2018年05月08日 11:05, Jason Wang wrote:
>>
>> Because in virtqueue_enable_cb_delayed(), we may set an
>> event_off which is bigger than new and both of them have
>> wrapped. And in this case, although new is smaller than
>> event_off (i.e. the third param -- old), new shouldn't
>> add vq->num, and actually we are expecting a very big
>> idx diff.
>
> Yes, so to calculate distance correctly between event and new, we just
> need to compare the warp counter and return false if it doesn't match
> without the need to try to add vq.num here.
>
> Thanks
Sorry, looks like the following should work; we need to add vq.num if
used_wrap_counter does not match:
static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
__u16 off_wrap, __u16 new,
__u16 old)
{
bool wrap = off_wrap >> 15;
int off = off_wrap & ~(1 << 15);
__u16 d1, d2;
if (wrap != vq->used_wrap_counter)
d1 = new + vq->num - off - 1;
else
d1 = new - off - 1;
if (new > old)
d2 = new - old;
else
d2 = new + vq->num - old;
return d1 < d2;
}
Thanks
On Tue, May 08, 2018 at 01:40:40PM +0800, Jason Wang wrote:
> On 2018年05月08日 11:05, Jason Wang wrote:
> > >
> > > Because in virtqueue_enable_cb_delayed(), we may set an
> > > event_off which is bigger than new and both of them have
> > > wrapped. And in this case, although new is smaller than
> > > event_off (i.e. the third param -- old), new shouldn't
> > > add vq->num, and actually we are expecting a very big
> > > idx diff.
> >
> > Yes, so to calculate distance correctly between event and new, we just
> > need to compare the warp counter and return false if it doesn't match
> > without the need to try to add vq.num here.
> >
> > Thanks
>
> Sorry, looks like the following should work, we need add vq.num if
> used_wrap_counter does not match:
>
> static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
> __u16 off_wrap, __u16 new,
> __u16 old)
> {
> bool wrap = off_wrap >> 15;
> int off = off_wrap & ~(1 << 15);
> __u16 d1, d2;
>
> if (wrap != vq->used_wrap_counter)
> d1 = new + vq->num - off - 1;
Just to draw your attention (maybe you have already
noticed this).
In this case (i.e. wrap != vq->used_wrap_counter),
it's also possible that (off < new) is true. Because,
when virtqueue_enable_cb_delayed_packed() is used,
`off` is calculated in driver in a way like this:
off = vq->last_used_idx + bufs;
if (off >= vq->vring_packed.num) {
off -= vq->vring_packed.num;
wrap_counter ^= 1;
}
And when `new` (in vhost) is close to vq->num. The
vq->last_used_idx + bufs (in driver) can be bigger
than vq->vring_packed.num, and:
1. `off` will wrap;
2. wrap counters won't match;
3. off < new;
And d1 (i.e. new + vq->num - off - 1) will be a value
bigger than vq->num. I'm okay with this, although it's
a bit weird.
Best regards,
Tiwei Bie
> else
> d1 = new - off - 1;
>
> if (new > old)
> d2 = new - old;
> else
> d2 = new + vq->num - old;
>
> return d1 < d2;
> }
>
> Thanks
>
On 2018年05月08日 14:44, Tiwei Bie wrote:
> On Tue, May 08, 2018 at 01:40:40PM +0800, Jason Wang wrote:
>> On 2018年05月08日 11:05, Jason Wang wrote:
>>>> Because in virtqueue_enable_cb_delayed(), we may set an
>>>> event_off which is bigger than new and both of them have
>>>> wrapped. And in this case, although new is smaller than
>>>> event_off (i.e. the third param -- old), new shouldn't
>>>> add vq->num, and actually we are expecting a very big
>>>> idx diff.
>>> Yes, so to calculate distance correctly between event and new, we just
>>> need to compare the warp counter and return false if it doesn't match
>>> without the need to try to add vq.num here.
>>>
>>> Thanks
>> Sorry, looks like the following should work, we need add vq.num if
>> used_wrap_counter does not match:
>>
>> static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
>> __u16 off_wrap, __u16 new,
>> __u16 old)
>> {
>> bool wrap = off_wrap >> 15;
>> int off = off_wrap & ~(1 << 15);
>> __u16 d1, d2;
>>
>> if (wrap != vq->used_wrap_counter)
>> d1 = new + vq->num - off - 1;
> Just to draw your attention (maybe you have already
> noticed this).
I miss this, thanks!
>
> In this case (i.e. wrap != vq->used_wrap_counter),
> it's also possible that (off < new) is true. Because,
>
> when virtqueue_enable_cb_delayed_packed() is used,
> `off` is calculated in driver in a way like this:
>
> off = vq->last_used_idx + bufs;
> if (off >= vq->vring_packed.num) {
> off -= vq->vring_packed.num;
> wrap_counter ^= 1;
> }
>
> And when `new` (in vhost) is close to vq->num. The
> vq->last_used_idx + bufs (in driver) can be bigger
> than vq->vring_packed.num, and:
>
> 1. `off` will wrap;
> 2. wrap counters won't match;
> 3. off < new;
>
> And d1 (i.e. new + vq->num - off - 1) will be a value
> bigger than vq->num. I'm okay with this, although it's
> a bit weird.
So I'm considering something more compact by reusing vring_need_event()
by pretending a larger queue size and adding vq->num back when necessary:
static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
__u16 off_wrap, __u16 new,
__u16 old)
{
bool wrap = vq->used_wrap_counter;
int off = off_wrap & ~(1 << 15);
__u16 d1, d2;
if (new < old) {
new += vq->num;
wrap ^= 1;
}
if (wrap != off_wrap >> 15)
off += vq->num;
return vring_need_event(off, new, old);
}
>
> Best regards,
> Tiwei Bie
>
>> else
>> d1 = new - off - 1;
>>
>> if (new > old)
>> d2 = new - old;
>> else
>> d2 = new + vq->num - old;
>>
>> return d1 < d2;
>> }
>>
>> Thanks
>>
On Tue, May 08, 2018 at 03:16:53PM +0800, Jason Wang wrote:
> On 2018年05月08日 14:44, Tiwei Bie wrote:
> > On Tue, May 08, 2018 at 01:40:40PM +0800, Jason Wang wrote:
> > > On 2018年05月08日 11:05, Jason Wang wrote:
> > > > > Because in virtqueue_enable_cb_delayed(), we may set an
> > > > > event_off which is bigger than new and both of them have
> > > > > wrapped. And in this case, although new is smaller than
> > > > > event_off (i.e. the third param -- old), new shouldn't
> > > > > add vq->num, and actually we are expecting a very big
> > > > > idx diff.
> > > > Yes, so to calculate distance correctly between event and new, we just
> > > > need to compare the warp counter and return false if it doesn't match
> > > > without the need to try to add vq.num here.
> > > >
> > > > Thanks
> > > Sorry, looks like the following should work, we need add vq.num if
> > > used_wrap_counter does not match:
> > >
> > > static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
> > > __u16 off_wrap, __u16 new,
> > > __u16 old)
> > > {
> > > bool wrap = off_wrap >> 15;
> > > int off = off_wrap & ~(1 << 15);
> > > __u16 d1, d2;
> > >
> > > if (wrap != vq->used_wrap_counter)
> > > d1 = new + vq->num - off - 1;
> > Just to draw your attention (maybe you have already
> > noticed this).
>
> I miss this, thanks!
>
> >
> > In this case (i.e. wrap != vq->used_wrap_counter),
> > it's also possible that (off < new) is true. Because,
> >
> > when virtqueue_enable_cb_delayed_packed() is used,
> > `off` is calculated in driver in a way like this:
> >
> > off = vq->last_used_idx + bufs;
> > if (off >= vq->vring_packed.num) {
> > off -= vq->vring_packed.num;
> > wrap_counter ^= 1;
> > }
> >
> > And when `new` (in vhost) is close to vq->num. The
> > vq->last_used_idx + bufs (in driver) can be bigger
> > than vq->vring_packed.num, and:
> >
> > 1. `off` will wrap;
> > 2. wrap counters won't match;
> > 3. off < new;
> >
> > And d1 (i.e. new + vq->num - off - 1) will be a value
> > bigger than vq->num. I'm okay with this, although it's
> > a bit weird.
>
>
> So I'm considering something more compact by reusing vring_need_event() by
> pretending a larger queue size and adding vq->num back when necessary:
>
> static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
> __u16 off_wrap, __u16 new,
> __u16 old)
> {
> bool wrap = vq->used_wrap_counter;
If the wrap counter is obtained from the vq,
I think `new` should also be obtained from
the vq. Or the wrap counter should be carried
in `new`.
> int off = off_wrap & ~(1 << 15);
> __u16 d1, d2;
>
> if (new < old) {
> new += vq->num;
> wrap ^= 1;
> }
>
> if (wrap != off_wrap >> 15)
> off += vq->num;
When `new` and `old` wraps, and `off` doesn't wrap,
wrap != (off_wrap >> 15) will be true. In this case,
`off` is bigger than `new`, and what we should do
is `off -= vq->num` instead of `off += vq->num`.
Best regards,
Tiwei Bie
>
> return vring_need_event(off, new, old);
> }
>
>
> >
> > Best regards,
> > Tiwei Bie
> >
> > > else
> > > d1 = new - off - 1;
> > >
> > > if (new > old)
> > > d2 = new - old;
> > > else
> > > d2 = new + vq->num - old;
> > >
> > > return d1 < d2;
> > > }
> > >
> > > Thanks
> > >
>
On 2018年05月08日 17:16, Tiwei Bie wrote:
> On Tue, May 08, 2018 at 03:16:53PM +0800, Jason Wang wrote:
>> On 2018年05月08日 14:44, Tiwei Bie wrote:
>>> On Tue, May 08, 2018 at 01:40:40PM +0800, Jason Wang wrote:
>>>> On 2018年05月08日 11:05, Jason Wang wrote:
>>>>>> Because in virtqueue_enable_cb_delayed(), we may set an
>>>>>> event_off which is bigger than new and both of them have
>>>>>> wrapped. And in this case, although new is smaller than
>>>>>> event_off (i.e. the third param -- old), new shouldn't
>>>>>> add vq->num, and actually we are expecting a very big
>>>>>> idx diff.
>>>>> Yes, so to calculate distance correctly between event and new, we just
>>>>> need to compare the warp counter and return false if it doesn't match
>>>>> without the need to try to add vq.num here.
>>>>>
>>>>> Thanks
>>>> Sorry, looks like the following should work, we need add vq.num if
>>>> used_wrap_counter does not match:
>>>>
>>>> static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
>>>> __u16 off_wrap, __u16 new,
>>>> __u16 old)
>>>> {
>>>> bool wrap = off_wrap >> 15;
>>>> int off = off_wrap & ~(1 << 15);
>>>> __u16 d1, d2;
>>>>
>>>> if (wrap != vq->used_wrap_counter)
>>>> d1 = new + vq->num - off - 1;
>>> Just to draw your attention (maybe you have already
>>> noticed this).
>> I miss this, thanks!
>>
>>> In this case (i.e. wrap != vq->used_wrap_counter),
>>> it's also possible that (off < new) is true. Because,
>>>
>>> when virtqueue_enable_cb_delayed_packed() is used,
>>> `off` is calculated in driver in a way like this:
>>>
>>> off = vq->last_used_idx + bufs;
>>> if (off >= vq->vring_packed.num) {
>>> off -= vq->vring_packed.num;
>>> wrap_counter ^= 1;
>>> }
>>>
>>> And when `new` (in vhost) is close to vq->num. The
>>> vq->last_used_idx + bufs (in driver) can be bigger
>>> than vq->vring_packed.num, and:
>>>
>>> 1. `off` will wrap;
>>> 2. wrap counters won't match;
>>> 3. off < new;
>>>
>>> And d1 (i.e. new + vq->num - off - 1) will be a value
>>> bigger than vq->num. I'm okay with this, although it's
>>> a bit weird.
>>
>> So I'm considering something more compact by reusing vring_need_event() by
>> pretending a larger queue size and adding vq->num back when necessary:
>>
>> static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
>> __u16 off_wrap, __u16 new,
>> __u16 old)
>> {
>> bool wrap = vq->used_wrap_counter;
> If the wrap counter is obtained from the vq,
> I think `new` should also be obtained from
> the vq. Or the wrap counter should be carried
> in `new`.
>
>> int off = off_wrap & ~(1 << 15);
>> __u16 d1, d2;
>>
>> if (new < old) {
>> new += vq->num;
>> wrap ^= 1;
>> }
>>
>> if (wrap != off_wrap >> 15)
>> off += vq->num;
> When `new` and `old` wraps, and `off` doesn't wrap,
> wrap != (off_wrap >> 15) will be true. In this case,
> `off` is bigger than `new`, and what we should do
> is `off -= vq->num` instead of `off += vq->num`.
If I understand this correctly, if we track old correctly, it won't
happen if the guest driver behaves correctly. That means it should only
happen for a buggy driver (e.g. trying to move off_wrap back).
Thanks
>
> Best regards,
> Tiwei Bie
>
>> return vring_need_event(off, new, old);
>> }
>>
>>
>>> Best regards,
>>> Tiwei Bie
>>>
>>>> else
>>>> d1 = new - off - 1;
>>>>
>>>> if (new > old)
>>>> d2 = new - old;
>>>> else
>>>> d2 = new + vq->num - old;
>>>>
>>>> return d1 < d2;
>>>> }
>>>>
>>>> Thanks
>>>>
On Tue, May 08, 2018 at 05:34:40PM +0800, Jason Wang wrote:
> On 2018年05月08日 17:16, Tiwei Bie wrote:
> > On Tue, May 08, 2018 at 03:16:53PM +0800, Jason Wang wrote:
> > > On 2018年05月08日 14:44, Tiwei Bie wrote:
> > > > On Tue, May 08, 2018 at 01:40:40PM +0800, Jason Wang wrote:
> > > > > On 2018年05月08日 11:05, Jason Wang wrote:
> > > > > > > Because in virtqueue_enable_cb_delayed(), we may set an
> > > > > > > event_off which is bigger than new and both of them have
> > > > > > > wrapped. And in this case, although new is smaller than
> > > > > > > event_off (i.e. the third param -- old), new shouldn't
> > > > > > > add vq->num, and actually we are expecting a very big
> > > > > > > idx diff.
> > > > > > Yes, so to calculate distance correctly between event and new, we just
> > > > > > need to compare the warp counter and return false if it doesn't match
> > > > > > without the need to try to add vq.num here.
> > > > > >
> > > > > > Thanks
> > > > > Sorry, looks like the following should work, we need add vq.num if
> > > > > used_wrap_counter does not match:
> > > > >
> > > > > static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
> > > > > __u16 off_wrap, __u16 new,
> > > > > __u16 old)
> > > > > {
> > > > > bool wrap = off_wrap >> 15;
> > > > > int off = off_wrap & ~(1 << 15);
> > > > > __u16 d1, d2;
> > > > >
> > > > > if (wrap != vq->used_wrap_counter)
> > > > > d1 = new + vq->num - off - 1;
> > > > Just to draw your attention (maybe you have already
> > > > noticed this).
> > > I miss this, thanks!
> > >
> > > > In this case (i.e. wrap != vq->used_wrap_counter),
> > > > it's also possible that (off < new) is true. Because,
> > > >
> > > > when virtqueue_enable_cb_delayed_packed() is used,
> > > > `off` is calculated in driver in a way like this:
> > > >
> > > > off = vq->last_used_idx + bufs;
> > > > if (off >= vq->vring_packed.num) {
> > > > off -= vq->vring_packed.num;
> > > > wrap_counter ^= 1;
> > > > }
> > > >
> > > > And when `new` (in vhost) is close to vq->num. The
> > > > vq->last_used_idx + bufs (in driver) can be bigger
> > > > than vq->vring_packed.num, and:
> > > >
> > > > 1. `off` will wrap;
> > > > 2. wrap counters won't match;
> > > > 3. off < new;
> > > >
> > > > And d1 (i.e. new + vq->num - off - 1) will be a value
> > > > bigger than vq->num. I'm okay with this, although it's
> > > > a bit weird.
> > >
> > > So I'm considering something more compact by reusing vring_need_event() by
> > > pretending a larger queue size and adding vq->num back when necessary:
> > >
> > > static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
> > > __u16 off_wrap, __u16 new,
> > > __u16 old)
> > > {
> > > bool wrap = vq->used_wrap_counter;
> > If the wrap counter is obtained from the vq,
> > I think `new` should also be obtained from
> > the vq. Or the wrap counter should be carried
> > in `new`.
> >
> > > int off = off_wrap & ~(1 << 15);
> > > __u16 d1, d2;
> > >
> > > if (new < old) {
> > > new += vq->num;
> > > wrap ^= 1;
> > > }
> > >
> > > if (wrap != off_wrap >> 15)
> > > off += vq->num;
> > When `new` and `old` wraps, and `off` doesn't wrap,
> > wrap != (off_wrap >> 15) will be true. In this case,
> > `off` is bigger than `new`, and what we should do
> > is `off -= vq->num` instead of `off += vq->num`.
>
> If I understand this correctly, if we track old correctly, it won't happen
> if guest driver behave correctly. That means it should only happen for a
> buggy driver (e.g trying to move off_wrap back).
If vhost is faster than virtio driver, I guess above
case may happen. The `old` and `new` will be updated
each time we want to notify the driver. If the driver
is slower, `old` and `new` in vhost may wrap before
the `off` which is set by driver wraps.
Best regards,
Tiwei Bie
>
> Thanks
>
> >
> > Best regards,
> > Tiwei Bie
> >
> > > return vring_need_event(off, new, old);
> > > }
> > >
> > >
> > > > Best regards,
> > > > Tiwei Bie
> > > >
> > > > > else
> > > > > d1 = new - off - 1;
> > > > >
> > > > > if (new > old)
> > > > > d2 = new - old;
> > > > > else
> > > > > d2 = new + vq->num - old;
> > > > >
> > > > > return d1 < d2;
> > > > > }
> > > > >
> > > > > Thanks
> > > > >
>
On 2018年05月08日 17:44, Tiwei Bie wrote:
> On Tue, May 08, 2018 at 05:34:40PM +0800, Jason Wang wrote:
>> On 2018年05月08日 17:16, Tiwei Bie wrote:
>>> On Tue, May 08, 2018 at 03:16:53PM +0800, Jason Wang wrote:
>>>> On 2018年05月08日 14:44, Tiwei Bie wrote:
>>>>> On Tue, May 08, 2018 at 01:40:40PM +0800, Jason Wang wrote:
>>>>>> On 2018年05月08日 11:05, Jason Wang wrote:
>>>>>>>> Because in virtqueue_enable_cb_delayed(), we may set an
>>>>>>>> event_off which is bigger than new and both of them have
>>>>>>>> wrapped. And in this case, although new is smaller than
>>>>>>>> event_off (i.e. the third param -- old), new shouldn't
>>>>>>>> add vq->num, and actually we are expecting a very big
>>>>>>>> idx diff.
>>>>>>> Yes, so to calculate distance correctly between event and new, we just
>>>>>>> need to compare the warp counter and return false if it doesn't match
>>>>>>> without the need to try to add vq.num here.
>>>>>>>
>>>>>>> Thanks
>>>>>> Sorry, looks like the following should work, we need add vq.num if
>>>>>> used_wrap_counter does not match:
>>>>>>
>>>>>> static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
>>>>>> __u16 off_wrap, __u16 new,
>>>>>> __u16 old)
>>>>>> {
>>>>>> bool wrap = off_wrap >> 15;
>>>>>> int off = off_wrap & ~(1 << 15);
>>>>>> __u16 d1, d2;
>>>>>>
>>>>>> if (wrap != vq->used_wrap_counter)
>>>>>> d1 = new + vq->num - off - 1;
>>>>> Just to draw your attention (maybe you have already
>>>>> noticed this).
>>>> I miss this, thanks!
>>>>
>>>>> In this case (i.e. wrap != vq->used_wrap_counter),
>>>>> it's also possible that (off < new) is true. Because,
>>>>>
>>>>> when virtqueue_enable_cb_delayed_packed() is used,
>>>>> `off` is calculated in driver in a way like this:
>>>>>
>>>>> off = vq->last_used_idx + bufs;
>>>>> if (off >= vq->vring_packed.num) {
>>>>> off -= vq->vring_packed.num;
>>>>> wrap_counter ^= 1;
>>>>> }
>>>>>
>>>>> And when `new` (in vhost) is close to vq->num. The
>>>>> vq->last_used_idx + bufs (in driver) can be bigger
>>>>> than vq->vring_packed.num, and:
>>>>>
>>>>> 1. `off` will wrap;
>>>>> 2. wrap counters won't match;
>>>>> 3. off < new;
>>>>>
>>>>> And d1 (i.e. new + vq->num - off - 1) will be a value
>>>>> bigger than vq->num. I'm okay with this, although it's
>>>>> a bit weird.
>>>> So I'm considering something more compact by reusing vring_need_event() by
>>>> pretending a larger queue size and adding vq->num back when necessary:
>>>>
>>>> static bool vhost_vring_packed_need_event(struct vhost_virtqueue *vq,
>>>> __u16 off_wrap, __u16 new,
>>>> __u16 old)
>>>> {
>>>> bool wrap = vq->used_wrap_counter;
>>> If the wrap counter is obtained from the vq,
>>> I think `new` should also be obtained from
>>> the vq. Or the wrap counter should be carried
>>> in `new`.
>>>
>>>> int off = off_wrap & ~(1 << 15);
>>>> __u16 d1, d2;
>>>>
>>>> if (new < old) {
>>>> new += vq->num;
>>>> wrap ^= 1;
>>>> }
>>>>
>>>> if (wrap != off_wrap >> 15)
>>>> off += vq->num;
>>> When `new` and `old` wraps, and `off` doesn't wrap,
>>> wrap != (off_wrap >> 15) will be true. In this case,
>>> `off` is bigger than `new`, and what we should do
>>> is `off -= vq->num` instead of `off += vq->num`.
>> If I understand this correctly, if we track old correctly, it won't happen
>> if guest driver behave correctly. That means it should only happen for a
>> buggy driver (e.g trying to move off_wrap back).
> If vhost is faster than virtio driver, I guess above
> case may happen. The `old` and `new` will be updated
> each time we want to notify the driver. If the driver
> is slower, `old` and `new` in vhost may wrap before
> the `off` which is set by driver wraps.
>
> Best regards,
> Tiwei Bie
>
Oh, right.
But the code still work (in this case new - event_idx - 1 will
underflow). (And I admit it still looks ugly).
Thanks
On 2018年05月10日 15:32, Jason Wang wrote:
>
>
> On 2018年04月25日 13:15, Tiwei Bie wrote:
>> + /* We're using some buffers from the free list. */
>> + vq->vq.num_free -= descs_used;
>> +
>> + /* Update free pointer */
>> + if (indirect) {
>> + n = head + 1;
>> + if (n >= vq->vring_packed.num) {
>> + n = 0;
>> + vq->wrap_counter ^= 1;
>> + }
>> + vq->next_avail_idx = n;
>> + } else
>> + vq->next_avail_idx = i;
>
> During testing zerocopy (out of order completion), I found driver may
> submit two identical buffer id to vhost. So the above code may not
> work well.
>
> Consider the case that driver adds 3 buffer and virtqueue size is 8.
>
> a) id = 0,count = 2,next_avail = 2
>
> b) id = 2,count = 4,next_avail = 2
next_avail should be 6 here.
>
> c) id = 4,count = 2,next_avail = 0
>
id should be 6 here.
Thanks
> if packet b is done before packet a, driver may think buffer id 0 is
> available and try to use it if even if the real buffer 0 was not done.
>
> Thanks
> _______________________________________________
> Virtualization mailing list
> [email protected]
> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
On 2018年04月25日 13:15, Tiwei Bie wrote:
> + /* We're using some buffers from the free list. */
> + vq->vq.num_free -= descs_used;
> +
> + /* Update free pointer */
> + if (indirect) {
> + n = head + 1;
> + if (n >= vq->vring_packed.num) {
> + n = 0;
> + vq->wrap_counter ^= 1;
> + }
> + vq->next_avail_idx = n;
> + } else
> + vq->next_avail_idx = i;
During testing zerocopy (out of order completion), I found driver may
submit two identical buffer id to vhost. So the above code may not work
well.
Consider the case that driver adds 3 buffer and virtqueue size is 8.
a) id = 0,count = 2,next_avail = 2
b) id = 2,count = 4,next_avail = 2
c) id = 4,count = 2,next_avail = 0
if packet b is done before packet a, driver may think buffer id 0 is
available and try to use it even if the real buffer 0 was not done.
Thanks
On Thu, May 10, 2018 at 03:34:50PM +0800, Jason Wang wrote:
> On 2018年05月10日 15:32, Jason Wang wrote:
> > On 2018年04月25日 13:15, Tiwei Bie wrote:
> > > + /* We're using some buffers from the free list. */
> > > + vq->vq.num_free -= descs_used;
> > > +
> > > + /* Update free pointer */
> > > + if (indirect) {
> > > + n = head + 1;
> > > + if (n >= vq->vring_packed.num) {
> > > + n = 0;
> > > + vq->wrap_counter ^= 1;
> > > + }
> > > + vq->next_avail_idx = n;
> > > + } else
> > > + vq->next_avail_idx = i;
> >
> > During testing zerocopy (out of order completion), I found driver may
> > submit two identical buffer id to vhost. So the above code may not work
> > well.
> >
> > Consider the case that driver adds 3 buffer and virtqueue size is 8.
> >
> > a) id = 0,count = 2,next_avail = 2
> >
> > b) id = 2,count = 4,next_avail = 2
>
> next_avail should be 6 here.
>
> >
> > c) id = 4,count = 2,next_avail = 0
> >
>
> id should be 6 here.
>
> Thanks
>
> > if packet b is done before packet a, driver may think buffer id 0 is
> > available and try to use it if even if the real buffer 0 was not done.
> >
> > Thanks
Nice catch! Thanks a lot!
I'll implement an ID allocator.
Best regards,
Tiwei Bie
On 2018年05月10日 16:56, Tiwei Bie wrote:
> On Thu, May 10, 2018 at 03:34:50PM +0800, Jason Wang wrote:
>> On 2018年05月10日 15:32, Jason Wang wrote:
>>> On 2018年04月25日 13:15, Tiwei Bie wrote:
>>>> + /* We're using some buffers from the free list. */
>>>> + vq->vq.num_free -= descs_used;
>>>> +
>>>> + /* Update free pointer */
>>>> + if (indirect) {
>>>> + n = head + 1;
>>>> + if (n >= vq->vring_packed.num) {
>>>> + n = 0;
>>>> + vq->wrap_counter ^= 1;
>>>> + }
>>>> + vq->next_avail_idx = n;
>>>> + } else
>>>> + vq->next_avail_idx = i;
>>> During testing zerocopy (out of order completion), I found driver may
>>> submit two identical buffer id to vhost. So the above code may not work
>>> well.
>>>
>>> Consider the case that driver adds 3 buffer and virtqueue size is 8.
>>>
>>> a) id = 0,count = 2,next_avail = 2
>>>
>>> b) id = 2,count = 4,next_avail = 2
>> next_avail should be 6 here.
>>
>>> c) id = 4,count = 2,next_avail = 0
>>>
>> id should be 6 here.
>>
>> Thanks
>>
>>> if packet b is done before packet a, driver may think buffer id 0 is
>>> available and try to use it if even if the real buffer 0 was not done.
>>>
>>> Thanks
> Nice catch! Thanks a lot!
> I'll implement an ID allocator.
>
> Best regards,
> Tiwei Bie
Sounds good.
Another similar issue is in detach_buf_packed(). It did:
for (j = 0; j < vq->desc_state[head].num; j++) {
desc = &vq->vring_packed.desc[i];
vring_unmap_one_packed(vq, desc);
i++;
if (i >= vq->vring_packed.num)
i = 0;
}
This probably won't work for out of order too and according to the spec:
"""
Driver needs to keep track of the size of the list corresponding to each
buffer ID, to be able to skip to where the next used descriptor is
written by the device.
"""
Looks like we should not depend on the descriptor ring.
Thanks
On Thu, May 10, 2018 at 05:49:20PM +0800, Jason Wang wrote:
> On 2018年05月10日 16:56, Tiwei Bie wrote:
> > On Thu, May 10, 2018 at 03:34:50PM +0800, Jason Wang wrote:
> > > On 2018年05月10日 15:32, Jason Wang wrote:
> > > > On 2018年04月25日 13:15, Tiwei Bie wrote:
> > > > > + /* We're using some buffers from the free list. */
> > > > > + vq->vq.num_free -= descs_used;
> > > > > +
> > > > > + /* Update free pointer */
> > > > > + if (indirect) {
> > > > > + n = head + 1;
> > > > > + if (n >= vq->vring_packed.num) {
> > > > > + n = 0;
> > > > > + vq->wrap_counter ^= 1;
> > > > > + }
> > > > > + vq->next_avail_idx = n;
> > > > > + } else
> > > > > + vq->next_avail_idx = i;
> > > > During testing zerocopy (out of order completion), I found driver may
> > > > submit two identical buffer id to vhost. So the above code may not work
> > > > well.
> > > >
> > > > Consider the case that driver adds 3 buffer and virtqueue size is 8.
> > > >
> > > > a) id = 0,count = 2,next_avail = 2
> > > >
> > > > b) id = 2,count = 4,next_avail = 2
> > > next_avail should be 6 here.
> > >
> > > > c) id = 4,count = 2,next_avail = 0
> > > >
> > > id should be 6 here.
> > >
> > > Thanks
> > >
> > > > if packet b is done before packet a, driver may think buffer id 0 is
> > > > available and try to use it if even if the real buffer 0 was not done.
> > > >
> > > > Thanks
> > Nice catch! Thanks a lot!
> > I'll implement an ID allocator.
> >
> > Best regards,
> > Tiwei Bie
>
> Sounds good.
>
> Another similar issue is detac_buf_packed(). It did:
>
> for (j = 0; j < vq->desc_state[head].num; j++) {
> desc = &vq->vring_packed.desc[i];
> vring_unmap_one_packed(vq, desc);
> i++;
> if (i >= vq->vring_packed.num)
> i = 0;
> }
>
> This probably won't work for out of order too and according to the spec:
>
> """
> Driver needs to keep track of the size of the list corresponding to each
> buffer ID, to be able to skip to where the next used descriptor is written
> by the device.
> """
>
> Looks like we should not depend on the descriptor ring.
Yeah, the previous ID allocation is too simple..
Let me fix it in the next version.
Thanks!
>
> Thanks
On 2018年04月25日 13:15, Tiwei Bie wrote:
> This commit introduces the event idx support in packed
> ring. This feature is temporarily disabled, because the
> implementation in this patch may not work as expected,
> and some further discussions on the implementation are
> needed, e.g. do we have to check the wrap counter when
> checking whether a kick is needed?
>
> Signed-off-by: Tiwei Bie <[email protected]>
> ---
> drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 49 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 0181e93897be..b1039c2985b9 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -986,7 +986,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
> - u16 flags;
> + u16 new, old, off_wrap, flags;
> bool needs_kick;
> u32 snapshot;
>
> @@ -995,7 +995,12 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> * suppressions. */
> virtio_mb(vq->weak_barriers);
>
> + old = vq->next_avail_idx - vq->num_added;
> + new = vq->next_avail_idx;
> + vq->num_added = 0;
> +
> snapshot = *(u32 *)vq->vring_packed.device;
> + off_wrap = virtio16_to_cpu(_vq->vdev, snapshot & 0xffff);
> flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
>
> #ifdef DEBUG
> @@ -1006,7 +1011,10 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> vq->last_add_time_valid = false;
> #endif
>
> - needs_kick = (flags != VRING_EVENT_F_DISABLE);
> + if (flags == VRING_EVENT_F_DESC)
> + needs_kick = vring_need_event(off_wrap & ~(1<<15), new, old);
> + else
> + needs_kick = (flags != VRING_EVENT_F_DISABLE);
> END_USE(vq);
> return needs_kick;
> }
> @@ -1116,6 +1124,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> if (vq->last_used_idx >= vq->vring_packed.num)
> vq->last_used_idx -= vq->vring_packed.num;
>
> + /* If we expect an interrupt for the next entry, tell host
> + * by writing event index and flush out the write before
> + * the read in the next get_buf call. */
> + if (vq->event_flags_shadow == VRING_EVENT_F_DESC)
> + virtio_store_mb(vq->weak_barriers,
> + &vq->vring_packed.driver->off_wrap,
> + cpu_to_virtio16(_vq->vdev, vq->last_used_idx |
> + (vq->wrap_counter << 15)));
> +
> #ifdef DEBUG
> vq->last_add_time_valid = false;
> #endif
> @@ -1143,10 +1160,17 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
>
> /* We optimistically turn back on interrupts, then check if there was
> * more to do. */
> + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> + * either clear the flags bit or point the event index at the next
> + * entry. Always update the event index to keep code simple. */
> +
> + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> + vq->last_used_idx | (vq->wrap_counter << 15));
Using vq->wrap_counter seems not correct; what we need is the wrap
counter for last_used_idx, not next_avail_idx.
And I think there's even no need to bother with event idx here, how
about just set VRING_EVENT_F_ENABLE?
>
> if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> virtio_wmb(vq->weak_barriers);
> - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> + VRING_EVENT_F_ENABLE;
> vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> vq->event_flags_shadow);
> }
> @@ -1172,15 +1196,34 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
> static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
> + u16 bufs, used_idx, wrap_counter;
>
> START_USE(vq);
>
> /* We optimistically turn back on interrupts, then check if there was
> * more to do. */
> + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> + * either clear the flags bit or point the event index at the next
> + * entry. Always update the event index to keep code simple. */
> +
> + /* TODO: tune this threshold */
> + bufs = (u16)(vq->next_avail_idx - vq->last_used_idx) * 3 / 4;
bufs could be more than vq->num here, is this intended?
> +
> + used_idx = vq->last_used_idx + bufs;
> + wrap_counter = vq->wrap_counter;
> +
> + if (used_idx >= vq->vring_packed.num) {
> + used_idx -= vq->vring_packed.num;
> + wrap_counter ^= 1;
When used_idx is greater than or equal to vq->num, there's no need to flip
the wrap_counter bit since it should match next_avail_idx.
We also need to care about the case when next_avail wraps but used_idx
does not. So we probably need:
else if (vq->next_avail_idx < used_idx) {
wrap_counter ^= 1;
}
I think maybe it's time to add some sample code in the spec to avoid
duplicating the efforts (and bugs).
Thanks
> + }
> +
> + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> + used_idx | (wrap_counter << 15));
>
> if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> virtio_wmb(vq->weak_barriers);
> - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> + VRING_EVENT_F_ENABLE;
> vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> vq->event_flags_shadow);
> }
> @@ -1822,8 +1865,10 @@ void vring_transport_features(struct virtio_device *vdev)
> switch (i) {
> case VIRTIO_RING_F_INDIRECT_DESC:
> break;
> +#if 0
> case VIRTIO_RING_F_EVENT_IDX:
> break;
> +#endif
> case VIRTIO_F_VERSION_1:
> break;
> case VIRTIO_F_IOMMU_PLATFORM:
On Wed, May 16, 2018 at 01:01:04PM +0800, Jason Wang wrote:
> On 2018年04月25日 13:15, Tiwei Bie wrote:
[...]
> > @@ -1143,10 +1160,17 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> > /* We optimistically turn back on interrupts, then check if there was
> > * more to do. */
> > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > + * either clear the flags bit or point the event index at the next
> > + * entry. Always update the event index to keep code simple. */
> > +
> > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > + vq->last_used_idx | (vq->wrap_counter << 15));
>
>
> Using vq->wrap_counter seems not correct, what we need is the warp counter
> for the last_used_idx not next_avail_idx.
Yes, you're right. I have fixed it in my local repo,
but haven't sent out a new version yet.
I'll try to send out a new RFC today.
>
> And I think there's even no need to bother with event idx here, how about
> just set VRING_EVENT_F_ENABLE?
We had a similar discussion before. Michael prefers
to use VRING_EVENT_F_DESC when possible to avoid
extra interrupts if host is fast:
https://lkml.org/lkml/2018/4/16/1085
"""
I suspect this will lead to extra interrupts if host is fast.
So I think for now we should always use VRING_EVENT_F_DESC
if EVENT_IDX is negotiated.
"""
>
> > if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > virtio_wmb(vq->weak_barriers);
> > - vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > + VRING_EVENT_F_ENABLE;
> > vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > vq->event_flags_shadow);
> > }
> > @@ -1172,15 +1196,34 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
> > static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > + u16 bufs, used_idx, wrap_counter;
> > START_USE(vq);
> > /* We optimistically turn back on interrupts, then check if there was
> > * more to do. */
> > + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > + * either clear the flags bit or point the event index at the next
> > + * entry. Always update the event index to keep code simple. */
> > +
> > + /* TODO: tune this threshold */
> > + bufs = (u16)(vq->next_avail_idx - vq->last_used_idx) * 3 / 4;
>
> bufs could be more than vq->num here, is this intended?
Yes, you're right. Like the above one -- I have fixed
it in my local repo, but haven't sent out a new version
yet. Thanks for spotting this!
>
> > +
> > + used_idx = vq->last_used_idx + bufs;
> > + wrap_counter = vq->wrap_counter;
> > +
> > + if (used_idx >= vq->vring_packed.num) {
> > + used_idx -= vq->vring_packed.num;
> > + wrap_counter ^= 1;
>
> When used_idx is greater or equal vq->num, there's no need to flip
> warp_counter bit since it should match next_avail_idx.
>
> And we need also care about the case when next_avail wraps but used_idx not.
> so we probaly need:
>
> else if (vq->next_avail_idx < used_idx) {
> wrap_counter ^= 1;
> }
>
> I think maybe it's time to add some sample codes in the spec to avoid
> duplicating the efforts(bugs).
+1
Best regards,
Tiwei Bie