From: K. Y. Srinivasan <[email protected]>
This patch set removes unused bits from vmbus internals, and also does some
optimization to the host signalling part of ring buffer logic.
Stephen Hemminger (6):
vmbus: simplify hv_ringbuffer_read
vmbus: drop unused ring_buffer_info elements
vmbus: refactor hv_signal_on_read
vmbus: eliminate duplicate cached index
vmbus: more host signalling avoidance
vmbus: add prefetch to ring buffer iterator
drivers/hv/ring_buffer.c | 169 ++++++++++++++++-----------------------------
include/linux/hyperv.h | 65 ------------------
2 files changed, 60 insertions(+), 174 deletions(-)
From: Stephen Hemminger <[email protected]>
With new iterator functions (and the double mapping) the ring buffer
read function can be greatly simplified.
Signed-off-by: Stephen Hemminger <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/ring_buffer.c | 118 +++++++---------------------------------------
1 files changed, 17 insertions(+), 101 deletions(-)
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 1f450c3..f299817 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -94,30 +94,6 @@ static void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel)
ring_info->ring_buffer->write_index = next_write_location;
}
-/* Get the next read location for the specified ring buffer. */
-static inline u32
-hv_get_next_read_location(const struct hv_ring_buffer_info *ring_info)
-{
- return ring_info->ring_buffer->read_index;
-}
-
-/*
- * Get the next read location + offset for the specified ring buffer.
- * This allows the caller to skip.
- */
-static inline u32
-hv_get_next_readlocation_withoffset(const struct hv_ring_buffer_info *ring_info,
- u32 offset)
-{
- u32 next = ring_info->ring_buffer->read_index;
-
- next += offset;
- if (next >= ring_info->ring_datasize)
- next -= ring_info->ring_datasize;
-
- return next;
-}
-
/* Set the next read location for the specified ring buffer. */
static inline void
hv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
@@ -142,29 +118,6 @@ static void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel)
}
/*
- * Helper routine to copy to source from ring buffer.
- * Assume there is enough room. Handles wrap-around in src case only!!
- */
-static u32 hv_copyfrom_ringbuffer(
- const struct hv_ring_buffer_info *ring_info,
- void *dest,
- u32 destlen,
- u32 start_read_offset)
-{
- void *ring_buffer = hv_get_ring_buffer(ring_info);
- u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
-
- memcpy(dest, ring_buffer + start_read_offset, destlen);
-
- start_read_offset += destlen;
- if (start_read_offset >= ring_buffer_size)
- start_read_offset -= ring_buffer_size;
-
- return start_read_offset;
-}
-
-
-/*
* Helper routine to copy from source to ring buffer.
* Assume there is enough room. Handles wrap-around in dest case only!!
*/
@@ -334,33 +287,22 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
return 0;
}
-static inline void
-init_cached_read_index(struct hv_ring_buffer_info *rbi)
-{
- rbi->cached_read_index = rbi->ring_buffer->read_index;
-}
-
int hv_ringbuffer_read(struct vmbus_channel *channel,
void *buffer, u32 buflen, u32 *buffer_actual_len,
u64 *requestid, bool raw)
{
- u32 bytes_avail_toread;
- u32 next_read_location;
- u64 prev_indices = 0;
- struct vmpacket_descriptor desc;
- u32 offset;
- u32 packetlen;
- struct hv_ring_buffer_info *inring_info = &channel->inbound;
-
- if (buflen <= 0)
+ struct vmpacket_descriptor *desc;
+ u32 packetlen, offset;
+
+ if (unlikely(buflen == 0))
return -EINVAL;
*buffer_actual_len = 0;
*requestid = 0;
- bytes_avail_toread = hv_get_bytes_to_read(inring_info);
/* Make sure there is something to read */
- if (bytes_avail_toread < sizeof(desc)) {
+ desc = hv_pkt_iter_first(channel);
+ if (desc == NULL) {
/*
* No error is set when there is even no header, drivers are
* supposed to analyze buffer_actual_len.
@@ -368,48 +310,22 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
return 0;
}
- init_cached_read_index(inring_info);
-
- next_read_location = hv_get_next_read_location(inring_info);
- next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc,
- sizeof(desc),
- next_read_location);
-
- offset = raw ? 0 : (desc.offset8 << 3);
- packetlen = (desc.len8 << 3) - offset;
+ offset = raw ? 0 : (desc->offset8 << 3);
+ packetlen = (desc->len8 << 3) - offset;
*buffer_actual_len = packetlen;
- *requestid = desc.trans_id;
-
- if (bytes_avail_toread < packetlen + offset)
- return -EAGAIN;
+ *requestid = desc->trans_id;
- if (packetlen > buflen)
+ if (unlikely(packetlen > buflen))
return -ENOBUFS;
- next_read_location =
- hv_get_next_readlocation_withoffset(inring_info, offset);
+ /* since ring is double mapped, only one copy is necessary */
+ memcpy(buffer, (const char *)desc + offset, packetlen);
- next_read_location = hv_copyfrom_ringbuffer(inring_info,
- buffer,
- packetlen,
- next_read_location);
+ /* Advance ring index to next packet descriptor */
+ __hv_pkt_iter_next(channel, desc);
- next_read_location = hv_copyfrom_ringbuffer(inring_info,
- &prev_indices,
- sizeof(u64),
- next_read_location);
-
- /*
- * Make sure all reads are done before we update the read index since
- * the writer may start writing to the read area once the read index
- * is updated.
- */
- virt_mb();
-
- /* Update the read index */
- hv_set_next_read_location(inring_info, next_read_location);
-
- hv_signal_on_read(channel);
+ /* Notify host of update */
+ hv_pkt_iter_close(channel);
return 0;
}
@@ -442,7 +358,7 @@ struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
struct hv_ring_buffer_info *rbi = &channel->inbound;
/* set state for later hv_signal_on_read() */
- init_cached_read_index(rbi);
+ rbi->cached_read_index = rbi->ring_buffer->read_index;
if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
return NULL;
--
1.7.1
From: Stephen Hemminger <[email protected]>
The function hv_signal_on_read was defined in hyperv.h and
only used in one place in ring_buffer code. Clearer to just
move it inline there.
Signed-off-by: Stephen Hemminger <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/ring_buffer.c | 32 ++++++++++++++++++++++++++++-
include/linux/hyperv.h | 49 ----------------------------------------------
2 files changed, 30 insertions(+), 51 deletions(-)
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index f299817..a9021f1 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -29,6 +29,7 @@
#include <linux/uio.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
+#include <linux/prefetch.h>
#include "hyperv_vmbus.h"
@@ -357,7 +358,7 @@ struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
{
struct hv_ring_buffer_info *rbi = &channel->inbound;
- /* set state for later hv_signal_on_read() */
+ /* set state for later hv_pkt_iter_close */
rbi->cached_read_index = rbi->ring_buffer->read_index;
if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
@@ -400,6 +401,8 @@ struct vmpacket_descriptor *
void hv_pkt_iter_close(struct vmbus_channel *channel)
{
struct hv_ring_buffer_info *rbi = &channel->inbound;
+ u32 cur_write_sz, cached_write_sz;
+ u32 pending_sz;
/*
* Make sure all reads are done before we update the read index since
@@ -409,6 +412,31 @@ void hv_pkt_iter_close(struct vmbus_channel *channel)
virt_rmb();
rbi->ring_buffer->read_index = rbi->priv_read_index;
- hv_signal_on_read(channel);
+ /*
+ * Issue a full memory barrier before making the signaling decision.
+ * Here is the reason for having this barrier:
+ * If the reading of the pend_sz (in this function)
+ * were to be reordered and read before we commit the new read
+ * index (in the calling function) we could
+ * have a problem. If the host were to set the pending_sz after we
+ * have sampled pending_sz and go to sleep before we commit the
+ * read index, we could miss sending the interrupt. Issue a full
+ * memory barrier to address this.
+ */
+ virt_mb();
+
+ pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
+ /* If the other end is not blocked on write don't bother. */
+ if (pending_sz == 0)
+ return;
+
+ cur_write_sz = hv_get_bytes_to_write(rbi);
+
+ if (cur_write_sz < pending_sz)
+ return;
+
+ cached_write_sz = hv_get_cached_bytes_to_write(rbi);
+ if (cached_write_sz < pending_sz)
+ vmbus_setevent(channel);
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_close);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index b1549c9..0262267 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1458,55 +1458,6 @@ int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id,
}
/*
- * To optimize the flow management on the send-side,
- * when the sender is blocked because of lack of
- * sufficient space in the ring buffer, potential the
- * consumer of the ring buffer can signal the producer.
- * This is controlled by the following parameters:
- *
- * 1. pending_send_sz: This is the size in bytes that the
- * producer is trying to send.
- * 2. The feature bit feat_pending_send_sz set to indicate if
- * the consumer of the ring will signal when the ring
- * state transitions from being full to a state where
- * there is room for the producer to send the pending packet.
- */
-
-static inline void hv_signal_on_read(struct vmbus_channel *channel)
-{
- u32 cur_write_sz, cached_write_sz;
- u32 pending_sz;
- struct hv_ring_buffer_info *rbi = &channel->inbound;
-
- /*
- * Issue a full memory barrier before making the signaling decision.
- * Here is the reason for having this barrier:
- * If the reading of the pend_sz (in this function)
- * were to be reordered and read before we commit the new read
- * index (in the calling function) we could
- * have a problem. If the host were to set the pending_sz after we
- * have sampled pending_sz and go to sleep before we commit the
- * read index, we could miss sending the interrupt. Issue a full
- * memory barrier to address this.
- */
- virt_mb();
-
- pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
- /* If the other end is not blocked on write don't bother. */
- if (pending_sz == 0)
- return;
-
- cur_write_sz = hv_get_bytes_to_write(rbi);
-
- if (cur_write_sz < pending_sz)
- return;
-
- cached_write_sz = hv_get_cached_bytes_to_write(rbi);
- if (cached_write_sz < pending_sz)
- vmbus_setevent(channel);
-}
-
-/*
* Mask off host interrupt callback notifications
*/
static inline void hv_begin_read(struct hv_ring_buffer_info *rbi)
--
1.7.1
From: Stephen Hemminger <[email protected]>
Don't need cached read index anymore now that packet iterator
is used. The iterator has the original read index until the
visible read_index is updated.
Signed-off-by: Stephen Hemminger <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/ring_buffer.c | 17 ++++-------------
include/linux/hyperv.h | 14 --------------
2 files changed, 4 insertions(+), 27 deletions(-)
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index a9021f1..b0f7952 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -358,9 +358,6 @@ struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
{
struct hv_ring_buffer_info *rbi = &channel->inbound;
- /* set state for later hv_pkt_iter_close */
- rbi->cached_read_index = rbi->ring_buffer->read_index;
-
if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
return NULL;
@@ -388,10 +385,7 @@ struct vmpacket_descriptor *
rbi->priv_read_index -= dsize;
/* more data? */
- if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
- return NULL;
- else
- return hv_get_ring_buffer(rbi) + rbi->priv_read_index;
+ return hv_pkt_iter_first(channel);
}
EXPORT_SYMBOL_GPL(__hv_pkt_iter_next);
@@ -401,7 +395,7 @@ struct vmpacket_descriptor *
void hv_pkt_iter_close(struct vmbus_channel *channel)
{
struct hv_ring_buffer_info *rbi = &channel->inbound;
- u32 cur_write_sz, cached_write_sz;
+ u32 orig_write_sz = hv_get_bytes_to_write(rbi);
u32 pending_sz;
/*
@@ -430,13 +424,10 @@ void hv_pkt_iter_close(struct vmbus_channel *channel)
if (pending_sz == 0)
return;
- cur_write_sz = hv_get_bytes_to_write(rbi);
-
- if (cur_write_sz < pending_sz)
+ if (hv_get_bytes_to_write(rbi) < pending_sz)
return;
- cached_write_sz = hv_get_cached_bytes_to_write(rbi);
- if (cached_write_sz < pending_sz)
+ if (orig_write_sz < pending_sz)
vmbus_setevent(channel);
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_close);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 0262267..9ede1fb 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -125,7 +125,6 @@ struct hv_ring_buffer_info {
u32 ring_datasize; /* < ring_size */
u32 priv_read_index;
- u32 cached_read_index;
};
/*
@@ -178,19 +177,6 @@ static inline u32 hv_get_bytes_to_write(const struct hv_ring_buffer_info *rbi)
return write;
}
-static inline u32 hv_get_cached_bytes_to_write(
- const struct hv_ring_buffer_info *rbi)
-{
- u32 read_loc, write_loc, dsize, write;
-
- dsize = rbi->ring_datasize;
- read_loc = rbi->cached_read_index;
- write_loc = rbi->ring_buffer->write_index;
-
- write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
- read_loc - write_loc;
- return write;
-}
/*
* VMBUS version is 32 bit entity broken up into
* two 16 bit quantities: major_number. minor_number.
--
1.7.1
From: Stephen Hemminger <[email protected]>
When iterating over incoming ring elements from the host, prefetch
the next descriptor so that it is cache hot.
Signed-off-by: Stephen Hemminger <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/ring_buffer.c | 7 ++++++-
1 files changed, 6 insertions(+), 1 deletions(-)
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 741daa6..12eb8ca 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -357,11 +357,16 @@ static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
{
struct hv_ring_buffer_info *rbi = &channel->inbound;
+ struct vmpacket_descriptor *desc;
if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
return NULL;
- return hv_get_ring_buffer(rbi) + rbi->priv_read_index;
+ desc = hv_get_ring_buffer(rbi) + rbi->priv_read_index;
+ if (desc)
+ prefetch((char *)desc + (desc->len8 << 3));
+
+ return desc;
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_first);
--
1.7.1
From: Stephen Hemminger <[email protected]>
The elements ring_data_start_offset and priv_write_index
are not used.
Signed-off-by: Stephen Hemminger <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
include/linux/hyperv.h | 2 --
1 files changed, 0 insertions(+), 2 deletions(-)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 9591ae7..b1549c9 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -124,8 +124,6 @@ struct hv_ring_buffer_info {
spinlock_t ring_lock;
u32 ring_datasize; /* < ring_size */
- u32 ring_data_startoffset;
- u32 priv_write_index;
u32 priv_read_index;
u32 cached_read_index;
};
--
1.7.1
From: Stephen Hemminger <[email protected]>
Don't signal host if it has disabled interrupts for that
ring buffer. Check the feature bit to see if host supports
pending send size flag.
Signed-off-by: Stephen Hemminger <[email protected]>
Signed-off-by: K. Y. Srinivasan <[email protected]>
---
drivers/hv/ring_buffer.c | 27 +++++++++++++++++++--------
1 files changed, 19 insertions(+), 8 deletions(-)
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index b0f7952..741daa6 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -396,7 +396,6 @@ void hv_pkt_iter_close(struct vmbus_channel *channel)
{
struct hv_ring_buffer_info *rbi = &channel->inbound;
u32 orig_write_sz = hv_get_bytes_to_write(rbi);
- u32 pending_sz;
/*
* Make sure all reads are done before we update the read index since
@@ -419,15 +418,27 @@ void hv_pkt_iter_close(struct vmbus_channel *channel)
*/
virt_mb();
- pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
- /* If the other end is not blocked on write don't bother. */
- if (pending_sz == 0)
+ /* If host has disabled notifications then skip */
+ if (rbi->ring_buffer->interrupt_mask)
return;
- if (hv_get_bytes_to_write(rbi) < pending_sz)
- return;
+ if (rbi->ring_buffer->feature_bits.feat_pending_send_sz) {
+ u32 pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
- if (orig_write_sz < pending_sz)
- vmbus_setevent(channel);
+ /*
+ * If there was space before we began iteration,
+ * then host was not blocked. Also handles case where
+ * pending_sz is zero then host has nothing pending
+ * and does not need to be signaled.
+ */
+ if (orig_write_sz > pending_sz)
+ return;
+
+ /* If pending write will not fit, don't give false hope. */
+ if (hv_get_bytes_to_write(rbi) < pending_sz)
+ return;
+ }
+
+ vmbus_setevent(channel);
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_close);
--
1.7.1