2014-04-23 20:29:22

by KY Srinivasan

Subject: [PATCH V1 net-next 1/1] hyperv: Enable sendbuf mechanism on the send path

We send packets using a copy-free mechanism (this is the Guest to Host transport
via VMBUS). While this is obviously optimal for large packets,
it may not be optimal for small packets. Hyper-V host supports
a second mechanism for sending packets that is "copy based". We implement that
mechanism in this patch.

In this version of the patch I have addressed a comment from David Miller.

With this patch (and all of the other offload and VRSS patches), we are now able
to almost saturate a 10G interface between Linux VMs on Hyper-V
on different hosts - close to 9 Gbps as measured via iperf.

Signed-off-by: K. Y. Srinivasan <[email protected]>
Reviewed-by: Haiyang Zhang <[email protected]>
---
drivers/net/hyperv/hyperv_net.h | 14 +++
drivers/net/hyperv/netvsc.c | 226 +++++++++++++++++++++++++++++++++++++--
drivers/net/hyperv/netvsc_drv.c | 3 +-
3 files changed, 234 insertions(+), 9 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index d1f7826..4b7df5a 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -140,6 +140,8 @@ struct hv_netvsc_packet {
void *send_completion_ctx;
void (*send_completion)(void *context);

+ u32 send_buf_index;
+
/* This points to the memory after page_buf */
struct rndis_message *rndis_msg;

@@ -582,6 +584,9 @@ struct nvsp_message {

#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */
#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */
+#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024) /* 1MB */
+#define NETVSC_INVALID_INDEX -1
+

#define NETVSC_RECEIVE_BUFFER_ID 0xcafe

@@ -607,6 +612,15 @@ struct netvsc_device {
u32 recv_section_cnt;
struct nvsp_1_receive_buffer_section *recv_section;

+ /* Send buffer allocated by us */
+ void *send_buf;
+ u32 send_buf_size;
+ u32 send_buf_gpadl_handle;
+ u32 send_section_cnt;
+ u32 send_section_size;
+ unsigned long *send_section_map;
+ int map_words;
+
/* Used for NetVSP initialization protocol */
struct completion channel_init_wait;
struct nvsp_message channel_init_pkt;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index bbee446..c041f63 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
+#include <asm/sync_bitops.h>

#include "hyperv_net.h"

@@ -80,7 +81,7 @@ get_in_err:
}


-static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
+static int netvsc_destroy_buf(struct netvsc_device *net_device)
{
struct nvsp_message *revoke_packet;
int ret = 0;
@@ -146,10 +147,62 @@ static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
net_device->recv_section = NULL;
}

+ /* Deal with the send buffer we may have setup.
+ * If we got a send section size, it means we received a
+ * SendsendBufferComplete msg (ie sent
+ * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
+ * to send a revoke msg here
+ */
+ if (net_device->send_section_size) {
+ /* Send the revoke receive buffer */
+ revoke_packet = &net_device->revoke_packet;
+ memset(revoke_packet, 0, sizeof(struct nvsp_message));
+
+ revoke_packet->hdr.msg_type =
+ NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
+ revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0;
+
+ ret = vmbus_sendpacket(net_device->dev->channel,
+ revoke_packet,
+ sizeof(struct nvsp_message),
+ (unsigned long)revoke_packet,
+ VM_PKT_DATA_INBAND, 0);
+ /* If we failed here, we might as well return and
+ * have a leak rather than continue and a bugchk
+ */
+ if (ret != 0) {
+ netdev_err(ndev, "unable to send "
+ "revoke send buffer to netvsp\n");
+ return ret;
+ }
+ }
+ /* Teardown the gpadl on the vsp end */
+ if (net_device->send_buf_gpadl_handle) {
+ ret = vmbus_teardown_gpadl(net_device->dev->channel,
+ net_device->send_buf_gpadl_handle);
+
+ /* If we failed here, we might as well return and have a leak
+ * rather than continue and a bugchk
+ */
+ if (ret != 0) {
+ netdev_err(ndev,
+ "unable to teardown send buffer's gpadl\n");
+ return ret;
+ }
+ net_device->recv_buf_gpadl_handle = 0;
+ }
+ if (net_device->send_buf) {
+ /* Free up the receive buffer */
+ free_pages((unsigned long)net_device->send_buf,
+ get_order(net_device->send_buf_size));
+ net_device->send_buf = NULL;
+ }
+ kfree(net_device->send_section_map);
+
return ret;
}

-static int netvsc_init_recv_buf(struct hv_device *device)
+static int netvsc_init_buf(struct hv_device *device)
{
int ret = 0;
int t;
@@ -248,10 +301,90 @@ static int netvsc_init_recv_buf(struct hv_device *device)
goto cleanup;
}

+ /* Now setup the send buffer.
+ */
+ net_device->send_buf =
+ (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
+ get_order(net_device->send_buf_size));
+ if (!net_device->send_buf) {
+ netdev_err(ndev, "unable to allocate send "
+ "buffer of size %d\n", net_device->send_buf_size);
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ /* Establish the gpadl handle for this buffer on this
+ * channel. Note: This call uses the vmbus connection rather
+ * than the channel to establish the gpadl handle.
+ */
+ ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
+ net_device->send_buf_size,
+ &net_device->send_buf_gpadl_handle);
+ if (ret != 0) {
+ netdev_err(ndev,
+ "unable to establish send buffer's gpadl\n");
+ goto cleanup;
+ }
+
+ /* Notify the NetVsp of the gpadl handle */
+ init_packet = &net_device->channel_init_pkt;
+ memset(init_packet, 0, sizeof(struct nvsp_message));
+ init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
+ init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
+ net_device->send_buf_gpadl_handle;
+ init_packet->msg.v1_msg.send_recv_buf.id = 0;
+
+ /* Send the gpadl notification request */
+ ret = vmbus_sendpacket(device->channel, init_packet,
+ sizeof(struct nvsp_message),
+ (unsigned long)init_packet,
+ VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret != 0) {
+ netdev_err(ndev,
+ "unable to send send buffer's gpadl to netvsp\n");
+ goto cleanup;
+ }
+
+ t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
+ BUG_ON(t == 0);
+
+ /* Check the response */
+ if (init_packet->msg.v1_msg.
+ send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
+ netdev_err(ndev, "Unable to complete send buffer "
+ "initialization with NetVsp - status %d\n",
+ init_packet->msg.v1_msg.
+ send_recv_buf_complete.status);
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ /* Parse the response */
+ net_device->send_section_size = init_packet->msg.
+ v1_msg.send_send_buf_complete.section_size;
+
+ /* Section count is simply the size divided by the section size.
+ */
+ net_device->send_section_cnt =
+ net_device->send_buf_size/net_device->send_section_size;
+
+ dev_info(&device->device, "Send section size: %d, Section count:%d\n",
+ net_device->send_section_size, net_device->send_section_cnt);
+
+ /* Setup state for managing the send buffer. */
+ net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
+ BITS_PER_LONG);
+
+ net_device->send_section_map =
+ kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
+ if (net_device->send_section_map == NULL)
+ goto cleanup;
+
goto exit;

cleanup:
- netvsc_destroy_recv_buf(net_device);
+ netvsc_destroy_buf(net_device);

exit:
return ret;
@@ -369,8 +502,9 @@ static int netvsc_connect_vsp(struct hv_device *device)
net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
else
net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
+ net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;

- ret = netvsc_init_recv_buf(device);
+ ret = netvsc_init_buf(device);

cleanup:
return ret;
@@ -378,7 +512,7 @@ cleanup:

static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
{
- netvsc_destroy_recv_buf(net_device);
+ netvsc_destroy_buf(net_device);
}

/*
@@ -440,6 +574,12 @@ static inline u32 hv_ringbuf_avail_percent(
return avail_write * 100 / ring_info->ring_datasize;
}

+static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
+ u32 index)
+{
+ sync_change_bit(index, net_device->send_section_map);
+}
+
static void netvsc_send_completion(struct netvsc_device *net_device,
struct hv_device *device,
struct vmpacket_descriptor *packet)
@@ -447,6 +587,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
struct nvsp_message *nvsp_packet;
struct hv_netvsc_packet *nvsc_packet;
struct net_device *ndev;
+ u32 send_index;

ndev = net_device->ndev;

@@ -477,6 +618,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device,

/* Notify the layer above us */
if (nvsc_packet) {
+ send_index = nvsc_packet->send_buf_index;
+ if (send_index != NETVSC_INVALID_INDEX)
+ netvsc_free_send_slot(net_device, send_index);
q_idx = nvsc_packet->q_idx;
channel = nvsc_packet->channel;
nvsc_packet->send_completion(nvsc_packet->
@@ -504,6 +648,52 @@ static void netvsc_send_completion(struct netvsc_device *net_device,

}

+static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
+{
+ unsigned long index;
+ u32 max_words = net_device->map_words;
+ unsigned long *map_addr = (unsigned long *)net_device->send_section_map;
+ u32 section_cnt = net_device->send_section_cnt;
+ int ret_val = NETVSC_INVALID_INDEX;
+ int i;
+ int prev_val;
+
+ for (i = 0; i < max_words; i++) {
+ if (!~(map_addr[i]))
+ continue;
+ index = ffz(map_addr[i]);
+ prev_val = sync_test_and_set_bit(index, &map_addr[i]);
+ if (prev_val)
+ continue;
+ if ((index + (i * BITS_PER_LONG)) >= section_cnt)
+ break;
+ ret_val = (index + (i * BITS_PER_LONG));
+ break;
+ }
+ return ret_val;
+}
+
+u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
+ unsigned int section_index,
+ struct hv_netvsc_packet *packet)
+{
+ char *start = net_device->send_buf;
+ char *dest = (start + (section_index * net_device->send_section_size));
+ int i;
+ u32 msg_size = 0;
+
+ for (i = 0; i < packet->page_buf_cnt; i++) {
+ char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
+ u32 offset = packet->page_buf[i].offset;
+ u32 len = packet->page_buf[i].len;
+
+ memcpy(dest, (src + offset), len);
+ msg_size += len;
+ dest += len;
+ }
+ return msg_size;
+}
+
int netvsc_send(struct hv_device *device,
struct hv_netvsc_packet *packet)
{
@@ -513,6 +703,10 @@ int netvsc_send(struct hv_device *device,
struct net_device *ndev;
struct vmbus_channel *out_channel = NULL;
u64 req_id;
+ unsigned int section_index = NETVSC_INVALID_INDEX;
+ u32 msg_size = 0;
+ struct sk_buff *skb;
+

net_device = get_outbound_net_device(device);
if (!net_device)
@@ -528,10 +722,26 @@ int netvsc_send(struct hv_device *device,
sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
}

- /* Not using send buffer section */
+ /* Attempt to send via sendbuf */
+ if (packet->total_data_buflen < net_device->send_section_size) {
+ section_index = netvsc_get_next_send_section(net_device);
+ if (section_index != NETVSC_INVALID_INDEX) {
+ msg_size = netvsc_copy_to_send_buf(net_device,
+ section_index,
+ packet);
+ skb = (struct sk_buff *)
+ (unsigned long)packet->send_completion_tid;
+ if (skb)
+ dev_kfree_skb_any(skb);
+ packet->page_buf_cnt = 0;
+ }
+ }
+ packet->send_buf_index = section_index;
+
+
sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
- 0xFFFFFFFF;
- sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
+ section_index;
+ sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size;

if (packet->send_completion)
req_id = (ulong)packet;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index c76b665..939e3af 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -236,10 +236,11 @@ static void netvsc_xmit_completion(void *context)
struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
struct sk_buff *skb = (struct sk_buff *)
(unsigned long)packet->send_completion_tid;
+ u32 index = packet->send_buf_index;

kfree(packet);

- if (skb)
+ if (skb && (index == NETVSC_INVALID_INDEX))
dev_kfree_skb_any(skb);
}

--
1.7.4.1


2014-04-23 21:46:54

by Dan Carpenter

Subject: Re: [PATCH V1 net-next 1/1] hyperv: Enable sendbuf mechanism on the send path


TL;DR: Style nits, and we should return -ENOMEM on error instead of success.

On Wed, Apr 23, 2014 at 02:24:45PM -0700, K. Y. Srinivasan wrote:
> We send packets using a copy-free mechanism (this is the Guest to Host transport
> via VMBUS). While this is obviously optimal for large packets,
> it may not be optimal for small packets. Hyper-V host supports
> a second mechanism for sending packets that is "copy based". We implement that
> mechanism in this patch.
>
> In this version of the patch I have addressed a comment from David Miller.
>
> With this patch (and all of the other offload and VRSS patches), we are now able
> to almost saturate a 10G interface between Linux VMs on Hyper-V
> on different hosts - close to 9 Gbps as measured via iperf.
>
> Signed-off-by: K. Y. Srinivasan <[email protected]>
> Reviewed-by: Haiyang Zhang <[email protected]>
> ---
> drivers/net/hyperv/hyperv_net.h | 14 +++
> drivers/net/hyperv/netvsc.c | 226 +++++++++++++++++++++++++++++++++++++--
> drivers/net/hyperv/netvsc_drv.c | 3 +-
> 3 files changed, 234 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
> index d1f7826..4b7df5a 100644
> --- a/drivers/net/hyperv/hyperv_net.h
> +++ b/drivers/net/hyperv/hyperv_net.h
> @@ -140,6 +140,8 @@ struct hv_netvsc_packet {
> void *send_completion_ctx;
> void (*send_completion)(void *context);
>
> + u32 send_buf_index;
> +
> /* This points to the memory after page_buf */
> struct rndis_message *rndis_msg;
>
> @@ -582,6 +584,9 @@ struct nvsp_message {
>
> #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */
> #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */
> +#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024) /* 1MB */
> +#define NETVSC_INVALID_INDEX -1
> +
>
> #define NETVSC_RECEIVE_BUFFER_ID 0xcafe
>
> @@ -607,6 +612,15 @@ struct netvsc_device {
> u32 recv_section_cnt;
> struct nvsp_1_receive_buffer_section *recv_section;
>
> + /* Send buffer allocated by us */
> + void *send_buf;
> + u32 send_buf_size;
> + u32 send_buf_gpadl_handle;
> + u32 send_section_cnt;
> + u32 send_section_size;
> + unsigned long *send_section_map;
> + int map_words;
> +
> /* Used for NetVSP initialization protocol */
> struct completion channel_init_wait;
> struct nvsp_message channel_init_pkt;
> diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
> index bbee446..c041f63 100644
> --- a/drivers/net/hyperv/netvsc.c
> +++ b/drivers/net/hyperv/netvsc.c
> @@ -28,6 +28,7 @@
> #include <linux/slab.h>
> #include <linux/netdevice.h>
> #include <linux/if_ether.h>
> +#include <asm/sync_bitops.h>
>
> #include "hyperv_net.h"
>
> @@ -80,7 +81,7 @@ get_in_err:
> }
>
>
> -static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
> +static int netvsc_destroy_buf(struct netvsc_device *net_device)
> {
> struct nvsp_message *revoke_packet;
> int ret = 0;
> @@ -146,10 +147,62 @@ static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
> net_device->recv_section = NULL;
> }
>
> + /* Deal with the send buffer we may have setup.
> + * If we got a send section size, it means we received a
> + * SendsendBufferComplete msg (ie sent
> + * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
> + * to send a revoke msg here
> + */
> + if (net_device->send_section_size) {
> + /* Send the revoke receive buffer */
> + revoke_packet = &net_device->revoke_packet;
> + memset(revoke_packet, 0, sizeof(struct nvsp_message));
> +
> + revoke_packet->hdr.msg_type =
> + NVSP_MSG1_TYPE_REVOKE_SEND_BUF;

This fits in 80 characters.

revoke_packet->hdr.msg_type = NVSP_MSG1_TYPE_REVOKE_SEND_BUF;

> + revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0;
> +
> + ret = vmbus_sendpacket(net_device->dev->channel,
> + revoke_packet,
> + sizeof(struct nvsp_message),
> + (unsigned long)revoke_packet,
> + VM_PKT_DATA_INBAND, 0);
> + /* If we failed here, we might as well return and
> + * have a leak rather than continue and a bugchk
> + */
> + if (ret != 0) {

I have never been a big fan of these double negative conditions. It's
simpler and more normal style to say: "if (ret) { ". There are some
cases where it makes sense to compare against zero. For example, if you
are dealing with a variable as a number then you would say
"if (x == 0) ... else if (x == 1) ". Also for strcmp() then you should
use == 0 and != 0. But for error codes it's better it's more idiomatic
to just say "if (ret) ".
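
For instance, the check below would then look something like this (just a
sketch of the idiom, with the split message joined back onto one line):

	if (ret) {
		netdev_err(ndev,
			   "unable to send revoke send buffer to netvsp\n");
		return ret;
	}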

> + netdev_err(ndev, "unable to send "
> + "revoke send buffer to netvsp\n");
> + return ret;
> + }
> + }
> + /* Teardown the gpadl on the vsp end */
> + if (net_device->send_buf_gpadl_handle) {
> + ret = vmbus_teardown_gpadl(net_device->dev->channel,
> + net_device->send_buf_gpadl_handle);
> +
> + /* If we failed here, we might as well return and have a leak
> + * rather than continue and a bugchk
> + */
> + if (ret != 0) {
> + netdev_err(ndev,
> + "unable to teardown send buffer's gpadl\n");
> + return ret;
> + }
> + net_device->recv_buf_gpadl_handle = 0;
> + }
> + if (net_device->send_buf) {
> + /* Free up the receive buffer */
> + free_pages((unsigned long)net_device->send_buf,
> + get_order(net_device->send_buf_size));
> + net_device->send_buf = NULL;
> + }
> + kfree(net_device->send_section_map);
> +
> return ret;

This patch doesn't introduce this, but it's better to say:

return 0;

That is clearer, since the reader doesn't have to trace back and verify
what "ret" is.

> }
>
> -static int netvsc_init_recv_buf(struct hv_device *device)
> +static int netvsc_init_buf(struct hv_device *device)
> {
> int ret = 0;
> int t;
> @@ -248,10 +301,90 @@ static int netvsc_init_recv_buf(struct hv_device *device)
> goto cleanup;
> }
>
> + /* Now setup the send buffer.
> + */
> + net_device->send_buf =
> + (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
> + get_order(net_device->send_buf_size));
> + if (!net_device->send_buf) {
> + netdev_err(ndev, "unable to allocate send "
> + "buffer of size %d\n", net_device->send_buf_size);

This string actually fits into 80 characters fine as-is.

netdev_err(ndev, "unable to allocate send buffer of size %d\n",
net_device->send_buf_size);


> + ret = -ENOMEM;
> + goto cleanup;
> + }
> +
> + /* Establish the gpadl handle for this buffer on this
> + * channel. Note: This call uses the vmbus connection rather
> + * than the channel to establish the gpadl handle.
> + */
> + ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
> + net_device->send_buf_size,
> + &net_device->send_buf_gpadl_handle);
> + if (ret != 0) {
> + netdev_err(ndev,
> + "unable to establish send buffer's gpadl\n");

Same. Just put it on one line.

netdev_err(ndev, "unable to establish send buffer's gpadl\n");

> + goto cleanup;
> + }
> +
> + /* Notify the NetVsp of the gpadl handle */
> + init_packet = &net_device->channel_init_pkt;
> + memset(init_packet, 0, sizeof(struct nvsp_message));
> + init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
> + init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
> + net_device->send_buf_gpadl_handle;
> + init_packet->msg.v1_msg.send_recv_buf.id = 0;
> +
> + /* Send the gpadl notification request */
> + ret = vmbus_sendpacket(device->channel, init_packet,
> + sizeof(struct nvsp_message),
> + (unsigned long)init_packet,
> + VM_PKT_DATA_INBAND,
> + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> + if (ret != 0) {
> + netdev_err(ndev,
> + "unable to send send buffer's gpadl to netvsp\n");
> + goto cleanup;
> + }
> +
> + t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
> + BUG_ON(t == 0);

Is there no way to just return an error code here? We may as well use
wait_for_completion() without the timeout if we're just going to call
BUG_ON() anyway.
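
If a timeout really can happen here, something like this seems friendlier
(untested sketch; -ETIMEDOUT is only a suggestion):

	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
	if (t == 0) {
		netdev_err(ndev, "timed out waiting for send buffer setup\n");
		ret = -ETIMEDOUT;
		goto cleanup;
	}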

> +
> + /* Check the response */
> + if (init_packet->msg.v1_msg.
> + send_send_buf_complete.status != NVSP_STAT_SUCCESS) {

Better to break it up like this:

if (init_packet->msg.v1_msg.send_send_buf_complete.status !=
NVSP_STAT_SUCCESS) {

There has to be a better name for "send_send_buf_complete"...

> + netdev_err(ndev, "Unable to complete send buffer "
> + "initialization with NetVsp - status %d\n",
> + init_packet->msg.v1_msg.
> + send_recv_buf_complete.status);
> + ret = -EINVAL;
> + goto cleanup;
> + }
> +
> + /* Parse the response */
> + net_device->send_section_size = init_packet->msg.
> + v1_msg.send_send_buf_complete.section_size;
> +
> + /* Section count is simply the size divided by the section size.
> + */
> + net_device->send_section_cnt =
> + net_device->send_buf_size/net_device->send_section_size;
> +
> + dev_info(&device->device, "Send section size: %d, Section count:%d\n",
> + net_device->send_section_size, net_device->send_section_cnt);
> +
> + /* Setup state for managing the send buffer. */
> + net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
> + BITS_PER_LONG);
> +
> + net_device->send_section_map =
> + kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);

Use kcalloc().

net_device->send_section_map = kcalloc(net_device->map_words,
sizeof(ulong), GFP_KERNEL);

> + if (net_device->send_section_map == NULL)
> + goto cleanup;

Set "ret = -ENOMEM;"

> +
> goto exit;
>

These bunny hops are super annoying. Just return success here. Why
does code have to be all tangled up?
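
In other words, something like this (sketch):

	return 0;

cleanup:
	netvsc_destroy_buf(net_device);

	return ret;

and the "exit:" label goes away.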

> cleanup:
> - netvsc_destroy_recv_buf(net_device);
> + netvsc_destroy_buf(net_device);
>
> exit:
> return ret;
> @@ -369,8 +502,9 @@ static int netvsc_connect_vsp(struct hv_device *device)
> net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
> else
> net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
> + net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
>
> - ret = netvsc_init_recv_buf(device);
> + ret = netvsc_init_buf(device);
>
> cleanup:
> return ret;
> @@ -378,7 +512,7 @@ cleanup:
>
> static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
> {
> - netvsc_destroy_recv_buf(net_device);
> + netvsc_destroy_buf(net_device);
> }
>
> /*
> @@ -440,6 +574,12 @@ static inline u32 hv_ringbuf_avail_percent(
> return avail_write * 100 / ring_info->ring_datasize;
> }
>
> +static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
> + u32 index)
> +{
> + sync_change_bit(index, net_device->send_section_map);
> +}
> +
> static void netvsc_send_completion(struct netvsc_device *net_device,
> struct hv_device *device,
> struct vmpacket_descriptor *packet)
> @@ -447,6 +587,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
> struct nvsp_message *nvsp_packet;
> struct hv_netvsc_packet *nvsc_packet;
> struct net_device *ndev;
> + u32 send_index;
>
> ndev = net_device->ndev;
>
> @@ -477,6 +618,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
>
> /* Notify the layer above us */
> if (nvsc_packet) {
> + send_index = nvsc_packet->send_buf_index;
> + if (send_index != NETVSC_INVALID_INDEX)
> + netvsc_free_send_slot(net_device, send_index);
> q_idx = nvsc_packet->q_idx;
> channel = nvsc_packet->channel;
> nvsc_packet->send_completion(nvsc_packet->
> @@ -504,6 +648,52 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
>
> }
>
> +static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
> +{
> + unsigned long index;
> + u32 max_words = net_device->map_words;
> + unsigned long *map_addr = (unsigned long *)net_device->send_section_map;
> + u32 section_cnt = net_device->send_section_cnt;
> + int ret_val = NETVSC_INVALID_INDEX;
> + int i;
> + int prev_val;
> +
> + for (i = 0; i < max_words; i++) {
> + if (!~(map_addr[i]))
> + continue;
> + index = ffz(map_addr[i]);
> + prev_val = sync_test_and_set_bit(index, &map_addr[i]);
> + if (prev_val)
> + continue;
> + if ((index + (i * BITS_PER_LONG)) >= section_cnt)
> + break;
> + ret_val = (index + (i * BITS_PER_LONG));
> + break;
> + }
> + return ret_val;
> +}
> +
> +u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
> + unsigned int section_index,
> + struct hv_netvsc_packet *packet)
> +{
> + char *start = net_device->send_buf;
> + char *dest = (start + (section_index * net_device->send_section_size));
> + int i;
> + u32 msg_size = 0;
> +
> + for (i = 0; i < packet->page_buf_cnt; i++) {
> + char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
> + u32 offset = packet->page_buf[i].offset;
> + u32 len = packet->page_buf[i].len;
> +
> + memcpy(dest, (src + offset), len);
> + msg_size += len;
> + dest += len;
> + }
> + return msg_size;
> +}
> +
> int netvsc_send(struct hv_device *device,
> struct hv_netvsc_packet *packet)
> {
> @@ -513,6 +703,10 @@ int netvsc_send(struct hv_device *device,
> struct net_device *ndev;
> struct vmbus_channel *out_channel = NULL;
> u64 req_id;
> + unsigned int section_index = NETVSC_INVALID_INDEX;
> + u32 msg_size = 0;
> + struct sk_buff *skb;
> +
>

Don't put consecutive blank lines.

> net_device = get_outbound_net_device(device);
> if (!net_device)
> @@ -528,10 +722,26 @@ int netvsc_send(struct hv_device *device,
> sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
> }
>
> - /* Not using send buffer section */
> + /* Attempt to send via sendbuf */
> + if (packet->total_data_buflen < net_device->send_section_size) {
> + section_index = netvsc_get_next_send_section(net_device);
> + if (section_index != NETVSC_INVALID_INDEX) {
> + msg_size = netvsc_copy_to_send_buf(net_device,
> + section_index,
> + packet);
> + skb = (struct sk_buff *)
> + (unsigned long)packet->send_completion_tid;
> + if (skb)
> + dev_kfree_skb_any(skb);
> + packet->page_buf_cnt = 0;
> + }
> + }
> + packet->send_buf_index = section_index;
> +
> +

Don't put consecutive blank lines.

regards,
dan carpenter

> sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
> - 0xFFFFFFFF;
> - sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
> + section_index;
> + sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size;
>
> if (packet->send_completion)
> req_id = (ulong)packet;
> diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
> index c76b665..939e3af 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -236,10 +236,11 @@ static void netvsc_xmit_completion(void *context)
> struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
> struct sk_buff *skb = (struct sk_buff *)
> (unsigned long)packet->send_completion_tid;
> + u32 index = packet->send_buf_index;
>
> kfree(packet);
>
> - if (skb)
> + if (skb && (index == NETVSC_INVALID_INDEX))
> dev_kfree_skb_any(skb);
> }
>
> --
> 1.7.4.1
>
> _______________________________________________
> devel mailing list
> [email protected]
> http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

2014-04-24 21:49:46

by Andev

Subject: Re: [PATCH V1 net-next 1/1] hyperv: Enable sendbuf mechanism on the send path

On Wed, Apr 23, 2014 at 5:24 PM, K. Y. Srinivasan <[email protected]> wrote:

> drivers/net/hyperv/hyperv_net.h | 14 +++
> drivers/net/hyperv/netvsc.c | 226 +++++++++++++++++++++++++++++++++++++--
> drivers/net/hyperv/netvsc_drv.c | 3 +-
> 3 files changed, 234 insertions(+), 9 deletions(-)

I just looked over netvsc.c and it could definitely use a more
consistent coding style.

Your use of goto exit/cleanup in some functions and returning directly
on errors in others could use a cleanup. Please consider doing that
while you are touching those files.

2014-04-24 22:06:45

by KY Srinivasan

Subject: RE: [PATCH V1 net-next 1/1] hyperv: Enable sendbuf mechanism on the send path



> -----Original Message-----
> From: Andev [mailto:[email protected]]
> Sent: Thursday, April 24, 2014 2:50 PM
> To: KY Srinivasan
> Cc: [email protected]; [email protected]; LKML;
> [email protected]; [email protected]; [email protected];
> [email protected]
> Subject: Re: [PATCH V1 net-next 1/1] hyperv: Enable sendbuf mechanism on
> the send path
>
> On Wed, Apr 23, 2014 at 5:24 PM, K. Y. Srinivasan <[email protected]>
> wrote:
>
> > drivers/net/hyperv/hyperv_net.h | 14 +++
> > drivers/net/hyperv/netvsc.c | 226 +++++++++++++++++++++++++++++++++++++--
> > drivers/net/hyperv/netvsc_drv.c | 3 +-
> > 3 files changed, 234 insertions(+), 9 deletions(-)
>
> I just looked over netvsc.c and it could definitely use a more consistent
> coding style.
>
> Your use of goto exit/cleanup in some functions and returning directly on
> errors in others could use a cleanup. Please consider doing that while you are
> touching those files.

Will do. The most recent changes I made to netvsc.c were, I think, consistent with the existing code;
going forward we will certainly move towards a more consistent coding style.

Regards,

K. Y

2014-04-24 22:31:28

by Dan Carpenter

Subject: Re: [PATCH V1 net-next 1/1] hyperv: Enable sendbuf mechanism on the send path

On Thu, Apr 24, 2014 at 10:06:24PM +0000, KY Srinivasan wrote:
> > From: Andev [mailto:[email protected]]
> > Your use of goto exit/cleanup in some functions and returning directly on
> > errors in others could use a cleanup. Please consider doing that while you are
> > touching those files.
>
> Will do. The most recent changes I made to netvsc.c, I think was
> consistent with the existing code; going forward we will certainly
> move towards a more consistent coding style.

It scares me when you talk about being consistent with the existing
code... Just do it the correct way.

1) Don't do the "return ret;" if you know ret is zero.

2) Replace:
ret = vmbus_sendpacket(...);

return ret;
with
return vmbus_sendpacket(...);

3) Don't do "goto cleanup;" when "return ret;" will suffice. The
do-nothing goto is misleading because you assume it will clean up
something. Some people used to misread CodingStyle as saying that all
functions should have only one return, but I have updated it so it is
clearer.
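
To illustrate point 3, a do-nothing goto like

	if (ret)
		goto exit;
	...
exit:
	return ret;

reads better as simply (sketch):

	if (ret)
		return ret;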

regards,
dan carpenter

2014-04-30 14:17:08

by KY Srinivasan

Subject: RE: [PATCH V1 net-next 1/1] hyperv: Enable sendbuf mechanism on the send path



> -----Original Message-----
> From: K. Y. Srinivasan [mailto:[email protected]]
> Sent: Wednesday, April 23, 2014 2:25 PM
> To: [email protected]; [email protected]; linux-
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]
> Cc: KY Srinivasan
> Subject: [PATCH V1 net-next 1/1] hyperv: Enable sendbuf mechanism on the
> send path
>
> We send packets using a copy-free mechanism (this is the Guest to Host
> transport via VMBUS). While this is obviously optimal for large packets, it may
> not be optimal for small packets. Hyper-V host supports a second mechanism
> for sending packets that is "copy based". We implement that mechanism in
> this patch.
>
> In this version of the patch I have addressed a comment from David Miller.

David,

Please let me know if there is some other issue you want addressed in this patch.

Regards,

K. Y

2014-04-30 16:06:55

by David Miller

Subject: Re: [PATCH V1 net-next 1/1] hyperv: Enable sendbuf mechanism on the send path

From: KY Srinivasan <[email protected]>
Date: Wed, 30 Apr 2014 14:17:04 +0000

> Please let me know if there is some other issue you want addressed in this patch.

If the patch isn't in patchwork, I'm not going to apply it and you need to
simply resubmit it.