Add dev_num for vmbus device based on channel offer sequence.
Use this number for nic naming, and enable async probing.
Haiyang Zhang (3):
Drivers: hv: vmbus: Add a dev_num variable based on channel offer sequence
Drivers: hv: vmbus: Add dev_num to sysfs
hv_netvsc: Name NICs based on vmbus offer sequence and use async probe
Documentation/ABI/stable/sysfs-bus-vmbus | 8 +++++++
drivers/hv/channel_mgmt.c | 40 ++++++++++++++++++++++++++++++--
drivers/hv/vmbus_drv.c | 13 +++++++++++
drivers/net/hyperv/netvsc_drv.c | 18 +++++++++++---
include/linux/hyperv.h | 6 +++++
5 files changed, 80 insertions(+), 5 deletions(-)
--
1.8.3.1
This number is set to the first available number, starting from zero,
when a vmbus device’s primary channel is offered.
It will be used for stable naming when Async probing is used.
Signed-off-by: Haiyang Zhang <[email protected]>
---
drivers/hv/channel_mgmt.c | 40 ++++++++++++++++++++++++++++++++++++++--
include/linux/hyperv.h | 6 ++++++
2 files changed, 44 insertions(+), 2 deletions(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 8eb1675..b14c6a2 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -315,6 +315,8 @@ static struct vmbus_channel *alloc_channel(void)
if (!channel)
return NULL;
+ channel->dev_num = HV_DEV_NUM_INVALID;
+
spin_lock_init(&channel->lock);
init_completion(&channel->rescind_event);
@@ -541,6 +543,38 @@ static void vmbus_add_channel_work(struct work_struct *work)
}
/*
+ * Get the first available device number of its type, then
+ * record it in the channel structure.
+ */
+static void hv_set_devnum(struct vmbus_channel *newchannel)
+{
+ struct vmbus_channel *channel;
+ unsigned int i = 0;
+ bool found;
+
+ BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
+
+next:
+ found = false;
+
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ if (i == channel->dev_num &&
+ guid_equal(&channel->offermsg.offer.if_type,
+ &newchannel->offermsg.offer.if_type)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) {
+ i++;
+ goto next;
+ }
+
+ newchannel->dev_num = i;
+}
+
+/*
* vmbus_process_offer - Process the offer by creating a channel/device
* associated with this offer
*/
@@ -573,10 +607,12 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
}
}
- if (fnew)
+ if (fnew) {
+ hv_set_devnum(newchannel);
+
list_add_tail(&newchannel->listentry,
&vmbus_connection.chn_list);
- else {
+ } else {
/*
* Check to see if this is a valid sub-channel.
*/
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 26f3aee..4f110c5 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -718,6 +718,8 @@ struct vmbus_device {
bool perf_device;
};
+#define HV_DEV_NUM_INVALID (-1)
+
struct vmbus_channel {
struct list_head listentry;
@@ -849,6 +851,10 @@ struct vmbus_channel {
*/
struct vmbus_channel *primary_channel;
/*
+ * Used for device naming based on channel offer sequence.
+ */
+ int dev_num;
+ /*
* Support per-channel state for use by vmbus drivers.
*/
void *per_channel_state;
--
1.8.3.1
It's a number based on the vmbus device offer sequence.
Useful for device naming when using Async probing.
Signed-off-by: Haiyang Zhang <[email protected]>
---
Documentation/ABI/stable/sysfs-bus-vmbus | 8 ++++++++
drivers/hv/vmbus_drv.c | 13 +++++++++++++
2 files changed, 21 insertions(+)
diff --git a/Documentation/ABI/stable/sysfs-bus-vmbus b/Documentation/ABI/stable/sysfs-bus-vmbus
index 8e8d167..a42225d 100644
--- a/Documentation/ABI/stable/sysfs-bus-vmbus
+++ b/Documentation/ABI/stable/sysfs-bus-vmbus
@@ -49,6 +49,14 @@ Contact: Stephen Hemminger <[email protected]>
Description: This NUMA node to which the VMBUS device is
attached, or -1 if the node is unknown.
+What: /sys/bus/vmbus/devices/<UUID>/dev_num
+Date: Dec 2019
+KernelVersion: 5.5
+Contact: Haiyang Zhang <[email protected]>
+Description: A number based on the vmbus device offer sequence.
+ Useful for device naming when using Async probing.
+Users: Debugging tools and userspace drivers
+
What: /sys/bus/vmbus/devices/<UUID>/channels/<N>
Date: September. 2017
KernelVersion: 4.14
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 4ef5a66..fe7aefa 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -214,6 +214,18 @@ static ssize_t numa_node_show(struct device *dev,
static DEVICE_ATTR_RO(numa_node);
#endif
+static ssize_t dev_num_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hv_device *hv_dev = device_to_hv_device(dev);
+
+ if (!hv_dev->channel)
+ return -ENODEV;
+
+ return sprintf(buf, "%d\n", hv_dev->channel->dev_num);
+}
+static DEVICE_ATTR_RO(dev_num);
+
static ssize_t server_monitor_pending_show(struct device *dev,
struct device_attribute *dev_attr,
char *buf)
@@ -598,6 +610,7 @@ static ssize_t driver_override_show(struct device *dev,
#ifdef CONFIG_NUMA
&dev_attr_numa_node.attr,
#endif
+ &dev_attr_dev_num.attr,
&dev_attr_server_monitor_pending.attr,
&dev_attr_client_monitor_pending.attr,
&dev_attr_server_monitor_latency.attr,
--
1.8.3.1
The dev_num field in vmbus channel structure is assigned to the first
available number when the channel is offered. So netvsc driver uses it
for NIC naming based on channel offer sequence. Now re-enable the async
probing mode for faster probing.
Signed-off-by: Haiyang Zhang <[email protected]>
---
drivers/net/hyperv/netvsc_drv.c | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index f3f9eb8..39c412f 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2267,10 +2267,14 @@ static int netvsc_probe(struct hv_device *dev,
struct net_device_context *net_device_ctx;
struct netvsc_device_info *device_info = NULL;
struct netvsc_device *nvdev;
+ char name[IFNAMSIZ];
int ret = -ENOMEM;
- net = alloc_etherdev_mq(sizeof(struct net_device_context),
- VRSS_CHANNEL_MAX);
+ snprintf(name, IFNAMSIZ, "eth%d", dev->channel->dev_num);
+ net = alloc_netdev_mqs(sizeof(struct net_device_context), name,
+ NET_NAME_ENUM, ether_setup,
+ VRSS_CHANNEL_MAX, VRSS_CHANNEL_MAX);
+
if (!net)
goto no_net;
@@ -2355,6 +2359,14 @@ static int netvsc_probe(struct hv_device *dev,
net->max_mtu = ETH_DATA_LEN;
ret = register_netdevice(net);
+
+ if (ret == -EEXIST) {
+ pr_info("NIC name %s exists, request another name.\n",
+ net->name);
+ strlcpy(net->name, "eth%d", IFNAMSIZ);
+ ret = register_netdevice(net);
+ }
+
if (ret != 0) {
pr_err("Unable to register netdev.\n");
goto register_failed;
@@ -2496,7 +2508,7 @@ static int netvsc_resume(struct hv_device *dev)
.suspend = netvsc_suspend,
.resume = netvsc_resume,
.driver = {
- .probe_type = PROBE_FORCE_SYNCHRONOUS,
+ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
},
};
--
1.8.3.1
On Sat, 28 Dec 2019 15:46:33 -0800
Haiyang Zhang <[email protected]> wrote:
> - net = alloc_etherdev_mq(sizeof(struct net_device_context),
> - VRSS_CHANNEL_MAX);
> + snprintf(name, IFNAMSIZ, "eth%d", dev->channel->dev_num);
> + net = alloc_netdev_mqs(sizeof(struct net_device_context), name,
> + NET_NAME_ENUM, ether_setup,
> + VRSS_CHANNEL_MAX, VRSS_CHANNEL_MAX);
> +
Naming is a hard problem, and best left to userspace.
By choosing ethN as a naming policy, you potentially run into naming
conflicts with other non netvsc devices like those passed through or
SR-IOV devices.
Better to have udev use dev_num and use something like envN or something.
Udev also handles SRIOV devices in later versions.
Fighting against systemd, netplan, etc is not going to be make friends.
On Sat, 28 Dec 2019 15:46:31 -0800
Haiyang Zhang <[email protected]> wrote:
> +
> +next:
> + found = false;
> +
> + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
> + if (i == channel->dev_num &&
> + guid_equal(&channel->offermsg.offer.if_type,
> + &newchannel->offermsg.offer.if_type)) {
> + found = true;
> + break;
> + }
> + }
> +
> + if (found) {
> + i++;
> + goto next;
> + }
> +
Overall, keeping track of dev_num is a good solution.
I prefer not having a loop coded with goto's. Why not
a nested loop. Also, there already is a search of the channel
list in vmbus_process_offer() so why is another lookup needed?
> -----Original Message-----
> From: Stephen Hemminger <[email protected]>
> Sent: Saturday, December 28, 2019 7:20 PM
> To: Haiyang Zhang <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> KY Srinivasan <[email protected]>; Stephen Hemminger
> <[email protected]>; [email protected]; vkuznets
> <[email protected]>; [email protected]; [email protected]
> Subject: Re: [PATCH net-next, 1/3] Drivers: hv: vmbus: Add a dev_num variable
> based on channel offer sequence
>
> On Sat, 28 Dec 2019 15:46:31 -0800
> Haiyang Zhang <[email protected]> wrote:
>
> > +
> > +next:
> > + found = false;
> > +
> > + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
> > + if (i == channel->dev_num &&
> > + guid_equal(&channel->offermsg.offer.if_type,
> > + &newchannel->offermsg.offer.if_type)) {
> > + found = true;
> > + break;
> > + }
> > + }
> > +
> > + if (found) {
> > + i++;
> > + goto next;
> > + }
> > +
>
> Overall, keeping track of dev_num is a good solution.
>
> I prefer not having a loop coded with goto's. Why not
> a nested loop.
Sure, I can use a nested loop.
> Also, there already is a search of the channel
> list in vmbus_process_offer() so why is another lookup needed?
vmbus_process_offer() looks for the if_instance and if_type matches
to determine if this is a subchannel vs primary channel. The loop
terminates at different condition from hv_set_devnum().
So I didn't re-use the existing loop.
And this kind of search happens only during channel offering, and
doesn't impact performance much.
Thanks,
- Haiyang
> -----Original Message-----
> From: Stephen Hemminger <[email protected]>
> Sent: Saturday, December 28, 2019 7:13 PM
> To: Haiyang Zhang <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> KY Srinivasan <[email protected]>; Stephen Hemminger
> <[email protected]>; [email protected]; vkuznets
> <[email protected]>; [email protected]; [email protected]
> Subject: Re: [PATCH net-next, 3/3] hv_netvsc: Name NICs based on vmbus offer
> sequence and use async probe
>
> On Sat, 28 Dec 2019 15:46:33 -0800
> Haiyang Zhang <[email protected]> wrote:
>
> > - net = alloc_etherdev_mq(sizeof(struct net_device_context),
> > - VRSS_CHANNEL_MAX);
> > + snprintf(name, IFNAMSIZ, "eth%d", dev->channel->dev_num);
> > + net = alloc_netdev_mqs(sizeof(struct net_device_context), name,
> > + NET_NAME_ENUM, ether_setup,
> > + VRSS_CHANNEL_MAX, VRSS_CHANNEL_MAX);
> > +
>
> Naming is a hard problem, and best left to userspace.
> By choosing ethN as a naming policy, you potentially run into naming
> conflicts with other non netvsc devices like those passed through or
> SR-IOV devices.
If the dev_num based naming conflicts with existing device, it will fall back to
the previous scheme: "choose the next available eth* name by specifying eth%d".
See the fall back code path below:
if (ret == -EEXIST) {
pr_info("NIC name %s exists, request another name.\n",
net->name);
strlcpy(net->name, "eth%d", IFNAMSIZ);
ret = register_netdevice(net);
}
> Better to have udev use dev_num and use something like envN or something.
> Udev also handles SRIOV devices in later versions.
>
> Fighting against systemd, netplan, etc is not going to be make friends.
When netvsc was initially created, it uses "seth*" naming. But with strong requests
from many customers, we switched back to the regular "eth*" naming format.
The results of using dev_num based "eth*" names is same as what we are doing now --
The existing synchronous probing already generates "eth*" based on channel offer
sequence. With my patch, it still generates the same naming, but with asynchronous
probing.
So all the udev, or other user daemons, sees the same name for each device as before.
And, they can still set these names like what they do today.
Thanks,
- Haiyang