Currently there is no limit to the number of VFs netdevsim can enable.
In a real systems this value exist and used by driver.
Fore example, Some features might need to consider this value when
allocating memory.
Signed-off-by: Yuval Avnery <[email protected]>
Acked-by: Jiri Pirko <[email protected]>
---
drivers/net/netdevsim/bus.c | 39 +++++++++++++++++++++++--------
drivers/net/netdevsim/netdevsim.h | 1 +
2 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index 6aeed0c600f8..f1a0171080cb 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -26,9 +26,9 @@ static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev)
static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev,
unsigned int num_vfs)
{
- nsim_bus_dev->vfconfigs = kcalloc(num_vfs,
- sizeof(struct nsim_vf_config),
- GFP_KERNEL);
+ if (nsim_bus_dev->max_vfs < num_vfs)
+ return -ENOMEM;
+
if (!nsim_bus_dev->vfconfigs)
return -ENOMEM;
nsim_bus_dev->num_vfs = num_vfs;
@@ -38,8 +38,6 @@ static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev,
static void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev)
{
- kfree(nsim_bus_dev->vfconfigs);
- nsim_bus_dev->vfconfigs = NULL;
nsim_bus_dev->num_vfs = 0;
}
@@ -154,22 +152,29 @@ static struct device_type nsim_bus_dev_type = {
};
static struct nsim_bus_dev *
-nsim_bus_dev_new(unsigned int id, unsigned int port_count);
+nsim_bus_dev_new(unsigned int id, unsigned int port_count,
+ unsigned int max_vfs);
+
+#define NSIM_BUS_DEV_MAX_VFS 4
static ssize_t
new_device_store(struct bus_type *bus, const char *buf, size_t count)
{
struct nsim_bus_dev *nsim_bus_dev;
unsigned int port_count;
+ unsigned int max_vfs;
unsigned int id;
int err;
- err = sscanf(buf, "%u %u", &id, &port_count);
+ err = sscanf(buf, "%u %u %u", &id, &port_count, &max_vfs);
switch (err) {
case 1:
port_count = 1;
/* fall through */
case 2:
+ max_vfs = NSIM_BUS_DEV_MAX_VFS;
+ /* fall through */
+ case 3:
if (id > INT_MAX) {
pr_err("Value of \"id\" is too big.\n");
return -EINVAL;
@@ -179,7 +184,7 @@ new_device_store(struct bus_type *bus, const char *buf, size_t count)
pr_err("Format for adding new device is \"id port_count\" (uint uint).\n");
return -EINVAL;
}
- nsim_bus_dev = nsim_bus_dev_new(id, port_count);
+ nsim_bus_dev = nsim_bus_dev_new(id, port_count, max_vfs);
if (IS_ERR(nsim_bus_dev))
return PTR_ERR(nsim_bus_dev);
@@ -267,7 +272,8 @@ static struct bus_type nsim_bus = {
};
static struct nsim_bus_dev *
-nsim_bus_dev_new(unsigned int id, unsigned int port_count)
+nsim_bus_dev_new(unsigned int id, unsigned int port_count,
+ unsigned int max_vfs)
{
struct nsim_bus_dev *nsim_bus_dev;
int err;
@@ -284,12 +290,24 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count)
nsim_bus_dev->dev.type = &nsim_bus_dev_type;
nsim_bus_dev->port_count = port_count;
nsim_bus_dev->initial_net = current->nsproxy->net_ns;
+ nsim_bus_dev->max_vfs = max_vfs;
+
+ nsim_bus_dev->vfconfigs = kcalloc(nsim_bus_dev->max_vfs,
+ sizeof(struct nsim_vf_config),
+ GFP_KERNEL);
+ if (!nsim_bus_dev->vfconfigs) {
+ err = -ENOMEM;
+ goto err_nsim_bus_dev_id_free;
+ }
err = device_register(&nsim_bus_dev->dev);
if (err)
- goto err_nsim_bus_dev_id_free;
+ goto err_nsim_vfconfigs_free;
+
return nsim_bus_dev;
+err_nsim_vfconfigs_free:
+ kfree(nsim_bus_dev->vfconfigs);
err_nsim_bus_dev_id_free:
ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
err_nsim_bus_dev_free:
@@ -301,6 +319,7 @@ static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev)
{
device_unregister(&nsim_bus_dev->dev);
ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
+ kfree(nsim_bus_dev->vfconfigs);
kfree(nsim_bus_dev);
}
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 94df795ef4d3..e2049856add8 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -238,6 +238,7 @@ struct nsim_bus_dev {
struct net *initial_net; /* Purpose of this is to carry net pointer
* during the probe time only.
*/
+ unsigned int max_vfs;
unsigned int num_vfs;
struct nsim_vf_config *vfconfigs;
};
--
2.17.1
On Wed, 11 Dec 2019 04:58:53 +0200, Yuval Avnery wrote:
> Currently there is no limit to the number of VFs netdevsim can enable.
> In a real systems this value exist and used by driver.
> Fore example, Some features might need to consider this value when
> allocating memory.
Thanks for the patch!
Can you shed a little bit more light on where it pops up? Just for my
curiosity?
> Signed-off-by: Yuval Avnery <[email protected]>
> Acked-by: Jiri Pirko <[email protected]>
> diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
> index 6aeed0c600f8..f1a0171080cb 100644
> --- a/drivers/net/netdevsim/bus.c
> +++ b/drivers/net/netdevsim/bus.c
> @@ -26,9 +26,9 @@ static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev)
> static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev,
> unsigned int num_vfs)
> {
> - nsim_bus_dev->vfconfigs = kcalloc(num_vfs,
> - sizeof(struct nsim_vf_config),
> - GFP_KERNEL);
You're changing the semantics of the enable/disable as well now.
The old values used to be wiped when SR-IOV is disabled, now they
will be retained across disable/enable pair.
I think it'd be better if that wasn't the case. Users may expect a
system to be in the same state after they enable SR-IOV, regardless if
someone else used SR-IOV since last reboot.
Could you add a memset(,0,) here?
> + if (nsim_bus_dev->max_vfs < num_vfs)
> + return -ENOMEM;
> +
> if (!nsim_bus_dev->vfconfigs)
> return -ENOMEM;
This check seems useless now, no? We will always have vfconfigs
> nsim_bus_dev->num_vfs = num_vfs;
> @@ -38,8 +38,6 @@ static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev,
>
> static void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev)
> {
> - kfree(nsim_bus_dev->vfconfigs);
> - nsim_bus_dev->vfconfigs = NULL;
> nsim_bus_dev->num_vfs = 0;
> }
>
> @@ -154,22 +152,29 @@ static struct device_type nsim_bus_dev_type = {
> };
>
> static struct nsim_bus_dev *
> -nsim_bus_dev_new(unsigned int id, unsigned int port_count);
> +nsim_bus_dev_new(unsigned int id, unsigned int port_count,
> + unsigned int max_vfs);
> +
> +#define NSIM_BUS_DEV_MAX_VFS 4
>
> static ssize_t
> new_device_store(struct bus_type *bus, const char *buf, size_t count)
> {
> struct nsim_bus_dev *nsim_bus_dev;
> unsigned int port_count;
> + unsigned int max_vfs;
> unsigned int id;
> int err;
>
> - err = sscanf(buf, "%u %u", &id, &port_count);
> + err = sscanf(buf, "%u %u %u", &id, &port_count, &max_vfs);
> switch (err) {
> case 1:
> port_count = 1;
> /* fall through */
> case 2:
> + max_vfs = NSIM_BUS_DEV_MAX_VFS;
> + /* fall through */
> + case 3:
> if (id > INT_MAX) {
> pr_err("Value of \"id\" is too big.\n");
> return -EINVAL;
Is 0 VFs okay? will kcalloc(0, size, flags) behave correctly?
> @@ -179,7 +184,7 @@ new_device_store(struct bus_type *bus, const char *buf, size_t count)
> pr_err("Format for adding new device is \"id port_count\" (uint uint).\n");
> return -EINVAL;
> }
> - nsim_bus_dev = nsim_bus_dev_new(id, port_count);
> + nsim_bus_dev = nsim_bus_dev_new(id, port_count, max_vfs);
> if (IS_ERR(nsim_bus_dev))
> return PTR_ERR(nsim_bus_dev);
>
> @@ -267,7 +272,8 @@ static struct bus_type nsim_bus = {
> };
>
> static struct nsim_bus_dev *
> -nsim_bus_dev_new(unsigned int id, unsigned int port_count)
> +nsim_bus_dev_new(unsigned int id, unsigned int port_count,
> + unsigned int max_vfs)
> {
> struct nsim_bus_dev *nsim_bus_dev;
> int err;
> @@ -284,12 +290,24 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count)
> nsim_bus_dev->dev.type = &nsim_bus_dev_type;
> nsim_bus_dev->port_count = port_count;
> nsim_bus_dev->initial_net = current->nsproxy->net_ns;
> + nsim_bus_dev->max_vfs = max_vfs;
> +
> + nsim_bus_dev->vfconfigs = kcalloc(nsim_bus_dev->max_vfs,
> + sizeof(struct nsim_vf_config),
> + GFP_KERNEL);
> + if (!nsim_bus_dev->vfconfigs) {
> + err = -ENOMEM;
> + goto err_nsim_bus_dev_id_free;
> + }
>
> err = device_register(&nsim_bus_dev->dev);
> if (err)
> - goto err_nsim_bus_dev_id_free;
> + goto err_nsim_vfconfigs_free;
> +
> return nsim_bus_dev;
>
> +err_nsim_vfconfigs_free:
> + kfree(nsim_bus_dev->vfconfigs);
> err_nsim_bus_dev_id_free:
> ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
> err_nsim_bus_dev_free:
> @@ -301,6 +319,7 @@ static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev)
> {
> device_unregister(&nsim_bus_dev->dev);
> ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
> + kfree(nsim_bus_dev->vfconfigs);
> kfree(nsim_bus_dev);
> }
>
> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
> index 94df795ef4d3..e2049856add8 100644
> --- a/drivers/net/netdevsim/netdevsim.h
> +++ b/drivers/net/netdevsim/netdevsim.h
> @@ -238,6 +238,7 @@ struct nsim_bus_dev {
> struct net *initial_net; /* Purpose of this is to carry net pointer
> * during the probe time only.
> */
> + unsigned int max_vfs;
> unsigned int num_vfs;
> struct nsim_vf_config *vfconfigs;
> };
> -----Original Message-----
> From: Jakub Kicinski <[email protected]>
> Sent: Wednesday, December 11, 2019 9:59 AM
> To: Yuval Avnery <[email protected]>
> Cc: Jiri Pirko <[email protected]>; [email protected];
> [email protected]; [email protected]
> Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
>
> On Wed, 11 Dec 2019 04:58:53 +0200, Yuval Avnery wrote:
> > Currently there is no limit to the number of VFs netdevsim can enable.
> > In a real systems this value exist and used by driver.
> > Fore example, Some features might need to consider this value when
> > allocating memory.
>
> Thanks for the patch!
>
> Can you shed a little bit more light on where it pops up? Just for my curiosity?
Yes, like we described in the subdev threads.
User should be able to configure some attributes before the VF was enabled.
So all those (persistent) VF attributes should be available for query and configuration
before VF was enabled.
The driver can allocate an array according to max_vfs to hold all that data,
like we do here in" vfconfigs".
>
> > Signed-off-by: Yuval Avnery <[email protected]>
> > Acked-by: Jiri Pirko <[email protected]>
>
> > diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
> > index 6aeed0c600f8..f1a0171080cb 100644
> > --- a/drivers/net/netdevsim/bus.c
> > +++ b/drivers/net/netdevsim/bus.c
> > @@ -26,9 +26,9 @@ static struct nsim_bus_dev *to_nsim_bus_dev(struct
> > device *dev) static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev
> *nsim_bus_dev,
> > unsigned int num_vfs)
> > {
> > - nsim_bus_dev->vfconfigs = kcalloc(num_vfs,
> > - sizeof(struct nsim_vf_config),
> > - GFP_KERNEL);
>
> You're changing the semantics of the enable/disable as well now.
> The old values used to be wiped when SR-IOV is disabled, now they will be
> retained across disable/enable pair.
>
> I think it'd be better if that wasn't the case. Users may expect a system to be
> in the same state after they enable SR-IOV, regardless if someone else used
> SR-IOV since last reboot.
Right,
But some values should retain across enable/disable, for example MAC address which is persistent.
So maybe we need to retain some values, while resetting others on disable?
Would that work?
>
> Could you add a memset(,0,) here?
>
> > + if (nsim_bus_dev->max_vfs < num_vfs)
> > + return -ENOMEM;
> > +
> > if (!nsim_bus_dev->vfconfigs)
> > return -ENOMEM;
>
> This check seems useless now, no? We will always have vfconfigs
>
> > nsim_bus_dev->num_vfs = num_vfs;
> > @@ -38,8 +38,6 @@ static int nsim_bus_dev_vfs_enable(struct
> > nsim_bus_dev *nsim_bus_dev,
> >
> > static void nsim_bus_dev_vfs_disable(struct nsim_bus_dev
> > *nsim_bus_dev) {
> > - kfree(nsim_bus_dev->vfconfigs);
> > - nsim_bus_dev->vfconfigs = NULL;
> > nsim_bus_dev->num_vfs = 0;
> > }
> >
> > @@ -154,22 +152,29 @@ static struct device_type nsim_bus_dev_type = {
> > };
> >
> > static struct nsim_bus_dev *
> > -nsim_bus_dev_new(unsigned int id, unsigned int port_count);
> > +nsim_bus_dev_new(unsigned int id, unsigned int port_count,
> > + unsigned int max_vfs);
> > +
> > +#define NSIM_BUS_DEV_MAX_VFS 4
> >
> > static ssize_t
> > new_device_store(struct bus_type *bus, const char *buf, size_t count)
> > {
> > struct nsim_bus_dev *nsim_bus_dev;
> > unsigned int port_count;
> > + unsigned int max_vfs;
> > unsigned int id;
> > int err;
> >
> > - err = sscanf(buf, "%u %u", &id, &port_count);
> > + err = sscanf(buf, "%u %u %u", &id, &port_count, &max_vfs);
> > switch (err) {
> > case 1:
> > port_count = 1;
> > /* fall through */
> > case 2:
> > + max_vfs = NSIM_BUS_DEV_MAX_VFS;
> > + /* fall through */
> > + case 3:
> > if (id > INT_MAX) {
> > pr_err("Value of \"id\" is too big.\n");
> > return -EINVAL;
>
> Is 0 VFs okay? will kcalloc(0, size, flags) behave correctly?
Right, I will fix.
>
> > @@ -179,7 +184,7 @@ new_device_store(struct bus_type *bus, const char
> *buf, size_t count)
> > pr_err("Format for adding new device is \"id port_count\"
> (uint uint).\n");
> > return -EINVAL;
> > }
> > - nsim_bus_dev = nsim_bus_dev_new(id, port_count);
> > + nsim_bus_dev = nsim_bus_dev_new(id, port_count, max_vfs);
> > if (IS_ERR(nsim_bus_dev))
> > return PTR_ERR(nsim_bus_dev);
> >
> > @@ -267,7 +272,8 @@ static struct bus_type nsim_bus = { };
> >
> > static struct nsim_bus_dev *
> > -nsim_bus_dev_new(unsigned int id, unsigned int port_count)
> > +nsim_bus_dev_new(unsigned int id, unsigned int port_count,
> > + unsigned int max_vfs)
> > {
> > struct nsim_bus_dev *nsim_bus_dev;
> > int err;
> > @@ -284,12 +290,24 @@ nsim_bus_dev_new(unsigned int id, unsigned int
> port_count)
> > nsim_bus_dev->dev.type = &nsim_bus_dev_type;
> > nsim_bus_dev->port_count = port_count;
> > nsim_bus_dev->initial_net = current->nsproxy->net_ns;
> > + nsim_bus_dev->max_vfs = max_vfs;
> > +
> > + nsim_bus_dev->vfconfigs = kcalloc(nsim_bus_dev->max_vfs,
> > + sizeof(struct nsim_vf_config),
> > + GFP_KERNEL);
> > + if (!nsim_bus_dev->vfconfigs) {
> > + err = -ENOMEM;
> > + goto err_nsim_bus_dev_id_free;
> > + }
> >
> > err = device_register(&nsim_bus_dev->dev);
> > if (err)
> > - goto err_nsim_bus_dev_id_free;
> > + goto err_nsim_vfconfigs_free;
> > +
> > return nsim_bus_dev;
> >
> > +err_nsim_vfconfigs_free:
> > + kfree(nsim_bus_dev->vfconfigs);
> > err_nsim_bus_dev_id_free:
> > ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
> > err_nsim_bus_dev_free:
> > @@ -301,6 +319,7 @@ static void nsim_bus_dev_del(struct nsim_bus_dev
> > *nsim_bus_dev) {
> > device_unregister(&nsim_bus_dev->dev);
> > ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
> > + kfree(nsim_bus_dev->vfconfigs);
> > kfree(nsim_bus_dev);
> > }
> >
> > diff --git a/drivers/net/netdevsim/netdevsim.h
> > b/drivers/net/netdevsim/netdevsim.h
> > index 94df795ef4d3..e2049856add8 100644
> > --- a/drivers/net/netdevsim/netdevsim.h
> > +++ b/drivers/net/netdevsim/netdevsim.h
> > @@ -238,6 +238,7 @@ struct nsim_bus_dev {
> > struct net *initial_net; /* Purpose of this is to carry net pointer
> > * during the probe time only.
> > */
> > + unsigned int max_vfs;
> > unsigned int num_vfs;
> > struct nsim_vf_config *vfconfigs;
> > };
On Wed, 11 Dec 2019 18:19:56 +0000, Yuval Avnery wrote:
> > On Wed, 11 Dec 2019 04:58:53 +0200, Yuval Avnery wrote:
> > > Currently there is no limit to the number of VFs netdevsim can enable.
> > > In a real systems this value exist and used by driver.
> > > Fore example, Some features might need to consider this value when
> > > allocating memory.
> >
> > Thanks for the patch!
> >
> > Can you shed a little bit more light on where it pops up? Just for my curiosity?
>
> Yes, like we described in the subdev threads.
> User should be able to configure some attributes before the VF was enabled.
> So all those (persistent) VF attributes should be available for query and configuration
> before VF was enabled.
> The driver can allocate an array according to max_vfs to hold all that data,
> like we do here in" vfconfigs".
I was after more practical reasoning, are you writing some tests for
subdev stuff that will depend on this change? :)
> > > Signed-off-by: Yuval Avnery <[email protected]>
> > > Acked-by: Jiri Pirko <[email protected]>
> > >
> > > diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
> > > index 6aeed0c600f8..f1a0171080cb 100644
> > > --- a/drivers/net/netdevsim/bus.c
> > > +++ b/drivers/net/netdevsim/bus.c
> > > @@ -26,9 +26,9 @@ static struct nsim_bus_dev *to_nsim_bus_dev(struct
> > > device *dev) static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev
> > *nsim_bus_dev,
> > > unsigned int num_vfs)
> > > {
> > > - nsim_bus_dev->vfconfigs = kcalloc(num_vfs,
> > > - sizeof(struct nsim_vf_config),
> > > - GFP_KERNEL);
> >
> > You're changing the semantics of the enable/disable as well now.
> > The old values used to be wiped when SR-IOV is disabled, now they will be
> > retained across disable/enable pair.
> >
> > I think it'd be better if that wasn't the case. Users may expect a system to be
> > in the same state after they enable SR-IOV, regardless if someone else used
> > SR-IOV since last reboot.
>
> Right,
> But some values should retain across enable/disable, for example MAC address which is persistent.
> So maybe we need to retain some values, while resetting others on disable?
> Would that work?
Mmm. That is a good question. For all practical purposes SR-IOV used
to be local to the host that enables it until Smart/middle box NICs
emerged.
Perhaps the best way forward would be to reset the config that was set
via legacy APIs and keep only the MACs provisioned via persistent
devlink API?
So for now we'd memset, and once devlink API lands reset selectively?
> > Could you add a memset(,0,) here?
> >
> > > + if (nsim_bus_dev->max_vfs < num_vfs)
> > > + return -ENOMEM;
> > > +
> > > if (!nsim_bus_dev->vfconfigs)
> > > return -ENOMEM;
> >
> > This check seems useless now, no? We will always have vfconfigs
> -----Original Message-----
> From: Jakub Kicinski <[email protected]>
> Sent: Wednesday, December 11, 2019 11:16 AM
> To: Yuval Avnery <[email protected]>
> Cc: Jiri Pirko <[email protected]>; [email protected];
> [email protected]; [email protected]
> Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
>
> On Wed, 11 Dec 2019 18:19:56 +0000, Yuval Avnery wrote:
> > > On Wed, 11 Dec 2019 04:58:53 +0200, Yuval Avnery wrote:
> > > > Currently there is no limit to the number of VFs netdevsim can enable.
> > > > In a real systems this value exist and used by driver.
> > > > Fore example, Some features might need to consider this value when
> > > > allocating memory.
> > >
> > > Thanks for the patch!
> > >
> > > Can you shed a little bit more light on where it pops up? Just for my
> curiosity?
> >
> > Yes, like we described in the subdev threads.
> > User should be able to configure some attributes before the VF was
> enabled.
> > So all those (persistent) VF attributes should be available for query
> > and configuration before VF was enabled.
> > The driver can allocate an array according to max_vfs to hold all that
> > data, like we do here in" vfconfigs".
>
> I was after more practical reasoning, are you writing some tests for subdev
> stuff that will depend on this change? :)
Yes we are writing tests for subdev with this.
This is the way mlx5 works.. is that practical enough?
>
> > > > Signed-off-by: Yuval Avnery <[email protected]>
> > > > Acked-by: Jiri Pirko <[email protected]>
> > > >
> > > > diff --git a/drivers/net/netdevsim/bus.c
> > > > b/drivers/net/netdevsim/bus.c index 6aeed0c600f8..f1a0171080cb
> > > > 100644
> > > > --- a/drivers/net/netdevsim/bus.c
> > > > +++ b/drivers/net/netdevsim/bus.c
> > > > @@ -26,9 +26,9 @@ static struct nsim_bus_dev
> > > > *to_nsim_bus_dev(struct device *dev) static int
> > > > nsim_bus_dev_vfs_enable(struct nsim_bus_dev
> > > *nsim_bus_dev,
> > > > unsigned int num_vfs)
> > > > {
> > > > - nsim_bus_dev->vfconfigs = kcalloc(num_vfs,
> > > > - sizeof(struct nsim_vf_config),
> > > > - GFP_KERNEL);
> > >
> > > You're changing the semantics of the enable/disable as well now.
> > > The old values used to be wiped when SR-IOV is disabled, now they
> > > will be retained across disable/enable pair.
> > >
> > > I think it'd be better if that wasn't the case. Users may expect a
> > > system to be in the same state after they enable SR-IOV, regardless
> > > if someone else used SR-IOV since last reboot.
> >
> > Right,
> > But some values should retain across enable/disable, for example MAC
> address which is persistent.
> > So maybe we need to retain some values, while resetting others on
> disable?
> > Would that work?
>
> Mmm. That is a good question. For all practical purposes SR-IOV used to be
> local to the host that enables it until Smart/middle box NICs emerged.
>
> Perhaps the best way forward would be to reset the config that was set via
> legacy APIs and keep only the MACs provisioned via persistent devlink API?
>
> So for now we'd memset, and once devlink API lands reset selectively?
Legacy is also persistent.
Currently when you set mac address with "ip link vf set mac" it is persistent (at least in mlx5).
But ip link only exposes enabled VFS, so driver on VF has to reload to acquire this MAC.
With devlink subdev it will be possible to set the MAC before VF was enabled.
I think we need to distinguish here between:
- PF sets MAC to a VF - persistent.
- VF sets MAC to itself - not persistent.
But is the second case relevant in netdevsim?
>
> > > Could you add a memset(,0,) here?
> > >
> > > > + if (nsim_bus_dev->max_vfs < num_vfs)
> > > > + return -ENOMEM;
> > > > +
> > > > if (!nsim_bus_dev->vfconfigs)
> > > > return -ENOMEM;
> > >
> > > This check seems useless now, no? We will always have vfconfigs
On Wed, 11 Dec 2019 19:57:34 +0000, Yuval Avnery wrote:
> > -----Original Message-----
> > From: Jakub Kicinski <[email protected]>
> > Sent: Wednesday, December 11, 2019 11:16 AM
> > To: Yuval Avnery <[email protected]>
> > Cc: Jiri Pirko <[email protected]>; [email protected];
> > [email protected]; [email protected]
> > Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
> >
> > On Wed, 11 Dec 2019 18:19:56 +0000, Yuval Avnery wrote:
> > > > On Wed, 11 Dec 2019 04:58:53 +0200, Yuval Avnery wrote:
> > > > > Currently there is no limit to the number of VFs netdevsim can enable.
> > > > > In a real systems this value exist and used by driver.
> > > > > Fore example, Some features might need to consider this value when
> > > > > allocating memory.
> > > >
> > > > Thanks for the patch!
> > > >
> > > > Can you shed a little bit more light on where it pops up? Just for my
> > curiosity?
> > >
> > > Yes, like we described in the subdev threads.
> > > User should be able to configure some attributes before the VF was
> > enabled.
> > > So all those (persistent) VF attributes should be available for query
> > > and configuration before VF was enabled.
> > > The driver can allocate an array according to max_vfs to hold all that
> > > data, like we do here in" vfconfigs".
> >
> > I was after more practical reasoning, are you writing some tests for subdev
> > stuff that will depend on this change? :)
>
> Yes we are writing tests for subdev with this.
Okay, please post v2 together with the tests. We don't accept netdevsim
features without tests any more.
> This is the way mlx5 works.. is that practical enough?
>
> > > > > Signed-off-by: Yuval Avnery <[email protected]>
> > > > > Acked-by: Jiri Pirko <[email protected]>
> > > > >
> > > > > diff --git a/drivers/net/netdevsim/bus.c
> > > > > b/drivers/net/netdevsim/bus.c index 6aeed0c600f8..f1a0171080cb
> > > > > 100644
> > > > > --- a/drivers/net/netdevsim/bus.c
> > > > > +++ b/drivers/net/netdevsim/bus.c
> > > > > @@ -26,9 +26,9 @@ static struct nsim_bus_dev
> > > > > *to_nsim_bus_dev(struct device *dev) static int
> > > > > nsim_bus_dev_vfs_enable(struct nsim_bus_dev
> > > > *nsim_bus_dev,
> > > > > unsigned int num_vfs)
> > > > > {
> > > > > - nsim_bus_dev->vfconfigs = kcalloc(num_vfs,
> > > > > - sizeof(struct nsim_vf_config),
> > > > > - GFP_KERNEL);
> > > >
> > > > You're changing the semantics of the enable/disable as well now.
> > > > The old values used to be wiped when SR-IOV is disabled, now they
> > > > will be retained across disable/enable pair.
> > > >
> > > > I think it'd be better if that wasn't the case. Users may expect a
> > > > system to be in the same state after they enable SR-IOV, regardless
> > > > if someone else used SR-IOV since last reboot.
> > >
> > > Right,
> > > But some values should retain across enable/disable, for example MAC
> > address which is persistent.
> > > So maybe we need to retain some values, while resetting others on
> > disable?
> > > Would that work?
> >
> > Mmm. That is a good question. For all practical purposes SR-IOV used to be
> > local to the host that enables it until Smart/middle box NICs emerged.
> >
> > Perhaps the best way forward would be to reset the config that was set via
> > legacy APIs and keep only the MACs provisioned via persistent devlink API?
> >
> > So for now we'd memset, and once devlink API lands reset selectively?
>
> Legacy is also persistent.
> Currently when you set mac address with "ip link vf set mac" it is persistent (at least in mlx5).
"Currently in mlx5" - maybe, but this is netdevsim. Currently it clears
the config on re-enable which I believe to be preferable as explained
before.
> But ip link only exposes enabled VFS, so driver on VF has to reload to acquire this MAC.
> With devlink subdev it will be possible to set the MAC before VF was enabled.
Yup, sure. As I said, once subdev is implemented, we will treat the
addresses set by it differently. Those are inherently persistent or
rather their life time is independent of just the SR-IOV host.
> I think we need to distinguish here between:
> - PF sets MAC to a VF - persistent.
> - VF sets MAC to itself - not persistent.
>
> But is the second case relevant in netdevsim?
Not sure where you're going with this. Second case, i.e. if VF sets its
MAC, is not exposed in the hypervisor. I think iproute2 should still
list the MAC it provisioned, or 00:00.. if unset.
The two cases I'm differentiating is reset behaviour for addresses set
via PF vs via devlink.
> > > > Could you add a memset(,0,) here?
> > > >
> > > > > + if (nsim_bus_dev->max_vfs < num_vfs)
> > > > > + return -ENOMEM;
> > > > > +
> > > > > if (!nsim_bus_dev->vfconfigs)
> > > > > return -ENOMEM;
> > > >
> > > > This check seems useless now, no? We will always have vfconfigs
> -----Original Message-----
> From: Jakub Kicinski <[email protected]>
> Sent: Wednesday, December 11, 2019 2:24 PM
> To: Yuval Avnery <[email protected]>
> Cc: Jiri Pirko <[email protected]>; [email protected];
> [email protected]; [email protected]
> Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
>
> On Wed, 11 Dec 2019 19:57:34 +0000, Yuval Avnery wrote:
> > > -----Original Message-----
> > > From: Jakub Kicinski <[email protected]>
> > > Sent: Wednesday, December 11, 2019 11:16 AM
> > > To: Yuval Avnery <[email protected]>
> > > Cc: Jiri Pirko <[email protected]>; [email protected];
> > > [email protected]; [email protected]
> > > Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
> > >
> > > On Wed, 11 Dec 2019 18:19:56 +0000, Yuval Avnery wrote:
> > > > > On Wed, 11 Dec 2019 04:58:53 +0200, Yuval Avnery wrote:
> > > > > > Currently there is no limit to the number of VFs netdevsim can
> enable.
> > > > > > In a real systems this value exist and used by driver.
> > > > > > Fore example, Some features might need to consider this value
> > > > > > when allocating memory.
> > > > >
> > > > > Thanks for the patch!
> > > > >
> > > > > Can you shed a little bit more light on where it pops up? Just
> > > > > for my
> > > curiosity?
> > > >
> > > > Yes, like we described in the subdev threads.
> > > > User should be able to configure some attributes before the VF was
> > > enabled.
> > > > So all those (persistent) VF attributes should be available for
> > > > query and configuration before VF was enabled.
> > > > The driver can allocate an array according to max_vfs to hold all
> > > > that data, like we do here in" vfconfigs".
> > >
> > > I was after more practical reasoning, are you writing some tests for
> > > subdev stuff that will depend on this change? :)
> >
> > Yes we are writing tests for subdev with this.
>
> Okay, please post v2 together with the tests. We don't accept netdevsim
> features without tests any more.
I think the only test I can currently write is the enable SR-IOV max_vfs enforcement.
Because subdev is not in yet.
Will that be good enough?
>
> > This is the way mlx5 works.. is that practical enough?
> >
> > > > > > Signed-off-by: Yuval Avnery <[email protected]>
> > > > > > Acked-by: Jiri Pirko <[email protected]>
> > > > > >
> > > > > > diff --git a/drivers/net/netdevsim/bus.c
> > > > > > b/drivers/net/netdevsim/bus.c index 6aeed0c600f8..f1a0171080cb
> > > > > > 100644
> > > > > > --- a/drivers/net/netdevsim/bus.c
> > > > > > +++ b/drivers/net/netdevsim/bus.c
> > > > > > @@ -26,9 +26,9 @@ static struct nsim_bus_dev
> > > > > > *to_nsim_bus_dev(struct device *dev) static int
> > > > > > nsim_bus_dev_vfs_enable(struct nsim_bus_dev
> > > > > *nsim_bus_dev,
> > > > > > unsigned int num_vfs)
> > > > > > {
> > > > > > - nsim_bus_dev->vfconfigs = kcalloc(num_vfs,
> > > > > > - sizeof(struct
> nsim_vf_config),
> > > > > > - GFP_KERNEL);
> > > > >
> > > > > You're changing the semantics of the enable/disable as well now.
> > > > > The old values used to be wiped when SR-IOV is disabled, now
> > > > > they will be retained across disable/enable pair.
> > > > >
> > > > > I think it'd be better if that wasn't the case. Users may expect
> > > > > a system to be in the same state after they enable SR-IOV,
> > > > > regardless if someone else used SR-IOV since last reboot.
> > > >
> > > > Right,
> > > > But some values should retain across enable/disable, for example
> > > > MAC
> > > address which is persistent.
> > > > So maybe we need to retain some values, while resetting others on
> > > disable?
> > > > Would that work?
> > >
> > > Mmm. That is a good question. For all practical purposes SR-IOV used
> > > to be local to the host that enables it until Smart/middle box NICs
> emerged.
> > >
> > > Perhaps the best way forward would be to reset the config that was
> > > set via legacy APIs and keep only the MACs provisioned via persistent
> devlink API?
> > >
> > > So for now we'd memset, and once devlink API lands reset selectively?
> >
> > Legacy is also persistent.
> > Currently when you set mac address with "ip link vf set mac" it is persistent
> (at least in mlx5).
>
> "Currently in mlx5" - maybe, but this is netdevsim. Currently it clears the
> config on re-enable which I believe to be preferable as explained before.
>
> > But ip link only exposes enabled VFS, so driver on VF has to reload to
> acquire this MAC.
> > With devlink subdev it will be possible to set the MAC before VF was
> enabled.
>
> Yup, sure. As I said, once subdev is implemented, we will treat the addresses
> set by it differently. Those are inherently persistent or rather their life time is
> independent of just the SR-IOV host.
Ok, got it.
I am just wondering how this works when you have "ip link" and devlink setting the MAC independently.
Will they show the same MAC?
Or ip link will show the non-persistent MAC And devlink the persistent?
>
> > I think we need to distinguish here between:
> > - PF sets MAC to a VF - persistent.
> > - VF sets MAC to itself - not persistent.
> >
> > But is the second case relevant in netdevsim?
>
> Not sure where you're going with this. Second case, i.e. if VF sets its MAC, is
> not exposed in the hypervisor. I think iproute2 should still list the MAC it
> provisioned, or 00:00.. if unset.
Yes, these are two different unrelated MACs.
>
> The two cases I'm differentiating is reset behaviour for addresses set via PF
> vs via devlink.
>
> > > > > Could you add a memset(,0,) here?
> > > > >
> > > > > > + if (nsim_bus_dev->max_vfs < num_vfs)
> > > > > > + return -ENOMEM;
> > > > > > +
> > > > > > if (!nsim_bus_dev->vfconfigs)
> > > > > > return -ENOMEM;
> > > > >
> > > > > This check seems useless now, no? We will always have vfconfigs
On Wed, 11 Dec 2019 23:25:09 +0000, Yuval Avnery wrote:
> > -----Original Message-----
> > From: Jakub Kicinski <[email protected]>
> > Sent: Wednesday, December 11, 2019 2:24 PM
> > To: Yuval Avnery <[email protected]>
> > Cc: Jiri Pirko <[email protected]>; [email protected];
> > [email protected]; [email protected]
> > Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
> >
> > On Wed, 11 Dec 2019 19:57:34 +0000, Yuval Avnery wrote:
> > > > -----Original Message-----
> > > > From: Jakub Kicinski <[email protected]>
> > > > Sent: Wednesday, December 11, 2019 11:16 AM
> > > > To: Yuval Avnery <[email protected]>
> > > > Cc: Jiri Pirko <[email protected]>; [email protected];
> > > > [email protected]; [email protected]
> > > > Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
> > > >
> > > > On Wed, 11 Dec 2019 18:19:56 +0000, Yuval Avnery wrote:
> > > > > > On Wed, 11 Dec 2019 04:58:53 +0200, Yuval Avnery wrote:
> > > > > > > Currently there is no limit to the number of VFs netdevsim can enable.
> > > > > > > In a real systems this value exist and used by driver.
> > > > > > > Fore example, Some features might need to consider this value
> > > > > > > when allocating memory.
> > > > > >
> > > > > > Thanks for the patch!
> > > > > >
> > > > > > Can you shed a little bit more light on where it pops up? Just
> > > > > > for my
> > > > curiosity?
> > > > >
> > > > > Yes, like we described in the subdev threads.
> > > > > User should be able to configure some attributes before the VF was
> > > > enabled.
> > > > > So all those (persistent) VF attributes should be available for
> > > > > query and configuration before VF was enabled.
> > > > > The driver can allocate an array according to max_vfs to hold all
> > > > > that data, like we do here in" vfconfigs".
> > > >
> > > > I was after more practical reasoning, are you writing some tests for
> > > > subdev stuff that will depend on this change? :)
> > >
> > > Yes we are writing tests for subdev with this.
> >
> > Okay, please post v2 together with the tests. We don't accept netdevsim
> > features without tests any more.
>
> I think the only test I can currently write is the enable SR-IOV max_vfs enforcement.
> Because subdev is not in yet.
> Will that be good enough?
It'd be good to test some netdev API rather than just the enforcement
itself which is entirely in netdevsim, I think.
So max_vfs enforcement plus checking that ip link lists the correct
number of entries (and perhaps the entries are in reset state after
enable) would do IMO.
> > > This is the way mlx5 works.. is that practical enough?
> > >
> > > > > > > Signed-off-by: Yuval Avnery <[email protected]>
> > > > > > > Acked-by: Jiri Pirko <[email protected]>
> > > > > > >
> > > > > > > diff --git a/drivers/net/netdevsim/bus.c
> > > > > > > b/drivers/net/netdevsim/bus.c index 6aeed0c600f8..f1a0171080cb
> > > > > > > 100644
> > > > > > > --- a/drivers/net/netdevsim/bus.c
> > > > > > > +++ b/drivers/net/netdevsim/bus.c
> > > > > > > @@ -26,9 +26,9 @@ static struct nsim_bus_dev
> > > > > > > *to_nsim_bus_dev(struct device *dev) static int
> > > > > > > nsim_bus_dev_vfs_enable(struct nsim_bus_dev
> > > > > > *nsim_bus_dev,
> > > > > > > unsigned int num_vfs)
> > > > > > > {
> > > > > > > - nsim_bus_dev->vfconfigs = kcalloc(num_vfs,
> > > > > > > - sizeof(struct
> > nsim_vf_config),
> > > > > > > - GFP_KERNEL);
> > > > > >
> > > > > > You're changing the semantics of the enable/disable as well now.
> > > > > > The old values used to be wiped when SR-IOV is disabled, now
> > > > > > they will be retained across disable/enable pair.
> > > > > >
> > > > > > I think it'd be better if that wasn't the case. Users may expect
> > > > > > a system to be in the same state after they enable SR-IOV,
> > > > > > regardless if someone else used SR-IOV since last reboot.
> > > > >
> > > > > Right,
> > > > > But some values should retain across enable/disable, for example
> > > > > MAC
> > > > address which is persistent.
> > > > > So maybe we need to retain some values, while resetting others on
> > > > disable?
> > > > > Would that work?
> > > >
> > > > Mmm. That is a good question. For all practical purposes SR-IOV used
> > > > to be local to the host that enables it until Smart/middle box NICs
> > emerged.
> > > >
> > > > Perhaps the best way forward would be to reset the config that was
> > > > set via legacy APIs and keep only the MACs provisioned via persistent
> > devlink API?
> > > >
> > > > So for now we'd memset, and once devlink API lands reset selectively?
> > >
> > > Legacy is also persistent.
> > > Currently when you set mac address with "ip link vf set mac" it is persistent
> > (at least in mlx5).
> >
> > "Currently in mlx5" - maybe, but this is netdevsim. Currently it clears the
> > config on re-enable which I believe to be preferable as explained before.
> >
> > > But ip link only exposes enabled VFS, so driver on VF has to reload to
> > acquire this MAC.
> > > With devlink subdev it will be possible to set the MAC before VF was
> > enabled.
> >
> > Yup, sure. As I said, once subdev is implemented, we will treat the addresses
> > set by it differently. Those are inherently persistent or rather their life time is
> > independent of just the SR-IOV host.
>
> Ok, got it.
> I am just wondering how this works when you have "ip link" and devlink setting the MAC independently.
> Will they show the same MAC?
> Or ip link will show the non-persistent MAC And devlink the persistent?
My knee jerk reaction is that we should populate the values to those set
via devlink upon SR-IOV enable, but then if user overwrites those values
that's their problem.
Sort of mirror how VF MAC addrs work, just a level deeper. The VF
defaults to the MAC addr provided by the PF after reset, but it can
change it to something else (things may stop working because spoof
check etc. will drop all its frames, but nothing stops the VF in legacy
HW from writing its MAC addr register).
IOW the devlink addr is the default/provisioned addr, not necessarily
the addr the PF has set _now_.
Other options I guess are (a) reject the changes of the address from
the PF once devlink has set a value; (b) provide some device->control
CPU notifier which can ack/reject a request from the PF to change
devlink's value..?
You guys posted the devlink patches a while ago, what was your
implementation doing?
> -----Original Message-----
> From: Jakub Kicinski <[email protected]>
> Sent: Wednesday, December 11, 2019 3:50 PM
> To: Yuval Avnery <[email protected]>
> Cc: Jiri Pirko <[email protected]>; [email protected];
> [email protected]; [email protected]
> Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
>
> On Wed, 11 Dec 2019 23:25:09 +0000, Yuval Avnery wrote:
> > > -----Original Message-----
> > > From: Jakub Kicinski <[email protected]>
> > > Sent: Wednesday, December 11, 2019 2:24 PM
> > > To: Yuval Avnery <[email protected]>
> > > Cc: Jiri Pirko <[email protected]>; [email protected];
> > > [email protected]; [email protected]
> > > Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
> > >
> > > On Wed, 11 Dec 2019 19:57:34 +0000, Yuval Avnery wrote:
> > > > > -----Original Message-----
> > > > > From: Jakub Kicinski <[email protected]>
> > > > > Sent: Wednesday, December 11, 2019 11:16 AM
> > > > > To: Yuval Avnery <[email protected]>
> > > > > Cc: Jiri Pirko <[email protected]>; [email protected];
> > > > > [email protected]; [email protected]
> > > > > Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
> > > > >
> > > > > On Wed, 11 Dec 2019 18:19:56 +0000, Yuval Avnery wrote:
> > > > > > > On Wed, 11 Dec 2019 04:58:53 +0200, Yuval Avnery wrote:
> > > > > > > > Currently there is no limit to the number of VFs netdevsim can
> enable.
> > > > > > > > In a real systems this value exist and used by driver.
> > > > > > > > Fore example, Some features might need to consider this
> > > > > > > > value when allocating memory.
> > > > > > >
> > > > > > > Thanks for the patch!
> > > > > > >
> > > > > > > Can you shed a little bit more light on where it pops up?
> > > > > > > Just for my
> > > > > curiosity?
> > > > > >
> > > > > > Yes, like we described in the subdev threads.
> > > > > > User should be able to configure some attributes before the VF
> > > > > > was
> > > > > enabled.
> > > > > > So all those (persistent) VF attributes should be available
> > > > > > for query and configuration before VF was enabled.
> > > > > > The driver can allocate an array according to max_vfs to hold
> > > > > > all that data, like we do here in" vfconfigs".
> > > > >
> > > > > I was after more practical reasoning, are you writing some tests
> > > > > for subdev stuff that will depend on this change? :)
> > > >
> > > > Yes we are writing tests for subdev with this.
> > >
> > > Okay, please post v2 together with the tests. We don't accept
> > > netdevsim features without tests any more.
> >
> > I think the only test I can currently write is the enable SR-IOV max_vfs
> enforcement.
> > Because subdev is not in yet.
> > Will that be good enough?
>
> It'd be good to test some netdev API rather than just the enforcement itself
> which is entirely in netdevsim, I think.
>
> So max_vfs enforcement plus checking that ip link lists the correct number of
> entries (and perhaps the entries are in reset state after
> enable) would do IMO.
Ok, but this is possible regardless of my patch (to enable vfs).
>
> > > > This is the way mlx5 works.. is that practical enough?
> > > >
> > > > > > > > Signed-off-by: Yuval Avnery <[email protected]>
> > > > > > > > Acked-by: Jiri Pirko <[email protected]>
> > > > > > > >
> > > > > > > > diff --git a/drivers/net/netdevsim/bus.c
> > > > > > > > b/drivers/net/netdevsim/bus.c index
> > > > > > > > 6aeed0c600f8..f1a0171080cb
> > > > > > > > 100644
> > > > > > > > --- a/drivers/net/netdevsim/bus.c
> > > > > > > > +++ b/drivers/net/netdevsim/bus.c
> > > > > > > > @@ -26,9 +26,9 @@ static struct nsim_bus_dev
> > > > > > > > *to_nsim_bus_dev(struct device *dev) static int
> > > > > > > > nsim_bus_dev_vfs_enable(struct nsim_bus_dev
> > > > > > > *nsim_bus_dev,
> > > > > > > > unsigned int num_vfs) {
> > > > > > > > - nsim_bus_dev->vfconfigs = kcalloc(num_vfs,
> > > > > > > > - sizeof(struct
> > > nsim_vf_config),
> > > > > > > > - GFP_KERNEL);
> > > > > > >
> > > > > > > You're changing the semantics of the enable/disable as well now.
> > > > > > > The old values used to be wiped when SR-IOV is disabled, now
> > > > > > > they will be retained across disable/enable pair.
> > > > > > >
> > > > > > > I think it'd be better if that wasn't the case. Users may
> > > > > > > expect a system to be in the same state after they enable
> > > > > > > SR-IOV, regardless if someone else used SR-IOV since last reboot.
> > > > > >
> > > > > > Right,
> > > > > > But some values should retain across enable/disable, for
> > > > > > example MAC
> > > > > address which is persistent.
> > > > > > So maybe we need to retain some values, while resetting others
> > > > > > on
> > > > > disable?
> > > > > > Would that work?
> > > > >
> > > > > Mmm. That is a good question. For all practical purposes SR-IOV
> > > > > used to be local to the host that enables it until Smart/middle
> > > > > box NICs
> > > emerged.
> > > > >
> > > > > Perhaps the best way forward would be to reset the config that
> > > > > was set via legacy APIs and keep only the MACs provisioned via
> > > > > persistent
> > > devlink API?
> > > > >
> > > > > So for now we'd memset, and once devlink API lands reset
> selectively?
> > > >
> > > > Legacy is also persistent.
> > > > Currently when you set mac address with "ip link vf set mac" it is
> > > > persistent
> > > (at least in mlx5).
> > >
> > > "Currently in mlx5" - maybe, but this is netdevsim. Currently it
> > > clears the config on re-enable which I believe to be preferable as
> explained before.
> > >
> > > > But ip link only exposes enabled VFS, so driver on VF has to
> > > > reload to
> > > acquire this MAC.
> > > > With devlink subdev it will be possible to set the MAC before VF
> > > > was
> > > enabled.
> > >
> > > Yup, sure. As I said, once subdev is implemented, we will treat the
> > > addresses set by it differently. Those are inherently persistent or
> > > rather their life time is independent of just the SR-IOV host.
> >
> > Ok, got it.
> > I am just wondering how this works when you have "ip link" and devlink
> setting the MAC independently.
> > Will they show the same MAC?
> > Or ip link will show the non-persistent MAC And devlink the persistent?
>
> My knee jerk reaction is that we should populate the values to those set via
> devlink upon SR-IOV enable, but then if user overwrites those values that's
> their problem.
>
> Sort of mirror how VF MAC addrs work, just a level deeper. The VF defaults
> to the MAC addr provided by the PF after reset, but it can change it to
> something else (things may stop working because spoof check etc. will drop
> all its frames, but nothing stops the VF in legacy HW from writing its MAC
> addr register).
>
> IOW the devlink addr is the default/provisioned addr, not necessarily the
> addr the PF has set _now_.
>
> Other options I guess are (a) reject the changes of the address from the PF
> once devlink has set a value; (b) provide some device->control CPU notifier
> which can ack/reject a request from the PF to change devlink's value..?
>
> You guys posted the devlink patches a while ago, what was your
> implementation doing?
devlink simply calls the driver with set or get.
It is up to the vendor driver/HW if to make this address persistent or not.
The address is not saved in the devlink layer.
The MAC address in mlx5 is stored in the HW and
persistent (until PF reset) , whether it is set by devlink or ip link.
So from what I understand, we have the freedom to choose how netdevsim
behave in this case, which means non-persistent is ok.
On Thu, 12 Dec 2019 05:11:12 +0000, Yuval Avnery wrote:
> > > > Okay, please post v2 together with the tests. We don't accept
> > > > netdevsim features without tests any more.
> > >
> > > I think the only test I can currently write is the enable SR-IOV max_vfs
> > > enforcement. Because subdev is not in yet.
> > > Will that be good enough?
> >
> > It'd be good to test some netdev API rather than just the enforcement itself
> > which is entirely in netdevsim, I think.
> >
> > So max_vfs enforcement plus checking that ip link lists the correct number of
> > entries (and perhaps the entries are in reset state after
> > enable) would do IMO.
>
> Ok, but this is possible regardless of my patch (to enable vfs).
I was being lenient :) Your patch is only really needed when the
devlink API lands, since devlink will display all max VFs not enabled.
> > My knee jerk reaction is that we should populate the values to those set via
> > devlink upon SR-IOV enable, but then if user overwrites those values that's
> > their problem.
> >
> > Sort of mirror how VF MAC addrs work, just a level deeper. The VF defaults
> > to the MAC addr provided by the PF after reset, but it can change it to
> > something else (things may stop working because spoof check etc. will drop
> > all its frames, but nothing stops the VF in legacy HW from writing its MAC
> > addr register).
> >
> > IOW the devlink addr is the default/provisioned addr, not necessarily the
> > addr the PF has set _now_.
> >
> > Other options I guess are (a) reject the changes of the address from the PF
> > once devlink has set a value; (b) provide some device->control CPU notifier
> > which can ack/reject a request from the PF to change devlink's value..?
> >
> > You guys posted the devlink patches a while ago, what was your
> > implementation doing?
>
> devlink simply calls the driver with set or get.
> It is up to the vendor driver/HW if to make this address persistent or not.
> The address is not saved in the devlink layer.
It'd be preferable for the behaviour of the kernel API to not be vendor
specific. That defeats the purpose of having an operating system as a
HW abstraction layer. SR-IOV devices of today are so FW heavy we can
make them behave whatever way we choose makes most sense.
> The MAC address in mlx5 is stored in the HW and
> persistent (until PF reset) , whether it is set by devlink or ip link.
Okay, let's see if I understand. The devlink and ip link interfaces
basically do the same thing but one reaches from control CPU and the
other one from the SR-IOV host? And on SR-IOV host reset the addresses
go back to 00:00.. i.e. any?
What happens if the SR-IOV host changes the MAC? Is it used by HW or is
the MAC provisioned by the control CPU used for things like spoof check?
Does the control CPU get a notification for SR-IOV host reset? In that
case the control CPU driver could restore the MAC addr.
> So from what I understand, we have the freedom to choose how netdevsim
> behave in this case, which means non-persistent is ok.
To be clear - by persistent I meant that it survives the SR-IOV host's
resets, not necessarily written to NVRAM of any sort.
I'd like to see netdevsim to also serve as sort of a reference model
for device behaviour. Vendors who are not first to implement a feature
always complain that there is no documentation on how things should
work.
On Thu, Dec 12, 2019 at 10:25:17AM -0800, Jakub Kicinski wrote:
> I'd like to see netdevsim to also serve as sort of a reference model
> for device behaviour. Vendors who are not first to implement a feature
> always complain that there is no documentation on how things should
> work.
+1
I have a patch set that adds FIB offload implementation to netdevsim and
a gazillion of test cases that I share between netdevsim and mlxsw. Can
be used by more drivers when they land.
It's also very convenient for fuzzing now that syzkaller supports
netdevsim instances thanks to Jiri. I've been running syzkaller for a
few weeks now to test the FIB implementation.
> -----Original Message-----
> From: Jakub Kicinski <[email protected]>
> Sent: Thursday, December 12, 2019 10:25 AM
> To: Yuval Avnery <[email protected]>
> Cc: Jiri Pirko <[email protected]>; [email protected];
> [email protected]; [email protected]; Andy Gospodarek
> <[email protected]>
> Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
>
> On Thu, 12 Dec 2019 05:11:12 +0000, Yuval Avnery wrote:
> > > > > Okay, please post v2 together with the tests. We don't accept
> > > > > netdevsim features without tests any more.
> > > >
> > > > I think the only test I can currently write is the enable SR-IOV
> > > > max_vfs enforcement. Because subdev is not in yet.
> > > > Will that be good enough?
> > >
> > > It'd be good to test some netdev API rather than just the
> > > enforcement itself which is entirely in netdevsim, I think.
> > >
> > > So max_vfs enforcement plus checking that ip link lists the correct
> > > number of entries (and perhaps the entries are in reset state after
> > > enable) would do IMO.
> >
> > Ok, but this is possible regardless of my patch (to enable vfs).
>
> I was being lenient :) Your patch is only really needed when the devlink API
> lands, since devlink will display all max VFs not enabled.
>
> > > My knee jerk reaction is that we should populate the values to those
> > > set via devlink upon SR-IOV enable, but then if user overwrites
> > > those values that's their problem.
> > >
> > > Sort of mirror how VF MAC addrs work, just a level deeper. The VF
> > > defaults to the MAC addr provided by the PF after reset, but it can
> > > change it to something else (things may stop working because spoof
> > > check etc. will drop all its frames, but nothing stops the VF in
> > > legacy HW from writing its MAC addr register).
> > >
> > > IOW the devlink addr is the default/provisioned addr, not
> > > necessarily the addr the PF has set _now_.
> > >
> > > Other options I guess are (a) reject the changes of the address from
> > > the PF once devlink has set a value; (b) provide some
> > > device->control CPU notifier which can ack/reject a request from the PF
> to change devlink's value..?
> > >
> > > You guys posted the devlink patches a while ago, what was your
> > > implementation doing?
> >
> > devlink simply calls the driver with set or get.
> > It is up to the vendor driver/HW if to make this address persistent or not.
> > The address is not saved in the devlink layer.
>
> It'd be preferable for the behaviour of the kernel API to not be vendor
> specific. That defeats the purpose of having an operating system as a HW
> abstraction layer. SR-IOV devices of today are so FW heavy we can make
> them behave whatever way we choose makes most sense.
>
> > The MAC address in mlx5 is stored in the HW and persistent (until PF
> > reset) , whether it is set by devlink or ip link.
>
> Okay, let's see if I understand. The devlink and ip link interfaces basically do
> the same thing but one reaches from control CPU and the other one from
> the SR-IOV host? And on SR-IOV host reset the addresses go back to 00:00..
> i.e. any?
No,
This will work only in non-SmartNic mode, when e-switch manager is on the host,
MAC will be accessible through devlink and legacy tools..
For smartnic, only devlink from the embedded OS will work. Ip link from the host will not work.
>
> What happens if the SR-IOV host changes the MAC? Is it used by HW or is the
> MAC provisioned by the control CPU used for things like spoof check?
Host shouldn't have privileges to do it.
If it does, then it's under the host ownership (like in non-smartnic mode).
>
> Does the control CPU get a notification for SR-IOV host reset? In that case
> the control CPU driver could restore the MAC addr.
Yes, but this is irrelevant here, the MAC is already stored in HW/FW.
The MAC will reset only when the E-switch manager (on the control CPU) reset.
>
> > So from what I understand, we have the freedom to choose how
> netdevsim
> > behave in this case, which means non-persistent is ok.
>
> To be clear - by persistent I meant that it survives the SR-IOV host's resets,
> not necessarily written to NVRAM of any sort.
Yes, this is my view as well.
For non-smartnic it will survive VF disable/enable.
MAC is not stored on NVRAM, it will disappear once the driver on the control CPU resets.
>
> I'd like to see netdevsim to also serve as sort of a reference model for device
> behaviour. Vendors who are not first to implement a feature always
> complain that there is no documentation on how things should work.
Yes, this is a good idea.
But it seems we are always held back by legacy tools with no well-defined behavior.
On Thu, 12 Dec 2019 20:44:31 +0000, Yuval Avnery wrote:
> > -----Original Message-----
> > From: Jakub Kicinski <[email protected]>
> > Sent: Thursday, December 12, 2019 10:25 AM
> > To: Yuval Avnery <[email protected]>
> > Cc: Jiri Pirko <[email protected]>; [email protected];
> > [email protected]; [email protected]; Andy Gospodarek
> > <[email protected]>
> > Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
> >
> > On Thu, 12 Dec 2019 05:11:12 +0000, Yuval Avnery wrote:
> > > > > > Okay, please post v2 together with the tests. We don't accept
> > > > > > netdevsim features without tests any more.
> > > > >
> > > > > I think the only test I can currently write is the enable SR-IOV
> > > > > max_vfs enforcement. Because subdev is not in yet.
> > > > > Will that be good enough?
> > > >
> > > > It'd be good to test some netdev API rather than just the
> > > > enforcement itself which is entirely in netdevsim, I think.
> > > >
> > > > So max_vfs enforcement plus checking that ip link lists the correct
> > > > number of entries (and perhaps the entries are in reset state after
> > > > enable) would do IMO.
> > >
> > > Ok, but this is possible regardless of my patch (to enable vfs).
> >
> > I was being lenient :) Your patch is only really needed when the devlink API
> > lands, since devlink will display all max VFs not enabled.
> >
> > > > My knee jerk reaction is that we should populate the values to those
> > > > set via devlink upon SR-IOV enable, but then if user overwrites
> > > > those values that's their problem.
> > > >
> > > > Sort of mirror how VF MAC addrs work, just a level deeper. The VF
> > > > defaults to the MAC addr provided by the PF after reset, but it can
> > > > change it to something else (things may stop working because spoof
> > > > check etc. will drop all its frames, but nothing stops the VF in
> > > > legacy HW from writing its MAC addr register).
> > > >
> > > > IOW the devlink addr is the default/provisioned addr, not
> > > > necessarily the addr the PF has set _now_.
> > > >
> > > > Other options I guess are (a) reject the changes of the address from
> > > > the PF once devlink has set a value; (b) provide some
> > > > device->control CPU notifier which can ack/reject a request from the PF
> > to change devlink's value..?
> > > >
> > > > You guys posted the devlink patches a while ago, what was your
> > > > implementation doing?
> > >
> > > devlink simply calls the driver with set or get.
> > > It is up to the vendor driver/HW if to make this address persistent or not.
> > > The address is not saved in the devlink layer.
> >
> > It'd be preferable for the behaviour of the kernel API to not be vendor
> > specific. That defeats the purpose of having an operating system as a HW
> > abstraction layer. SR-IOV devices of today are so FW heavy we can make
> > them behave whatever way we choose makes most sense.
> >
> > > The MAC address in mlx5 is stored in the HW and persistent (until PF
> > > reset) , whether it is set by devlink or ip link.
> >
> > Okay, let's see if I understand. The devlink and ip link interfaces basically do
> > the same thing but one reaches from control CPU and the other one from
> > the SR-IOV host? And on SR-IOV host reset the addresses go back to 00:00..
> > i.e. any?
>
> No,
> This will work only in non-SmartNic mode, when e-switch manager is on the host,
> MAC will be accessible through devlink and legacy tools..
> For smartnic, only devlink from the embedded OS will work. Ip link from the host will not work.
I see, is this a more fine grained capability or all or nothing for
SR-IOV control? I'd think that if the SmartNIC's eswitch just
encapsulates all the frames into a L4 tunnel it shouldn't care about L2
addresses.
> > What happens if the SR-IOV host changes the MAC? Is it used by HW or is the
> > MAC provisioned by the control CPU used for things like spoof check?
>
> Host shouldn't have privileges to do it.
> If it does, then it's under the host ownership (like in non-smartnic mode).
I see so the MAC is fixed from bare metal host's PoV? And it has to be
set through some high level cloud API (for live migration etc)?
Do existing software stacks like libvirt handle not being able to set
the MAC happily?
> > Does the control CPU get a notification for SR-IOV host reset? In that case
> > the control CPU driver could restore the MAC addr.
>
> Yes, but this is irrelevant here, the MAC is already stored in HW/FW.
> The MAC will reset only when the E-switch manager (on the control CPU) reset.
>
> > > So from what I understand, we have the freedom to choose how netdevsim
> > > behave in this case, which means non-persistent is ok.
> >
> > To be clear - by persistent I meant that it survives the SR-IOV host's resets,
> > not necessarily written to NVRAM of any sort.
>
> Yes, this is my view as well.
> For non-smartnic it will survive VF disable/enable.
> MAC is not stored on NVRAM, it will disappear once the driver on the control CPU resets.
>
> > I'd like to see netdevsim to also serve as sort of a reference model for device
> > behaviour. Vendors who are not first to implement a feature always
> > complain that there is no documentation on how things should work.
>
> Yes, this is a good idea.
> But it seems we are always held back by legacy tools with no well-defined behavior.
On Thu, 12 Dec 2019 20:47:32 +0200, Ido Schimmel wrote:
> On Thu, Dec 12, 2019 at 10:25:17AM -0800, Jakub Kicinski wrote:
> > I'd like to see netdevsim to also serve as sort of a reference model
> > for device behaviour. Vendors who are not first to implement a feature
> > always complain that there is no documentation on how things should
> > work.
>
> +1
>
> I have a patch set that adds FIB offload implementation to netdevsim and
> a gazillion of test cases that I share between netdevsim and mlxsw. Can
> be used by more drivers when they land.
>
> It's also very convenient for fuzzing now that syzkaller supports
> netdevsim instances thanks to Jiri. I've been running syzkaller for a
> few weeks now to test the FIB implementation.
???????? very cool!
> -----Original Message-----
> From: Jakub Kicinski <[email protected]>
> Sent: Thursday, December 12, 2019 5:54 PM
> To: Yuval Avnery <[email protected]>
> Cc: Jiri Pirko <[email protected]>; [email protected];
> [email protected]; [email protected]; Andy Gospodarek
> <[email protected]>
> Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
>
> On Thu, 12 Dec 2019 20:44:31 +0000, Yuval Avnery wrote:
> > > -----Original Message-----
> > > From: Jakub Kicinski <[email protected]>
> > > Sent: Thursday, December 12, 2019 10:25 AM
> > > To: Yuval Avnery <[email protected]>
> > > Cc: Jiri Pirko <[email protected]>; [email protected];
> > > [email protected]; [email protected]; Andy
> > > Gospodarek <[email protected]>
> > > Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
> > >
> > > On Thu, 12 Dec 2019 05:11:12 +0000, Yuval Avnery wrote:
> > > > > > > Okay, please post v2 together with the tests. We don't
> > > > > > > accept netdevsim features without tests any more.
> > > > > >
> > > > > > I think the only test I can currently write is the enable
> > > > > > SR-IOV max_vfs enforcement. Because subdev is not in yet.
> > > > > > Will that be good enough?
> > > > >
> > > > > It'd be good to test some netdev API rather than just the
> > > > > enforcement itself which is entirely in netdevsim, I think.
> > > > >
> > > > > So max_vfs enforcement plus checking that ip link lists the
> > > > > correct number of entries (and perhaps the entries are in reset
> > > > > state after
> > > > > enable) would do IMO.
> > > >
> > > > Ok, but this is possible regardless of my patch (to enable vfs).
> > >
> > > I was being lenient :) Your patch is only really needed when the
> > > devlink API lands, since devlink will display all max VFs not enabled.
> > >
> > > > > My knee jerk reaction is that we should populate the values to
> > > > > those set via devlink upon SR-IOV enable, but then if user
> > > > > overwrites those values that's their problem.
> > > > >
> > > > > Sort of mirror how VF MAC addrs work, just a level deeper. The
> > > > > VF defaults to the MAC addr provided by the PF after reset, but
> > > > > it can change it to something else (things may stop working
> > > > > because spoof check etc. will drop all its frames, but nothing
> > > > > stops the VF in legacy HW from writing its MAC addr register).
> > > > >
> > > > > IOW the devlink addr is the default/provisioned addr, not
> > > > > necessarily the addr the PF has set _now_.
> > > > >
> > > > > Other options I guess are (a) reject the changes of the address
> > > > > from the PF once devlink has set a value; (b) provide some
> > > > > device->control CPU notifier which can ack/reject a request from
> > > > > device->the PF
> > > to change devlink's value..?
> > > > >
> > > > > You guys posted the devlink patches a while ago, what was your
> > > > > implementation doing?
> > > >
> > > > devlink simply calls the driver with set or get.
> > > > It is up to the vendor driver/HW if to make this address persistent or
> not.
> > > > The address is not saved in the devlink layer.
> > >
> > > It'd be preferable for the behaviour of the kernel API to not be
> > > vendor specific. That defeats the purpose of having an operating
> > > system as a HW abstraction layer. SR-IOV devices of today are so FW
> > > heavy we can make them behave whatever way we choose makes most
> sense.
> > >
> > > > The MAC address in mlx5 is stored in the HW and persistent (until
> > > > PF
> > > > reset) , whether it is set by devlink or ip link.
> > >
> > > Okay, let's see if I understand. The devlink and ip link interfaces
> > > basically do the same thing but one reaches from control CPU and the
> > > other one from the SR-IOV host? And on SR-IOV host reset the addresses
> go back to 00:00..
> > > i.e. any?
> >
> > No,
> > This will work only in non-SmartNic mode, when e-switch manager is on
> > the host, MAC will be accessible through devlink and legacy tools..
> > For smartnic, only devlink from the embedded OS will work. Ip link from
> the host will not work.
>
> I see, is this a more fine grained capability or all or nothing for SR-IOV control?
> I'd think that if the SmartNIC's eswitch just encapsulates all the frames into a
> L4 tunnel it shouldn't care about L2 addresses.
People keep saying that, but there are customers who wants this capability :)
>
> > > What happens if the SR-IOV host changes the MAC? Is it used by HW or
> > > is the MAC provisioned by the control CPU used for things like spoof
> check?
> >
> > Host shouldn't have privileges to do it.
> > If it does, then it's under the host ownership (like in non-smartnic mode).
>
> I see so the MAC is fixed from bare metal host's PoV? And it has to be set
Yes
> through some high level cloud API (for live migration etc)?
> Do existing software stacks like libvirt handle not being able to set the MAC
> happily?
I am not sure what you mean.
What we are talking about here is the E-switch manager setting a MAC to another VF.
When the VF driver loads it will query this MAC from the NIC. This is the way
It works today with "ip link set _vf_ mac"
Or in other words we are replacing "ip link set _vf_ mac" and not "ip link set address"
So that it can work from the SmartNic embedded system.
There is nothing really new here, ip link will not work from a SmartNic,
this is why need devlink subdev.
Hope that answers you question.
>
> > > Does the control CPU get a notification for SR-IOV host reset? In
> > > that case the control CPU driver could restore the MAC addr.
> >
> > Yes, but this is irrelevant here, the MAC is already stored in HW/FW.
> > The MAC will reset only when the E-switch manager (on the control CPU)
> reset.
> >
> > > > So from what I understand, we have the freedom to choose how
> > > > netdevsim behave in this case, which means non-persistent is ok.
> > >
> > > To be clear - by persistent I meant that it survives the SR-IOV
> > > host's resets, not necessarily written to NVRAM of any sort.
> >
> > Yes, this is my view as well.
> > For non-smartnic it will survive VF disable/enable.
> > MAC is not stored on NVRAM, it will disappear once the driver on the
> control CPU resets.
> >
> > > I'd like to see netdevsim to also serve as sort of a reference model
> > > for device behaviour. Vendors who are not first to implement a
> > > feature always complain that there is no documentation on how things
> should work.
> >
> > Yes, this is a good idea.
> > But it seems we are always held back by legacy tools with no well-defined
> behavior.
On Fri, 13 Dec 2019 03:21:02 +0000, Yuval Avnery wrote:
> > I see, is this a more fine grained capability or all or nothing for SR-IOV control?
> > I'd think that if the SmartNIC's eswitch just encapsulates all the frames into a
> > L4 tunnel it shouldn't care about L2 addresses.
>
> People keep saying that, but there are customers who wants this capability :)
Right, but we should have a plan for both, right? Some form of a switch
between L4/no checking/ip link changes are okay vs strict checking/L2/
SmartNIC provisions MAC addrs?
> > > > What happens if the SR-IOV host changes the MAC? Is it used by HW or
> > > > is the MAC provisioned by the control CPU used for things like spoof
> > > > check?
> > >
> > > Host shouldn't have privileges to do it.
> > > If it does, then it's under the host ownership (like in non-smartnic mode).
> >
> > I see so the MAC is fixed from bare metal host's PoV? And it has to be set
>
> Yes
>
> > through some high level cloud API (for live migration etc)?
> > Do existing software stacks like libvirt handle not being able to set the MAC
> > happily?
>
> I am not sure what you mean.
> What we are talking about here is the E-switch manager setting a MAC to another VF.
> When the VF driver loads it will query this MAC from the NIC. This is the way
> It works today with "ip link set _vf_ mac"
>
> Or in other words we are replacing "ip link set _vf_ mac" and not "ip link set address"
> So that it can work from the SmartNic embedded system.
> There is nothing really new here, ip link will not work from a SmartNic,
> this is why need devlink subdev.
Ack, but are we targeting the bare metal cloud scenario here or
something more limited? In a bare metal cloud AFAIU the customers
can use SR-IOV on the host, but the MACs need to be communicated/
/requested from the cloud management system.
IOW the ip link and the devlink APIs are in different domains of
control. Customer has access to ip link and provider has access to
devlink.
So my question is does libvirt run by the customer handle the fact
that it can't poke at ip link gracefully, and if live migration is
involved how is the customer supposed to ask the provider to move an
address?
> -----Original Message-----
> From: Jakub Kicinski <[email protected]>
> Sent: Friday, December 13, 2019 10:08 AM
> To: Yuval Avnery <[email protected]>
> Cc: Jiri Pirko <[email protected]>; [email protected];
> [email protected]; [email protected]; Andy Gospodarek
> <[email protected]>
> Subject: Re: [PATCH net-next] netdevsim: Add max_vfs to bus_dev
>
> On Fri, 13 Dec 2019 03:21:02 +0000, Yuval Avnery wrote:
> > > I see, is this a more fine grained capability or all or nothing for SR-IOV
> control?
> > > I'd think that if the SmartNIC's eswitch just encapsulates all the
> > > frames into a
> > > L4 tunnel it shouldn't care about L2 addresses.
> >
> > People keep saying that, but there are customers who wants this
> > capability :)
>
> Right, but we should have a plan for both, right? Some form of a switch
> between L4/no checking/ip link changes are okay vs strict checking/L2/
> SmartNIC provisions MAC addrs?
I am not sure I understand
The L2 checks will be on NIC, not on the switch.
Packet decapsulated and forwarded to the NIC, Where the MAC matters..
>
> > > > > What happens if the SR-IOV host changes the MAC? Is it used by
> > > > > HW or is the MAC provisioned by the control CPU used for things
> > > > > like spoof check?
> > > >
> > > > Host shouldn't have privileges to do it.
> > > > If it does, then it's under the host ownership (like in non-smartnic
> mode).
> > >
> > > I see so the MAC is fixed from bare metal host's PoV? And it has to
> > > be set
> >
> > Yes
> >
> > > through some high level cloud API (for live migration etc)?
> > > Do existing software stacks like libvirt handle not being able to
> > > set the MAC happily?
> >
> > I am not sure what you mean.
> > What we are talking about here is the E-switch manager setting a MAC to
> another VF.
> > When the VF driver loads it will query this MAC from the NIC. This is
> > the way It works today with "ip link set _vf_ mac"
> >
> > Or in other words we are replacing "ip link set _vf_ mac" and not "ip link set
> address"
> > So that it can work from the SmartNic embedded system.
> > There is nothing really new here, ip link will not work from a
> > SmartNic, this is why need devlink subdev.
>
> Ack, but are we targeting the bare metal cloud scenario here or something
> more limited? In a bare metal cloud AFAIU the customers can use SR-IOV on
> the host, but the MACs need to be communicated/ /requested from the
> cloud management system.
Yes, so the cloud management system communicates with the Control CPU, not the host,
Not whatever customer decides to run on the hypervisor. The host PF is powerless here (almost like VF).
>
> IOW the ip link and the devlink APIs are in different domains of control.
> Customer has access to ip link and provider has access to devlink.
For host VF - Customer has access to ip link exactly like in non-smartnic mode.
For host PF - "ip link set vf" will return error. Everything running on the host is not-trusted.
>
> So my question is does libvirt run by the customer handle the fact that it can't
> poke at ip link gracefully, and if live migration is involved how is the customer
> supposed to ask the provider to move an address?
I don't understand the question because I don't understand why is it different
from non-smartnic where the host hypervisor is in-charge.