2022-07-01 20:51:52

by Horatiu Vultur

[permalink] [raw]
Subject: [PATCH net-next v3 2/7] net: lan966x: Split lan966x_fdb_event_work

Split the function lan966x_fdb_event_work. One case for when the
orig_dev is a bridge and one case when orig_dev is lan966x port.
This is preparation for lag support. There is no functional change.

Signed-off-by: Horatiu Vultur <[email protected]>
---
.../ethernet/microchip/lan966x/lan966x_fdb.c | 127 ++++++++++--------
.../ethernet/microchip/lan966x/lan966x_main.h | 1 +
.../microchip/lan966x/lan966x_switchdev.c | 7 +-
3 files changed, 76 insertions(+), 59 deletions(-)

diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c
index da5ca7188679..5142e7c0de31 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c
@@ -8,6 +8,7 @@ struct lan966x_fdb_event_work {
struct work_struct work;
struct switchdev_notifier_fdb_info fdb_info;
struct net_device *dev;
+ struct net_device *orig_dev;
struct lan966x *lan966x;
unsigned long event;
};
@@ -127,75 +128,89 @@ void lan966x_fdb_deinit(struct lan966x *lan966x)
lan966x_fdb_purge_entries(lan966x);
}

-static void lan966x_fdb_event_work(struct work_struct *work)
+void lan966x_fdb_flush_workqueue(struct lan966x *lan966x)
+{
+ flush_workqueue(lan966x->fdb_work);
+}
+
+static void lan966x_fdb_port_event_work(struct lan966x_fdb_event_work *fdb_work)
{
- struct lan966x_fdb_event_work *fdb_work =
- container_of(work, struct lan966x_fdb_event_work, work);
struct switchdev_notifier_fdb_info *fdb_info;
- struct net_device *dev = fdb_work->dev;
struct lan966x_port *port;
struct lan966x *lan966x;
- int ret;

- fdb_info = &fdb_work->fdb_info;
lan966x = fdb_work->lan966x;
+ port = netdev_priv(fdb_work->orig_dev);
+ fdb_info = &fdb_work->fdb_info;

- if (lan966x_netdevice_check(dev)) {
- port = netdev_priv(dev);
-
- switch (fdb_work->event) {
- case SWITCHDEV_FDB_ADD_TO_DEVICE:
- if (!fdb_info->added_by_user)
- break;
- lan966x_mac_add_entry(lan966x, port, fdb_info->addr,
- fdb_info->vid);
+ switch (fdb_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ if (!fdb_info->added_by_user)
break;
- case SWITCHDEV_FDB_DEL_TO_DEVICE:
- if (!fdb_info->added_by_user)
- break;
- lan966x_mac_del_entry(lan966x, fdb_info->addr,
- fdb_info->vid);
+ lan966x_mac_add_entry(lan966x, port, fdb_info->addr,
+ fdb_info->vid);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ if (!fdb_info->added_by_user)
break;
- }
- } else {
- if (!netif_is_bridge_master(dev))
- goto out;
-
- /* In case the bridge is called */
- switch (fdb_work->event) {
- case SWITCHDEV_FDB_ADD_TO_DEVICE:
- /* If there is no front port in this vlan, there is no
- * point to copy the frame to CPU because it would be
- * just dropped at later point. So add it only if
- * there is a port but it is required to store the fdb
- * entry for later point when a port actually gets in
- * the vlan.
- */
- lan966x_fdb_add_entry(lan966x, fdb_info);
- if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x,
- fdb_info->vid))
- break;
-
- lan966x_mac_cpu_learn(lan966x, fdb_info->addr,
- fdb_info->vid);
+ lan966x_mac_del_entry(lan966x, fdb_info->addr,
+ fdb_info->vid);
+ break;
+ }
+}
+
+static void lan966x_fdb_bridge_event_work(struct lan966x_fdb_event_work *fdb_work)
+{
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct lan966x *lan966x;
+ int ret;
+
+ lan966x = fdb_work->lan966x;
+ fdb_info = &fdb_work->fdb_info;
+
+ /* In case the bridge is called */
+ switch (fdb_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ /* If there is no front port in this vlan, there is no
+ * point to copy the frame to CPU because it would be
+ * just dropped at later point. So add it only if
+ * there is a port but it is required to store the fdb
+ * entry for later point when a port actually gets in
+ * the vlan.
+ */
+ lan966x_fdb_add_entry(lan966x, fdb_info);
+ if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x,
+ fdb_info->vid))
break;
- case SWITCHDEV_FDB_DEL_TO_DEVICE:
- ret = lan966x_fdb_del_entry(lan966x, fdb_info);
- if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x,
- fdb_info->vid))
- break;
-
- if (ret)
- lan966x_mac_cpu_forget(lan966x, fdb_info->addr,
- fdb_info->vid);
+
+ lan966x_mac_cpu_learn(lan966x, fdb_info->addr,
+ fdb_info->vid);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ ret = lan966x_fdb_del_entry(lan966x, fdb_info);
+ if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x,
+ fdb_info->vid))
break;
- }
+
+ if (ret)
+ lan966x_mac_cpu_forget(lan966x, fdb_info->addr,
+ fdb_info->vid);
+ break;
}
+}
+
+static void lan966x_fdb_event_work(struct work_struct *work)
+{
+ struct lan966x_fdb_event_work *fdb_work =
+ container_of(work, struct lan966x_fdb_event_work, work);
+
+ if (lan966x_netdevice_check(fdb_work->orig_dev))
+ lan966x_fdb_port_event_work(fdb_work);
+ else if (netif_is_bridge_master(fdb_work->orig_dev))
+ lan966x_fdb_bridge_event_work(fdb_work);

-out:
kfree(fdb_work->fdb_info.addr);
kfree(fdb_work);
- dev_put(dev);
}

int lan966x_handle_fdb(struct net_device *dev,
@@ -221,7 +236,8 @@ int lan966x_handle_fdb(struct net_device *dev,
if (!fdb_work)
return -ENOMEM;

- fdb_work->dev = orig_dev;
+ fdb_work->dev = dev;
+ fdb_work->orig_dev = orig_dev;
fdb_work->lan966x = lan966x;
fdb_work->event = event;
INIT_WORK(&fdb_work->work, lan966x_fdb_event_work);
@@ -231,7 +247,6 @@ int lan966x_handle_fdb(struct net_device *dev,
goto err_addr_alloc;

ether_addr_copy((u8 *)fdb_work->fdb_info.addr, fdb_info->addr);
- dev_hold(orig_dev);

queue_work(lan966x->fdb_work, &fdb_work->work);
break;
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index 3b86ddddc756..0a4f4d27eaa7 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -368,6 +368,7 @@ void lan966x_fdb_write_entries(struct lan966x *lan966x, u16 vid);
void lan966x_fdb_erase_entries(struct lan966x *lan966x, u16 vid);
int lan966x_fdb_init(struct lan966x *lan966x);
void lan966x_fdb_deinit(struct lan966x *lan966x);
+void lan966x_fdb_flush_workqueue(struct lan966x *lan966x);
int lan966x_handle_fdb(struct net_device *dev,
struct net_device *orig_dev,
unsigned long event, const void *ctx,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
index df2bee678559..d9fc6a9a3da1 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
@@ -320,9 +320,10 @@ static int lan966x_port_prechangeupper(struct net_device *dev,
{
struct lan966x_port *port = netdev_priv(dev);

- if (netif_is_bridge_master(info->upper_dev) && !info->linking)
- switchdev_bridge_port_unoffload(port->dev, port,
- NULL, NULL);
+ if (netif_is_bridge_master(info->upper_dev) && !info->linking) {
+ switchdev_bridge_port_unoffload(port->dev, port, NULL, NULL);
+ lan966x_fdb_flush_workqueue(port->lan966x);
+ }

return NOTIFY_DONE;
}
--
2.33.0


2022-07-02 14:10:37

by Vladimir Oltean

[permalink] [raw]
Subject: Re: [PATCH net-next v3 2/7] net: lan966x: Split lan966x_fdb_event_work

On Fri, Jul 01, 2022 at 10:52:22PM +0200, Horatiu Vultur wrote:
> Split the function lan966x_fdb_event_work. One case for when the
> orig_dev is a bridge and one case when orig_dev is lan966x port.
> This is preparation for lag support. There is no functional change.
>
> Signed-off-by: Horatiu Vultur <[email protected]>
> ---

> -static void lan966x_fdb_event_work(struct work_struct *work)
> +void lan966x_fdb_flush_workqueue(struct lan966x *lan966x)
> +{
> + flush_workqueue(lan966x->fdb_work);
> +}
> +

> diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
> index df2bee678559..d9fc6a9a3da1 100644
> --- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
> +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
> @@ -320,9 +320,10 @@ static int lan966x_port_prechangeupper(struct net_device *dev,
> {
> struct lan966x_port *port = netdev_priv(dev);
>
> - if (netif_is_bridge_master(info->upper_dev) && !info->linking)
> - switchdev_bridge_port_unoffload(port->dev, port,
> - NULL, NULL);
> + if (netif_is_bridge_master(info->upper_dev) && !info->linking) {
> + switchdev_bridge_port_unoffload(port->dev, port, NULL, NULL);
> + lan966x_fdb_flush_workqueue(port->lan966x);
> + }

Very curious as to why you decided to stuff this change in here.
There was no functional change in v2, now there is. And it's a change
you might need to come back to later (probably sooner than you'd like),
since the flushing of the workqueue is susceptible to causing deadlocks
if done improperly - let's see how you blame a commit that was only
supposed to move code, in that case ;)

The deadlock that I'm talking about comes from the fact that
lan966x_port_prechangeupper() runs with rtnl_lock() held. So the code of
the flushed workqueue item must not hold rtnl_lock(), or any other lock
that is blocked by the rtnl_lock(). Otherwise, the flushing will wait
for a workqueue item to complete, that in turn waits to acquire the
rtnl_lock, which is held by the thread waiting the workqueue to complete.

Analyzing your code, lan966x_mac_notifiers() takes rtnl_lock().
That is taken from threaded interrupt context - lan966x_mac_irq_process(),
but is a sub-lock of spin_lock(&lan966x->mac_lock).

There are 2 problems with that already: rtnl_lock() is a mutex => can
sleep, but &lan966x->mac_lock is a spin lock => is atomic. You can't
take rtnl_lock() from atomic context. Lockdep and/or CONFIG_DEBUG_ATOMIC_SLEEP
will tell you so much.

The second problem is the lock ordering inversion that this causes.
There exists a threaded IRQ which takes the locks in the order mac_lock
-> rtnl_lock, and there exists this new fdb_flush_workqueue which takes
the locks in the order rtnl_lock -> mac_lock. If they run at the same
time, kaboom. Again, lockdep will tell you as much.

I'm sorry, but you need to solve the existing locking problems with the
code first.

>
> return NOTIFY_DONE;
> }
> --
> 2.33.0
>

2022-07-05 21:57:09

by Horatiu Vultur

[permalink] [raw]
Subject: Re: [PATCH net-next v3 2/7] net: lan966x: Split lan966x_fdb_event_work

The 07/02/2022 14:08, Vladimir Oltean wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>
> On Fri, Jul 01, 2022 at 10:52:22PM +0200, Horatiu Vultur wrote:
> > Split the function lan966x_fdb_event_work. One case for when the
> > orig_dev is a bridge and one case when orig_dev is lan966x port.
> > This is preparation for lag support. There is no functional change.
> >
> > Signed-off-by: Horatiu Vultur <[email protected]>
> > ---
>
> > -static void lan966x_fdb_event_work(struct work_struct *work)
> > +void lan966x_fdb_flush_workqueue(struct lan966x *lan966x)
> > +{
> > + flush_workqueue(lan966x->fdb_work);
> > +}
> > +
>
> > diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
> > index df2bee678559..d9fc6a9a3da1 100644
> > --- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
> > +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
> > @@ -320,9 +320,10 @@ static int lan966x_port_prechangeupper(struct net_device *dev,
> > {
> > struct lan966x_port *port = netdev_priv(dev);
> >
> > - if (netif_is_bridge_master(info->upper_dev) && !info->linking)
> > - switchdev_bridge_port_unoffload(port->dev, port,
> > - NULL, NULL);
> > + if (netif_is_bridge_master(info->upper_dev) && !info->linking) {
> > + switchdev_bridge_port_unoffload(port->dev, port, NULL, NULL);
> > + lan966x_fdb_flush_workqueue(port->lan966x);
> > + }
>
> Very curious as to why you decided to stuff this change in here.
> There was no functional change in v2, now there is. And it's a change
> you might need to come back to later (probably sooner than you'd like),
> since the flushing of the workqueue is susceptible to causing deadlocks
> if done improperly - let's see how you blame a commit that was only
> supposed to move code, in that case ;)

There is a functional change here and I forgot to change the commit
message for this.
>
> The deadlock that I'm talking about comes from the fact that
> lan966x_port_prechangeupper() runs with rtnl_lock() held. So the code of
> the flushed workqueue item must not hold rtnl_lock(), or any other lock
> that is blocked by the rtnl_lock(). Otherwise, the flushing will wait
> for a workqueue item to complete, that in turn waits to acquire the
> rtnl_lock, which is held by the thread waiting the workqueue to complete.
>
> Analyzing your code, lan966x_mac_notifiers() takes rtnl_lock().
> That is taken from threaded interrupt context - lan966x_mac_irq_process(),
> but is a sub-lock of spin_lock(&lan966x->mac_lock).
>
> There are 2 problems with that already: rtnl_lock() is a mutex => can
> sleep, but &lan966x->mac_lock is a spin lock => is atomic. You can't
> take rtnl_lock() from atomic context. Lockdep and/or CONFIG_DEBUG_ATOMIC_SLEEP
> will tell you so much.
>
> The second problem is the lock ordering inversion that this causes.
> There exists a threaded IRQ which takes the locks in the order mac_lock
> -> rtnl_lock, and there exists this new fdb_flush_workqueue which takes
> the locks in the order rtnl_lock -> mac_lock. If they run at the same
> time, kaboom. Again, lockdep will tell you as much.
>
> I'm sorry, but you need to solve the existing locking problems with the
> code first.

As I see it, there 2 'different problems' which both have the same root
cause, the usage of the lan966x->mac_lock:
1. One is with lan966x_mac_notifiers and lan966x_mac_irq_process, which
is an issue on net. And this needs a separate patch.
2. Second is introduced by flushing the workqueue.

I am pretty sure I have run with CONFIG_DEBUG_ATOMIC_SLEEP but I
couldn't see any errors/warnings.

So let me start by fixing first issue on net.

>
> >
> > return NOTIFY_DONE;
> > }
> > --
> > 2.33.0
> >

--
/Horatiu