Folks,
The t1_interrupt() irq handler calls del_timer_sync() down the chain:
sge.c: t1_interrupt()
-> subr.c: t1_slow_intr_handler()
-> asic_slow_intr() || fpga_slow_intr()
-> t1_pci_intr_handler()
-> cxgb2.c: t1_fatal_err() # Cont. at [*]
-> fpga_slow_intr()
-> sge.c: t1_sge_intr_error_handler()
-> cxgb2.c: t1_fatal_err() # Cont. at [*]
[*] cxgb2.c: t1_fatal_err()
-> sge.c: t1_sge_stop()
-> timer.c: del_timer_sync()
This is invalid: if an irq handler calls del_timer_sync() on a timer
it has already interrupted, it will just loop forever. That's why
del_timer_sync() also has a WARN_ON(in_irq()).
Included is an RFC patch series that runs the interrupt handler slow
path, t1_slow_intr_handler(), in a threaded-irq context.
This also leads to nice code savings across the driver, as some
workqueues and spinlocks are no longer needed.
Note: Only compile-tested. I do not have the hardware in question.
Thanks,
8<--------------
Ahmed S. Darwish (3):
chelsio: cxgb: Remove ndo_poll_controller()
chelsio: cxgb: Move slow interrupt handling to threaded irqs
chelsio: cxgb: Do not schedule a workqueue for external interrupts
drivers/net/ethernet/chelsio/cxgb/common.h | 2 -
drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 58 ++--------------------
drivers/net/ethernet/chelsio/cxgb/sge.c | 25 +++++++---
drivers/net/ethernet/chelsio/cxgb/sge.h | 3 +-
drivers/net/ethernet/chelsio/cxgb/subr.c | 2 +-
5 files changed, 25 insertions(+), 65 deletions(-)
base-commit: 2c85ebc57b3e1817b6ce1a6b703928e113a90442
--
2.29.2
Since commit ac3d9dd034e5 ("netpoll: make ndo_poll_controller()
optional"), networking drivers which use NAPI for clearing their TX
completions should not provide an ndo_poll_controller(). Netpoll simply
triggers the necessary TX queues cleanup by synchronously calling the
driver's NAPI poll handler -- with irqs off and a zero budget.
Modify the cxgb's poll method to clear the TX queues upon zero budget.
Per API requirements, make sure to never consume any RX packet in that
case (budget=0), and thus also not to increment the budget upon return.
Afterwards, remove ndo_poll_controller().
Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
References: 822d54b9c2c1 ("netpoll: Drop budget parameter from NAPI polling call hierarchy")
Signed-off-by: Ahmed S. Darwish <[email protected]>
Cc: Eric Dumazet <[email protected]>
---
drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 14 --------------
drivers/net/ethernet/chelsio/cxgb/sge.c | 9 ++++++++-
2 files changed, 8 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 0e4a0f413960..7b5a98330ef7 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -878,17 +878,6 @@ static int t1_set_features(struct net_device *dev, netdev_features_t features)
return 0;
}
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static void t1_netpoll(struct net_device *dev)
-{
- unsigned long flags;
- struct adapter *adapter = dev->ml_priv;
-
- local_irq_save(flags);
- t1_interrupt(adapter->pdev->irq, adapter);
- local_irq_restore(flags);
-}
-#endif
/*
* Periodic accumulation of MAC statistics. This is used only if the MAC
@@ -973,9 +962,6 @@ static const struct net_device_ops cxgb_netdev_ops = {
.ndo_set_mac_address = t1_set_mac_addr,
.ndo_fix_features = t1_fix_features,
.ndo_set_features = t1_set_features,
-#ifdef CONFIG_NET_POLL_CONTROLLER
- .ndo_poll_controller = t1_netpoll,
-#endif
};
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index 2d9c2b5a690a..d6df1a87db0b 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1609,7 +1609,14 @@ static int process_pure_responses(struct adapter *adapter)
int t1_poll(struct napi_struct *napi, int budget)
{
struct adapter *adapter = container_of(napi, struct adapter, napi);
- int work_done = process_responses(adapter, budget);
+ int work_done = 0;
+
+ if (budget)
+ work_done = process_responses(adapter, budget);
+ else {
+ /* budget=0 means: don't poll rx data */
+ process_pure_responses(adapter);
+ }
if (likely(work_done < budget)) {
napi_complete_done(napi, work_done);
--
2.29.2
The t1_interrupt() irq handler calls del_timer_sync() down the chain:
sge.c: t1_interrupt()
-> subr.c: t1_slow_intr_handler()
-> asic_slow_intr() || fpga_slow_intr()
-> t1_pci_intr_handler()
-> cxgb2.c: t1_fatal_err() # Cont. at [*]
-> fpga_slow_intr()
-> sge.c: t1_sge_intr_error_handler()
-> cxgb2.c: t1_fatal_err() # Cont. at [*]
[*] cxgb2.c: t1_fatal_err()
-> sge.c: t1_sge_stop()
-> timer.c: del_timer_sync()
This is invalid: if an irq handler calls del_timer_sync() on a timer it
has already interrupted, it will loop forever.
Move the slow t1 interrupt handling path, t1_slow_intr_handler(), to a
threaded-irq task context.
Signed-off-by: Ahmed S. Darwish <[email protected]>
---
drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 6 +++---
drivers/net/ethernet/chelsio/cxgb/sge.c | 13 +++++++++++--
drivers/net/ethernet/chelsio/cxgb/sge.h | 3 ++-
3 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 7b5a98330ef7..c96bdca4f270 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -211,9 +211,9 @@ static int cxgb_up(struct adapter *adapter)
t1_interrupts_clear(adapter);
adapter->params.has_msi = !disable_msi && !pci_enable_msi(adapter->pdev);
- err = request_irq(adapter->pdev->irq, t1_interrupt,
- adapter->params.has_msi ? 0 : IRQF_SHARED,
- adapter->name, adapter);
+ err = request_threaded_irq(adapter->pdev->irq, t1_irq, t1_irq_thread,
+ adapter->params.has_msi ? 0 : IRQF_SHARED,
+ adapter->name, adapter);
if (err) {
if (adapter->params.has_msi)
pci_disable_msi(adapter->pdev);
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index d6df1a87db0b..f1c402f6b889 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1626,11 +1626,10 @@ int t1_poll(struct napi_struct *napi, int budget)
return work_done;
}
-irqreturn_t t1_interrupt(int irq, void *data)
+irqreturn_t t1_irq(int irq, void *data)
{
struct adapter *adapter = data;
struct sge *sge = adapter->sge;
- int handled;
if (likely(responses_pending(adapter))) {
writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE);
@@ -1645,9 +1644,19 @@ irqreturn_t t1_interrupt(int irq, void *data)
napi_enable(&adapter->napi);
}
}
+
return IRQ_HANDLED;
}
+ return IRQ_WAKE_THREAD;
+}
+
+irqreturn_t t1_irq_thread(int irq, void *data)
+{
+ struct adapter *adapter = data;
+ struct sge *sge = adapter->sge;
+ int handled;
+
spin_lock(&adapter->async_lock);
handled = t1_slow_intr_handler(adapter);
spin_unlock(&adapter->async_lock);
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.h b/drivers/net/ethernet/chelsio/cxgb/sge.h
index a1ba591b3431..4072b3fb312b 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.h
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.h
@@ -74,7 +74,8 @@ struct sge *t1_sge_create(struct adapter *, struct sge_params *);
int t1_sge_configure(struct sge *, struct sge_params *);
int t1_sge_set_coalesce_params(struct sge *, struct sge_params *);
void t1_sge_destroy(struct sge *);
-irqreturn_t t1_interrupt(int irq, void *cookie);
+irqreturn_t t1_irq(int irq, void *cookie);
+irqreturn_t t1_irq_thread(int irq, void *cookie);
int t1_poll(struct napi_struct *, int);
netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev);
--
2.29.2
cxgb's "elmer0" external interrupt handling code requires task context,
so originally a workqueue was scheduled for it from the hardirq handler.
Now that all of the external interrupt handling, elmer0 included, is run
from a threaded-irq context, just directly call the real handler.
Remove all the workqueue code that is no longer needed, including
the spinlock used for synchronizing the workqueue's NIC register accesses
against the irq handler.
Signed-off-by: Ahmed S. Darwish <[email protected]>
---
drivers/net/ethernet/chelsio/cxgb/common.h | 2 --
drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 38 ----------------------
drivers/net/ethernet/chelsio/cxgb/sge.c | 3 --
drivers/net/ethernet/chelsio/cxgb/subr.c | 2 +-
4 files changed, 1 insertion(+), 44 deletions(-)
diff --git a/drivers/net/ethernet/chelsio/cxgb/common.h b/drivers/net/ethernet/chelsio/cxgb/common.h
index 6475060649e9..504882e66831 100644
--- a/drivers/net/ethernet/chelsio/cxgb/common.h
+++ b/drivers/net/ethernet/chelsio/cxgb/common.h
@@ -238,7 +238,6 @@ struct adapter {
int msg_enable;
u32 mmio_len;
- struct work_struct ext_intr_handler_task;
struct adapter_params params;
/* Terminator modules. */
@@ -256,7 +255,6 @@ struct adapter {
spinlock_t mac_lock;
/* guards async operations */
- spinlock_t async_lock ____cacheline_aligned;
u32 slow_intr_mask;
int t1powersave;
};
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index c96bdca4f270..b93e86d4d079 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -905,41 +905,6 @@ static void mac_stats_task(struct work_struct *work)
spin_unlock(&adapter->work_lock);
}
-/*
- * Processes elmer0 external interrupts in process context.
- */
-static void ext_intr_task(struct work_struct *work)
-{
- struct adapter *adapter =
- container_of(work, struct adapter, ext_intr_handler_task);
-
- t1_elmer0_ext_intr_handler(adapter);
-
- /* Now reenable external interrupts */
- spin_lock_irq(&adapter->async_lock);
- adapter->slow_intr_mask |= F_PL_INTR_EXT;
- writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE);
- writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA,
- adapter->regs + A_PL_ENABLE);
- spin_unlock_irq(&adapter->async_lock);
-}
-
-/*
- * Interrupt-context handler for elmer0 external interrupts.
- */
-void t1_elmer0_ext_intr(struct adapter *adapter)
-{
- /*
- * Schedule a task to handle external interrupts as we require
- * a process context. We disable EXT interrupts in the interim
- * and let the task reenable them when it's done.
- */
- adapter->slow_intr_mask &= ~F_PL_INTR_EXT;
- writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA,
- adapter->regs + A_PL_ENABLE);
- schedule_work(&adapter->ext_intr_handler_task);
-}
-
void t1_fatal_err(struct adapter *adapter)
{
if (adapter->flags & FULL_INIT_DONE) {
@@ -1045,11 +1010,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
spin_lock_init(&adapter->tpi_lock);
spin_lock_init(&adapter->work_lock);
- spin_lock_init(&adapter->async_lock);
spin_lock_init(&adapter->mac_lock);
- INIT_WORK(&adapter->ext_intr_handler_task,
- ext_intr_task);
INIT_DELAYED_WORK(&adapter->stats_update_task,
mac_stats_task);
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index f1c402f6b889..9b4ffddbbc05 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1657,10 +1657,7 @@ irqreturn_t t1_irq_thread(int irq, void *data)
struct sge *sge = adapter->sge;
int handled;
- spin_lock(&adapter->async_lock);
handled = t1_slow_intr_handler(adapter);
- spin_unlock(&adapter->async_lock);
-
if (!handled)
sge->stats.unhandled_irqs++;
diff --git a/drivers/net/ethernet/chelsio/cxgb/subr.c b/drivers/net/ethernet/chelsio/cxgb/subr.c
index ea0f8741d7cf..197d3bb924ca 100644
--- a/drivers/net/ethernet/chelsio/cxgb/subr.c
+++ b/drivers/net/ethernet/chelsio/cxgb/subr.c
@@ -858,7 +858,7 @@ static int asic_slow_intr(adapter_t *adapter)
if (cause & F_PL_INTR_PCIX)
t1_pci_intr_handler(adapter);
if (cause & F_PL_INTR_EXT)
- t1_elmer0_ext_intr(adapter);
+ t1_elmer0_ext_intr_handler(adapter);
/* Clear the interrupts just processed. */
writel(cause, adapter->regs + A_PL_CAUSE);
--
2.29.2
[[ Actually adding Eric to Cc ]]
On Thu, Dec 24, 2020 at 02:11:46PM +0100, Ahmed S. Darwish wrote:
> Since commit ac3d9dd034e5 ("netpoll: make ndo_poll_controller()
> optional"), networking drivers which use NAPI for clearing their TX
> completions should not provide an ndo_poll_controller(). Netpoll simply
> triggers the necessary TX queues cleanup by synchronously calling the
> driver's NAPI poll handler -- with irqs off and a zero budget.
>
> Modify the cxgb's poll method to clear the TX queues upon zero budget.
> Per API requirements, make sure to never consume any RX packet in that
> case (budget=0), and thus also not to increment the budget upon return.
>
> Afterwards, remove ndo_poll_controller().
>
> Link: https://lkml.kernel.org/r/[email protected]
> Link: https://lkml.kernel.org/r/[email protected]
> References: 822d54b9c2c1 ("netpoll: Drop budget parameter from NAPI polling call hierarchy")
> Signed-off-by: Ahmed S. Darwish <[email protected]>
> Cc: Eric Dumazet <[email protected]>
> ---
> drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 14 --------------
> drivers/net/ethernet/chelsio/cxgb/sge.c | 9 ++++++++-
> 2 files changed, 8 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
> index 0e4a0f413960..7b5a98330ef7 100644
> --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
> +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
> @@ -878,17 +878,6 @@ static int t1_set_features(struct net_device *dev, netdev_features_t features)
>
> return 0;
> }
> -#ifdef CONFIG_NET_POLL_CONTROLLER
> -static void t1_netpoll(struct net_device *dev)
> -{
> - unsigned long flags;
> - struct adapter *adapter = dev->ml_priv;
> -
> - local_irq_save(flags);
> - t1_interrupt(adapter->pdev->irq, adapter);
> - local_irq_restore(flags);
> -}
> -#endif
>
> /*
> * Periodic accumulation of MAC statistics. This is used only if the MAC
> @@ -973,9 +962,6 @@ static const struct net_device_ops cxgb_netdev_ops = {
> .ndo_set_mac_address = t1_set_mac_addr,
> .ndo_fix_features = t1_fix_features,
> .ndo_set_features = t1_set_features,
> -#ifdef CONFIG_NET_POLL_CONTROLLER
> - .ndo_poll_controller = t1_netpoll,
> -#endif
> };
>
> static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
> index 2d9c2b5a690a..d6df1a87db0b 100644
> --- a/drivers/net/ethernet/chelsio/cxgb/sge.c
> +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
> @@ -1609,7 +1609,14 @@ static int process_pure_responses(struct adapter *adapter)
> int t1_poll(struct napi_struct *napi, int budget)
> {
> struct adapter *adapter = container_of(napi, struct adapter, napi);
> - int work_done = process_responses(adapter, budget);
> + int work_done = 0;
> +
> + if (budget)
> + work_done = process_responses(adapter, budget);
> + else {
> + /* budget=0 means: don't poll rx data */
> + process_pure_responses(adapter);
> + }
>
> if (likely(work_done < budget)) {
> napi_complete_done(napi, work_done);
> --
> 2.29.2
>
On 2020-12-24 14:11:47 [+0100], Ahmed S. Darwish wrote:
> --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
> +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
> @@ -211,9 +211,9 @@ static int cxgb_up(struct adapter *adapter)
> t1_interrupts_clear(adapter);
>
> adapter->params.has_msi = !disable_msi && !pci_enable_msi(adapter->pdev);
> - err = request_irq(adapter->pdev->irq, t1_interrupt,
> - adapter->params.has_msi ? 0 : IRQF_SHARED,
> - adapter->name, adapter);
> + err = request_threaded_irq(adapter->pdev->irq, t1_irq, t1_irq_thread,
> + adapter->params.has_msi ? 0 : IRQF_SHARED,
> + adapter->name, adapter);
> if (err) {
> if (adapter->params.has_msi)
> pci_disable_msi(adapter->pdev);
> diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
> index d6df1a87db0b..f1c402f6b889 100644
> --- a/drivers/net/ethernet/chelsio/cxgb/sge.c
> +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
> @@ -1626,11 +1626,10 @@ int t1_poll(struct napi_struct *napi, int budget)
> return work_done;
> }
>
> -irqreturn_t t1_interrupt(int irq, void *data)
> +irqreturn_t t1_irq(int irq, void *data)
> {
> struct adapter *adapter = data;
> struct sge *sge = adapter->sge;
> - int handled;
>
> if (likely(responses_pending(adapter))) {
> writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE);
> @@ -1645,9 +1644,19 @@ irqreturn_t t1_interrupt(int irq, void *data)
> napi_enable(&adapter->napi);
> }
> }
> +
> return IRQ_HANDLED;
> }
>
> + return IRQ_WAKE_THREAD;
> +}
> +
> +irqreturn_t t1_irq_thread(int irq, void *data)
> +{
> + struct adapter *adapter = data;
> + struct sge *sge = adapter->sge;
> + int handled;
> +
> spin_lock(&adapter->async_lock);
> handled = t1_slow_intr_handler(adapter);
> spin_unlock(&adapter->async_lock);
This does not work in general: it might work in the MSI case, but it
does not work for the LEVEL interrupt case. The interrupt remains active
because it has not been ACKed. Chances are that the threaded handler
never gets scheduled because the interrupt is still pending and t1_irq()
gets invoked again right away.
For that reason, the primary must either mask the interrupt source or
use IRQF_ONESHOT to mask the interrupt line until the threaded handler
is done.
If you look at t1_elmer0_ext_intr(), it disables F_PL_INTR_EXT before the
worker is scheduled, so the interrupt does not trigger again.
The worker then does whatever is needed (t1_elmer0_ext_intr_handler),
ACKs F_PL_INTR_EXT, and enables F_PL_INTR_EXT again so that it may
trigger an interrupt again.
Sebastian