efx_ef10_try_update_nic_stats_vf() used in_interrupt() to figure out
whether it is safe to sleep (for MCDI) or not.
The only caller from which it was not is efx_net_stats(), which can be
invoked under dev_base_lock from net-sysfs::netstat_show().
So add a new update_stats_atomic() method to struct efx_nic_type, and
call it from efx_net_stats(), removing the need for
efx_ef10_try_update_nic_stats_vf() to behave differently for this case
(which it wasn't doing correctly anyway).
For all nic_types other than EF10 VF, this method is NULL and so we
call the regular update_stats() methods, which are happy with being
called from atomic contexts.
Fixes: f00bf2305cab ("sfc: don't update stats on VF when called in atomic context")
Reported-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Edward Cree <[email protected]>
---
Only compile-tested so far, because I'm waiting for my kernel to
finish rebuilding with CONFIG_DEBUG_ATOMIC_SLEEP which I'm hoping
is the right thing to detect the bug in the existing code.
I also wasn't quite sure how to give credit to the thorough analysis
in the commit message of Sebastian's patch. I don't think we have
a Whatever-by: tag to cover that, do we?
And this doesn't include your GFP_KERNEL change, which should
probably go in separately if you take this.
drivers/net/ethernet/sfc/ef10.c | 22 +++++++++++++---------
drivers/net/ethernet/sfc/efx_common.c | 2 +-
drivers/net/ethernet/sfc/net_driver.h | 5 +++++
drivers/net/ethernet/sfc/nic_common.h | 7 +++++++
4 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index c9df2e96ebe4..b702ba5986dc 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -1871,15 +1871,6 @@ static int efx_ef10_try_update_nic_stats_vf(struct efx_nic *efx)
spin_unlock_bh(&efx->stats_lock);
- if (in_interrupt()) {
- /* If in atomic context, cannot update stats. Just update the
- * software stats and return so the caller can continue.
- */
- spin_lock_bh(&efx->stats_lock);
- efx_update_sw_stats(efx, stats);
- return 0;
- }
-
efx_ef10_get_stat_mask(efx, mask);
rc = efx_nic_alloc_buffer(efx, &stats_buf, dma_len, GFP_ATOMIC);
@@ -1938,6 +1929,18 @@ static size_t efx_ef10_update_stats_vf(struct efx_nic *efx, u64 *full_stats,
return efx_ef10_update_stats_common(efx, full_stats, core_stats);
}
+static size_t efx_ef10_update_stats_atomic_vf(struct efx_nic *efx, u64 *full_stats,
+ struct rtnl_link_stats64 *core_stats)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+
+ /* In atomic context, cannot update HW stats. Just update the
+ * software stats and return so the caller can continue.
+ */
+ efx_update_sw_stats(efx, nic_data->stats);
+ return efx_ef10_update_stats_common(efx, full_stats, core_stats);
+}
+
static void efx_ef10_push_irq_moderation(struct efx_channel *channel)
{
struct efx_nic *efx = channel->efx;
@@ -3998,6 +4001,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
.finish_flr = efx_port_dummy_op_void,
.describe_stats = efx_ef10_describe_stats,
.update_stats = efx_ef10_update_stats_vf,
+ .update_stats_atomic = efx_ef10_update_stats_atomic_vf,
.start_stats = efx_port_dummy_op_void,
.pull_stats = efx_port_dummy_op_void,
.stop_stats = efx_port_dummy_op_void,
diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
index c256db241570..72a3f0e09f52 100644
--- a/drivers/net/ethernet/sfc/efx_common.c
+++ b/drivers/net/ethernet/sfc/efx_common.c
@@ -602,7 +602,7 @@ void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats)
struct efx_nic *efx = netdev_priv(net_dev);
spin_lock_bh(&efx->stats_lock);
- efx->type->update_stats(efx, NULL, stats);
+ efx_nic_update_stats_atomic(efx, NULL, stats);
spin_unlock_bh(&efx->stats_lock);
}
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 47aa753e64bd..9f7dfdf708cf 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -1172,6 +1172,9 @@ struct efx_udp_tunnel {
* @describe_stats: Describe statistics for ethtool
* @update_stats: Update statistics not provided by event handling.
* Either argument may be %NULL.
+ * @update_stats_atomic: Update statistics while in atomic context, if that
+ * is more limiting than @update_stats. Otherwise, leave %NULL and
+ * driver core will call @update_stats.
* @start_stats: Start the regular fetching of statistics
* @pull_stats: Pull stats from the NIC and wait until they arrive.
* @stop_stats: Stop the regular fetching of statistics
@@ -1316,6 +1319,8 @@ struct efx_nic_type {
size_t (*describe_stats)(struct efx_nic *efx, u8 *names);
size_t (*update_stats)(struct efx_nic *efx, u64 *full_stats,
struct rtnl_link_stats64 *core_stats);
+ size_t (*update_stats_atomic)(struct efx_nic *efx, u64 *full_stats,
+ struct rtnl_link_stats64 *core_stats);
void (*start_stats)(struct efx_nic *efx);
void (*pull_stats)(struct efx_nic *efx);
void (*stop_stats)(struct efx_nic *efx);
diff --git a/drivers/net/ethernet/sfc/nic_common.h b/drivers/net/ethernet/sfc/nic_common.h
index 82271f0b8627..b9cafe9cd568 100644
--- a/drivers/net/ethernet/sfc/nic_common.h
+++ b/drivers/net/ethernet/sfc/nic_common.h
@@ -244,6 +244,13 @@ void efx_nic_update_stats(const struct efx_hw_stat_desc *desc, size_t count,
const unsigned long *mask, u64 *stats,
const void *dma_buf, bool accumulate);
void efx_nic_fix_nodesc_drop_stat(struct efx_nic *efx, u64 *stat);
+static inline size_t efx_nic_update_stats_atomic(struct efx_nic *efx, u64 *full_stats,
+ struct rtnl_link_stats64 *core_stats)
+{
+ if (efx->type->update_stats_atomic)
+ return efx->type->update_stats_atomic(efx, full_stats, core_stats);
+ return efx->type->update_stats(efx, full_stats, core_stats);
+}
#define EFX_MAX_FLUSH_TIME 5000
On Mon, Sep 28 2020 at 21:05, Edward Cree wrote:
> efx_ef10_try_update_nic_stats_vf() used in_interrupt() to figure out
> whether it is safe to sleep (for MCDI) or not.
> The only caller from which it was not is efx_net_stats(), which can be
> invoked under dev_base_lock from net-sysfs::netstat_show().
> So add a new update_stats_atomic() method to struct efx_nic_type, and
> call it from efx_net_stats(), removing the need for
> efx_ef10_try_update_nic_stats_vf() to behave differently for this case
> (which it wasn't doing correctly anyway).
> For all nic_types other than EF10 VF, this method is NULL and so we
> call the regular update_stats() methods, which are happy with being
> called from atomic contexts.
>
> Fixes: f00bf2305cab ("sfc: don't update stats on VF when called in atomic context")
> Reported-by: Sebastian Andrzej Siewior <[email protected]>
> Signed-off-by: Edward Cree <[email protected]>
That's much nicer.
> ---
> Only compile-tested so far, because I'm waiting for my kernel to
> finish rebuilding with CONFIG_DEBUG_ATOMIC_SLEEP which I'm hoping
> is the right thing to detect the bug in the existing code.
> I also wasn't quite sure how to give credit to the thorough analysis
> in the commit message of Sebastian's patch. I don't think we have
> a Whatever-by: tag to cover that, do we?
Sebastian did the analysis and I did some word polishing, but the credit
surely goes to him.
Thanks,
tglx
On Mon, Sep 28, 2020 at 09:05:52PM +0100, Edward Cree wrote:
> efx_ef10_try_update_nic_stats_vf() used in_interrupt() to figure out
> whether it is safe to sleep (for MCDI) or not.
> The only caller from which it was not is efx_net_stats(), which can be
> invoked under dev_base_lock from net-sysfs::netstat_show().
> So add a new update_stats_atomic() method to struct efx_nic_type, and
> call it from efx_net_stats(), removing the need for
> efx_ef10_try_update_nic_stats_vf() to behave differently for this case
> (which it wasn't doing correctly anyway).
> For all nic_types other than EF10 VF, this method is NULL and so we
> call the regular update_stats() methods, which are happy with being
> called from atomic contexts.
>
> Fixes: f00bf2305cab ("sfc: don't update stats on VF when called in atomic context")
> Reported-by: Sebastian Andrzej Siewior <[email protected]>
> Signed-off-by: Edward Cree <[email protected]>
Reviewed-by: Martin Habets <[email protected]>
> ---
> Only compile-tested so far, because I'm waiting for my kernel to
> finish rebuilding with CONFIG_DEBUG_ATOMIC_SLEEP which I'm hoping
> is the right thing to detect the bug in the existing code.
> I also wasn't quite sure how to give credit to the thorough analysis
> in the commit message of Sebastian's patch. I don't think we have
> a Whatever-by: tag to cover that, do we?
> And this doesn't include your GFP_KERNEL change, which should
> probably go in separately if you take this.
>
> drivers/net/ethernet/sfc/ef10.c | 22 +++++++++++++---------
> drivers/net/ethernet/sfc/efx_common.c | 2 +-
> drivers/net/ethernet/sfc/net_driver.h | 5 +++++
> drivers/net/ethernet/sfc/nic_common.h | 7 +++++++
> 4 files changed, 26 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
> index c9df2e96ebe4..b702ba5986dc 100644
> --- a/drivers/net/ethernet/sfc/ef10.c
> +++ b/drivers/net/ethernet/sfc/ef10.c
> @@ -1871,15 +1871,6 @@ static int efx_ef10_try_update_nic_stats_vf(struct efx_nic *efx)
>
> spin_unlock_bh(&efx->stats_lock);
>
> - if (in_interrupt()) {
> - /* If in atomic context, cannot update stats. Just update the
> - * software stats and return so the caller can continue.
> - */
> - spin_lock_bh(&efx->stats_lock);
> - efx_update_sw_stats(efx, stats);
> - return 0;
> - }
> -
> efx_ef10_get_stat_mask(efx, mask);
>
> rc = efx_nic_alloc_buffer(efx, &stats_buf, dma_len, GFP_ATOMIC);
> @@ -1938,6 +1929,18 @@ static size_t efx_ef10_update_stats_vf(struct efx_nic *efx, u64 *full_stats,
> return efx_ef10_update_stats_common(efx, full_stats, core_stats);
> }
>
> +static size_t efx_ef10_update_stats_atomic_vf(struct efx_nic *efx, u64 *full_stats,
> + struct rtnl_link_stats64 *core_stats)
> +{
> + struct efx_ef10_nic_data *nic_data = efx->nic_data;
> +
> + /* In atomic context, cannot update HW stats. Just update the
> + * software stats and return so the caller can continue.
> + */
> + efx_update_sw_stats(efx, nic_data->stats);
> + return efx_ef10_update_stats_common(efx, full_stats, core_stats);
> +}
> +
> static void efx_ef10_push_irq_moderation(struct efx_channel *channel)
> {
> struct efx_nic *efx = channel->efx;
> @@ -3998,6 +4001,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
> .finish_flr = efx_port_dummy_op_void,
> .describe_stats = efx_ef10_describe_stats,
> .update_stats = efx_ef10_update_stats_vf,
> + .update_stats_atomic = efx_ef10_update_stats_atomic_vf,
> .start_stats = efx_port_dummy_op_void,
> .pull_stats = efx_port_dummy_op_void,
> .stop_stats = efx_port_dummy_op_void,
> diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
> index c256db241570..72a3f0e09f52 100644
> --- a/drivers/net/ethernet/sfc/efx_common.c
> +++ b/drivers/net/ethernet/sfc/efx_common.c
> @@ -602,7 +602,7 @@ void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats)
> struct efx_nic *efx = netdev_priv(net_dev);
>
> spin_lock_bh(&efx->stats_lock);
> - efx->type->update_stats(efx, NULL, stats);
> + efx_nic_update_stats_atomic(efx, NULL, stats);
> spin_unlock_bh(&efx->stats_lock);
> }
>
> diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
> index 47aa753e64bd..9f7dfdf708cf 100644
> --- a/drivers/net/ethernet/sfc/net_driver.h
> +++ b/drivers/net/ethernet/sfc/net_driver.h
> @@ -1172,6 +1172,9 @@ struct efx_udp_tunnel {
> * @describe_stats: Describe statistics for ethtool
> * @update_stats: Update statistics not provided by event handling.
> * Either argument may be %NULL.
> + * @update_stats_atomic: Update statistics while in atomic context, if that
> + * is more limiting than @update_stats. Otherwise, leave %NULL and
> + * driver core will call @update_stats.
> * @start_stats: Start the regular fetching of statistics
> * @pull_stats: Pull stats from the NIC and wait until they arrive.
> * @stop_stats: Stop the regular fetching of statistics
> @@ -1316,6 +1319,8 @@ struct efx_nic_type {
> size_t (*describe_stats)(struct efx_nic *efx, u8 *names);
> size_t (*update_stats)(struct efx_nic *efx, u64 *full_stats,
> struct rtnl_link_stats64 *core_stats);
> + size_t (*update_stats_atomic)(struct efx_nic *efx, u64 *full_stats,
> + struct rtnl_link_stats64 *core_stats);
> void (*start_stats)(struct efx_nic *efx);
> void (*pull_stats)(struct efx_nic *efx);
> void (*stop_stats)(struct efx_nic *efx);
> diff --git a/drivers/net/ethernet/sfc/nic_common.h b/drivers/net/ethernet/sfc/nic_common.h
> index 82271f0b8627..b9cafe9cd568 100644
> --- a/drivers/net/ethernet/sfc/nic_common.h
> +++ b/drivers/net/ethernet/sfc/nic_common.h
> @@ -244,6 +244,13 @@ void efx_nic_update_stats(const struct efx_hw_stat_desc *desc, size_t count,
> const unsigned long *mask, u64 *stats,
> const void *dma_buf, bool accumulate);
> void efx_nic_fix_nodesc_drop_stat(struct efx_nic *efx, u64 *stat);
> +static inline size_t efx_nic_update_stats_atomic(struct efx_nic *efx, u64 *full_stats,
> + struct rtnl_link_stats64 *core_stats)
> +{
> + if (efx->type->update_stats_atomic)
> + return efx->type->update_stats_atomic(efx, full_stats, core_stats);
> + return efx->type->update_stats(efx, full_stats, core_stats);
> +}
>
> #define EFX_MAX_FLUSH_TIME 5000
>
--
Martin Habets <[email protected]>
> On Mon, Sep 28 2020 at 21:05, Edward Cree wrote:
>> Only compile-tested so far, because I'm waiting for my kernel to
>> finish rebuilding with CONFIG_DEBUG_ATOMIC_SLEEP
I've now tested and confirmed that the might_sleep warning goes
away with this patch.
Thomas, do you want to pull it into v2 of your series, or should
I submit it separately to David?
-ed
On Tue, Sep 29 2020 at 16:15, Edward Cree wrote:
>> On Mon, Sep 28 2020 at 21:05, Edward Cree wrote:
>>> Only compile-tested so far, because I'm waiting for my kernel to
>>> finish rebuilding with CONFIG_DEBUG_ATOMIC_SLEEP
>
> I've now tested and confirmed that the might_sleep warning goes
> away with this patch.
>
> Thomas, do you want to pull it into v2 of your series, or should
> I submit it separately to David?
I have it already, but if Dave applies it right away, that's fine.
Thanks,
tglx