2023-01-17 17:42:27

by Arnd Bergmann

[permalink] [raw]
Subject: [PATCH] mlx5: reduce stack usage in mlx5_setup_tc

From: Arnd Bergmann <[email protected]>

Clang warns about excessive stack usage on 32-bit targets:

drivers/net/ethernet/mellanox/mlx5/core/en_main.c:3597:12: error: stack frame size (1184) exceeds limit (1024) in 'mlx5e_setup_tc' [-Werror,-Wframe-larger-than]
static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,

It turns out that both the mlx5e_setup_tc_mqprio_dcb() function and
the mlx5e_safe_switch_params() function it calls have a copy of
'struct mlx5e_params' on the stack, and this structure is fairly
large.

Use dynamic allocation for both.

Signed-off-by: Arnd Bergmann <[email protected]>
---
.../net/ethernet/mellanox/mlx5/core/en_main.c | 36 ++++++++++++-------
1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 6bb0fdaa5efa..e5198c26e383 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2993,37 +2993,42 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
return err;
}

-int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
+noinline_for_stack int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
struct mlx5e_params *params,
mlx5e_fp_preactivate preactivate,
void *context, bool reset)
{
- struct mlx5e_channels new_chs = {};
+ struct mlx5e_channels *new_chs;
int err;

reset &= test_bit(MLX5E_STATE_OPENED, &priv->state);
if (!reset)
return mlx5e_switch_priv_params(priv, params, preactivate, context);

- new_chs.params = *params;
+ new_chs = kzalloc(sizeof(*new_chs), GFP_KERNEL);
+ if (!new_chs)
+ return -ENOMEM;
+ new_chs->params = *params;

- mlx5e_selq_prepare_params(&priv->selq, &new_chs.params);
+ mlx5e_selq_prepare_params(&priv->selq, &new_chs->params);

- err = mlx5e_open_channels(priv, &new_chs);
+ err = mlx5e_open_channels(priv, new_chs);
if (err)
goto err_cancel_selq;

- err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context);
+ err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context);
if (err)
goto err_close;

+ kfree(new_chs);
return 0;

err_close:
- mlx5e_close_channels(&new_chs);
+ mlx5e_close_channels(new_chs);

err_cancel_selq:
mlx5e_selq_cancel(&priv->selq);
+ kfree(new_chs);
return err;
}

@@ -3419,10 +3424,10 @@ static void mlx5e_params_mqprio_reset(struct mlx5e_params *params)
mlx5e_params_mqprio_dcb_set(params, 1);
}

-static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
+static noinline_for_stack int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
struct tc_mqprio_qopt *mqprio)
{
- struct mlx5e_params new_params;
+ struct mlx5e_params *new_params;
u8 tc = mqprio->num_tc;
int err;

@@ -3431,10 +3436,13 @@ static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
if (tc && tc != MLX5E_MAX_NUM_TC)
return -EINVAL;

- new_params = priv->channels.params;
- mlx5e_params_mqprio_dcb_set(&new_params, tc ? tc : 1);
+ new_params = kmemdup(&priv->channels.params,
+ sizeof(priv->channels.params), GFP_KERNEL);
+ if (!new_params)
+ return -ENOMEM;
+ mlx5e_params_mqprio_dcb_set(new_params, tc ? tc : 1);

- err = mlx5e_safe_switch_params(priv, &new_params,
+ err = mlx5e_safe_switch_params(priv, new_params,
mlx5e_num_channels_changed_ctx, NULL, true);

if (!err && priv->mqprio_rl) {
@@ -3445,6 +3453,8 @@ static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,

priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
mlx5e_get_dcb_num_tc(&priv->channels.params));
+
+ kfree(new_params);
return err;
}

@@ -3533,7 +3543,7 @@ static struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_create(struct mlx5_core_dev *mdev
return rl;
}

-static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
+static noinline_for_stack int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
struct tc_mqprio_qopt_offload *mqprio)
{
mlx5e_fp_preactivate preactivate;
--
2.39.0


2023-01-17 18:14:59

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH] mlx5: reduce stack usage in mlx5_setup_tc

On Tue, Jan 17, 2023 at 9:28 AM Arnd Bergmann <[email protected]> wrote:
>
> From: Arnd Bergmann <[email protected]>
>
> Clang warns about excessive stack usage on 32-bit targets:
>
> drivers/net/ethernet/mellanox/mlx5/core/en_main.c:3597:12: error: stack frame size (1184) exceeds limit (1024) in 'mlx5e_setup_tc' [-Werror,-Wframe-larger-than]
> static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
>
> It turns out that both the mlx5e_setup_tc_mqprio_dcb() function and
> the mlx5e_safe_switch_params() function it calls have a copy of
> 'struct mlx5e_params' on the stack, and this structure is fairly
> large.

The logic changes LGTM, but were the noinline_for_stack left behind
from earlier local revisions? Do we still need those if these structs
have been moved from the stack?

>
> Use dynamic allocation for both.
>
> Signed-off-by: Arnd Bergmann <[email protected]>
> ---
> .../net/ethernet/mellanox/mlx5/core/en_main.c | 36 ++++++++++++-------
> 1 file changed, 23 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index 6bb0fdaa5efa..e5198c26e383 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -2993,37 +2993,42 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
> return err;
> }
>
> -int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
> +noinline_for_stack int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
> struct mlx5e_params *params,
> mlx5e_fp_preactivate preactivate,
> void *context, bool reset)
> {
> - struct mlx5e_channels new_chs = {};
> + struct mlx5e_channels *new_chs;
> int err;
>
> reset &= test_bit(MLX5E_STATE_OPENED, &priv->state);
> if (!reset)
> return mlx5e_switch_priv_params(priv, params, preactivate, context);
>
> - new_chs.params = *params;
> + new_chs = kzalloc(sizeof(*new_chs), GFP_KERNEL);
> + if (!new_chs)
> + return -ENOMEM;
> + new_chs->params = *params;
>
> - mlx5e_selq_prepare_params(&priv->selq, &new_chs.params);
> + mlx5e_selq_prepare_params(&priv->selq, &new_chs->params);
>
> - err = mlx5e_open_channels(priv, &new_chs);
> + err = mlx5e_open_channels(priv, new_chs);
> if (err)
> goto err_cancel_selq;
>
> - err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context);
> + err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context);
> if (err)
> goto err_close;
>
> + kfree(new_chs);
> return 0;
>
> err_close:
> - mlx5e_close_channels(&new_chs);
> + mlx5e_close_channels(new_chs);
>
> err_cancel_selq:
> mlx5e_selq_cancel(&priv->selq);
> + kfree(new_chs);
> return err;
> }
>
> @@ -3419,10 +3424,10 @@ static void mlx5e_params_mqprio_reset(struct mlx5e_params *params)
> mlx5e_params_mqprio_dcb_set(params, 1);
> }
>
> -static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
> +static noinline_for_stack int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
> struct tc_mqprio_qopt *mqprio)
> {
> - struct mlx5e_params new_params;
> + struct mlx5e_params *new_params;
> u8 tc = mqprio->num_tc;
> int err;
>
> @@ -3431,10 +3436,13 @@ static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
> if (tc && tc != MLX5E_MAX_NUM_TC)
> return -EINVAL;
>
> - new_params = priv->channels.params;
> - mlx5e_params_mqprio_dcb_set(&new_params, tc ? tc : 1);
> + new_params = kmemdup(&priv->channels.params,
> + sizeof(priv->channels.params), GFP_KERNEL);
> + if (!new_params)
> + return -ENOMEM;
> + mlx5e_params_mqprio_dcb_set(new_params, tc ? tc : 1);
>
> - err = mlx5e_safe_switch_params(priv, &new_params,
> + err = mlx5e_safe_switch_params(priv, new_params,
> mlx5e_num_channels_changed_ctx, NULL, true);
>
> if (!err && priv->mqprio_rl) {
> @@ -3445,6 +3453,8 @@ static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
>
> priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
> mlx5e_get_dcb_num_tc(&priv->channels.params));
> +
> + kfree(new_params);
> return err;
> }
>
> @@ -3533,7 +3543,7 @@ static struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_create(struct mlx5_core_dev *mdev
> return rl;
> }
>
> -static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
> +static noinline_for_stack int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
> struct tc_mqprio_qopt_offload *mqprio)
> {
> mlx5e_fp_preactivate preactivate;
> --
> 2.39.0
>


--
Thanks,
~Nick Desaulniers

2023-01-17 19:14:47

by Tariq Toukan

[permalink] [raw]
Subject: Re: [PATCH] mlx5: reduce stack usage in mlx5_setup_tc



On 17/01/2023 19:28, Arnd Bergmann wrote:
> From: Arnd Bergmann <[email protected]>
>
> Clang warns about excessive stack usage on 32-bit targets:
>
> drivers/net/ethernet/mellanox/mlx5/core/en_main.c:3597:12: error: stack frame size (1184) exceeds limit (1024) in 'mlx5e_setup_tc' [-Werror,-Wframe-larger-than]
> static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
>
> It turns out that both the mlx5e_setup_tc_mqprio_dcb() function and
> the mlx5e_safe_switch_params() function it calls have a copy of
> 'struct mlx5e_params' on the stack, and this structure is fairly
> large.
>
> Use dynamic allocation for both.
>
> Signed-off-by: Arnd Bergmann <[email protected]>
> ---
> .../net/ethernet/mellanox/mlx5/core/en_main.c | 36 ++++++++++++-------
> 1 file changed, 23 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index 6bb0fdaa5efa..e5198c26e383 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -2993,37 +2993,42 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
> return err;
> }
>
> -int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
> +noinline_for_stack int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
> struct mlx5e_params *params,
> mlx5e_fp_preactivate preactivate,
> void *context, bool reset)
> {
> - struct mlx5e_channels new_chs = {};
> + struct mlx5e_channels *new_chs;
> int err;
>
> reset &= test_bit(MLX5E_STATE_OPENED, &priv->state);
> if (!reset)
> return mlx5e_switch_priv_params(priv, params, preactivate, context);
>
> - new_chs.params = *params;
> + new_chs = kzalloc(sizeof(*new_chs), GFP_KERNEL);
> + if (!new_chs)
> + return -ENOMEM;
> + new_chs->params = *params;
>
> - mlx5e_selq_prepare_params(&priv->selq, &new_chs.params);
> + mlx5e_selq_prepare_params(&priv->selq, &new_chs->params);
>
> - err = mlx5e_open_channels(priv, &new_chs);
> + err = mlx5e_open_channels(priv, new_chs);
> if (err)
> goto err_cancel_selq;
>
> - err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context);
> + err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context);
> if (err)
> goto err_close;
>
> + kfree(new_chs);
> return 0;
>
> err_close:
> - mlx5e_close_channels(&new_chs);
> + mlx5e_close_channels(new_chs);
>
> err_cancel_selq:
> mlx5e_selq_cancel(&priv->selq);
> + kfree(new_chs);
> return err;
> }
>
> @@ -3419,10 +3424,10 @@ static void mlx5e_params_mqprio_reset(struct mlx5e_params *params)
> mlx5e_params_mqprio_dcb_set(params, 1);
> }
>
> -static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
> +static noinline_for_stack int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
> struct tc_mqprio_qopt *mqprio)
> {
> - struct mlx5e_params new_params;
> + struct mlx5e_params *new_params;
> u8 tc = mqprio->num_tc;
> int err;
>
> @@ -3431,10 +3436,13 @@ static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
> if (tc && tc != MLX5E_MAX_NUM_TC)
> return -EINVAL;
>
> - new_params = priv->channels.params;
> - mlx5e_params_mqprio_dcb_set(&new_params, tc ? tc : 1);
> + new_params = kmemdup(&priv->channels.params,
> + sizeof(priv->channels.params), GFP_KERNEL);
> + if (!new_params)
> + return -ENOMEM;
> + mlx5e_params_mqprio_dcb_set(new_params, tc ? tc : 1);
>
> - err = mlx5e_safe_switch_params(priv, &new_params,
> + err = mlx5e_safe_switch_params(priv, new_params,
> mlx5e_num_channels_changed_ctx, NULL, true);
>

Is this change really required, even after new_chs are dynamically
allocated?
As this code pattern of static local new_params repeats in all callers
of mlx5e_safe_switch_params, let's not change this one alone if not
necessary.


Same for the noinline_for_stack. Are they really needed even after using
dynamic allocation for new_chs?

> if (!err && priv->mqprio_rl) {
> @@ -3445,6 +3453,8 @@ static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
>
> priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
> mlx5e_get_dcb_num_tc(&priv->channels.params));
> +
> + kfree(new_params);
> return err;
> }
>
> @@ -3533,7 +3543,7 @@ static struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_create(struct mlx5_core_dev *mdev
> return rl;
> }
>
> -static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
> +static noinline_for_stack int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
> struct tc_mqprio_qopt_offload *mqprio)
> {
> mlx5e_fp_preactivate preactivate;

2023-01-17 21:57:04

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH] mlx5: reduce stack usage in mlx5_setup_tc

On Tue, Jan 17, 2023, at 18:46, Tariq Toukan wrote:
> On 17/01/2023 19:28, Arnd Bergmann wrote:
>> From: Arnd Bergmann <[email protected]>
>>
>> Clang warns about excessive stack usage on 32-bit targets:
>>
>> drivers/net/ethernet/mellanox/mlx5/core/en_main.c:3597:12: error: stack frame size (1184) exceeds limit (1024) in 'mlx5e_setup_tc' [-Werror,-Wframe-larger-than]
>> static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
>>
>> It turns out that both the mlx5e_setup_tc_mqprio_dcb() function and
>> the mlx5e_safe_switch_params() function it calls have a copy of
>> 'struct mlx5e_params' on the stack, and this structure is fairly
>> large.
>>
>> Use dynamic allocation for both.
>>

>>
>> - err = mlx5e_safe_switch_params(priv, &new_params,
>> + err = mlx5e_safe_switch_params(priv, new_params,
>> mlx5e_num_channels_changed_ctx, NULL, true);
>>
>
> Is this change really required, even after new_chs are dynamically
> allocated?
> As this code pattern of static local new_params repeats in all callers
> of mlx5e_safe_switch_params, let's not change this one alone if not
> necessary.

I'm not sure any more now, I actually did the patch a few weeks ago
and only now came across it while going through my backlog.

Generally speaking, the 'new_params' structure on the stack is
too large, but I no longer see warnings after my patch.

> Same for the noinline_for_stack. Are they really needed even after using
> dynamic allocation for new_chs?

I've reverted both of those hunks now, let me try reproducing the
original randconfig reports and see what still happens.

Arnd