2024-03-25 17:23:05

by Ben Greear

[permalink] [raw]
Subject: Re: [PATCH] wifi: mac80211: Ensure links are cleaned up when driver fails.

On 11/10/23 16:10, [email protected] wrote:
> From: Ben Greear <[email protected]>
>
> On cleanup paths, links need to be deleted even if the driver fails
> to do so. Add a flag to cause driver errors to be ignored in
> appropriate cases.
>
> This appears to fix some kernel warnings and crashes.

Hello Johannes,

Any interest in this patch?

Thanks,
Ben

>
> Signed-off-by: Ben Greear <[email protected]>
> ---
> net/mac80211/cfg.c | 4 ++--
> net/mac80211/ieee80211_i.h | 7 ++++---
> net/mac80211/iface.c | 2 +-
> net/mac80211/link.c | 30 +++++++++++++++++++-----------
> net/mac80211/mlme.c | 29 +++++++++++++++--------------
> 5 files changed, 41 insertions(+), 31 deletions(-)
>
> diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
> index 08c284c4984a..1c2b88429ce4 100644
> --- a/net/mac80211/cfg.c
> +++ b/net/mac80211/cfg.c
> @@ -4894,7 +4894,7 @@ static int ieee80211_add_intf_link(struct wiphy *wiphy,
> if (wdev->use_4addr)
> return -EOPNOTSUPP;
>
> - return ieee80211_vif_set_links(sdata, wdev->valid_links, 0);
> + return ieee80211_vif_set_links(sdata, wdev->valid_links, 0, false);
> }
>
> static void ieee80211_del_intf_link(struct wiphy *wiphy,
> @@ -4905,7 +4905,7 @@ static void ieee80211_del_intf_link(struct wiphy *wiphy,
>
> lockdep_assert_wiphy(sdata->local->hw.wiphy);
>
> - ieee80211_vif_set_links(sdata, wdev->valid_links, 0);
> + ieee80211_vif_set_links(sdata, wdev->valid_links, 0, false);
> }
>
> static int sta_add_link_station(struct ieee80211_local *local,
> diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
> index 9ce7f7d2b573..34412ac5db71 100644
> --- a/net/mac80211/ieee80211_i.h
> +++ b/net/mac80211/ieee80211_i.h
> @@ -2040,10 +2040,11 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
> struct ieee80211_bss_conf *link_conf);
> void ieee80211_link_stop(struct ieee80211_link_data *link);
> int ieee80211_vif_set_links(struct ieee80211_sub_if_data *sdata,
> - u16 new_links, u16 dormant_links);
> -static inline void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata)
> + u16 new_links, u16 dormant_links, bool ignore_driver_failures);
> +static inline void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata,
> + bool ignore_driver_failures)
> {
> - ieee80211_vif_set_links(sdata, 0, 0);
> + ieee80211_vif_set_links(sdata, 0, 0, ignore_driver_failures);
> }
>
> /* tx handling */
> diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
> index 0c7ec6ef9136..c71e6c786b28 100644
> --- a/net/mac80211/iface.c
> +++ b/net/mac80211/iface.c
> @@ -864,7 +864,7 @@ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata)
> if (ieee80211_vif_is_mesh(&sdata->vif))
> ieee80211_mesh_teardown_sdata(sdata);
>
> - ieee80211_vif_clear_links(sdata);
> + ieee80211_vif_clear_links(sdata, true);
> ieee80211_link_stop(&sdata->deflink);
> }
>
> diff --git a/net/mac80211/link.c b/net/mac80211/link.c
> index 2dc0f46ee053..19c085a143e4 100644
> --- a/net/mac80211/link.c
> +++ b/net/mac80211/link.c
> @@ -225,7 +225,9 @@ static void ieee80211_set_vif_links_bitmaps(struct ieee80211_sub_if_data *sdata,
> if (sdata->vif.active_links)
> break;
> sdata->vif.active_links = valid_links & ~dormant_links;
> - WARN_ON(hweight16(sdata->vif.active_links) > 1);
> + if (WARN_ON(hweight16(sdata->vif.active_links) > 1))
> + sdata_err(sdata, "ERROR: set-vif-links-bitmaps: too many active-links, valid_links: 0x%x dormant_links: 0x%x active_links: 0x%x\n",
> + valid_links, dormant_links, sdata->vif.active_links);
> break;
> default:
> WARN_ON(1);
> @@ -234,7 +236,8 @@ static void ieee80211_set_vif_links_bitmaps(struct ieee80211_sub_if_data *sdata,
>
> static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata,
> struct link_container **to_free,
> - u16 new_links, u16 dormant_links)
> + u16 new_links, u16 dormant_links,
> + bool ignore_driver_failures)
> {
> u16 old_links = sdata->vif.valid_links;
> u16 old_active = sdata->vif.active_links;
> @@ -325,13 +328,17 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata,
> }
>
> if (ret) {
> - /* restore config */
> - memcpy(sdata->link, old_data, sizeof(old_data));
> - memcpy(sdata->vif.link_conf, old, sizeof(old));
> - ieee80211_set_vif_links_bitmaps(sdata, old_links, dormant_links);
> - /* and free (only) the newly allocated links */
> - memset(to_free, 0, sizeof(links));
> - goto free;
> + sdata_info(sdata, "driver error applying links: %d Restoring old configuration, old_links: 0x%x dormant_links: 0x%x requested new_links: 0x%x ignore-driver-failures: %d\n",
> + ret, old_links, dormant_links, new_links, ignore_driver_failures);
> + if (!ignore_driver_failures) {
> + /* restore config */
> + memcpy(sdata->link, old_data, sizeof(old_data));
> + memcpy(sdata->vif.link_conf, old, sizeof(old));
> + ieee80211_set_vif_links_bitmaps(sdata, old_links, dormant_links);
> + /* and free (only) the newly allocated links */
> + memset(to_free, 0, sizeof(links));
> + goto free;
> + }
> }
>
> /* use deflink/bss_conf again if and only if there are no more links */
> @@ -352,13 +359,14 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata,
> }
>
> int ieee80211_vif_set_links(struct ieee80211_sub_if_data *sdata,
> - u16 new_links, u16 dormant_links)
> + u16 new_links, u16 dormant_links,
> + bool ignore_driver_failures)
> {
> struct link_container *links[IEEE80211_MLD_MAX_NUM_LINKS];
> int ret;
>
> ret = ieee80211_vif_update_links(sdata, links, new_links,
> - dormant_links);
> + dormant_links, ignore_driver_failures);
> ieee80211_free_links(sdata, links);
>
> return ret;
> diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
> index bbb337005766..21ae23531f5f 100644
> --- a/net/mac80211/mlme.c
> +++ b/net/mac80211/mlme.c
> @@ -3215,7 +3215,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
> memset(&sdata->u.mgd.ttlm_info, 0,
> sizeof(sdata->u.mgd.ttlm_info));
> wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work);
> - ieee80211_vif_set_links(sdata, 0, 0);
> + ieee80211_vif_set_links(sdata, 0, 0, true);
> }
>
> static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
> @@ -3379,7 +3379,8 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
>
> lockdep_assert_wiphy(sdata->local->hw.wiphy);
>
> - if (WARN_ON_ONCE(ieee80211_vif_is_mld(&sdata->vif)))
> + if (WARN_ONCE(ieee80211_vif_is_mld(&sdata->vif),
> + "mgd-probe-ap called for MLD station: %s", sdata->dev->name))
> return;
>
> if (!ieee80211_sdata_running(sdata))
> @@ -3654,7 +3655,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata,
> sdata->u.mgd.flags = 0;
>
> ieee80211_link_release_channel(&sdata->deflink);
> - ieee80211_vif_set_links(sdata, 0, 0);
> + ieee80211_vif_set_links(sdata, 0, 0, true);
> }
>
> cfg80211_put_bss(sdata->local->hw.wiphy, auth_data->bss);
> @@ -3711,7 +3712,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
> }
>
> ieee80211_link_release_channel(&sdata->deflink);
> - ieee80211_vif_set_links(sdata, 0, 0);
> + ieee80211_vif_set_links(sdata, 0, 0, true);
> }
>
> kfree(assoc_data);
> @@ -5268,7 +5269,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
> }
> }
>
> - ieee80211_vif_set_links(sdata, valid_links, dormant_links);
> + ieee80211_vif_set_links(sdata, valid_links, dormant_links, false);
> }
>
> for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
> @@ -5348,7 +5349,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
> }
>
> /* links might have changed due to rejected ones, set them again */
> - ieee80211_vif_set_links(sdata, valid_links, dormant_links);
> + ieee80211_vif_set_links(sdata, valid_links, dormant_links, false);
>
> rate_control_rate_init(sta);
>
> @@ -5915,7 +5916,7 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy,
> new_dormant_links = sdata->vif.dormant_links & ~sdata->u.mgd.removed_links;
>
> ret = ieee80211_vif_set_links(sdata, new_valid_links,
> - new_dormant_links);
> + new_dormant_links, false);
> if (ret)
> sdata_info(sdata, "Failed setting valid links\n");
>
> @@ -6045,12 +6046,12 @@ static void ieee80211_tid_to_link_map_work(struct wiphy *wiphy,
> return;
> }
>
> - ieee80211_vif_set_links(sdata, sdata->vif.valid_links, 0);
> + ieee80211_vif_set_links(sdata, sdata->vif.valid_links, 0, false);
> new_active_links = BIT(ffs(new_active_links) - 1);
> ieee80211_set_active_links(&sdata->vif, new_active_links);
>
> ret = ieee80211_vif_set_links(sdata, sdata->vif.valid_links,
> - new_dormant_links);
> + new_dormant_links, false);
>
> sdata->u.mgd.ttlm_info.active = true;
> sdata->u.mgd.ttlm_info.switch_time = 0;
> @@ -6165,7 +6166,7 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata,
> */
> ret = ieee80211_vif_set_links(sdata,
> sdata->vif.valid_links,
> - 0);
> + 0, false);
> if (ret) {
> sdata_info(sdata, "Failed setting valid/dormant links\n");
> return;
> @@ -7229,12 +7230,12 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
> mlo = true;
> if (WARN_ON(!ap_mld_addr))
> return -EINVAL;
> - err = ieee80211_vif_set_links(sdata, BIT(link_id), 0);
> + err = ieee80211_vif_set_links(sdata, BIT(link_id), 0, false);
> } else {
> if (WARN_ON(ap_mld_addr))
> return -EINVAL;
> ap_mld_addr = cbss->bssid;
> - err = ieee80211_vif_set_links(sdata, 0, 0);
> + err = ieee80211_vif_set_links(sdata, 0, 0, false);
> link_id = 0;
> mlo = false;
> }
> @@ -7386,7 +7387,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
>
> out_err:
> ieee80211_link_release_channel(&sdata->deflink);
> - ieee80211_vif_set_links(sdata, 0, 0);
> + ieee80211_vif_set_links(sdata, 0, 0, true);
> return err;
> }
>
> @@ -8037,7 +8038,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
> }
>
> /* if there was no authentication, set up the link */
> - err = ieee80211_vif_set_links(sdata, BIT(assoc_link_id), 0);
> + err = ieee80211_vif_set_links(sdata, BIT(assoc_link_id), 0, false);
> if (err)
> goto err_clear;
> } else {

--
Ben Greear <[email protected]>
Candela Technologies Inc http://www.candelatech.com




2024-03-25 18:10:32

by Johannes Berg

[permalink] [raw]
Subject: Re: [PATCH] wifi: mac80211: Ensure links are cleaned up when driver fails.

On Mon, 2024-03-25 at 09:10 -0700, Ben Greear wrote:
> On 11/10/23 16:10, [email protected] wrote:
> > From: Ben Greear <[email protected]>
> >
> > On cleanup paths, links need to be deleted even if the driver fails
> > to do so. Add a flag to cause driver errors to be ignored in
> > appropriate cases.
> >
> > This appears to fix some kernel warnings and crashes.
>
> Hello Johannes,
>
> Any interest in this patch?

Well, you threw a bunch of unrelated stuff into it, and didn't even
really explain why it's needed ... so not really?

johannes

2024-03-25 18:20:15

by Ben Greear

[permalink] [raw]
Subject: Re: [PATCH] wifi: mac80211: Ensure links are cleaned up when driver fails.

On 3/25/24 10:30, Johannes Berg wrote:
> On Mon, 2024-03-25 at 09:10 -0700, Ben Greear wrote:
>> On 11/10/23 16:10, [email protected] wrote:
>>> From: Ben Greear <[email protected]>
>>>
>>> On cleanup paths, links need to be deleted even if the driver fails
>>> to do so. Add a flag to cause driver errors to be ignored in
>>> appropriate cases.
>>>
>>> This appears to fix some kernel warnings and crashes.
>>
>> Hello Johannes,
>>
>> Any interest in this patch?
>
> Well, you threw a bunch of unrelated stuff into it, and didn't even
> really explain why it's needed ... so not really?

It is needed because if FW crashes while you are trying to remove links, then link
removal would fail, which causes mac80211 to not clean up its links. In case where you
are trying to delete the station, this causes un-cleaned links which caused crashes
back when I was creating this patch. My patch allows always cleaning up the links
regardless of driver errors in the teardown paths.

Always possible some intervening changes made this less of a problem, especially since
MLO is disabled for be200 in upstream code anyway now.

I can remove the extra logging if you are otherwise OK with the approach
but don't want the logs.

Thanks,
Ben

>
> johannes
>

--
Ben Greear <[email protected]>
Candela Technologies Inc http://www.candelatech.com



2024-03-25 18:24:42

by Johannes Berg

[permalink] [raw]
Subject: Re: [PATCH] wifi: mac80211: Ensure links are cleaned up when driver fails.

On Mon, 2024-03-25 at 10:40 -0700, Ben Greear wrote:
> It is needed because if FW crashes while you are trying to remove links, then link
> removal would fail, which causes mac80211 to not clean up its links.

OK. It should get back to some legal state after recovery though?

> In case where you
> are trying to delete the station, this causes un-cleaned links which caused crashes
> back when I was creating this patch. My patch allows always cleaning up the links
> regardless of driver errors in the teardown paths.

Seems potentially more like driver errors. Or perhaps we should just
ignore driver errors entirely?

> Always possible some intervening changes made this less of a problem, especially since
> MLO is disabled for be200 in upstream code anyway now.

Well that's just temporary, but we've also done a lot of work on FW
error recovery, though likely unrelated to this particular issue.

> I can remove the extra logging if you are otherwise OK with the approach
> but don't want the logs.

Well it'd be nice to actually see what crashed, and maybe it should
really be driver fixes? I don't really understand why mac80211 would
crash on failure of link removal.

johannes

2024-03-25 20:39:38

by Ben Greear

[permalink] [raw]
Subject: Re: [PATCH] wifi: mac80211: Ensure links are cleaned up when driver fails.

On 3/25/24 10:43, Johannes Berg wrote:
> On Mon, 2024-03-25 at 10:40 -0700, Ben Greear wrote:
>> It is needed because if FW crashes while you are trying to remove links, then link
>> removal would fail, which causes mac80211 to not clean up its links.
>
> OK. It should get back to some legal state after recovery though?

I assume mac80211 will re-add links on re-association/recovery.

>
>> In case where you
>> are trying to delete the station, this causes un-cleaned links which caused crashes
>> back when I was creating this patch. My patch allows always cleaning up the links
>> regardless of driver errors in the teardown paths.
>
> Seems potentially more like driver errors. Or perhaps we should just
> ignore driver errors entirely?

I guess you need errors reported when building links, but maybe removal always
has to succeed. Last I looked at iwlwifi, it returned errors on link
removal when FW crashed as it was trying to remove them.

>> Always possible some intervening changes made this less of a problem, especially since
>> MLO is disabled for be200 in upstream code anyway now.
>
> Well that's just temporary, but we've also done a lot of work on FW
> error recovery, though likely unrelated to this particular issue.
>
>> I can remove the extra logging if you are otherwise OK with the approach
>> but don't want the logs.
>
> Well it'd be nice to actually see what crashed, and maybe it should
> really be driver fixes? I don't really understand why mac80211 would
> crash on failure of link removal.

6.6-ish with old be200 firmware + MLO was a bad experience. There were multiple crashes all
over the place. This was one of them, but I don't recall exactly how it crashed, just
something about un-deleted (or re-built) links when rest of the code expected them to be deleted since
STA was going away.

This below is the real change in the patch. It would keep us from re-building links
in teardown paths. Rest of the patch is passing in the knowledge of whether we can ignore
the failure or not.

@@ -325,13 +328,17 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata,
}
if (ret) {
- /* restore config */
- memcpy(sdata->link, old_data, sizeof(old_data));
- memcpy(sdata->vif.link_conf, old, sizeof(old));
- ieee80211_set_vif_links_bitmaps(sdata, old_links, dormant_links);
- /* and free (only) the newly allocated links */
- memset(to_free, 0, sizeof(links));
- goto free;
+ sdata_info(sdata, "driver error applying links: %d Restoring old configuration, old_links: 0x%x dormant_links: 0x%x requested new_links: 0x%x
ignore-driver-failures: %d\n",
+ ret, old_links, dormant_links, new_links, ignore_driver_failures);
+ if (!ignore_driver_failures) {
+ /* restore config */
+ memcpy(sdata->link, old_data, sizeof(old_data));
+ memcpy(sdata->vif.link_conf, old, sizeof(old));
+ ieee80211_set_vif_links_bitmaps(sdata, old_links, dormant_links);
+ /* and free (only) the newly allocated links */
+ memset(to_free, 0, sizeof(links));
+ goto free;
+ }
}

Thanks,
Ben

--
Ben Greear <[email protected]>
Candela Technologies Inc http://www.candelatech.com