In case of buggy firmware, brcmfmac may perform a hardware reset. If during
reset and subsequent probing an early failure occurs, a memory region is
accidentally double-freed. With hardened memory allocation enabled, this error
will be detected.
- return early where appropriate to skip unnecessary clean-up.
- set '.freezer' pointer to NULL to prevent double-freeing under possible
other circumstances and to re-align result under various different
behaviors of memory allocation freeing.
- correctly claim host on func1 for disabling func2.
- after reset, do not initiate probing immediately, but rely on events.
Given a firmware crash, function 'brcmf_sdio_bus_reset' is called. It calls
'brcmf_sdiod_remove', then follows up with 'brcmf_sdiod_probe' to reinitialize
the hardware. If 'brcmf_sdiod_probe' fails to "set F1 blocksize", it exits
early, which includes calling 'brcmf_sdiod_remove'. In both cases
'brcmf_sdiod_freezer_detach' is called to free allocated '.freezer', which
has not yet been re-allocated the second time.
Stacktrace of (failing) hardware reset after firmware-crash:
Code: b9402b82 8b0202c0 eb1a02df 54000041 (d4210000)
ret_from_fork+0x10/0x20
kthread+0x154/0x160
worker_thread+0x188/0x504
process_one_work+0x1f4/0x490
brcmf_core_bus_reset+0x34/0x44 [brcmfmac]
brcmf_sdio_bus_reset+0x68/0xc0 [brcmfmac]
brcmf_sdiod_probe+0x170/0x21c [brcmfmac]
brcmf_sdiod_remove+0x48/0xc0 [brcmfmac]
kfree+0x210/0x220
__slab_free+0x58/0x40c
Call trace:
x2 : 0000000000000040 x1 : fffffc00002d2b80 x0 : ffff00000b4aee40
x5 : ffff8000013fa728 x4 : 0000000000000001 x3 : ffff00000b4aee00
x8 : ffff800009967ce0 x7 : ffff8000099bfce0 x6 : 00000006f8005d01
x11: ffff8000099bfce0 x10: 00000000fffff000 x9 : ffff8000083401d0
x14: 0000000000000000 x13: 657a69736b636f6c x12: 6220314620746573
x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000030
x20: fffffc00002d2ba0 x19: fffffc00002d2b80 x18: 0000000000000000
x23: ffff00000b4aee00 x22: ffff00000b4aee00 x21: 0000000000000001
x26: ffff00000b4aee00 x25: ffff0000f7753705 x24: 000000000001288a
x29: ffff80000a22bbf0 x28: ffff000000401200 x27: 000000008020001a
sp : ffff80000a22bbf0
lr : kfree+0x210/0x220
pc : __slab_free+0x58/0x40c
pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
Workqueue: events brcmf_core_bus_reset [brcmfmac]
Hardware name: Pine64 Pinebook Pro (DT)
CPU: 2 PID: 639 Comm: kworker/2:2 Tainted: G C 5.16.0-0.bpo.4-arm64 #1 Debian 5.16.12-1~bpo11+1
nvmem_rockchip_efuse industrialio_triggered_buffer videodev snd_soc_core snd_pcm_dmaengine kfifo_buf snd_pcm io_domain mc industrialio mt>
Modules linked in: snd_seq_dummy snd_hrtimer snd_seq snd_seq_device nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reje>
Internal error: Oops - BUG: 0 [#1] SMP
kernel BUG at mm/slub.c:379!
Signed-off-by: Danny van Heumen <[email protected]>
---
.../wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 13 +++++--------
.../net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 10 +---------
2 files changed, 6 insertions(+), 17 deletions(-)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index ac02244a6fdf..414ee21f42e3 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -802,6 +802,7 @@ static void brcmf_sdiod_freezer_detach(struct brcmf_sdio_dev *sdiodev)
if (sdiodev->freezer) {
WARN_ON(atomic_read(&sdiodev->freezer->freezing));
kfree(sdiodev->freezer);
+ sdiodev->freezer = NULL;
}
}
@@ -875,13 +876,9 @@ int brcmf_sdiod_remove(struct brcmf_sdio_dev *sdiodev)
brcmf_sdiod_freezer_detach(sdiodev);
- /* Disable Function 2 */
- sdio_claim_host(sdiodev->func2);
- sdio_disable_func(sdiodev->func2);
- sdio_release_host(sdiodev->func2);
-
- /* Disable Function 1 */
+ /* Disable functions 2 then 1. */
sdio_claim_host(sdiodev->func1);
+ sdio_disable_func(sdiodev->func2);
sdio_disable_func(sdiodev->func1);
sdio_release_host(sdiodev->func1);
@@ -911,7 +908,7 @@ int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
if (ret) {
brcmf_err("Failed to set F1 blocksize\n");
sdio_release_host(sdiodev->func1);
- goto out;
+ return ret;
}
switch (sdiodev->func2->device) {
case SDIO_DEVICE_ID_BROADCOM_CYPRESS_4373:
@@ -933,7 +930,7 @@ int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
if (ret) {
brcmf_err("Failed to set F2 blocksize\n");
sdio_release_host(sdiodev->func1);
- goto out;
+ return ret;
} else {
brcmf_dbg(SDIO, "set F2 blocksize to %d\n", f2_blksz);
}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 212fbbe1cd7e..2ed70f809097 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -4152,7 +4152,6 @@ int brcmf_sdio_get_fwname(struct device *dev, const char *ext, u8 *fw_name)
static int brcmf_sdio_bus_reset(struct device *dev)
{
- int ret = 0;
struct brcmf_bus *bus_if = dev_get_drvdata(dev);
struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
@@ -4169,14 +4168,7 @@ static int brcmf_sdio_bus_reset(struct device *dev)
sdio_release_host(sdiodev->func1);
brcmf_bus_change_state(sdiodev->bus_if, BRCMF_BUS_DOWN);
-
- ret = brcmf_sdiod_probe(sdiodev);
- if (ret) {
- brcmf_err("Failed to probe after sdio device reset: ret %d\n",
- ret);
- }
-
- return ret;
+ return 0;
}
static const struct brcmf_bus_ops brcmf_sdio_bus_ops = {
--
2.34.1
On 7/12/2022 1:21 AM, Danny van Heumen wrote:
> In case of buggy firmware, brcmfmac may perform a hardware reset. If during
> reset and subsequent probing an early failure occurs, a memory region is
> accidentally double-freed. With hardened memory allocation enabled, this error
> will be detected.
>
> - return early where appropriate to skip unnecessary clean-up.
> - set '.freezer' pointer to NULL to prevent double-freeing under possible
> other circumstances and to re-align result under various different
> behaviors of memory allocation freeing.
> - correctly claim host on func1 for disabling func2.
> - after reset, do not initiate probing immediately, but rely on events.
>
> Given a firmware crash, function 'brcmf_sdio_bus_reset' is called. It calls
> 'brcmf_sdiod_remove', then follows up with 'brcmf_sdiod_probe' to reinitialize
> the hardware. If 'brcmf_sdiod_probe' fails to "set F1 blocksize", it exits
> early, which includes calling 'brcmf_sdiod_remove'. In both cases
> 'brcmf_sdiod_freezer_detach' is called to free allocated '.freezer', which
> has not yet been re-allocated the second time.
>
> Stacktrace of (failing) hardware reset after firmware-crash:
>
> Code: b9402b82 8b0202c0 eb1a02df 54000041 (d4210000)
> ret_from_fork+0x10/0x20
> kthread+0x154/0x160
> worker_thread+0x188/0x504
> process_one_work+0x1f4/0x490
> brcmf_core_bus_reset+0x34/0x44 [brcmfmac]
> brcmf_sdio_bus_reset+0x68/0xc0 [brcmfmac]
> brcmf_sdiod_probe+0x170/0x21c [brcmfmac]
> brcmf_sdiod_remove+0x48/0xc0 [brcmfmac]
> kfree+0x210/0x220
> __slab_free+0x58/0x40c
> Call trace:
> x2 : 0000000000000040 x1 : fffffc00002d2b80 x0 : ffff00000b4aee40
> x5 : ffff8000013fa728 x4 : 0000000000000001 x3 : ffff00000b4aee00
> x8 : ffff800009967ce0 x7 : ffff8000099bfce0 x6 : 00000006f8005d01
> x11: ffff8000099bfce0 x10: 00000000fffff000 x9 : ffff8000083401d0
> x14: 0000000000000000 x13: 657a69736b636f6c x12: 6220314620746573
> x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000030
> x20: fffffc00002d2ba0 x19: fffffc00002d2b80 x18: 0000000000000000
> x23: ffff00000b4aee00 x22: ffff00000b4aee00 x21: 0000000000000001
> x26: ffff00000b4aee00 x25: ffff0000f7753705 x24: 000000000001288a
> x29: ffff80000a22bbf0 x28: ffff000000401200 x27: 000000008020001a
> sp : ffff80000a22bbf0
> lr : kfree+0x210/0x220
> pc : __slab_free+0x58/0x40c
> pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> Workqueue: events brcmf_core_bus_reset [brcmfmac]
> Hardware name: Pine64 Pinebook Pro (DT)
> CPU: 2 PID: 639 Comm: kworker/2:2 Tainted: G C 5.16.0-0.bpo.4-arm64 #1 Debian 5.16.12-1~bpo11+1
> nvmem_rockchip_efuse industrialio_triggered_buffer videodev snd_soc_core snd_pcm_dmaengine kfifo_buf snd_pcm io_domain mc industrialio mt>
> Modules linked in: snd_seq_dummy snd_hrtimer snd_seq snd_seq_device nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reje>
> Internal error: Oops - BUG: 0 [#1] SMP
> kernel BUG at mm/slub.c:379!
Reviewed-by: Arend van Spriel <aspriel.gmail.com>
> Signed-off-by: Danny van Heumen <[email protected]>
> ---
> .../wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 13 +++++--------
> .../net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 10 +---------
> 2 files changed, 6 insertions(+), 17 deletions(-)
On Tue, 12 Jul 2022 at 01:22, Danny van Heumen <[email protected]> wrote:
>
> In case of buggy firmware, brcmfmac may perform a hardware reset. If during
> reset and subsequent probing an early failure occurs, a memory region is
> accidentally double-freed. With hardened memory allocation enabled, this error
> will be detected.
>
> - return early where appropriate to skip unnecessary clean-up.
> - set '.freezer' pointer to NULL to prevent double-freeing under possible
> other circumstances and to re-align result under various different
> behaviors of memory allocation freeing.
> - correctly claim host on func1 for disabling func2.
> - after reset, do not initiate probing immediately, but rely on events.
>
> Given a firmware crash, function 'brcmf_sdio_bus_reset' is called. It calls
> 'brcmf_sdiod_remove', then follows up with 'brcmf_sdiod_probe' to reinitialize
> the hardware. If 'brcmf_sdiod_probe' fails to "set F1 blocksize", it exits
> early, which includes calling 'brcmf_sdiod_remove'. In both cases
> 'brcmf_sdiod_freezer_detach' is called to free allocated '.freezer', which
> has not yet been re-allocated the second time.
>
> Stacktrace of (failing) hardware reset after firmware-crash:
>
> Code: b9402b82 8b0202c0 eb1a02df 54000041 (d4210000)
> ret_from_fork+0x10/0x20
> kthread+0x154/0x160
> worker_thread+0x188/0x504
> process_one_work+0x1f4/0x490
> brcmf_core_bus_reset+0x34/0x44 [brcmfmac]
> brcmf_sdio_bus_reset+0x68/0xc0 [brcmfmac]
> brcmf_sdiod_probe+0x170/0x21c [brcmfmac]
> brcmf_sdiod_remove+0x48/0xc0 [brcmfmac]
> kfree+0x210/0x220
> __slab_free+0x58/0x40c
> Call trace:
> x2 : 0000000000000040 x1 : fffffc00002d2b80 x0 : ffff00000b4aee40
> x5 : ffff8000013fa728 x4 : 0000000000000001 x3 : ffff00000b4aee00
> x8 : ffff800009967ce0 x7 : ffff8000099bfce0 x6 : 00000006f8005d01
> x11: ffff8000099bfce0 x10: 00000000fffff000 x9 : ffff8000083401d0
> x14: 0000000000000000 x13: 657a69736b636f6c x12: 6220314620746573
> x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000030
> x20: fffffc00002d2ba0 x19: fffffc00002d2b80 x18: 0000000000000000
> x23: ffff00000b4aee00 x22: ffff00000b4aee00 x21: 0000000000000001
> x26: ffff00000b4aee00 x25: ffff0000f7753705 x24: 000000000001288a
> x29: ffff80000a22bbf0 x28: ffff000000401200 x27: 000000008020001a
> sp : ffff80000a22bbf0
> lr : kfree+0x210/0x220
> pc : __slab_free+0x58/0x40c
> pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> Workqueue: events brcmf_core_bus_reset [brcmfmac]
> Hardware name: Pine64 Pinebook Pro (DT)
> CPU: 2 PID: 639 Comm: kworker/2:2 Tainted: G C 5.16.0-0.bpo.4-arm64 #1 Debian 5.16.12-1~bpo11+1
> nvmem_rockchip_efuse industrialio_triggered_buffer videodev snd_soc_core snd_pcm_dmaengine kfifo_buf snd_pcm io_domain mc industrialio mt>
> Modules linked in: snd_seq_dummy snd_hrtimer snd_seq snd_seq_device nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reje>
> Internal error: Oops - BUG: 0 [#1] SMP
> kernel BUG at mm/slub.c:379!
>
> Signed-off-by: Danny van Heumen <[email protected]>
I have to admit that, to me, it looks a bit weird to have one driver
instance managing two different SDIO func devices. On the other hand,
I don't know the HW so there might be good reasons for why.
In any case, I want to point out a commit [1] for the mwifiex driver,
which could serve as a good inspiration of how to make use of the
mmc_hw_reset(). For example, one may look at the return code from
mmc_hw_reset() to understand whether the reset will be done
synchronous or asynchronous (via device re-registration).
That said, I think the $subject patch looks reasonable to me. So feel
free to add:
Reviewed-by: Ulf Hansson <[email protected]>
Kind regards
Uffe
[1]
cdb2256f795e ("mwifiex: Re-work support for SDIO HW reset")
> ---
> .../wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 13 +++++--------
> .../net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 10 +---------
> 2 files changed, 6 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
> index ac02244a6fdf..414ee21f42e3 100644
> --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
> +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
> @@ -802,6 +802,7 @@ static void brcmf_sdiod_freezer_detach(struct brcmf_sdio_dev *sdiodev)
> if (sdiodev->freezer) {
> WARN_ON(atomic_read(&sdiodev->freezer->freezing));
> kfree(sdiodev->freezer);
> + sdiodev->freezer = NULL;
> }
> }
>
> @@ -875,13 +876,9 @@ int brcmf_sdiod_remove(struct brcmf_sdio_dev *sdiodev)
>
> brcmf_sdiod_freezer_detach(sdiodev);
>
> - /* Disable Function 2 */
> - sdio_claim_host(sdiodev->func2);
> - sdio_disable_func(sdiodev->func2);
> - sdio_release_host(sdiodev->func2);
> -
> - /* Disable Function 1 */
> + /* Disable functions 2 then 1. */
> sdio_claim_host(sdiodev->func1);
> + sdio_disable_func(sdiodev->func2);
> sdio_disable_func(sdiodev->func1);
> sdio_release_host(sdiodev->func1);
>
> @@ -911,7 +908,7 @@ int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
> if (ret) {
> brcmf_err("Failed to set F1 blocksize\n");
> sdio_release_host(sdiodev->func1);
> - goto out;
> + return ret;
> }
> switch (sdiodev->func2->device) {
> case SDIO_DEVICE_ID_BROADCOM_CYPRESS_4373:
> @@ -933,7 +930,7 @@ int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
> if (ret) {
> brcmf_err("Failed to set F2 blocksize\n");
> sdio_release_host(sdiodev->func1);
> - goto out;
> + return ret;
> } else {
> brcmf_dbg(SDIO, "set F2 blocksize to %d\n", f2_blksz);
> }
> diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
> index 212fbbe1cd7e..2ed70f809097 100644
> --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
> +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
> @@ -4152,7 +4152,6 @@ int brcmf_sdio_get_fwname(struct device *dev, const char *ext, u8 *fw_name)
>
> static int brcmf_sdio_bus_reset(struct device *dev)
> {
> - int ret = 0;
> struct brcmf_bus *bus_if = dev_get_drvdata(dev);
> struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
>
> @@ -4169,14 +4168,7 @@ static int brcmf_sdio_bus_reset(struct device *dev)
> sdio_release_host(sdiodev->func1);
>
> brcmf_bus_change_state(sdiodev->bus_if, BRCMF_BUS_DOWN);
> -
> - ret = brcmf_sdiod_probe(sdiodev);
> - if (ret) {
> - brcmf_err("Failed to probe after sdio device reset: ret %d\n",
> - ret);
> - }
> -
> - return ret;
> + return 0;
> }
>
> static const struct brcmf_bus_ops brcmf_sdio_bus_ops = {
> --
> 2.34.1
>
On 7/14/2022 12:04 PM, Ulf Hansson wrote:
> On Tue, 12 Jul 2022 at 01:22, Danny van Heumen <[email protected]> wrote:
>>
>> In case of buggy firmware, brcmfmac may perform a hardware reset. If during
>> reset and subsequent probing an early failure occurs, a memory region is
>> accidentally double-freed. With hardened memory allocation enabled, this error
>> will be detected.
>>
>> - return early where appropriate to skip unnecessary clean-up.
>> - set '.freezer' pointer to NULL to prevent double-freeing under possible
>> other circumstances and to re-align result under various different
>> behaviors of memory allocation freeing.
>> - correctly claim host on func1 for disabling func2.
>> - after reset, do not initiate probing immediately, but rely on events.
>>
>> Given a firmware crash, function 'brcmf_sdio_bus_reset' is called. It calls
>> 'brcmf_sdiod_remove', then follows up with 'brcmf_sdiod_probe' to reinitialize
>> the hardware. If 'brcmf_sdiod_probe' fails to "set F1 blocksize", it exits
>> early, which includes calling 'brcmf_sdiod_remove'. In both cases
>> 'brcmf_sdiod_freezer_detach' is called to free allocated '.freezer', which
>> has not yet been re-allocated the second time.
>>
>> Stacktrace of (failing) hardware reset after firmware-crash:
>>
>> Code: b9402b82 8b0202c0 eb1a02df 54000041 (d4210000)
>> ret_from_fork+0x10/0x20
>> kthread+0x154/0x160
>> worker_thread+0x188/0x504
>> process_one_work+0x1f4/0x490
>> brcmf_core_bus_reset+0x34/0x44 [brcmfmac]
>> brcmf_sdio_bus_reset+0x68/0xc0 [brcmfmac]
>> brcmf_sdiod_probe+0x170/0x21c [brcmfmac]
>> brcmf_sdiod_remove+0x48/0xc0 [brcmfmac]
>> kfree+0x210/0x220
>> __slab_free+0x58/0x40c
>> Call trace:
>> x2 : 0000000000000040 x1 : fffffc00002d2b80 x0 : ffff00000b4aee40
>> x5 : ffff8000013fa728 x4 : 0000000000000001 x3 : ffff00000b4aee00
>> x8 : ffff800009967ce0 x7 : ffff8000099bfce0 x6 : 00000006f8005d01
>> x11: ffff8000099bfce0 x10: 00000000fffff000 x9 : ffff8000083401d0
>> x14: 0000000000000000 x13: 657a69736b636f6c x12: 6220314620746573
>> x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000030
>> x20: fffffc00002d2ba0 x19: fffffc00002d2b80 x18: 0000000000000000
>> x23: ffff00000b4aee00 x22: ffff00000b4aee00 x21: 0000000000000001
>> x26: ffff00000b4aee00 x25: ffff0000f7753705 x24: 000000000001288a
>> x29: ffff80000a22bbf0 x28: ffff000000401200 x27: 000000008020001a
>> sp : ffff80000a22bbf0
>> lr : kfree+0x210/0x220
>> pc : __slab_free+0x58/0x40c
>> pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
>> Workqueue: events brcmf_core_bus_reset [brcmfmac]
>> Hardware name: Pine64 Pinebook Pro (DT)
>> CPU: 2 PID: 639 Comm: kworker/2:2 Tainted: G C 5.16.0-0.bpo.4-arm64 #1 Debian 5.16.12-1~bpo11+1
>> nvmem_rockchip_efuse industrialio_triggered_buffer videodev snd_soc_core snd_pcm_dmaengine kfifo_buf snd_pcm io_domain mc industrialio mt>
>> Modules linked in: snd_seq_dummy snd_hrtimer snd_seq snd_seq_device nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reje>
>> Internal error: Oops - BUG: 0 [#1] SMP
>> kernel BUG at mm/slub.c:379!
>>
>> Signed-off-by: Danny van Heumen <[email protected]>
>
> I have to admit that, to me, it looks a bit weird to have one driver
> instance managing two different SDIO func devices. On the other hand,
> I don't know the HW so there might be good reasons for why.
>
> In any case, I want to point out a commit [1] for the mwifiex driver,
> which could serve as a good inspiration of how to make use of the
> mmc_hw_reset(). For example, one may look at the return code from
> mmc_hw_reset() to understand whether the reset will be done
> synchronous or asynchronous (via device re-registration).
Thanks, Uffe
Could the API be extended so the caller can request the reset to be
asynchronous.
Regards,
Arend
On Thu, 14 Jul 2022 at 14:49, Arend Van Spriel <[email protected]> wrote:
>
> On 7/14/2022 12:04 PM, Ulf Hansson wrote:
> > On Tue, 12 Jul 2022 at 01:22, Danny van Heumen <[email protected]> wrote:
> >>
> >> In case of buggy firmware, brcmfmac may perform a hardware reset. If during
> >> reset and subsequent probing an early failure occurs, a memory region is
> >> accidentally double-freed. With hardened memory allocation enabled, this error
> >> will be detected.
> >>
> >> - return early where appropriate to skip unnecessary clean-up.
> >> - set '.freezer' pointer to NULL to prevent double-freeing under possible
> >> other circumstances and to re-align result under various different
> >> behaviors of memory allocation freeing.
> >> - correctly claim host on func1 for disabling func2.
> >> - after reset, do not initiate probing immediately, but rely on events.
> >>
> >> Given a firmware crash, function 'brcmf_sdio_bus_reset' is called. It calls
> >> 'brcmf_sdiod_remove', then follows up with 'brcmf_sdiod_probe' to reinitialize
> >> the hardware. If 'brcmf_sdiod_probe' fails to "set F1 blocksize", it exits
> >> early, which includes calling 'brcmf_sdiod_remove'. In both cases
> >> 'brcmf_sdiod_freezer_detach' is called to free allocated '.freezer', which
> >> has not yet been re-allocated the second time.
> >>
> >> Stacktrace of (failing) hardware reset after firmware-crash:
> >>
> >> Code: b9402b82 8b0202c0 eb1a02df 54000041 (d4210000)
> >> ret_from_fork+0x10/0x20
> >> kthread+0x154/0x160
> >> worker_thread+0x188/0x504
> >> process_one_work+0x1f4/0x490
> >> brcmf_core_bus_reset+0x34/0x44 [brcmfmac]
> >> brcmf_sdio_bus_reset+0x68/0xc0 [brcmfmac]
> >> brcmf_sdiod_probe+0x170/0x21c [brcmfmac]
> >> brcmf_sdiod_remove+0x48/0xc0 [brcmfmac]
> >> kfree+0x210/0x220
> >> __slab_free+0x58/0x40c
> >> Call trace:
> >> x2 : 0000000000000040 x1 : fffffc00002d2b80 x0 : ffff00000b4aee40
> >> x5 : ffff8000013fa728 x4 : 0000000000000001 x3 : ffff00000b4aee00
> >> x8 : ffff800009967ce0 x7 : ffff8000099bfce0 x6 : 00000006f8005d01
> >> x11: ffff8000099bfce0 x10: 00000000fffff000 x9 : ffff8000083401d0
> >> x14: 0000000000000000 x13: 657a69736b636f6c x12: 6220314620746573
> >> x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000030
> >> x20: fffffc00002d2ba0 x19: fffffc00002d2b80 x18: 0000000000000000
> >> x23: ffff00000b4aee00 x22: ffff00000b4aee00 x21: 0000000000000001
> >> x26: ffff00000b4aee00 x25: ffff0000f7753705 x24: 000000000001288a
> >> x29: ffff80000a22bbf0 x28: ffff000000401200 x27: 000000008020001a
> >> sp : ffff80000a22bbf0
> >> lr : kfree+0x210/0x220
> >> pc : __slab_free+0x58/0x40c
> >> pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> >> Workqueue: events brcmf_core_bus_reset [brcmfmac]
> >> Hardware name: Pine64 Pinebook Pro (DT)
> >> CPU: 2 PID: 639 Comm: kworker/2:2 Tainted: G C 5.16.0-0.bpo.4-arm64 #1 Debian 5.16.12-1~bpo11+1
> >> nvmem_rockchip_efuse industrialio_triggered_buffer videodev snd_soc_core snd_pcm_dmaengine kfifo_buf snd_pcm io_domain mc industrialio mt>
> >> Modules linked in: snd_seq_dummy snd_hrtimer snd_seq snd_seq_device nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reje>
> >> Internal error: Oops - BUG: 0 [#1] SMP
> >> kernel BUG at mm/slub.c:379!
> >>
> >> Signed-off-by: Danny van Heumen <[email protected]>
> >
> > I have to admit that, to me, it looks a bit weird to have one driver
> > instance managing two different SDIO func devices. On the other hand,
> > I don't know the HW so there might be good reasons for why.
> >
> > In any case, I want to point out a commit [1] for the mwifiex driver,
> > which could serve as a good inspiration of how to make use of the
> > mmc_hw_reset(). For example, one may look at the return code from
> > mmc_hw_reset() to understand whether the reset will be done
> > synchronous or asynchronous (via device re-registration).
>
> Thanks, Uffe
>
> Could the API be extended so the caller can request the reset to be
> asynchronous.
Yes, that should be fine. The current behaviour is that it will be
asynchronous if there are more than one SDIO func device bound to a
driver.
Kind regards
Uffe
On 7/14/2022 4:39 PM, Ulf Hansson wrote:
> On Thu, 14 Jul 2022 at 14:49, Arend Van Spriel <[email protected]> wrote:
>>
>> On 7/14/2022 12:04 PM, Ulf Hansson wrote:
>>> On Tue, 12 Jul 2022 at 01:22, Danny van Heumen <[email protected]> wrote:
>>>>
>>>> In case of buggy firmware, brcmfmac may perform a hardware reset. If during
>>>> reset and subsequent probing an early failure occurs, a memory region is
>>>> accidentally double-freed. With hardened memory allocation enabled, this error
>>>> will be detected.
>>>>
>>>> - return early where appropriate to skip unnecessary clean-up.
>>>> - set '.freezer' pointer to NULL to prevent double-freeing under possible
>>>> other circumstances and to re-align result under various different
>>>> behaviors of memory allocation freeing.
>>>> - correctly claim host on func1 for disabling func2.
>>>> - after reset, do not initiate probing immediately, but rely on events.
>>>>
>>>> Given a firmware crash, function 'brcmf_sdio_bus_reset' is called. It calls
>>>> 'brcmf_sdiod_remove', then follows up with 'brcmf_sdiod_probe' to reinitialize
>>>> the hardware. If 'brcmf_sdiod_probe' fails to "set F1 blocksize", it exits
>>>> early, which includes calling 'brcmf_sdiod_remove'. In both cases
>>>> 'brcmf_sdiod_freezer_detach' is called to free allocated '.freezer', which
>>>> has not yet been re-allocated the second time.
>>>>
>>>> Stacktrace of (failing) hardware reset after firmware-crash:
>>>>
>>>> Code: b9402b82 8b0202c0 eb1a02df 54000041 (d4210000)
>>>> ret_from_fork+0x10/0x20
>>>> kthread+0x154/0x160
>>>> worker_thread+0x188/0x504
>>>> process_one_work+0x1f4/0x490
>>>> brcmf_core_bus_reset+0x34/0x44 [brcmfmac]
>>>> brcmf_sdio_bus_reset+0x68/0xc0 [brcmfmac]
>>>> brcmf_sdiod_probe+0x170/0x21c [brcmfmac]
>>>> brcmf_sdiod_remove+0x48/0xc0 [brcmfmac]
>>>> kfree+0x210/0x220
>>>> __slab_free+0x58/0x40c
>>>> Call trace:
>>>> x2 : 0000000000000040 x1 : fffffc00002d2b80 x0 : ffff00000b4aee40
>>>> x5 : ffff8000013fa728 x4 : 0000000000000001 x3 : ffff00000b4aee00
>>>> x8 : ffff800009967ce0 x7 : ffff8000099bfce0 x6 : 00000006f8005d01
>>>> x11: ffff8000099bfce0 x10: 00000000fffff000 x9 : ffff8000083401d0
>>>> x14: 0000000000000000 x13: 657a69736b636f6c x12: 6220314620746573
>>>> x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000030
>>>> x20: fffffc00002d2ba0 x19: fffffc00002d2b80 x18: 0000000000000000
>>>> x23: ffff00000b4aee00 x22: ffff00000b4aee00 x21: 0000000000000001
>>>> x26: ffff00000b4aee00 x25: ffff0000f7753705 x24: 000000000001288a
>>>> x29: ffff80000a22bbf0 x28: ffff000000401200 x27: 000000008020001a
>>>> sp : ffff80000a22bbf0
>>>> lr : kfree+0x210/0x220
>>>> pc : __slab_free+0x58/0x40c
>>>> pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
>>>> Workqueue: events brcmf_core_bus_reset [brcmfmac]
>>>> Hardware name: Pine64 Pinebook Pro (DT)
>>>> CPU: 2 PID: 639 Comm: kworker/2:2 Tainted: G C 5.16.0-0.bpo.4-arm64 #1 Debian 5.16.12-1~bpo11+1
>>>> nvmem_rockchip_efuse industrialio_triggered_buffer videodev snd_soc_core snd_pcm_dmaengine kfifo_buf snd_pcm io_domain mc industrialio mt>
>>>> Modules linked in: snd_seq_dummy snd_hrtimer snd_seq snd_seq_device nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reje>
>>>> Internal error: Oops - BUG: 0 [#1] SMP
>>>> kernel BUG at mm/slub.c:379!
>>>>
>>>> Signed-off-by: Danny van Heumen <[email protected]>
>>>
>>> I have to admit that, to me, it looks a bit weird to have one driver
>>> instance managing two different SDIO func devices. On the other hand,
>>> I don't know the HW so there might be good reasons for why.
>>>
>>> In any case, I want to point out a commit [1] for the mwifiex driver,
>>> which could serve as a good inspiration of how to make use of the
>>> mmc_hw_reset(). For example, one may look at the return code from
>>> mmc_hw_reset() to understand whether the reset will be done
>>> synchronous or asynchronous (via device re-registration).
>>
>> Thanks, Uffe
>>
>> Could the API be extended so the caller can request the reset to be
>> asynchronous.
>
> Yes, that should be fine. The current behaviour is that it will be
> asynchronous if there are more than one SDIO func device bound to a
> driver.
I see. So for brcmfmac we fall into that category. Thanks for the info.
Regards,
Arend
------- Original Message -------
On Thursday, July 14th, 2022 at 12:04, Ulf Hansson <[email protected]> wrote:
> [..]
>
> That said, I think the $subject patch looks reasonable to me. So feel
> free to add:
>
> Reviewed-by: Ulf Hansson [email protected]
I am a first-time contributor. Is this your way of saying that I should submit
the patch somewhere other than 'linux-wireless@...'? I suppose this fix is not
urgent enough for 'stable@...', or is it?
I would appreciate information on who will and/or how to follow-up.
Regards,
Danny
On 7/18/2022 7:17 PM, Danny van Heumen wrote:
> ------- Original Message -------
> On Thursday, July 14th, 2022 at 12:04, Ulf Hansson <[email protected]> wrote:
>
>> [..]
>>
>> That said, I think the $subject patch looks reasonable to me. So feel
>> free to add:
>>
>> Reviewed-by: Ulf Hansson [email protected]
>
> I am a first-time contributor. Is this your way of saying that I should submit
> the patch somewhere other than 'linux-wireless@...'? I suppose this fix is not
> urgent enough for 'stable@...', or is it?
>
> I would appreciate information on who will and/or how to follow-up.
Hi Danny,
The above means that Uffe is okay with the patch being included. Kalle
Valo is the linux-wireless maintainer and he will apply the patch to the
linux-wireless repo. The status of your patch can be monitored through
patchwork [1].
By the way, you can add the Fixes: tag referring to the commit that
introduced the issue. Not sure which one should be considered here, but
it is either this one ...
Fixes: 9982464379e8 ("brcmfmac: make sdio suspend wait for threads to
freeze")
... or ...
Fixes: 7836102a750a ("brcmfmac: reset SDIO bus on a firmware crash")
How to create the tag is described here [2]. With git configured
properly you can simply do:
$ git log --pretty=fixes -1 <commit_sha1>
Stable patches have their additional rules [3]. The bus reset was added
in kernel v5.9. Hope this helps.
Regards,
Arend
[1]
https://patchwork.kernel.org/project/linux-wireless/patch/id1HN6qCMAirApBzTA6fT7ZFWBBGCJhULpflxQ7NT6cgCboVnn3RHpiOFjA9SbRqzBRFLk9ES0C4FNvO6fUQsNg7pqF6ZSNAYUo99nHy8PY=@dannyvanheumen.nl/
[2]
https://www.kernel.org/doc/html/v4.10/process/submitting-patches.html#describe-your-changes
[3]
https://www.kernel.org/doc/html/v4.10/process/stable-kernel-rules.html#procedure-for-submitting-patches-to-the-stable-tree
Danny van Heumen <[email protected]> wrote:
> In case of buggy firmware, brcmfmac may perform a hardware reset. If during
> reset and subsequent probing an early failure occurs, a memory region is
> accidentally double-freed. With hardened memory allocation enabled, this error
> will be detected.
>
> - return early where appropriate to skip unnecessary clean-up.
> - set '.freezer' pointer to NULL to prevent double-freeing under possible
> other circumstances and to re-align result under various different
> behaviors of memory allocation freeing.
> - correctly claim host on func1 for disabling func2.
> - after reset, do not initiate probing immediately, but rely on events.
>
> Given a firmware crash, function 'brcmf_sdio_bus_reset' is called. It calls
> 'brcmf_sdiod_remove', then follows up with 'brcmf_sdiod_probe' to reinitialize
> the hardware. If 'brcmf_sdiod_probe' fails to "set F1 blocksize", it exits
> early, which includes calling 'brcmf_sdiod_remove'. In both cases
> 'brcmf_sdiod_freezer_detach' is called to free allocated '.freezer', which
> has not yet been re-allocated the second time.
>
> Stacktrace of (failing) hardware reset after firmware-crash:
>
> Code: b9402b82 8b0202c0 eb1a02df 54000041 (d4210000)
> ret_from_fork+0x10/0x20
> kthread+0x154/0x160
> worker_thread+0x188/0x504
> process_one_work+0x1f4/0x490
> brcmf_core_bus_reset+0x34/0x44 [brcmfmac]
> brcmf_sdio_bus_reset+0x68/0xc0 [brcmfmac]
> brcmf_sdiod_probe+0x170/0x21c [brcmfmac]
> brcmf_sdiod_remove+0x48/0xc0 [brcmfmac]
> kfree+0x210/0x220
> __slab_free+0x58/0x40c
> Call trace:
> x2 : 0000000000000040 x1 : fffffc00002d2b80 x0 : ffff00000b4aee40
> x5 : ffff8000013fa728 x4 : 0000000000000001 x3 : ffff00000b4aee00
> x8 : ffff800009967ce0 x7 : ffff8000099bfce0 x6 : 00000006f8005d01
> x11: ffff8000099bfce0 x10: 00000000fffff000 x9 : ffff8000083401d0
> x14: 0000000000000000 x13: 657a69736b636f6c x12: 6220314620746573
> x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000030
> x20: fffffc00002d2ba0 x19: fffffc00002d2b80 x18: 0000000000000000
> x23: ffff00000b4aee00 x22: ffff00000b4aee00 x21: 0000000000000001
> x26: ffff00000b4aee00 x25: ffff0000f7753705 x24: 000000000001288a
> x29: ffff80000a22bbf0 x28: ffff000000401200 x27: 000000008020001a
> sp : ffff80000a22bbf0
> lr : kfree+0x210/0x220
> pc : __slab_free+0x58/0x40c
> pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> Workqueue: events brcmf_core_bus_reset [brcmfmac]
> Hardware name: Pine64 Pinebook Pro (DT)
> CPU: 2 PID: 639 Comm: kworker/2:2 Tainted: G C 5.16.0-0.bpo.4-arm64 #1 Debian 5.16.12-1~bpo11+1
> nvmem_rockchip_efuse industrialio_triggered_buffer videodev snd_soc_core snd_pcm_dmaengine kfifo_buf snd_pcm io_domain mc industrialio mt>
> Modules linked in: snd_seq_dummy snd_hrtimer snd_seq snd_seq_device nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reje>
> Internal error: Oops - BUG: 0 [#1] SMP
> kernel BUG at mm/slub.c:379!
>
> Signed-off-by: Danny van Heumen <[email protected]>
> Reviewed-by: Arend van Spriel <aspriel.gmail.com>
> Reviewed-by: Ulf Hansson <[email protected]>
Patch applied to wireless-next.git, thanks.
cb774bd35318 wifi: brcmfmac: prevent double-free on hardware-reset
--
https://patchwork.kernel.org/project/linux-wireless/patch/id1HN6qCMAirApBzTA6fT7ZFWBBGCJhULpflxQ7NT6cgCboVnn3RHpiOFjA9SbRqzBRFLk9ES0C4FNvO6fUQsNg7pqF6ZSNAYUo99nHy8PY=@dannyvanheumen.nl/
https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches