2023-07-10 11:03:22

by Harry Austen

[permalink] [raw]
Subject: [PATCH] wifi: iwlwifi: mvm: enable thermal zone only when firmware is loaded

In iwl_mvm_thermal_zone_register(), when registering a thermal zone, the
thermal subsystem will evaluate its temperature.
But iwl_mvm_tzone_get_temp() fails at this time because
iwl_mvm_firmware_running() returns false.
And that's why many users report that they see
"thermal thermal_zoneX: failed to read out thermal zone (-61)"
message during wifi driver probing.

This patch attempts to fix this by delaying enabling of the thermal zone
until after the firmware has been loaded/initialized. It also gets
disabled when going into suspend.

Signed-off-by: Harry Austen <[email protected]>
---
.../net/wireless/intel/iwlwifi/mvm/mac80211.c | 18 ++++++++++++++++++
drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 9 +--------
2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index ce7905faa08f..a47d29a64dd4 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -1187,6 +1187,17 @@ int iwl_mvm_mac_start(struct ieee80211_hw *hw)

mutex_unlock(&mvm->mutex);

+#ifdef CONFIG_THERMAL
+ /* Needs to be done outside of mutex guarded section to prevent deadlock, since enabling
+ * the thermal zone calls the .get_temp() callback, which attempts to acquire the mutex.
+ */
+ if (!ret) {
+ ret = thermal_zone_device_enable(mvm->tz_device.tzone);
+ if (ret)
+ IWL_DEBUG_TEMP(mvm, "Failed to enable thermal zone (err = %d)\n", ret);
+ }
+#endif
+
iwl_mvm_mei_set_sw_rfkill_state(mvm);

return ret;
@@ -1282,6 +1293,7 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm)
void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
{
struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+ int ret;

flush_work(&mvm->async_handlers_wk);
flush_work(&mvm->add_stream_wk);
@@ -1307,6 +1319,12 @@ void iwl_mvm_mac_stop(struct ieee80211_hw *hw)

iwl_mvm_mei_set_sw_rfkill_state(mvm);

+#ifdef CONFIG_THERMAL
+ ret = thermal_zone_device_disable(mvm->tz_device.tzone);
+ if (ret)
+ IWL_DEBUG_TEMP(mvm, "Failed to disable thermal zone (err = %d)\n", ret);
+#endif
+
mutex_lock(&mvm->mutex);
__iwl_mvm_mac_stop(mvm);
mutex_unlock(&mvm->mutex);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
index 157e96fa23c1..964d2d011c6b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
@@ -680,7 +680,7 @@ static struct thermal_zone_device_ops tzone_ops = {

static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
{
- int i, ret;
+ int i;
char name[16];
static atomic_t counter = ATOMIC_INIT(0);

@@ -707,13 +707,6 @@ static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
return;
}

- ret = thermal_zone_device_enable(mvm->tz_device.tzone);
- if (ret) {
- IWL_DEBUG_TEMP(mvm, "Failed to enable thermal zone\n");
- thermal_zone_device_unregister(mvm->tz_device.tzone);
- return;
- }
-
/* 0 is a valid temperature,
* so initialize the array with S16_MIN which invalid temperature
*/
--
2.41.0




2023-07-13 09:57:07

by Daniel Lezcano

[permalink] [raw]
Subject: Re: [PATCH] wifi: iwlwifi: mvm: enable thermal zone only when firmware is loaded

On 10/07/2023 12:47, Harry Austen wrote:
> In iwl_mvm_thermal_zone_register(), when registering a thermal zone, the
> thermal subsystem will evaluate its temperature.
> But iwl_mvm_tzone_get_temp() fails at this time because
> iwl_mvm_firmware_running() returns false.
> And that's why many users report that they see
> "thermal thermal_zoneX: failed to read out thermal zone (-61)"
> message during wifi driver probing.
>
> This patch attempts to fix this by delaying enabling of the thermal zone
> until after the firmware has been loaded/initialized. It also gets
> disabled when going into suspend.

Thanks for taking care of this bug.

The thermal zone will be accessible from userspace and can be enabled
manually. The resulting temperature read error will be the same in this
case.

IMO, it is cleaner to actually [un]register the thermal zone when the
firmware is [un]loaded

> Signed-off-by: Harry Austen <[email protected]>
> ---
> .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 18 ++++++++++++++++++
> drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 9 +--------
> 2 files changed, 19 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> index ce7905faa08f..a47d29a64dd4 100644
> --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> @@ -1187,6 +1187,17 @@ int iwl_mvm_mac_start(struct ieee80211_hw *hw)
>
> mutex_unlock(&mvm->mutex);
>
> +#ifdef CONFIG_THERMAL
> + /* Needs to be done outside of mutex guarded section to prevent deadlock, since enabling
> + * the thermal zone calls the .get_temp() callback, which attempts to acquire the mutex.
> + */
> + if (!ret) {
> + ret = thermal_zone_device_enable(mvm->tz_device.tzone);
> + if (ret)
> + IWL_DEBUG_TEMP(mvm, "Failed to enable thermal zone (err = %d)\n", ret);
> + }
> +#endif
> +
> iwl_mvm_mei_set_sw_rfkill_state(mvm);
>
> return ret;
> @@ -1282,6 +1293,7 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm)
> void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
> {
> struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
> + int ret;
>
> flush_work(&mvm->async_handlers_wk);
> flush_work(&mvm->add_stream_wk);
> @@ -1307,6 +1319,12 @@ void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
>
> iwl_mvm_mei_set_sw_rfkill_state(mvm);
>
> +#ifdef CONFIG_THERMAL
> + ret = thermal_zone_device_disable(mvm->tz_device.tzone);
> + if (ret)
> + IWL_DEBUG_TEMP(mvm, "Failed to disable thermal zone (err = %d)\n", ret);
> +#endif
> +
> mutex_lock(&mvm->mutex);
> __iwl_mvm_mac_stop(mvm);
> mutex_unlock(&mvm->mutex);
> diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> index 157e96fa23c1..964d2d011c6b 100644
> --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> @@ -680,7 +680,7 @@ static struct thermal_zone_device_ops tzone_ops = {
>
> static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
> {
> - int i, ret;
> + int i;
> char name[16];
> static atomic_t counter = ATOMIC_INIT(0);
>
> @@ -707,13 +707,6 @@ static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
> return;
> }
>
> - ret = thermal_zone_device_enable(mvm->tz_device.tzone);
> - if (ret) {
> - IWL_DEBUG_TEMP(mvm, "Failed to enable thermal zone\n");
> - thermal_zone_device_unregister(mvm->tz_device.tzone);
> - return;
> - }
> -
> /* 0 is a valid temperature,
> * so initialize the array with S16_MIN which invalid temperature
> */
> --
> 2.41.0
>
>

--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog


2023-07-16 13:49:06

by Zhang, Rui

[permalink] [raw]
Subject: Re: [PATCH] wifi: iwlwifi: mvm: enable thermal zone only when firmware is loaded

On Thu, 2023-07-13 at 11:41 +0200, Daniel Lezcano wrote:
> On 10/07/2023 12:47, Harry Austen wrote:
> > In iwl_mvm_thermal_zone_register(), when registering a thermal
> > zone, the
> > thermal subsystem will evaluate its temperature.
> > But iwl_mvm_tzone_get_temp() fails at this time because
> > iwl_mvm_firmware_running() returns false.
> > And that's why many users report that they see
> > "thermal thermal_zoneX: failed to read out thermal zone (-61)"
> > message during wifi driver probing.
> >
> > This patch attempts to fix this by delaying enabling of the thermal
> > zone
> > until after the firmware has been loaded/initialized. It also gets
> > disabled when going into suspend.
>
> Thanks for taking care of this bug.
>
> The thermal zone will be accessible from userspace

Currently, I see that the mode is checked only in
__thermal_zone_device_update().

should we limit the userspace access for disabled thermal zone? For
example, return failure when reading 'temp' sysfs attribute.

> and can be enabled
> manually.

maybe we should have a .change_mode() callback and return failure when
enabling the thermal zone with wifi firmware unloaded.

> The resulting temperature read error will be the same in this
> case.
>
> IMO, it is cleaner to actually [un]register the thermal zone when the
> firmware is [un]loaded

Austen,
may I know how often is this wifi firmware loaded/unloaded?

thanks,
rui
>
> > Signed-off-by: Harry Austen <[email protected]>
> > ---
> >   .../net/wireless/intel/iwlwifi/mvm/mac80211.c  | 18
> > ++++++++++++++++++
> >   drivers/net/wireless/intel/iwlwifi/mvm/tt.c    |  9 +--------
> >   2 files changed, 19 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > index ce7905faa08f..a47d29a64dd4 100644
> > --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > @@ -1187,6 +1187,17 @@ int iwl_mvm_mac_start(struct ieee80211_hw
> > *hw)
> >
> >         mutex_unlock(&mvm->mutex);
> >
> > +#ifdef CONFIG_THERMAL
> > +       /* Needs to be done outside of mutex guarded section to
> > prevent deadlock, since enabling
> > +        * the thermal zone calls the .get_temp() callback, which
> > attempts to acquire the mutex.
> > +        */
> > +       if (!ret) {
> > +               ret = thermal_zone_device_enable(mvm-
> > >tz_device.tzone);
> > +               if (ret)
> > +                       IWL_DEBUG_TEMP(mvm, "Failed to enable
> > thermal zone (err = %d)\n", ret);
> > +       }
> > +#endif
> > +
> >         iwl_mvm_mei_set_sw_rfkill_state(mvm);
> >
> >         return ret;
> > @@ -1282,6 +1293,7 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm)
> >   void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
> >   {
> >         struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
> > +       int ret;
> >
> >         flush_work(&mvm->async_handlers_wk);
> >         flush_work(&mvm->add_stream_wk);
> > @@ -1307,6 +1319,12 @@ void iwl_mvm_mac_stop(struct ieee80211_hw
> > *hw)
> >
> >         iwl_mvm_mei_set_sw_rfkill_state(mvm);
> >
> > +#ifdef CONFIG_THERMAL
> > +       ret = thermal_zone_device_disable(mvm->tz_device.tzone);
> > +       if (ret)
> > +               IWL_DEBUG_TEMP(mvm, "Failed to disable thermal zone
> > (err = %d)\n", ret);
> > +#endif
> > +
> >         mutex_lock(&mvm->mutex);
> >         __iwl_mvm_mac_stop(mvm);
> >         mutex_unlock(&mvm->mutex);
> > diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > index 157e96fa23c1..964d2d011c6b 100644
> > --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > @@ -680,7 +680,7 @@ static  struct thermal_zone_device_ops
> > tzone_ops = {
> >
> >   static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
> >   {
> > -       int i, ret;
> > +       int i;
> >         char name[16];
> >         static atomic_t counter = ATOMIC_INIT(0);
> >
> > @@ -707,13 +707,6 @@ static void
> > iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
> >                 return;
> >         }
> >
> > -       ret = thermal_zone_device_enable(mvm->tz_device.tzone);
> > -       if (ret) {
> > -               IWL_DEBUG_TEMP(mvm, "Failed to enable thermal
> > zone\n");
> > -               thermal_zone_device_unregister(mvm-
> > >tz_device.tzone);
> > -               return;
> > -       }
> > -
> >         /* 0 is a valid temperature,
> >          * so initialize the array with S16_MIN which invalid
> > temperature
> >          */
> > --
> > 2.41.0
> >
> >
>

2023-07-23 22:00:59

by Harry Austen

[permalink] [raw]
Subject: Re: [PATCH] wifi: iwlwifi: mvm: enable thermal zone only when firmware is loaded

On Sun Jul 16, 2023 at 2:45 PM BST, Zhang, Rui wrote:
> On Thu, 2023-07-13 at 11:41 +0200, Daniel Lezcano wrote:
> > On 10/07/2023 12:47, Harry Austen wrote:
> > > In iwl_mvm_thermal_zone_register(), when registering a thermal
> > > zone, the
> > > thermal subsystem will evaluate its temperature.
> > > But iwl_mvm_tzone_get_temp() fails at this time because
> > > iwl_mvm_firmware_running() returns false.
> > > And that's why many users report that they see
> > > "thermal thermal_zoneX: failed to read out thermal zone (-61)"
> > > message during wifi driver probing.
> > >
> > > This patch attempts to fix this by delaying enabling of the thermal
> > > zone
> > > until after the firmware has been loaded/initialized. It also gets
> > > disabled when going into suspend.
> >
> > Thanks for taking care of this bug.
> >
> > The thermal zone will be accessible from userspace
>
> Currently, I see that the mode is checked only in
> __thermal_zone_device_update().
>
> should we limit the userspace access for disabled thermal zone? For
> example, return failure when reading 'temp' sysfs attribute.

Ah okay thanks for checking. Yes, agree that is probably a more sensible behaviour.

>
> > and can be enabled
> > manually.
>
> maybe we should have a .change_mode() callback and return failure when
> enabling the thermal zone with wifi firmware unloaded.
>
> > The resulting temperature read error will be the same in this
> > case.
> >
> > IMO, it is cleaner to actually [un]register the thermal zone when the
> > firmware is [un]loaded
>
> Austen,
> may I know how often is this wifi firmware loaded/unloaded?

Personally, I have only ever seen the firmware loaded once on boot and never unloaded. That was the reason for my patch;
I was trying to reduce the number of kernel warning/error log messages on my system during boot. As far as I can tell,
the ieee80211_ops stop() callback is only called in drv_stop(), which for example can be called via ieee80211_suspend().

>
> thanks,
> rui
> >
> > > Signed-off-by: Harry Austen <[email protected]>
> > > ---
> > >   .../net/wireless/intel/iwlwifi/mvm/mac80211.c  | 18
> > > ++++++++++++++++++
> > >   drivers/net/wireless/intel/iwlwifi/mvm/tt.c    |  9 +--------
> > >   2 files changed, 19 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > > b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > > index ce7905faa08f..a47d29a64dd4 100644
> > > --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > > +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > > @@ -1187,6 +1187,17 @@ int iwl_mvm_mac_start(struct ieee80211_hw
> > > *hw)
> > >
> > >         mutex_unlock(&mvm->mutex);
> > >
> > > +#ifdef CONFIG_THERMAL
> > > +       /* Needs to be done outside of mutex guarded section to
> > > prevent deadlock, since enabling
> > > +        * the thermal zone calls the .get_temp() callback, which
> > > attempts to acquire the mutex.
> > > +        */
> > > +       if (!ret) {
> > > +               ret = thermal_zone_device_enable(mvm-
> > > >tz_device.tzone);
> > > +               if (ret)
> > > +                       IWL_DEBUG_TEMP(mvm, "Failed to enable
> > > thermal zone (err = %d)\n", ret);
> > > +       }
> > > +#endif
> > > +
> > >         iwl_mvm_mei_set_sw_rfkill_state(mvm);
> > >
> > >         return ret;
> > > @@ -1282,6 +1293,7 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm)
> > >   void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
> > >   {
> > >         struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
> > > +       int ret;
> > >
> > >         flush_work(&mvm->async_handlers_wk);
> > >         flush_work(&mvm->add_stream_wk);
> > > @@ -1307,6 +1319,12 @@ void iwl_mvm_mac_stop(struct ieee80211_hw
> > > *hw)
> > >
> > >         iwl_mvm_mei_set_sw_rfkill_state(mvm);
> > >
> > > +#ifdef CONFIG_THERMAL
> > > +       ret = thermal_zone_device_disable(mvm->tz_device.tzone);
> > > +       if (ret)
> > > +               IWL_DEBUG_TEMP(mvm, "Failed to disable thermal zone
> > > (err = %d)\n", ret);
> > > +#endif
> > > +
> > >         mutex_lock(&mvm->mutex);
> > >         __iwl_mvm_mac_stop(mvm);
> > >         mutex_unlock(&mvm->mutex);
> > > diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > > b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > > index 157e96fa23c1..964d2d011c6b 100644
> > > --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > > +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > > @@ -680,7 +680,7 @@ static  struct thermal_zone_device_ops
> > > tzone_ops = {
> > >
> > >   static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
> > >   {
> > > -       int i, ret;
> > > +       int i;
> > >         char name[16];
> > >         static atomic_t counter = ATOMIC_INIT(0);
> > >
> > > @@ -707,13 +707,6 @@ static void
> > > iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
> > >                 return;
> > >         }
> > >
> > > -       ret = thermal_zone_device_enable(mvm->tz_device.tzone);
> > > -       if (ret) {
> > > -               IWL_DEBUG_TEMP(mvm, "Failed to enable thermal
> > > zone\n");
> > > -               thermal_zone_device_unregister(mvm-
> > > >tz_device.tzone);
> > > -               return;
> > > -       }
> > > -
> > >         /* 0 is a valid temperature,
> > >          * so initialize the array with S16_MIN which invalid
> > > temperature
> > >          */
> > > --
> > > 2.41.0
> > >
> > >
> >

Thanks for the review! :)
Harry