Hi All,
This patch set fixes a system hang that occurs when the ping worker fires after
wdog suspend and before wdog resume. The hang happens because the ping worker can
issue a low-level ping after the suspend routine has disabled the wdog clk
(i.e. it accesses the hw wdog registers while they are not fed by the clk).
To overcome this issue, two patches are introduced. Patch #1 provides the
watchdog_dev_suspend/resume functions, which can be used in wdog drivers. The
first function cancels the watchdog ping worker during suspend, preventing
watchdog_dev from issuing a low-level ping, and the second one restores the ping
worker if needed.
Patch #2 introduces the relevant changes to the imx2_wdt driver, taking advantage
of the newly introduced routines.
Grzegorz Jaszczyk (2):
watchdog: introduce watchdog_dev_suspend/resume
watchdog: imx2_wdg: notify wdog subsystem about wdog suspend/resume
drivers/watchdog/imx2_wdt.c | 20 ++++++++++----
drivers/watchdog/watchdog_dev.c | 49 +++++++++++++++++++++++++++++++++
include/linux/watchdog.h | 2 ++
3 files changed, 66 insertions(+), 5 deletions(-)
--
2.29.0
The suspend routine disables the wdog clk. Nevertheless, the watchdog subsystem
is not aware of that and can still try to ping the wdog through
watchdog_ping_work. To prevent this condition, and therefore a system hang
(caused by a wdog register access issued while the wdog clock is disabled),
take advantage of the watchdog_dev_suspend/resume routines, which take care of
cancelling and restoring the watchdog ping worker.
Additionally, remove the hw ping from suspend/resume since it will be
issued by the watchdog_dev_suspend/resume routines.
Signed-off-by: Michal Koziel <[email protected]>
Signed-off-by: Grzegorz Jaszczyk <[email protected]>
---
drivers/watchdog/imx2_wdt.c | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c
index b84f80f7d342..24e3a4d2b529 100644
--- a/drivers/watchdog/imx2_wdt.c
+++ b/drivers/watchdog/imx2_wdt.c
@@ -348,6 +348,17 @@ static int __maybe_unused imx2_wdt_suspend(struct device *dev)
{
struct watchdog_device *wdog = dev_get_drvdata(dev);
struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
+ int ret;
+
+ /*
+ * Before disabling clk we need to notify wdog subsystem that HW wdog
+ * is being suspended. This e.g. prevents watchdog_ping_work to fire
+ * when the clk is disabled, which would result with system hang caused
+ * by wdog register access while wdog clock is disabled.
+ */
+ ret = watchdog_dev_suspend(wdog);
+ if (ret)
+ return ret;
/* The watchdog IP block is running */
if (imx2_wdt_is_running(wdev)) {
@@ -356,7 +367,6 @@ static int __maybe_unused imx2_wdt_suspend(struct device *dev)
* during resume.
*/
__imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME);
- imx2_wdt_ping(wdog);
}
clk_disable_unprepare(wdev->clk);
@@ -383,12 +393,12 @@ static int __maybe_unused imx2_wdt_resume(struct device *dev)
*/
imx2_wdt_setup(wdog);
}
- if (imx2_wdt_is_running(wdev)) {
+ if (imx2_wdt_is_running(wdev))
imx2_wdt_set_timeout(wdog, wdog->timeout);
- imx2_wdt_ping(wdog);
- }
- return 0;
+ ret = watchdog_dev_resume(wdog);
+
+ return ret;
}
static SIMPLE_DEV_PM_OPS(imx2_wdt_pm_ops, imx2_wdt_suspend,
--
2.29.0
Watchdog drivers often disable the wdog clock during suspend and then
enable it again during resume. Nevertheless, the ping worker is still
running and can issue a low-level ping while the wdog clock is disabled,
causing a system hang. To prevent this condition, introduce
watchdog_dev_suspend/resume, which can be used by any wdog driver to
cancel the ping worker during suspend and restore it, if needed,
during resume.
Signed-off-by: Grzegorz Jaszczyk <[email protected]>
---
drivers/watchdog/watchdog_dev.c | 49 +++++++++++++++++++++++++++++++++
include/linux/watchdog.h | 2 ++
2 files changed, 51 insertions(+)
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 2946f3a63110..3feca1567281 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -1219,6 +1219,55 @@ void __exit watchdog_dev_exit(void)
kthread_destroy_worker(watchdog_kworker);
}
+/*
+ * Issue a final ping if watchdog_worker_should_ping() asks for one, then stop
+ * the ping timer and worker. Returns 0, -ENODEV, or the error from the ping.
+ */
+int watchdog_dev_suspend(struct watchdog_device *wdd)
+{
+	struct watchdog_core_data *wd_data = wdd->wd_data;
+	int ret = 0;	/* stays 0 when no final ping is needed */
+
+	if (!wd_data)
+		return -ENODEV;
+
+	/* ping for the last time before suspend */
+	mutex_lock(&wd_data->lock);
+	if (watchdog_worker_should_ping(wd_data))
+		ret = __watchdog_ping(wd_data->wdd);
+	mutex_unlock(&wd_data->lock);
+	if (ret)
+		return ret;
+
+	/* make sure the worker will not kick in while the wdog is suspended */
+	hrtimer_cancel(&wd_data->timer);
+	kthread_cancel_work_sync(&wd_data->work);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(watchdog_dev_suspend);
+
+/*
+ * Counterpart of watchdog_dev_suspend(). __watchdog_ping will also retrigger
+ * hrtimer and therefore restore the ping worker if needed.
+ * Returns 0, -ENODEV, or the error from the ping.
+ */
+int watchdog_dev_resume(struct watchdog_device *wdd)
+{
+	struct watchdog_core_data *wd_data = wdd->wd_data;
+	int ret = 0;	/* stays 0 when no ping is needed */
+
+	if (!wd_data)
+		return -ENODEV;
+
+	mutex_lock(&wd_data->lock);
+	if (watchdog_worker_should_ping(wd_data))
+		ret = __watchdog_ping(wd_data->wdd);
+	mutex_unlock(&wd_data->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(watchdog_dev_resume);
+
module_param(handle_boot_enabled, bool, 0444);
MODULE_PARM_DESC(handle_boot_enabled,
"Watchdog core auto-updates boot enabled watchdogs before userspace takes over (default="
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 9b19e6bb68b5..febfde3b1ff6 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -209,6 +209,8 @@ extern int watchdog_init_timeout(struct watchdog_device *wdd,
unsigned int timeout_parm, struct device *dev);
extern int watchdog_register_device(struct watchdog_device *);
extern void watchdog_unregister_device(struct watchdog_device *);
+int watchdog_dev_suspend(struct watchdog_device *wdd);
+int watchdog_dev_resume(struct watchdog_device *wdd);
int watchdog_set_last_hw_keepalive(struct watchdog_device *, unsigned int);
--
2.29.0
On Tue, Jun 15, 2021 at 02:39:03PM +0200, Grzegorz Jaszczyk wrote:
> The watchdog drivers often disable wdog clock during suspend and then
> enable it again during resume. Nevertheless the ping worker is still
> running and can issue low-level ping while the wdog clock is disabled
> causing the system hang. To prevent such condition introduce
> watchdog_dev_suspend/resume which can be used by any wdog driver and
> actually cancel ping worker during suspend and restore it back, if
> needed, during resume.
>
I'll have to look into this further, but I don't think this is the correct
solution. Most likely the watchdog core needs to have its own independent
suspend/resume functions and suspend the high resolution timer on
suspend and restore it on resume. This may require an additional flag
to be set by drivers to indicate that the timer should be stopped on
suspend.
> Signed-off-by: Grzegorz Jaszczyk <[email protected]>
> ---
> drivers/watchdog/watchdog_dev.c | 49 +++++++++++++++++++++++++++++++++
> include/linux/watchdog.h | 2 ++
> 2 files changed, 51 insertions(+)
>
> diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
> index 2946f3a63110..3feca1567281 100644
> --- a/drivers/watchdog/watchdog_dev.c
> +++ b/drivers/watchdog/watchdog_dev.c
> @@ -1219,6 +1219,55 @@ void __exit watchdog_dev_exit(void)
> kthread_destroy_worker(watchdog_kworker);
> }
>
> +int watchdog_dev_suspend(struct watchdog_device *wdd)
> +{
> + struct watchdog_core_data *wd_data = wdd->wd_data;
> + int ret;
> +
> + if (!wdd->wd_data)
> + return -ENODEV;
> +
> + /* ping for the last time before suspend */
> + mutex_lock(&wd_data->lock);
> + if (watchdog_worker_should_ping(wd_data))
> + ret = __watchdog_ping(wd_data->wdd);
> + mutex_unlock(&wd_data->lock);
> +
> + if (ret)
> + return ret;
> +
> + /*
> + * make sure that watchdog worker will not kick in when the wdog is
> + * suspended
> + */
> + hrtimer_cancel(&wd_data->timer);
> + kthread_cancel_work_sync(&wd_data->work);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(watchdog_dev_suspend);
> +
> +int watchdog_dev_resume(struct watchdog_device *wdd)
> +{
> + struct watchdog_core_data *wd_data = wdd->wd_data;
> + int ret;
> +
> + if (!wdd->wd_data)
> + return -ENODEV;
> +
> + /*
> + * __watchdog_ping will also retrigger hrtimer and therefore restore the
> + * ping worker if needed.
> + */
> + mutex_lock(&wd_data->lock);
> + if (watchdog_worker_should_ping(wd_data))
> + ret = __watchdog_ping(wd_data->wdd);
> + mutex_unlock(&wd_data->lock);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(watchdog_dev_resume);
> +
> module_param(handle_boot_enabled, bool, 0444);
> MODULE_PARM_DESC(handle_boot_enabled,
> "Watchdog core auto-updates boot enabled watchdogs before userspace takes over (default="
> diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
> index 9b19e6bb68b5..febfde3b1ff6 100644
> --- a/include/linux/watchdog.h
> +++ b/include/linux/watchdog.h
> @@ -209,6 +209,8 @@ extern int watchdog_init_timeout(struct watchdog_device *wdd,
> unsigned int timeout_parm, struct device *dev);
> extern int watchdog_register_device(struct watchdog_device *);
> extern void watchdog_unregister_device(struct watchdog_device *);
> +int watchdog_dev_suspend(struct watchdog_device *wdd);
> +int watchdog_dev_resume(struct watchdog_device *wdd);
>
> int watchdog_set_last_hw_keepalive(struct watchdog_device *, unsigned int);
>
> --
> 2.29.0
>
On Tue, 15 Jun 2021 at 16:18, Guenter Roeck <[email protected]> wrote:
>
> On Tue, Jun 15, 2021 at 02:39:03PM +0200, Grzegorz Jaszczyk wrote:
> > The watchdog drivers often disable wdog clock during suspend and then
> > enable it again during resume. Nevertheless the ping worker is still
> > running and can issue low-level ping while the wdog clock is disabled
> > causing the system hang. To prevent such condition introduce
> > watchdog_dev_suspend/resume which can be used by any wdog driver and
> > actually cancel ping worker during suspend and restore it back, if
> > needed, during resume.
> >
>
> I'll have to look into this further, but I don't think this is the correct
> solution. Most likely the watchdog core needs to have its own independent
> suspend/resule functions and suspend the high resolution timer on
> suspend and restore it on resume. This may require an additional flag
> to be set by drivers to indicate that the timer should be stopped on
> suspend.
That makes sense - thank you for your suggestion. I think I could
register a pm notifier in the watchdog core when a new status flag
(e.g. WDOG_STOP_PING_ON_SUSPEND) is set by the driver, and
call watchdog_dev_suspend/resume from the notifier callback.
Please let me know if you see any other issue with this solution; if
not, I will post v2.
Thank you in advance,
Grzegorz
>
> > Signed-off-by: Grzegorz Jaszczyk <[email protected]>
> > ---
> > drivers/watchdog/watchdog_dev.c | 49 +++++++++++++++++++++++++++++++++
> > include/linux/watchdog.h | 2 ++
> > 2 files changed, 51 insertions(+)
> >
> > diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
> > index 2946f3a63110..3feca1567281 100644
> > --- a/drivers/watchdog/watchdog_dev.c
> > +++ b/drivers/watchdog/watchdog_dev.c
> > @@ -1219,6 +1219,55 @@ void __exit watchdog_dev_exit(void)
> > kthread_destroy_worker(watchdog_kworker);
> > }
> >
> > +int watchdog_dev_suspend(struct watchdog_device *wdd)
> > +{
> > + struct watchdog_core_data *wd_data = wdd->wd_data;
> > + int ret;
> > +
> > + if (!wdd->wd_data)
> > + return -ENODEV;
> > +
> > + /* ping for the last time before suspend */
> > + mutex_lock(&wd_data->lock);
> > + if (watchdog_worker_should_ping(wd_data))
> > + ret = __watchdog_ping(wd_data->wdd);
> > + mutex_unlock(&wd_data->lock);
> > +
> > + if (ret)
> > + return ret;
> > +
> > + /*
> > + * make sure that watchdog worker will not kick in when the wdog is
> > + * suspended
> > + */
> > + hrtimer_cancel(&wd_data->timer);
> > + kthread_cancel_work_sync(&wd_data->work);
> > +
> > + return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(watchdog_dev_suspend);
> > +
> > +int watchdog_dev_resume(struct watchdog_device *wdd)
> > +{
> > + struct watchdog_core_data *wd_data = wdd->wd_data;
> > + int ret;
> > +
> > + if (!wdd->wd_data)
> > + return -ENODEV;
> > +
> > + /*
> > + * __watchdog_ping will also retrigger hrtimer and therefore restore the
> > + * ping worker if needed.
> > + */
> > + mutex_lock(&wd_data->lock);
> > + if (watchdog_worker_should_ping(wd_data))
> > + ret = __watchdog_ping(wd_data->wdd);
> > + mutex_unlock(&wd_data->lock);
> > +
> > + return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(watchdog_dev_resume);
> > +
> > module_param(handle_boot_enabled, bool, 0444);
> > MODULE_PARM_DESC(handle_boot_enabled,
> > "Watchdog core auto-updates boot enabled watchdogs before userspace takes over (default="
> > diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
> > index 9b19e6bb68b5..febfde3b1ff6 100644
> > --- a/include/linux/watchdog.h
> > +++ b/include/linux/watchdog.h
> > @@ -209,6 +209,8 @@ extern int watchdog_init_timeout(struct watchdog_device *wdd,
> > unsigned int timeout_parm, struct device *dev);
> > extern int watchdog_register_device(struct watchdog_device *);
> > extern void watchdog_unregister_device(struct watchdog_device *);
> > +int watchdog_dev_suspend(struct watchdog_device *wdd);
> > +int watchdog_dev_resume(struct watchdog_device *wdd);
> >
> > int watchdog_set_last_hw_keepalive(struct watchdog_device *, unsigned int);
> >
> > --
> > 2.29.0
> >
On Wed, Jun 16, 2021 at 03:59:23PM +0200, Grzegorz Jaszczyk wrote:
> On Tue, 15 Jun 2021 at 16:18, Guenter Roeck <[email protected]> wrote:
> >
> > On Tue, Jun 15, 2021 at 02:39:03PM +0200, Grzegorz Jaszczyk wrote:
> > > The watchdog drivers often disable wdog clock during suspend and then
> > > enable it again during resume. Nevertheless the ping worker is still
> > > running and can issue low-level ping while the wdog clock is disabled
> > > causing the system hang. To prevent such condition introduce
> > > watchdog_dev_suspend/resume which can be used by any wdog driver and
> > > actually cancel ping worker during suspend and restore it back, if
> > > needed, during resume.
> > >
> >
> > I'll have to look into this further, but I don't think this is the correct
> > solution. Most likely the watchdog core needs to have its own independent
> > suspend/resule functions and suspend the high resolution timer on
> > suspend and restore it on resume. This may require an additional flag
> > to be set by drivers to indicate that the timer should be stopped on
> > suspend.
>
> That makes sense - thank you for your suggestion. I think I could
> register a pm notifier in the watchdog core when the new e.g.
> WDOG_STOP_PING_ON_SUSPEND status flag will be set by the driver and
> actually call watchdog_dev_suspend/resume from the notifier callback.
> Please let me know if you see any other issue with this solution, if
> not I will post v2.
>
Go for it.
Thanks,
Guenter
Hi Grzegorz,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on hwmon/hwmon-next]
[also build test WARNING on soc/for-next linus/master v5.13-rc6 next-20210616]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Grzegorz-Jaszczyk/introduce-watchdog_dev_suspend-resume/20210616-201447
base: https://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git hwmon-next
config: arm64-randconfig-r022-20210615 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 64720f57bea6a6bf033feef4a5751ab9c0c3b401)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install arm64 cross compiling tool for clang build
# apt-get install binutils-aarch64-linux-gnu
# https://github.com/0day-ci/linux/commit/71dadcad8da1862c1205b19ddf4279d494e57545
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Grzegorz-Jaszczyk/introduce-watchdog_dev_suspend-resume/20210616-201447
git checkout 71dadcad8da1862c1205b19ddf4279d494e57545
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All warnings (new ones prefixed by >>):
>> drivers/watchdog/watchdog_dev.c:1232:2: warning: variable 'ret' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
if (watchdog_worker_should_ping(wd_data))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/compiler.h:56:28: note: expanded from macro 'if'
#define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/compiler.h:58:30: note: expanded from macro '__trace_if_var'
#define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/watchdog/watchdog_dev.c:1236:6: note: uninitialized use occurs here
if (ret)
^~~
include/linux/compiler.h:56:47: note: expanded from macro 'if'
#define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
^~~~
include/linux/compiler.h:58:52: note: expanded from macro '__trace_if_var'
#define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
^~~~
drivers/watchdog/watchdog_dev.c:1232:2: note: remove the 'if' if its condition is always true
if (watchdog_worker_should_ping(wd_data))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/compiler.h:56:23: note: expanded from macro 'if'
#define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
^
drivers/watchdog/watchdog_dev.c:1225:9: note: initialize the variable 'ret' to silence this warning
int ret;
^
= 0
drivers/watchdog/watchdog_dev.c:1263:2: warning: variable 'ret' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
if (watchdog_worker_should_ping(wd_data))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/compiler.h:56:28: note: expanded from macro 'if'
#define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/compiler.h:58:30: note: expanded from macro '__trace_if_var'
#define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/watchdog/watchdog_dev.c:1267:9: note: uninitialized use occurs here
return ret;
^~~
drivers/watchdog/watchdog_dev.c:1263:2: note: remove the 'if' if its condition is always true
if (watchdog_worker_should_ping(wd_data))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/compiler.h:56:23: note: expanded from macro 'if'
#define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
^
drivers/watchdog/watchdog_dev.c:1253:9: note: initialize the variable 'ret' to silence this warning
int ret;
^
= 0
2 warnings generated.
vim +1232 drivers/watchdog/watchdog_dev.c
1221
1222 int watchdog_dev_suspend(struct watchdog_device *wdd)
1223 {
1224 struct watchdog_core_data *wd_data = wdd->wd_data;
1225 int ret;
1226
1227 if (!wdd->wd_data)
1228 return -ENODEV;
1229
1230 /* ping for the last time before suspend */
1231 mutex_lock(&wd_data->lock);
> 1232 if (watchdog_worker_should_ping(wd_data))
1233 ret = __watchdog_ping(wd_data->wdd);
1234 mutex_unlock(&wd_data->lock);
1235
1236 if (ret)
1237 return ret;
1238
1239 /*
1240 * make sure that watchdog worker will not kick in when the wdog is
1241 * suspended
1242 */
1243 hrtimer_cancel(&wd_data->timer);
1244 kthread_cancel_work_sync(&wd_data->work);
1245
1246 return 0;
1247 }
1248 EXPORT_SYMBOL_GPL(watchdog_dev_suspend);
1249
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]