2013-04-01 05:59:44

by Tony Chung

[permalink] [raw]
Subject: [PATCH v3 1/2] watchdog: improve w83627hf_wdt to timeout in minute

Watchdog should support timeout in minutes.
For example, crash dump will usually require 5+ minutes.

This patch increase the timeout from 255 seconds to (255*60) seconds and should be good for older LTS releases.
This patch need to be rewritten again when this driver migrate to new watchdog infrastructure.

Added a new max_timeout variable so we can change it if needed.

Signed-off-by: Tony Chung <[email protected]>
---
drivers/watchdog/w83627hf_wdt.c | 74 ++++++++++++++++++++++++++++++--------
1 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c
index 92f1326..8f1111d 100644
--- a/drivers/watchdog/w83627hf_wdt.c
+++ b/drivers/watchdog/w83627hf_wdt.c
@@ -45,6 +45,7 @@

#define WATCHDOG_NAME "w83627hf/thf/hg/dhg WDT"
#define WATCHDOG_TIMEOUT 60 /* 60 sec default timeout */
+#define WATCHDOG_MAX_TIMEOUT (60 * 255)

static unsigned long wdt_is_open;
static char expect_close;
@@ -55,11 +56,14 @@ static int wdt_io = 0x2E;
module_param(wdt_io, int, 0);
MODULE_PARM_DESC(wdt_io, "w83627hf/thf WDT io port (default 0x2E)");

+static bool use_minute; /* timeout unit in minutes if set */
+static const int max_timeout = WATCHDOG_MAX_TIMEOUT;
static int timeout = WATCHDOG_TIMEOUT; /* in seconds */
module_param(timeout, int, 0);
MODULE_PARM_DESC(timeout,
- "Watchdog timeout in seconds. 1 <= timeout <= 255, default="
- __MODULE_STRING(WATCHDOG_TIMEOUT) ".");
+ "Watchdog timeout in seconds. 1<= timeout <="
+ __MODULE_STRING(WATCHDOG_MAX_TIMEOUT) " (default="
+ __MODULE_STRING(WATCHDOG_TIMEOUT) ")");

static bool nowayout = WATCHDOG_NOWAYOUT;
module_param(nowayout, bool, 0);
@@ -107,6 +111,48 @@ static void w83627hf_unselect_wd_register(void)
outb_p(0xAA, WDT_EFER); /* Leave extended function mode */
}

+/* setup CRF5 register */
+static void w83627hf_setup_crf5(void)
+{
+ unsigned char t;
+
+ outb_p(0xF5, WDT_EFER); /* Select CRF5 */
+ t = inb_p(WDT_EFDR); /* read CRF5 */
+ t &= ~0x0C; /* set second mode & disable keyboard
+ turning off watchdog */
+ t |= 0x02; /* enable the WDTO# output low pulse
+ to the KBRST# pin (PIN60) */
+ if (use_minute)
+ t |= 0x8; /* set timeout in minute */
+
+ outb_p(t, WDT_EFDR); /* Write back to CRF5 */
+}
+
+/* set use_minute if t > 255 */
+static inline int wdt_use_minute(int t)
+{
+ if (t > 255) {
+ t = DIV_ROUND_UP(t, 60); /* convert secs to minutes */
+ use_minute = 1;
+ } else {
+ use_minute = 0;
+ }
+
+ /* setup CRF5 for new timeout unit */
+ w83627hf_select_wd_register();
+ w83627hf_setup_crf5();
+ w83627hf_unselect_wd_register();
+
+ return t;
+}
+
+/* convert specified value to seconds if use_minute is set */
+static inline int wdt_get_timeout_secs(int t)
+{
+ return (use_minute) ? (t * 60) : t;
+}
+
+
/* tyan motherboards seem to set F5 to 0x4C ?
* So explicitly init to appropriate value. */

@@ -120,17 +166,11 @@ static void w83627hf_init(void)
t = inb_p(WDT_EFDR); /* read CRF6 */
if (t != 0) {
pr_info("Watchdog already running. Resetting timeout to %d sec\n",
- timeout);
+ wdt_get_timeout_secs(timeout));
outb_p(timeout, WDT_EFDR); /* Write back to CRF6 */
}

- outb_p(0xF5, WDT_EFER); /* Select CRF5 */
- t = inb_p(WDT_EFDR); /* read CRF5 */
- t &= ~0x0C; /* set second mode & disable keyboard
- turning off watchdog */
- t |= 0x02; /* enable the WDTO# output low pulse
- to the KBRST# pin (PIN60) */
- outb_p(t, WDT_EFDR); /* Write back to CRF5 */
+ w83627hf_setup_crf5();

outb_p(0xF7, WDT_EFER); /* Select CRF7 */
t = inb_p(WDT_EFDR); /* read CRF7 */
@@ -169,9 +209,9 @@ static int wdt_disable(void)

static int wdt_set_heartbeat(int t)
{
- if (t < 1 || t > 255)
+ if (t < 1 || t > max_timeout)
return -EINVAL;
- timeout = t;
+ timeout = wdt_use_minute(t);
return 0;
}

@@ -190,7 +230,7 @@ static int wdt_get_time(void)

spin_unlock(&io_lock);

- return timeleft;
+ return wdt_get_timeout_secs(timeleft);
}

static ssize_t wdt_write(struct file *file, const char __user *buf,
@@ -262,7 +302,8 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
wdt_ping();
/* Fall */
case WDIOC_GETTIMEOUT:
- return put_user(timeout, p);
+ timeval = wdt_get_timeout_secs(timeout);
+ return put_user(timeval, p);
case WDIOC_GETTIMELEFT:
timeval = wdt_get_time();
return put_user(timeval, p);
@@ -346,7 +387,8 @@ static int __init wdt_init(void)

if (wdt_set_heartbeat(timeout)) {
wdt_set_heartbeat(WATCHDOG_TIMEOUT);
- pr_info("timeout value must be 1 <= timeout <= 255, using %d\n",
+ pr_info("timeout value must be 1 <= timeout <= %d, using %d\n",
+ max_timeout,
WATCHDOG_TIMEOUT);
}

@@ -372,7 +414,7 @@ static int __init wdt_init(void)
}

pr_info("initialized. timeout=%d sec (nowayout=%d)\n",
- timeout, nowayout);
+ wdt_get_timeout_secs(timeout), nowayout);

out:
return ret;
--
1.7.0.4


2013-04-01 05:59:49

by Tony Chung

[permalink] [raw]
Subject: [PATCH v3 2/2] watchdog: fix w83627hf_wdt clear timeout expired

Observed that the w83627hf watchdog timer start counting during reboot.
If the system load the driver after 5 minutes, it rebooted immediately because of timer expired.
For example, fsck took more than 5 minutes to run, then the computer reboot as soon as the driver was loaded.

Signed-off-by: Tony Chung <[email protected]>
---
drivers/watchdog/w83627hf_wdt.c | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c
index 8f1111d..7eaa226 100644
--- a/drivers/watchdog/w83627hf_wdt.c
+++ b/drivers/watchdog/w83627hf_wdt.c
@@ -168,14 +168,16 @@ static void w83627hf_init(void)
pr_info("Watchdog already running. Resetting timeout to %d sec\n",
wdt_get_timeout_secs(timeout));
outb_p(timeout, WDT_EFDR); /* Write back to CRF6 */
+ } else {
+ outb_p(0, WDT_EFDR); /* disable to prevent reboot */
}

w83627hf_setup_crf5();

outb_p(0xF7, WDT_EFER); /* Select CRF7 */
t = inb_p(WDT_EFDR); /* read CRF7 */
- t &= ~0xC0; /* disable keyboard & mouse turning off
- watchdog */
+ t &= ~0xD0; /* clear timeout occurred and disable keyboard
+ & mouse turning off watchdog */
outb_p(t, WDT_EFDR); /* Write back to CRF7 */

w83627hf_unselect_wd_register();
--
1.7.0.4

2013-04-02 00:07:43

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] watchdog: fix w83627hf_wdt clear timeout expired

On Sun, Mar 31, 2013 at 10:59:32PM -0700, Tony Chung wrote:
> Observed that the w83627hf watchdog timer start counting during reboot.
> If the system load the driver after 5 minutes, it rebooted immediately because of timer expired.
> For example, fsck took more than 5 minutes to run, then the computer reboot as soon as the driver was loaded.
>
Thinking about it, I wonder if that really solves your problem.

If the watchdog driver is built into the kernel, and if the watchdog is already
running, you still may have the problem that it may time out again and trigger
(again) before the watchdog application can be loaded. Ultimately, in such a
situation, the watchdog application, or at least a primitive one, should really
be started from initramfs and possibly be replaced later on by the "real" watchdog
application. The watchdog package includes a small binary for that purpose,
though I have no idea if is is loaded in initramfs or only later.

Ultimately, that also applies to the original problem you tried to solve with
the longer timeout. It doesn't seem correct to have to set a long timeout to
address excessive boot times; in a way that defeats the purpose of a watchdog.
A cleaner solution for that problem would be to make sure that a small watchdog
application is started early in the boot process.

> Signed-off-by: Tony Chung <[email protected]>
> ---
> drivers/watchdog/w83627hf_wdt.c | 6 ++++--
> 1 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c
> index 8f1111d..7eaa226 100644
> --- a/drivers/watchdog/w83627hf_wdt.c
> +++ b/drivers/watchdog/w83627hf_wdt.c
> @@ -168,14 +168,16 @@ static void w83627hf_init(void)
> pr_info("Watchdog already running. Resetting timeout to %d sec\n",
> wdt_get_timeout_secs(timeout));
> outb_p(timeout, WDT_EFDR); /* Write back to CRF6 */
> + } else {
> + outb_p(0, WDT_EFDR); /* disable to prevent reboot */
> }
Did I miss that earlier, or did yo add it in this version ?

Reading the counter register just returned 0, so what are the benefits
of writing 0 into it ?

Thanks,
Guenter

>
> w83627hf_setup_crf5();
>
> outb_p(0xF7, WDT_EFER); /* Select CRF7 */
> t = inb_p(WDT_EFDR); /* read CRF7 */
> - t &= ~0xC0; /* disable keyboard & mouse turning off
> - watchdog */
> + t &= ~0xD0; /* clear timeout occurred and disable keyboard
> + & mouse turning off watchdog */
> outb_p(t, WDT_EFDR); /* Write back to CRF7 */
>
> w83627hf_unselect_wd_register();
> --
> 1.7.0.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-watchdog" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>

2013-04-02 04:59:03

by Tony Chung

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] watchdog: fix w83627hf_wdt clear timeout expired

Thanks Guenter!
I agree with you. My first reaction was also about a small watchdog
server that will start in early boot process. There are pros and
cons. For example, there are many types of watchdog devices such as
ipmi_watchdog which can accept more than 255 seconds for timeout. So
you really need udev to pick the correct watchdog driver. It could be
very complex.

Our requirement don't need watchdog protection during the boot process
until application is fully up but a driver should not assume anything.
Anyway, an unexpected reboot is definitely a bug that need to be
fixed. It is really easily reproducible. Depending on your hardware
and BIOS settings, just reboot the boot, wait for 5 minutes and then
run "insmod w83627hf_wdt.ko". The box just reboot by itself. The
watchdog sever is not even started.

This line is actually the original fix that is running over a year:
outb_p(0, WDT_EFDR); /* disable to prevent reboot */

When I tried to cleanup it up, I thought I don't need it but it
turned out it was still needed.
When I changed it from 0xC0 to 0xD0, it still reboot.

Thanks and best regards,
@Tony

On Mon, Apr 1, 2013 at 5:07 PM, Guenter Roeck <[email protected]> wrote:
> On Sun, Mar 31, 2013 at 10:59:32PM -0700, Tony Chung wrote:
>> Observed that the w83627hf watchdog timer start counting during reboot.
>> If the system load the driver after 5 minutes, it rebooted immediately because of timer expired.
>> For example, fsck took more than 5 minutes to run, then the computer reboot as soon as the driver was loaded.
>>
> Thinking about it, I wonder if that really solves your problem.
>
> If the watchdog driver is built into the kernel, and if the watchdog is already
> running, you still may have the problem that it may time out again and trigger
> (again) before the watchdog application can be loaded. Ultimately, in such a
> situation, the watchdog application, or at least a primitive one, should really
> be started from initramfs and possibly be replaced later on by the "real" watchdog
> application. The watchdog package includes a small binary for that purpose,
> though I have no idea if is is loaded in initramfs or only later.
>
> Ultimately, that also applies to the original problem you tried to solve with
> the longer timeout. It doesn't seem correct to have to set a long timeout to
> address excessive boot times; in a way that defeats the purpose of a watchdog.
> A cleaner solution for that problem would be to make sure that a small watchdog
> application is started early in the boot process.
>
>> Signed-off-by: Tony Chung <[email protected]>
>> ---
>> drivers/watchdog/w83627hf_wdt.c | 6 ++++--
>> 1 files changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c
>> index 8f1111d..7eaa226 100644
>> --- a/drivers/watchdog/w83627hf_wdt.c
>> +++ b/drivers/watchdog/w83627hf_wdt.c
>> @@ -168,14 +168,16 @@ static void w83627hf_init(void)
>> pr_info("Watchdog already running. Resetting timeout to %d sec\n",
>> wdt_get_timeout_secs(timeout));
>> outb_p(timeout, WDT_EFDR); /* Write back to CRF6 */
>> + } else {
>> + outb_p(0, WDT_EFDR); /* disable to prevent reboot */
>> }
> Did I miss that earlier, or did yo add it in this version ?
>
> Reading the counter register just returned 0, so what are the benefits
> of writing 0 into it ?
>
> Thanks,
> Guenter
>
>>
>> w83627hf_setup_crf5();
>>
>> outb_p(0xF7, WDT_EFER); /* Select CRF7 */
>> t = inb_p(WDT_EFDR); /* read CRF7 */
>> - t &= ~0xC0; /* disable keyboard & mouse turning off
>> - watchdog */
>> + t &= ~0xD0; /* clear timeout occurred and disable keyboard
>> + & mouse turning off watchdog */
>> outb_p(t, WDT_EFDR); /* Write back to CRF7 */
>>
>> w83627hf_unselect_wd_register();
>> --
>> 1.7.0.4
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-watchdog" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>



--
- Tony Chung

2013-04-03 04:21:36

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] watchdog: fix w83627hf_wdt clear timeout expired

On Mon, Apr 01, 2013 at 09:59:00PM -0700, Tony Chung wrote:
> Thanks Guenter!
> I agree with you. My first reaction was also about a small watchdog
> server that will start in early boot process. There are pros and
> cons. For example, there are many types of watchdog devices such as
> ipmi_watchdog which can accept more than 255 seconds for timeout. So
> you really need udev to pick the correct watchdog driver. It could be
> very complex.
>
> Our requirement don't need watchdog protection during the boot process
> until application is fully up but a driver should not assume anything.

Ok, but then the BIOS should not enable the watchdog.

> Anyway, an unexpected reboot is definitely a bug that need to be
> fixed. It is really easily reproducible. Depending on your hardware

Agreed.

> and BIOS settings, just reboot the boot, wait for 5 minutes and then
> run "insmod w83627hf_wdt.ko". The box just reboot by itself. The
> watchdog sever is not even started.
>
Doesn't happen for me, as the watchdog is initially not enabled in my system,
and bit 4 is never set. And when it gets set, the system immediately reboots.

> This line is actually the original fix that is running over a year:
> outb_p(0, WDT_EFDR); /* disable to prevent reboot */
>
Unfortunately this turns off the watchdog if it was running and has triggered.

> When I tried to cleanup it up, I thought I don't need it but it
> turned out it was still needed.
> When I changed it from 0xC0 to 0xD0, it still reboot.
>
So it looks like the watchdog triggered, for some reason did not cause
a reset, but resetting the trigger flag does.

Ultimately, the new code turns the watchdog off if it was running, has already
triggered, but did not cause a reset. The ultimate effect is that the system
will hang if it gets stuck before the watchdog application is started later on.
Maybe that is not your application, but others wil defintely want that
protection.

So I don't think the solution is correct. We should find out why the watchdog
trigger did not cause a reset. Also, I think it would be better in this
situation (if watchdog has triggered) to restart it with the default timeout.

What is the exact chip type in your system ? I want to have a look into the
datasheet; maybe I can find out how it can trigger without causing a reset.

Thanks,
Guenter

2013-04-03 15:07:01

by Tony Chung

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] watchdog: fix w83627hf_wdt clear timeout expired

On Tue, Apr 2, 2013 at 9:21 PM, Guenter Roeck <[email protected]> wrote:

>
>
> What is the exact chip type in your system ? I want to have a look into the
> datasheet; maybe I can find out how it can trigger without causing a reset.

Winbond 83627HF chip

I believe BIOS has watchdog disabled otherwise it would have reboot the box.
However, the timer just start counting.

Comparing to ipmi_watchdog, you can do this:
modprobe ipmi_watchdog ... start_now=0 ...action=<> nowayout=1

So it is possible to load the driver without start counting.

Notice it is an else, so t is actually 0 already (i.e. expired or
never start running):
> + } else {
> + outb_p(0, WDT_EFDR); /* disable to prevent reboot */


--
- Tony Chung

2013-04-03 15:50:22

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] watchdog: fix w83627hf_wdt clear timeout expired

On Wed, Apr 03, 2013 at 08:06:59AM -0700, Tony Chung wrote:
> On Tue, Apr 2, 2013 at 9:21 PM, Guenter Roeck <[email protected]> wrote:
>
> >
> >
> > What is the exact chip type in your system ? I want to have a look into the
> > datasheet; maybe I can find out how it can trigger without causing a reset.
>
> Winbond 83627HF chip
>
> I believe BIOS has watchdog disabled otherwise it would have reboot the box.
> However, the timer just start counting.
>
> Comparing to ipmi_watchdog, you can do this:
> modprobe ipmi_watchdog ... start_now=0 ...action=<> nowayout=1
>
> So it is possible to load the driver without start counting.
>
That is a different driver, though. you don't have the start_now option here.

> Notice it is an else, so t is actually 0 already (i.e. expired or
> never start running):

Still no idea why that would cause the system to reboot when you reset
the trigger without setting t to 0 again (or why the system doesn't reset
in the first place if the watchdog already triggered).

I am not really sure what the best approach is here, so let's leave it
up to the maintainer to decide which way to go.

Thanks,
Guenter

2013-04-03 16:07:18

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] watchdog: fix w83627hf_wdt clear timeout expired

On Wed, Apr 03, 2013 at 08:50:26AM -0700, Guenter Roeck wrote:
> On Wed, Apr 03, 2013 at 08:06:59AM -0700, Tony Chung wrote:
> > On Tue, Apr 2, 2013 at 9:21 PM, Guenter Roeck <[email protected]> wrote:
> >
> > >
> > >
> > > What is the exact chip type in your system ? I want to have a look into the
> > > datasheet; maybe I can find out how it can trigger without causing a reset.
> >
> > Winbond 83627HF chip
> >

Followup: what chip revision ?

Revision G or later have a new configuration bit, bit 3 of CR E7 on logical
device A.

SELWDTORST. Watch Dog Timer Reset Control.
= 0 is reset by LPC_RST.
= 1 is reset by PWR_OK.

I could imagine that the WDT logic is never correctly initialized in your
system, which might explain the behavior. If so, your code is indeed
correct (or the best I could come up with too), as we would have to ensure
that the wdt subsystem is initialized correctly by writing into all its registers.

Given that, I would suggest to re-submit the patch with a different explanation
(we don't know if the wdt really started running, all we know is that the
expired bit is set), and I'll give it an Acked-by. Something along the line of

"Observed that the Watchdog Timer Status bit can be set when the driver is
loaded. Reset it during initialization. The time-out value must be set to 0
explicitly in this case to prevent an immediate reset".

Thanks,
Guenter

> > I believe BIOS has watchdog disabled otherwise it would have reboot the box.
> > However, the timer just start counting.
> >
> > Comparing to ipmi_watchdog, you can do this:
> > modprobe ipmi_watchdog ... start_now=0 ...action=<> nowayout=1
> >
> > So it is possible to load the driver without start counting.
> >
> That is a different driver, though. you don't have the start_now option here.
>
> > Notice it is an else, so t is actually 0 already (i.e. expired or
> > never start running):
>
> Still no idea why that would cause the system to reboot when you reset
> the trigger without setting t to 0 again (or why the system doesn't reset
> in the first place if the watchdog already triggered).
>
> I am not really sure what the best approach is here, so let's leave it
> up to the maintainer to decide which way to go.
>
> Thanks,
> Guenter
> --
> To unsubscribe from this list: send the line "unsubscribe linux-watchdog" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>