2005-10-21 14:58:38

by Corey Minyard

Subject: [PATCH 9/9] ipmi: add timer thread

We must poll for responses to commands when interrupts aren't in use.
The default poll interval is based on using a kernel timer, which
varies with HZ. For character-based interfaces like KCS and SMIC
though, that can be way too slow (>15 minutes to flash a new firmware
with KCS, >20 seconds to retrieve the sensor list).

This creates a low-priority kernel thread to poll more often. If the
state machine is idle, so is the kernel thread. But if there's an
active command, it polls quite rapidly. This decreases the firmware
flash time from 15 minutes to 1.5 minutes, and the sensor list time to
4.5 seconds, on a Dell PowerEdge x8x system.

The timer-based polling remains, to ensure some amount of
responsiveness even under high user process CPU load.

Checking for a stopped timer at rmmod now uses atomics and
del_timer_sync() to ensure safe stoppage.

Signed-off-by: Matt Domsch <[email protected]>
Signed-off-by: Corey Minyard <[email protected]>

Index: linux-2.6.14-rc4/drivers/char/ipmi/ipmi_si_intf.c
===================================================================
--- linux-2.6.14-rc4.orig/drivers/char/ipmi/ipmi_si_intf.c
+++ linux-2.6.14-rc4/drivers/char/ipmi/ipmi_si_intf.c
@@ -126,6 +126,7 @@ struct ipmi_device_id {

struct smi_info
{
+ int intf_num;
ipmi_smi_t intf;
struct si_sm_data *si_sm;
struct si_sm_handlers *handlers;
@@ -193,8 +194,7 @@ struct smi_info
unsigned long last_timeout_jiffies;

/* Used to gracefully stop the timer without race conditions. */
- volatile int stop_operation;
- volatile int timer_stopped;
+ atomic_t stop_operation;

/* The driver will disable interrupts when it gets into a
situation where it cannot handle messages due to lack of
@@ -221,6 +221,9 @@ struct smi_info
unsigned long events;
unsigned long watchdog_pretimeouts;
unsigned long incoming_messages;
+
+ struct completion exiting;
+ long thread_pid;
};

static struct notifier_block *xaction_notifier_list;
@@ -779,6 +782,38 @@ static void set_run_to_completion(void *
spin_unlock_irqrestore(&(smi_info->si_lock), flags);
}

+static int ipmi_thread(void *data)
+{
+ struct smi_info *smi_info = data;
+ unsigned long flags, last=1;
+ enum si_sm_result smi_result;
+
+ daemonize("kipmi%d", smi_info->intf_num);
+ allow_signal(SIGKILL);
+ set_user_nice(current, 19);
+ while (!atomic_read(&smi_info->stop_operation)) {
+ schedule_timeout(last);
+ spin_lock_irqsave(&(smi_info->si_lock), flags);
+ smi_result=smi_event_handler(smi_info, 0);
+ spin_unlock_irqrestore(&(smi_info->si_lock), flags);
+ if (smi_result == SI_SM_CALL_WITHOUT_DELAY)
+ last = 0;
+ else if (smi_result == SI_SM_CALL_WITH_DELAY) {
+ udelay(1);
+ last = 0;
+ }
+ else {
+ /* System is idle; go to sleep */
+ last = 1;
+ current->state = TASK_INTERRUPTIBLE;
+ }
+ }
+ smi_info->thread_pid = 0;
+ complete_and_exit(&(smi_info->exiting), 0);
+ return 0;
+}
+
+
static void poll(void *send_info)
{
struct smi_info *smi_info = send_info;
@@ -837,10 +872,8 @@ static void smi_timeout(unsigned long da
struct timeval t;
#endif

- if (smi_info->stop_operation) {
- smi_info->timer_stopped = 1;
+ if (atomic_read(&smi_info->stop_operation))
return;
- }

spin_lock_irqsave(&(smi_info->si_lock), flags);
#ifdef DEBUG_TIMING
@@ -913,7 +946,7 @@ static irqreturn_t si_irq_handler(int ir
smi_info->interrupts++;
spin_unlock(&smi_info->count_lock);

- if (smi_info->stop_operation)
+ if (atomic_read(&smi_info->stop_operation))
goto out;

#ifdef DEBUG_TIMING
@@ -1432,7 +1465,7 @@ static u32 ipmi_acpi_gpe(void *context)
smi_info->interrupts++;
spin_unlock(&smi_info->count_lock);

- if (smi_info->stop_operation)
+ if (atomic_read(&smi_info->stop_operation))
goto out;

#ifdef DEBUG_TIMING
@@ -2177,6 +2210,16 @@ static void setup_xaction_handlers(struc
setup_dell_poweredge_bt_xaction_handler(smi_info);
}

+static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
+{
+ if (smi_info->thread_pid > 0) {
+ /* wake the potentially sleeping thread */
+ kill_proc(smi_info->thread_pid, SIGKILL, 0);
+ wait_for_completion(&(smi_info->exiting));
+ }
+ del_timer_sync(&smi_info->si_timer);
+}
+
/* Returns 0 if initialized, or negative on an error. */
static int init_one_smi(int intf_num, struct smi_info **smi)
{
@@ -2284,8 +2327,8 @@ static int init_one_smi(int intf_num, st
new_smi->run_to_completion = 0;

new_smi->interrupt_disabled = 0;
- new_smi->timer_stopped = 0;
- new_smi->stop_operation = 0;
+ atomic_set(&new_smi->stop_operation, 0);
+ new_smi->intf_num = intf_num;

/* Start clearing the flags before we enable interrupts or the
timer to avoid racing with the timer. */
@@ -2303,7 +2346,14 @@ static int init_one_smi(int intf_num, st
new_smi->si_timer.function = smi_timeout;
new_smi->last_timeout_jiffies = jiffies;
new_smi->si_timer.expires = jiffies + SI_TIMEOUT_JIFFIES;
+
add_timer(&(new_smi->si_timer));
+ if (new_smi->si_type != SI_BT) {
+ init_completion(&(new_smi->exiting));
+ new_smi->thread_pid = kernel_thread(ipmi_thread, new_smi,
+ CLONE_FS|CLONE_FILES|
+ CLONE_SIGHAND);
+ }

rv = ipmi_register_smi(&handlers,
new_smi,
@@ -2345,12 +2395,8 @@ static int init_one_smi(int intf_num, st
return 0;

out_err_stop_timer:
- new_smi->stop_operation = 1;
-
- /* Wait for the timer to stop. This avoids problems with race
- conditions removing the timer here. */
- while (!new_smi->timer_stopped)
- schedule_timeout_uninterruptible(1);
+ atomic_inc(&new_smi->stop_operation);
+ wait_for_timer_and_thread(new_smi);

out_err:
if (new_smi->intf)
@@ -2456,8 +2502,7 @@ static void __exit cleanup_one_si(struct
spin_lock_irqsave(&(to_clean->si_lock), flags);
spin_lock(&(to_clean->msg_lock));

- to_clean->stop_operation = 1;
-
+ atomic_inc(&to_clean->stop_operation);
to_clean->irq_cleanup(to_clean);

spin_unlock(&(to_clean->msg_lock));
@@ -2468,10 +2513,7 @@ static void __exit cleanup_one_si(struct
interrupt. */
synchronize_sched();

- /* Wait for the timer to stop. This avoids problems with race
- conditions removing the timer here. */
- while (!to_clean->timer_stopped)
- schedule_timeout_uninterruptible(1);
+ wait_for_timer_and_thread(to_clean);

/* Interrupts and timeouts are stopped, now make sure the
interface is in a clean state. */


2005-10-23 20:50:21

by Andrew Morton

Subject: Re: [PATCH 9/9] ipmi: add timer thread

Corey Minyard <[email protected]> wrote:
>
> We must poll for responses to commands when interrupts aren't in use.
> The default poll interval is based on using a kernel timer, which
> varies with HZ. For character-based interfaces like KCS and SMIC
> though, that can be way too slow (>15 minutes to flash a new firmware
> with KCS, >20 seconds to retrieve the sensor list).
>
> This creates a low-priority kernel thread to poll more often. If the
> state machine is idle, so is the kernel thread. But if there's an
> active command, it polls quite rapidly. This decreases the firmware
> flash time from 15 minutes to 1.5 minutes, and the sensor list time to
> 4.5 seconds, on a Dell PowerEdge x8x system.
>
> The timer-based polling remains, to ensure some amount of
> responsiveness even under high user process CPU load.
>
> Checking for a stopped timer at rmmod now uses atomics and
> del_timer_sync() to ensure safe stoppage.
>
> ...
>
> +static int ipmi_thread(void *data)
> +{
> + struct smi_info *smi_info = data;
> + unsigned long flags, last=1;
> + enum si_sm_result smi_result;
> +
> + daemonize("kipmi%d", smi_info->intf_num);
> + allow_signal(SIGKILL);
> + set_user_nice(current, 19);
> + while (!atomic_read(&smi_info->stop_operation)) {
> + schedule_timeout(last);
> + spin_lock_irqsave(&(smi_info->si_lock), flags);
> + smi_result=smi_event_handler(smi_info, 0);
> + spin_unlock_irqrestore(&(smi_info->si_lock), flags);
> + if (smi_result == SI_SM_CALL_WITHOUT_DELAY)
> + last = 0;
> + else if (smi_result == SI_SM_CALL_WITH_DELAY) {
> + udelay(1);
> + last = 0;
> + }
> + else {
> + /* System is idle; go to sleep */
> + last = 1;
> + current->state = TASK_INTERRUPTIBLE;
> + }
> + }
> + smi_info->thread_pid = 0;
> + complete_and_exit(&(smi_info->exiting), 0);
> + return 0;
> +}

The kthread API would be preferred, please. That way we avoid using
signals in-kernel too - they're a bit awkward. (Do you really want
userspace to be able to kill this kernel thread?)

The first call to schedule_timeout() here will not actually sleep at all,
due to it being in state TASK_RUNNING. Is that deliberate?

Also, this thread can exit in state TASK_INTERRUPTIBLE. That's not a bug
per se, but apparently it'll spit a warning in some of the patches which
Ingo is working on. I don't know why, but I'm sure there's a good reason
;)
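
For context, a minimal sketch of the kthread-based pattern being suggested here; the example_* names are illustrative and not part of the IPMI driver:

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static struct task_struct *example_task;

/* kthread_should_stop() replaces both the stop_operation flag and the
 * SIGKILL-based wakeup: kthread_stop() sets the stop flag, wakes the
 * thread, and waits for it to return. */
static int example_poll_thread(void *data)
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(1);
	return 0;
}

static int example_start(void)
{
	example_task = kthread_run(example_poll_thread, NULL, "kexample");
	return IS_ERR(example_task) ? PTR_ERR(example_task) : 0;
}

static void example_stop(void)
{
	kthread_stop(example_task);
}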

2005-10-23 21:12:54

by Nish Aravamudan

Subject: Re: [PATCH 9/9] ipmi: add timer thread

On 10/23/05, Andrew Morton <[email protected]> wrote:
> Corey Minyard <[email protected]> wrote:
> >
> > We must poll for responses to commands when interrupts aren't in use.
> > The default poll interval is based on using a kernel timer, which
> > varies with HZ. For character-based interfaces like KCS and SMIC
> > though, that can be way too slow (>15 minutes to flash a new firmware
> > with KCS, >20 seconds to retrieve the sensor list).
> >
> > This creates a low-priority kernel thread to poll more often. If the
> > state machine is idle, so is the kernel thread. But if there's an
> > active command, it polls quite rapidly. This decreases the firmware
> > flash time from 15 minutes to 1.5 minutes, and the sensor list time to
> > 4.5 seconds, on a Dell PowerEdge x8x system.
> >
> > The timer-based polling remains, to ensure some amount of
> > responsiveness even under high user process CPU load.
> >
> > Checking for a stopped timer at rmmod now uses atomics and
> > del_timer_sync() to ensure safe stoppage.
> >
> > ...
> >
> > +static int ipmi_thread(void *data)
> > +{
> > + struct smi_info *smi_info = data;
> > + unsigned long flags, last=1;
> > + enum si_sm_result smi_result;
> > +
> > + daemonize("kipmi%d", smi_info->intf_num);
> > + allow_signal(SIGKILL);
> > + set_user_nice(current, 19);
> > + while (!atomic_read(&smi_info->stop_operation)) {
> > + schedule_timeout(last);
> > + spin_lock_irqsave(&(smi_info->si_lock), flags);
> > + smi_result=smi_event_handler(smi_info, 0);
> > + spin_unlock_irqrestore(&(smi_info->si_lock), flags);
> > + if (smi_result == SI_SM_CALL_WITHOUT_DELAY)
> > + last = 0;
> > + else if (smi_result == SI_SM_CALL_WITH_DELAY) {
> > + udelay(1);
> > + last = 0;
> > + }
> > + else {
> > + /* System is idle; go to sleep */
> > + last = 1;
> > + current->state = TASK_INTERRUPTIBLE;
> > + }
> > + }
> > + smi_info->thread_pid = 0;
> > + complete_and_exit(&(smi_info->exiting), 0);
> > + return 0;
> > +}

<snip>

> The first call to schedule_timeout() here will not actually sleep at all,
> due to it being in state TASK_RUNNING. Is that deliberate?
>
> Also, this thread can exit in state TASK_INTERRUPTIBLE. That's not a bug
> per se, but apparently it'll spit a warning in some of the patches which
> Ingo is working on. I don't know why, but I'm sure there's a good reason
> ;)

You beat me to this one, Andrew! :) Both issues can be avoided by using
schedule_timeout_interruptible().

Additionally, I think the last variable is simply being used to switch
between a 0 and 1 jiffy sleep (took me a while to figure that out in
GMail sadly -- any chance the variable could be renamed?). In the
current implementation of schedule_timeout(), these will result in the
same behavior, expiring the timer at the next timer interrupt (the
next jiffy increment is the first time we'll notice we had a timer in
the past to expire). Not sure if that's the intent, or perhaps just a
means to indicate what is desired (a sleep will still occur, even
though a udelay() has already happened in the loop, for instance), but wanted
to make sure.
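
For reference, a minimal sketch of the distinction both comments turn on (illustrative only, not driver code): schedule_timeout() only sleeps if the task state has been set beforehand, which is what schedule_timeout_interruptible() does in one call.

#include <linux/sched.h>

static void example_sleep_semantics(void)
{
	/* Called while still in TASK_RUNNING (as at the top of the loop
	 * above), schedule_timeout() returns almost immediately: */
	schedule_timeout(1);

	/* To actually sleep for up to one jiffy, the state must be set
	 * first: */
	set_current_state(TASK_INTERRUPTIBLE);
	schedule_timeout(1);

	/* ...which is what the helper does internally; the task is back in
	 * TASK_RUNNING when it returns, so a loop using it cannot exit
	 * while still marked TASK_INTERRUPTIBLE: */
	schedule_timeout_interruptible(1);
}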

Thanks,
Nish

2005-10-23 21:27:26

by Matt Domsch

Subject: Re: [PATCH 9/9] ipmi: add timer thread

On Sun, Oct 23, 2005 at 02:12:51PM -0700, Nish Aravamudan wrote:
> On 10/23/05, Andrew Morton <[email protected]> wrote:
> > The first call to schedule_timeout() here will not actually sleep at all,
> > due to it being in state TASK_RUNNING. Is that deliberate?

Wasn't intentional, but doesn't really matter.

> > Also, this thread can exit in state TASK_INTERRUPTIBLE. That's not a bug
> > per se, but apparently it'll spit a warning in some of the patches which
> > Ingo is working on. I don't know why, but I'm sure there's a good reason
> > ;)
>
> You beat me to this one, Andrew! :) Both issues can be avoided by using
> schedule_timeout_interruptible().

OK.

> Additionally, I think the last variable is simply being used to switch
> between a 0 and 1 jiffy sleep (took me a while to figure that out in
> GMail sadly -- any chance the variable could be renamed?). In the
> current implementation of schedule_timeout(), these will result in the
> same behavior, expiring the timer at the next timer interrupt (the
> next jiffy increment is the first time we'll notice we had a timer in
> the past to expire). Not sure if that's the intent, or perhaps just a
> means to indicate what is desired (a sleep will still occur, even
> though a udelay() has already happened in the loop, for instance), but wanted
> to make sure.

I think I need to move the schedule_timeout_interruptible() into the
else clause, not at the top of the loop, as that's really the only
case where I want it to sleep. In a former version of the patch, the
SI_SM_CALL_WITH_DELAY was supposed to be a 1-jiffy delay, while the
else clause was a several-jiffy delay. However, that led to most
commands still taking too long to complete, hence the CALL_WITH_DELAY
case became a udelay(1).
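
Roughly, the restructured loop Matt describes sleeps only in the idle branch (a sketch using the kthread stop test suggested above; the actual patch follows later in the thread):

	while (!kthread_should_stop()) {
		spin_lock_irqsave(&(smi_info->si_lock), flags);
		smi_result = smi_event_handler(smi_info, 0);
		spin_unlock_irqrestore(&(smi_info->si_lock), flags);
		if (smi_result == SI_SM_CALL_WITHOUT_DELAY)
			; /* poll again immediately */
		else if (smi_result == SI_SM_CALL_WITH_DELAY)
			udelay(1); /* brief busy-wait, no sleep */
		else
			schedule_timeout_interruptible(1); /* idle: sleep a jiffy */
	}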

I'll code up and test a patch that does this, and will send that ASAP.

Thanks,
Matt

--
Matt Domsch
Software Architect
Dell Linux Solutions linux.dell.com & http://www.dell.com/linux
Linux on Dell mailing lists @ http://lists.us.dell.com

2005-10-24 20:12:14

by George Anzinger

Subject: Re: [PATCH 9/9] ipmi: add timer thread

Nish Aravamudan wrote:
> On 10/23/05, Andrew Morton <[email protected]> wrote:
>
>>Corey Minyard <[email protected]> wrote:
>>
>>>We must poll for responses to commands when interrupts aren't in use.
>>>The default poll interval is based on using a kernel timer, which
>>>varies with HZ. For character-based interfaces like KCS and SMIC
>>>though, that can be way too slow (>15 minutes to flash a new firmware
>>>with KCS, >20 seconds to retrieve the sensor list).
>>>
>>>This creates a low-priority kernel thread to poll more often. If the
>>>state machine is idle, so is the kernel thread. But if there's an
>>>active command, it polls quite rapidly. This decreases the firmware
>>>flash time from 15 minutes to 1.5 minutes, and the sensor list time to
>>>4.5 seconds, on a Dell PowerEdge x8x system.
>>>
>>>The timer-based polling remains, to ensure some amount of
>>>responsiveness even under high user process CPU load.
>>>
>>>Checking for a stopped timer at rmmod now uses atomics and
>>>del_timer_sync() to ensure safe stoppage.
>>>
>>>...
>>>
>>>+static int ipmi_thread(void *data)
>>>+{
>>>+ struct smi_info *smi_info = data;
>>>+ unsigned long flags, last=1;
>>>+ enum si_sm_result smi_result;
>>>+
>>>+ daemonize("kipmi%d", smi_info->intf_num);
>>>+ allow_signal(SIGKILL);
>>>+ set_user_nice(current, 19);
>>>+ while (!atomic_read(&smi_info->stop_operation)) {
>>>+ schedule_timeout(last);
>>>+ spin_lock_irqsave(&(smi_info->si_lock), flags);
>>>+ smi_result=smi_event_handler(smi_info, 0);
>>>+ spin_unlock_irqrestore(&(smi_info->si_lock), flags);
>>>+ if (smi_result == SI_SM_CALL_WITHOUT_DELAY)
>>>+ last = 0;
>>>+ else if (smi_result == SI_SM_CALL_WITH_DELAY) {
>>>+ udelay(1);
>>>+ last = 0;
>>>+ }
>>>+ else {
>>>+ /* System is idle; go to sleep */
>>>+ last = 1;
>>>+ current->state = TASK_INTERRUPTIBLE;
>>>+ }
>>>+ }
>>>+ smi_info->thread_pid = 0;
>>>+ complete_and_exit(&(smi_info->exiting), 0);
>>>+ return 0;
>>>+}
>
>
> <snip>
>
>>The first call to schedule_timeout() here will not actually sleep at all,
>>due to it being in state TASK_RUNNING. Is that deliberate?
>>
>>Also, this thread can exit in state TASK_INTERRUPTIBLE. That's not a bug
>>per se, but apparently it'll spit a warning in some of the patches which
>>Ingo is working on. I don't know why, but I'm sure there's a good reason
>>;)
>
>
> You beat me to this one, Andrew! :) Both issues can be avoided by using
> schedule_timeout_interruptible().
>
> Additionally, I think the last variable is simply being used to switch
> between a 0 and 1 jiffy sleep (took me a while to figure that out in
> GMail sadly -- any chance the variable could be renamed?). In the
> current implementation of schedule_timeout(), these will result in the
> same behavior, expiring the timer at the next timer interrupt (the
> next jiffy increment is the first time we'll notice we had a timer in
> the past to expire). Not sure if that's the intent, or perhaps just a
> means to indicate what is desired (a sleep will still occur, even
> though a udelay() has already happened in the loop, for instance), but wanted
> to make sure.

I think it would be VERY nice if we could eliminate calls to sleep for NIL time. Sooner or later
these are going to bite us very badly.

In the above code, the handling of the "task state" is, well, funny. If last is 0, the task
state is not modified, implying that:

	if (last)
		schedule_timeout(last);

might be a better rendering of the intended function.

--
George Anzinger [email protected]
HRT (High-res-timers): http://sourceforge.net/projects/high-res-timers/

2005-10-24 20:31:54

by Matt Domsch

Subject: [PATCH 2.6.14-rc5-mm1] ipmi: use kthread API

Convert ipmi driver thread to kthread API, only sleep when interface
is idle.

Signed-off-by: Matt Domsch <[email protected]>

--
Matt Domsch
Software Architect
Dell Linux Solutions linux.dell.com & http://www.dell.com/linux
Linux on Dell mailing lists @ http://lists.us.dell.com

Index: linux-2.6.ipmi/drivers/char/ipmi/ipmi_si_intf.c
===================================================================
--- linux-2.6.ipmi.orig/drivers/char/ipmi/ipmi_si_intf.c
+++ linux-2.6.ipmi/drivers/char/ipmi/ipmi_si_intf.c
@@ -52,6 +52,7 @@
#include <linux/pci.h>
#include <linux/ioport.h>
#include <linux/notifier.h>
+#include <linux/kthread.h>
#include <asm/irq.h>
#ifdef CONFIG_HIGH_RES_TIMERS
#include <linux/hrtime.h>
@@ -222,8 +223,7 @@ struct smi_info
unsigned long watchdog_pretimeouts;
unsigned long incoming_messages;

- struct completion exiting;
- long thread_pid;
+ struct task_struct *thread;
};

static struct notifier_block *xaction_notifier_list;
@@ -785,31 +785,22 @@ static void set_run_to_completion(void *
static int ipmi_thread(void *data)
{
struct smi_info *smi_info = data;
- unsigned long flags, last=1;
+ unsigned long flags;
enum si_sm_result smi_result;

- daemonize("kipmi%d", smi_info->intf_num);
- allow_signal(SIGKILL);
set_user_nice(current, 19);
- while (!atomic_read(&smi_info->stop_operation)) {
- schedule_timeout(last);
+ while (!kthread_should_stop()) {
spin_lock_irqsave(&(smi_info->si_lock), flags);
smi_result=smi_event_handler(smi_info, 0);
spin_unlock_irqrestore(&(smi_info->si_lock), flags);
- if (smi_result == SI_SM_CALL_WITHOUT_DELAY)
- last = 0;
- else if (smi_result == SI_SM_CALL_WITH_DELAY) {
- udelay(1);
- last = 0;
- }
- else {
- /* System is idle; go to sleep */
- last = 1;
- current->state = TASK_INTERRUPTIBLE;
+ if (smi_result == SI_SM_CALL_WITHOUT_DELAY) {
+ /* do nothing */
}
+ else if (smi_result == SI_SM_CALL_WITH_DELAY)
+ udelay(1);
+ else
+ schedule_timeout_interruptible(1);
}
- smi_info->thread_pid = 0;
- complete_and_exit(&(smi_info->exiting), 0);
return 0;
}

@@ -2212,11 +2203,8 @@ static void setup_xaction_handlers(struc

static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
{
- if (smi_info->thread_pid > 0) {
- /* wake the potentially sleeping thread */
- kill_proc(smi_info->thread_pid, SIGKILL, 0);
- wait_for_completion(&(smi_info->exiting));
- }
+ if (smi_info->thread != ERR_PTR(-ENOMEM))
+ kthread_stop(smi_info->thread);
del_timer_sync(&smi_info->si_timer);
}

@@ -2348,12 +2336,9 @@ static int init_one_smi(int intf_num, st
new_smi->si_timer.expires = jiffies + SI_TIMEOUT_JIFFIES;

add_timer(&(new_smi->si_timer));
- if (new_smi->si_type != SI_BT) {
- init_completion(&(new_smi->exiting));
- new_smi->thread_pid = kernel_thread(ipmi_thread, new_smi,
- CLONE_FS|CLONE_FILES|
- CLONE_SIGHAND);
- }
+ if (new_smi->si_type != SI_BT)
+ new_smi->thread = kthread_run(ipmi_thread, new_smi,
+ "kipmi%d", new_smi->intf_num);

rv = ipmi_register_smi(&handlers,
new_smi,