2006-11-29 16:27:00

by Stephane Eranian

[permalink] [raw]
Subject: [PATCH] i386 add idle notifier

Hello,

Here is a patch that adds an idle notifier to the i386 tree.
The idle notifier functionalities and implementation are
identical to the x86_64 idle notifier. We use the idle notifier
in the context of perfmon.

The patch is against Andi Kleen's x86_64-2.6.19-rc6-061128-1.bz2
kernel. It may apply to other kernels but it needs some updates
to poll_idle() and default_idle() to work correctly.

changelog:
- add an idle notifier mechanism to i386 tree

signed-off-by: stephane eranian <[email protected]>

diff --exclude=.gitignore -urNp linux-2.6.19-rc6-ak.orig/arch/i386/kernel/apic.c linux-2.6.19-rc6-ak.base/arch/i386/kernel/apic.c
--- linux-2.6.19-rc6-ak.orig/arch/i386/kernel/apic.c 2006-11-28 13:17:21.000000000 -0800
+++ linux-2.6.19-rc6-ak.base/arch/i386/kernel/apic.c 2006-11-29 06:31:26.000000000 -0800
@@ -36,6 +36,7 @@
#include <asm/hpet.h>
#include <asm/i8253.h>
#include <asm/nmi.h>
+#include <asm/idle.h>

#include <mach_apic.h>
#include <mach_apicdef.h>
@@ -1255,6 +1256,7 @@ fastcall void smp_apic_timer_interrupt(s
* Besides, if we don't timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
+ exit_idle();
irq_enter();
smp_local_timer_interrupt();
irq_exit();
@@ -1305,6 +1307,7 @@ fastcall void smp_spurious_interrupt(str
{
unsigned long v;

+ exit_idle();
irq_enter();
/*
* Check if this really is a spurious interrupt and ACK it
@@ -1329,6 +1332,7 @@ fastcall void smp_error_interrupt(struct
{
unsigned long v, v1;

+ exit_idle();
irq_enter();
/* First tickle the hardware, only then report what went on. -- REW */
v = apic_read(APIC_ESR);
diff --exclude=.gitignore -urNp linux-2.6.19-rc6-ak.orig/arch/i386/kernel/cpu/mcheck/p4.c linux-2.6.19-rc6-ak.base/arch/i386/kernel/cpu/mcheck/p4.c
--- linux-2.6.19-rc6-ak.orig/arch/i386/kernel/cpu/mcheck/p4.c 2006-11-28 13:16:35.000000000 -0800
+++ linux-2.6.19-rc6-ak.base/arch/i386/kernel/cpu/mcheck/p4.c 2006-11-29 06:31:30.000000000 -0800
@@ -12,6 +12,7 @@
#include <asm/system.h>
#include <asm/msr.h>
#include <asm/apic.h>
+#include <asm/idle.h>

#include <asm/therm_throt.h>

@@ -59,6 +60,7 @@ static void (*vendor_thermal_interrupt)(

fastcall void smp_thermal_interrupt(struct pt_regs *regs)
{
+ exit_idle();
irq_enter();
vendor_thermal_interrupt(regs);
irq_exit();
diff --exclude=.gitignore -urNp linux-2.6.19-rc6-ak.orig/arch/i386/kernel/irq.c linux-2.6.19-rc6-ak.base/arch/i386/kernel/irq.c
--- linux-2.6.19-rc6-ak.orig/arch/i386/kernel/irq.c 2006-11-28 13:16:35.000000000 -0800
+++ linux-2.6.19-rc6-ak.base/arch/i386/kernel/irq.c 2006-11-29 06:31:34.000000000 -0800
@@ -19,6 +19,8 @@
#include <linux/cpu.h>
#include <linux/delay.h>

+#include <asm/idle.h>
+
DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);

@@ -61,6 +63,7 @@ fastcall unsigned int do_IRQ(struct pt_r
union irq_ctx *curctx, *irqctx;
u32 *isp;
#endif
+ exit_idle();

if (unlikely((unsigned)irq >= NR_IRQS)) {
printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
diff --exclude=.gitignore -urNp linux-2.6.19-rc6-ak.orig/arch/i386/kernel/process.c linux-2.6.19-rc6-ak.base/arch/i386/kernel/process.c
--- linux-2.6.19-rc6-ak.orig/arch/i386/kernel/process.c 2006-11-28 13:17:21.000000000 -0800
+++ linux-2.6.19-rc6-ak.base/arch/i386/kernel/process.c 2006-11-29 06:55:58.000000000 -0800
@@ -48,6 +48,7 @@
#include <asm/i387.h>
#include <asm/desc.h>
#include <asm/vm86.h>
+#include <asm/idle.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif
@@ -80,6 +81,46 @@ void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
+
+void idle_notifier_register(struct notifier_block *n)
+{
+ atomic_notifier_chain_register(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_register);
+
+void idle_notifier_unregister(struct notifier_block *n)
+{
+ atomic_notifier_chain_unregister(&idle_notifier, n);
+}
+EXPORT_SYMBOL(idle_notifier_unregister);
+
+static DEFINE_PER_CPU(volatile unsigned long, idle_state);
+
+void enter_idle(void)
+{
+ /* needs to be atomic w.r.t. interrupts, not against other CPUs */
+ __set_bit(0, &__get_cpu_var(idle_state));
+ atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+}
+
+void __exit_idle(void)
+{
+ /* needs to be atomic w.r.t. interrupts, not against other CPUs */
+ if (__test_and_clear_bit(0, &__get_cpu_var(idle_state)) == 0)
+ return;
+ atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+}
+
+/* Called from interrupts to signify idle end */
+void exit_idle(void)
+{
+ /* idle loop has pid 0 */
+ if (current->pid)
+ return;
+ __exit_idle();
+}
+
void disable_hlt(void)
{
hlt_counter++;
@@ -184,7 +225,9 @@ void cpu_idle(void)
play_dead();

__get_cpu_var(irq_stat).idle_timestamp = jiffies;
+ enter_idle();
idle();
+ __exit_idle();
}
preempt_enable_no_resched();
schedule();
diff --exclude=.gitignore -urNp linux-2.6.19-rc6-ak.orig/arch/i386/kernel/smp.c linux-2.6.19-rc6-ak.base/arch/i386/kernel/smp.c
--- linux-2.6.19-rc6-ak.orig/arch/i386/kernel/smp.c 2006-11-28 13:16:35.000000000 -0800
+++ linux-2.6.19-rc6-ak.base/arch/i386/kernel/smp.c 2006-11-29 06:31:38.000000000 -0800
@@ -23,6 +23,7 @@

#include <asm/mtrr.h>
#include <asm/tlbflush.h>
+#include <asm/idle.h>
#include <mach_apic.h>

/*
@@ -629,6 +630,7 @@ fastcall void smp_call_function_interrup
/*
* At this point the info structure may be out of scope unless wait==1
*/
+ exit_idle();
irq_enter();
(*func)(info);
irq_exit();
diff --exclude=.gitignore -urNp linux-2.6.19-rc6-ak.orig/include/asm-i386/idle.h linux-2.6.19-rc6-ak.base/include/asm-i386/idle.h
--- linux-2.6.19-rc6-ak.orig/include/asm-i386/idle.h 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.19-rc6-ak.base/include/asm-i386/idle.h 2006-11-29 06:35:06.000000000 -0800
@@ -0,0 +1,15 @@
+#ifndef _ASM_I386_IDLE_H
+#define _ASM_I386_IDLE_H 1
+
+#define IDLE_START 1
+#define IDLE_END 2
+
+struct notifier_block;
+void idle_notifier_register(struct notifier_block *n);
+void idle_notifier_unregister(struct notifier_block *n);
+
+void exit_idle(void);
+void enter_idle(void);
+}
+
+#endif


2006-11-29 16:37:37

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] i386 add idle notifier

On Wednesday 29 November 2006 17:25, Stephane Eranian wrote:
> Hello,
>
> Here is a patch that adds an idle notifier to the i386 tree.
> The idle notifier functionalities and implementation are
> identical to the x86_64 idle notifier. We use the idle notifier
> in the context of perfmon.
>
> The patch is against Andi Kleen's x86_64-2.6.19-rc6-061128-1.bz2
> kernel. It may apply to other kernels but it needs some updates
> to poll_idle() and default_idle() to work correctly.

Added thanks

-Andi

2006-11-29 16:41:36

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] i386 add idle notifier (take 2)

Hello,

[This is the second take due to stray '}' in the patch. Sorry about that]

Here is a patch that adds an idle notifier to the i386 tree.
The idle notifier functionalities and implementation are
identical to the x86_64 idle notifier. We use the idle notifier
in the context of perfmon.

The patch is against Andi Kleen's x86_64-2.6.19-rc6-061128-1.bz2
kernel. It may apply to other kernels but it needs some updates
to poll_idle() and default_idle() to work correctly.

changelog:
- add an idle notifier mechanism to i386 tree

signed-off-by: stephane eranian <[email protected]>

diff --exclude=.gitignore -urNp linux-2.6.19-rc6-ak.orig/arch/i386/kernel/apic.c linux-2.6.19-rc6-ak.base/arch/i386/kernel/apic.c
--- linux-2.6.19-rc6-ak.orig/arch/i386/kernel/apic.c 2006-11-28 13:17:21.000000000 -0800
+++ linux-2.6.19-rc6-ak.base/arch/i386/kernel/apic.c 2006-11-29 06:31:26.000000000 -0800
@@ -36,6 +36,7 @@
#include <asm/hpet.h>
#include <asm/i8253.h>
#include <asm/nmi.h>
+#include <asm/idle.h>

#include <mach_apic.h>
#include <mach_apicdef.h>
@@ -1255,6 +1256,7 @@ fastcall void smp_apic_timer_interrupt(s
* Besides, if we don't timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
+ exit_idle();
irq_enter();
smp_local_timer_interrupt();
irq_exit();
@@ -1305,6 +1307,7 @@ fastcall void smp_spurious_interrupt(str
{
unsigned long v;

+ exit_idle();
irq_enter();
/*
* Check if this really is a spurious interrupt and ACK it
@@ -1329,6 +1332,7 @@ fastcall void smp_error_interrupt(struct
{
unsigned long v, v1;

+ exit_idle();
irq_enter();
/* First tickle the hardware, only then report what went on. -- REW */
v = apic_read(APIC_ESR);
diff --exclude=.gitignore -urNp linux-2.6.19-rc6-ak.orig/arch/i386/kernel/cpu/mcheck/p4.c linux-2.6.19-rc6-ak.base/arch/i386/kernel/cpu/mcheck/p4.c
--- linux-2.6.19-rc6-ak.orig/arch/i386/kernel/cpu/mcheck/p4.c 2006-11-28 13:16:35.000000000 -0800
+++ linux-2.6.19-rc6-ak.base/arch/i386/kernel/cpu/mcheck/p4.c 2006-11-29 06:31:30.000000000 -0800
@@ -12,6 +12,7 @@
#include <asm/system.h>
#include <asm/msr.h>
#include <asm/apic.h>
+#include <asm/idle.h>

#include <asm/therm_throt.h>

@@ -59,6 +60,7 @@ static void (*vendor_thermal_interrupt)(

fastcall void smp_thermal_interrupt(struct pt_regs *regs)
{
+ exit_idle();
irq_enter();
vendor_thermal_interrupt(regs);
irq_exit();
diff --exclude=.gitignore -urNp linux-2.6.19-rc6-ak.orig/arch/i386/kernel/irq.c linux-2.6.19-rc6-ak.base/arch/i386/kernel/irq.c
--- linux-2.6.19-rc6-ak.orig/arch/i386/kernel/irq.c 2006-11-28 13:16:35.000000000 -0800
+++ linux-2.6.19-rc6-ak.base/arch/i386/kernel/irq.c 2006-11-29 06:31:34.000000000 -0800
@@ -19,6 +19,8 @@
#include <linux/cpu.h>
#include <linux/delay.h>

+#include <asm/idle.h>
+
DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);

@@ -61,6 +63,7 @@ fastcall unsigned int do_IRQ(struct pt_r
union irq_ctx *curctx, *irqctx;
u32 *isp;
#endif
+ exit_idle();

if (unlikely((unsigned)irq >= NR_IRQS)) {
printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
diff --exclude=.gitignore -urNp linux-2.6.19-rc6-ak.orig/arch/i386/kernel/process.c linux-2.6.19-rc6-ak.base/arch/i386/kernel/process.c
--- linux-2.6.19-rc6-ak.orig/arch/i386/kernel/process.c 2006-11-28 13:17:21.000000000 -0800
+++ linux-2.6.19-rc6-ak.base/arch/i386/kernel/process.c 2006-11-29 06:55:58.000000000 -0800
@@ -48,6 +48,7 @@
#include <asm/i387.h>
#include <asm/desc.h>
#include <asm/vm86.h>
+#include <asm/idle.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif
@@ -80,6 +81,46 @@ void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
+
+void idle_notifier_register(struct notifier_block *n)
+{
+ atomic_notifier_chain_register(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_register);
+
+void idle_notifier_unregister(struct notifier_block *n)
+{
+ atomic_notifier_chain_unregister(&idle_notifier, n);
+}
+EXPORT_SYMBOL(idle_notifier_unregister);
+
+static DEFINE_PER_CPU(volatile unsigned long, idle_state);
+
+void enter_idle(void)
+{
+ /* needs to be atomic w.r.t. interrupts, not against other CPUs */
+ __set_bit(0, &__get_cpu_var(idle_state));
+ atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+}
+
+void __exit_idle(void)
+{
+ /* needs to be atomic w.r.t. interrupts, not against other CPUs */
+ if (__test_and_clear_bit(0, &__get_cpu_var(idle_state)) == 0)
+ return;
+ atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+}
+
+/* Called from interrupts to signify idle end */
+void exit_idle(void)
+{
+ /* idle loop has pid 0 */
+ if (current->pid)
+ return;
+ __exit_idle();
+}
+
void disable_hlt(void)
{
hlt_counter++;
@@ -184,7 +225,9 @@ void cpu_idle(void)
play_dead();

__get_cpu_var(irq_stat).idle_timestamp = jiffies;
+ enter_idle();
idle();
+ __exit_idle();
}
preempt_enable_no_resched();
schedule();
diff --exclude=.gitignore -urNp linux-2.6.19-rc6-ak.orig/arch/i386/kernel/smp.c linux-2.6.19-rc6-ak.base/arch/i386/kernel/smp.c
--- linux-2.6.19-rc6-ak.orig/arch/i386/kernel/smp.c 2006-11-28 13:16:35.000000000 -0800
+++ linux-2.6.19-rc6-ak.base/arch/i386/kernel/smp.c 2006-11-29 06:31:38.000000000 -0800
@@ -23,6 +23,7 @@

#include <asm/mtrr.h>
#include <asm/tlbflush.h>
+#include <asm/idle.h>
#include <mach_apic.h>

/*
@@ -629,6 +630,7 @@ fastcall void smp_call_function_interrup
/*
* At this point the info structure may be out of scope unless wait==1
*/
+ exit_idle();
irq_enter();
(*func)(info);
irq_exit();
diff --exclude=.gitignore -urNp linux-2.6.19-rc6-ak.orig/include/asm-i386/idle.h linux-2.6.19-rc6-ak.base/include/asm-i386/idle.h
--- linux-2.6.19-rc6-ak.orig/include/asm-i386/idle.h 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.19-rc6-ak.base/include/asm-i386/idle.h 2006-11-29 08:31:43.000000000 -0800
@@ -0,0 +1,14 @@
+#ifndef _ASM_I386_IDLE_H
+#define _ASM_I386_IDLE_H 1
+
+#define IDLE_START 1
+#define IDLE_END 2
+
+struct notifier_block;
+void idle_notifier_register(struct notifier_block *n);
+void idle_notifier_unregister(struct notifier_block *n);
+
+void exit_idle(void);
+void enter_idle(void);
+
+#endif

2006-11-29 17:09:42

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH] i386 add idle notifier

On Wed, Nov 29, 2006 at 08:25:40AM -0800, Stephane Eranian wrote:
> Hello,
>
> Here is a patch that adds an idle notifier to the i386 tree.
> The idle notifier functionalities and implementation are
> identical to the x86_64 idle notifier. We use the idle notifier
> in the context of perfmon.
>
> The patch is against Andi Kleen's x86_64-2.6.19-rc6-061128-1.bz2
> kernel. It may apply to other kernels but it needs some updates
> to poll_idle() and default_idle() to work correctly.

Walking through a notifier chain on every single interrupt (including
timer interrupts) seems rather costly. What do you need this for
exactly?

2006-11-29 21:13:15

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] i386 add idle notifier

On Wed, 29 Nov 2006 17:09:39 +0000
Christoph Hellwig <[email protected]> wrote:

> On Wed, Nov 29, 2006 at 08:25:40AM -0800, Stephane Eranian wrote:
> > Hello,
> >
> > Here is a patch that adds an idle notifier to the i386 tree.
> > The idle notifier functionalities and implementation are
> > identical to the x86_64 idle notifier. We use the idle notifier
> > in the context of perfmon.
> >
> > The patch is against Andi Kleen's x86_64-2.6.19-rc6-061128-1.bz2
> > kernel. It may apply to other kernels but it needs some updates
> > to poll_idle() and default_idle() to work correctly.
>
> Walking through a notifier chain on every single interrupt (including
> timer interrupts) seems rather costly. What do you need this for
> exactly?

yes, it's a worry.

Why doesn't enter_idle() do the test_and_set_bit() thing, like
exit_idle()?

2006-11-29 22:25:28

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] i386 add idle notifier

Hello,

On Wed, Nov 29, 2006 at 01:09:44PM -0800, Andrew Morton wrote:
> On Wed, 29 Nov 2006 17:09:39 +0000
> Christoph Hellwig <[email protected]> wrote:
>
> > On Wed, Nov 29, 2006 at 08:25:40AM -0800, Stephane Eranian wrote:
> > > Hello,
> > >
> > > Here is a patch that adds an idle notifier to the i386 tree.
> > > The idle notifier functionalities and implementation are
> > > identical to the x86_64 idle notifier. We use the idle notifier
> > > in the context of perfmon.
> > >
> > > The patch is against Andi Kleen's x86_64-2.6.19-rc6-061128-1.bz2
> > > kernel. It may apply to other kernels but it needs some updates
> > > to poll_idle() and default_idle() to work correctly.
> >
> > Walking through a notifier chain on every single interrupt (including
> > timer interrupts) seems rather costly. What do you need this for
> > exactly?
>
Let me give you the background on this.

In system-wide mode, perfmon wants to exclude useless kernel execution
from being monitored. That execution is performed by the idle thread
when it enters its lowest loop level, i.e., poll_idle(), defaut_idle(),
mwait_idle(). It used to be that the idle loop was simply looping, waiting
for an interrupt or polling a variable. These days, it is a bit more
complex because on many processors, idle means going to a lower power state.
We want to capture useful idle thread execution such as interrupt servicing
but we want to exclude polling and low-power state.

When entering low-power, some processors systematically turn off
performance counters. But this is not necessarily the case for every processors.
Worse, for some processors it depends on the events being measured. Yet, at the
perfmon interface level, we want to guarantee a uniform behavior across architectures.
As such, we have to use software to enforce our policy.

Andi pointed out that there is an idle notifier in the x86-64 tree, and that it
could be a way to achieve our goal.

I would tend to agree with both of you that the way the notifier is implemented may be a little
bit overkill for what we need it for. In my latest patch, I made sure that we register/unregister
an idle notification routine only when necessary. Yet, it is true that on each interrupt received
by the idle thread you go through enter_idle()/exit_idle() and atomic_call_chain().

In the current implementation, registration is global, call are obviously made for one CPU and
a RCU read lock is used, so multiple calls from different CPUs can be issued at the same time.

AFAICT, perfmon is the only user of the idle notifier. Perfmon operates on a per-cpu basis in system
wide. As such, I think we could simplify it by making the registration/call mechanism completly a
per-CPU construct. This would simplify the locking aspect. We would still need the test_and_set
to avoid a race with interrupts.

> yes, it's a worry.
>
> Why doesn't enter_idle() do the test_and_set_bit() thing, like
> exit_idle()?

I think we could use the same test_and_set_bit(). In fact, there is only
one place where we call enter_idle(), so just like x86-64 we could just
replace this with
__get_cpu_var(idle_state) = 1;

--
-Stephane

2006-11-29 23:09:12

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] i386 add idle notifier

On Wed, 29 Nov 2006 14:18:53 -0800
Stephane Eranian <[email protected]> wrote:

> Hello,
>
> On Wed, Nov 29, 2006 at 01:09:44PM -0800, Andrew Morton wrote:
> > On Wed, 29 Nov 2006 17:09:39 +0000
> > Christoph Hellwig <[email protected]> wrote:
> >
> > > On Wed, Nov 29, 2006 at 08:25:40AM -0800, Stephane Eranian wrote:
> > > > Hello,
> > > >
> > > > Here is a patch that adds an idle notifier to the i386 tree.
> > > > The idle notifier functionalities and implementation are
> > > > identical to the x86_64 idle notifier. We use the idle notifier
> > > > in the context of perfmon.
> > > >
> > > > The patch is against Andi Kleen's x86_64-2.6.19-rc6-061128-1.bz2
> > > > kernel. It may apply to other kernels but it needs some updates
> > > > to poll_idle() and default_idle() to work correctly.
> > >
> > > Walking through a notifier chain on every single interrupt (including
> > > timer interrupts) seems rather costly. What do you need this for
> > > exactly?
> >
> Let me give you the background on this.
>
> In system-wide mode, perfmon wants to exclude useless kernel execution
> from being monitored. That execution is performed by the idle thread
> when it enters its lowest loop level, i.e., poll_idle(), defaut_idle(),
> mwait_idle(). It used to be that the idle loop was simply looping, waiting
> for an interrupt or polling a variable. These days, it is a bit more
> complex because on many processors, idle means going to a lower power state.
> We want to capture useful idle thread execution such as interrupt servicing
> but we want to exclude polling and low-power state.

An alternative approach might be to change perfmon so that it works out
whether it is being called by an idle thread

if ((current->flags & PF_IDLE) && (other stuff to do with irqs?))
return;

> We would still need the test_and_set
> to avoid a race with interrupts.

btw, I don't think anyone promised that __test_and_set_bit is atomic wrt
interrupts on all architectures. Is OK for x86.

2006-11-29 23:15:45

by Russell King

[permalink] [raw]
Subject: Re: [PATCH] i386 add idle notifier

On Wed, Nov 29, 2006 at 03:05:44PM -0800, Andrew Morton wrote:
> btw, I don't think anyone promised that __test_and_set_bit is atomic wrt
> interrupts on all architectures. Is OK for x86.

Correct. The generic version found in include/asm-generic/bitops/non-atomic.h
is not interrupt safe:

/**
* __test_and_set_bit - Set a bit and return its old value
* @nr: Bit to set
* @addr: Address to count from
*
* This operation is non-atomic and can be reordered.
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*/

--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of:

2006-11-29 23:22:04

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] i386 add idle notifier


> An alternative approach might be to change perfmon so that it works out
> whether it is being called by an idle thread
>
> if ((current->flags & PF_IDLE) && (other stuff to do with irqs?))
> return;

The problem is that the performance counters just keep running in the CPU.
Perfmon needs to do something actively to disable them.

Actually on x86 they usually stop in true idle state in hardware, but
they don't do in polling mode and it sometimes seems to depend on
the firmware. So it mostly would be for idle=poll

But if you do walk clock time profiling exactly because they stop
a profiler should account for this somehow. Otherwise the profiling time
doesn't add up to 100%

-Andi

2006-11-30 08:23:11

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] i386 add idle notifier

Hi,

On Thu, Nov 30, 2006 at 12:21:41AM +0100, Andi Kleen wrote:
>
> > An alternative approach might be to change perfmon so that it works out
> > whether it is being called by an idle thread
> >
> > if ((current->flags & PF_IDLE) && (other stuff to do with irqs?))
> > return;
>
> The problem is that the performance counters just keep running in the CPU.
> Perfmon needs to do something actively to disable them.
>
Exactly.

> Actually on x86 they usually stop in true idle state in hardware, but
> they don't do in polling mode and it sometimes seems to depend on
> the firmware. So it mostly would be for idle=poll
>
Most likely. But we also have the deal with other architectures such
as MIPS,IA64, and PPC.

> But if you do walk clock time profiling exactly because they stop
> a profiler should account for this somehow. Otherwise the profiling time
> doesn't add up to 100%
>
Yes. Note that perfmon does maintain the duration when monitornig was active.
So it is possible to determine active time and use it to scale counts.

--
-Stephane