2007-05-25 07:10:26

by Ingo Molnar

Subject: [patch] sched_clock(): cleanups

Subject: [patch] sched_clock(): cleanups
From: Ingo Molnar <[email protected]>

clean up sched-clock.c - mostly comment style fixes.

Signed-off-by: Ingo Molnar <[email protected]>
---
arch/i386/kernel/sched-clock.c | 79 ++++++++++++++++++++++++++---------------
1 file changed, 51 insertions(+), 28 deletions(-)

Index: linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/sched-clock.c
===================================================================
--- linux-cfs-2.6.22-rc2-mm1.q.orig/arch/i386/kernel/sched-clock.c
+++ linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/sched-clock.c
@@ -60,18 +60,20 @@ DEFINE_PER_CPU(struct sc_data, sc_data)
*/
unsigned long long native_sched_clock(void)
{
- unsigned long long r;
struct sc_data *sc = &get_cpu_var(sc_data);
+ unsigned long long r;
+ unsigned long flags;

if (sc->unstable) {
- unsigned long flags;
r = (jiffies_64 - sc->sync_base) * (1000000000 / HZ);
r += sc->ns_base;
local_irq_save(flags);
- /* last_val is used to avoid non monotonity on a
- stable->unstable transition. Make sure the time
- never goes to before the last value returned by
- the TSC clock */
+ /*
+ * last_val is used to avoid non monotonity on a
+ * stable->unstable transition. Make sure the time
+ * never goes to before the last value returned by
+ * the TSC clock
+ */
if (r <= sc->last_val)
r = sc->last_val + 1;
sc->last_val = r;
@@ -87,8 +89,10 @@ unsigned long long native_sched_clock(vo
return r;
}

-/* We need to define a real function for sched_clock, to override the
- weak default version */
+/*
+ * We need to define a real function for sched_clock, to override the
+ * weak default version
+ */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
@@ -101,9 +105,11 @@ unsigned long long sched_clock(void)

static int no_sc_for_printk;

-/* printk clock: when it is known the sc results are very non monotonic
- fall back to jiffies for printk. Other sched_clock users are supposed
- to handle this. */
+/*
+ * printk clock: when it is known the sc results are very non monotonic
+ * fall back to jiffies for printk. Other sched_clock users are supposed
+ * to handle this.
+ */
unsigned long long printk_clock(void)
{
if (unlikely(no_sc_for_printk))
@@ -119,15 +125,19 @@ static void resync_sc_freq(struct sc_dat
sc->unstable = 1;
return;
}
- /* Handle nesting, but when we're zero multiple calls in a row
- are ok too and not a bug */
+ /*
+ * Handle nesting, but when we're zero multiple calls in a row
+ * are ok too and not a bug
+ */
if (sc->unstable > 0)
sc->unstable--;
if (sc->unstable)
return;
- /* RED-PEN protect with seqlock? I hope that's not needed
- because sched_clock callers should be able to tolerate small
- errors. */
+ /*
+ * RED-PEN protect with seqlock? I hope that's not needed
+ * because sched_clock callers should be able to tolerate small
+ * errors.
+ */
sc->ns_base = ktime_to_ns(ktime_get());
rdtscll(sc->sync_base);
sc->cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR) / newfreq;
@@ -137,6 +147,7 @@ static void call_r_s_f(void *arg)
{
struct cpufreq_freqs *freq = arg;
unsigned f = freq->new;
+
if (!f)
f = cpufreq_get(freq->cpu);
if (!f)
@@ -146,7 +157,9 @@ static void call_r_s_f(void *arg)

static void call_r_s_f_here(void *arg)
{
- struct cpufreq_freqs f = { .cpu = get_cpu(), .new = 0 };
+ struct cpufreq_freqs f = { .new = 0 };
+
+ f.cpu = get_cpu();
call_r_s_f(&f);
put_cpu();
}
@@ -155,9 +168,11 @@ static int sc_freq_event(struct notifier
void *data)
{
struct cpufreq_freqs *freq = data;
- int cpu = get_cpu();
- struct sc_data *sc = &per_cpu(sc_data, cpu);
+ struct sc_data *sc;
+ int cpu;

+ cpu = get_cpu();
+ sc = &per_cpu(sc_data, cpu);
if (cpu_has(&cpu_data[cpu], X86_FEATURE_CONSTANT_TSC))
goto out;
if (freq->old == freq->new)
@@ -167,16 +182,20 @@ static int sc_freq_event(struct notifier
case CPUFREQ_SUSPENDCHANGE:
/* Mark TSC unstable during suspend/resume */
case CPUFREQ_PRECHANGE:
- /* Mark TSC as unstable until cpu frequency change is done
- because we don't know when exactly it will change.
- unstable in used as a counter to guard against races
- between the cpu frequency notifiers and normal resyncs */
+ /*
+ * Mark TSC as unstable until cpu frequency change is done
+ * because we don't know when exactly it will change.
+ * unstable in used as a counter to guard against races
+ * between the cpu frequency notifiers and normal resyncs
+ */
sc->unstable++;
/* FALL THROUGH */
case CPUFREQ_RESUMECHANGE:
case CPUFREQ_POSTCHANGE:
- /* Frequency change or resume is done -- update everything and
- mark TSC as stable again. */
+ /*
+ * Frequency change or resume is done -- update everything
+ * and mark TSC as stable again.
+ */
if (cpu == freq->cpu)
resync_sc_freq(sc, freq->new);
else
@@ -186,6 +205,7 @@ static int sc_freq_event(struct notifier
}
out:
put_cpu();
+
return NOTIFY_DONE;
}

@@ -197,6 +217,7 @@ static int __cpuinit
sc_cpu_event(struct notifier_block *self, unsigned long event, void *hcpu)
{
long cpu = (long)hcpu;
+
if (event == CPU_ONLINE) {
struct cpufreq_freqs f = { .cpu = cpu, .new = 0 };
smp_call_function_single(cpu, call_r_s_f, &f, 0, 1);
@@ -209,13 +230,15 @@ static __init int init_sched_clock(void)
if (unsynchronized_tsc())
no_sc_for_printk = 1;

- /* On a race between the various events the initialization might be
- done multiple times, but code is tolerant to this */
+ /*
+ * On a race between the various events the initialization might be
+ * done multiple times, but code is tolerant to this
+ */
cpufreq_register_notifier(&sc_freq_notifier,
CPUFREQ_TRANSITION_NOTIFIER);
hotcpu_notifier(sc_cpu_event, 0);
on_each_cpu(call_r_s_f_here, NULL, 0, 0);
+
return 0;
}
core_initcall(init_sched_clock);
-


2007-05-25 07:22:55

by Satyam Sharma

Subject: Re: [patch] sched_clock(): cleanups

Hi Ingo,

On 5/25/07, Ingo Molnar <[email protected]> wrote:
> [...]
> @@ -137,6 +147,7 @@ static void call_r_s_f(void *arg)
> {
> struct cpufreq_freqs *freq = arg;
> unsigned f = freq->new;
> +
> if (!f)
> f = cpufreq_get(freq->cpu);
> if (!f)

if (!f)
f = cpufreq_get(freq->cpu);
if (!f)
f = tsc_khz;

?

Something's not quite right here :-)

Satyam

2007-05-25 07:25:29

by Ingo Molnar

Subject: Re: [patch] sched_clock(): cleanups


* Satyam Sharma <[email protected]> wrote:

> Hi Ingo,
>
> On 5/25/07, Ingo Molnar <[email protected]> wrote:
> >[...]
> >@@ -137,6 +147,7 @@ static void call_r_s_f(void *arg)
> > {
> > struct cpufreq_freqs *freq = arg;
> > unsigned f = freq->new;
> >+
> > if (!f)
> > f = cpufreq_get(freq->cpu);
> > if (!f)
>
> if (!f)
> f = cpufreq_get(freq->cpu);
> if (!f)
> f = tsc_khz;
>
> ?
>
> Something's not quite right here :-)

yeah, that looks like a real bug. When i looked at it during the cleanup
i guess i got distracted by the descriptive function name of
call_r_s_f() ... :-/

Ingo

2007-05-25 07:26:58

by Ingo Molnar

Subject: Re: [patch] sched_clock(): cleanups


> > if (!f)
> > f = cpufreq_get(freq->cpu);
> > if (!f)
> > f = tsc_khz;
> >
> > ?
> >
> > Something's not quite right here :-)

ah, that's fine. It does this: 'try to give f a value', and then: 'if
still no value then give it tsc_khz as a last resort'.

call_r_s_f() still needs an urgent renaming though B-)

Ingo

2007-05-25 07:32:16

by Andi Kleen

Subject: Re: [patch] sched_clock(): cleanups

> if (!f)
> f = cpufreq_get(freq->cpu);
> if (!f)
> f = tsc_khz;
>
> ?
>
> Something's not quite right here :-)

What do you think is wrong? cpufreq_get can return 0.

Admittedly the second test could be inside a block of the first,
but then it would make the code more ugly and this code is not
performance critical.

-Andi

2007-05-25 07:35:45

by Satyam Sharma

Subject: Re: [patch] sched_clock(): cleanups

On 5/25/07, Ingo Molnar <[email protected]> wrote:
>
> > > if (!f)
> > > f = cpufreq_get(freq->cpu);
> > > if (!f)
> > > f = tsc_khz;
> > >
> > > ?
> > >
> > > Something's not quite right here :-)
>
> ah, that's fine. It does this: 'try to give f a value', and then: 'if
> still no value then give it tsc_khz as a last resort'.

Ugh, yes, didn't know cpufreq_get can return 0.

> call_r_s_f() still needs an urgent renaming though B-)

So does "call_r_s_f_here()" :-)

Satyam

2007-05-25 07:38:44

by Andi Kleen

Subject: Re: [patch] sched_clock(): cleanups

On Fri, May 25, 2007 at 09:26:41AM +0200, Ingo Molnar wrote:
>
> > > if (!f)
> > > f = cpufreq_get(freq->cpu);
> > > if (!f)
> > > f = tsc_khz;
> > >
> > > ?
> > >
> > > Something's not quite right here :-)
>
> ah, that's fine. It does this: 'try to give f a value', and then: 'if
> still no value then give it tsc_khz as a last resort'.
>
> call_r_s_f() still needs an urgent renaming though B-)

The whole thing would be much simpler if

- cpu up callbacks had a way to request executing on the target CPU
[i looked at this, but it was a little more involved than a simple
cleanup]
- smp_call_function_single had sane semantics regarding calling
the same CPU similar to on_each_cpu()
- C had anonymous lambda functions with nice syntax (ok one can dream)

But right now so many callbacks are needed that I ran out of good
names.
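
(What I mean by "sane semantics" is that callers shouldn't have to
open-code a wrapper like the one below -- untested sketch, not an
existing kernel function:)

static int smp_call_function_single_any(int cpu, void (*func)(void *info),
                                        void *info, int wait)
{
        int ret = 0;

        preempt_disable();
        if (cpu == smp_processor_id())
                /* just run it locally, like on_each_cpu() does */
                func(info);
        else
                ret = smp_call_function_single(cpu, func, info, 0, wait);
        preempt_enable();

        return ret;
}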

-Andi

2007-05-25 07:40:04

by Peter Zijlstra

Subject: Re: [patch] sched_clock(): cleanups

On Fri, 2007-05-25 at 13:05 +0530, Satyam Sharma wrote:
> On 5/25/07, Ingo Molnar <[email protected]> wrote:

> > call_r_s_f() still needs an urgent renaming though B-)
>
> So does "call_r_s_f_here()" :-)

That name makes me think of INTERCAL's 'DO COME FROM' statement.
And any code that makes one think of INTERCAL is say,.. special.. :-)

2007-05-25 07:40:25

by Ingo Molnar

Subject: Re: [patch] sched_clock(): cleanups


* Andi Kleen <[email protected]> wrote:

> > if (!f)
> > f = cpufreq_get(freq->cpu);
> > if (!f)
> > f = tsc_khz;
> >
> > ?
> >
> > Something's not quite right here :-)
>
> What do you think is wrong? cpufreq_get can return 0.
>
> Admittedly the second test could be inside a block of the first, but
> then it would make the code more ugly and this code is not performance
> critical.

moving it inside the block makes the code _cleaner_ in fact :-)

Ingo

2007-05-25 07:43:34

by Ingo Molnar

Subject: Re: [patch] sched_clock(): cleanups


* Ingo Molnar <[email protected]> wrote:

> > Admittedly the second test could be inside a block of the first, but
> > then it would make the code more ugly and this code is not
> > performance critical.
>
> moving it inside the block makes the code _cleaner_ in fact :-)

updated version of the cleanup patch below, renamed r_s_c to
resync_freq, and changed 'f' to 'freq'.

Ingo

---------------->
Subject: [patch] sched_clock(): cleanups
From: Ingo Molnar <[email protected]>

clean up sched-clock.c - mostly comment style fixes.

Signed-off-by: Ingo Molnar <[email protected]>
---
arch/i386/kernel/sched-clock.c | 108 +++++++++++++++++++++++++----------------
1 file changed, 68 insertions(+), 40 deletions(-)

Index: linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/sched-clock.c
===================================================================
--- linux-cfs-2.6.22-rc2-mm1.q.orig/arch/i386/kernel/sched-clock.c
+++ linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/sched-clock.c
@@ -60,18 +60,20 @@ DEFINE_PER_CPU(struct sc_data, sc_data)
*/
unsigned long long native_sched_clock(void)
{
- unsigned long long r;
struct sc_data *sc = &get_cpu_var(sc_data);
+ unsigned long long r;
+ unsigned long flags;

if (sc->unstable) {
- unsigned long flags;
r = (jiffies_64 - sc->sync_base) * (1000000000 / HZ);
r += sc->ns_base;
local_irq_save(flags);
- /* last_val is used to avoid non monotonity on a
- stable->unstable transition. Make sure the time
- never goes to before the last value returned by
- the TSC clock */
+ /*
+ * last_val is used to avoid non monotonity on a
+ * stable->unstable transition. Make sure the time
+ * never goes to before the last value returned by
+ * the TSC clock
+ */
if (r <= sc->last_val)
r = sc->last_val + 1;
sc->last_val = r;
@@ -87,8 +89,10 @@ unsigned long long native_sched_clock(vo
return r;
}

-/* We need to define a real function for sched_clock, to override the
- weak default version */
+/*
+ * We need to define a real function for sched_clock, to override the
+ * weak default version
+ */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
@@ -101,9 +105,11 @@ unsigned long long sched_clock(void)

static int no_sc_for_printk;

-/* printk clock: when it is known the sc results are very non monotonic
- fall back to jiffies for printk. Other sched_clock users are supposed
- to handle this. */
+/*
+ * printk clock: when it is known the sc results are very non monotonic
+ * fall back to jiffies for printk. Other sched_clock users are supposed
+ * to handle this.
+ */
unsigned long long printk_clock(void)
{
if (unlikely(no_sc_for_printk))
@@ -119,35 +125,46 @@ static void resync_sc_freq(struct sc_dat
sc->unstable = 1;
return;
}
- /* Handle nesting, but when we're zero multiple calls in a row
- are ok too and not a bug */
+ /*
+ * Handle nesting, but when we're zero multiple calls in a row
+ * are ok too and not a bug
+ */
if (sc->unstable > 0)
sc->unstable--;
if (sc->unstable)
return;
- /* RED-PEN protect with seqlock? I hope that's not needed
- because sched_clock callers should be able to tolerate small
- errors. */
+ /*
+ * RED-PEN protect with seqlock? I hope that's not needed
+ * because sched_clock callers should be able to tolerate small
+ * errors.
+ */
sc->ns_base = ktime_to_ns(ktime_get());
rdtscll(sc->sync_base);
sc->cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR) / newfreq;
}

-static void call_r_s_f(void *arg)
+static void __resync_freq(void *arg)
{
struct cpufreq_freqs *freq = arg;
- unsigned f = freq->new;
- if (!f)
- f = cpufreq_get(freq->cpu);
- if (!f)
- f = tsc_khz;
- resync_sc_freq(&per_cpu(sc_data, freq->cpu), f);
+ unsigned freq = freq->new;
+
+ if (!freq) {
+ freq = cpufreq_get(freq->cpu);
+ /*
+ * Still no frequency? Then fall back to tsc_khz:
+ */
+ if (!freq)
+ freq = tsc_khz;
+ }
+ resync_sc_freq(&per_cpu(sc_data, freq->cpu), freq);
}

-static void call_r_s_f_here(void *arg)
+static void resync_freq(void *arg)
{
- struct cpufreq_freqs f = { .cpu = get_cpu(), .new = 0 };
- call_r_s_f(&f);
+ struct cpufreq_freqs f = { .new = 0 };
+
+ f.cpu = get_cpu();
+ __resync_freq(&f);
put_cpu();
}

@@ -155,9 +172,11 @@ static int sc_freq_event(struct notifier
void *data)
{
struct cpufreq_freqs *freq = data;
- int cpu = get_cpu();
- struct sc_data *sc = &per_cpu(sc_data, cpu);
+ struct sc_data *sc;
+ int cpu;

+ cpu = get_cpu();
+ sc = &per_cpu(sc_data, cpu);
if (cpu_has(&cpu_data[cpu], X86_FEATURE_CONSTANT_TSC))
goto out;
if (freq->old == freq->new)
@@ -167,25 +186,30 @@ static int sc_freq_event(struct notifier
case CPUFREQ_SUSPENDCHANGE:
/* Mark TSC unstable during suspend/resume */
case CPUFREQ_PRECHANGE:
- /* Mark TSC as unstable until cpu frequency change is done
- because we don't know when exactly it will change.
- unstable in used as a counter to guard against races
- between the cpu frequency notifiers and normal resyncs */
+ /*
+ * Mark TSC as unstable until cpu frequency change is done
+ * because we don't know when exactly it will change.
+ * unstable in used as a counter to guard against races
+ * between the cpu frequency notifiers and normal resyncs
+ */
sc->unstable++;
/* FALL THROUGH */
case CPUFREQ_RESUMECHANGE:
case CPUFREQ_POSTCHANGE:
- /* Frequency change or resume is done -- update everything and
- mark TSC as stable again. */
+ /*
+ * Frequency change or resume is done -- update everything
+ * and mark TSC as stable again.
+ */
if (cpu == freq->cpu)
resync_sc_freq(sc, freq->new);
else
- smp_call_function_single(freq->cpu, call_r_s_f,
+ smp_call_function_single(freq->cpu, __resync_freq,
freq, 0, 1);
break;
}
out:
put_cpu();
+
return NOTIFY_DONE;
}

@@ -197,9 +221,11 @@ static int __cpuinit
sc_cpu_event(struct notifier_block *self, unsigned long event, void *hcpu)
{
long cpu = (long)hcpu;
+
if (event == CPU_ONLINE) {
struct cpufreq_freqs f = { .cpu = cpu, .new = 0 };
- smp_call_function_single(cpu, call_r_s_f, &f, 0, 1);
+
+ smp_call_function_single(cpu, __resync_freq, &f, 0, 1);
}
return NOTIFY_DONE;
}
@@ -209,13 +235,15 @@ static __init int init_sched_clock(void)
if (unsynchronized_tsc())
no_sc_for_printk = 1;

- /* On a race between the various events the initialization might be
- done multiple times, but code is tolerant to this */
+ /*
+ * On a race between the various events the initialization might be
+ * done multiple times, but code is tolerant to this
+ */
cpufreq_register_notifier(&sc_freq_notifier,
CPUFREQ_TRANSITION_NOTIFIER);
hotcpu_notifier(sc_cpu_event, 0);
- on_each_cpu(call_r_s_f_here, NULL, 0, 0);
+ on_each_cpu(resync_freq, NULL, 0, 0);
+
return 0;
}
core_initcall(init_sched_clock);
-

2007-05-25 07:49:35

by Ingo Molnar

Subject: Re: [patch] sched_clock(): cleanups


* Ingo Molnar <[email protected]> wrote:

> updated version of the cleanup patch below, renamed r_s_c to
> resync_freq, and changed 'f' to 'freq'.

updated one below.

Ingo

------------------->
Subject: [patch] sched_clock(): cleanups
From: Ingo Molnar <[email protected]>

clean up sched-clock.c - mostly comment style fixes.

Signed-off-by: Ingo Molnar <[email protected]>
---
arch/i386/kernel/sched-clock.c | 110 +++++++++++++++++++++++++----------------
1 file changed, 69 insertions(+), 41 deletions(-)

Index: linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/sched-clock.c
===================================================================
--- linux-cfs-2.6.22-rc2-mm1.q.orig/arch/i386/kernel/sched-clock.c
+++ linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/sched-clock.c
@@ -60,18 +60,20 @@ DEFINE_PER_CPU(struct sc_data, sc_data)
*/
unsigned long long native_sched_clock(void)
{
- unsigned long long r;
struct sc_data *sc = &get_cpu_var(sc_data);
+ unsigned long long r;
+ unsigned long flags;

if (sc->unstable) {
- unsigned long flags;
r = (jiffies_64 - sc->sync_base) * (1000000000 / HZ);
r += sc->ns_base;
local_irq_save(flags);
- /* last_val is used to avoid non monotonity on a
- stable->unstable transition. Make sure the time
- never goes to before the last value returned by
- the TSC clock */
+ /*
+ * last_val is used to avoid non monotonity on a
+ * stable->unstable transition. Make sure the time
+ * never goes to before the last value returned by
+ * the TSC clock
+ */
if (r <= sc->last_val)
r = sc->last_val + 1;
sc->last_val = r;
@@ -87,8 +89,10 @@ unsigned long long native_sched_clock(vo
return r;
}

-/* We need to define a real function for sched_clock, to override the
- weak default version */
+/*
+ * We need to define a real function for sched_clock, to override the
+ * weak default version
+ */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
@@ -101,9 +105,11 @@ unsigned long long sched_clock(void)

static int no_sc_for_printk;

-/* printk clock: when it is known the sc results are very non monotonic
- fall back to jiffies for printk. Other sched_clock users are supposed
- to handle this. */
+/*
+ * printk clock: when it is known the sc results are very non monotonic
+ * fall back to jiffies for printk. Other sched_clock users are supposed
+ * to handle this.
+ */
unsigned long long printk_clock(void)
{
if (unlikely(no_sc_for_printk))
@@ -119,35 +125,46 @@ static void resync_sc_freq(struct sc_dat
sc->unstable = 1;
return;
}
- /* Handle nesting, but when we're zero multiple calls in a row
- are ok too and not a bug */
+ /*
+ * Handle nesting, but when we're zero multiple calls in a row
+ * are ok too and not a bug
+ */
if (sc->unstable > 0)
sc->unstable--;
if (sc->unstable)
return;
- /* RED-PEN protect with seqlock? I hope that's not needed
- because sched_clock callers should be able to tolerate small
- errors. */
+ /*
+ * RED-PEN protect with seqlock? I hope that's not needed
+ * because sched_clock callers should be able to tolerate small
+ * errors.
+ */
sc->ns_base = ktime_to_ns(ktime_get());
rdtscll(sc->sync_base);
sc->cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR) / newfreq;
}

-static void call_r_s_f(void *arg)
+static void __resync_freq(void *arg)
{
- struct cpufreq_freqs *freq = arg;
- unsigned f = freq->new;
- if (!f)
- f = cpufreq_get(freq->cpu);
- if (!f)
- f = tsc_khz;
- resync_sc_freq(&per_cpu(sc_data, freq->cpu), f);
+ struct cpufreq_freqs *freqp = arg;
+ unsigned freq = freqp->new;
+
+ if (!freq) {
+ freq = cpufreq_get(freqp->cpu);
+ /*
+ * Still no frequency? Then fall back to tsc_khz:
+ */
+ if (!freq)
+ freq = tsc_khz;
+ }
+ resync_sc_freq(&per_cpu(sc_data, freqp->cpu), freq);
}

-static void call_r_s_f_here(void *arg)
+static void resync_freq(void *arg)
{
- struct cpufreq_freqs f = { .cpu = get_cpu(), .new = 0 };
- call_r_s_f(&f);
+ struct cpufreq_freqs f = { .new = 0 };
+
+ f.cpu = get_cpu();
+ __resync_freq(&f);
put_cpu();
}

@@ -155,9 +172,11 @@ static int sc_freq_event(struct notifier
void *data)
{
struct cpufreq_freqs *freq = data;
- int cpu = get_cpu();
- struct sc_data *sc = &per_cpu(sc_data, cpu);
+ struct sc_data *sc;
+ int cpu;

+ cpu = get_cpu();
+ sc = &per_cpu(sc_data, cpu);
if (cpu_has(&cpu_data[cpu], X86_FEATURE_CONSTANT_TSC))
goto out;
if (freq->old == freq->new)
@@ -167,25 +186,30 @@ static int sc_freq_event(struct notifier
case CPUFREQ_SUSPENDCHANGE:
/* Mark TSC unstable during suspend/resume */
case CPUFREQ_PRECHANGE:
- /* Mark TSC as unstable until cpu frequency change is done
- because we don't know when exactly it will change.
- unstable in used as a counter to guard against races
- between the cpu frequency notifiers and normal resyncs */
+ /*
+ * Mark TSC as unstable until cpu frequency change is done
+ * because we don't know when exactly it will change.
+ * unstable in used as a counter to guard against races
+ * between the cpu frequency notifiers and normal resyncs
+ */
sc->unstable++;
/* FALL THROUGH */
case CPUFREQ_RESUMECHANGE:
case CPUFREQ_POSTCHANGE:
- /* Frequency change or resume is done -- update everything and
- mark TSC as stable again. */
+ /*
+ * Frequency change or resume is done -- update everything
+ * and mark TSC as stable again.
+ */
if (cpu == freq->cpu)
resync_sc_freq(sc, freq->new);
else
- smp_call_function_single(freq->cpu, call_r_s_f,
+ smp_call_function_single(freq->cpu, __resync_freq,
freq, 0, 1);
break;
}
out:
put_cpu();
+
return NOTIFY_DONE;
}

@@ -197,9 +221,11 @@ static int __cpuinit
sc_cpu_event(struct notifier_block *self, unsigned long event, void *hcpu)
{
long cpu = (long)hcpu;
+
if (event == CPU_ONLINE) {
struct cpufreq_freqs f = { .cpu = cpu, .new = 0 };
- smp_call_function_single(cpu, call_r_s_f, &f, 0, 1);
+
+ smp_call_function_single(cpu, __resync_freq, &f, 0, 1);
}
return NOTIFY_DONE;
}
@@ -209,13 +235,15 @@ static __init int init_sched_clock(void)
if (unsynchronized_tsc())
no_sc_for_printk = 1;

- /* On a race between the various events the initialization might be
- done multiple times, but code is tolerant to this */
+ /*
+ * On a race between the various events the initialization might be
+ * done multiple times, but code is tolerant to this
+ */
cpufreq_register_notifier(&sc_freq_notifier,
CPUFREQ_TRANSITION_NOTIFIER);
hotcpu_notifier(sc_cpu_event, 0);
- on_each_cpu(call_r_s_f_here, NULL, 0, 0);
+ on_each_cpu(resync_freq, NULL, 0, 0);
+
return 0;
}
core_initcall(init_sched_clock);
-

2007-05-25 07:55:21

by Ingo Molnar

Subject: [patch] x86_64: fix sched_clock()

Subject: [patch] x86_64: fix sched_clock()
From: Ingo Molnar <[email protected]>

sched_clock() is totally broken on x86_64, because it is not defined by
the architecture at all! It fell victim to the opaqueness of
__attribute__((weak)) and to non-testing.

the i386 version was supposed to be used. This patch fixes that. Booted
and tested on x86_64 and i386.

Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86_64/kernel/Makefile | 3 ++-
include/asm-x86_64/tsc.h | 5 +++++
2 files changed, 7 insertions(+), 1 deletion(-)

Index: linux-cfs-2.6.22-rc2-mm1.q/arch/x86_64/kernel/Makefile
===================================================================
--- linux-cfs-2.6.22-rc2-mm1.q.orig/arch/x86_64/kernel/Makefile
+++ linux-cfs-2.6.22-rc2-mm1.q/arch/x86_64/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y := process.o signal.o entry.o trap
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o bugs.o \
- perfctr-watchdog.o
+ perfctr-watchdog.o sched-clock.o

obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o
@@ -49,6 +49,7 @@ obj-y += pcspeaker.o

CFLAGS_vsyscall.o := $(PROFILING) -g0

+sched-clock-y += ../../i386/kernel/sched-clock.o
therm_throt-y += ../../i386/kernel/cpu/mcheck/therm_throt.o
bootflag-y += ../../i386/kernel/bootflag.o
legacy_serial-y += ../../i386/kernel/legacy_serial.o
Index: linux-cfs-2.6.22-rc2-mm1.q/include/asm-x86_64/tsc.h
===================================================================
--- linux-cfs-2.6.22-rc2-mm1.q.orig/include/asm-x86_64/tsc.h
+++ linux-cfs-2.6.22-rc2-mm1.q/include/asm-x86_64/tsc.h
@@ -1 +1,6 @@
#include <asm-i386/tsc.h>
+
+/*
+ * To be able to share sched-clock.c between i386 and x86_64:
+ */
+#define tsc_disable 0

2007-05-25 07:58:34

by Andi Kleen

Subject: Re: [patch] sched_clock(): cleanups

On Fri, May 25, 2007 at 09:39:33AM +0200, Peter Zijlstra wrote:
> On Fri, 2007-05-25 at 13:05 +0530, Satyam Sharma wrote:
> > On 5/25/07, Ingo Molnar <[email protected]> wrote:
>
> > > call_r_s_f() still needs an urgent renaming though B-)
> >
> > So does "call_r_s_f_here()" :-)
>
> That name makes me think of INTERCAL's 'DO COME FROM' statement.
> And any code that makes one think of INTERCAL is say,.. special.. :-)

Propose a better way to code this then? It's not my fault that dealing with
callbacks in C is so messy. _here just massages one callback
prototype (smp_call_function's) into another (cpufreq's) because
both callbacks do the same in this case.

The r_s_f BTW stands for resync_sc_freq which is a function earlier
in the file and should be familiar to a serious reader.

-Andi

2007-05-25 08:02:28

by Andi Kleen

Subject: Re: [patch] x86_64: fix sched_clock()

On Fri, May 25, 2007 at 09:54:46AM +0200, Ingo Molnar wrote:
> Subject: [patch] x86_64: fix sched_clock()
> From: Ingo Molnar <[email protected]>
>
> sched_clock() is totally broken on x86_64, because it is not defined by
> the architecture at all! It fell victim to the opaqueness of
> __attribute__((weak)) and to non-testing.

More non retesting.

>
> the i386 version was supposed to be used. This patch fixes that. Booted
> and tested on x86_64 and i386.

Hmm indeed. I actually had it correct at some point (i remember
fixing 64bit compile errors in sched-clock ;-). I guess the Makefile hunk
accidentally dropped out during some later merging and this didn't get
noticed due to the weak attribute.

That also explains the issue reported by Peter earlier I guess.

-Andi

2007-05-25 08:03:27

by Ingo Molnar

Subject: Re: [patch] sched_clock(): cleanups


* Andi Kleen <[email protected]> wrote:

> Propose a better way to code this then? It's not my fault that dealing
> with callbacks in C is so messy. _here just massages one callback
> prototype (smp_call_function's) into another (cpufreq's) because both
> callbacks do the same in this case.

see the last iteration of the cleanups i did. Naming the function after
what it does, and prefixing the preempt-unsafe one __ does the trick.

> The r_s_f BTW stands for resync_sc_freq which is a function earlier in
> the file and should be familiar to a serious reader.

I consider myself a serious reader and it wasnt obvious to me. Names
must always be descriptive, we cannot hold all the details in our heads
all the time.

Ingo

2007-05-25 08:04:43

by Ingo Molnar

Subject: Re: [patch] x86_64: fix sched_clock()


* Andi Kleen <[email protected]> wrote:

> > the i386 version was supposed to be used. This patch fixes that.
> > Booted and tested on x86_64 and i386.
>
> Hmm indeed. I actually had it correct at some point (i remember fixing
> 64bit compile errors in sched-clock ;-). I guess the Makefile hunk
> accidentally dropped out during some later merging and this didn't
> get noticed due to the weak attribute.

well the tsc.h bit was needed too.

> That also explains the issue reported by Peter earlier I guess.

yes, it does, at least on my box.

Ingo

2007-05-25 08:08:46

by Ingo Molnar

Subject: [patch] i386, numaq: enable TSCs again


Andi, Andrew, do you remember why we disabled TSCs on NUMAQ? It was
slightly async between CPUs, right? In that case we should try the patch
below.

Ingo

--------------------->
Subject: [patch] i386, numaq: enable TSCs again
From: Ingo Molnar <[email protected]>

enable TSCs on NUMAQ again. sched_clock() got improved and
the scheduler treats it as a per-CPU clock, so there should
be no issues from the slightly async TSCs on NUMAQ. Please
holler if this is wrong ...

Signed-off-by: Ingo Molnar <[email protected]>
---
arch/i386/kernel/numaq.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

Index: linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/numaq.c
===================================================================
--- linux-cfs-2.6.22-rc2-mm1.q.orig/arch/i386/kernel/numaq.c
+++ linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/numaq.c
@@ -81,8 +81,7 @@ int __init get_memcfg_numaq(void)
static int __init numaq_tsc_disable(void)
{
if (num_online_nodes() > 1) {
- printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
- tsc_disable = 1;
+ printk(KERN_DEBUG "NUMAQ: NOT disabling TSC. System still ok?\n");
}
return 0;
}

2007-05-25 08:15:28

by Peter Zijlstra

Subject: Re: [patch] x86_64: fix sched_clock()

On Fri, 2007-05-25 at 09:54 +0200, Ingo Molnar wrote:
> Subject: [patch] x86_64: fix sched_clock()
> From: Ingo Molnar <[email protected]>
>
> sched_clock() is totally broken on x86_64, because it is not defined by
> the architecture at all! It fell victim to the opaqueness of
> __attribute__((weak)) and to non-testing.
>
> the i386 version was supposed to be used. This patch fixes that. Booted
> and tested on x86_64 and i386.
>
> Signed-off-by: Ingo Molnar <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>

This does indeed solve my issue.

> ---
> arch/x86_64/kernel/Makefile | 3 ++-
> include/asm-x86_64/tsc.h | 5 +++++
> 2 files changed, 7 insertions(+), 1 deletion(-)
>
> Index: linux-cfs-2.6.22-rc2-mm1.q/arch/x86_64/kernel/Makefile
> ===================================================================
> --- linux-cfs-2.6.22-rc2-mm1.q.orig/arch/x86_64/kernel/Makefile
> +++ linux-cfs-2.6.22-rc2-mm1.q/arch/x86_64/kernel/Makefile
> @@ -9,7 +9,7 @@ obj-y := process.o signal.o entry.o trap
> x8664_ksyms.o i387.o syscall.o vsyscall.o \
> setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
> pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o bugs.o \
> - perfctr-watchdog.o
> + perfctr-watchdog.o sched-clock.o
>
> obj-$(CONFIG_STACKTRACE) += stacktrace.o
> obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o
> @@ -49,6 +49,7 @@ obj-y += pcspeaker.o
>
> CFLAGS_vsyscall.o := $(PROFILING) -g0
>
> +sched-clock-y += ../../i386/kernel/sched-clock.o
> therm_throt-y += ../../i386/kernel/cpu/mcheck/therm_throt.o
> bootflag-y += ../../i386/kernel/bootflag.o
> legacy_serial-y += ../../i386/kernel/legacy_serial.o
> Index: linux-cfs-2.6.22-rc2-mm1.q/include/asm-x86_64/tsc.h
> ===================================================================
> --- linux-cfs-2.6.22-rc2-mm1.q.orig/include/asm-x86_64/tsc.h
> +++ linux-cfs-2.6.22-rc2-mm1.q/include/asm-x86_64/tsc.h
> @@ -1 +1,6 @@
> #include <asm-i386/tsc.h>
> +
> +/*
> + * To be able to share sched-clock.c between i386 and x86_64:
> + */
> +#define tsc_disable 0
>

2007-05-25 08:17:07

by Peter Zijlstra

Subject: Re: [patch] sched_clock(): cleanups

On Fri, 2007-05-25 at 09:58 +0200, Andi Kleen wrote:
> On Fri, May 25, 2007 at 09:39:33AM +0200, Peter Zijlstra wrote:
> > On Fri, 2007-05-25 at 13:05 +0530, Satyam Sharma wrote:
> > > On 5/25/07, Ingo Molnar <[email protected]> wrote:
> >
> > > > call_r_s_f() still needs an urgent renaming though B-)
> > >
> > > So does "call_r_s_f_here()" :-)
> >
> > That name makes me think of INTERCAL's 'DO COME FROM' statement.
> > And any code that makes one think of INTERCAL is say,.. special.. :-)
>
> Propose a better way to code this then? It's not my fault that dealing with
> callbacks in C is so messy. _here just massages one callback
> prototype (smp_call_function's) into another (cpufreq's) because
> both callbacks do the same in this case.

I see your point; however, a function called:
call_<some_other_function>_here() just doesn't make sense. It says as
much as: we should call some_other_function() but for some reason we
dont.

> The r_s_f BTW stands for resync_sc_freq which is a function earlier
> in the file and should be familiar to a serious reader.

It was.

2007-05-25 08:17:35

by Ingo Molnar

Subject: Re: [patch] x86_64: fix sched_clock()


* Peter Zijlstra <[email protected]> wrote:

> On Fri, 2007-05-25 at 09:54 +0200, Ingo Molnar wrote:
> > Subject: [patch] x86_64: fix sched_clock()
> > From: Ingo Molnar <[email protected]>
> >
> > sched_clock() is totally broken on x86_64, because it is not defined by
> > the architecture at all! It fell the victim to the opaqueness of
> > __attribute__((weak)) and to non-testing.
> >
> > the i386 version was supposed to be used. This patch fixes that. Booted
> > and tested on x86_64 and i386.
> >
> > Signed-off-by: Ingo Molnar <[email protected]>
> Acked-by: Peter Zijlstra <[email protected]>
>
> This does indeed solve my issue.

great! Btw., this is also accidental proof that CFS works well even with
a jiffies granularity sched_clock() ;-)
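
(for reference, the weak fallback that x86_64 had been silently using is,
from memory, roughly this jiffies-granularity one from kernel/sched.c:)

/*
 * Architectures are expected to override this with a strong definition;
 * if the arch object quietly drops out of the build, this weak version
 * wins without so much as a warning:
 */
unsigned long long __attribute__((weak)) sched_clock(void)
{
        return (unsigned long long)jiffies * (1000000000 / HZ);
}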

Ingo

2007-05-25 08:19:15

by William Lee Irwin III

Subject: Re: [patch] i386, numaq: enable TSCs again

On Fri, May 25, 2007 at 10:08:27AM +0200, Ingo Molnar wrote:
> Andi, Andrew, do you remember why we disabled TSCs on NUMAQ? It was
> slightly async between CPUs, right? In that case we should try the patch
> below.

I remember. It was far beyond "slightly async;" they would drift
minutes apart during reasonable amounts of uptime, though it would take
at least several days to drift so far (I don't recall how long it took).

TSC synchronization is uniformly impossible on NUMA-Q. Bootlogs showing
the results of the attempts are still extant. They shouldn't end up too
far apart right after booting, but I don't have even ballpark estimates.
I'd hazard a guess of a few seconds.

NUMA-Q's also supported mixed CPU models in hardware, though that's
not really expected to be handled by Linux. I suspect DYNIX/ptx would
be used by anyone interested in that.


-- wli

2007-05-25 08:20:41

by Andi Kleen

Subject: Re: [patch] x86_64: fix sched_clock()

On Fri, May 25, 2007 at 10:04:15AM +0200, Ingo Molnar wrote:
>
> * Andi Kleen <[email protected]> wrote:
>
> > > the i386 version was supposed to be used. This patch fixes that.
> > > Booted and tested on x86_64 and i386.
> >
> > Hmm indeed. I actually had it correct at some point (i remember fixing
> > 64bit compile errors in sched-clock ;-). I guess the Makefile hunk
> > accidentally dropped out during some later merging and this didn't
> > get noticed due to the weak attribute.
>
> well the tsc.h bit was needed too.

It's not in defconfig at least. I just tried it.

-Andi

2007-05-25 08:22:05

by Andi Kleen

Subject: Re: [patch] sched_clock(): cleanups

On Fri, May 25, 2007 at 10:16:39AM +0200, Peter Zijlstra wrote:
> On Fri, 2007-05-25 at 09:58 +0200, Andi Kleen wrote:
> > On Fri, May 25, 2007 at 09:39:33AM +0200, Peter Zijlstra wrote:
> > > On Fri, 2007-05-25 at 13:05 +0530, Satyam Sharma wrote:
> > > > On 5/25/07, Ingo Molnar <[email protected]> wrote:
> > >
> > > > > call_r_s_f() still needs an urgent renaming though B-)
> > > >
> > > > So does "call_r_s_f_here()" :-)
> > >
> > > That name makes me think of INTERCAL's 'DO COME FROM' statement.
> > > And any code that makes one think of INTERCAL is say,.. special.. :-)
> >
> > Propose a better way to code this then? It's not my fault that dealing with
> > callbacks in C is so messy. _here just massages one callback
> > prototype (smp_call_function's) into another (cpufreq's) because
> > both callbacks do the same in this case.
>
> I see your point; however, a function called:
> call_<some_other_function>_here() just doesn't make sense. It says as
> much as: we should call some_other_function() but for some reason we
> dont.

It's just different semantics between cpufreq and smp_call_functions.
cpufreq doesn't execute on that CPU but gives you the cpu number,
smp_call_* executes on that CPU but doesn't give you a cpu number.
_here means call cpufreq callback on the current CPU.
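
(i.e. the two callback shapes being bridged are roughly these:)

/*
 * cpufreq transition notifier: may run on any CPU, but hands you a
 * struct cpufreq_freqs whose ->cpu says which CPU changed frequency:
 */
static int sc_freq_event(struct notifier_block *nb, unsigned long event,
                         void *data);

/*
 * smp_call_function_single(): runs func(info) on the chosen CPU, but
 * func() itself only gets the opaque info pointer:
 */
int smp_call_function_single(int cpu, void (*func)(void *info), void *info,
                             int nonatomic, int wait);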

-Andi

2007-05-25 08:22:31

by Peter Zijlstra

Subject: Re: [patch] sched_clock(): cleanups

On Fri, 2007-05-25 at 09:49 +0200, Ingo Molnar wrote:
> * Ingo Molnar <[email protected]> wrote:
>
> > updated version of the cleanup patch below, renamed r_s_c to
> > resync_freq, and changed 'f' to 'freq'.
>
> updated one below.
>
> Ingo
>
> ------------------->
> Subject: [patch] sched_clock(): cleanups
> From: Ingo Molnar <[email protected]>
>
> clean up sched-clock.c - mostly comment style fixes.
>
> Signed-off-by: Ingo Molnar <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>

Looks good.

> ---
> arch/i386/kernel/sched-clock.c | 110 +++++++++++++++++++++++++----------------
> 1 file changed, 69 insertions(+), 41 deletions(-)
>
> Index: linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/sched-clock.c
> ===================================================================
> --- linux-cfs-2.6.22-rc2-mm1.q.orig/arch/i386/kernel/sched-clock.c
> +++ linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/sched-clock.c
> @@ -60,18 +60,20 @@ DEFINE_PER_CPU(struct sc_data, sc_data)
> */
> unsigned long long native_sched_clock(void)
> {
> - unsigned long long r;
> struct sc_data *sc = &get_cpu_var(sc_data);
> + unsigned long long r;
> + unsigned long flags;
>
> if (sc->unstable) {
> - unsigned long flags;
> r = (jiffies_64 - sc->sync_base) * (1000000000 / HZ);
> r += sc->ns_base;
> local_irq_save(flags);
> - /* last_val is used to avoid non monotonity on a
> - stable->unstable transition. Make sure the time
> - never goes to before the last value returned by
> - the TSC clock */
> + /*
> + * last_val is used to avoid non monotonity on a
> + * stable->unstable transition. Make sure the time
> + * never goes to before the last value returned by
> + * the TSC clock
> + */
> if (r <= sc->last_val)
> r = sc->last_val + 1;
> sc->last_val = r;
> @@ -87,8 +89,10 @@ unsigned long long native_sched_clock(vo
> return r;
> }
>
> -/* We need to define a real function for sched_clock, to override the
> - weak default version */
> +/*
> + * We need to define a real function for sched_clock, to override the
> + * weak default version
> + */
> #ifdef CONFIG_PARAVIRT
> unsigned long long sched_clock(void)
> {
> @@ -101,9 +105,11 @@ unsigned long long sched_clock(void)
>
> static int no_sc_for_printk;
>
> -/* printk clock: when it is known the sc results are very non monotonic
> - fall back to jiffies for printk. Other sched_clock users are supposed
> - to handle this. */
> +/*
> + * printk clock: when it is known the sc results are very non monotonic
> + * fall back to jiffies for printk. Other sched_clock users are supposed
> + * to handle this.
> + */
> unsigned long long printk_clock(void)
> {
> if (unlikely(no_sc_for_printk))
> @@ -119,35 +125,46 @@ static void resync_sc_freq(struct sc_dat
> sc->unstable = 1;
> return;
> }
> - /* Handle nesting, but when we're zero multiple calls in a row
> - are ok too and not a bug */
> + /*
> + * Handle nesting, but when we're zero multiple calls in a row
> + * are ok too and not a bug
> + */
> if (sc->unstable > 0)
> sc->unstable--;
> if (sc->unstable)
> return;
> - /* RED-PEN protect with seqlock? I hope that's not needed
> - because sched_clock callers should be able to tolerate small
> - errors. */
> + /*
> + * RED-PEN protect with seqlock? I hope that's not needed
> + * because sched_clock callers should be able to tolerate small
> + * errors.
> + */
> sc->ns_base = ktime_to_ns(ktime_get());
> rdtscll(sc->sync_base);
> sc->cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR) / newfreq;
> }
>
> -static void call_r_s_f(void *arg)
> +static void __resync_freq(void *arg)
> {
> - struct cpufreq_freqs *freq = arg;
> - unsigned f = freq->new;
> - if (!f)
> - f = cpufreq_get(freq->cpu);
> - if (!f)
> - f = tsc_khz;
> - resync_sc_freq(&per_cpu(sc_data, freq->cpu), f);
> + struct cpufreq_freqs *freqp = arg;
> + unsigned freq = freqp->new;
> +
> + if (!freq) {
> + freq = cpufreq_get(freqp->cpu);
> + /*
> + * Still no frequency? Then fall back to tsc_khz:
> + */
> + if (!freq)
> + freq = tsc_khz;
> + }
> + resync_sc_freq(&per_cpu(sc_data, freqp->cpu), freq);
> }
>
> -static void call_r_s_f_here(void *arg)
> +static void resync_freq(void *arg)
> {
> - struct cpufreq_freqs f = { .cpu = get_cpu(), .new = 0 };
> - call_r_s_f(&f);
> + struct cpufreq_freqs f = { .new = 0 };
> +
> + f.cpu = get_cpu();
> + __resync_freq(&f);
> put_cpu();
> }
>
> @@ -155,9 +172,11 @@ static int sc_freq_event(struct notifier
> void *data)
> {
> struct cpufreq_freqs *freq = data;
> - int cpu = get_cpu();
> - struct sc_data *sc = &per_cpu(sc_data, cpu);
> + struct sc_data *sc;
> + int cpu;
>
> + cpu = get_cpu();
> + sc = &per_cpu(sc_data, cpu);
> if (cpu_has(&cpu_data[cpu], X86_FEATURE_CONSTANT_TSC))
> goto out;
> if (freq->old == freq->new)
> @@ -167,25 +186,30 @@ static int sc_freq_event(struct notifier
> case CPUFREQ_SUSPENDCHANGE:
> /* Mark TSC unstable during suspend/resume */
> case CPUFREQ_PRECHANGE:
> - /* Mark TSC as unstable until cpu frequency change is done
> - because we don't know when exactly it will change.
> - unstable in used as a counter to guard against races
> - between the cpu frequency notifiers and normal resyncs */
> + /*
> + * Mark TSC as unstable until cpu frequency change is done
> + * because we don't know when exactly it will change.
> + * unstable in used as a counter to guard against races
> + * between the cpu frequency notifiers and normal resyncs
> + */
> sc->unstable++;
> /* FALL THROUGH */
> case CPUFREQ_RESUMECHANGE:
> case CPUFREQ_POSTCHANGE:
> - /* Frequency change or resume is done -- update everything and
> - mark TSC as stable again. */
> + /*
> + * Frequency change or resume is done -- update everything
> + * and mark TSC as stable again.
> + */
> if (cpu == freq->cpu)
> resync_sc_freq(sc, freq->new);
> else
> - smp_call_function_single(freq->cpu, call_r_s_f,
> + smp_call_function_single(freq->cpu, __resync_freq,
> freq, 0, 1);
> break;
> }
> out:
> put_cpu();
> +
> return NOTIFY_DONE;
> }
>
> @@ -197,9 +221,11 @@ static int __cpuinit
> sc_cpu_event(struct notifier_block *self, unsigned long event, void *hcpu)
> {
> long cpu = (long)hcpu;
> +
> if (event == CPU_ONLINE) {
> struct cpufreq_freqs f = { .cpu = cpu, .new = 0 };
> - smp_call_function_single(cpu, call_r_s_f, &f, 0, 1);
> +
> + smp_call_function_single(cpu, __resync_freq, &f, 0, 1);
> }
> return NOTIFY_DONE;
> }
> @@ -209,13 +235,15 @@ static __init int init_sched_clock(void)
> if (unsynchronized_tsc())
> no_sc_for_printk = 1;
>
> - /* On a race between the various events the initialization might be
> - done multiple times, but code is tolerant to this */
> + /*
> + * On a race between the various events the initialization might be
> + * done multiple times, but code is tolerant to this
> + */
> cpufreq_register_notifier(&sc_freq_notifier,
> CPUFREQ_TRANSITION_NOTIFIER);
> hotcpu_notifier(sc_cpu_event, 0);
> - on_each_cpu(call_r_s_f_here, NULL, 0, 0);
> + on_each_cpu(resync_freq, NULL, 0, 0);
> +
> return 0;
> }
> core_initcall(init_sched_clock);
> -

2007-05-25 08:23:26

by Andi Kleen

Subject: Re: [patch] i386, numaq: enable TSCs again

On Fri, May 25, 2007 at 01:19:44AM -0700, William Lee Irwin III wrote:
> On Fri, May 25, 2007 at 10:08:27AM +0200, Ingo Molnar wrote:
> > Andi, Andrew, do you remember why we disabled TSCs on NUMAQ? It was
> > slightly async between CPUs, right? In that case we should try the patch
> > below.
>
> I remember. It was far beyond "slightly async;" they would drift
> minutes apart during reasonable amounts of uptime, though it would take
> at least several days to drift so far (I don't recall how long it took).

sched_clock should handle that.

> TSC synchronization is uniformly impossible on NUMA-Q. Bootlogs showing
> the results of the attempts are still extant. They shouldn't end up too
> far apart right after booting, but I don't have even ballpark estimates.
> I'd hazard a guess of a few seconds.

You should mark_tsc_unstable(), but not tsc disable. In a release or two
hopefully even that will be obsolete.
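
(i.e. something like this in numaq_tsc_disable() -- untested, and the
exact mark_tsc_unstable() signature may differ in this tree, so treat
it as a sketch:)

static int __init numaq_tsc_disable(void)
{
        if (num_online_nodes() > 1) {
                /*
                 * The TSCs drift apart across quads: keep them usable
                 * for sched_clock(), just don't trust them for
                 * timekeeping:
                 */
                mark_tsc_unstable("NUMAQ: TSCs are not synchronized");
        }
        return 0;
}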

-Andi

2007-05-25 08:25:04

by William Lee Irwin III

Subject: Re: [patch] i386, numaq: enable TSCs again

On Fri, May 25, 2007 at 01:19:44AM -0700, William Lee Irwin III wrote:
>> I remember. It was far beyond "slightly async;" they would drift
>> minutes apart during reasonable amounts of uptime, though it would take
>> at least several days to drift so far (I don't recall how long it took).

On Fri, May 25, 2007 at 10:22:59AM +0200, Andi Kleen wrote:
> sched_clock should handle that.

On Fri, May 25, 2007 at 01:19:44AM -0700, William Lee Irwin III wrote:
>> TSC synchronization is uniformly impossible on NUMA-Q. Bootlogs showing
>> the results of the attempts are still extant. They shouldn't end up too
>> far apart right after booting, but I don't have even ballpark estimates.
>> I'd hazard a guess of a few seconds.

On Fri, May 25, 2007 at 10:22:59AM +0200, Andi Kleen wrote:
> You should mark_tsc_unstable(), but not tsc disable. In a release or two
> hopefully even that will be obsolete.

I don't have any particular preference here. I'm just donating a memory
dump. I have no intention of attempting to maintain NUMA-Q support code.


-- wli

2007-05-25 08:32:14

by Ingo Molnar

Subject: Re: [patch] i386, numaq: enable TSCs again


* William Lee Irwin III <[email protected]> wrote:

> > Andi, Andrew, do you remember why we disabled TSCs on NUMAQ? It was
> > slightly async between CPUs, right? In that case we should try the
> > patch below.
>
> I remember. It was far beyond "slightly async;" they would drift
> minutes apart during reasonable amounts of uptime, though it would
> take at least several days to drift so far (I don't recall how long it
> took).

yes, that's what i meant under 'slightly async'. Some AMD CPUs are like
that too and sched_clock() now handles that fine. So we should try my
patch.

Ingo

2007-05-25 08:34:54

by Ingo Molnar

Subject: Re: [patch] x86_64: fix sched_clock()


* Andi Kleen <[email protected]> wrote:

> > > Hmm indeed. I actually had it correct at some point (i remember
> > > fixing 64bit compile errors in sched-clock ;-). I guess the
> > > Makefile hunk accidentally dropped out during some later merging
> > > and this didn't get noticed due to the weak attribute.
> >
> > well the tsc.h bit was needed too.
>
> It's not in defconfig at least. I just tried it.

what do you mean? The tsc.h bit is needed because
arch/i386/kernel/sched-clock.c (now built on x86_64 too with the patch i
sent) uses the tsc_disable global flag which is non-existent on x86_64.
So my tsc.h change adds that global flag, always-defined to 0.

Ingo

2007-05-25 08:37:28

by William Lee Irwin III

Subject: Re: [patch] i386, numaq: enable TSCs again

* William Lee Irwin III <[email protected]> wrote:
>> I remember. It was far beyond "slightly async;" they would drift
>> minutes apart during reasonable amounts of uptime, though it would
>> take at least several days to drift so far (I don't recall how long it
>> took).

On Fri, May 25, 2007 at 10:31:53AM +0200, Ingo Molnar wrote:
> yes, that's what i meant under 'slightly async'. Some AMD CPUs are like
> that too and sched_clock() now handles that fine. So we should try my
> patch.

Sorry, then. I took slight to mean something else. In any event I was
only quantifying things. I've no opinion whatsoever on the impact of
the code on NUMA-Q, only some recall of its operating characteristics.


-- wli

2007-05-25 08:41:21

by Andi Kleen

Subject: Re: [patch] x86_64: fix sched_clock()

On Fri, May 25, 2007 at 10:34:31AM +0200, Ingo Molnar wrote:
>
> * Andi Kleen <[email protected]> wrote:
>
> > > > Hmm indeed. I actually had it correct at some point (i remember
> > > > fixing 64bit compile errors in sched-clock ;-). I guess the
> > > > Makefile hunk accidentially dropped out during some later merging
> > > > and this didn't get noticed due to the weak attribute.
> > >
> > > well the tsc.h bit was needed too.
> >
> > It's not in defconfig at least. I just tried it.
>
> what do you mean? The tsc.h bit is needed because

Means i readded the Makefile hunk and it compiled for me in 64bit without
changing anything else.

> arch/i386/kernel/sched-clock.c (now built on x86_64 too with the patch i
> sent) uses the tsc_disable global flag which is non-existent on x86_64.
> So my tsc.h change adds that global flag, always-defined to 0.

My version of sched_clock.c doesn't have any reference to tsc_disable.

-andi

2007-05-25 08:41:39

by Ingo Molnar

Subject: Re: [patch] i386, numaq: enable TSCs again


* William Lee Irwin III <[email protected]> wrote:

> > yes, that's what i meant under 'slightly async'. Some AMD CPUs are
> > like that too and sched_clock() now handles that fine. So we should
> > try my patch.
>
> Sorry, then. I took slight to mean something else. In any event I was
> only quantifying things. I've no opinion whatsoever on the impact of
> the code on NUMA-Q, only some recall of its operating characteristics.

there's no need to apologize at all! Thanks for reminding us about the
time-scale and nature of the TSC drift on NUMAQ. I was worried that
maybe the TSC was totally unusable for some reason - but that's
fortunately not the case. So we now have one quirk less, hopefully :-)

Ingo

2007-05-25 08:44:43

by Ingo Molnar

Subject: Re: [patch] x86_64: fix sched_clock()


* Andi Kleen <[email protected]> wrote:

> > arch/i386/kernel/sched-clock.c (now built on x86_64 too with the
> > patch i sent) uses the tsc_disable global flag which is non-existent
> > on x86_64. So my tsc.h change adds that global flag, always-defined
> > to 0.
>
> My version of sched_clock.c doesn't have any reference to tsc_disable.

must be an -mm fix. I used -mm as a basis of my work. Please apply my
patch.

Ingo

2007-05-25 08:45:57

by Andi Kleen

Subject: Re: [patch] x86_64: fix sched_clock()

On Fri, May 25, 2007 at 10:44:26AM +0200, Ingo Molnar wrote:
>
> * Andi Kleen <[email protected]> wrote:
>
> > > arch/i386/kernel/sched-clock.c (now built on x86_64 too with the
> > > patch i sent) uses the tsc_disable global flag which is non-existent
> > > on x86_64. So my tsc.h change adds that global flag, always-defined
> > > to 0.
> >
> > My version of sched_clock.c doesn't have any reference to tsc_disable.
>
> must be an -mm fix. I used -mm as a basis of my work. Please apply my
> patch.

I would prefer to find out why the mm patch was added and then hopefully
remove it. IMNSHO it should not be needed.

-Andi

2007-05-25 08:55:42

by Ingo Molnar

Subject: Re: [patch] x86_64: fix sched_clock()


* Andi Kleen <[email protected]> wrote:

> > must be an -mm fix. I used -mm as a basis of my work. Please apply
> > my patch.
>
> I would prefer to find out why the mm patch was added and then
> hopefully remove it. IMNSHO it should not be needed.

it comes in via:

fix-x86_64-mm-sched-clock-share.patch

From: Rusty Russell <[email protected]>

If you set tsc_disable (eg "notsc" on cmdline), sched-clock.c gives a
divide by zero on boot.

so IMNSHO it is very much needed ;-)

Ingo

2007-05-25 08:56:17

by Andrew Morton

Subject: Re: [patch] x86_64: fix sched_clock()

On Fri, 25 May 2007 10:45:47 +0200 Andi Kleen <[email protected]> wrote:

> On Fri, May 25, 2007 at 10:44:26AM +0200, Ingo Molnar wrote:
> >
> > * Andi Kleen <[email protected]> wrote:
> >
> > > > arch/i386/kernel/sched-clock.c (now built on x86_64 too with the
> > > > patch i sent) uses the tsc_disable global flag which is non-existent
> > > > on x86_64. So my tsc.h change adds that global flag, always-defined
> > > > to 0.
> > >
> > > My version of sched_clock.c doesn't have any reference to tsc_disable.
> >
> > must be an -mm fix. I used -mm as a basis of my work. Please apply my
> > patch.
>
> I would prefer to find out why the mm patch was added and then hopefully
> remove it. IMNSHO it should not be needed.
>

This? I sent it to you earlier this week:

From: Rusty Russell <[email protected]>

If you set tsc_disable (eg "notsc" on cmdline), sched-clock.c gives a
divide by zero on boot.

Signed-off-by: Rusty Russell <[email protected]>
Cc: Andi Kleen <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

arch/i386/kernel/sched-clock.c | 2 +-
1 files changed, 1 insertion(+), 1 deletion(-)

diff -puN arch/i386/kernel/sched-clock.c~fix-x86_64-mm-sched-clock-share arch/i386/kernel/sched-clock.c
--- a/arch/i386/kernel/sched-clock.c~fix-x86_64-mm-sched-clock-share
+++ a/arch/i386/kernel/sched-clock.c
@@ -115,7 +115,7 @@ unsigned long long printk_clock(void)
static void resync_sc_freq(struct sc_data *sc, unsigned int newfreq)
{
sc->sync_base = jiffies;
- if (!cpu_has_tsc) {
+ if (!cpu_has_tsc || tsc_disable) {
sc->unstable = 1;
return;
}
_

2007-05-25 09:03:26

by Andi Kleen

Subject: Re: [patch] x86_64: fix sched_clock()

> This? I sent it to you earlier this week:

Sorry haven't processed those yet.

Ah. The correct fix here is to clear the tsc flag in boot_cpu_data
when the option is set. Will do that.
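
(i.e. roughly this in the i386 "notsc" option handler -- untested sketch:)

static int __init notsc_setup(char *str)
{
        tsc_disable = 1;
        /*
         * Also clear the feature bit, so every cpu_has_tsc check sees
         * it and sched-clock.c needs no tsc_disable special case:
         */
        clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);

        return 1;
}
__setup("notsc", notsc_setup);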

-Andi


>
> From: Rusty Russell <[email protected]>
>
> If you set tsc_disable (eg "notsc" on cmdline), sched-clock.c gives a
> divide by zero on boot.
>
> Signed-off-by: Rusty Russell <[email protected]>
> Cc: Andi Kleen <[email protected]>
> Signed-off-by: Andrew Morton <[email protected]>
> ---
>
> arch/i386/kernel/sched-clock.c | 2 +-
> 1 files changed, 1 insertion(+), 1 deletion(-)
>
> diff -puN arch/i386/kernel/sched-clock.c~fix-x86_64-mm-sched-clock-share arch/i386/kernel/sched-clock.c
> --- a/arch/i386/kernel/sched-clock.c~fix-x86_64-mm-sched-clock-share
> +++ a/arch/i386/kernel/sched-clock.c
> @@ -115,7 +115,7 @@ unsigned long long printk_clock(void)
> static void resync_sc_freq(struct sc_data *sc, unsigned int newfreq)
> {
> sc->sync_base = jiffies;
> - if (!cpu_has_tsc) {
> + if (!cpu_has_tsc || tsc_disable) {
> sc->unstable = 1;
> return;
> }
> _
>

2007-05-25 09:19:45

by Ingo Molnar

Subject: Re: [patch] x86_64: fix sched_clock()


* Andi Kleen <[email protected]> wrote:

> > This? I sent it to you earlier this week:
>
> Sorry haven't processed those yet.
>
> Ah. The correct fix here is to clear the tsc flag in boot_cpu_data
> when the option is set. Will do that.

please indicate that you've picked up my style cleanups, i dont want to
redo all this a few days/weeks down the line ...

Ingo

2007-05-25 09:46:49

by Andi Kleen

Subject: Re: [patch] x86_64: fix sched_clock()

On Fri, May 25, 2007 at 11:19:28AM +0200, Ingo Molnar wrote:
>
> * Andi Kleen <[email protected]> wrote:
>
> > > This? I sent it to you earlier this week:
> >
> > Sorry haven't processed those yet.
> >
> > Ah. The correct fix here is to clear the tsc flag in boot_cpu_data
> > when the option is set. Will do that.
>
> please indicate that you've picked up my style cleanups, i dont want to
> redo all this a few days/weeks down the line ...

It's done slightly differently now due to conflicting earlier
changes, but the end result should be about what you intended.
You're also still credited in the cleanup patch of course.

-Andi

2007-05-25 10:13:14

by Ingo Molnar

Subject: Re: [patch] x86_64: fix sched_clock()


* Andi Kleen <[email protected]> wrote:

> > please indicate that you've picked up my style cleanups, i dont want
> > to redo all this a few days/weeks down the line ...
>
> It's done slightly differently now due to conflicting earlier changes,
> but the end result should be about what you intended. [...]

please send me your current sched-clock.c, i'll redo any remaining
cleanups.

But ... i find your approach curious, why didnt you just apply the
cleanups i sent? You clearly started working on this as a reaction to my
cleanup patches and to the bugfixes i sent ontop of the cleanup patches.
Your "I'll do this differently" approach is totally unnecessary from a
commit management point of view (this is new code after all and
will/should go upstream in a single clean chunk anyway); the only effect
this has is that you are discouraging contributors like me from
contributing cleanups to the x86_64 tree.

To further underline this feeling i got, none of your reactions to any
of my patches showed even a single positive thought that you are happy
about people fixing code you are introducing (in fact you didnt even
indicate which patch you took, only at my repeated prodding did you say
anything about the cleanup patch i sent!), so to me the impression is
that deep in yourself you are (subconsciously) not happy about others
contributing to the x86_64 tree. Please tell me that i'm wrong :-(

Ingo

2007-05-25 10:27:19

by Ingo Molnar

Subject: Re: [patch] x86_64: fix sched_clock()


* Andi Kleen <[email protected]> wrote:

> It's done slightly differently now due to conflicting earlier changes,
> but the end result should be about what you intended. You're also
> still credited in the cleanup patch of course.

you totally misunderstood me. My problem isnt credit. I've got enough
credit for a lifetime ;-) I'd be equally upset if this happened with
someone else's patches (in fact i'd be _more_ upset about it, because
then i'd also be worried about us losing a contributor).

My problem is that (and this might just be an issue of communication)
you often appear to treat the x86_64 code as 'your code' instead of
treating it as 'our code'. Dropping part of my patch, not even telling me
whether you took the patches or not, not reacting to patches in a
positive way - these are all external signs of that.

Ingo

2007-05-25 11:05:53

by Andi Kleen

Subject: Re: [patch] x86_64: fix sched_clock()

On Fri, May 25, 2007 at 11:03:15AM +0200, Andi Kleen wrote:
> > This? I sent it to you earlier this week:
>
> Sorry haven't processed those yet.
>
> Ah. The correct fix here is to clear the tsc flag in boot_cpu_data
> when the option is set. Will do that.

Hmm, I double-checked this now; tsc_disable indeed clears
X86_FEATURE_TSC in identify_cpu, and that should always be
called before anything sched_clock-related runs on a CPU.

Also, the only possibly faulting division is protected
by a cpu_has_tsc check, which even checks boot_cpu_data. This means
that even if the resync frequency code were called for
some reason before the identify_cpu of an AP, it should
still work. For the BP this definitely cannot happen.

I also tried it with qemu myself (both one and two CPUs) and it worked.

Rusty, was this really on a standard kernel? Was it with multiple
CPUs?

-Andi
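
To illustrate the ordering argument above, a small stand-alone sketch
(structure and names simplified for illustration; this is not the real
identify_cpu() or resync code): once the "notsc" option clears the
feature flag during CPU identification, every later cpu_has-style check
sees the TSC as absent, so the guarded path is taken before any division
can happen.

#include <stdbool.h>
#include <stdio.h>

struct cpuinfo { bool has_tsc; };

static bool tsc_disable;		/* as if set by the "notsc" option */

/* clearing the feature bit here means every later check sees "no TSC" */
static void identify_cpu(struct cpuinfo *c)
{
	c->has_tsc = true;		/* CPUID would report a TSC... */
	if (tsc_disable)
		c->has_tsc = false;	/* ...but "notsc" clears the flag */
}

static void resync_freq(const struct cpuinfo *c, unsigned int khz)
{
	if (!c->has_tsc || !khz) {	/* guard is hit before any division */
		printf("TSC unusable, marking sched_clock unstable\n");
		return;
	}
	printf("cyc2ns scale based on %u kHz\n", khz);
}

int main(void)
{
	struct cpuinfo cpu;

	tsc_disable = true;		/* boot with "notsc" */
	identify_cpu(&cpu);		/* runs before any sched_clock user */
	resync_freq(&cpu, 0);		/* takes the guarded path */
	return 0;
}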

2007-05-25 11:20:29

by Andi Kleen

Subject: Re: [patch] x86_64: fix sched_clock()

On Fri, May 25, 2007 at 12:12:48PM +0200, Ingo Molnar wrote:
>
> * Andi Kleen <[email protected]> wrote:
>
> > > please indicate that you've picked up my style cleanups, i dont want
> > > to redo all this a few days/weeks down the line ...
> >
> > It's done slightly differently now due to conflicting earlier changes,
> > but the end result should be about what you intended. [...]
>
> please send me your current sched-clock.c, i'll redo any remaining
> cleanups.

It needs at least one new preliminary patch (to add on_cpu_single);
please get the series from
ftp://ftp.firstfloor.org/pub/ak/x86_64/quilt/patches-2.6.22-rc2-git7-070525-1.tar.gz
You need at least tsc-unstable up to paravirt-add-a-sched_clock-paravirt
for everything.

> But ... i find your approach curious, why didnt you just apply the
> cleanups i sent? You clearly started working on this as a reaction to my
> cleanup patches and to the bugfixes i sent ontop of the cleanup patches.
> Your "I'll do this differently" approach is totally unnecessary from a
> commit management point of view (this is new code after all and
> will/should go upstream in a single clean chunk anyway); the only effect
> this has is that you are discouraging contributors like me from
> contributing cleanups to the x86_64 tree.

Unfortunately right now it is already a set of patches, e.g. due to
the paravirt ops change. I can merge the cleanup change back in, but I
kept it separate due to your earlier complaint about not using
your patch. I think one hunk of your original one is still in there :-)

> so to me the impression is
> that deep in yourself you are (subconsciously) not happy about others
> contributing to the x86_64 tree. Please tell me that i'm wrong :-(

You're reading too much into that. Of course I value contributions
to x86-64, including cleanups. In general, when I don't like something I
complain, so saying nothing is approval (or me not reading email, but
that doesn't happen that often).

-Andi

2007-05-25 11:26:17

by Ingo Molnar

Subject: Re: [patch] x86_64: fix sched_clock()


* Andi Kleen <[email protected]> wrote:

> > please send me your current sched-clock.c, i'll redo any remaining
> > cleanups.
>
> It needs at least one new preliminary patch (to add on_cpu_single);
> please get the series from
> ftp://ftp.firstfloor.org/pub/ak/x86_64/quilt/patches-2.6.22-rc2-git7-070525-1.tar.gz
> You need at least tsc-unstable up to paravirt-add-a-sched_clock-paravirt
> for everything.

thanks.

you missed one patch: please pick up the NUMAQ change i did too. (i kept
the printk to make sure someone notices that and actually tests things -
i dont have a NUMAQ machine to try this on.)

i'm looking at the other things now.

Ingo

2007-05-25 11:31:40

by Ingo Molnar

Subject: Re: [patch] x86_64: fix sched_clock()


* Ingo Molnar <[email protected]> wrote:

> * Andi Kleen <[email protected]> wrote:
>
> > > please send me your current sched-clock.c, i'll redo any remaining
> > > cleanups.
> >
> > It needs at least one new preliminary patch (to add on_cpu_single);
> > please get the series from
> > ftp://ftp.firstfloor.org/pub/ak/x86_64/quilt/patches-2.6.22-rc2-git7-070525-1.tar.gz
> > You need at least tsc-unstable up to paravirt-add-a-sched_clock-paravirt
> > for everything.

> i'm looking at the other things now.

your queue does not apply cleanly here:

Applying patch patches/paravirt-add-a-sched_clock-paravirt_op

patching file arch/i386/kernel/sched-clock.c
Hunk #5 FAILED at 148.
1 out of 5 hunks FAILED -- rejects in file arch/i386/kernel/sched-clock.c

could you please just send me your current sched-clock.c? (that's what
i'm interested in) I'll deal with the on_cpu_single() dependency.
Thanks,

Ingo

2007-05-25 11:46:25

by Ingo Molnar

Subject: [patch] sched_clock: fix preempt count imbalance


* Ingo Molnar <[email protected]> wrote:

> > > please send me your current sched-clock.c, i'll redo any remaining
> > > cleanups.
> >
> > It needs at least one new preliminary patch (to add on_cpu_single);
> > please get the series from
> > ftp://ftp.firstfloor.org/pub/ak/x86_64/quilt/patches-2.6.22-rc2-git7-070525-1.tar.gz
> > You need at least tsc-unstable up to paravirt-add-a-sched_clock-paravirt
> > for everything.
>
> thanks.
>
> you missed one patch: please pick up the NUMAQ change i did too. (i
> kept the printk to make sure someone notices that and actually tests
> things - i dont have a NUMAQ machine to try this on.)
>
> i'm looking at the other things now.

you introduced a crash-bug via your cleanups - the patch below fixes it.

Ingo

----------------------------->
Subject: [patch] sched_clock: fix preempt count imbalance
From: Ingo Molnar <[email protected]>

fix preempt count imbalance.

Signed-off-by: Ingo Molnar <[email protected]>
---
arch/i386/kernel/sched-clock.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)

Index: linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/sched-clock.c
===================================================================
--- linux-cfs-2.6.22-rc2-mm1.q.orig/arch/i386/kernel/sched-clock.c
+++ linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/sched-clock.c
@@ -164,7 +164,10 @@ static int sc_freq_event(struct notifier
void *data)
{
struct cpufreq_freqs *freq = data;
- struct sc_data *sc = &per_cpu(sc_data, freq->cpu);
+ struct sc_data *sc;
+
+ preempt_disable();
+ sc = &per_cpu(sc_data, freq->cpu);

if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
goto out;
@@ -194,7 +197,8 @@ static int sc_freq_event(struct notifier
break;
}
out:
- put_cpu();
+ preempt_enable();
+
return NOTIFY_DONE;
}
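
For readers following the patch above, a tiny user-space model of why a
stray put_cpu() is a bug (the functions below only mimic the counting;
the real primitives do much more): preemption disable/enable calls must
pair up, and after the callback rework the put_cpu() at the out: label
had apparently lost its matching get_cpu(), so the count was decremented
once too often.

#include <assert.h>
#include <stdio.h>

static int preempt_count;	/* stand-in for the kernel's preempt count */

static void preempt_disable(void) { preempt_count++; }

static void preempt_enable(void)
{
	assert(preempt_count > 0);	/* an unpaired enable underflows the count */
	preempt_count--;
}

/* shape of the bug: per-CPU data touched with only the trailing enable */
static void freq_event_buggy(void)
{
	/* ...look at another CPU's per-CPU data... */
	preempt_enable();		/* no matching disable: trips the assert */
}

/* shape of the fix: an explicit disable/enable pair around the access */
static void freq_event_fixed(void)
{
	preempt_disable();
	/* ...per-CPU access is safe from migration here... */
	preempt_enable();
}

int main(void)
{
	freq_event_fixed();
	printf("balanced: preempt_count = %d\n", preempt_count);
	freq_event_buggy();		/* aborts here, demonstrating the imbalance */
	return 0;
}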

2007-05-25 11:50:22

by Ingo Molnar

Subject: [patch] sched_clock(): cleanups, #2


* Ingo Molnar <[email protected]> wrote:

> * Andi Kleen <[email protected]> wrote:
>
> > > please send me your current sched-clock.c, i'll redo any remaining
> > > cleanups.
> >
> > It needs at least one new preliminary patch (to add on_cpu_single);
> > please get the series from
> > ftp://ftp.firstfloor.org/pub/ak/x86_64/quilt/patches-2.6.22-rc2-git7-070525-1.tar.gz
> > You need at least tsc-unstable up to paravirt-add-a-sched_clock-paravirt
> > for everything.
>
> thanks.
>
> you missed one patch: please pick up the NUMAQ change i did too. (i
> kept the printk to make sure someone notices that and actually tests
> things - i dont have a NUMAQ machine to try this on.)
>
> i'm looking at the other things now.

find below the cleanups from my first patch that didnt make it into your
cleanups. (plus one more cleanup i noticed while merging the missing
bits from my first patch) Goes after the bugfix i just sent. Please
apply.

Ingo

---------------------->
Subject: [patch] sched_clock(): cleanups, #2
From: Ingo Molnar <[email protected]>

clean up sched-clock.c - the missing bits.

Signed-off-by: Ingo Molnar <[email protected]>
---
arch/i386/kernel/sched-clock.c | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)

Index: linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/sched-clock.c
===================================================================
--- linux-cfs-2.6.22-rc2-mm1.q.orig/arch/i386/kernel/sched-clock.c
+++ linux-cfs-2.6.22-rc2-mm1.q/arch/i386/kernel/sched-clock.c
@@ -89,8 +89,10 @@ unsigned long long native_sched_clock(vo
return r;
}

-/* We need to define a real function for sched_clock, to override the
- weak default version */
+/*
+ * We need to define a real function for sched_clock, to override the
+ * weak default version
+ */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
@@ -119,6 +121,9 @@ static void resolve_freq(struct cpufreq_
{
if (!freq->new) {
freq->new = cpufreq_get(freq->cpu);
+ /*
+ * Still no frequency? Then fall back to tsc_khz:
+ */
if (!freq->new)
freq->new = tsc_khz;
}
@@ -129,6 +134,7 @@ static void resync_freq(void *arg)
{
struct cpufreq_freqs *freq = (void *)arg;
struct sc_data *sc = &__get_cpu_var(sc_data);
+
sc->sync_base = jiffies;
if (!cpu_has_tsc) {
sc->unstable = 1;
@@ -155,13 +161,14 @@ static void resync_freq(void *arg)
static void resync_freq_on_cpu(void *arg)
{
struct cpufreq_freqs f = { .new = 0 };
+
f.cpu = get_cpu();
resync_freq(&f);
put_cpu();
}

-static int sc_freq_event(struct notifier_block *nb, unsigned long event,
- void *data)
+static int
+sc_freq_event(struct notifier_block *nb, unsigned long event, void *data)
{
struct cpufreq_freqs *freq = data;
struct sc_data *sc;
@@ -210,8 +217,10 @@ static int __cpuinit
sc_cpu_event(struct notifier_block *self, unsigned long event, void *hcpu)
{
long cpu = (long)hcpu;
+
if (event == CPU_ONLINE) {
struct cpufreq_freqs f = { .cpu = cpu, .new = 0 };
+
on_cpu_single(cpu, resync_freq, &f);
}
return NOTIFY_DONE;
@@ -221,7 +230,6 @@ static __init int init_sched_clock(void)
{
if (unsynchronized_tsc())
no_sc_for_printk = 1;
-
/*
* On a race between the various events the initialization
* might be done multiple times, but code is tolerant to
@@ -231,7 +239,7 @@ static __init int init_sched_clock(void)
CPUFREQ_TRANSITION_NOTIFIER);
hotcpu_notifier(sc_cpu_event, 0);
on_each_cpu(resync_freq_on_cpu, NULL, 0, 0);
+
return 0;
}
core_initcall(init_sched_clock);
-
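
As a side note on the hunk that adds the "Still no frequency?" comment,
here is a hedged user-space mock of the fallback chain it documents
(mock names, not the kernel functions, and the tsc_khz value is
invented): the value reported by the notifier is used if present,
otherwise cpufreq is asked, and only then does the code fall back to the
calibrated tsc_khz.

#include <stdio.h>

static unsigned int tsc_khz = 2000000;	/* assumed calibration value, in kHz */

/* stand-in for cpufreq_get(): pretend cpufreq has nothing to report */
static unsigned int cpufreq_get_mock(int cpu)
{
	(void)cpu;
	return 0;
}

/* fallback chain: notifier value -> cpufreq -> calibrated tsc_khz */
static unsigned int resolve_freq_mock(unsigned int reported, int cpu)
{
	if (!reported) {
		reported = cpufreq_get_mock(cpu);
		/* still no frequency? then fall back to tsc_khz */
		if (!reported)
			reported = tsc_khz;
	}
	return reported;
}

int main(void)
{
	printf("resolved frequency: %u kHz\n", resolve_freq_mock(0, 0));
	return 0;
}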

2007-05-25 11:55:54

by Andi Kleen

Subject: Re: [patch] sched_clock(): cleanups, #2

On Fri, May 25, 2007 at 01:50:04PM +0200, Ingo Molnar wrote:
>
> * Ingo Molnar <[email protected]> wrote:
>
> > * Andi Kleen <[email protected]> wrote:
> >
> > > > please send me your current sched-clock.c, i'll redo any remaining
> > > > cleanups.
> > >
> > > It needs at least one new preliminary patch (to add on_cpu_single);
> > > please get the series from
> > > ftp://ftp.firstfloor.org/pub/ak/x86_64/quilt/patches-2.6.22-rc2-git7-070525-1.tar.gz
> > > You need at least tsc-unstable up to paravirt-add-a-sched_clock-paravirt
> > > for everything.
> >
> > thanks.
> >
> > you missed one patch: please pick up the NUMAQ change i did too. (i
> > kept the printk to make sure someone notices that and actually tests
> > things - i dont have a NUMAQ machine to try this on.)
> >
> > i'm looking at the other things now.
>
> find below the cleanups from my first patch that didnt make it into your
> cleanups. (plus one more cleanup i noticed while merging the missing
> bits from my first patch) Goes after the bugfix i just sent. Please
> apply.

I cannot apply it as is because it changes code from the paravirtops
patch too.

BTW, to nitpick: the original formatting you changed is all probably
standard code style, and the new comment is a classical
i++; /* Increase i */

I'll fold the comment change into Jeremy's paravirt ops patch.

-Andi

2007-05-25 12:02:43

by Ingo Molnar

Subject: Re: [patch] sched_clock(): cleanups, #2


* Andi Kleen <[email protected]> wrote:

> > find below the cleanups from my first patch that didnt make it into
> > your cleanups. (plus one more cleanup i noticed while merging the
> > missing bits from my first patch) Goes after the bugfix i just sent.
> > Please apply.
>
> I cannot apply it as is because it changes code from the paravirtops
> patch too.

ok - then please merge that single hunk into the paravirtops patch - and
leave the other 6 hunks in this patch.

> BTW, to nitpick: the original formatting you changed is all probably
> standard code style, and the new comment is a classical
> i++; /* Increase i */

could you please quote the portion from my patch that you are talking
about?

Ingo

2007-05-25 12:17:09

by Andi Kleen

Subject: Re: [patch] sched_clock(): cleanups, #2

On Fri, May 25, 2007 at 02:02:28PM +0200, Ingo Molnar wrote:
>
> * Andi Kleen <[email protected]> wrote:
>
> > > find below the cleanups from my first patch that didnt make it into
> > > your cleanups. (plus one more cleanup i noticed while merging the
> > > missing bits from my first patch) Goes after the bugfix i just sent.
> > > Please apply.
> >
> > I cannot apply it as is because it changes code from the paravirtops
> > patch too.
>
> ok - then please merge that single hunk into the paravirtops patch - and
> leave the other 6 hunks in this patch.

I added the empty lines too.

>
> > BTW, to nitpick: the original formatting you changed is all probably
> > standard code style, and the new comment is a classical i++; /*
> > Increase i */
>
> could you please quote the portion from my patch that you are talking
> about?

-static int sc_freq_event(struct notifier_block *nb, unsigned long event,
- void *data)
+static int
+sc_freq_event(struct notifier_block *nb, unsigned long event, void *data)

and

+ /*
+ * Still no frequency? Then fall back to tsc_khz:
+ */
if (!freq->new)
freq->new = tsc_khz;


-Andi (who hopes this thread will end soon now and we can all go
back to more important issues)

2007-05-25 16:18:46

by Andrew Morton

Subject: Re: [patch] sched_clock(): cleanups, #2

On Fri, 25 May 2007 14:15:40 +0200 Andi Kleen <[email protected]> wrote:

> -Andi (who hopes this thread will end soon now and we can all go
> back to more important issues)

fyi, the thread has been damn useful for me. If Ingo hadn't spotted that
preempt_count() imbalance then there's a decent chance that I'd have been
the first to hit it.

Time to bisect 1,000 patches: maybe an hour, if I choose the x86 tree as
the first pivot point, which is likely.

Or I wouldn't have hit it, in which case a tester hits it, and someone
(guess who) gets to enter into an intercontinental head-scratching session
trying to work out who broke it this time, consuming the tester's time too.
Plus we have a wrecked -mm and other people's code doesn't get as
well-tested as it might.

All for one silly little mistake.


So. More care, please.

2007-05-25 16:28:53

by Daniel Walker

Subject: Re: [patch] sched_clock(): cleanups, #2

On Fri, 2007-05-25 at 09:17 -0700, Andrew Morton wrote:
> On Fri, 25 May 2007 14:15:40 +0200 Andi Kleen <[email protected]> wrote:
>
> > -Andi (who hopes this thread will end soon now and we can all go
> > back to more important issues)
>
> fyi, the thread has been damn useful for me. If Ingo hadn't spotted that
> preempt_count() imbalance then there's a decent chance that I'd have been
> the first to hit it.
>
> Time to bisect 1,000 patches: maybe an hour, if I choose the x86 tree as
> the first pivot point, which is likely.
>
> Or I wouldn't have hit it, in which case a tester hits it, and someone
> (guess who) gets to enter into an intercontinental head-scratching session
> trying to work out who broke it this time, consuming the tester's time too.
> Plus we have a wrecked -mm and other people's code doesn't get as
> well-tested as it might.
>
> All for one silly little mistake.

I wonder if this was the source of the lockdep selftest failures, or of the
mystery hang this patch caused (IIRC).

Daniel

2007-05-25 16:33:19

by Andi Kleen

Subject: Re: [patch] sched_clock(): cleanups, #2

> I wonder if this was the source of the lockdep selftest failures, or the
> mystery hang this patch caused (IIRC)..

No, the imbalance was just on the ftp server for an hour or so.
I doubt anybody except Ingo ever saw that code.

I introduced it while changing the callbacks around to make the code
easier to read.

-Andi

2007-05-25 16:50:16

by Linus Torvalds

Subject: Re: [patch] sched_clock(): cleanups, #2



On Fri, 25 May 2007, Ingo Molnar wrote:
>
> ok - then please merge that single hunk into the paravirtops patch - and
> leave the other 6 hunks in this patch.

Note: I'm actually much more interested in applying the scheduler changes
than the paravirt-ops changes. It looks like CFS is getting stable, I'd
much rather have Ingo working on making a nice CFS patch on top of 2.6.22
(and the current -git tree should obviously approximate that, I don't
expect any real changes to scheduler stuff), and merge that immediately
after 2.6.22 is out.

Ingo: to make things easier, the _best_ split-up would be not "this is the
historical series of patches leading up to CFS v15" or something like
that, but it would be nice to get that final CFS version as a series of
patches that do some specific things rather than as one final one. Is
there any possibility that could happen?

Linus

2007-05-25 18:08:16

by Andi Kleen

Subject: Re: [patch] sched_clock(): cleanups, #2

On Fri, May 25, 2007 at 09:49:38AM -0700, Linus Torvalds wrote:
>
>
> On Fri, 25 May 2007, Ingo Molnar wrote:
> >
> > ok - then please merge that single hunk into the paravirtops patch - and
> > leave the other 6 hunks in this patch.
>
> Note: I'm actually much more interested in applying the scheduler changes
> than the paravirt-ops changes. It looks like CFS is getting stable, I'd

That's ok -- that is why I'm keeping this stuff separate.
The paravirt ops sched_clock is layered on top of the new sched_clock(),
but sched_clock doesn't rely on it.

Also, my understanding is that CFS works even without the new sched_clock;
the new one just makes it work better.

-Andi

2007-05-25 18:17:12

by Dave Hansen

Subject: Re: [patch] i386, numaq: enable TSCs again

On Fri, 2007-05-25 at 10:41 +0200, Ingo Molnar wrote:
> * William Lee Irwin III <[email protected]> wrote:
> > > yes, that's what i meant under 'slightly async'. Some AMD CPUs are
> > > like that too and sched_clock() now handles that fine. So we should
> > > try my patch.
> >
> > Sorry, then. I took slight to mean something else. In any event I was
> > only quantifying things. I've no opinion whatsoever on the impact of
> > the code on NUMA-Q, only some recall of its operating characteristics.
>
> there's no need to apologize at all! Thanks for reminding us about the
> time-scale and nature of the TSC drift on NUMAQ. I was worried that
> maybe the TSC was totally unusable for some reason - but that's
> fortunately not the case. So we now have one quirk less, hopefully :-)

Last I remember, it was totally useless for timekeeping, but was useful
for cpu-local time measurements.

John, it's still useless for time, right? Does sched_clock() really fix
it?

-- Dave

2007-05-25 18:23:55

by john stultz

Subject: Re: [patch] i386, numaq: enable TSCs again

On Fri, 2007-05-25 at 11:16 -0700, Dave Hansen wrote:
> On Fri, 2007-05-25 at 10:41 +0200, Ingo Molnar wrote:
> > * William Lee Irwin III <[email protected]> wrote:
> > > > yes, that's what i meant under 'slightly async'. Some AMD CPUs are
> > > > like that too and sched_clock() now handles that fine. So we should
> > > > try my patch.
> > >
> > > Sorry, then. I took slight to mean something else. In any event I was
> > > only quantifying things. I've no opinion whatsoever on the impact of
> > > the code on NUMA-Q, only some recall of its operating characteristics.
> >
> > there's no need to apologize at all! Thanks for reminding us about the
> > time-scale and nature of the TSC drift on NUMAQ. I was worried that
> > maybe the TSC was totally unusable for some reason - but that's
> > fortunately not the case. So we now have one quirk less, hopefully :-)
>
> Last I remember, it was totally useless for timekeeping, but was useful
> for cpu-local time measurements.
>
> John, it's still useless for time, right? Does sched_clock() really fix
> it?

Yea, on multi-node NUMAQ the TSC shouldn't be used for timekeeping.

However it should be fine for sched_clock() or other cpu-local
measurements, as the TSCs are constant (no cpufreq, no deep sleep
states).

-john
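
A toy illustration of that distinction (invented numbers, and a
deliberate simplification: the per-node TSCs are modeled as same-rate
counters that merely start from different offsets): cpu-local deltas come
out the same on every CPU, which is all sched_clock() needs, while
comparing raw values across CPUs is off by the node offset, which is why
the TSC is not used for timekeeping there.

#include <stdio.h>

/* two per-CPU counters at the same rate but with different offsets */
static unsigned long long tsc_cpu0(unsigned long long t) { return t; }
static unsigned long long tsc_cpu1(unsigned long long t) { return t + 5000000; }

int main(void)
{
	unsigned long long t1 = 1000, t2 = 2000;

	/* cpu-local delta: identical on both CPUs, fine for sched_clock() */
	printf("cpu0 delta: %llu\n", tsc_cpu0(t2) - tsc_cpu0(t1));
	printf("cpu1 delta: %llu\n", tsc_cpu1(t2) - tsc_cpu1(t1));

	/* cross-CPU comparison: off by the node offset, bad for timekeeping */
	printf("cpu1 - cpu0 at the same instant: %llu\n",
	       tsc_cpu1(t1) - tsc_cpu0(t1));
	return 0;
}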

2007-05-25 19:15:05

by Ingo Molnar

Subject: Re: [patch] sched_clock(): cleanups, #2


* Linus Torvalds <[email protected]> wrote:

> Ingo: to make things easier, the _best_ split-up would be not "this is
> the historical series of patches leading up to CFS v15" or something
> like that, but it would be nice to get that final CFS version as a
> series of patches that do some specific things rather than as one
> final one. Is there any possibility that could happen?

sure enough, i'll work something sane out - i already have it partly
split up. It will probably be something along the lines of: 'remove
stuff that we can remove and still have functional scheduling' followed
by an 'add minimal CFS patch' and then nicely split up 'CFS related
add-ons' (like the task-stats accounting things, debugging, etc). Do you
think i should try to split up the core CFS bits some more beyond this
level? (that would be quite nontrivial i suspect)

Ingo

2007-05-25 19:46:51

by Linus Torvalds

Subject: Re: [patch] sched_clock(): cleanups, #2



On Fri, 25 May 2007, Ingo Molnar wrote:
>
> sure enough, i'll work something sane out - i already have it partly
> split up. It will probably be something along the lines of: 'remove
> stuff that we can remove and still have functional scheduling' followed
> by an 'add minimal CFS patch' and then nicely split up 'CFS related
> add-ons' (like the task-stats accounting things, debugging, etc).

Ok, that sounds good. Some of that is obviously already in separate files,
even if they then just get #include'd into the main sched.c, so yeah,
splitting it up along those lines makes sense.

> Do you think i should try to split up the core CFS bits some more beyond
> this level? (that would be quite nontrivial i suspect)

I'd love to see some of that too, but I suspect that with something like
the scheduler, it's hard to have any reasonable (and working!) points that
make much sense. At some point, the changes to make it work incrementally
get bigger than the patches themselves, and rather than show what's going
on, it might _hide_ what's going on.

Linus

2007-05-28 10:44:19

by Rusty Russell

Subject: Re: [patch] x86_64: fix sched_clock()

On Fri, 2007-05-25 at 13:05 +0200, Andi Kleen wrote:
> On Fri, May 25, 2007 at 11:03:15AM +0200, Andi Kleen wrote:
> > > This? I sent it to you earlier this week:
> >
> > Sorry haven't processed those yet.
> >
> > Ah. The correct fix here is to clear the tsc flag in boot_cpu_data
> > when the option is set. Will do that.
>
> Hmm, I double-checked this now; tsc_disable indeed clears
> X86_FEATURE_TSC in identify_cpu, and that should always be
> called before anything sched_clock-related runs on a CPU.

Yes, agreed.

> Also, the only possibly faulting division is protected
> by a cpu_has_tsc check, which even checks boot_cpu_data. This means
> that even if the resync frequency code were called for
> some reason before the identify_cpu of an AP, it should
> still work. For the BP this definitely cannot happen.
>
> I also tried it with qemu myself (both one and two CPUs) and it worked.
>
> Rusty, was this really on a standard kernel? Was it with multiple
> CPUs?

No, I will re-check. Drop it for now; if I can reproduce it, I'll produce
the real fix or a decent explanation.

Thanks,
Rusty.