2007-05-17 17:41:10

by Sergei Shtylyov

[permalink] [raw]
Subject: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Add PowerPC decrementer clock event driver.

Every effort has been made to support the different implementations of the
decrementer: the classic one (with 970 series variation), 40x and Book E
specific ones.

I had to make CONFIG_GENERIC_CLOCKEVENTS option selectable for the
compatibility reasons -- this option is not compatible with the PPC64
deterministic time accounting.

Thanks to Daniel Walker and Thomas Gleixner for the suggestions they made...

Signed-off-by: Sergei Shtylyov <[email protected]>

---
This patch has been reworked against the 2.6.21 clockevents framework.
It has only been tested on the Book E 32-bit CPU this time, so re-testing on
"classic" PowerPC CPUs is needed (used to work as of 2.6.18-rt7)...

CONFIG_PPC_MULTIPLATFORM was the best option I was able to come up with
to cover machines built on 970 series CPUs...

arch/powerpc/Kconfig | 12 +++-
arch/powerpc/kernel/time.c | 124 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 134 insertions(+), 2 deletions(-)

Index: linux-2.6/arch/powerpc/Kconfig
===================================================================
--- linux-2.6.orig/arch/powerpc/Kconfig
+++ linux-2.6/arch/powerpc/Kconfig
@@ -317,7 +317,7 @@ config PPC_STD_MMU_32

config VIRT_CPU_ACCOUNTING
bool "Deterministic task and CPU time accounting"
- depends on PPC64
+ depends on PPC64 && !GENERIC_CLOCKEVENTS
default y
help
Select this option to enable more accurate task and CPU time
@@ -760,6 +760,16 @@ config HIGHMEM
depends on PPC32

source kernel/Kconfig.hz
+
+config GENERIC_CLOCKEVENTS
+ bool "Clock event devices support"
+ default n
+ help
+ Enable support for the clock event devices necessary for the
+ high-resolution timers and the tickless system support.
+ NOTE: This is not compatible with the deterministic time accounting
+ option on PPC64.
+
source kernel/Kconfig.preempt

config RWSEM_GENERIC_SPINLOCK
Index: linux-2.6/arch/powerpc/kernel/time.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/time.c
+++ linux-2.6/arch/powerpc/kernel/time.c
@@ -52,6 +52,7 @@
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/irq.h>
+#include <linux/clockchips.h>

#include <asm/io.h>
#include <asm/processor.h>
@@ -128,6 +129,83 @@ unsigned long ppc_tb_freq;
static u64 tb_last_jiffy __cacheline_aligned_in_smp;
static DEFINE_PER_CPU(u64, last_jiffy);

+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+#define DECREMENTER_MAX 0xffffffff
+#else
+#define DECREMENTER_MAX 0x7fffffff /* setting MSB triggers an interrupt */
+#endif
+
+static int decrementer_set_next_event(unsigned long evt,
+ struct clock_event_device *dev)
+{
+#if defined(CONFIG_40x)
+ mtspr(SPRN_PIT, evt); /* 40x has a hidden PIT auto-reload register */
+#elif defined(CONFIG_BOOKE)
+ mtspr(SPRN_DECAR, evt); /* Book E has separate auto-reload register */
+ set_dec(evt);
+#else
+ set_dec(evt - 1); /* Classic decrementer interrupts at -1 */
+#endif
+ return 0;
+}
+
+static void decrementer_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *dev)
+{
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+ u32 tcr = mfspr(SPRN_TCR);
+
+ tcr |= TCR_DIE;
+ switch (mode) {
+ case CLOCK_EVT_MODE_PERIODIC:
+ tcr |= TCR_ARE;
+ break;
+ case CLOCK_EVT_MODE_ONESHOT:
+ tcr &= ~TCR_ARE;
+ break;
+ case CLOCK_EVT_MODE_UNUSED:
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ tcr &= ~TCR_DIE;
+ break;
+ }
+ mtspr(SPRN_TCR, tcr);
+#endif
+ if (mode == CLOCK_EVT_MODE_PERIODIC)
+ decrementer_set_next_event(tb_ticks_per_jiffy, dev);
+}
+
+static struct clock_event_device decrementer_clockevent = {
+ .name = "decrementer",
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+ .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
+#else
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+#endif
+ .shift = 32,
+ .rating = 200,
+ .irq = -1,
+ .set_next_event = decrementer_set_next_event,
+ .set_mode = decrementer_set_mode,
+};
+
+static DEFINE_PER_CPU(struct clock_event_device, decrementers);
+
+static void register_decrementer(void)
+{
+ int cpu = smp_processor_id();
+ struct clock_event_device *decrementer = &per_cpu(decrementers, cpu);
+
+ memcpy(decrementer, &decrementer_clockevent, sizeof(*decrementer));
+
+ decrementer->cpumask = cpumask_of_cpu(cpu);
+
+ clockevents_register_device(decrementer);
+}
+
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
* Factors for converting from cputime_t (timebase ticks) to
@@ -313,6 +391,9 @@ void snapshot_timebase(void)
{
__get_cpu_var(last_jiffy) = get_tb();
snapshot_purr();
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+ register_decrementer();
+#endif
}

void __delay(unsigned long loops)
@@ -470,7 +551,31 @@ void timer_interrupt(struct pt_regs * re
old_regs = set_irq_regs(regs);
irq_enter();

+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+#ifdef CONFIG_PPC_MULTIPLATFORM
+ /*
+ * We must write a positive value to the decrementer to clear
+ * the interrupt on the IBM 970 CPU series. In periodic mode,
+ * this happens when the decrementer gets reloaded later, but
+ * in one-shot mode, we have to do it here since an event handler
+ * may skip loading the new value...
+ */
+ if (per_cpu(decrementers, cpu).mode != CLOCK_EVT_MODE_PERIODIC)
+ set_dec(DECREMENTER_MAX);
+#endif
+ /*
+ * We can't disable the decrementer, so in the period between
+ * CPU being marked offline and calling stop-self, it's taking
+ * timer interrupts...
+ */
+ if (!cpu_is_offline(cpu)) {
+ struct clock_event_device *dev = &per_cpu(decrementers, cpu);
+
+ dev->event_handler(dev);
+ }
+#else
profile_tick(CPU_PROFILING);
+#endif
calculate_steal_time();

#ifdef CONFIG_PPC_ISERIES
@@ -486,6 +591,7 @@ void timer_interrupt(struct pt_regs * re
if (__USE_RTC() && per_cpu(last_jiffy, cpu) >= 1000000000)
per_cpu(last_jiffy, cpu) -= 1000000000;

+#ifndef CONFIG_GENERIC_CLOCKEVENTS
/*
* We cannot disable the decrementer, so in the period
* between this cpu's being marked offline in cpu_online_map
@@ -495,6 +601,7 @@ void timer_interrupt(struct pt_regs * re
*/
if (!cpu_is_offline(cpu))
account_process_time(regs);
+#endif

/*
* No need to check whether cpu is offline here; boot_cpuid
@@ -507,15 +614,19 @@ void timer_interrupt(struct pt_regs * re
tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy;
if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) {
tb_last_jiffy = tb_next_jiffy;
+#ifndef CONFIG_GENERIC_CLOCKEVENTS
do_timer(1);
+#endif
/*timer_recalc_offset(tb_last_jiffy);*/
timer_check_rtc();
}
write_sequnlock(&xtime_lock);
}
-
+
+#ifndef CONFIG_GENERIC_CLOCKEVENTS
next_dec = tb_ticks_per_jiffy - ticks;
set_dec(next_dec);
+#endif

#ifdef CONFIG_PPC_ISERIES
if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending())
@@ -770,8 +881,19 @@ void __init time_init(void)
-xtime.tv_sec, -xtime.tv_nsec);
write_sequnlock_irqrestore(&xtime_lock, flags);

+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+ decrementer_clockevent.mult = div_sc(ppc_tb_freq, NSEC_PER_SEC,
+ decrementer_clockevent.shift);
+ decrementer_clockevent.max_delta_ns =
+ clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
+ decrementer_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xf, &decrementer_clockevent);
+
+ register_decrementer();
+#else
/* Not exact, but the timer interrupt takes care of this */
set_dec(tb_ticks_per_jiffy);
+#endif
}

#define FEBRUARY 2


2007-05-17 17:49:46

by Kumar Gala

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver


On May 17, 2007, at 12:42 PM, Sergei Shtylyov wrote:

> Add PowerPC decrementer clock event driver.
>
> Every effort has been made to support the different implementations
> of the
> decrementer: the classic one (with 970 series variation), 40x and
> Book E
> specific ones.
>
> I had to make CONFIG_GENERIC_CLOCKEVENTS option selectable for the
> compatibility reasons -- this option is not compatible with the PPC64
> deterministic time accounting.
>
> Thanks to Daniel Walker and Thomas Gleixner for the suggestions
> they made...
>
> Signed-off-by: Sergei Shtylyov <[email protected]>
>
> ---
> This patch has been reworked against the 2.6.21 clockevents framework.
> It has only been tested on the Book E 32-bit CPU this time, so re-
> testing on
> "classic" PowerPC CPUs is needed (used to work as of 2.6.18-rt7)...
>
> CONFIG_PPC_MULTIPLATFORM was the best option I was able to come up
> with
> to cover machines built on 970 series CPUs...
>
> arch/powerpc/Kconfig | 12 +++-
> arch/powerpc/kernel/time.c | 124 +++++++++++++++++++++++++++++++++
> +++++++++++-
> 2 files changed, 134 insertions(+), 2 deletions(-)
>
> Index: linux-2.6/arch/powerpc/Kconfig
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/Kconfig
> +++ linux-2.6/arch/powerpc/Kconfig
> @@ -317,7 +317,7 @@ config PPC_STD_MMU_32
>
> config VIRT_CPU_ACCOUNTING
> bool "Deterministic task and CPU time accounting"
> - depends on PPC64
> + depends on PPC64 && !GENERIC_CLOCKEVENTS
> default y
> help
> Select this option to enable more accurate task and CPU time
> @@ -760,6 +760,16 @@ config HIGHMEM
> depends on PPC32
>
> source kernel/Kconfig.hz
> +
> +config GENERIC_CLOCKEVENTS
> + bool "Clock event devices support"
> + default n
> + help
> + Enable support for the clock event devices necessary for the
> + high-resolution timers and the tickless system support.
> + NOTE: This is not compatible with the deterministic time
> accounting
> + option on PPC64.
> +
> source kernel/Kconfig.preempt
>
> config RWSEM_GENERIC_SPINLOCK
> Index: linux-2.6/arch/powerpc/kernel/time.c
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/kernel/time.c
> +++ linux-2.6/arch/powerpc/kernel/time.c
> @@ -52,6 +52,7 @@
> #include <linux/jiffies.h>
> #include <linux/posix-timers.h>
> #include <linux/irq.h>
> +#include <linux/clockchips.h>
>
> #include <asm/io.h>
> #include <asm/processor.h>
> @@ -128,6 +129,83 @@ unsigned long ppc_tb_freq;
> static u64 tb_last_jiffy __cacheline_aligned_in_smp;
> static DEFINE_PER_CPU(u64, last_jiffy);
>
> +#ifdef CONFIG_GENERIC_CLOCKEVENTS
> +
> +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
> +#define DECREMENTER_MAX 0xffffffff
> +#else
> +#define DECREMENTER_MAX 0x7fffffff /* setting MSB triggers an
> interrupt */
> +#endif
> +
> +static int decrementer_set_next_event(unsigned long evt,
> + struct clock_event_device *dev)
> +{
> +#if defined(CONFIG_40x)
> + mtspr(SPRN_PIT, evt); /* 40x has a hidden PIT auto-reload
> register */
> +#elif defined(CONFIG_BOOKE)
> + mtspr(SPRN_DECAR, evt); /* Book E has separate auto-reload
> register */
> + set_dec(evt);

what's the point of setting decar if you're just going to set_dec as
well?

> +#else
> + set_dec(evt - 1); /* Classic decrementer interrupts at -1 */
> +#endif
> + return 0;
> +}
> +
> +static void decrementer_set_mode(enum clock_event_mode mode,
> + struct clock_event_device *dev)
> +{
> +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
> + u32 tcr = mfspr(SPRN_TCR);
> +
> + tcr |= TCR_DIE;
> + switch (mode) {
> + case CLOCK_EVT_MODE_PERIODIC:
> + tcr |= TCR_ARE;
> + break;
> + case CLOCK_EVT_MODE_ONESHOT:
> + tcr &= ~TCR_ARE;
> + break;
> + case CLOCK_EVT_MODE_UNUSED:
> + case CLOCK_EVT_MODE_SHUTDOWN:
> + tcr &= ~TCR_DIE;
> + break;
> + }
> + mtspr(SPRN_TCR, tcr);
> +#endif
> + if (mode == CLOCK_EVT_MODE_PERIODIC)
> + decrementer_set_next_event(tb_ticks_per_jiffy, dev);
> +}
> +
> +static struct clock_event_device decrementer_clockevent = {
> + .name = "decrementer",
> +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
> + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
> +#else
> + .features = CLOCK_EVT_FEAT_ONESHOT,
> +#endif
> + .shift = 32,
> + .rating = 200,
> + .irq = -1,
> + .set_next_event = decrementer_set_next_event,
> + .set_mode = decrementer_set_mode,
> +};
> +
> +static DEFINE_PER_CPU(struct clock_event_device, decrementers);
> +
> +static void register_decrementer(void)
> +{
> + int cpu = smp_processor_id();
> + struct clock_event_device *decrementer = &per_cpu(decrementers,
> cpu);
> +
> + memcpy(decrementer, &decrementer_clockevent, sizeof(*decrementer));
> +
> + decrementer->cpumask = cpumask_of_cpu(cpu);
> +
> + clockevents_register_device(decrementer);
> +}
> +
> +#endif /* CONFIG_GENERIC_CLOCKEVENTS */
> +
> #ifdef CONFIG_VIRT_CPU_ACCOUNTING
> /*
> * Factors for converting from cputime_t (timebase ticks) to
> @@ -313,6 +391,9 @@ void snapshot_timebase(void)
> {
> __get_cpu_var(last_jiffy) = get_tb();
> snapshot_purr();
> +#ifdef CONFIG_GENERIC_CLOCKEVENTS
> + register_decrementer();
> +#endif
> }
>
> void __delay(unsigned long loops)
> @@ -470,7 +551,31 @@ void timer_interrupt(struct pt_regs * re
> old_regs = set_irq_regs(regs);
> irq_enter();
>
> +#ifdef CONFIG_GENERIC_CLOCKEVENTS
> +#ifdef CONFIG_PPC_MULTIPLATFORM
> + /*
> + * We must write a positive value to the decrementer to clear
> + * the interrupt on the IBM 970 CPU series. In periodic mode,
> + * this happens when the decrementer gets reloaded later, but
> + * in one-shot mode, we have to do it here since an event handler
> + * may skip loading the new value...
> + */
> + if (per_cpu(decrementers, cpu).mode != CLOCK_EVT_MODE_PERIODIC)
> + set_dec(DECREMENTER_MAX);
> +#endif
> + /*
> + * We can't disable the decrementer, so in the period between
> + * CPU being marked offline and calling stop-self, it's taking
> + * timer interrupts...
> + */
> + if (!cpu_is_offline(cpu)) {
> + struct clock_event_device *dev = &per_cpu(decrementers, cpu);
> +
> + dev->event_handler(dev);
> + }
> +#else
> profile_tick(CPU_PROFILING);
> +#endif
> calculate_steal_time();
>
> #ifdef CONFIG_PPC_ISERIES
> @@ -486,6 +591,7 @@ void timer_interrupt(struct pt_regs * re
> if (__USE_RTC() && per_cpu(last_jiffy, cpu) >= 1000000000)
> per_cpu(last_jiffy, cpu) -= 1000000000;
>
> +#ifndef CONFIG_GENERIC_CLOCKEVENTS
> /*
> * We cannot disable the decrementer, so in the period
> * between this cpu's being marked offline in cpu_online_map
> @@ -495,6 +601,7 @@ void timer_interrupt(struct pt_regs * re
> */
> if (!cpu_is_offline(cpu))
> account_process_time(regs);
> +#endif
>
> /*
> * No need to check whether cpu is offline here; boot_cpuid
> @@ -507,15 +614,19 @@ void timer_interrupt(struct pt_regs * re
> tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy;
> if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) {
> tb_last_jiffy = tb_next_jiffy;
> +#ifndef CONFIG_GENERIC_CLOCKEVENTS
> do_timer(1);
> +#endif
> /*timer_recalc_offset(tb_last_jiffy);*/
> timer_check_rtc();
> }
> write_sequnlock(&xtime_lock);
> }
> -
> +
> +#ifndef CONFIG_GENERIC_CLOCKEVENTS
> next_dec = tb_ticks_per_jiffy - ticks;
> set_dec(next_dec);
> +#endif
>
> #ifdef CONFIG_PPC_ISERIES
> if (firmware_has_feature(FW_FEATURE_ISERIES) &&
> hvlpevent_is_pending())
> @@ -770,8 +881,19 @@ void __init time_init(void)
> -xtime.tv_sec, -xtime.tv_nsec);
> write_sequnlock_irqrestore(&xtime_lock, flags);
>
> +#ifdef CONFIG_GENERIC_CLOCKEVENTS
> + decrementer_clockevent.mult = div_sc(ppc_tb_freq, NSEC_PER_SEC,
> + decrementer_clockevent.shift);
> + decrementer_clockevent.max_delta_ns =
> + clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
> + decrementer_clockevent.min_delta_ns =
> + clockevent_delta2ns(0xf, &decrementer_clockevent);
> +
> + register_decrementer();
> +#else
> /* Not exact, but the timer interrupt takes care of this */
> set_dec(tb_ticks_per_jiffy);
> +#endif
> }
>
> #define FEBRUARY 2
>
> _______________________________________________
> Linuxppc-dev mailing list
> [email protected]
> https://ozlabs.org/mailman/listinfo/linuxppc-dev

2007-05-17 18:06:35

by Sergei Shtylyov

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Hello.

Kumar Gala wrote:

>> Index: linux-2.6/arch/powerpc/kernel/time.c
>> ===================================================================
>> --- linux-2.6.orig/arch/powerpc/kernel/time.c
>> +++ linux-2.6/arch/powerpc/kernel/time.c
[...]
>> @@ -128,6 +129,83 @@ unsigned long ppc_tb_freq;
>> static u64 tb_last_jiffy __cacheline_aligned_in_smp;
>> static DEFINE_PER_CPU(u64, last_jiffy);
>>
>> +#ifdef CONFIG_GENERIC_CLOCKEVENTS
>> +
>> +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
>> +#define DECREMENTER_MAX 0xffffffff
>> +#else
>> +#define DECREMENTER_MAX 0x7fffffff /* setting MSB triggers an
>> interrupt */
>> +#endif
>> +
>> +static int decrementer_set_next_event(unsigned long evt,
>> + struct clock_event_device *dev)
>> +{
>> +#if defined(CONFIG_40x)
>> + mtspr(SPRN_PIT, evt); /* 40x has a hidden PIT auto-reload
>> register */
>> +#elif defined(CONFIG_BOOKE)
>> + mtspr(SPRN_DECAR, evt); /* Book E has separate auto-reload
>> register */
>> + set_dec(evt);

> what's the point of setting decar if you're just going to set_dec as well?

Bothered to read the Book E specs? ;-)
Loading DECAR doesn't auto-reload DEC. What I want is to both set the
auto-reload value and start counting down from it at the same time (if the
decrementer is in auto-reload mode).

[big cut-off]

WBR, Sergei

2007-05-17 18:19:15

by Kumar Gala

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver


On May 17, 2007, at 1:07 PM, Sergei Shtylyov wrote:

> Hello.
>
> Kumar Gala wrote:
>
>>> Index: linux-2.6/arch/powerpc/kernel/time.c
>>> ===================================================================
>>> --- linux-2.6.orig/arch/powerpc/kernel/time.c
>>> +++ linux-2.6/arch/powerpc/kernel/time.c
> [...]
>>> @@ -128,6 +129,83 @@ unsigned long ppc_tb_freq;
>>> static u64 tb_last_jiffy __cacheline_aligned_in_smp;
>>> static DEFINE_PER_CPU(u64, last_jiffy);
>>>
>>> +#ifdef CONFIG_GENERIC_CLOCKEVENTS
>>> +
>>> +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
>>> +#define DECREMENTER_MAX 0xffffffff
>>> +#else
>>> +#define DECREMENTER_MAX 0x7fffffff /* setting MSB triggers an
>>> interrupt */
>>> +#endif
>>> +
>>> +static int decrementer_set_next_event(unsigned long evt,
>>> + struct clock_event_device *dev)
>>> +{
>>> +#if defined(CONFIG_40x)
>>> + mtspr(SPRN_PIT, evt); /* 40x has a hidden PIT auto-
>>> reload register */
>>> +#elif defined(CONFIG_BOOKE)
>>> + mtspr(SPRN_DECAR, evt); /* Book E has separate auto-reload
>>> register */
>>> + set_dec(evt);
>
>> what's the point of setting decar if you're just going to set_dec
>> as well?
>
> Bothered to read the Book E specs? ;-)

I have :)

> Loading DECAR doesn't auto-reload DEC. What I want is both set
> the auto-reload value and start counting down from it at the same
> time (if the decrementer is in auto-reload mode).

I see you're using DECAR to make it periodic.

> [big cut-off]
>
> WBR, Sergei

I haven't looked at all the new clock/timer code, is there any
utility in having support for more than one clock source?

- k

2007-05-17 18:25:42

by Sergei Shtylyov

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Hello.

Kumar Gala wrote:

> I haven't looked at all the new clock/timer code, is there any utility
> in having support for more than one clock source?

Of course, you may register as many as you like.

> - k

WBR, Sergei

2007-05-17 18:33:26

by Kumar Gala

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver


On May 17, 2007, at 1:26 PM, Sergei Shtylyov wrote:

> Hello.
>
> Kumar Gala wrote:
>
>> I haven't looked at all the new clock/timer code, is there any
>> utility in having support for more than one clock source?
>
> Of course, you may register as many as you like.

Sure, but is there any utility in registering more than the
decrementer on PPC?

- k

2007-05-17 18:41:21

by Sergei Shtylyov

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Hello.

Kumar Gala wrote:

>> Kumar Gala wrote:

>>> I haven't looked at all the new clock/timer code, is there any
>>> utility in having support for more than one clock source?

>> Of course, you may register as many as you like.

> Sure, but is there any utility in registering more than the decrementer
> on PPC?

Not yet. I'm not sure I know any other PPC CPU facility fitting for
clockevents. In theory, FIT could be used -- but its period is measured in
powers of 2, IIRC.

> - k

WBR, Sergei

2007-05-17 19:27:28

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

> + * We must write a positive value to the decrementer to clear
> + * the interrupt on the IBM 970 CPU series. In periodic mode,
> + * this happens when the decrementer gets reloaded later, but
> + * in one-shot mode, we have to do it here since an event handler
> + * may skip loading the new value...

Nothing special about 970 here -- on *every* PowerPC,
a decrementer exception exists as long as the high
bit of the decrementer equals 1. BookE is different
of course. Some other CPUs might deviate from the
architecture as well.


Segher

2007-05-17 19:42:22

by Sergei Shtylyov

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Segher Boessenkool wrote:
>> + * We must write a positive value to the decrementer to clear
>> + * the interrupt on the IBM 970 CPU series. In periodic mode,
>> + * this happens when the decrementer gets reloaded later, but
>> + * in one-shot mode, we have to do it here since an event handler
>> + * may skip loading the new value...

> Nothing special about 970 here -- on *every* PowerPC,
> a decrementer exception exists as long as the high
> bit of the decrementer equals 1. BookE is different
> of course. Some other CPUs might deviate from the
> architecture as well.

Quoting "PowerPC Operating Environment Architecture":

The Decrementer counts down. On POWER4, a Decrementer exception occurs when
DEC0 changes from 0 to 1. On POWER4+, operation is as follows.

The exception effects of the Decrementer are said to be consistent with the
contents of the Decrementer if one of the following statements is true.

* DEC0=0 and a Decrementer exception does not exist.

* DEC0=1 and a Decrementer exception exists.

If DEC0=0, a context synchronizing instruction or event ensures that the
exception effects of the Decrementer are consistent with the contents of the
Decrementer. Otherwise, when the contents of DEC0 change, the exception
effects of the Decrementer become consistent with the new contents of the
Decrementer reasonably soon after the change.

> Segher

WBR, Sergei

2007-05-17 19:51:17

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

>>> + * We must write a positive value to the decrementer to clear
>>> + * the interrupt on the IBM 970 CPU series. In periodic mode,
>>> + * this happens when the decrementer gets reloaded later, but
>>> + * in one-shot mode, we have to do it here since an event
>>> handler
>>> + * may skip loading the new value...
>
>> Nothing special about 970 here -- on *every* PowerPC,
>> a decrementer exception exists as long as the high
>> bit of the decrementer equals 1. BookE is different
>> of course. Some other CPUs might deviate from the
>> architecture as well.
>
> Quoting "PowerPC Operating Environment Architecture":

<snip>

> Otherwise, when the contents of DEC0 change, the exception effects of
> the Decrementer become consistent with the new contents of the
> Decrementer reasonably soon after the change.

And that is guaranteed on all PowerPC as far as I can see.
The main thing is that a decrementer exception won't go
away until the high bit becomes 0.


Segher

2007-05-17 19:59:16

by Sergei Shtylyov

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Segher Boessenkool wrote:
>>>> + * We must write a positive value to the decrementer to clear
>>>> + * the interrupt on the IBM 970 CPU series. In periodic mode,
>>>> + * this happens when the decrementer gets reloaded later, but
>>>> + * in one-shot mode, we have to do it here since an event handler
>>>> + * may skip loading the new value...
>>
>>
>>> Nothing special about 970 here -- on *every* PowerPC,
>>> a decrementer exception exists as long as the high
>>> bit of the decrementer equals 1. BookE is different
>>> of course. Some other CPUs might deviate from the
>>> architecture as well.
>>
>>
>> Quoting "PowerPC Operating Environment Architecture":

> <snip>

>> Otherwise, when the contents of DEC0 change, the exception effects of
>> the Decrementer become consistent with the new contents of the
>> Decrementer reasonably soon after the change.

> And that is guaranteed on all PowerPC as far as I can see.
> The main thing is that a decrementer exception won't go
> away until the high bit becomes 0.

On both POWER4 and POWER4+, the Decrementer must be implemented such that
requirements 1 to 3 below are satisfied. On POWER4, requirements 4 and 5 must
also be satisfied.

1. The operation of the Time Base and the Decrementer is coherent, i.e., the
counters are driven by the same fundamental time base.
2. Loading a GPR from the Decrementer shall have no effect on the accuracy of
the Decrementer.

3. Storing a GPR to the Decrementer shall replace the value in the Decrementer
with the value in the GPR.

4. Whenever bit 0 of the Decrementer changes from 0 to 1, an interrupt request
is signaled. If multiple Decrementer interrupt requests are received before
the first can be reported, only one interrupt is reported. The occurrence of a
Decrementer interrupt cancels the request.

5. If the Decrementer is altered by software and the contents of bit 0 are
changed from 0 to 1, an interrupt request is signaled.

(4) clearly contradicts your point. I don't mind changing #ifdef though
(so it'll cover all non Book E cases)

> Segher

WBR, Sergei

2007-05-17 20:54:32

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

>>> Otherwise, when the contents of DEC0 change, the exception effects
>>> of the Decrementer become consistent with the new contents of the
>>> Decrementer reasonably soon after the change.
>
>> And that is guaranteed on all PowerPC as far as I can see.
>> The main thing is that a decrementer exception won't go
>> away until the high bit becomes 0.
>
> On both POWER4 and POWER4+, the Decrementer must be implemented such
> that requirements 1 to 3 below are satisfied. On POWER4, requirements
> 4 and 5 must also be satisfied.

<snip>

> 4. Whenever bit 0 of the Decrementer changes from 0 to 1, an interrupt
> request is signaled. If multiple Decrementer interrupt requests are
> received before the first can be reported, only one interrupt is
> reported. The occurrence of a Decrementer interrupt cancels the
> request.
>
> 5. If the Decrementer is altered by software and the contents of bit 0
> are changed from 0 to 1, an interrupt request is signaled.

From the POWER ISA 2.03, the latest public version of the
architecture definition:

When the contents of DEC32 change from 0 to 1, a Decrementer
exception will come into existence within a reasonable period
of time. When the contents of DEC32 change from 1 to 0, an
existing Decrementer exception will cease to exist within a
reasonable period of time, but not later than the completion
of the next context synchronizing instruction or event.

> (4) clearly contradicts your point.

Yes, on some implementations there can be other conditions that
make a decrementer exception go away; there is no contradiction
here (thankfully). My wording was sloppy.

> I don't mind changing #ifdef though (so it'll cover all non Book E
> cases)

That was exactly my point; thank you.


Segher

2007-05-18 03:24:24

by Albert Cahalan

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Sergei Shtylyov writes:
> Kumar Gala wrote:
>> [Sergei Shtylyov]
>>> Kumar Gala wrote:

>>>> I haven't looked at all the new clock/timer code, is there any
>>>> utility in having support for more than one clock source?
>>>
>>> Of course, you may register as many as you like.
>>
>> Sure, but is there any utility in registering more than the
>> decrementer on PPC?
>
> Not yet. I'm not sure I know any other PPC CPU facility fitting
> for clockevents. In theory, FIT could be used -- but its period
> is measured in powers of 2, IIRC.

I'd really like to have that as an option. It would allow oprofile
to safely use hardware events on the MPC74xx "G4" processors.
Alternately it would allow thermal events. It is safe to use at
most one of the three (decrementer,profiling,thermal) interrupts.
If two were to hit at the same time, badness happens.

It's possible to wrap the interrupt in something that divides
down, calling the normal code only some of the time. I think one
of the FIT choices is about 4 kHz on my system, which would be OK.

Full oprofile functionality would be wonderful.

2007-05-18 05:05:56

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver


>
> Yes, on some implementations there can be other conditions that
> make a decrementer exception go away; there is no contradiction
> here (thankfully). My wording was sloppy.

Some CPUs have the DEC exceptions basically edge triggered (yeah I know
it sucks). That's why, among others, the IRQ soft-disable code has code
to re-trigger DEC exceptions ASAP (by setting it to 1.. note that we
could probably use 0 here, we've been a bit conservative).

Ben.


2007-05-18 05:39:46

by Dave Liu

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

> > Yes, on some implementations there can be other conditions that
> > make a decrementer exception go away; there is no contradiction
> > here (thankfully). My wording was sloppy.
>
> Some CPUs have the DEC exceptions basically edge triggered (yeah I know

for example?

> it sucks). That's why, among others, the IRQ soft-disable code has code
> to re-trigger DEC exceptions ASAP (by setting it to 1.. note that we
> could probably use 0 here, we've been a bit conservative).


2007-05-18 07:09:22

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

On Fri, 2007-05-18 at 13:39 +0800, Dave Liu wrote:
> > > Yes, on some implementations there can be other conditions that
> > > make a decrementer exception go away; there is no contradiction
> > > here (thankfully). My wording was sloppy.
> >
> > Some CPUs have the DEC exceptions basically edge triggered (yeah I know
>
> for example?
>
> > it sucks). That's why, among others, the IRQ soft-disable code has code
> > to re-trigger DEC exceptions ASAP (by setting it to 1.. note that we
> > could probably use 0 here, we've been a bit conservative).

I'm not 100% certain... Paulus thinks all the old 6xx are like that, and
maybe POWER4. If I look at the oldest BookIV I can find (the 601), it
says that an exception is generated when the MSB transitions from 0 to
1. It's not clear whether the exception sticks while that bit is 1 or is
indeed considered as an "edge" event that gets cleared as soon as
delivered.

Ben.


2007-05-18 12:39:19

by Matt Sealey

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Sergei Shtylyov wrote:
> Hello.
>
> Kumar Gala wrote:
>
>>> Kumar Gala wrote:
>
>>>> I haven't looked at all the new clock/timer code, is there any
>>>> utility in having support for more than one clock source?
>
>>> Of course, you may register as many as you like.
>
>> Sure, but is there any utility in registering more than the decrementer
>> on PPC?
>
> Not yet. I'm not sure I know any other PPC CPU facility fitting for
> clockevents. In theory, FIT could be used -- but its period is measured in
> powers of 2, IIRC.

Wouldn't it be possible to use SoC timer functionality, too, and not limit
this to what is only present on the CPU die?

I guess the real question is, how high resolution does a high resolution
timer need to be, and how often do you want to be handling interrupts?

The 52XX has 8 general purpose timers and 2 slice timers. Both will
generate an interrupt once the clock hits a preset value, the timing
of which is determined by prescalers and the IPB clock..

--
Matt Sealey <[email protected]>
Genesi, Manager, Developer Relations

2007-05-18 13:40:12

by Sergei Shtylyov

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Hello.

Benjamin Herrenschmidt wrote:

>>>>Yes, on some implementations there can be other conditions that
>>>>make a decrementer exception go away; there is no contradiction
>>>>here (thankfully). My wording was sloppy.

>>>Some CPUs have the DEC exceptions basically edge triggered (yeah I know

>>for example?

>>>it sucks). That's why, among others, the IRQ soft-disable code has code
>>>to re-trigger DEC exceptions ASAP (by setting it to 1.. note that we
>>>could probably use 0 here, we've been a bit conservative).

Yeah, the classic decrementer is programmed off-by-one.

> I'm not 100% certain... Paulus thinks all the old 6xx are like that, and
> maybe POWER4. If I look at the oldest BookIV I can find (the 601), it

From the "PowerPC Operating Environment Architecture" that I've already
quoted, it follows that a POWER4-compatible decrementer exception *must* be edge
triggered.

> says that an exception is generated when the MSB transitions from 0 to
> 1. It's not clear wether the exception sticks while that bit is 1 or is

Freescale MPC 7450 manual says the same, for example.

> indeed considered as an "edge" event that gets cleared as soon as
> delivered.

> Ben.

WBR, Sergei

2007-05-18 13:44:15

by Sergei Shtylyov

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Hello.

Matt Sealey wrote:

>>>>>I haven't looked at all the new clock/timer code, is there any
>>>>>utility in having support for more than one clock source?

>>>> Of course, you may register as many as you like.

>>>Sure, but is there any utility in registering more than the decrementer
>>>on PPC?

>> Not yet. I'm not sure I know any other PPC CPU facility fitting for
>>clockevents. In theory, FIT could be used -- but its period is measured in
>>powers of 2, IIRC.

> Wouldn't it be possible to use SoC timer functionality, too, and not limit
> this to what is only present on the CPU die?

If you have some SoC timers, you're welcome to add support for it. :-)

> I guess the real question is, how high resolution does a high resolution
> timer need to be,

In the order of microseconds.

> and how often do you want to be handling interrupts?

That depends on a kind of functionality you need: HRT and/or dynamic tick...

> The 52XX has 8 general purpose timers and 2 slice timers. Both will
> generate an interrupt once the clock hits a preset value, the timing
> of which is determined by prescalers and the IPB clock..

WBR, Sergei

2007-05-18 13:47:13

by Sergei Shtylyov

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Hello, I wrote:

>>>>>Yes, on some implementations there can be other conditions that
>>>>>make a decrementer exception go away; there is no contradiction
>>>>>here (thankfully). My wording was sloppy.

>>>>Some CPUs have the DEC exceptions basically edge triggered (yeah I know

>>>for example?

>>>>it sucks). That's why, among others, the IRQ soft-disable code has code
>>>>to re-trigger DEC exceptions ASAP (by setting it to 1.. note that we
>>>>could probably use 0 here, we've been a bit conservative).

> Yeah, the classic decrementer is programmed off-by-one.

>>I'm not 100% certain... Paulus thinks all the old 6xx are like that, and
>>maybe POWER4. If I look at the oldest BookIV I can find (the 601), it

> From the "PowerPC Operating Environment Architecture" that I've already
> quoated t follows that POWER4-compatible decremented exception *must* be edge
> triggered.

... and cleared when delivered.

>>says that an exception is generated when the MSB transitions from 0 to
>>1. It's not clear wether the exception sticks while that bit is 1 or is
>>indeed considered as an "edge" event that gets cleared as soon as
>>delivered.

WBR, Sergei

2007-05-18 14:29:16

by Matt Sealey

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver


Sergei Shtylyov wrote:
> Hello.
>
>>>> Sure, but is there any utility in registering more than the
>>>> decrementer on PPC?
>
>>> Not yet. I'm not sure I know any other PPC CPU facility fitting
>>> for clockevents. In theory, FIT could be used -- but its period is
>>> measured in powers of 2, IIRC.
>
>> Wouldn't it be possible to use SoC timer functionality, too, and not
>> limit this to what is only present on the CPU die?
>
> If you have some SoC timers, you're welcome to add support for it. :-)

Well, that can't be that hard.. I just make a clock_event_device and
clockevents_register_device?

I'm still learning my way through making platform devices (it would be
a shame to write an oldskool module when I can tie it to the device
tree..) but this does look fairly easy.

>> I guess the real question is, how high resolution does a high resolution
>> timer need to be,
>
> In the order of microseconds.

I think both the MPC52xx GPT0-7 and the SLT0-1 fulfil this fairly
easily.

>> and how often do you want to be handling interrupts?
>
> That depends on a kind of functionality you need: HRT and/or dynamic
> tick...

To be honest I never understood how dynamic ticks works, I am just
kind of excited about the functionality it brings :)

High resolution timers, that I can do. It's fairly easy to see the
benefits to presenting the system with multiple timers that can fire
with giddy amounts of precision (slice timer is between 16ns and
508ms at least, this is perfectly good considering comments above).

If I add a clock_event_device for each MPC52xx GPT and the slice timers,
what then? Where do they get used, and how? :)

I'm just curious as to how the system picks up and what the current
users are. It would be a little bit of a non-starter to provide 10
new high resolution timers, and for there be very little use for them,
but I assume because Intel HPET is supported, there must be at least
some PLANNED users for at least 2 or 3 independent, high resolution
timer interrupt sources beyond dynticks and nanosleep, but perhaps it
is too early (hrtimers were only integrated 6 months? ago, dynticks
is still brand new in mainline?) and I am being a bit impatient?

--
Matt Sealey <[email protected]>
Genesi, Manager, Developer Relations

2007-05-18 14:43:24

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

On Fri, 2007-05-18 at 15:28 +0100, Matt Sealey wrote:
> >> I guess the real question is, how high resolution does a high resolution
> >> timer need to be,
> >
> > In the order of microseconds.
>
> I think both the MPC52xx GPT0-7 and the SLT0-1 fulfil this fairly
> easily.

There is some basic work for MPC5200 available:

http://www.pengutronix.de/oselas/bsp/phytec/index_en.html#phyCORE-MPC5200B-tiny

tglx


2007-05-18 14:51:20

by Sergei Shtylyov

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Hello.

Albert Cahalan wrote:

>>>>> I haven't looked at all the new clock/timer code, is there any
>>>>> utility in having support for more than one clock source?

>>>> Of course, you may register as many as you like.

>>> Sure, but is there any utility in registering more than the
>>> decrementer on PPC?

>> Not yet. I'm not sure I know any other PPC CPU facility fitting
>> for clockevents. In theory, FIT could be used -- but its period
>> is measured in powers of 2, IIRC.

> I'd really like to have that as an option. It would allow oprofile
> to safely use hardware events on the MPC74xx "G4" processors.
> Alternately it would allow thermal events. It is safe to use at
> most one of the three (decrementer,profiling,thermal) interrupts.
> If two were to hit at the same time, badness happens.

Unfortunately, FIT exists only on Book E CPUs and MPC74xx aren't Book E, IIUC.

> It's possible to wrapper the interrupt in something that divides
> down, calling the normal code only some of the time. I think one
> of the FIT choices is about 4 kHz on my system, which would be OK.

Erm, are you sure you have FIT (or is your system not MPC74xx based)?

> Full oprofile functionality would be wonderful.

WBR, Sergei

2007-05-18 14:59:26

by Daniel Walker

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

On Thu, 2007-05-17 at 13:17 -0500, Kumar Gala wrote:

>
> I haven't looked at all the new clock/timer code, is there any
> utility in having support for more than one clock source?

There is if the main clocksource has some issues where it can't be used.
On x86 there are lots of different issues with the clocks, so they have
to get switched once in a while. For instance, if the TSC changes
frequencies then it can't be used, or if the TSC stops in different
power states , it can't be used..

Anything like that on PowerPC?

Daniel

2007-05-18 15:04:41

by Sergei Shtylyov

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Daniel Walker wrote:

>>I haven't looked at all the new clock/timer code, is there any
>>utility in having support for more than one clock source?

> There is if the main clocksource has some issues where it can't be used.

You mean, having more than one clocksource is *useful* in this case?

> On x86 there are lots of different issues with the clocks, so they have
> to get switches once in a while. For instance, if the TSC changes
> frequencies then it can't be used, or if the TSC stops in different
> power states , it can't be used..

Yeah, and there's the rating systems for both clock sources/events.

> Anything like that on PowerPC?

Well, the decrementer frequency may change, at least in theory (if the bus
clock changes).

> Daniel

WBR, Sergei

2007-05-18 15:37:32

by Matt Sealey

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

I already have that stuff, but it only implements the decrementer (in fact
it's the patch submitted at the beginning of this thread).

I got it because I was far more interested in the GPIO handling..

--
Matt Sealey <[email protected]>
Genesi, Manager, Developer Relations

Thomas Gleixner wrote:
> On Fri, 2007-05-18 at 15:28 +0100, Matt Sealey wrote:
>>>> I guess the real question is, how high resolution does a high resolution
>>>> timer need to be,
>>> In the order of microseconds.
>> I think both the MPC52xx GPT0-7 and the SLT0-1 fulfil this fairly
>> easily.
>
> There is some basic work for MPC5200 available:
>
> http://www.pengutronix.de/oselas/bsp/phytec/index_en.html#phyCORE-MPC5200B-tiny
>
> tglx
>
>

2007-05-18 15:42:04

by Daniel Walker

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

On Fri, 2007-05-18 at 19:06 +0400, Sergei Shtylyov wrote:
> Daniel Walker wrote:
>
> >>I haven't looked at all the new clock/timer code, is there any
> >>utility in having support for more than one clock source?
>
> > There is if the main clocksource has some issues where it can't be used.
>
> You mean, having more than one clocksource is *useful* in this case?

Yes.

> Well, the decrementer frequency may change, at least in theory (if the bus
> clock changes).

Does that happen very often?

Daniel

2007-05-18 15:47:16

by Sergei Shtylyov

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Hello.

Daniel Walker wrote:

>> Well, the decrementer frequency may change, at least in theory (if the bus
>>clock changes).

> Does that happen very often?

Never, I hope. :-)

> Daniel

WBR, Sergei

2007-05-18 16:31:28

by Kumar Gala

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver


On May 18, 2007, at 9:48 AM, Thomas Gleixner wrote:

> On Fri, 2007-05-18 at 15:28 +0100, Matt Sealey wrote:
>>>> I guess the real question is, how high resolution does a high
>>>> resolution
>>>> timer need to be,
>>>
>>> In the order of microseconds.
>>
>> I think both the MPC52xx GPT0-7 and the SLT0-1 fulfil this fairly
>> easily.
>
> There is some basic work for MPC5200 available:
>
> http://www.pengutronix.de/oselas/bsp/phytec/index_en.html#phyCORE-
> MPC5200B-tiny

I asked this earlier, but figured you might have a better insight.
Is there value in having 'drivers' for more than one clock source?
I'd say most (if not all) the PPC SoCs have timers on the system side
that we could provide drivers for, I'm just not sure if that does
anything for anyone.

- k

2007-05-18 16:45:49

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

On Fri, 2007-05-18 at 11:31 -0500, Kumar Gala wrote:
> I asked this earlier, but figured you might have a better insight.
> Is their value in having 'drivers' for more than one clock source?
> I'd say most (of not all) the PPC SoCs have timers on the system side
> that we could provide drivers for, I'm just not sure if that does
> anything for anyone.

Not necessarily for the tick/highres stuff, but clock events allows
other users as well to utilize such facilities. We have no users yet,
but there are drivers, which utilize special timer hardware with nice
#ifdeffery to allow the driver to be shared. This might be a useful
thing for such stuff.

tglx


2007-05-18 16:52:52

by Matt Sealey

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Kumar Gala wrote:
>
> On May 18, 2007, at 9:48 AM, Thomas Gleixner wrote:
>
>> On Fri, 2007-05-18 at 15:28 +0100, Matt Sealey wrote:
>>>
>>> I think both the MPC52xx GPT0-7 and the SLT0-1 fulfil this fairly
>>> easily.
>>
>> There is some basic work for MPC5200 available:
>>
>> http://www.pengutronix.de/oselas/bsp/phytec/index_en.html#phyCORE-MPC5200B-tiny
>>
>
> I asked this earlier, but figured you might have a better insight. Is
> their value in having 'drivers' for more than one clock source? I'd say
> most (of not all) the PPC SoCs have timers on the system side that we
> could provide drivers for, I'm just not sure if that does anything for
> anyone.

As I asked after, I'm also very intrigued as to what is going to end
up using these timers, but likewise, not much use writing a driver if
everyone can use the extremely high resolution decrementer all at
once..

As I said before too, at least Intel has decided there is a great need
for up to 256 high resolution timer sources on a system, but since this
is a fairly new concept to Linux (and hrtimers and dynticks too) it
only seems to be used in the case of i8254/RTC emulation, mostly on
x86-64.

I'm looking at it now and finding "users" of hrtimers is looking very
thin on the ground. Maybe it's justified on the basis that more is
better, and having support is preferable to not having it (even if
nobody really uses it) but it seems the entire gamut of timing
possibility in Linux can be handled through a simple, and single,
high resolution timer and a queue of events..

So do we need some more? :D

--
Matt Sealey <[email protected]>
Genesi, Manager, Developer Relations

2007-05-18 16:54:53

by Matt Sealey

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver


Thomas Gleixner wrote:
> On Fri, 2007-05-18 at 11:31 -0500, Kumar Gala wrote:
>> I asked this earlier, but figured you might have a better insight.
>> Is their value in having 'drivers' for more than one clock source?
>> I'd say most (of not all) the PPC SoCs have timers on the system side
>> that we could provide drivers for, I'm just not sure if that does
>> anything for anyone.
>
> Not necessarily for the tick/highres stuff, but clock events allows
> other users as well to utilize such facilities. We have no users yet,
> but there are drivers, which utilize special timer hardware with nice
> #ifdeffery to allow the driver to be shared. This might be a useful
> thing for such stuff.

*ahem*

Please indulge my laziness and recommend me one or two to look at? I'm
no good at guessing what to grep for to find an example (I wonder if
we have any candidates in the ppc tree mostly..)

--
Matt Sealey <[email protected]>
Genesi, Manager, Developer Relations

2007-05-18 20:27:44

by David Miller

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

From: Kumar Gala <[email protected]>
Date: Fri, 18 May 2007 11:31:19 -0500

> I asked this earlier, but figured you might have a better insight.
> Is their value in having 'drivers' for more than one clock source?
> I'd say most (of not all) the PPC SoCs have timers on the system side
> that we could provide drivers for, I'm just not sure if that does
> anything for anyone.

I'm in a similar situation on sparc64, so I'm interested in
this as well :-)

2007-05-18 23:28:55

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

On Fri, 2007-05-18 at 17:41 +0400, Sergei Shtylyov wrote:
> From the "PowerPC Operating Environment Architecture" that I've
> already
> quoated t follows that POWER4-compatible decremented exception *must*
> be edge
> triggered.
>
> > says that an exception is generated when the MSB transitions from 0
> to
> > 1. It's not clear wether the exception sticks while that bit is 1 or
> is
>
> Freescale MPC 7450 manual says the same, for example.

I find it extremely silly to implement it as edge anyway. The EE line is
level triggered, and having a mix of edge and level on the same
exception without a clean way to retrigger the DEC one other than
waiting one tick is just causing trouble.

Ben.


2007-05-18 23:50:13

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

> I find it extremely silly to implement it as edge anyway. The EE line
> is
> level triggered, and having a mix of edge and level on the same
> exception without a clean way to retrigger the DEC one other than
> waiting one tick is just causing trouble.

It isn't edge triggered, it just automatically clears the
decrementer exception condition at its source when taken
(on certain implementations -- others leave it to software
to clear it).


Segher

2007-05-19 00:14:21

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

On Sat, 2007-05-19 at 01:49 +0200, Segher Boessenkool wrote:
> > I find it extremely silly to implement it as edge anyway. The EE line
> > is
> > level triggered, and having a mix of edge and level on the same
> > exception without a clean way to retrigger the DEC one other than
> > waiting one tick is just causing trouble.
>
> It isn't edge triggered, it just automatically clears the
> decrementer exception condition at its source when taken

Which is exactly the same thing software-wise as edge triggered...

Ben.


2007-05-19 01:45:50

by Albert Cahalan

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

On 5/18/07, Sergei Shtylyov <[email protected]> wrote:
> Albert Cahalan wrote:

> >>> Sure, but is there any utility in registering more than the
> >>> decrementer on PPC?
>
> >> Not yet. I'm not sure I know any other PPC CPU facility fitting
> >> for clockevents. In theory, FIT could be used -- but its period
> >> is measured in powers of 2, IIRC.
>
> > I'd really like to have that as an option. It would allow oprofile
> > to safely use hardware events on the MPC74xx "G4" processors.
> > Alternately it would allow thermal events. It is safe to use at
> > most one of the three (decrementer,profiling,thermal) interrupts.
> > If two were to hit at the same time, badness happens.
>
> Unfortunately, FIT exists only on Book E CPUs and MPC74xx aren't Book E, IIUC.

By the name "FIT" perhaps, but MPC74xx has essentially
the same thing.

> > It's possible to wrapper the interrupt in something that divides
> > down, calling the normal code only some of the time. I think one
> > of the FIT choices is about 4 kHz on my system, which would be OK.
>
> Erm, are you sure you have FIT (or is your system not MPC74xx based)?

Set MMCR0[TBEE], set MMCR0[PMXE], and choose a TBL bit via MMCR0[TBSEL].
TBSEL is a 2-bit field which selects a timebase bit to use. The timebase
bits that can be chosen are numbered 15, 19, 23, and 31. In the notation
used by every other CPU vendor those would be bits 0, 8, 12, and 16.

Example: My system uses a TBL frequency of 24907667. This gives choices
of 12453833, 48648, 3040, and 190 Hz. The lowest three of those could
be useful, with 48648 only for profiling and extreme real-time.

It's also possible to trigger on the CPU cycle counter, but this would
cost one of the performance counters. MPC7400 has 4, later CPUs have 6
or more, and I think xPC7x0 had only 2. This method is a bit nicer,
since then one could trigger interrupts on arbitrary clock cycles
without needing to write the timebase register.

2007-05-19 03:34:43

by Paul Mackerras

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Daniel Walker writes:

> On Fri, 2007-05-18 at 19:06 +0400, Sergei Shtylyov wrote:
> > Well, the decrementer frequency may change, at least in theory (if the bus
> > clock changes).
>
> Does that happen very often?

If it did, gettimeofday would start reporting seriously wrong values,
since the timebase and the decrementer count at the same rate, and we
don't have any provision for fixing things up if that rate changed.
Fortunately there are no powerpc platforms where the rate is
variable.

Paul.

2007-05-19 03:34:56

by Paul Mackerras

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Sergei Shtylyov writes:

> Yeah, the classic decrementer is programmed off-by-one.

Actually it's programmed off by slightly less than one half on
average, but it doesn't matter since the error doesn't accumulate.

Paul.

2007-05-19 12:28:24

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

>> Unfortunately, FIT exists only on Book E CPUs and MPC74xx aren't Book
>> E, IIUC.
>
> By the name "FIT" perhaps, but MPC74xx has essentially
> the same thing.

> Set MMCR0[TBEE], set MMCR0[PMXE], and choose a TBL bit via
> MMCR0[TBSEL].

That's the performance monitor, which could very well be
in use already (for performance monitoring stuff, who
would have guessed).

> It's also possible to trigger on the CPU cycle counter, but this would
> cost one of the performance counters. MPC7400 has 4, later CPUs have 6
> or more, and I think xPC7x0 had only 2.

7xx has at least four as well.


Segher

2007-05-19 18:22:23

by Albert Cahalan

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

On 5/19/07, Segher Boessenkool <[email protected]> wrote:
> [Albert Cahalan]

> > Set MMCR0[TBEE], set MMCR0[PMXE], and choose a TBL bit via
> > MMCR0[TBSEL].
>
> That's the performance monitor, which could very well be
> in use already (for performance monitoring stuff, who
> would have guessed).

It is the performance monitor, which sadly can not be used
very well unless the decrementer is disabled. The hardware
is buggy. As long as we use the decrementer for timekeeping,
we can not safely generate performance monitor interrupts.

I'd like to have the performance monitor available. It's NOT
available unless we use part of it for timekeeping. That's the
choice the hardware gives us.

We can get TBL bit flip interrupts for free. We don't even need
to give up one of the event counters. If we do give up one of the
event counters (a rather reasonable idea), then we can count
one of those TBL bit flips or the cycle counter.

2007-05-20 02:45:44

by Daniel Walker

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

On Sat, 2007-05-19 at 13:33 +1000, Paul Mackerras wrote:
> Daniel Walker writes:
>
> > On Fri, 2007-05-18 at 19:06 +0400, Sergei Shtylyov wrote:
> > > Well, the decrementer frequency may change, at least in theory (if the bus
> > > clock changes).
> >
> > Does that happen very often?
>
> If it did, gettimeofday would start reporting seriously wrong values,
> since the timebase and the decrementer count at the same rate, and we
> don't have any provision for fixing things up if that rate changed.
> Fortunately there are no powerpc platforms where the rate is
> variable.

In terms of clocksources, gettimeofday() would have to switch to another
clocksource if the decrementer started to act that way .. That's why it
is possible to register more than one clocksource, to allow for the
switching. The decrementer frequency doesn't change even with cpufreq?

Daniel

2007-05-20 03:04:31

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

On Sat, 2007-05-19 at 19:43 -0700, Daniel Walker wrote:

> In terms of clocksources, gettimeofday() would have to switch to another
> clocksource if the decrementer started to act that way .. That's why it
> is possible to register more than one clocksource, to allow for the
> switching. The decrementer frequency doesn't change even with cpufreq?

It's more than just gettimeofday. The linux ppc kernel port has strong
assumptions all over the place that the timebase and decrementer (which
always tick at the same rate) have a constant frequency. It might be
possible to "fix" those assumptions but right now, that is the case.

For example, nowadays, udelay() also uses the timebase. Not only
gettimeofday() & friends. The scheduler ticking too. The precise process
accounting as well, etc...

In fact, while it's never worded explicitly in the spec, it's always
been strongly in the "spirit" of the architecture that the timebase and
decrementer have a constant frequency. This is why processors like the
970 allow for an external sourcing for when they are used in setups
where the various clocks are slewed for power management.

Ben.


2007-05-20 15:03:19

by Matt Sealey

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver


Benjamin Herrenschmidt wrote:
> On Sat, 2007-05-19 at 19:43 -0700, Daniel Walker wrote:
>
>> In terms of clocksources, gettimeofday() would have to switch to another
>> clocksource if the decrementer started to act that way .. That's why it
>> is possible to register more than one clocksource, to allow for the
>> switching. The decrementer frequency doesn't change even with cpufreq?
>
> It's more than just gettimeofday. The linux ppc kernel port has strong
> assumptions all over the place that the timbase and decrementer (which
> always tick at the same rate) have a constant frequency. It might be
> possible to "fix" those assumptions but right now, that is the case.
>
> For example, nowadays, udelay() also uses the timebase. Not only
> gettimeofday() & friends. The scheduler ticking too. The precise process
> accounting as well, etc...

So.. if we get enough clocksources into the tree, can any of those
parts of the code be reworked to use clocksources/clockevents and
hrtimers quickly and easily? I noticed the patch just posted does
some of it.. but not as much as Ben just mentioned.

Or is it a development nightmare?

I'm fairly sure on a PPC970 box even though the decrementer is
monotonic and never changes frequency, one day it just might, and
it would be better to anticipate this (and allow people to
distribute their timing requirements across an entire system
and not just the CPU core anyway, which I think is probably a
good thing from a system integration and possibly the point of
view of redundancy..)

--
Matt Sealey <[email protected]>
Genesi, Manager, Developer Relations

2007-05-20 16:02:58

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

> In fact, while it's never worded explicitely in the spec, it's always
> been strongly in the "spirit" of the architecture that the timebase and
> decrementer have a constant frequency.

The architecture mentions varying time base frequencies,
and how to deal with this, actually. It makes no
recommendations one way or the other.

Fixed frequencies are easier for almost everything of
course :-)

> This is why processors like the
> 970 allow for an external sourcing for when they are used in setups
> where the various clocks are slewed for power management.

Clock spreading on the core clock is the bigger problem,
lack of accuracy on the order of 1% is unacceptable for
certain applications.


Segher

2007-05-20 21:26:55

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

> So.. if we get enough clocksources into the tree, can any of those
> parts of the code be reworked to use clocksources/clockevents and
> hrtimers quickly and easily? I noticed the patch just posted does
> some of it.. but not as much as Ben just mentioned.

Well, some of these are expected to be small & fast and work in all sort
of crazy circumstances, like udelay etc... I'd rather keep that on top
of the TB. Do we have actual examples where the TB freq is changing ?
Besides, on powerpc, we don't have another clock source that is as fast
to access and we have userland using the TB for gettimeofday via the
vdso, so I'd say bad idea ... Just keep the damn thing fixed frequency.

> Or is it a development nightmare?
>
> I'm fairly sure on a PPC970 box even though the decrementer is
> monotonic and never changes frequency, one day it just might, and
> it would be better to anticipate this (and allow people to
> distribute their timing requirements across an entire system
> and not just the CPU core anyway, which I think is probably a
> good thing from a system integration and possibly the point of
> view of redundancy..)

On a -sane- 970 box (which seems to be the case of all of them that
matter so far), the TB is sourced externally specifically for that
reason : to avoid it changing. The DEC is always derived from the TB, so it's
not changing.

I don't have any plan to support somebody coming up with a HW design
broken enough to have a variable TB/DEC speed. If they do it, they
support it and they come up with patches that are acceptable (hint: that
will be hard !). Besides, it means the vDSO will not be usable for
gettimeofday on such a platform, which means it will have to fallback to
the syscall which is much slower.

Ben.


2007-05-20 21:27:57

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

On Sun, 2007-05-20 at 18:02 +0200, Segher Boessenkool wrote:
> > In fact, while it's never worded explicitely in the spec, it's always
> > been strongly in the "spirit" of the architecture that the timebase and
> > decrementer have a constant frequency.
>
> The architecture mentions varying time base frequencies,
> and how to deal with this, actually. It makes no
> recommendations one way or the other.

I might be mixing up with PAPR... anyway, it's very stupid to vary it
imho. And as I just said, I have about 0 plan to implement support for
such a contraption.

> Fixed frequencies are easier for almost everything of
> course :-)

Yup.

> > This is why processors like the
> > 970 allow for an external sourcing for when they are used in setups
> > where the various clocks are slewed for power management.
>
> Clock spreading on the core clock is the bigger problem,
> lack of accuracy on the order of 1% is unacceptable for
> certain applications.

That too.

Ben.


2007-05-24 18:24:16

by Sergei Shtylyov

[permalink] [raw]
Subject: [PATCH 2.6.21-rt7] PowerPC: fix clockevents for classic CPUs

Unconditionally set a maximum positive value to the decrementer before calling
an event handler for all "classic" PPC CPUs (although this is only necessary
to clear interrupt on POWER4+, I've been asked to do it this way) -- otherwise
it wouldn't have been done for an offline CPU in periodic mode since the event
reprogramming has been delegated to the timer subsystem.
Also, as the classic decrementer doesn't have periodic mode, make set_mode()
method for this case completely empty.
While at it, add a switch case for CLOCK_EVT_MODE_RESUME to hush the warning.

Signed-off-by: Sergei Shtylyov <[email protected]>

---
Testing on "classic" CPUs is still needed (used to work atop of 2.6.18-rt7).

arch/powerpc/kernel/time.c | 15 +++++++--------
1 files changed, 7 insertions(+), 8 deletions(-)

Index: linux-2.6/arch/powerpc/kernel/time.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/time.c
+++ linux-2.6/arch/powerpc/kernel/time.c
@@ -166,11 +166,14 @@ static void decrementer_set_mode(enum cl
case CLOCK_EVT_MODE_SHUTDOWN:
tcr &= ~TCR_DIE;
break;
+ case CLOCK_EVT_MODE_RESUME:
+ break;
}
mtspr(SPRN_TCR, tcr);
-#endif
+
if (mode == CLOCK_EVT_MODE_PERIODIC)
decrementer_set_next_event(tb_ticks_per_jiffy, dev);
+#endif
}

static struct clock_event_device decrementer_clockevent = {
@@ -549,16 +552,12 @@ void timer_interrupt(struct pt_regs * re
irq_enter();

#ifdef CONFIG_GENERIC_CLOCKEVENTS
-#ifdef CONFIG_PPC_MULTIPLATFORM
+#if !defined(CONFIG_40x) && !defined(CONFIG_BOOKE)
/*
* We must write a positive value to the decrementer to clear
- * the interrupt on the IBM 970 CPU series. In periodic mode,
- * this happens when the decrementer gets reloaded later, but
- * in one-shot mode, we have to do it here since an event handler
- * may skip loading the new value...
+ * the interrupt on POWER4+ compatible CPUs.
*/
- if (per_cpu(decrementers, cpu).mode != CLOCK_EVT_MODE_PERIODIC)
- set_dec(DECREMENTER_MAX);
+ set_dec(DECREMENTER_MAX);
#endif
/*
* We can't disable the decrementer, so in the period between

2007-05-24 19:10:29

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt7] PowerPC: fix clockevents for classic CPUs

> * We must write a positive value to the decrementer to clear
> - * the interrupt on the IBM 970 CPU series. In periodic mode,
> - * this happens when the decrementer gets reloaded later, but
> - * in one-shot mode, we have to do it here since an event handler
> - * may skip loading the new value...
> + * the interrupt on POWER4+ compatible CPUs.

No, it is *not* only on POWER4+ compatible CPUs. Sigh.


Segher

2007-10-18 14:45:45

by Sergei Shtylyov

[permalink] [raw]
Subject: Re: [PATCH 2.6.21-rt2] PowerPC: decrementer clockevent driver

Sergei Shtylyov wrote:
> Add PowerPC decrementer clock event driver.

> Every effort has been made to support the different implementations of the
> decrementer: the classic one (with 970 series variation), 40x and Book E
> specific ones.

> I had to make CONFIG_GENERIC_CLOCKEVENTS option selectable for the
> compatibility reasons -- this option is not compatible with the PPC64
> deterministic time accounting.

> Thanks to Daniel Walker and Thomas Gleixner for the suggestions they made...

> Signed-off-by: Sergei Shtylyov <[email protected]>

> Index: linux-2.6/arch/powerpc/Kconfig
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/Kconfig
> +++ linux-2.6/arch/powerpc/Kconfig
> @@ -317,7 +317,7 @@ config PPC_STD_MMU_32
>
> config VIRT_CPU_ACCOUNTING
> bool "Deterministic task and CPU time accounting"
> - depends on PPC64
> + depends on PPC64 && !GENERIC_CLOCKEVENTS
> default y
> help
> Select this option to enable more accurate task and CPU time

Thomas, that part of the ppc-clockevents.patch (part of the -rt patch) is
now broken, it makes CONFIG_MAX_ZONEORDER dependent on
!CONFIG_GENERIC_CLOCKEVENTS -- this is probably due to patch's fuzzy logic.
Do I need to send a patch to fix this?

WBR, Sergei