The following patches provide a driver for synchronized RTC clocksource and
clockevents for SGI systems.
With these patches, a module can be installed that registers the system-wide
synchronized RTC clocksource and timers as both a clocksource and clockevents
device running in high resolution mode.
[PATCH 1/2 v2] SGI RTC: add clocksource driver
[PATCH 2/2 v2] SGI RTC: add RTC system interrupt
This patch provides a driver for SGI RTC clocks and timers.
This provides a high resolution clock and timer source using the SGI
system-wide synchronized RTC clock/timer hardware.
Signed-off-by: Dimitri Sivanich <[email protected]>
---
drivers/clocksource/Makefile | 1
drivers/clocksource/rtc_uv.c | 399 +++++++++++++++++++++++++++++++++++++++
drivers/misc/Kconfig | 9
kernel/time/clockevents.c | 1
kernel/time/clocksource.c | 1
5 files changed, 411 insertions(+)
Index: linux/drivers/clocksource/Makefile
===================================================================
--- linux.orig/drivers/clocksource/Makefile 2008-10-23 08:29:40.000000000 -0500
+++ linux/drivers/clocksource/Makefile 2008-10-23 08:31:28.000000000 -0500
@@ -2,3 +2,4 @@ obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_cl
obj-$(CONFIG_X86_CYCLONE_TIMER) += cyclone.o
obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o
obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o
+obj-$(CONFIG_SGI_RTC_TIMER) += rtc_uv.o
Index: linux/drivers/misc/Kconfig
===================================================================
--- linux.orig/drivers/misc/Kconfig 2008-10-23 08:29:40.000000000 -0500
+++ linux/drivers/misc/Kconfig 2008-10-23 11:06:46.000000000 -0500
@@ -488,4 +488,13 @@ config SGI_GRU_DEBUG
This option enables addition debugging code for the SGI GRU driver. If
you are unsure, say N.
+config SGI_RTC_TIMER
+ tristate "SGI RTC driver"
+ depends on GENERIC_CLOCKEVENTS_BUILD
+ depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP
+ default m
+ ---help---
+ This option enables RTC event handling and adds the systemwide RTC
+ as a high resolution clock source for SGI systems.
+
endif # MISC_DEVICES
Index: linux/drivers/clocksource/rtc_uv.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux/drivers/clocksource/rtc_uv.c 2008-10-23 11:02:33.000000000 -0500
@@ -0,0 +1,399 @@
+/*
+ * SGI RTC clock/timer routines.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) Dimitri Sivanich
+ */
+#include <linux/module.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <asm/genapic.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
+
+MODULE_LICENSE("GPL");
+
+#define RTC_NAME "sgi_rtc"
+#define cpu_to_pnode(_c_) \
+ uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, (_c_)))
+
+static cycle_t uv_read_rtc(void);
+static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
+static void uv_rtc_timer_setup(enum clock_event_mode,
+ struct clock_event_device *);
+static void uv_rtc_timer_broadcast(cpumask_t);
+
+
+static struct clocksource clocksource_uv = {
+ .name = RTC_NAME,
+ .rating = 400,
+ .read = uv_read_rtc,
+ .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
+ .shift = 0,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static struct clock_event_device clock_event_device_uv = {
+ .name = RTC_NAME,
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+ .shift = 10,
+ .rating = 400,
+ .irq = -1,
+ .set_next_event = uv_rtc_next_event,
+ .set_mode = uv_rtc_timer_setup,
+ .event_handler = NULL,
+ .broadcast = uv_rtc_timer_broadcast
+};
+
+static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
+
+struct uv_rtc_timer_head {
+ spinlock_t lock;
+ int fcpu;
+ int cpus;
+ u64 next_cpu; /* local cpu on this node */
+ u64 expires[];
+};
+
+static DEFINE_PER_CPU(struct uv_rtc_timer_head *, rtc_timer_head);
+
+
+/*
+ * Hardware interface routines
+ */
+
+/* Send IPIs to another node */
+static void
+uv_rtc_send_IPI(int cpu, int vector)
+{
+ unsigned long val, apicid, lapicid;
+ int pnode;
+
+ apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ lapicid = apicid & 0x3f;
+ pnode = uv_apicid_to_pnode(apicid);
+
+ val = (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid <<
+ UVH_IPI_INT_APIC_ID_SHFT) |
+ (vector << UVH_IPI_INT_VECTOR_SHFT);
+ uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+}
+
+/* Check for an RTC interrupt pending */
+static int
+uv_int_pending(int pnode)
+{
+ if (uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
+ UVH_EVENT_OCCURRED0_RTC1_MASK)
+ return 1;
+ else
+ return 0;
+}
+
+static void
+uv_clr_int_pending(int pnode)
+{
+ uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
+ UVH_EVENT_OCCURRED0_RTC1_MASK);
+}
+
+static void
+uv_setup_int(int cpu, u64 expires)
+{
+ u64 val;
+ int pnode = cpu_to_pnode(cpu);
+
+ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
+ UVH_RTC1_INT_CONFIG_M_MASK);
+ uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
+
+ uv_clr_int_pending(pnode);
+
+ val = ((u64)RTC_TIMER_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
+ ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
+ /* Set configuration */
+ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
+ /* Initialize comparator value */
+ uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
+}
+
+
+/*
+ * Per-cpu timer tracking routines
+ */
+
+/* Allocate per-node list of cpu timer expiration times. */
+static void
+uv_rtc_allocate_timers(void)
+{
+ struct uv_rtc_timer_head *head = NULL;
+ int cpu;
+ int max = 0;
+ int pnode = -1;
+ int i = 0;
+
+ /* Determine max possible hyperthreads/pnode for allocation */
+ for_each_present_cpu(cpu) {
+ if (pnode != cpu_to_pnode(cpu)) {
+ i = 0;
+ pnode = cpu_to_pnode(cpu);
+ }
+ if (max < ++i)
+ max = i;
+ }
+
+ pnode = -1;
+ for_each_present_cpu(cpu) {
+ if (pnode != cpu_to_pnode(cpu)) {
+ i = 0;
+ pnode = cpu_to_pnode(cpu);
+ head = kmalloc_node(sizeof(struct uv_rtc_timer_head) +
+ (max * sizeof(u64)),
+ GFP_KERNEL, pnode);
+ spin_lock_init(&head->lock);
+ head->fcpu = cpu;
+ head->cpus = 0;
+ head->next_cpu = -1;
+ }
+ head->cpus++;
+ head->expires[i++] = ULLONG_MAX;
+ per_cpu(rtc_timer_head, cpu) = head;
+ }
+}
+
+/* Find and set the next expiring timer. */
+static void
+uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode)
+{
+ u64 exp;
+ u64 lowest = ULLONG_MAX;
+ int cpu = -1;
+ int c;
+
+ head->next_cpu = -1;
+ for (c = 0; c < head->cpus; c++) {
+ exp = head->expires[c];
+ if (exp < lowest) {
+ cpu = c;
+ lowest = exp;
+ }
+ }
+ if (cpu >= 0) {
+ head->next_cpu = cpu;
+ c = head->fcpu + cpu;
+ uv_setup_int(c, lowest);
+ /* If we didn't set it up in time, trigger */
+ if (lowest < uv_read_rtc() && !uv_int_pending(pnode))
+ uv_rtc_send_IPI(c, RTC_TIMER_VECTOR);
+ } else
+ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
+ UVH_RTC1_INT_CONFIG_M_MASK);
+}
+
+/*
+ * Set expiration time for current cpu.
+ *
+ * Returns 1 if we missed the expiration time.
+ */
+static int
+uv_rtc_set_timer(int cpu, u64 expires)
+{
+ int pnode = cpu_to_pnode(cpu);
+ struct uv_rtc_timer_head *head = per_cpu(rtc_timer_head, cpu);
+ int local_cpu = cpu - head->fcpu;
+ u64 *t = &head->expires[local_cpu];
+ unsigned long flags;
+ int next_cpu;
+
+ spin_lock_irqsave(&head->lock, flags);
+
+ next_cpu = head->next_cpu;
+ *t = expires;
+
+ /* Will this one be next to go off? */
+ if (expires < head->expires[next_cpu]) {
+ head->next_cpu = cpu;
+ uv_setup_int(cpu, expires);
+ if (expires <= uv_read_rtc() && !uv_int_pending(pnode)) {
+ *t = ULLONG_MAX;
+ uv_rtc_find_next_timer(head, pnode);
+ spin_unlock_irqrestore(&head->lock, flags);
+ return 1;
+ }
+ }
+
+ spin_unlock_irqrestore(&head->lock, flags);
+ return 0;
+}
+
+/*
+ * Unset expiration time for current cpu.
+ *
+ * Returns 1 if this timer was pending.
+ */
+static int
+uv_rtc_unset_timer(int cpu)
+{
+ int pnode = cpu_to_pnode(cpu);
+ struct uv_rtc_timer_head *head = per_cpu(rtc_timer_head, cpu);
+ int local_cpu = cpu - head->fcpu;
+ u64 *t = &head->expires[local_cpu];
+ unsigned long flags;
+ int rc = 0;
+
+ spin_lock_irqsave(&head->lock, flags);
+
+ if (head->next_cpu == cpu && uv_read_rtc() >= *t)
+ rc = 1;
+
+ *t = ULLONG_MAX;
+ /* Was the hardware setup for this timer? */
+ if (head->next_cpu == cpu)
+ uv_rtc_find_next_timer(head, pnode);
+
+ spin_unlock_irqrestore(&head->lock, flags);
+ return rc;
+}
+
+
+
+/*
+ * Kernel interface routines.
+ */
+
+/*
+ * Read the RTC.
+ */
+static cycle_t
+uv_read_rtc(void)
+{
+ return (cycle_t)uv_read_local_mmr(UVH_RTC);
+}
+
+/*
+ * Program the next event, relative to now
+ */
+static int
+uv_rtc_next_event(unsigned long delta, struct clock_event_device *ced)
+{
+ int ced_cpu = first_cpu(ced->cpumask);
+
+ return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc());
+}
+
+/*
+ * Setup the RTC timer in oneshot mode
+ */
+static void
+uv_rtc_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt)
+{
+ unsigned long flags;
+ int ced_cpu = first_cpu(evt->cpumask);
+
+ local_irq_save(flags);
+
+ switch (mode) {
+ case CLOCK_EVT_MODE_PERIODIC:
+ case CLOCK_EVT_MODE_ONESHOT:
+ case CLOCK_EVT_MODE_RESUME:
+ /* Nothing to do here yet */
+ break;
+ case CLOCK_EVT_MODE_UNUSED:
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ uv_rtc_unset_timer(ced_cpu);
+ break;
+ }
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Local APIC timer broadcast function
+ */
+static void
+uv_rtc_timer_broadcast(cpumask_t mask)
+{
+ int cpu;
+ for_each_cpu_mask(cpu, mask)
+ uv_rtc_send_IPI(cpu, RTC_TIMER_VECTOR);
+}
+
+static void
+uv_rtc_interrupt(void)
+{
+ struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
+ unsigned long flags;
+ int cpu = smp_processor_id();
+
+ local_irq_save(flags);
+ if (!ced || !ced->event_handler || !uv_rtc_unset_timer(cpu)) {
+ local_irq_restore(flags);
+ printk(KERN_WARNING
+ "Spurious uv_rtc timer interrupt on cpu %d\n",
+ smp_processor_id());
+ return;
+ }
+ local_irq_restore(flags);
+ ced->event_handler(ced);
+}
+
+static __init void
+uv_rtc_register_clockevents(void *data)
+{
+ struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
+
+ memcpy(ced, &clock_event_device_uv, sizeof(clock_event_device_uv));
+ cpus_clear(ced->cpumask);
+ cpu_set(smp_processor_id(), ced->cpumask);
+ clockevents_register_device(ced);
+}
+
+static __init int
+uv_rtc_setup_clock(void)
+{
+ int rc;
+
+ if (!is_uv_system() || uv_rtc_reg_extension(uv_rtc_interrupt))
+ return -ENODEV;
+
+ clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
+ clocksource_uv.shift);
+
+ rc = clocksource_register(&clocksource_uv);
+ if (rc) {
+ uv_rtc_unreg_extension();
+ return rc;
+ }
+
+ /* Setup and register clockevents */
+ uv_rtc_allocate_timers();
+
+ clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second,
+ NSEC_PER_SEC, clock_event_device_uv.shift);
+
+ clock_event_device_uv.min_delta_ns = NSEC_PER_SEC /
+ sn_rtc_cycles_per_second;
+ clock_event_device_uv.max_delta_ns = clocksource_uv.mask *
+ (NSEC_PER_SEC / sn_rtc_cycles_per_second);
+
+ smp_call_function(uv_rtc_register_clockevents, NULL, 0);
+ uv_rtc_register_clockevents(NULL);
+
+ return 0;
+}
+module_init(uv_rtc_setup_clock);
Index: linux/kernel/time/clockevents.c
===================================================================
--- linux.orig/kernel/time/clockevents.c 2008-10-23 08:29:40.000000000 -0500
+++ linux/kernel/time/clockevents.c 2008-10-23 08:31:28.000000000 -0500
@@ -183,6 +183,7 @@ void clockevents_register_device(struct
spin_unlock(&clockevents_lock);
}
+EXPORT_SYMBOL_GPL(clockevents_register_device);
/*
* Noop handler when we shut down an event device
Index: linux/kernel/time/clocksource.c
===================================================================
--- linux.orig/kernel/time/clocksource.c 2008-10-23 08:29:40.000000000 -0500
+++ linux/kernel/time/clocksource.c 2008-10-23 08:31:28.000000000 -0500
@@ -369,6 +369,7 @@ void clocksource_unregister(struct clock
next_clocksource = select_clocksource();
spin_unlock_irqrestore(&clocksource_lock, flags);
}
+EXPORT_SYMBOL(clocksource_unregister);
#ifdef CONFIG_SYSFS
/**
This patch allocates a system interrupt vector for architecture specific
use in implementing RTC timer interrupts.
Signed-off-by: Dimitri Sivanich <[email protected]>
---
arch/x86/kernel/entry_64.S | 4 +++
arch/x86/kernel/genx2apic_uv_x.c | 41 +++++++++++++++++++++++++++++++++++
arch/x86/kernel/irqinit_64.c | 3 ++
include/asm-x86/genapic_64.h | 2 +
include/asm-x86/hw_irq.h | 1
include/asm-x86/irq_vectors.h | 1
6 files changed, 52 insertions(+)
Index: linux/arch/x86/kernel/entry_64.S
===================================================================
--- linux.orig/arch/x86/kernel/entry_64.S 2008-10-22 16:09:56.000000000 -0500
+++ linux/arch/x86/kernel/entry_64.S 2008-10-22 16:13:30.000000000 -0500
@@ -858,6 +858,10 @@ ENTRY(apic_timer_interrupt)
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)
+ENTRY(uv_rtc_timer_intr)
+ apicinterrupt RTC_TIMER_VECTOR,uv_rtc_timer_interrupt
+END(uv_rtc_timer_intr)
+
ENTRY(uv_bau_message_intr1)
apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)
Index: linux/arch/x86/kernel/irqinit_64.c
===================================================================
--- linux.orig/arch/x86/kernel/irqinit_64.c 2008-10-22 16:09:56.000000000 -0500
+++ linux/arch/x86/kernel/irqinit_64.c 2008-10-22 16:13:30.000000000 -0500
@@ -201,6 +201,9 @@ static void __init apic_intr_init(void)
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+ /* IPI for RTC timers */
+ alloc_intr_gate(RTC_TIMER_VECTOR, uv_rtc_timer_intr);
+
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
Index: linux/include/asm-x86/irq_vectors.h
===================================================================
--- linux.orig/include/asm-x86/irq_vectors.h 2008-10-22 16:10:03.000000000 -0500
+++ linux/include/asm-x86/irq_vectors.h 2008-10-22 16:13:30.000000000 -0500
@@ -85,6 +85,7 @@
* sources per level' errata.
*/
#define LOCAL_TIMER_VECTOR 0xef
+#define RTC_TIMER_VECTOR 0xee
/*
* First APIC vector available to drivers: (vectors 0x30-0xee) we
Index: linux/arch/x86/kernel/genx2apic_uv_x.c
===================================================================
--- linux.orig/arch/x86/kernel/genx2apic_uv_x.c 2008-10-22 16:09:56.000000000 -0500
+++ linux/arch/x86/kernel/genx2apic_uv_x.c 2008-10-22 16:13:30.000000000 -0500
@@ -22,6 +22,7 @@
#include <asm/ipi.h>
#include <asm/genapic.h>
#include <asm/pgtable.h>
+#include <asm/idle.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/bios.h>
@@ -356,6 +357,46 @@ static __init void uv_rtc_init(void)
sn_rtc_cycles_per_second = ticks_per_sec;
}
+/* Function pointer for RTC interrupt handling */
+static void (*uv_rtc_interrupt_extension)(void);
+
+int
+uv_rtc_reg_extension(void (*fn)(void))
+{
+ if (uv_rtc_interrupt_extension)
+ return 1;
+
+ uv_rtc_interrupt_extension = fn;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(uv_rtc_reg_extension);
+
+void
+uv_rtc_unreg_extension(void)
+{
+ if (uv_rtc_interrupt_extension)
+ uv_rtc_interrupt_extension = NULL;
+}
+EXPORT_SYMBOL_GPL(uv_rtc_unreg_extension);
+
+void uv_rtc_timer_interrupt(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ ack_APIC_irq();
+
+ exit_idle();
+
+ irq_enter();
+
+ if (uv_rtc_interrupt_extension)
+ uv_rtc_interrupt_extension();
+
+ irq_exit();
+
+ set_irq_regs(old_regs);
+}
+
/*
* Called on each cpu to initialize the per_cpu UV data area.
* ZZZ hotplug not supported yet
Index: linux/include/asm-x86/genapic_64.h
===================================================================
--- linux.orig/include/asm-x86/genapic_64.h 2008-10-22 16:10:03.000000000 -0500
+++ linux/include/asm-x86/genapic_64.h 2008-10-22 16:13:30.000000000 -0500
@@ -49,6 +49,8 @@ extern int is_uv_system(void);
extern struct genapic apic_x2apic_uv_x;
DECLARE_PER_CPU(int, x2apic_extra_bits);
+extern int uv_rtc_reg_extension(void (*fn)(void));
+extern void uv_rtc_unreg_extension(void);
extern void uv_cpu_init(void);
extern void uv_system_init(void);
extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
Index: linux/include/asm-x86/hw_irq.h
===================================================================
--- linux.orig/include/asm-x86/hw_irq.h 2008-10-22 16:10:03.000000000 -0500
+++ linux/include/asm-x86/hw_irq.h 2008-10-22 16:13:30.000000000 -0500
@@ -29,6 +29,7 @@
/* Interrupt handlers registered during init_IRQ */
extern void apic_timer_interrupt(void);
+extern void uv_rtc_timer_intr(void);
extern void error_interrupt(void);
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);
This patch allocates a system interrupt vector for architecture specific
use in implementing RTC timer interrupts.
Signed-off-by: Dimitri Sivanich <[email protected]>
---
Note that this version (v3) was created simply to apply on top of the changes
brought about by the move from include/asm-x86 to arch/x86/include/asm:
http://lkml.org/lkml/2008/10/23/63
No changes to the other patch in this set were required.
arch/x86/include/asm/genapic_64.h | 2 +
arch/x86/include/asm/hw_irq.h | 1
arch/x86/include/asm/irq_vectors.h | 1
arch/x86/kernel/entry_64.S | 4 +++
arch/x86/kernel/genx2apic_uv_x.c | 41 +++++++++++++++++++++++++++++++++
arch/x86/kernel/irqinit_64.c | 3 ++
6 files changed, 52 insertions(+)
Index: linux/arch/x86/kernel/entry_64.S
===================================================================
--- linux.orig/arch/x86/kernel/entry_64.S 2008-10-23 21:45:08.000000000 -0500
+++ linux/arch/x86/kernel/entry_64.S 2008-10-23 21:48:12.000000000 -0500
@@ -858,6 +858,10 @@ ENTRY(apic_timer_interrupt)
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)
+ENTRY(uv_rtc_timer_intr)
+ apicinterrupt RTC_TIMER_VECTOR,uv_rtc_timer_interrupt
+END(uv_rtc_timer_intr)
+
ENTRY(uv_bau_message_intr1)
apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)
Index: linux/arch/x86/kernel/irqinit_64.c
===================================================================
--- linux.orig/arch/x86/kernel/irqinit_64.c 2008-10-23 21:45:08.000000000 -0500
+++ linux/arch/x86/kernel/irqinit_64.c 2008-10-23 21:48:12.000000000 -0500
@@ -201,6 +201,9 @@ static void __init apic_intr_init(void)
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+ /* IPI for RTC timers */
+ alloc_intr_gate(RTC_TIMER_VECTOR, uv_rtc_timer_intr);
+
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
Index: linux/arch/x86/kernel/genx2apic_uv_x.c
===================================================================
--- linux.orig/arch/x86/kernel/genx2apic_uv_x.c 2008-10-23 21:46:38.000000000 -0500
+++ linux/arch/x86/kernel/genx2apic_uv_x.c 2008-10-23 21:48:12.000000000 -0500
@@ -22,6 +22,7 @@
#include <asm/ipi.h>
#include <asm/genapic.h>
#include <asm/pgtable.h>
+#include <asm/idle.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/bios.h>
@@ -356,6 +357,46 @@ static __init void uv_rtc_init(void)
sn_rtc_cycles_per_second = ticks_per_sec;
}
+/* Function pointer for RTC interrupt handling */
+static void (*uv_rtc_interrupt_extension)(void);
+
+int
+uv_rtc_reg_extension(void (*fn)(void))
+{
+ if (uv_rtc_interrupt_extension)
+ return 1;
+
+ uv_rtc_interrupt_extension = fn;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(uv_rtc_reg_extension);
+
+void
+uv_rtc_unreg_extension(void)
+{
+ if (uv_rtc_interrupt_extension)
+ uv_rtc_interrupt_extension = NULL;
+}
+EXPORT_SYMBOL_GPL(uv_rtc_unreg_extension);
+
+void uv_rtc_timer_interrupt(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ ack_APIC_irq();
+
+ exit_idle();
+
+ irq_enter();
+
+ if (uv_rtc_interrupt_extension)
+ uv_rtc_interrupt_extension();
+
+ irq_exit();
+
+ set_irq_regs(old_regs);
+}
+
/*
* Called on each cpu to initialize the per_cpu UV data area.
* ZZZ hotplug not supported yet
Index: linux/arch/x86/include/asm/irq_vectors.h
===================================================================
--- linux.orig/arch/x86/include/asm/irq_vectors.h 2008-10-23 21:45:08.000000000 -0500
+++ linux/arch/x86/include/asm/irq_vectors.h 2008-10-23 21:50:14.000000000 -0500
@@ -85,6 +85,7 @@
* sources per level' errata.
*/
#define LOCAL_TIMER_VECTOR 0xef
+#define RTC_TIMER_VECTOR 0xee
/*
* First APIC vector available to drivers: (vectors 0x30-0xee) we
Index: linux/arch/x86/include/asm/hw_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/hw_irq.h 2008-10-23 21:45:08.000000000 -0500
+++ linux/arch/x86/include/asm/hw_irq.h 2008-10-23 21:54:05.000000000 -0500
@@ -29,6 +29,7 @@
/* Interrupt handlers registered during init_IRQ */
extern void apic_timer_interrupt(void);
+extern void uv_rtc_timer_intr(void);
extern void error_interrupt(void);
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);
Index: linux/arch/x86/include/asm/genapic_64.h
===================================================================
--- linux.orig/arch/x86/include/asm/genapic_64.h 2008-10-23 21:45:08.000000000 -0500
+++ linux/arch/x86/include/asm/genapic_64.h 2008-10-23 21:54:59.000000000 -0500
@@ -49,6 +49,8 @@ extern int is_uv_system(void);
extern struct genapic apic_x2apic_uv_x;
DECLARE_PER_CPU(int, x2apic_extra_bits);
+extern int uv_rtc_reg_extension(void (*fn)(void));
+extern void uv_rtc_unreg_extension(void);
extern void uv_cpu_init(void);
extern void uv_system_init(void);
extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
* Dimitri Sivanich <[email protected]> wrote:
> +void uv_rtc_timer_interrupt(struct pt_regs *regs)
> +{
> + struct pt_regs *old_regs = set_irq_regs(regs);
> +
> + ack_APIC_irq();
> +
> + exit_idle();
> +
> + irq_enter();
> +
> + if (uv_rtc_interrupt_extension)
> + uv_rtc_interrupt_extension();
> +
> + irq_exit();
> +
> + set_irq_regs(old_regs);
> +}
hm, the exit_idle() looks weird - why is it done? If we get an IRQ
then the CPU will exit idle state anyway.
Ingo
On Mon, Oct 27, 2008 at 03:08:29PM +0100, Ingo Molnar wrote:
>
> * Dimitri Sivanich <[email protected]> wrote:
>
> > +void uv_rtc_timer_interrupt(struct pt_regs *regs)
> > +{
> > + struct pt_regs *old_regs = set_irq_regs(regs);
> > +
> > + ack_APIC_irq();
> > +
> > + exit_idle();
> > +
> > + irq_enter();
> > +
> > + if (uv_rtc_interrupt_extension)
> > + uv_rtc_interrupt_extension();
> > +
> > + irq_exit();
> > +
> > + set_irq_regs(old_regs);
> > +}
>
> hm, the exit_idle() looks weird - why is it done? If we get an IRQ
> then the CPU will exit idle state anyway.
Ingo,
I'm not quite sure what you mean here. Are you referring to an explicit call to exit_idle() that is done somewhere else (such as do_IRQ())?
The following patches provide a driver for synchronized RTC clocksource and
clockevents for SGI systems, as well as a generic timer system interrupt.
With these patches, a module can be installed that registers the system-wide
synchronized RTC clocksource and timers as both a clocksource and clockevents
device running in high resolution mode.
[PATCH 1/2 v3] SGI RTC: add clocksource driver
[PATCH 2/2 v3] SGI RTC: add generic timer system interrupt
This patch provides a driver for SGI RTC clocks and timers.
This provides a high resolution clock and timer source using the SGI
system-wide synchronized RTC clock/timer hardware.
Signed-off-by: Dimitri Sivanich <[email protected]>
---
drivers/clocksource/Makefile | 1
drivers/clocksource/rtc_uv.c | 399 +++++++++++++++++++++++++++++++++++++++
drivers/misc/Kconfig | 9
kernel/time/clockevents.c | 1
kernel/time/clocksource.c | 1
5 files changed, 411 insertions(+)
Index: linux/drivers/clocksource/Makefile
===================================================================
--- linux.orig/drivers/clocksource/Makefile 2008-11-19 15:08:01.000000000 -0600
+++ linux/drivers/clocksource/Makefile 2008-11-19 15:09:54.000000000 -0600
@@ -2,3 +2,4 @@ obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_cl
obj-$(CONFIG_X86_CYCLONE_TIMER) += cyclone.o
obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o
obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o
+obj-$(CONFIG_SGI_RTC_TIMER) += rtc_uv.o
Index: linux/drivers/misc/Kconfig
===================================================================
--- linux.orig/drivers/misc/Kconfig 2008-11-19 15:08:01.000000000 -0600
+++ linux/drivers/misc/Kconfig 2008-11-19 15:09:54.000000000 -0600
@@ -498,6 +498,15 @@ config SGI_GRU_DEBUG
This option enables addition debugging code for the SGI GRU driver. If
you are unsure, say N.
+config SGI_RTC_TIMER
+ tristate "SGI RTC driver"
+ depends on GENERIC_CLOCKEVENTS_BUILD
+ depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP
+ default m
+ ---help---
+ This option enables RTC event handling and adds the systemwide RTC
+ as a high resolution clock source for SGI systems.
+
source "drivers/misc/c2port/Kconfig"
endif # MISC_DEVICES
Index: linux/drivers/clocksource/rtc_uv.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux/drivers/clocksource/rtc_uv.c 2008-11-19 15:09:54.000000000 -0600
@@ -0,0 +1,399 @@
+/*
+ * SGI RTC clock/timer routines.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) Dimitri Sivanich
+ */
+#include <linux/module.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <asm/genapic.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
+
+MODULE_LICENSE("GPL");
+
+#define RTC_NAME "sgi_rtc"
+#define cpu_to_pnode(_c_) \
+ uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, (_c_)))
+
+static cycle_t uv_read_rtc(void);
+static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
+static void uv_rtc_timer_setup(enum clock_event_mode,
+ struct clock_event_device *);
+static void uv_rtc_timer_broadcast(cpumask_t);
+
+
+static struct clocksource clocksource_uv = {
+ .name = RTC_NAME,
+ .rating = 400,
+ .read = uv_read_rtc,
+ .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
+ .shift = 0,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static struct clock_event_device clock_event_device_uv = {
+ .name = RTC_NAME,
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+ .shift = 10,
+ .rating = 400,
+ .irq = -1,
+ .set_next_event = uv_rtc_next_event,
+ .set_mode = uv_rtc_timer_setup,
+ .event_handler = NULL,
+ .broadcast = uv_rtc_timer_broadcast
+};
+
+static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
+
+struct uv_rtc_timer_head {
+ spinlock_t lock;
+ int fcpu;
+ int cpus;
+ u64 next_cpu; /* local cpu on this node */
+ u64 expires[];
+};
+
+static DEFINE_PER_CPU(struct uv_rtc_timer_head *, rtc_timer_head);
+
+
+/*
+ * Hardware interface routines
+ */
+
+/* Send IPIs to another node */
+static void
+uv_rtc_send_IPI(int cpu, int vector)
+{
+ unsigned long val, apicid, lapicid;
+ int pnode;
+
+ apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ lapicid = apicid & 0x3f;
+ pnode = uv_apicid_to_pnode(apicid);
+
+ val = (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid <<
+ UVH_IPI_INT_APIC_ID_SHFT) |
+ (vector << UVH_IPI_INT_VECTOR_SHFT);
+ uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+}
+
+/* Check for an RTC interrupt pending */
+static int
+uv_intr_pending(int pnode)
+{
+ if (uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
+ UVH_EVENT_OCCURRED0_RTC1_MASK)
+ return 1;
+ else
+ return 0;
+}
+
+static void
+uv_clr_intr_pending(int pnode)
+{
+ uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
+ UVH_EVENT_OCCURRED0_RTC1_MASK);
+}
+
+static void
+uv_setup_intr(int cpu, u64 expires)
+{
+ u64 val;
+ int pnode = cpu_to_pnode(cpu);
+
+ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
+ UVH_RTC1_INT_CONFIG_M_MASK);
+ uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
+
+ uv_clr_intr_pending(pnode);
+
+ val = ((u64)GENERIC_TIMER_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
+ ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
+ /* Set configuration */
+ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
+ /* Initialize comparator value */
+ uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
+}
+
+
+/*
+ * Per-cpu timer tracking routines
+ */
+
+/* Allocate per-node list of cpu timer expiration times. */
+static void
+uv_rtc_allocate_timers(void)
+{
+ struct uv_rtc_timer_head *head = NULL;
+ int cpu;
+ int max = 0;
+ int pnode = -1;
+ int i = 0;
+
+ /* Determine max possible hyperthreads/pnode for allocation */
+ for_each_present_cpu(cpu) {
+ if (pnode != cpu_to_pnode(cpu)) {
+ i = 0;
+ pnode = cpu_to_pnode(cpu);
+ }
+ if (max < ++i)
+ max = i;
+ }
+
+ pnode = -1;
+ for_each_present_cpu(cpu) {
+ if (pnode != cpu_to_pnode(cpu)) {
+ i = 0;
+ pnode = cpu_to_pnode(cpu);
+ head = kmalloc_node(sizeof(struct uv_rtc_timer_head) +
+ (max * sizeof(u64)),
+ GFP_KERNEL, pnode);
+ spin_lock_init(&head->lock);
+ head->fcpu = cpu;
+ head->cpus = 0;
+ head->next_cpu = -1;
+ }
+ head->cpus++;
+ head->expires[i++] = ULLONG_MAX;
+ per_cpu(rtc_timer_head, cpu) = head;
+ }
+}
+
+/* Find and set the next expiring timer. */
+static void
+uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode)
+{
+ u64 exp;
+ u64 lowest = ULLONG_MAX;
+ int cpu = -1;
+ int c;
+
+ head->next_cpu = -1;
+ for (c = 0; c < head->cpus; c++) {
+ exp = head->expires[c];
+ if (exp < lowest) {
+ cpu = c;
+ lowest = exp;
+ }
+ }
+ if (cpu >= 0) {
+ head->next_cpu = cpu;
+ c = head->fcpu + cpu;
+ uv_setup_intr(c, lowest);
+ /* If we didn't set it up in time, trigger */
+ if (lowest < uv_read_rtc() && !uv_intr_pending(pnode))
+ uv_rtc_send_IPI(c, GENERIC_TIMER_VECTOR);
+ } else
+ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
+ UVH_RTC1_INT_CONFIG_M_MASK);
+}
+
+/*
+ * Set expiration time for current cpu.
+ *
+ * Returns 1 if we missed the expiration time.
+ */
+static int
+uv_rtc_set_timer(int cpu, u64 expires)
+{
+ int pnode = cpu_to_pnode(cpu);
+ struct uv_rtc_timer_head *head = per_cpu(rtc_timer_head, cpu);
+ int local_cpu = cpu - head->fcpu;
+ u64 *t = &head->expires[local_cpu];
+ unsigned long flags;
+ int next_cpu;
+
+ spin_lock_irqsave(&head->lock, flags);
+
+ next_cpu = head->next_cpu;
+ *t = expires;
+
+ /* Will this one be next to go off? */
+ if (expires < head->expires[next_cpu]) {
+ head->next_cpu = cpu;
+ uv_setup_intr(cpu, expires);
+ if (expires <= uv_read_rtc() && !uv_intr_pending(pnode)) {
+ *t = ULLONG_MAX;
+ uv_rtc_find_next_timer(head, pnode);
+ spin_unlock_irqrestore(&head->lock, flags);
+ return 1;
+ }
+ }
+
+ spin_unlock_irqrestore(&head->lock, flags);
+ return 0;
+}
+
+/*
+ * Unset expiration time for current cpu.
+ *
+ * Returns 1 if this timer was pending.
+ */
+static int
+uv_rtc_unset_timer(int cpu)
+{
+ int pnode = cpu_to_pnode(cpu);
+ struct uv_rtc_timer_head *head = per_cpu(rtc_timer_head, cpu);
+ int local_cpu = cpu - head->fcpu;
+ u64 *t = &head->expires[local_cpu];
+ unsigned long flags;
+ int rc = 0;
+
+ spin_lock_irqsave(&head->lock, flags);
+
+ if (head->next_cpu == cpu && uv_read_rtc() >= *t)
+ rc = 1;
+
+ *t = ULLONG_MAX;
+ /* Was the hardware setup for this timer? */
+ if (head->next_cpu == cpu)
+ uv_rtc_find_next_timer(head, pnode);
+
+ spin_unlock_irqrestore(&head->lock, flags);
+ return rc;
+}
+
+
+
+/*
+ * Kernel interface routines.
+ */
+
+/*
+ * Read the RTC.
+ */
+static cycle_t
+uv_read_rtc(void)
+{
+ return (cycle_t)uv_read_local_mmr(UVH_RTC);
+}
+
+/*
+ * Program the next event, relative to now
+ */
+static int
+uv_rtc_next_event(unsigned long delta, struct clock_event_device *ced)
+{
+ int ced_cpu = first_cpu(ced->cpumask);
+
+ return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc());
+}
+
+/*
+ * Setup the RTC timer in oneshot mode
+ */
+static void
+uv_rtc_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt)
+{
+ unsigned long flags;
+ int ced_cpu = first_cpu(evt->cpumask);
+
+ local_irq_save(flags);
+
+ switch (mode) {
+ case CLOCK_EVT_MODE_PERIODIC:
+ case CLOCK_EVT_MODE_ONESHOT:
+ case CLOCK_EVT_MODE_RESUME:
+ /* Nothing to do here yet */
+ break;
+ case CLOCK_EVT_MODE_UNUSED:
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ uv_rtc_unset_timer(ced_cpu);
+ break;
+ }
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Local APIC timer broadcast function
+ */
+static void
+uv_rtc_timer_broadcast(cpumask_t mask)
+{
+ int cpu;
+ for_each_cpu_mask(cpu, mask)
+ uv_rtc_send_IPI(cpu, GENERIC_TIMER_VECTOR);
+}
+
+static void
+uv_rtc_interrupt(void)
+{
+ struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
+ unsigned long flags;
+ int cpu = smp_processor_id();
+
+ local_irq_save(flags);
+ if (!ced || !ced->event_handler || !uv_rtc_unset_timer(cpu)) {
+ local_irq_restore(flags);
+ printk(KERN_WARNING
+ "Spurious uv_rtc timer interrupt on cpu %d\n",
+ smp_processor_id());
+ return;
+ }
+ local_irq_restore(flags);
+ ced->event_handler(ced);
+}
+
+static __init void
+uv_rtc_register_clockevents(void *data)
+{
+ struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
+
+ memcpy(ced, &clock_event_device_uv, sizeof(clock_event_device_uv));
+ cpus_clear(ced->cpumask);
+ cpu_set(smp_processor_id(), ced->cpumask);
+ clockevents_register_device(ced);
+}
+
+static __init int
+uv_rtc_setup_clock(void)
+{
+ int rc;
+
+ if (!is_uv_system() || generic_timer_reg_extension(uv_rtc_interrupt))
+ return -ENODEV;
+
+ clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
+ clocksource_uv.shift);
+
+ rc = clocksource_register(&clocksource_uv);
+ if (rc) {
+ generic_timer_unreg_extension();
+ return rc;
+ }
+
+ /* Setup and register clockevents */
+ uv_rtc_allocate_timers();
+
+ clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second,
+ NSEC_PER_SEC, clock_event_device_uv.shift);
+
+ clock_event_device_uv.min_delta_ns = NSEC_PER_SEC /
+ sn_rtc_cycles_per_second;
+ clock_event_device_uv.max_delta_ns = clocksource_uv.mask *
+ (NSEC_PER_SEC / sn_rtc_cycles_per_second);
+
+ smp_call_function(uv_rtc_register_clockevents, NULL, 0);
+ uv_rtc_register_clockevents(NULL);
+
+ return 0;
+}
+module_init(uv_rtc_setup_clock);
Index: linux/kernel/time/clockevents.c
===================================================================
--- linux.orig/kernel/time/clockevents.c 2008-11-19 15:08:01.000000000 -0600
+++ linux/kernel/time/clockevents.c 2008-11-19 15:09:54.000000000 -0600
@@ -183,6 +183,7 @@ void clockevents_register_device(struct
spin_unlock(&clockevents_lock);
}
+EXPORT_SYMBOL_GPL(clockevents_register_device);
/*
* Noop handler when we shut down an event device
Index: linux/kernel/time/clocksource.c
===================================================================
--- linux.orig/kernel/time/clocksource.c 2008-11-19 15:08:01.000000000 -0600
+++ linux/kernel/time/clocksource.c 2008-11-19 15:09:54.000000000 -0600
@@ -369,6 +369,7 @@ void clocksource_unregister(struct clock
next_clocksource = select_clocksource();
spin_unlock_irqrestore(&clocksource_lock, flags);
}
+EXPORT_SYMBOL(clocksource_unregister);
#ifdef CONFIG_SYSFS
/**
This patch allocates a system interrupt vector for platform specific use
in implementing timer interrupts.
Signed-off-by: Dimitri Sivanich <[email protected]>
---
Refreshing this to use 0xed as the vector. Have repackaged it to be more
generic.
arch/x86/include/asm/hw_irq.h | 1
arch/x86/include/asm/irq.h | 2 +
arch/x86/include/asm/irq_vectors.h | 5 +++
arch/x86/kernel/entry_64.S | 4 ++
arch/x86/kernel/irqinit_64.c | 45 +++++++++++++++++++++++++++++++++
5 files changed, 57 insertions(+)
Index: linux/arch/x86/kernel/entry_64.S
===================================================================
--- linux.orig/arch/x86/kernel/entry_64.S 2008-11-19 14:59:15.000000000 -0600
+++ linux/arch/x86/kernel/entry_64.S 2008-11-19 15:02:16.000000000 -0600
@@ -865,6 +865,10 @@ ENTRY(apic_timer_interrupt)
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)
+ENTRY(generic_timer_interrupt)
+ apicinterrupt GENERIC_TIMER_VECTOR,smp_generic_timer_interrupt
+END(generic_timer_interrupt)
+
ENTRY(uv_bau_message_intr1)
apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)
Index: linux/arch/x86/kernel/irqinit_64.c
===================================================================
--- linux.orig/arch/x86/kernel/irqinit_64.c 2008-11-19 14:59:15.000000000 -0600
+++ linux/arch/x86/kernel/irqinit_64.c 2008-11-19 15:09:36.000000000 -0600
@@ -23,6 +23,7 @@
#include <asm/desc.h>
#include <asm/apic.h>
#include <asm/i8259.h>
+#include <asm/idle.h>
/*
* Common place to define all x86 IRQ vectors
@@ -161,6 +162,47 @@ void __init init_ISA_irqs(void)
void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
+
+/* Function pointer for generic timer interrupt handling */
+static void (*generic_timer_interrupt_extension)(void);
+
+int
+generic_timer_reg_extension(void (*fn)(void))
+{
+ if (generic_timer_interrupt_extension)
+ return 1;
+
+ generic_timer_interrupt_extension = fn;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(generic_timer_reg_extension);
+
+void
+generic_timer_unreg_extension(void)
+{
+ if (generic_timer_interrupt_extension)
+ generic_timer_interrupt_extension = NULL;
+}
+EXPORT_SYMBOL_GPL(generic_timer_unreg_extension);
+
+void smp_generic_timer_interrupt(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ ack_APIC_irq();
+
+ exit_idle();
+
+ irq_enter();
+
+ if (generic_timer_interrupt_extension)
+ generic_timer_interrupt_extension();
+
+ irq_exit();
+
+ set_irq_regs(old_regs);
+}
+
static void __init smp_intr_init(void)
{
#ifdef CONFIG_SMP
@@ -202,6 +244,9 @@ static void __init apic_intr_init(void)
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+ /* IPI for platform specific timers */
+ alloc_intr_gate(GENERIC_TIMER_VECTOR, generic_timer_interrupt);
+
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
Index: linux/arch/x86/include/asm/irq_vectors.h
===================================================================
--- linux.orig/arch/x86/include/asm/irq_vectors.h 2008-11-19 14:59:15.000000000 -0600
+++ linux/arch/x86/include/asm/irq_vectors.h 2008-11-19 15:02:16.000000000 -0600
@@ -92,6 +92,11 @@
#define LOCAL_PERFMON_VECTOR 0xee
/*
+ * Generic timer vector for platform specific use
+ */
+#define GENERIC_TIMER_VECTOR 0xed
+
+/*
* First APIC vector available to drivers: (vectors 0x30-0xee) we
* start at 0x31(0x41) to spread out vectors evenly between priority
* levels. (0x80 is the syscall vector)
Index: linux/arch/x86/include/asm/hw_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/hw_irq.h 2008-11-19 14:59:15.000000000 -0600
+++ linux/arch/x86/include/asm/hw_irq.h 2008-11-19 15:02:16.000000000 -0600
@@ -29,6 +29,7 @@
/* Interrupt handlers registered during init_IRQ */
extern void apic_timer_interrupt(void);
+extern void generic_timer_interrupt(void);
extern void error_interrupt(void);
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);
Index: linux/arch/x86/include/asm/irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/irq.h 2008-11-19 14:59:15.000000000 -0600
+++ linux/arch/x86/include/asm/irq.h 2008-11-19 15:02:16.000000000 -0600
@@ -38,6 +38,8 @@ extern void fixup_irqs(cpumask_t map);
extern unsigned int do_IRQ(struct pt_regs *regs);
extern void init_IRQ(void);
+extern int generic_timer_reg_extension(void (*fn)(void));
+extern void generic_timer_unreg_extension(void);
extern void native_init_IRQ(void);
/* Interrupt vector management */
* Dimitri Sivanich <[email protected]> wrote:
> The following patches provide a driver for synchronized RTC
> clocksource and clockevents for SGI systems, as well as a generic
> timer system interrupt.
>
> With these patches, a module can be installed that registers the
> system-wide synchronized RTC clocksource and timers as both a
> clocksource and clockevents device running in high resolution mode.
>
> [PATCH 1/2 v3] SGI RTC: add clocksource driver
> [PATCH 2/2 v3] SGI RTC: add generic timer system interrupt
Looks very clean and well-done to me.
I had to take a good look at the rtc_timer_head->expires[] construct -
but i guess that's the best approach, as the max number of entries is
hard to judge at build time. (and we wont get any real limit
protection from gcc anyway)
Thomas, any objections?
Ingo
On Wed, 19 Nov 2008 15:23:50 -0600
Dimitri Sivanich <[email protected]> wrote:
> This patch provides a driver for SGI RTC clocks and timers.
>
> This provides a high resolution clock and timer source using the SGI
> system-wide synchronized RTC clock/timer hardware.
Plese copy John Stultz on clockevents things.
> @@ -0,0 +1,399 @@
> +/*
> + * SGI RTC clock/timer routines.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> + *
> + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
> + * Copyright (c) Dimitri Sivanich
> + */
> +#include <linux/module.h>
> +#include <linux/clocksource.h>
> +#include <linux/clockchips.h>
> +#include <asm/genapic.h>
> +#include <asm/uv/bios.h>
> +#include <asm/uv/uv_mmrs.h>
> +#include <asm/uv/uv_hub.h>
> +
> +MODULE_LICENSE("GPL");
> +
> +#define RTC_NAME "sgi_rtc"
> +#define cpu_to_pnode(_c_) \
> + uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, (_c_)))
It'd be nicer to implement this as a regular C function.
> +static cycle_t uv_read_rtc(void);
> +static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
> +static void uv_rtc_timer_setup(enum clock_event_mode,
> + struct clock_event_device *);
> +static void uv_rtc_timer_broadcast(cpumask_t);
> +
> +
> +static struct clocksource clocksource_uv = {
> + .name = RTC_NAME,
> + .rating = 400,
> + .read = uv_read_rtc,
> + .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
> + .shift = 0,
> + .flags = CLOCK_SOURCE_IS_CONTINUOUS,
> +};
> +
> +static struct clock_event_device clock_event_device_uv = {
> + .name = RTC_NAME,
> + .features = CLOCK_EVT_FEAT_ONESHOT,
> + .shift = 10,
> + .rating = 400,
> + .irq = -1,
> + .set_next_event = uv_rtc_next_event,
> + .set_mode = uv_rtc_timer_setup,
> + .event_handler = NULL,
> + .broadcast = uv_rtc_timer_broadcast
> +};
> +
> +static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
> +
> +struct uv_rtc_timer_head {
> + spinlock_t lock;
> + int fcpu;
> + int cpus;
> + u64 next_cpu; /* local cpu on this node */
> + u64 expires[];
> +};
> +
> +static DEFINE_PER_CPU(struct uv_rtc_timer_head *, rtc_timer_head);
It would be useful to document the data structures and the relationship
between them. When I read this code I wonder why we didn't just do
static DEFINE_PER_CPU(struct uv_rtc_timer_head, rtc_timer_head);
but then I see that zero-sized array and wonder what that is for, and
what it will contain, etc, etc.
> +
> +/*
> + * Hardware interface routines
> + */
> +
> +/* Send IPIs to another node */
> +static void
> +uv_rtc_send_IPI(int cpu, int vector)
> +{
> + unsigned long val, apicid, lapicid;
> + int pnode;
> +
> + apicid = per_cpu(x86_cpu_to_apicid, cpu);
> + lapicid = apicid & 0x3f;
> + pnode = uv_apicid_to_pnode(apicid);
> +
> + val = (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid <<
> + UVH_IPI_INT_APIC_ID_SHFT) |
> + (vector << UVH_IPI_INT_VECTOR_SHFT);
> + uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
> +}
> +
> +/* Check for an RTC interrupt pending */
> +static int
> +uv_intr_pending(int pnode)
> +{
> + if (uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
> + UVH_EVENT_OCCURRED0_RTC1_MASK)
> + return 1;
> + else
> + return 0;
> +}
> +
> +static void
> +uv_clr_intr_pending(int pnode)
> +{
> + uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
> + UVH_EVENT_OCCURRED0_RTC1_MASK);
> +}
> +
> +static void
> +uv_setup_intr(int cpu, u64 expires)
> +{
> + u64 val;
> + int pnode = cpu_to_pnode(cpu);
> +
> + uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
> + UVH_RTC1_INT_CONFIG_M_MASK);
> + uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
> +
> + uv_clr_intr_pending(pnode);
> +
> + val = ((u64)GENERIC_TIMER_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
> + ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
> + /* Set configuration */
> + uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
> + /* Initialize comparator value */
> + uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
> +}
> +
> +
> +/*
> + * Per-cpu timer tracking routines
> + */
> +
> +/* Allocate per-node list of cpu timer expiration times. */
> +static void
> +uv_rtc_allocate_timers(void)
> +{
> + struct uv_rtc_timer_head *head = NULL;
This initialisation is unneeded.
The definition of `head' could be moved to a more inner scope, down
near where it is used.
> + int cpu;
> + int max = 0;
> + int pnode = -1;
> + int i = 0;
> +
> + /* Determine max possible hyperthreads/pnode for allocation */
> + for_each_present_cpu(cpu) {
Using the present CPU map surprised me. I'd suggest that this is worth
a comment, because the reason cannot be determined in any other way.
That comment should also tell us about the situation with cpu hotplug,
which has me scratching my head.
> + if (pnode != cpu_to_pnode(cpu)) {
> + i = 0;
> + pnode = cpu_to_pnode(cpu);
> + }
> + if (max < ++i)
> + max = i;
> + }
> +
> + pnode = -1;
> + for_each_present_cpu(cpu) {
> + if (pnode != cpu_to_pnode(cpu)) {
> + i = 0;
> + pnode = cpu_to_pnode(cpu);
> + head = kmalloc_node(sizeof(struct uv_rtc_timer_head) +
> + (max * sizeof(u64)),
> + GFP_KERNEL, pnode);
> + spin_lock_init(&head->lock);
> + head->fcpu = cpu;
> + head->cpus = 0;
> + head->next_cpu = -1;
This will oops if the allocation fails.
If this code will only ever be called during initial system boot then
sure, there's not much point in doing the check&recover. If this is
the case then this function should be marked __init.
> + }
> + head->cpus++;
> + head->expires[i++] = ULLONG_MAX;
> + per_cpu(rtc_timer_head, cpu) = head;
> + }
> +}
> +
> +/* Find and set the next expiring timer. */
> +static void
> +uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode)
> +{
> + u64 exp;
> + u64 lowest = ULLONG_MAX;
> + int cpu = -1;
> + int c;
> +
> + head->next_cpu = -1;
> + for (c = 0; c < head->cpus; c++) {
> + exp = head->expires[c];
> + if (exp < lowest) {
> + cpu = c;
> + lowest = exp;
> + }
> + }
> + if (cpu >= 0) {
> + head->next_cpu = cpu;
> + c = head->fcpu + cpu;
> + uv_setup_intr(c, lowest);
> + /* If we didn't set it up in time, trigger */
> + if (lowest < uv_read_rtc() && !uv_intr_pending(pnode))
> + uv_rtc_send_IPI(c, GENERIC_TIMER_VECTOR);
> + } else
> + uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
> + UVH_RTC1_INT_CONFIG_M_MASK);
> +}
> +
> +/*
> + * Set expiration time for current cpu.
> + *
> + * Returns 1 if we missed the expiration time.
> + */
> +static int
> +uv_rtc_set_timer(int cpu, u64 expires)
> +{
> + int pnode = cpu_to_pnode(cpu);
> + struct uv_rtc_timer_head *head = per_cpu(rtc_timer_head, cpu);
> + int local_cpu = cpu - head->fcpu;
> + u64 *t = &head->expires[local_cpu];
> + unsigned long flags;
> + int next_cpu;
> +
> + spin_lock_irqsave(&head->lock, flags);
> +
> + next_cpu = head->next_cpu;
> + *t = expires;
> +
> + /* Will this one be next to go off? */
> + if (expires < head->expires[next_cpu]) {
> + head->next_cpu = cpu;
> + uv_setup_intr(cpu, expires);
> + if (expires <= uv_read_rtc() && !uv_intr_pending(pnode)) {
> + *t = ULLONG_MAX;
> + uv_rtc_find_next_timer(head, pnode);
> + spin_unlock_irqrestore(&head->lock, flags);
> + return 1;
> + }
> + }
> +
> + spin_unlock_irqrestore(&head->lock, flags);
> + return 0;
> +}
> +
> +/*
> + * Unset expiration time for current cpu.
> + *
> + * Returns 1 if this timer was pending.
> + */
> +static int
> +uv_rtc_unset_timer(int cpu)
> +{
> + int pnode = cpu_to_pnode(cpu);
> + struct uv_rtc_timer_head *head = per_cpu(rtc_timer_head, cpu);
> + int local_cpu = cpu - head->fcpu;
> + u64 *t = &head->expires[local_cpu];
> + unsigned long flags;
> + int rc = 0;
> +
> + spin_lock_irqsave(&head->lock, flags);
> +
> + if (head->next_cpu == cpu && uv_read_rtc() >= *t)
> + rc = 1;
> +
> + *t = ULLONG_MAX;
> + /* Was the hardware setup for this timer? */
> + if (head->next_cpu == cpu)
> + uv_rtc_find_next_timer(head, pnode);
> +
> + spin_unlock_irqrestore(&head->lock, flags);
> + return rc;
> +}
> +
> +
> +
> +/*
> + * Kernel interface routines.
> + */
> +
> +/*
> + * Read the RTC.
> + */
> +static cycle_t
> +uv_read_rtc(void)
> +{
> + return (cycle_t)uv_read_local_mmr(UVH_RTC);
> +}
> +
> +/*
> + * Program the next event, relative to now
> + */
> +static int
> +uv_rtc_next_event(unsigned long delta, struct clock_event_device *ced)
> +{
> + int ced_cpu = first_cpu(ced->cpumask);
> +
> + return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc());
> +}
> +
> +/*
> + * Setup the RTC timer in oneshot mode
> + */
> +static void
> +uv_rtc_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt)
> +{
> + unsigned long flags;
> + int ced_cpu = first_cpu(evt->cpumask);
> +
> + local_irq_save(flags);
> +
> + switch (mode) {
> + case CLOCK_EVT_MODE_PERIODIC:
> + case CLOCK_EVT_MODE_ONESHOT:
> + case CLOCK_EVT_MODE_RESUME:
> + /* Nothing to do here yet */
> + break;
> + case CLOCK_EVT_MODE_UNUSED:
> + case CLOCK_EVT_MODE_SHUTDOWN:
> + uv_rtc_unset_timer(ced_cpu);
> + break;
> + }
> +
> + local_irq_restore(flags);
> +}
The local_irq_save() is surprising. Is it well-known that
clock_event_device.set_mode() is called in a pinned-on-CPU state? Or
is something else happening here?
Some comments explaining to readers (and to reviewers) what's going on
here would help.
> +/*
> + * Local APIC timer broadcast function
> + */
> +static void
> +uv_rtc_timer_broadcast(cpumask_t mask)
> +{
> + int cpu;
> + for_each_cpu_mask(cpu, mask)
> + uv_rtc_send_IPI(cpu, GENERIC_TIMER_VECTOR);
> +}
> +
> +static void
> +uv_rtc_interrupt(void)
> +{
> + struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
> + unsigned long flags;
> + int cpu = smp_processor_id();
> +
> + local_irq_save(flags);
> + if (!ced || !ced->event_handler || !uv_rtc_unset_timer(cpu)) {
> + local_irq_restore(flags);
> + printk(KERN_WARNING
> + "Spurious uv_rtc timer interrupt on cpu %d\n",
> + smp_processor_id());
> + return;
> + }
> + local_irq_restore(flags);
> + ced->event_handler(ced);
> +}
I'll assume that this actually is a real interrupt handler. If not,
the use of smp_processor_id() might be buggy.
I cannot tell, because I cannot find this generic_timer_reg_extension()
thing anywhere in any trees. What's up with that?
The above printk could use local variable `cpu'.
> +static __init void
> +uv_rtc_register_clockevents(void *data)
> +{
> + struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
> +
> + memcpy(ced, &clock_event_device_uv, sizeof(clock_event_device_uv));
Is that better than
*ced = clock_event_device_uv;
?
> + cpus_clear(ced->cpumask);
> + cpu_set(smp_processor_id(), ced->cpumask);
> + clockevents_register_device(ced);
> +}
> +
> +static __init int
> +uv_rtc_setup_clock(void)
> +{
> + int rc;
> +
> + if (!is_uv_system() || generic_timer_reg_extension(uv_rtc_interrupt))
> + return -ENODEV;
> +
> + clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
> + clocksource_uv.shift);
> +
> + rc = clocksource_register(&clocksource_uv);
> + if (rc) {
> + generic_timer_unreg_extension();
> + return rc;
> + }
> +
> + /* Setup and register clockevents */
> + uv_rtc_allocate_timers();
> +
> + clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second,
> + NSEC_PER_SEC, clock_event_device_uv.shift);
> +
> + clock_event_device_uv.min_delta_ns = NSEC_PER_SEC /
> + sn_rtc_cycles_per_second;
> + clock_event_device_uv.max_delta_ns = clocksource_uv.mask *
> + (NSEC_PER_SEC / sn_rtc_cycles_per_second);
> +
> + smp_call_function(uv_rtc_register_clockevents, NULL, 0);
> + uv_rtc_register_clockevents(NULL);
Use on_each_cpu() here.
Doing that will fix the bug wherein uv_rtc_register_clockevents() calls
smp_processor_id() in preemptible code.
> + return 0;
> +}
> +module_init(uv_rtc_setup_clock);
> Index: linux/kernel/time/clockevents.c
> ===================================================================
> --- linux.orig/kernel/time/clockevents.c 2008-11-19 15:08:01.000000000 -0600
> +++ linux/kernel/time/clockevents.c 2008-11-19 15:09:54.000000000 -0600
> @@ -183,6 +183,7 @@ void clockevents_register_device(struct
>
> spin_unlock(&clockevents_lock);
> }
> +EXPORT_SYMBOL_GPL(clockevents_register_device);
>
> /*
> * Noop handler when we shut down an event device
> Index: linux/kernel/time/clocksource.c
> ===================================================================
> --- linux.orig/kernel/time/clocksource.c 2008-11-19 15:08:01.000000000 -0600
> +++ linux/kernel/time/clocksource.c 2008-11-19 15:09:54.000000000 -0600
> @@ -369,6 +369,7 @@ void clocksource_unregister(struct clock
> next_clocksource = select_clocksource();
> spin_unlock_irqrestore(&clocksource_lock, flags);
> }
> +EXPORT_SYMBOL(clocksource_unregister);
>
> #ifdef CONFIG_SYSFS
> /**
On Wed, 19 Nov 2008 15:26:31 -0600
Dimitri Sivanich <[email protected]> wrote:
> This patch allocates a system interrupt vector for platform specific use
> in implementing timer interrupts.
>
> Signed-off-by: Dimitri Sivanich <[email protected]>
>
> ---
>
> Refreshing this to use 0xed as the vector. Have repackaged it to be more
> generic.
>
> arch/x86/include/asm/hw_irq.h | 1
> arch/x86/include/asm/irq.h | 2 +
> arch/x86/include/asm/irq_vectors.h | 5 +++
> arch/x86/kernel/entry_64.S | 4 ++
> arch/x86/kernel/irqinit_64.c | 45 +++++++++++++++++++++++++++++++++
> 5 files changed, 57 insertions(+)
>
> Index: linux/arch/x86/kernel/entry_64.S
> ===================================================================
> --- linux.orig/arch/x86/kernel/entry_64.S 2008-11-19 14:59:15.000000000 -0600
> +++ linux/arch/x86/kernel/entry_64.S 2008-11-19 15:02:16.000000000 -0600
> @@ -865,6 +865,10 @@ ENTRY(apic_timer_interrupt)
> apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
> END(apic_timer_interrupt)
>
> +ENTRY(generic_timer_interrupt)
> + apicinterrupt GENERIC_TIMER_VECTOR,smp_generic_timer_interrupt
> +END(generic_timer_interrupt)
> +
> ENTRY(uv_bau_message_intr1)
> apicinterrupt 220,uv_bau_message_interrupt
> END(uv_bau_message_intr1)
> Index: linux/arch/x86/kernel/irqinit_64.c
> ===================================================================
> --- linux.orig/arch/x86/kernel/irqinit_64.c 2008-11-19 14:59:15.000000000 -0600
> +++ linux/arch/x86/kernel/irqinit_64.c 2008-11-19 15:09:36.000000000 -0600
> @@ -23,6 +23,7 @@
> #include <asm/desc.h>
> #include <asm/apic.h>
> #include <asm/i8259.h>
> +#include <asm/idle.h>
>
> /*
> * Common place to define all x86 IRQ vectors
> @@ -161,6 +162,47 @@ void __init init_ISA_irqs(void)
>
> void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
>
> +
> +/* Function pointer for generic timer interrupt handling */
> +static void (*generic_timer_interrupt_extension)(void);
> +
> +int
> +generic_timer_reg_extension(void (*fn)(void))
> +{
> + if (generic_timer_interrupt_extension)
> + return 1;
> +
> + generic_timer_interrupt_extension = fn;
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(generic_timer_reg_extension);
ah-hah, there it is! These patches are in the wrong order, no?
> +void
> +generic_timer_unreg_extension(void)
> +{
> + if (generic_timer_interrupt_extension)
> + generic_timer_interrupt_extension = NULL;
> +}
> +EXPORT_SYMBOL_GPL(generic_timer_unreg_extension);
> +
> +void smp_generic_timer_interrupt(struct pt_regs *regs)
> +{
> + struct pt_regs *old_regs = set_irq_regs(regs);
> +
> + ack_APIC_irq();
> +
> + exit_idle();
> +
> + irq_enter();
> +
> + if (generic_timer_interrupt_extension)
> + generic_timer_interrupt_extension();
> +
> + irq_exit();
> +
> + set_irq_regs(old_regs);
> +}
OK, I guess that answers my earlier question.
These function names are awful. I thought that
"generic_timer_reg_extension" had something to to with hardware
registers in the timer. I see now that you abbreviated "register" to
"reg".
Please rename these to
register_generic_timer_extension()
unregister_generic_timer_extension()
> > static void __init smp_intr_init(void)
> {
> #ifdef CONFIG_SMP
> @@ -202,6 +244,9 @@ static void __init apic_intr_init(void)
> /* self generated IPI for local APIC timer */
> alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
>
> + /* IPI for platform specific timers */
> + alloc_intr_gate(GENERIC_TIMER_VECTOR, generic_timer_interrupt);
> +
> /* IPI vectors for APIC spurious and error interrupts */
> alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
> alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
> Index: linux/arch/x86/include/asm/irq_vectors.h
> ===================================================================
> --- linux.orig/arch/x86/include/asm/irq_vectors.h 2008-11-19 14:59:15.000000000 -0600
> +++ linux/arch/x86/include/asm/irq_vectors.h 2008-11-19 15:02:16.000000000 -0600
> @@ -92,6 +92,11 @@
> #define LOCAL_PERFMON_VECTOR 0xee
>
> /*
> + * Generic timer vector for platform specific use
> + */
> +#define GENERIC_TIMER_VECTOR 0xed
> +
> +/*
> * First APIC vector available to drivers: (vectors 0x30-0xee) we
> * start at 0x31(0x41) to spread out vectors evenly between priority
> * levels. (0x80 is the syscall vector)
> Index: linux/arch/x86/include/asm/hw_irq.h
> ===================================================================
> --- linux.orig/arch/x86/include/asm/hw_irq.h 2008-11-19 14:59:15.000000000 -0600
> +++ linux/arch/x86/include/asm/hw_irq.h 2008-11-19 15:02:16.000000000 -0600
> @@ -29,6 +29,7 @@
>
> /* Interrupt handlers registered during init_IRQ */
> extern void apic_timer_interrupt(void);
> +extern void generic_timer_interrupt(void);
> extern void error_interrupt(void);
> extern void spurious_interrupt(void);
> extern void thermal_interrupt(void);
> Index: linux/arch/x86/include/asm/irq.h
> ===================================================================
> --- linux.orig/arch/x86/include/asm/irq.h 2008-11-19 14:59:15.000000000 -0600
> +++ linux/arch/x86/include/asm/irq.h 2008-11-19 15:02:16.000000000 -0600
> @@ -38,6 +38,8 @@ extern void fixup_irqs(cpumask_t map);
>
> extern unsigned int do_IRQ(struct pt_regs *regs);
> extern void init_IRQ(void);
> +extern int generic_timer_reg_extension(void (*fn)(void));
> +extern void generic_timer_unreg_extension(void);
> extern void native_init_IRQ(void);
>
> /* Interrupt vector management */
Andrew Morton wrote:
>>
>> Refreshing this to use 0xed as the vector. Have repackaged it to be more
>> generic.
>>
What happened to the concept of using normal IRQs for this, via a custom
irqchip? Adding more reserved vectors really sucks, and it affects
non-SGI platforms, which could potentially run out of vectors as a result.
-hpa
On Thu, 2008-11-20 at 15:08 -0800, Andrew Morton wrote:
> On Wed, 19 Nov 2008 15:23:50 -0600
> Dimitri Sivanich <[email protected]> wrote:
>
> > This patch provides a driver for SGI RTC clocks and timers.
> >
> > This provides a high resolution clock and timer source using the SGI
> > system-wide synchronized RTC clock/timer hardware.
[snip]
> > +static struct clocksource clocksource_uv = {
> > + .name = RTC_NAME,
> > + .rating = 400,
> > + .read = uv_read_rtc,
> > + .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
> > + .shift = 0,
> > + .flags = CLOCK_SOURCE_IS_CONTINUOUS,
> > +};
Hey Dimitri,
One issue with this clocksource is the shift value. The clocksource
frequency is calculated from the mult/shift pair (f = mult/2^shift).
When NTP steers the clock in the generic timekeeping code, it does so by
tweaking the mult value up or down. However if the shift value is too
low, that ends up being quite a large change to the frequency.
So in order to get fine grained frequency adjustments I'd recommend
using a larger shift value (8 at least). Ideally you want the largest
shift value possible, but some care is needed that it isn't too big,
because larger shift values mean larger mult values, and you don't want
to overflow on the multiplication.
A rule of thumb I use is to find a shift value so the resulting mult
value won't overflow 1 second worth of cycles.
thanks
-john
Ingo Molnar wrote:
> * Dimitri Sivanich <[email protected]> wrote:
>
>> The following patches provide a driver for synchronized RTC
>> clocksource and clockevents for SGI systems, as well as a generic
>> timer system interrupt.
>>
>> With these patches, a module can be installed that registers the
>> system-wide synchronized RTC clocksource and timers as both a
>> clocksource and clockevents device running in high resolution mode.
>>
>> [PATCH 1/2 v3] SGI RTC: add clocksource driver
>> [PATCH 2/2 v3] SGI RTC: add generic timer system interrupt
>
> Looks very clean and well-done to me.
>
> I had to take a good look at the rtc_timer_head->expires[] construct -
> but i guess that's the best approach, as the max number of entries is
> hard to judge at build time. (and we wont get any real limit
> protection from gcc anyway)
>
> Thomas, any objections?
I have *extremely* serious reservations about reserving even more
hardware vectors for SGI only. This affects all systems, and quite
frankly should not be necessary at all.
The SGI UV people have pushed this at a number of points in the past,
and we have told them to use an irqchip instead. This patch tries to
allocate yet another reserved vector, instead.
-hpa
* H. Peter Anvin <[email protected]> wrote:
> Ingo Molnar wrote:
> > * Dimitri Sivanich <[email protected]> wrote:
> >
> >> The following patches provide a driver for synchronized RTC
> >> clocksource and clockevents for SGI systems, as well as a generic
> >> timer system interrupt.
> >>
> >> With these patches, a module can be installed that registers the
> >> system-wide synchronized RTC clocksource and timers as both a
> >> clocksource and clockevents device running in high resolution mode.
> >>
> >> [PATCH 1/2 v3] SGI RTC: add clocksource driver
> >> [PATCH 2/2 v3] SGI RTC: add generic timer system interrupt
> >
> > Looks very clean and well-done to me.
> >
> > I had to take a good look at the rtc_timer_head->expires[] construct -
> > but i guess that's the best approach, as the max number of entries is
> > hard to judge at build time. (and we wont get any real limit
> > protection from gcc anyway)
> >
> > Thomas, any objections?
>
> I have *extremely* serious reservations about reserving even more
> hardware vectors for SGI only. This affects all systems, and quite
> frankly should not be necessary at all.
>
> The SGI UV people have pushed this at a number of points in the
> past, and we have told them to use an irqchip instead. This patch
> tries to allocate yet another reserved vector, instead.
ah, yes, i suggested that in the past. And i was _so_ happy that this
driver wasnt calling into the BIOS anymore but talking straight to the
hardware ;-)
it shouldnt be hard to define a proper irqchip here.
Ingo
On Thu, Nov 20, 2008 at 03:19:49PM -0800, H. Peter Anvin wrote:
> Andrew Morton wrote:
> >>
> >> Refreshing this to use 0xed as the vector. Have repackaged it to be more
> >> generic.
> >>
>
> What happened to the concept of using normal IRQs for this, via a custom
> irqchip? Adding more reserved vectors really sucks, and it affects
> non-SGI platforms, which could potentially run out of vectors as a result.
There are basically two issues with using 'normal IRQs' in cases like this:
- Using normal IRQs would mean we would have an IRQ per cpu. The current
hard coded maximum, NR_IRQS, is 4352 (NR_VECTORS + (32 * MAX_IO_APICS)).
On machines with large numbers of cpus and an irq per cpu for each desired
interrupt, that's a lot of IRQs. In addition, the GRU, will need 2 such
IRQs per cpu. On 4096 cpu systems, you've already used up more than the
limit just for that. Until some sort of infrastructure change takes place
that would potentially allow this to be dynamically increased, such as
Yinghai Lu's "sparse_irq aka dyn_irq v14" patch, this problem will exist.
Furthermore, the actual runtime limit, nr_irqs, is set to 96 by
probe_nr_irqs for our configuration. This is because that code assumes all
vectors are io-apic vectors, not cpu centric vectors like the ones I'm
talking about. With the current, scheme, even on a 128 cpu system, I'm out
of IRQs immediately.
- The current infrastructure for requesting vector/IRQ combinations doesn't
allow one to request an interrupt priority higher than i/o device interrupt
priorities. Clock event (high resolution timer) code should run at higher
interrupt priority.
I labeled the system vector as generic to indicate that this is available to
other platforms.
Given the above issues, do you have any suggestions as to how I might best
resolve this?
On Thu, Nov 20, 2008 at 05:44:22PM -0800, H. Peter Anvin wrote:
> Ingo Molnar wrote:
> > * Dimitri Sivanich <[email protected]> wrote:
> >
> >> The following patches provide a driver for synchronized RTC
> >> clocksource and clockevents for SGI systems, as well as a generic
> >> timer system interrupt.
> >>
> >> With these patches, a module can be installed that registers the
> >> system-wide synchronized RTC clocksource and timers as both a
> >> clocksource and clockevents device running in high resolution mode.
> >>
> >> [PATCH 1/2 v3] SGI RTC: add clocksource driver
> >> [PATCH 2/2 v3] SGI RTC: add generic timer system interrupt
> >
> > Looks very clean and well-done to me.
> >
> > I had to take a good look at the rtc_timer_head->expires[] construct -
> > but i guess that's the best approach, as the max number of entries is
> > hard to judge at build time. (and we wont get any real limit
> > protection from gcc anyway)
> >
> > Thomas, any objections?
>
> I have *extremely* serious reservations about reserving even more
> hardware vectors for SGI only. This affects all systems, and quite
> frankly should not be necessary at all.
>
> The SGI UV people have pushed this at a number of points in the past,
> and we have told them to use an irqchip instead. This patch tries to
> allocate yet another reserved vector, instead.
I labeled the system vector as generic to indicate that this is available
to other platforms. See my other email to you for the reasons why I did
not use an irq instead.
I will address your concerns with this in forthcoming patchsets.
On Thu, Nov 20, 2008 at 03:12:08PM -0800, Andrew Morton wrote:
> On Wed, 19 Nov 2008 15:26:31 -0600
> Dimitri Sivanich <[email protected]> wrote:
>
> > This patch allocates a system interrupt vector for platform specific use
> > in implementing timer interrupts.
> >
> > Signed-off-by: Dimitri Sivanich <[email protected]>
> >
> > ---
> >
> > Refreshing this to use 0xed as the vector. Have repackaged it to be more
> > generic.
> >
> > arch/x86/include/asm/hw_irq.h | 1
> > arch/x86/include/asm/irq.h | 2 +
> > arch/x86/include/asm/irq_vectors.h | 5 +++
> > arch/x86/kernel/entry_64.S | 4 ++
> > arch/x86/kernel/irqinit_64.c | 45 +++++++++++++++++++++++++++++++++
> > 5 files changed, 57 insertions(+)
> >
> > Index: linux/arch/x86/kernel/entry_64.S
> > ===================================================================
> > --- linux.orig/arch/x86/kernel/entry_64.S 2008-11-19 14:59:15.000000000 -0600
> > +++ linux/arch/x86/kernel/entry_64.S 2008-11-19 15:02:16.000000000 -0600
> > @@ -865,6 +865,10 @@ ENTRY(apic_timer_interrupt)
> > apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
> > END(apic_timer_interrupt)
> >
> > +ENTRY(generic_timer_interrupt)
> > + apicinterrupt GENERIC_TIMER_VECTOR,smp_generic_timer_interrupt
> > +END(generic_timer_interrupt)
> > +
> > ENTRY(uv_bau_message_intr1)
> > apicinterrupt 220,uv_bau_message_interrupt
> > END(uv_bau_message_intr1)
> > Index: linux/arch/x86/kernel/irqinit_64.c
> > ===================================================================
> > --- linux.orig/arch/x86/kernel/irqinit_64.c 2008-11-19 14:59:15.000000000 -0600
> > +++ linux/arch/x86/kernel/irqinit_64.c 2008-11-19 15:09:36.000000000 -0600
> > @@ -23,6 +23,7 @@
> > #include <asm/desc.h>
> > #include <asm/apic.h>
> > #include <asm/i8259.h>
> > +#include <asm/idle.h>
> >
> > /*
> > * Common place to define all x86 IRQ vectors
> > @@ -161,6 +162,47 @@ void __init init_ISA_irqs(void)
> >
> > void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
> >
> > +
> > +/* Function pointer for generic timer interrupt handling */
> > +static void (*generic_timer_interrupt_extension)(void);
> > +
> > +int
> > +generic_timer_reg_extension(void (*fn)(void))
> > +{
> > + if (generic_timer_interrupt_extension)
> > + return 1;
> > +
> > + generic_timer_interrupt_extension = fn;
> > + return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(generic_timer_reg_extension);
>
> ah-hah, there it is! These patches are in the wrong order, no?
>
>
> > +void
> > +generic_timer_unreg_extension(void)
> > +{
> > + if (generic_timer_interrupt_extension)
> > + generic_timer_interrupt_extension = NULL;
> > +}
> > +EXPORT_SYMBOL_GPL(generic_timer_unreg_extension);
> > +
> > +void smp_generic_timer_interrupt(struct pt_regs *regs)
> > +{
> > + struct pt_regs *old_regs = set_irq_regs(regs);
> > +
> > + ack_APIC_irq();
> > +
> > + exit_idle();
> > +
> > + irq_enter();
> > +
> > + if (generic_timer_interrupt_extension)
> > + generic_timer_interrupt_extension();
> > +
> > + irq_exit();
> > +
> > + set_irq_regs(old_regs);
> > +}
>
> OK, I guess that answers my earlier question.
>
> These function names are awful. I thought that
> "generic_timer_reg_extension" had something to to with hardware
> registers in the timer. I see now that you abbreviated "register" to
> "reg".
>
> Please rename these to
>
> register_generic_timer_extension()
> unregister_generic_timer_extension()
>
>
>
> > > static void __init smp_intr_init(void)
> > {
> > #ifdef CONFIG_SMP
> > @@ -202,6 +244,9 @@ static void __init apic_intr_init(void)
> > /* self generated IPI for local APIC timer */
> > alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
> >
> > + /* IPI for platform specific timers */
> > + alloc_intr_gate(GENERIC_TIMER_VECTOR, generic_timer_interrupt);
> > +
> > /* IPI vectors for APIC spurious and error interrupts */
> > alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
> > alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
> > Index: linux/arch/x86/include/asm/irq_vectors.h
> > ===================================================================
> > --- linux.orig/arch/x86/include/asm/irq_vectors.h 2008-11-19 14:59:15.000000000 -0600
> > +++ linux/arch/x86/include/asm/irq_vectors.h 2008-11-19 15:02:16.000000000 -0600
> > @@ -92,6 +92,11 @@
> > #define LOCAL_PERFMON_VECTOR 0xee
> >
> > /*
> > + * Generic timer vector for platform specific use
> > + */
> > +#define GENERIC_TIMER_VECTOR 0xed
> > +
> > +/*
> > * First APIC vector available to drivers: (vectors 0x30-0xee) we
> > * start at 0x31(0x41) to spread out vectors evenly between priority
> > * levels. (0x80 is the syscall vector)
> > Index: linux/arch/x86/include/asm/hw_irq.h
> > ===================================================================
> > --- linux.orig/arch/x86/include/asm/hw_irq.h 2008-11-19 14:59:15.000000000 -0600
> > +++ linux/arch/x86/include/asm/hw_irq.h 2008-11-19 15:02:16.000000000 -0600
> > @@ -29,6 +29,7 @@
> >
> > /* Interrupt handlers registered during init_IRQ */
> > extern void apic_timer_interrupt(void);
> > +extern void generic_timer_interrupt(void);
> > extern void error_interrupt(void);
> > extern void spurious_interrupt(void);
> > extern void thermal_interrupt(void);
> > Index: linux/arch/x86/include/asm/irq.h
> > ===================================================================
> > --- linux.orig/arch/x86/include/asm/irq.h 2008-11-19 14:59:15.000000000 -0600
> > +++ linux/arch/x86/include/asm/irq.h 2008-11-19 15:02:16.000000000 -0600
> > @@ -38,6 +38,8 @@ extern void fixup_irqs(cpumask_t map);
> >
> > extern unsigned int do_IRQ(struct pt_regs *regs);
> > extern void init_IRQ(void);
> > +extern int generic_timer_reg_extension(void (*fn)(void));
> > +extern void generic_timer_unreg_extension(void);
> > extern void native_init_IRQ(void);
> >
> > /* Interrupt vector management */
John,
Thanks for the reply. I will address this in forthcoming patchsets.
On Thu, Nov 20, 2008 at 04:08:08PM -0800, john stultz wrote:
> On Thu, 2008-11-20 at 15:08 -0800, Andrew Morton wrote:
> > On Wed, 19 Nov 2008 15:23:50 -0600
> > Dimitri Sivanich <[email protected]> wrote:
> >
> > > This patch provides a driver for SGI RTC clocks and timers.
> > >
> > > This provides a high resolution clock and timer source using the SGI
> > > system-wide synchronized RTC clock/timer hardware.
> [snip]
> > > +static struct clocksource clocksource_uv = {
> > > + .name = RTC_NAME,
> > > + .rating = 400,
> > > + .read = uv_read_rtc,
> > > + .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
> > > + .shift = 0,
> > > + .flags = CLOCK_SOURCE_IS_CONTINUOUS,
> > > +};
>
> Hey Dimitri,
> One issue with this clocksource is the shift value. The clocksource
> frequency is calculated from the mult/shift pair (f = mult/2^shift).
> When NTP steers the clock in the generic timekeeping code, it does so by
> tweaking the mult value up or down. However if the shift value is too
> low, that ends up being quite a large change to the frequency.
>
> So in order to get fine grained frequency adjustments I'd recommend
> using a larger shift value (8 at least). Ideally you want the largest
> shift value possible, but some care is needed that it isn't too big,
> because larger shift values mean larger mult values, and you don't want
> to overflow on the multiplication.
>
> A rule of thumb I use is to find a shift value so the resulting mult
> value won't overflow 1 second worth of cycles.
>
Dimitri Sivanich wrote:
>
> There are basically two issues with using 'normal IRQs' in cases like this:
>
> - Using normal IRQs would mean we would have an IRQ per cpu. The current
> hard coded maximum, NR_IRQS, is 4352 (NR_VECTORS + (32 * MAX_IO_APICS)).
> On machines with large numbers of cpus and an irq per cpu for each desired
> interrupt, that's a lot of IRQs. In addition, the GRU, will need 2 such
> IRQs per cpu. On 4096 cpu systems, you've already used up more than the
> limit just for that. Until some sort of infrastructure change takes place
> that would potentially allow this to be dynamically increased, such as
> Yinghai Lu's "sparse_irq aka dyn_irq v14" patch, this problem will exist.
>
> Furthermore, the actual runtime limit, nr_irqs, is set to 96 by
> probe_nr_irqs for our configuration. This is because that code assumes all
> vectors are io-apic vectors, not cpu centric vectors like the ones I'm
> talking about. With the current, scheme, even on a 128 cpu system, I'm out
> of IRQs immediately.
>
> - The current infrastructure for requesting vector/IRQ combinations doesn't
> allow one to request an interrupt priority higher than i/o device interrupt
> priorities. Clock event (high resolution timer) code should run at higher
> interrupt priority.
Okay, so it is a hack pending us taking care of issues in the current
code. #1 we're obviously working on, #2 I need to think some more about
but shouldn't be too hard to fix -- if real, it also affects other
interrupt-driven clock sources.
I'm OK with this being a temporary hack, but I want it to be recognized
as such and cleaned up as soon as possible.
-hpa
On Fri, Nov 21, 2008 at 10:26 AM, H. Peter Anvin <[email protected]> wrote:
> Dimitri Sivanich wrote:
>>
>> There are basically two issues with using 'normal IRQs' in cases like this:
>>
>> - Using normal IRQs would mean we would have an IRQ per cpu. The current
>> hard coded maximum, NR_IRQS, is 4352 (NR_VECTORS + (32 * MAX_IO_APICS)).
>> On machines with large numbers of cpus and an irq per cpu for each desired
>> interrupt, that's a lot of IRQs. In addition, the GRU, will need 2 such
>> IRQs per cpu. On 4096 cpu systems, you've already used up more than the
>> limit just for that. Until some sort of infrastructure change takes place
>> that would potentially allow this to be dynamically increased, such as
>> Yinghai Lu's "sparse_irq aka dyn_irq v14" patch, this problem will exist.
>>
>> Furthermore, the actual runtime limit, nr_irqs, is set to 96 by
>> probe_nr_irqs for our configuration. This is because that code assumes all
>> vectors are io-apic vectors, not cpu centric vectors like the ones I'm
>> talking about. With the current, scheme, even on a 128 cpu system, I'm out
>> of IRQs immediately.
>>
>> - The current infrastructure for requesting vector/IRQ combinations doesn't
>> allow one to request an interrupt priority higher than i/o device interrupt
>> priorities. Clock event (high resolution timer) code should run at higher
>> interrupt priority.
>
> Okay, so it is a hack pending us taking care of issues in the current
> code. #1 we're obviously working on, #2 I need to think some more about
> but shouldn't be too hard to fix -- if real, it also affects other
> interrupt-driven clock sources.
should just remove probe_nr_irqs.
YH
* H. Peter Anvin <[email protected]> wrote:
> Dimitri Sivanich wrote:
> >
> > There are basically two issues with using 'normal IRQs' in cases like this:
> >
> > - Using normal IRQs would mean we would have an IRQ per cpu. The current
> > hard coded maximum, NR_IRQS, is 4352 (NR_VECTORS + (32 * MAX_IO_APICS)).
> > On machines with large numbers of cpus and an irq per cpu for each desired
> > interrupt, that's a lot of IRQs. In addition, the GRU, will need 2 such
> > IRQs per cpu. On 4096 cpu systems, you've already used up more than the
> > limit just for that. Until some sort of infrastructure change takes place
> > that would potentially allow this to be dynamically increased, such as
> > Yinghai Lu's "sparse_irq aka dyn_irq v14" patch, this problem will exist.
> >
> > Furthermore, the actual runtime limit, nr_irqs, is set to 96 by
> > probe_nr_irqs for our configuration. This is because that code assumes all
> > vectors are io-apic vectors, not cpu centric vectors like the ones I'm
> > talking about. With the current, scheme, even on a 128 cpu system, I'm out
> > of IRQs immediately.
> >
> > - The current infrastructure for requesting vector/IRQ combinations doesn't
> > allow one to request an interrupt priority higher than i/o device interrupt
> > priorities. Clock event (high resolution timer) code should run at higher
> > interrupt priority.
>
> Okay, so it is a hack pending us taking care of issues in the
> current code. #1 we're obviously working on, #2 I need to think
> some more about but shouldn't be too hard to fix -- if real, it also
> affects other interrupt-driven clock sources.
>
> I'm OK with this being a temporary hack, but I want it to be
> recognized as such and cleaned up as soon as possible.
okay.
Dimitri, looks like there are no blocker issues - John's clocksource
comments need to be addressed and then we should be green to go for
having this applied.
Ingo