From: David Brownell <[email protected]>
Create <linux/atmel_tc.h> based on <asm-arm/arch-at91/at91-tc.h> and the
at91sam9263 and at32ap7000 datasheets. Most AT91 and AT32 SOCs have one
or two of these TC blocks, which include three 16-bit timers that can be
interconnected in various ways.
These TC blocks can be used for external interfacing (such as PWM and
measurement), or used as somewhat quirky sixteen-bit timers.
Changes relative to the original version:
* Drop unneeded inclusion of <linux/mutex.h>
* Support an arbitrary number of TC blocks
* Return a struct with information about a TC block from
atmel_tc_alloc() instead of using a combination of return values
and "out" parameters.
* ioremap() the I/O registers on allocation
* Look up clocks and irqs for all channels
* Add "name" parameter to atmel_tc_alloc() and use this when
requesting the iomem resource.
* Check if the platform provided the necessary resources at probe()
time instead of when the TCB is allocated.
Signed-off-by: David Brownell <[email protected]>
Signed-off-by: Haavard Skinnemoen <[email protected]>
Cc: Nicolas Ferre <[email protected]>
Cc: Andrew Victor <[email protected]>
---
This patch is meant to replace atmel_tc-library.patch
drivers/misc/Kconfig | 8 ++
drivers/misc/Makefile | 1 +
drivers/misc/atmel_tclib.c | 161 ++++++++++++++++++++++++++++
include/linux/atmel_tc.h | 252 ++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 422 insertions(+), 0 deletions(-)
create mode 100644 drivers/misc/atmel_tclib.c
create mode 100644 include/linux/atmel_tc.h
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 982e27b..b3ba681 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -22,6 +22,14 @@ config ATMEL_PWM
purposes including software controlled power-efficent backlights
on LCD displays, motor control, and waveform generation.
+config ATMEL_TCLIB
+ bool "Atmel AT32/AT91 Timer/Counter Library"
+ depends on (AVR32 || ARCH_AT91)
+ help
+ Select this if you want a library to allocate the Timer/Counter
+ blocks found on many Atmel processors. This facilitates using
+ these blocks by different drivers despite processor differences.
+
config IBM_ASM
tristate "Device driver for IBM RSA service processor"
depends on X86 && PCI && INPUT && EXPERIMENTAL
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 3b12f5d..c975028 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_ACER_WMI) += acer-wmi.o
obj-$(CONFIG_ASUS_LAPTOP) += asus-laptop.o
obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o
obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o
+obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o
obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o
obj-$(CONFIG_LKDTM) += lkdtm.o
obj-$(CONFIG_TIFM_CORE) += tifm_core.o
diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c
new file mode 100644
index 0000000..05dc8a3
--- /dev/null
+++ b/drivers/misc/atmel_tclib.c
@@ -0,0 +1,161 @@
+#include <linux/atmel_tc.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+/* Number of bytes to reserve for the iomem resource */
+#define ATMEL_TC_IOMEM_SIZE 256
+
+
+/*
+ * This is a thin library to solve the problem of how to portably allocate
+ * one of the TC blocks. For simplicity, it doesn't currently expect to
+ * share individual timers between different drivers.
+ */
+
+/*
+ * Per-platform divisor table.  Entry N describes ATMEL_TC_TIMER_CLOCK<N+1>:
+ * a non-zero value is the divisor applied to the bus clock named below,
+ * and a zero entry marks the 32 KiHz clock.  Exactly one variant is built,
+ * selected by the architecture config symbol.
+ */
+#if defined(CONFIG_AVR32)
+/* AVR32 has these divide PBB */
+const u8 atmel_tc_divisors[5] = { 0, 4, 8, 16, 32, };
+EXPORT_SYMBOL(atmel_tc_divisors);
+
+#elif defined(CONFIG_ARCH_AT91)
+/* AT91 has these divide MCK */
+const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, };
+EXPORT_SYMBOL(atmel_tc_divisors);
+
+#endif
+
+/*
+ * tc_list holds one struct atmel_tc (linked through ->node) for every TC
+ * block that tc_probe() accepted.  tc_list_lock is taken around list
+ * updates and around claiming/releasing a block.
+ */
+static DEFINE_SPINLOCK(tc_list_lock);
+static LIST_HEAD(tc_list);
+
+/**
+ * atmel_tc_alloc - allocate a specified TC block
+ * @block: which block to allocate (compared against the platform device id)
+ * @name: name to be associated with the iomem resource
+ *
+ * Caller allocates a block.  If it is available, its register window is
+ * reserved and mapped, and a pointer to a pre-initialized struct atmel_tc
+ * is returned.  The caller can access the registers directly through the
+ * "regs" field.
+ *
+ * Returns NULL when no block with that id was probed, when the block is
+ * already allocated, or when the I/O memory cannot be reserved or mapped.
+ */
+struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name)
+{
+	struct atmel_tc *tc;
+	struct platform_device *pdev = NULL;
+	struct resource *r;
+
+	spin_lock(&tc_list_lock);
+	list_for_each_entry(tc, &tc_list, node) {
+		if (tc->pdev->id == block) {
+			pdev = tc->pdev;
+			break;
+		}
+	}
+
+	/*
+	 * When the loop finds nothing, "tc" is not a valid entry; pdev is
+	 * still NULL then, so it must be tested before tc is dereferenced.
+	 * A non-NULL iomem means the block already has an owner.
+	 */
+	if (!pdev || tc->iomem)
+		goto fail;
+
+	/* tc_probe() rejected devices that lack this resource */
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	r = request_mem_region(r->start, ATMEL_TC_IOMEM_SIZE, name);
+	if (!r)
+		goto fail;
+
+	tc->regs = ioremap(r->start, ATMEL_TC_IOMEM_SIZE);
+	if (!tc->regs)
+		goto fail_ioremap;
+
+	tc->iomem = r;
+
+out:
+	spin_unlock(&tc_list_lock);
+	return tc;
+
+fail_ioremap:
+	/*
+	 * A region obtained from request_mem_region() has to be returned
+	 * with release_mem_region(); release_resource() would only unlink
+	 * it from the resource tree and leak the allocation.
+	 */
+	release_mem_region(r->start, ATMEL_TC_IOMEM_SIZE);
+fail:
+	tc = NULL;
+	goto out;
+}
+EXPORT_SYMBOL_GPL(atmel_tc_alloc);
+
+/**
+ * atmel_tc_free - release a specified TC block
+ * @tc: Timer/counter block that was returned by atmel_tc_alloc()
+ *
+ * This reverses the effect of atmel_tc_alloc(), unmapping the I/O
+ * registers, invalidating the resource returned by that routine and
+ * making the TC available to other drivers.  Calling it on a block that
+ * is not currently mapped is a no-op.
+ */
+void atmel_tc_free(struct atmel_tc *tc)
+{
+	spin_lock(&tc_list_lock);
+	if (tc->regs) {
+		iounmap(tc->regs);
+		/*
+		 * The region came from request_mem_region() in
+		 * atmel_tc_alloc(); release_mem_region() both unlinks and
+		 * frees it, whereas release_resource() would leak it.
+		 */
+		release_mem_region(tc->iomem->start, ATMEL_TC_IOMEM_SIZE);
+		tc->regs = NULL;
+		tc->iomem = NULL;
+	}
+	spin_unlock(&tc_list_lock);
+}
+EXPORT_SYMBOL_GPL(atmel_tc_free);
+
+/*
+ * tc_probe - record one TC block so that atmel_tc_alloc() can hand it out
+ * @pdev: platform device describing the block
+ *
+ * Verifies that the platform supplied a memory resource, at least one irq
+ * and the "t0_clk" clock, then collects the clock and irq of every channel.
+ * Channels 1 and 2 fall back to channel 0's clock/irq when the platform
+ * does not provide separate ones.  Nothing is mapped here; that happens
+ * when the block is allocated.
+ *
+ * Returns 0 on success, -EINVAL when the memory resource or irq is
+ * missing, -ENOMEM on allocation failure, or the error reported by
+ * clk_get() for "t0_clk".
+ */
+static int __init tc_probe(struct platform_device *pdev)
+{
+	struct atmel_tc *tc;
+	struct clk *clk;
+	int irq;
+
+	if (!platform_get_resource(pdev, IORESOURCE_MEM, 0))
+		return -EINVAL;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -EINVAL;
+
+	tc = kzalloc(sizeof(*tc), GFP_KERNEL);
+	if (!tc)
+		return -ENOMEM;
+
+	tc->pdev = pdev;
+
+	clk = clk_get(&pdev->dev, "t0_clk");
+	if (IS_ERR(clk)) {
+		kfree(tc);
+		/* hand the real reason back instead of a blanket -EINVAL */
+		return PTR_ERR(clk);
+	}
+
+	/* Per-channel clocks are optional; share channel 0's otherwise */
+	tc->clk[0] = clk;
+	tc->clk[1] = clk_get(&pdev->dev, "t1_clk");
+	if (IS_ERR(tc->clk[1]))
+		tc->clk[1] = clk;
+	tc->clk[2] = clk_get(&pdev->dev, "t2_clk");
+	if (IS_ERR(tc->clk[2]))
+		tc->clk[2] = clk;
+
+	/* Likewise for the irqs */
+	tc->irq[0] = irq;
+	tc->irq[1] = platform_get_irq(pdev, 1);
+	if (tc->irq[1] < 0)
+		tc->irq[1] = irq;
+	tc->irq[2] = platform_get_irq(pdev, 2);
+	if (tc->irq[2] < 0)
+		tc->irq[2] = irq;
+
+	spin_lock(&tc_list_lock);
+	list_add_tail(&tc->node, &tc_list);
+	spin_unlock(&tc_list_lock);
+
+	return 0;
+}
+
+/* Binds to platform devices named "atmel_tcb"; probing is done once at init. */
+static struct platform_driver tc_driver = {
+	.driver = {
+		.name = "atmel_tcb",
+	},
+};
+
+/* Register the driver and probe every matching device via tc_probe(). */
+static int __init tc_init(void)
+{
+	int status;
+
+	status = platform_driver_probe(&tc_driver, tc_probe);
+	return status;
+}
+arch_initcall(tc_init);
diff --git a/include/linux/atmel_tc.h b/include/linux/atmel_tc.h
new file mode 100644
index 0000000..53ba65e
--- /dev/null
+++ b/include/linux/atmel_tc.h
@@ -0,0 +1,252 @@
+/*
+ * Timer/Counter Unit (TC) registers.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef ATMEL_TC_H
+#define ATMEL_TC_H
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+
+/*
+ * Many 32-bit Atmel SOCs include one or more TC blocks, each of which holds
+ * three general-purpose 16-bit timers. These timers share one register bank.
+ * Depending on the SOC, each timer may have its own clock and IRQ, or those
+ * may be shared by the whole TC block.
+ *
+ * These TC blocks may have up to nine external pins: TCLK0..2 signals for
+ * clocks or clock gates, and per-timer TIOA and TIOB signals used for PWM
+ * or triggering. Those pins need to be set up for use with the TC block,
+ * else they will be used as GPIOs or for a different controller.
+ *
+ * Although we expect each TC block to have a platform_device node, those
+ * nodes are not what drivers bind to. Instead, they ask for a specific
+ * TC block, by number ... which is a common approach on systems with many
+ * timers. The clock and IRQ of each channel are then available through
+ * the "clk" and "irq" arrays of the struct atmel_tc that is returned.
+ */
+
+struct clk;
+
+/**
+ * struct atmel_tc - information about a Timer/Counter Block
+ * @pdev: physical device
+ * @iomem: reserved I/O memory region covering the block's registers;
+ *	NULL while the block is not allocated
+ * @regs: mapping through which the I/O registers can be accessed;
+ *	NULL while the block is not allocated
+ * @irq: irq for each of the three channels
+ * @clk: internal clock source for each of the three channels
+ * @node: list node, for tclib internal use
+ *
+ * On some platforms, each TC channel has its own clocks and IRQs,
+ * while on others, all TC channels share the same clock and IRQ.
+ * Drivers should clk_enable() all the clocks they need even though
+ * all the entries in @clk may point to the same physical clock.
+ * Likewise, drivers should request irqs independently for each
+ * channel, but they must use IRQF_SHARED in case some of the entries
+ * in @irq are actually the same IRQ.
+ */
+struct atmel_tc {
+ struct platform_device *pdev;
+ struct resource *iomem;
+ void __iomem *regs;
+ int irq[3];
+ struct clk *clk[3];
+ struct list_head node;
+};
+
+/*
+ * Claim a whole TC block by number (NULL is returned when it cannot be
+ * claimed), and give it back again.
+ */
+extern struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name);
+extern void atmel_tc_free(struct atmel_tc *tc);
+
+/* platform-specific ATMEL_TC_TIMER_CLOCKx divisors (0 means 32 KiHz) */
+extern const u8 atmel_tc_divisors[5];
+
+
+/*
+ * Two registers have block-wide controls. These are: configuring the three
+ * "external" clocks (or event sources) used by the timer channels; and
+ * synchronizing the timers by resetting them all at once.
+ *
+ * "External" can mean "external to chip" using the TCLK0, TCLK1, or TCLK2
+ * signals. Or, it can mean "external to timer", using the TIOA output from
+ * one of the other two timers that's being run in waveform mode.
+ */
+
+/* Register values below are byte offsets from the mapped base of the block */
+#define ATMEL_TC_BCR 0xc0 /* TC Block Control Register */
+#define ATMEL_TC_SYNC (1 << 0) /* synchronize timers */
+
+/* Each TCnXCnS entry is a two-bit field; the unsuffixed name is its mask */
+#define ATMEL_TC_BMR 0xc4 /* TC Block Mode Register */
+#define ATMEL_TC_TC0XC0S (3 << 0) /* external clock 0 source */
+#define ATMEL_TC_TC0XC0S_TCLK0 (0 << 0)
+#define ATMEL_TC_TC0XC0S_NONE (1 << 0)
+#define ATMEL_TC_TC0XC0S_TIOA1 (2 << 0)
+#define ATMEL_TC_TC0XC0S_TIOA2 (3 << 0)
+#define ATMEL_TC_TC1XC1S (3 << 2) /* external clock 1 source */
+#define ATMEL_TC_TC1XC1S_TCLK1 (0 << 2)
+#define ATMEL_TC_TC1XC1S_NONE (1 << 2)
+#define ATMEL_TC_TC1XC1S_TIOA0 (2 << 2)
+#define ATMEL_TC_TC1XC1S_TIOA2 (3 << 2)
+#define ATMEL_TC_TC2XC2S (3 << 4) /* external clock 2 source */
+#define ATMEL_TC_TC2XC2S_TCLK2 (0 << 4)
+#define ATMEL_TC_TC2XC2S_NONE (1 << 4)
+#define ATMEL_TC_TC2XC2S_TIOA0 (2 << 4)
+#define ATMEL_TC_TC2XC2S_TIOA1 (3 << 4)
+
+
+/*
+ * Each TC block has three "channels", each with one counter and controls.
+ *
+ * Note that the semantics of ATMEL_TC_TIMER_CLOCKx (input clock selection
+ * when it's not "external") is silicon-specific. AT91 platforms use one
+ * set of definitions; AVR32 platforms use a different set. Don't hard-wire
+ * such knowledge into your code, use the global "atmel_tc_divisors" ...
+ * where index N is the divisor for clock N+1, else zero to indicate it uses
+ * the 32 KiHz clock.
+ *
+ * The timers can be chained in various ways, and operated in "waveform"
+ * generation mode (including PWM) or "capture" mode (to time events). In
+ * both modes, behavior can be configured in many ways.
+ *
+ * Each timer has two I/O pins, TIOA and TIOB. Waveform mode uses TIOA as a
+ * PWM output, and TIOB as either another PWM or as a trigger. Capture mode
+ * uses them only as inputs.
+ */
+/*
+ * ATMEL_TC_CHAN(idx) is the byte offset of channel idx's register window
+ * (channels are spaced 0x40 bytes apart).  ATMEL_TC_REG(idx, reg) adds the
+ * offset of the per-channel register ATMEL_TC_<reg> to it, e.g.
+ * ATMEL_TC_REG(2, RC) is channel 2's RC register.
+ */
+#define ATMEL_TC_CHAN(idx) ((idx)*0x40)
+#define ATMEL_TC_REG(idx, reg) (ATMEL_TC_CHAN(idx) + ATMEL_TC_ ## reg)
+
+/* Per-channel register offsets; combine with a channel via ATMEL_TC_REG() */
+#define ATMEL_TC_CCR 0x00 /* Channel Control Register */
+#define ATMEL_TC_CLKEN (1 << 0) /* clock enable */
+#define ATMEL_TC_CLKDIS (1 << 1) /* clock disable */
+#define ATMEL_TC_SWTRG (1 << 2) /* software trigger */
+
+#define ATMEL_TC_CMR 0x04 /* Channel Mode Register */
+
+/* Both modes share some CMR bits */
+#define ATMEL_TC_TCCLKS (7 << 0) /* clock source */
+#define ATMEL_TC_TIMER_CLOCK1 (0 << 0)
+#define ATMEL_TC_TIMER_CLOCK2 (1 << 0)
+#define ATMEL_TC_TIMER_CLOCK3 (2 << 0)
+#define ATMEL_TC_TIMER_CLOCK4 (3 << 0)
+#define ATMEL_TC_TIMER_CLOCK5 (4 << 0)
+#define ATMEL_TC_XC0 (5 << 0)
+#define ATMEL_TC_XC1 (6 << 0)
+#define ATMEL_TC_XC2 (7 << 0)
+#define ATMEL_TC_CLKI (1 << 3) /* clock invert */
+#define ATMEL_TC_BURST (3 << 4) /* clock gating */
+#define ATMEL_TC_GATE_NONE (0 << 4)
+#define ATMEL_TC_GATE_XC0 (1 << 4)
+#define ATMEL_TC_GATE_XC1 (2 << 4)
+#define ATMEL_TC_GATE_XC2 (3 << 4)
+#define ATMEL_TC_WAVE (1 << 15) /* true = Waveform mode */
+
+/*
+ * CAPTURE mode CMR bits (ATMEL_TC_WAVE clear).  Several of these occupy
+ * the same bit positions as the WAVEFORM mode definitions.
+ */
+#define ATMEL_TC_LDBSTOP (1 << 6) /* counter stops on RB load */
+#define ATMEL_TC_LDBDIS (1 << 7) /* counter disable on RB load */
+#define ATMEL_TC_ETRGEDG (3 << 8) /* external trigger edge */
+#define ATMEL_TC_ETRGEDG_NONE (0 << 8)
+#define ATMEL_TC_ETRGEDG_RISING (1 << 8)
+#define ATMEL_TC_ETRGEDG_FALLING (2 << 8)
+#define ATMEL_TC_ETRGEDG_BOTH (3 << 8)
+#define ATMEL_TC_ABETRG (1 << 10) /* external trigger is TIOA? */
+#define ATMEL_TC_CPCTRG (1 << 14) /* RC compare trigger enable */
+#define ATMEL_TC_LDRA (3 << 16) /* RA loading edge (of TIOA) */
+#define ATMEL_TC_LDRA_NONE (0 << 16)
+#define ATMEL_TC_LDRA_RISING (1 << 16)
+#define ATMEL_TC_LDRA_FALLING (2 << 16)
+#define ATMEL_TC_LDRA_BOTH (3 << 16)
+#define ATMEL_TC_LDRB (3 << 18) /* RB loading edge (of TIOA) */
+#define ATMEL_TC_LDRB_NONE (0 << 18)
+#define ATMEL_TC_LDRB_RISING (1 << 18)
+#define ATMEL_TC_LDRB_FALLING (2 << 18)
+#define ATMEL_TC_LDRB_BOTH (3 << 18)
+
+/*
+ * WAVEFORM mode CMR bits (ATMEL_TC_WAVE set).  The Axxx fields select what
+ * happens to TIOA and the Bxxx fields what happens to TIOB; for each field
+ * the unsuffixed name is the two-bit mask.
+ */
+#define ATMEL_TC_CPCSTOP (1 << 6) /* RC compare stops counter */
+#define ATMEL_TC_CPCDIS (1 << 7) /* RC compare disables counter */
+#define ATMEL_TC_EEVTEDG (3 << 8) /* external event edge */
+#define ATMEL_TC_EEVTEDG_NONE (0 << 8)
+#define ATMEL_TC_EEVTEDG_RISING (1 << 8)
+#define ATMEL_TC_EEVTEDG_FALLING (2 << 8)
+#define ATMEL_TC_EEVTEDG_BOTH (3 << 8)
+#define ATMEL_TC_EEVT (3 << 10) /* external event source */
+#define ATMEL_TC_EEVT_TIOB (0 << 10)
+#define ATMEL_TC_EEVT_XC0 (1 << 10)
+#define ATMEL_TC_EEVT_XC1 (2 << 10)
+#define ATMEL_TC_EEVT_XC2 (3 << 10)
+#define ATMEL_TC_ENETRG (1 << 12) /* external event is trigger */
+#define ATMEL_TC_WAVESEL (3 << 13) /* waveform type */
+#define ATMEL_TC_WAVESEL_UP (0 << 13)
+#define ATMEL_TC_WAVESEL_UPDOWN (1 << 13)
+#define ATMEL_TC_WAVESEL_UP_AUTO (2 << 13)
+#define ATMEL_TC_WAVESEL_UPDOWN_AUTO (3 << 13)
+#define ATMEL_TC_ACPA (3 << 16) /* RA compare changes TIOA */
+#define ATMEL_TC_ACPA_NONE (0 << 16)
+#define ATMEL_TC_ACPA_SET (1 << 16)
+#define ATMEL_TC_ACPA_CLEAR (2 << 16)
+#define ATMEL_TC_ACPA_TOGGLE (3 << 16)
+#define ATMEL_TC_ACPC (3 << 18) /* RC compare changes TIOA */
+#define ATMEL_TC_ACPC_NONE (0 << 18)
+#define ATMEL_TC_ACPC_SET (1 << 18)
+#define ATMEL_TC_ACPC_CLEAR (2 << 18)
+#define ATMEL_TC_ACPC_TOGGLE (3 << 18)
+#define ATMEL_TC_AEEVT (3 << 20) /* external event changes TIOA */
+#define ATMEL_TC_AEEVT_NONE (0 << 20)
+#define ATMEL_TC_AEEVT_SET (1 << 20)
+#define ATMEL_TC_AEEVT_CLEAR (2 << 20)
+#define ATMEL_TC_AEEVT_TOGGLE (3 << 20)
+#define ATMEL_TC_ASWTRG (3 << 22) /* software trigger changes TIOA */
+#define ATMEL_TC_ASWTRG_NONE (0 << 22)
+#define ATMEL_TC_ASWTRG_SET (1 << 22)
+#define ATMEL_TC_ASWTRG_CLEAR (2 << 22)
+#define ATMEL_TC_ASWTRG_TOGGLE (3 << 22)
+#define ATMEL_TC_BCPB (3 << 24) /* RB compare changes TIOB */
+#define ATMEL_TC_BCPB_NONE (0 << 24)
+#define ATMEL_TC_BCPB_SET (1 << 24)
+#define ATMEL_TC_BCPB_CLEAR (2 << 24)
+#define ATMEL_TC_BCPB_TOGGLE (3 << 24)
+#define ATMEL_TC_BCPC (3 << 26) /* RC compare changes TIOB */
+#define ATMEL_TC_BCPC_NONE (0 << 26)
+#define ATMEL_TC_BCPC_SET (1 << 26)
+#define ATMEL_TC_BCPC_CLEAR (2 << 26)
+#define ATMEL_TC_BCPC_TOGGLE (3 << 26)
+#define ATMEL_TC_BEEVT (3 << 28) /* external event changes TIOB */
+#define ATMEL_TC_BEEVT_NONE (0 << 28)
+#define ATMEL_TC_BEEVT_SET (1 << 28)
+#define ATMEL_TC_BEEVT_CLEAR (2 << 28)
+#define ATMEL_TC_BEEVT_TOGGLE (3 << 28)
+/*
+ * This field reaches bit 31, so the constants are unsigned: shifting a
+ * signed 2 or 3 left by 30 overflows int, which is undefined behaviour.
+ */
+#define ATMEL_TC_BSWTRG (3u << 30) /* software trigger changes TIOB */
+#define ATMEL_TC_BSWTRG_NONE (0u << 30)
+#define ATMEL_TC_BSWTRG_SET (1u << 30)
+#define ATMEL_TC_BSWTRG_CLEAR (2u << 30)
+#define ATMEL_TC_BSWTRG_TOGGLE (3u << 30)
+
+/* Remaining per-channel register offsets; combine with ATMEL_TC_REG() */
+#define ATMEL_TC_CV 0x10 /* counter Value */
+#define ATMEL_TC_RA 0x14 /* register A */
+#define ATMEL_TC_RB 0x18 /* register B */
+#define ATMEL_TC_RC 0x1c /* register C */
+
+#define ATMEL_TC_SR 0x20 /* status (read-only) */
+/* Status-only flags */
+#define ATMEL_TC_CLKSTA (1 << 16) /* clock enabled */
+#define ATMEL_TC_MTIOA (1 << 17) /* TIOA mirror */
+#define ATMEL_TC_MTIOB (1 << 18) /* TIOB mirror */
+
+#define ATMEL_TC_IER 0x24 /* interrupt enable (write-only) */
+#define ATMEL_TC_IDR 0x28 /* interrupt disable (write-only) */
+#define ATMEL_TC_IMR 0x2c /* interrupt mask (read-only) */
+
+/* Status and IRQ flags (bits shared by SR, IER, IDR and IMR) */
+#define ATMEL_TC_COVFS (1 << 0) /* counter overflow */
+#define ATMEL_TC_LOVRS (1 << 1) /* load overrun */
+#define ATMEL_TC_CPAS (1 << 2) /* RA compare */
+#define ATMEL_TC_CPBS (1 << 3) /* RB compare */
+#define ATMEL_TC_CPCS (1 << 4) /* RC compare */
+#define ATMEL_TC_LDRAS (1 << 5) /* RA loading */
+#define ATMEL_TC_LDRBS (1 << 6) /* RB loading */
+#define ATMEL_TC_ETRGS (1 << 7) /* external trigger */
+
+#endif
--
1.5.4.1
From: David Brownell <[email protected]>
Clocksource and clockevent device based on the Atmel TC blocks.
The clockevent device handles both periodic and oneshot modes, so this
enables NO_HZ and high res timers on some platforms that previously
couldn't use those mechanisms.
This works on both AVR32 and AT91 chips, given relevant patches for
tclib support (always) and clockevents (or else this will only look
like a higher precision clocksource). It's an updated and modularized
version of an AT91-only patch that has circulated for some time now.
Changes relative to the original patch:
* Update to use new tclib API
* Replace open-coded do-while loop using goto with a real do-while loop
* Minor irq handler optimization: Load register base address from
dev_id instead of a global variable.
* Aggressively turn off clocks when the clockevent isn't being used
* Include the clockevent code on AT91RM9200 as well. The rating is
lower than the System Timer, so the clock will usually stay off.
* Don't assume that the number of clocks is always equal to the
number of irqs.
Signed-off-by: David Brownell <[email protected]>
Signed-off-by: Haavard Skinnemoen <[email protected]>
Cc: Nicolas Ferre <[email protected]>
Cc: Andrew Victor <[email protected]>
Cc: john stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
---
This patch is meant to replace atmel_tc-clocksource-clockevent-code.patch
drivers/clocksource/Makefile | 1 +
drivers/clocksource/tcb_clksrc.c | 305 ++++++++++++++++++++++++++++++++++++++
drivers/misc/Kconfig | 25 +++
3 files changed, 331 insertions(+), 0 deletions(-)
create mode 100644 drivers/clocksource/tcb_clksrc.c
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index a522254..1525882 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -1,3 +1,4 @@
+obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
obj-$(CONFIG_X86_CYCLONE_TIMER) += cyclone.o
obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o
obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o
diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
new file mode 100644
index 0000000..17facda
--- /dev/null
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -0,0 +1,305 @@
+#include <linux/init.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/atmel_tc.h>
+
+
+/*
+ * We're configured to use a specific TC block, one that's not hooked
+ * up to external hardware, to provide a time solution:
+ *
+ * - Two channels combine to create a free-running 32 bit counter
+ * with a base rate of 5+ MHz, packaged as a clocksource (with
+ * resolution better than 200 nsec).
+ *
+ * - The third channel may be used to provide a 16-bit clockevent
+ * source, used in either periodic or oneshot mode. This runs
+ * at 32 KiHz, and can handle delays of up to two seconds.
+ *
+ * A boot clocksource and clockevent source are also currently needed,
+ * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
+ * this code can be used when init_timers() is called, well before most
+ * devices are set up. (Some low end AT91 parts, which can run uClinux,
+ * have only the timers in one TC block... they currently don't support
+ * the tclib code, because of that initialization issue.)
+ *
+ * REVISIT behavior during system suspend states... we should disable
+ * all clocks and save the power. Easily done for clockevent devices,
+ * but clocksources won't necessarily get the needed notifications.
+ * For deeper system sleep states, this will be mandatory...
+ */
+
+/* Mapped register base of the TC block in use; set by tcb_clksrc_init() */
+static void __iomem *tcaddr;
+
+/*
+ * Clocksource read hook: returns the 32-bit count formed from channel 1
+ * (upper 16 bits) and channel 0 (lower 16 bits).
+ *
+ * The two halves live in separate registers, so the upper half is read
+ * before and after the lower half and the sequence is retried if it
+ * changed in between; otherwise the two values could belong to different
+ * moments.  Local interrupts are masked while sampling.
+ */
+static cycle_t tc_get_cycles(void)
+{
+ unsigned long flags;
+ u32 lower, upper;
+
+ raw_local_irq_save(flags);
+ do {
+ upper = __raw_readl(tcaddr + ATMEL_TC_REG(1, CV));
+ lower = __raw_readl(tcaddr + ATMEL_TC_REG(0, CV));
+ } while (upper != __raw_readl(tcaddr + ATMEL_TC_REG(1, CV)));
+
+ raw_local_irq_restore(flags);
+ return (upper << 16) | lower;
+}
+
+/*
+ * The free-running 32-bit clocksource.  ->mult is not set here; it is
+ * computed in tcb_clksrc_init() once the counting rate has been chosen.
+ */
+static struct clocksource clksrc = {
+ .name = "tcb_clksrc",
+ .rating = 200,
+ .read = tc_get_cycles,
+ .mask = CLOCKSOURCE_MASK(32),
+ .shift = 18,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+
+/*
+ * Clockevent device plus the state its callbacks need: the clock that is
+ * enabled while the device is active, and the mapped TC registers.
+ */
+struct tc_clkevt_device {
+ struct clock_event_device clkevt;
+ struct clk *clk;
+ void __iomem *regs;
+};
+
+/* Recover the enclosing tc_clkevt_device from its embedded clkevt member */
+static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt)
+{
+ return container_of(clkevt, struct tc_clkevt_device, clkevt);
+}
+
+/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
+ * because using one of the divided clocks would usually mean the
+ * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
+ *
+ * A divided clock could be good for high resolution timers, since
+ * 30.5 usec resolution can seem "low".
+ *
+ * timer_clock holds the clock-select value that tc_mode() writes into
+ * channel 2's CMR; setup_clkevents() sets it to the index of the 32K
+ * clock in atmel_tc_divisors[].
+ */
+static u32 timer_clock;
+
+/*
+ * tc_mode - clockevent set_mode() hook, driving TC channel 2
+ * @m: mode being entered
+ * @d: the clockevent device (embedded in a struct tc_clkevt_device)
+ *
+ * If the device is currently in periodic or oneshot mode, the channel is
+ * first quiesced (interrupts masked, counter clock disabled) and its clock
+ * released.  Entering periodic or oneshot mode then re-enables the clock
+ * and reprograms the channel; any other mode leaves it stopped.
+ *
+ * All register accesses go through the device's own "regs" mapping.
+ */
+static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
+{
+	struct tc_clkevt_device *tcd = to_tc_clkevt(d);
+	void __iomem *regs = tcd->regs;
+
+	if (tcd->clkevt.mode == CLOCK_EVT_MODE_PERIODIC
+			|| tcd->clkevt.mode == CLOCK_EVT_MODE_ONESHOT) {
+		__raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR));
+		__raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
+		clk_disable(tcd->clk);
+	}
+
+	switch (m) {
+
+	/* By not making the gentime core emulate periodic mode on top
+	 * of oneshot, we get lower overhead and improved accuracy.
+	 */
+	case CLOCK_EVT_MODE_PERIODIC:
+		clk_enable(tcd->clk);
+
+		/* slow clock, count up to RC, then irq and restart */
+		__raw_writel(timer_clock
+				| ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
+				regs + ATMEL_TC_REG(2, CMR));
+		/* one tick, in 32768 Hz cycles, rounded to nearest */
+		__raw_writel((32768 + HZ/2) / HZ, regs + ATMEL_TC_REG(2, RC));
+
+		/* Enable clock and interrupts on RC compare */
+		__raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
+
+		/* go go gadget! */
+		__raw_writel(ATMEL_TC_CLKEN | ATMEL_TC_SWTRG,
+				regs + ATMEL_TC_REG(2, CCR));
+		break;
+
+	case CLOCK_EVT_MODE_ONESHOT:
+		clk_enable(tcd->clk);
+
+		/* slow clock, count up to RC, then irq and stop */
+		__raw_writel(timer_clock | ATMEL_TC_CPCSTOP
+				| ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
+				regs + ATMEL_TC_REG(2, CMR));
+		__raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
+
+		/* set_next_event() configures and starts the timer */
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*
+ * tc_next_event - clockevent set_next_event() hook
+ * @delta: number of timer cycles until the event
+ * @d: the clockevent device (embedded in a struct tc_clkevt_device)
+ *
+ * Loads @delta into channel 2's RC register and restarts the counter with
+ * a software trigger.  Registers are reached through the device's own
+ * mapping, like the other clockevent callbacks.  Always returns 0.
+ */
+static int tc_next_event(unsigned long delta, struct clock_event_device *d)
+{
+	void __iomem *regs = to_tc_clkevt(d)->regs;
+
+	__raw_writel(delta, regs + ATMEL_TC_REG(2, RC));
+
+	/* go go gadget! */
+	__raw_writel(ATMEL_TC_CLKEN | ATMEL_TC_SWTRG,
+			regs + ATMEL_TC_REG(2, CCR));
+	return 0;
+}
+
+/*
+ * The single clockevent instance.  The clk and regs members, and the
+ * mult/min_delta_ns/max_delta_ns fields of the embedded device, are
+ * filled in by setup_clkevents() before registration.
+ */
+static struct tc_clkevt_device clkevt = {
+ .clkevt = {
+ .name = "tc_clkevt",
+ .features = CLOCK_EVT_FEAT_PERIODIC
+ | CLOCK_EVT_FEAT_ONESHOT,
+ .shift = 32,
+ /* Should be lower than at91rm9200's system timer */
+ .rating = 125,
+ .cpumask = CPU_MASK_CPU0,
+ .set_next_event = tc_next_event,
+ .set_mode = tc_mode,
+ },
+};
+
+/*
+ * Interrupt handler for TC channel 2.  @handle is the tc_clkevt_device
+ * installed as dev_id.  The channel's status register is read once; if it
+ * reports an RC compare, the clockevent handler is invoked and the irq is
+ * claimed, otherwise IRQ_NONE is returned.
+ */
+static irqreturn_t ch2_irq(int irq, void *handle)
+{
+	struct tc_clkevt_device *tcd = handle;
+	unsigned int status;
+
+	status = __raw_readl(tcd->regs + ATMEL_TC_REG(2, SR));
+	if (!(status & ATMEL_TC_CPCS))
+		return IRQ_NONE;
+
+	tcd->clkevt.event_handler(&tcd->clkevt);
+	return IRQ_HANDLED;
+}
+
+/* irq action for channel 2; ->dev_id is assigned in setup_clkevents() */
+static struct irqaction tc_irqaction = {
+ .name = "tc_clkevt",
+ .flags = IRQF_TIMER | IRQF_DISABLED,
+ .handler = ch2_irq,
+};
+
+/*
+ * setup_clkevents - register TC channel 2 as a clockevent device
+ * @tc: the allocated TC block
+ * @t0_clk: clock of channel 0 (currently unused here)
+ * @clk32k_divisor_idx: index of the 32 KiHz clock in atmel_tc_divisors[]
+ *
+ * Fills in the runtime fields of the static clkevt device, installs the
+ * channel 2 interrupt handler and registers the device.  If the interrupt
+ * cannot be installed the device is not registered, since it could never
+ * deliver an event.
+ */
+static void __init setup_clkevents(struct atmel_tc *tc,
+		struct clk *t0_clk, int clk32k_divisor_idx)
+{
+	int irq = tc->irq[2];
+	int ret;
+
+	clkevt.regs = tc->regs;
+	clkevt.clk = tc->clk[2];
+	tc_irqaction.dev_id = &clkevt;
+
+	timer_clock = clk32k_divisor_idx;
+
+	/* the event timer counts 32768 Hz cycles in a 16-bit counter */
+	clkevt.clkevt.mult = div_sc(32768, NSEC_PER_SEC, clkevt.clkevt.shift);
+	clkevt.clkevt.max_delta_ns
+		= clockevent_delta2ns(0xffff, &clkevt.clkevt);
+	clkevt.clkevt.min_delta_ns = clockevent_delta2ns(1, &clkevt.clkevt) + 1;
+
+	ret = setup_irq(irq, &tc_irqaction);
+	if (ret) {
+		pr_debug("%s: can't set up irq %d (%d)\n",
+				clkevt.clkevt.name, irq, ret);
+		return;
+	}
+
+	clockevents_register_device(&clkevt.clkevt);
+}
+
+#else /* !CONFIG_GENERIC_CLOCKEVENTS */
+
+/* Without CONFIG_GENERIC_CLOCKEVENTS, channel 2 is simply left unused */
+static void __init setup_clkevents(struct atmel_tc *tc,
+ struct clk *t0_clk, int clk32k_divisor_idx)
+{
+ /* NOTHING */
+}
+
+#endif
+
+/*
+ * tcb_clksrc_init - set up the TC-based clocksource (and clockevent)
+ *
+ * Allocates the TC block selected by CONFIG_ATMEL_TCB_CLKSRC_BLOCK, picks
+ * a counting rate, chains channels 0 and 1 into a free-running 32-bit
+ * counter, registers it as a clocksource and finally hands channel 2 to
+ * setup_clkevents().
+ *
+ * Returns 0 on success or -ENODEV if the TC block cannot be allocated.
+ */
+static int __init tcb_clksrc_init(void)
+{
+	static char bootinfo[] __initdata
+		= KERN_DEBUG "%s: tc%d at %d.%03d MHz\n";
+
+	struct atmel_tc *tc;
+	struct clk *t0_clk;
+	u32 rate, divided_rate = 0;
+	u32 khz;
+	int best_divisor_idx = -1;
+	int clk32k_divisor_idx = -1;
+	int i;
+
+	tc = atmel_tc_alloc(CONFIG_ATMEL_TCB_CLKSRC_BLOCK, clksrc.name);
+	if (!tc) {
+		pr_debug("can't alloc TC for clocksource\n");
+		return -ENODEV;
+	}
+	tcaddr = tc->regs;
+
+	t0_clk = tc->clk[0];
+	clk_enable(t0_clk);
+
+	/* How fast will we be counting?  Pick something over 5 MHz. */
+	rate = (u32) clk_get_rate(t0_clk);
+	for (i = 0; i < 5; i++) {
+		unsigned divisor = atmel_tc_divisors[i];
+		unsigned tmp;
+
+		/* remember 32 KiHz clock for later */
+		if (!divisor) {
+			clk32k_divisor_idx = i;
+			continue;
+		}
+
+		tmp = rate / divisor;
+		pr_debug("TC: %u / %-3u [%d] --> %u\n", rate, divisor, i, tmp);
+
+		/*
+		 * The first usable divisor is always accepted; later (larger)
+		 * ones only replace it while the result stays at 5 MHz or
+		 * above.  "Nothing chosen yet" is -1, so index 0 counts as a
+		 * valid earlier choice.
+		 */
+		if (best_divisor_idx >= 0) {
+			if (tmp < 5 * 1000 * 1000)
+				continue;
+		}
+		divided_rate = tmp;
+		best_divisor_idx = i;
+	}
+
+	clksrc.mult = clocksource_hz2mult(divided_rate, clksrc.shift);
+
+	/* Round to the nearest kHz so the printed fraction is meaningful */
+	khz = (divided_rate + 500) / 1000;
+	printk(bootinfo, clksrc.name, CONFIG_ATMEL_TCB_CLKSRC_BLOCK,
+			khz / 1000, khz % 1000);
+
+	/* tclib will give us three clocks no matter what the
+	 * underlying platform supports.
+	 */
+	clk_enable(tc->clk[1]);
+
+	/* channel 0:  waveform mode, input mclk/8, clock TIOA0 on overflow */
+	__raw_writel(best_divisor_idx			/* likely divide-by-8 */
+			| ATMEL_TC_WAVE
+			| ATMEL_TC_WAVESEL_UP		/* free-run */
+			| ATMEL_TC_ACPA_SET		/* TIOA0 rises at 0 */
+			| ATMEL_TC_ACPC_CLEAR,		/* (duty cycle 50%) */
+			tcaddr + ATMEL_TC_REG(0, CMR));
+	__raw_writel(0x0000, tcaddr + ATMEL_TC_REG(0, RA));
+	__raw_writel(0x8000, tcaddr + ATMEL_TC_REG(0, RC));
+	__raw_writel(0xff, tcaddr + ATMEL_TC_REG(0, IDR));	/* no irqs */
+	__raw_writel(ATMEL_TC_CLKEN, tcaddr + ATMEL_TC_REG(0, CCR));
+
+	/* channel 1:  waveform mode, input TIOA0 */
+	__raw_writel(ATMEL_TC_XC1			/* input: TIOA0 */
+			| ATMEL_TC_WAVE
+			| ATMEL_TC_WAVESEL_UP,		/* free-run */
+			tcaddr + ATMEL_TC_REG(1, CMR));
+	__raw_writel(0xff, tcaddr + ATMEL_TC_REG(1, IDR));	/* no irqs */
+	__raw_writel(ATMEL_TC_CLKEN, tcaddr + ATMEL_TC_REG(1, CCR));
+
+	/* chain channel 0 to channel 1, then reset all the timers */
+	__raw_writel(ATMEL_TC_TC1XC1S_TIOA0, tcaddr + ATMEL_TC_BMR);
+	__raw_writel(ATMEL_TC_SYNC, tcaddr + ATMEL_TC_BCR);
+
+	/* and away we go! */
+	clocksource_register(&clksrc);
+
+	/* channel 2:  periodic and oneshot timer support */
+	setup_clkevents(tc, t0_clk, clk32k_divisor_idx);
+
+	return 0;
+}
+arch_initcall(tcb_clksrc_init);
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index b3ba681..4cf928e 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -30,6 +30,31 @@ config ATMEL_TCLIB
blocks found on many Atmel processors. This facilitates using
these blocks by different drivers despite processor differences.
+config ATMEL_TCB_CLKSRC
+ bool "TC Block Clocksource"
+ depends on ATMEL_TCLIB && GENERIC_TIME
+ default y
+ help
+ Select this to get a high precision clocksource based on a
+ TC block with a 5+ MHz base clock rate. Two timer channels
+ are combined to make a single 32-bit timer.
+
+ When GENERIC_CLOCKEVENTS is defined, the third timer channel
+ may be used as a clock event device supporting oneshot mode
+ (delays of up to two seconds) based on the 32 KiHz clock.
+
+config ATMEL_TCB_CLKSRC_BLOCK
+ int
+ depends on ATMEL_TCB_CLKSRC
+ prompt "TC Block" if ARCH_AT91RM9200 || ARCH_AT91SAM9260 || CPU_AT32AP700X
+ default 0
+ range 0 1
+ help
+ Some chips provide more than one TC block, so you have the
+ choice of which one to use for the clock framework. The other
+ TC can be used for other purposes, such as PWM generation and
+ interval timing.
+
config IBM_ASM
tristate "Device driver for IBM RSA service processor"
depends on X86 && PCI && INPUT && EXPERIMENTAL
--
1.5.4.1
Hello Haavard,
Good work on improving this patch. I am very happy with the accurate
clocksource devices, I use the patches from David for quite some time
now.
But, I noticed this comment in the patch:
> +/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
> + * because using one of the divided clocks would usually mean the
> + * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
> + *
> + * A divided clock could be good for high resolution timers, since
> + * 30.5 usec resolution can seem "low".
I want to comment on that.
I noticed that on rm9200 the timer interrupt handler alone can take
from about 50 usec to more than 100 usec (measured through ETM
trace), combined with a worst-case interrupt latency of about 75 usec.
So, a higher resolution than 30usec is useless on these cores, and
setting timers on a frequency that high will just choke the processor
in only handling timers...
To prevent applications from starting timers too quickly after one
another, it is even useful to limit the granularity of these timers, and
to align them so that they can be handled by just one interrupt. (Hmm,
this sounds like the behavior of a ticking kernel ;-))
This is the reason why I stopped using the High-resolution timer
framework on these cores. It just overloads the CPU too much when
several applications use independent timers that are not in sync with
each other. A ticking kernel is therefore more CPU-load friendly...
Kind Regards,
Remy
On Tuesday 04 March 2008, Remy Bohmer wrote:
> > +/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
> > + * because using one of the divided clocks would usually mean the
> > + * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
> > + *
> > + * A divided clock could be good for high resolution timers, since
> > + * 30.5 usec resolution can seem "low".
>
> I want to comment on that.
>
> I noticed that on rm9200 the timer interrupt handler itself can cost
> about 50us to more than 100usec in itself (measured through ETM
> trace), combined with a worst case interrupt latency of about 75us.
Could you elaborate on where that 50-100 usec gets spent? Does
the same issue happen with the $SUBJECT patch (if you tweak the
clocksource ratings to use its clockevents on rm9200)?
I'd not think the timer IRQ logic accounts for much of that time,
and the rest of the instruction cycles would seem to be either
in genirq or gentime. Unless the issue is that accessing that
particular hardware is slow, with clockdomain crossing etc ...
or that AT91 has way too many handlers sitting on IRQ-1.
If it's a gentime or genirq issue, that's somewhat amenable to
fixing ... and it would also impact the $SUBJECT patch both on
other AT91 ARM9 cores and on the AVR32 AP7 cores.
> So, a higher resolution than 30usec is useless on these cores, and
> setting timers on a frequency that high will just choke the processor
> in only handling timers...
Should the min_delta_ns be increased in at91rm9200_time.c then?
Right now, as you probably recall, it's at the lowest value
needed for correctness: a smidgeon over two ticks (~ 61 usec).
- Dave
Hello David,
> Could you elaborate on where that 50-100 usec gets spent?
Attached I have put a screendump of my ETM debugger. It shows a
complete flow of kernel function-calls of what happens on a timer
interrupt. In this example the complete sequence takes about 154 us.
Notice that the ETM is non-intrusive, and that the times are real and
accurate in this trace. (you can even see the effects of CPU-caches,
sometimes the same code just runs faster)
It is based on 2.6.23.12-rt14 with a ticking kernel; I have also attached
the .config of the kernel under trace. The CPU is running at
160/80 MHz.
I have some custom patches to the kernel, but none of them affect this
particular trace.
> Does the same issue happpen with the $SUBJECT patch (if you tweak the
> clocksource ratings to use its clockevents on rm9200)?
not tested yet, but I will generate a trace for it, I will post it later.
> I'd not think the timer IRQ logic accounts for much of that time,
> and the rest of the instruction cycles would seem to be either
> in genirq or gentime.
There is more to it than just the genIRQ mechanism. The softirqs are
kicked, the scheduler is triggered and so on. It is a waterfall of
events that happen, just by having a timer interrupt.
> Unless the issue is that accessing that
> particular hardware is slow, with clockdomain crossing etc ...
> or that AT91 has way too many handlers sitting on IRQ-1.
> If it's a gentime or genirq issue, that's somewhat amenable to
> fixing ... and it would also impact the $SUBJECT patch both on
> other AT91 ARM9 cores and on the AVR32 AP7 cores.
> > So, a higher resolution than 30usec is useless on these cores, and
> > setting timers on a frequency that high will just choke the processor
> > in only handling timers...
> Should the min_delta_ns be increased in at91rm9200_time.c then?
Maybe it should be configurable for these kinds of CPUs?
If a CPU were infinitely fast, one would probably want an HRT resolution of zero.
But no CPU is that fast, and I assume there is always a need to strike
a good balance between timer resolution and CPU load.
We probably do not want to cause a DoS by building some application
that starts (periodic) timers so fast that the CPU does nothing
other than handle them. To me, it appears that the HRT framework is going
that route here. It seems to me now that if it is technically
possible to create a microsecond-accurate timer framework, we build it,
regardless of whether it fits the rest of the system.
Notice that I also fell into this pitfall while using HRT, and I only
wanted an application with a 1 ms accurate timer... Other
processes/daemons in the system also use timers, which eventually
resulted in intervals in the sub-millisecond range, and thus, due to the
overhead that brings to the system, the CPU load just goes
sky-high while doing nothing really special.
So, hires timestamps -> really really welcome.
hires timers -> there should be a (configurable) minimal resolution
that fits the hardware to not overload the CPU.
> Right now, as you probably recall, it's at the lowest value
> needed for correctness: a smidgeon over two ticks (~ 72 nsec).
I remember...
Kind Regards,
Remy
On Wednesday 05 March 2008, Remy Bohmer wrote:
> Hello David,
>
> > Could you elaborate on where that 50-100 usec gets spent?
>
> Attached I have put a screendump of my ETM debugger. It shows a
> complete flow of kernel function-calls of what happens on a timer
> interrupt. In this example the complete sequence takes about 154 us.
Thanks -- this is quite informative. (Presumably it'd look similar
using NO_HZ too: hardly any overhead is hardware-specific.)
An ETM trace is really nice for this kind of stuff; it'd be nice
if such tech were more widely available! (Built into most ARM cores
and all that ... but the hardware and software tools to access the
data aren't as available.)
> Notice that the ETM is non-intrusive, and that the times are real and
> accurate in this trace. (you can even see the effects of CPU-caches,
> sometimes the same code just runs faster)
Yeah, the intrusive schemes (like automatic probe insertion) perturb
timings at this level.
> > Does the same issue happpen with the $SUBJECT patch (if you tweak the
> > clocksource ratings to use its clockevents on rm9200)?
>
> not tested yet, but I will generate a trace for it, I will post it later.
Based on how little of that time was spent in the rm9200 clockevent
code -- I'll be generous and call it 10 usec -- I can't imagine that
could make much of a real difference.
> There is more to it than just the genIRQ mechanism. The softirqs are
> kicked, the scheduler is triggered and so on. It is a waterfall of
> events that happen, just by having a timer interrupt.
Right.
> > Should the min_delta_ns be increased in at91rm9200_time.c then?
>
> Maybe it should be configurable for these kinds of CPUs?
It shouldn't require tweaking individual clockevent devices, or
IMO be specific to e.g. lower powered CPUs ... but a global
min_delta_ns would be easy to implement, and might help.
That'd resemble what the init_timer_deferrable() mechanism
achieves, but the scale for bunching timers would be fine,
not coarse.
> Notice that I also fell in this pitfall while using HRT, and I only
> wanted an application that made a 1ms accurate timer... Other
> processes/daemons in the system also uses timers, which eventually
> resulted in intervals in the sub-millisec range, and thus due to the
> overhead that will bring tot the system, the CPU-load just goes
> sky-high, doing actually nothing really special.
In your case, maybe a global min_delta_ns of 1000 * 1000 would
help ... combine with NO_HZ and you'd get the accuracy you need,
with reduced scheduling overhead. Sound about right?
- Dave
> So, hires timestamps -> really really welcome.
> hires timers -> there should be a (configurable) minimal resolution
> that fits the hardware to not overload the CPU.
>
> > Right now, as you probably recall, it's at the lowest value
> > needed for correctness: a smidgeon over two ticks (~ 72 nsec).
>
> I remember...
>
> Kind Regards,
>
> Remy
>
On Wed, 5 Mar 2008, Remy Bohmer wrote:
> Hello David,
>
> > Could you elaborate on where that 50-100 usec gets spent?
>
> Attached I have put a screendump of my ETM debugger. It shows a
> complete flow of kernel function-calls of what happens on a timer
> interrupt. In this example the complete sequence takes about 154 us.
> Notice that the ETM is non-intrusive, and that the times are real and
> accurate in this trace. (you can even see the effects of CPU-caches,
> sometimes the same code just runs faster)
Is there any chance to convert this to a text table? Following that
png is pretty hard.
> So, hires timestamps -> really really welcome.
> hires timers -> there should be a (configurable) minimal resolution
> that fits the hardware to not overload the CPU.
clockevents let you set a minimum delta already. This can be set at
runtime before registering the device.
Thanks,
tglx
On Wednesday 05 March 2008, Thomas Gleixner wrote:
> > So, hires timestamps -> really really welcome.
> > hires timers -> there should be a (configurable) minimal resolution
> > that fits the hardware to not overload the CPU.
>
> clockevents let you set a minimum delta already. This can be set at
> runtime before registering the device.
But the overhead isn't specific to any given clockevent
device. The same issue was reported on other ARMs.
The acceptable overhead is a function of system-specific
factors including load and CPU clock rate, not of a given
clockevent device.
I wouldn't think teaching multiple such drivers about such
issues could be better than having clockevents_program_event()
consider a system-specific lower bound ...
- Dave
==============
Add a "min_delta_ns" kernel parameter to help systems avoid excess
scheduling and timer overheads. As with init_timer_deferrable(),
this batches timer IRQs to reduce those overheads.
For example, on one ARM9 platform the timer IRQ takes 2 usec, but the
system then needs over 150 usec to handle scheduler-related tasks.
On such a system, applications using many high resolution timers can
work better if min_delta_ns is used to avoid scheduling timer IRQs
so often that no other work gets done.
---
Documentation/kernel-parameters.txt | 7 +++++++
kernel/time/clockevents.c | 15 +++++++++++++++
2 files changed, 22 insertions(+)
--- a/Documentation/kernel-parameters.txt 2008-02-23 11:36:13.000000000 -0800
+++ b/Documentation/kernel-parameters.txt 2008-03-05 12:58:20.000000000 -0800
@@ -1146,6 +1146,13 @@ and is between 256 and 4096 characters.
mga= [HW,DRM]
+ min_delta_ns=n [GENERIC_TIME] Defines the minimum amount of time
+ that oneshot clockevent sources will be asked to
+ sleep. This is measured in nanoseconds, and may be
+ overridden by the clockevent device. You probably
+ want this number to be more than your system's timer
+ IRQ overhead.
+
mousedev.tap_time=
[MOUSE] Maximum time between finger touching and
leaving touchpad surface for touch to be considered
--- a/kernel/time/clockevents.c 2008-02-10 15:48:29.000000000 -0800
+++ b/kernel/time/clockevents.c 2008-03-05 12:38:14.000000000 -0800
@@ -29,6 +29,18 @@ static RAW_NOTIFIER_HEAD(clockevents_cha
/* Protection for the above */
static DEFINE_SPINLOCK(clockevents_lock);
+/* Lower bound (nsec) on oneshot timer intervals, from "min_delta_ns=";
+ * zero means no bound.  Too low a value leaves HRT overhead objectionable.
+ */
+static unsigned long min_delta_ns;
+
+static int __init set_min_delta_ns(char *str)
+{
+	min_delta_ns = simple_strtoul(str, NULL, 0); /* str: text after '=' */
+	return 1;	/* option handled */
+}
+__setup("min_delta_ns=", set_min_delta_ns);
+
/**
* clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
* @latch: value to convert
@@ -98,6 +110,9 @@ int clockevents_program_event(struct clo
if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
return 0;
+ if (delta < min_delta_ns)
+ delta = min_delta_ns;
+
if (delta > dev->max_delta_ns)
delta = dev->max_delta_ns;
if (delta < dev->min_delta_ns)
Hello Thomas,
> > Attached I have put a screendump of my ETM debugger. It shows a
> > complete flow of kernel function-calls of what happens on a timer
> > interrupt. In this example the complete sequence takes about 154 us.
> > Notice that the ETM is non-intrusive, and that the times are real and
> > accurate in this trace. (you can even see the effects of CPU-caches,
> > sometimes the same code just runs faster)
>
> Is there any chance to convert this to a text table? Following that
> png is pretty hard.
I will see what I can do, but this was the easiest way (it was just a
print-screen;-) )
But, will text-dump make it more clear? It could also contain the time
each assembler instruction will take behind the routines...
But, I will look into that tomorrow. (approx. 12 hours from now)
> > So, hires timestamps -> really really welcome.
> > hires timers -> there should be a (configurable) minimal resolution
> > that fits the hardware to not overload the CPU.
>
> clockevents let you set a minimum delta already. This can be set at
> runtime before registering the device.
But, I want to expose a bigger risk here.
Apparently it is possible for a non-privileged user to overload the
system easily by starting a high-frequency periodic timer. The system
will be so busy handling that timer that it stops
responding, resulting in a kind of Denial-of-Service
situation, even on x86.
I think that there should be a hard limit to prevent starting timers
at higher frequencies than the platform can handle,
including the scheduler overhead.
David has proposed a solution via the kernel command line, but I think
it should be a hard-coded limit, so that the end user cannot forget
to put it on the kernel command line. Maybe a sub-option that
becomes visible when HRT is enabled in menuconfig; this option should
default to a very conservative value, and the user should think about the
lower bound he really needs before he really gets HRT enabled.
What do you think of this? (I can propose a patch for this tomorrow)
Kind Regards,
Remy
On Wed, 5 Mar 2008, Remy Bohmer wrote:
> Hello Thomas,
>
> > > Attached I have put a screendump of my ETM debugger. It shows a
> > > complete flow of kernel function-calls of what happens on a timer
> > > interrupt. In this example the complete sequence takes about 154 us.
> > > Notice that the ETM is non-intrusive, and that the times are real and
> > > accurate in this trace. (you can even see the effects of CPU-caches,
> > > sometimes the same code just runs faster)
> >
> > Is there any chance to convert this to a text table? Following that
> > png is pretty hard.
>
> I will see what I can do, but this was the easiest way (it was just a
> print-screen;-) )
> But, will text-dump make it more clear? It could also contain the time
> each assembler instruction will take behind the routines...
text files can be parsed by scripts :)
> But, I will look into that tomorrow. (approx. 12 hours from now)
thnx
> > > So, hires timestamps -> really really welcome.
> > > hires timers -> there should be a (configurable) minimal resolution
> > > that fits the hardware to not overload the CPU.
> >
> > clockevents let you set a minimum delta already. This can be set at
> > runtime before registering the device.
>
> But, I want to expose a bigger risk here.
> Apparently it is possible that a non privileged user can overload the
> system easily, by starting a high frequency periodic timer. The system
> will be that busy handling that timer that the system stops
> responding, thus it will result in some kind of Denial-of-Service
> situation, even on X86.
Wrong. The rearm condition is that the task is rescheduled. On any
platform.
Thanks,
tglx