2019-04-23 05:06:17

by Daniel Drake

[permalink] [raw]
Subject: [PATCH 1/2] x86/time: check usability of IRQ0 PIT timer

Modern Intel SoCs now include a special ITSSPRC register that can be
used to "gate" the PIT such that IRQ0 interrupts do not fire.

With Intel Apollo Lake we are starting to see consumer products that
have a BIOS option to apply this (defaulting to gated). Some such
products also lack the HPET ACPI table, so there is no HPET either.

At this point, Linux needs to stop assuming that the IRQ0 timer is
available.

Move APIC code to check IRQ0 to time.c, then check and record the IRQ0
PIT timer usability after it is set up. If it does not produce any
interrupts, unregister the clock event source.

Signed-off-by: Daniel Drake <[email protected]>
Link: https://lkml.kernel.org/r/CAD8Lp45fedoPLnK=UmUhhtkjy5u2h04sYKrx3U+m04U6FpVZ4A@mail.gmail.com
---
arch/x86/include/asm/time.h | 2 +
arch/x86/kernel/apic/io_apic.c | 101 ++++---------------------------
arch/x86/kernel/i8253.c | 6 ++
arch/x86/kernel/time.c | 106 ++++++++++++++++++++++++++++++++-
drivers/clocksource/i8253.c | 6 ++
include/linux/clockchips.h | 3 +
include/linux/i8253.h | 2 +
kernel/time/tick-internal.h | 2 -
8 files changed, 134 insertions(+), 94 deletions(-)

diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h
index cef818b16045..e6e00d18b39f 100644
--- a/arch/x86/include/asm/time.h
+++ b/arch/x86/include/asm/time.h
@@ -10,4 +10,6 @@ extern void time_init(void);

extern struct clock_event_device *global_clock_event;

+extern bool irq0_timer_works(void);
+
#endif /* _ASM_X86_TIME_H */
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 53aa234a6803..ae46da48c07b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -57,6 +57,7 @@
#include <asm/proto.h>
#include <asm/acpi.h>
#include <asm/dma.h>
+#include <asm/time.h>
#include <asm/timer.h>
#include <asm/i8259.h>
#include <asm/setup.h>
@@ -1573,92 +1574,6 @@ void __init setup_ioapic_ids_from_mpc(void)
}
#endif

-int no_timer_check __initdata;
-
-static int __init notimercheck(char *s)
-{
- no_timer_check = 1;
- return 1;
-}
-__setup("no_timer_check", notimercheck);
-
-static void __init delay_with_tsc(void)
-{
- unsigned long long start, now;
- unsigned long end = jiffies + 4;
-
- start = rdtsc();
-
- /*
- * We don't know the TSC frequency yet, but waiting for
- * 40000000000/HZ TSC cycles is safe:
- * 4 GHz == 10 jiffies
- * 1 GHz == 40 jiffies
- */
- do {
- rep_nop();
- now = rdtsc();
- } while ((now - start) < 40000000000ULL / HZ &&
- time_before_eq(jiffies, end));
-}
-
-static void __init delay_without_tsc(void)
-{
- unsigned long end = jiffies + 4;
- int band = 1;
-
- /*
- * We don't know any frequency yet, but waiting for
- * 40940000000/HZ cycles is safe:
- * 4 GHz == 10 jiffies
- * 1 GHz == 40 jiffies
- * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094
- */
- do {
- __delay(((1U << band++) * 10000000UL) / HZ);
- } while (band < 12 && time_before_eq(jiffies, end));
-}
-
-/*
- * There is a nasty bug in some older SMP boards, their mptable lies
- * about the timer IRQ. We do the following to work around the situation:
- *
- * - timer IRQ defaults to IO-APIC IRQ
- * - if this function detects that timer IRQs are defunct, then we fall
- * back to ISA timer IRQs
- */
-static int __init timer_irq_works(void)
-{
- unsigned long t1 = jiffies;
- unsigned long flags;
-
- if (no_timer_check)
- return 1;
-
- local_save_flags(flags);
- local_irq_enable();
-
- if (boot_cpu_has(X86_FEATURE_TSC))
- delay_with_tsc();
- else
- delay_without_tsc();
-
- local_irq_restore(flags);
-
- /*
- * Expect a few ticks at least, to be sure some possible
- * glue logic does not lock up after one or two first
- * ticks in a non-ExtINT mode. Also the local APIC
- * might have cached one ExtINT interrupt. Finally, at
- * least one tick may be lost due to delays.
- */
-
- /* jiffies wrap? */
- if (time_after(jiffies, t1 + 4))
- return 1;
- return 0;
-}
-
/*
* In the SMP+IOAPIC case it might happen that there are an unspecified
* number of pending IRQ events unhandled. These cases are very rare,
@@ -2066,6 +1981,12 @@ static int mp_alloc_timer_irq(int ioapic, int pin)
}

/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ *
* This code may look a bit paranoid, but it's supposed to cooperate with
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
* is so screwy. Thanks to Brian Perkins for testing/hacking this beast
@@ -2145,7 +2066,7 @@ static inline void __init check_timer(void)
}
irq_domain_deactivate_irq(irq_data);
irq_domain_activate_irq(irq_data, false);
- if (timer_irq_works()) {
+ if (irq0_timer_works()) {
if (disable_timer_pin_1 > 0)
clear_IO_APIC_pin(0, pin1);
goto out;
@@ -2168,7 +2089,7 @@ static inline void __init check_timer(void)
irq_domain_deactivate_irq(irq_data);
irq_domain_activate_irq(irq_data, false);
legacy_pic->unmask(0);
- if (timer_irq_works()) {
+ if (irq0_timer_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
goto out;
}
@@ -2188,7 +2109,7 @@ static inline void __init check_timer(void)
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
legacy_pic->unmask(0);

- if (timer_irq_works()) {
+ if (irq0_timer_works()) {
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
@@ -2206,7 +2127,7 @@ static inline void __init check_timer(void)

unlock_ExtINT_logic();

- if (timer_irq_works()) {
+ if (irq0_timer_works()) {
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 0d307a657abb..0115ebff38f5 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -24,6 +24,12 @@ void __init setup_pit_timer(void)
global_clock_event = &i8253_clockevent;
}

+void __init remove_pit_timer(void)
+{
+ clockevent_i8253_exit();
+ global_clock_event = NULL;
+}
+
#ifndef CONFIG_X86_64
static int __init init_pit_clocksource(void)
{
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 0e14f6c0d35e..abf634d391eb 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -25,6 +25,15 @@
#include <asm/hpet.h>
#include <asm/time.h>

+int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+ no_timer_check = 1;
+ return 1;
+}
+__setup("no_timer_check", notimercheck);
+
#ifdef CONFIG_X86_64
__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES;
#endif
@@ -54,6 +63,77 @@ unsigned long profile_pc(struct pt_regs *regs)
}
EXPORT_SYMBOL(profile_pc);

+static void __init delay_with_tsc(void)
+{
+ unsigned long long start, now;
+ unsigned long end = jiffies + 4;
+
+ start = rdtsc();
+
+ /*
+ * We don't know the TSC frequency yet, but waiting for
+ * 40000000000/HZ TSC cycles is safe:
+ * 4 GHz == 10 jiffies
+ * 1 GHz == 40 jiffies
+ */
+ do {
+ rep_nop();
+ now = rdtsc();
+ } while ((now - start) < 40000000000ULL / HZ &&
+ time_before_eq(jiffies, end));
+}
+
+static void __init delay_without_tsc(void)
+{
+ unsigned long end = jiffies + 4;
+ int band = 1;
+
+ /*
+ * We don't know any frequency yet, but waiting for
+ * 40940000000/HZ cycles is safe:
+ * 4 GHz == 10 jiffies
+ * 1 GHz == 40 jiffies
+ * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094
+ */
+ do {
+ __delay(((1U << band++) * 10000000UL) / HZ);
+ } while (band < 12 && time_before_eq(jiffies, end));
+}
+
+/*
+ * Test if the IRQ0 timer is working by delaying a short while and
+ * checking that jiffies has incremented.
+ */
+bool __init irq0_timer_works(void)
+{
+ unsigned long t1 = jiffies;
+ unsigned long flags;
+
+ if (no_timer_check)
+ return true;
+
+ local_save_flags(flags);
+ local_irq_enable();
+
+ if (boot_cpu_has(X86_FEATURE_TSC))
+ delay_with_tsc();
+ else
+ delay_without_tsc();
+
+ local_irq_restore(flags);
+
+ /*
+ * Expect a few ticks at least, to be sure some possible
+ * glue logic does not lock up after one or two first
+ * ticks in a non-ExtINT mode. Also the local APIC
+ * might have cached one ExtINT interrupt. Finally, at
+ * least one tick may be lost due to delays.
+ */
+
+ /* jiffies wrap? */
+ return time_after(jiffies, t1 + 4);
+}
+
/*
* Default timer interrupt handler for PIT/HPET
*/
@@ -79,12 +159,34 @@ static void __init setup_default_timer_irq(void)
pr_info("Failed to register legacy timer interrupt\n");
}

+static void __init remove_default_timer_irq(void)
+{
+ remove_irq(0, &irq0);
+}
+
/* Default timer init function */
void __init hpet_time_init(void)
{
- if (!hpet_enable())
- setup_pit_timer();
+ if (hpet_enable()) {
+ setup_default_timer_irq();
+ return;
+ }
+
+ /* Fall back on legacy 8253 PIT */
+ setup_pit_timer();
setup_default_timer_irq();
+
+ /*
+ * Intel SoCs like ApolloLake, Skylake and newer can have
+ * their PIT "gated" by the BIOS such that IRQ0 does not
+ * tick. Check for that situation here.
+ */
+ if (!irq0_timer_works()) {
+ pr_info("HPET is not available, and 8253 timer is not working. "
+ "Continuing without IRQ0 timer.\n");
+ remove_default_timer_irq();
+ remove_pit_timer();
+ }
}

static __init void x86_late_time_init(void)
diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c
index d4350bb10b83..4ca4bb44f6dd 100644
--- a/drivers/clocksource/i8253.c
+++ b/drivers/clocksource/i8253.c
@@ -193,4 +193,10 @@ void __init clockevent_i8253_init(bool oneshot)
clockevents_config_and_register(&i8253_clockevent, PIT_TICK_RATE,
0xF, 0x7FFF);
}
+
+void __init clockevent_i8253_exit(void)
+{
+ clockevents_switch_state(&i8253_clockevent, CLOCK_EVT_STATE_DETACHED);
+ clockevents_unbind_device(&i8253_clockevent, smp_processor_id());
+}
#endif
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 8ae9a95ebf5b..c54d20272e8c 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -187,6 +187,9 @@ extern void clockevents_config_and_register(struct clock_event_device *dev,
u32 freq, unsigned long min_delta,
unsigned long max_delta);

+extern void clockevents_switch_state(struct clock_event_device *dev,
+ enum clock_event_state state);
+
extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq);

static inline void
diff --git a/include/linux/i8253.h b/include/linux/i8253.h
index 8336b2f6f834..55e620eabd36 100644
--- a/include/linux/i8253.h
+++ b/include/linux/i8253.h
@@ -24,7 +24,9 @@ extern raw_spinlock_t i8253_lock;
extern bool i8253_clear_counter_on_shutdown;
extern struct clock_event_device i8253_clockevent;
extern void clockevent_i8253_init(bool oneshot);
+extern void clockevent_i8253_exit(void);

extern void setup_pit_timer(void);
+extern void remove_pit_timer(void);

#endif /* __LINUX_I8253_H */
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index e277284c2831..00a74a8fd8f2 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -51,8 +51,6 @@ static inline void clockevent_set_state(struct clock_event_device *dev,
extern void clockevents_shutdown(struct clock_event_device *dev);
extern void clockevents_exchange_device(struct clock_event_device *old,
struct clock_event_device *new);
-extern void clockevents_switch_state(struct clock_event_device *dev,
- enum clock_event_state state);
extern int clockevents_program_event(struct clock_event_device *dev,
ktime_t expires, bool force);
extern void clockevents_handle_noop(struct clock_event_device *dev);
--
2.19.1


2019-04-23 05:06:17

by Daniel Drake

[permalink] [raw]
Subject: [PATCH 2/2] x86/ioapic: avoid timer manipulation when IRQ0 timer is unavailable

New products based on Intel Apollo Lake are appearing where the HPET is
not present in ACPI, and the legacy 8254 PIT is "gated" by default in
the BIOS setup menu.

This leads an early boot "IO-APIC + timer doesn't work!" kernel panic
on a black screen (before the framebuffer is initialized).

Avoid the IO-APIC IRQ0 timer manipulation & verification on platforms
where the legacy IRQ0 timer has been determined as unavailable.

This fixes boot on Connex L1430 and Scope SN116PYA with default BIOS
settings.

Signed-off-by: Daniel Drake <[email protected]>
Link: https://lkml.kernel.org/r/CAD8Lp45fedoPLnK=UmUhhtkjy5u2h04sYKrx3U+m04U6FpVZ4A@mail.gmail.com
---
arch/x86/kernel/apic/io_apic.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ae46da48c07b..2d29c62abbcb 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2243,7 +2243,7 @@ void __init setup_IO_APIC(void)
sync_Arb_IDs();
setup_IO_APIC_irqs();
init_IO_APIC_traps();
- if (nr_legacy_irqs())
+ if (global_clock_event && nr_legacy_irqs())
check_timer();

ioapic_initialized = 1;
--
2.19.1

2019-04-25 10:03:50

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/time: check usability of IRQ0 PIT timer

On Tue, 23 Apr 2019, Daniel Drake wrote:
> /* Default timer init function */
> void __init hpet_time_init(void)
> {
> - if (!hpet_enable())
> - setup_pit_timer();
> + if (hpet_enable()) {
> + setup_default_timer_irq();
> + return;
> + }
> +
> + /* Fall back on legacy 8253 PIT */
> + setup_pit_timer();
> setup_default_timer_irq();
> +
> + /*
> + * Intel SoCs like ApolloLake, Skylake and newer can have
> + * their PIT "gated" by the BIOS such that IRQ0 does not
> + * tick. Check for that situation here.
> + */
> + if (!irq0_timer_works()) {

So you rely on the fact that the legacy PIC delivery is working
here. That's fragile at best especially when this is a boot of a crash
kernel.

> + pr_info("HPET is not available, and 8253 timer is not working. "
> + "Continuing without IRQ0 timer.\n");
> + remove_default_timer_irq();
> + remove_pit_timer();
> + }
> +
> +void __init clockevent_i8253_exit(void)
> +{
> + clockevents_switch_state(&i8253_clockevent, CLOCK_EVT_STATE_DETACHED);

Smart, but broken. The PIT clockevent is still referenced in the
clockevents core code after the unbind. The unbind logic is there for a
reason and just because the above duct tape does not explode in your face
does not make it more correct. :)

> + clockevents_unbind_device(&i8253_clockevent, smp_processor_id());

We really want to avoid the whole register and whatever dance at all for
these devices. Let me think about it some more.

Thanks,

tglx