2007-08-20 16:56:21

by Konstantin Baydarov

[permalink] [raw]
Subject: [PATCH] kexec: reenable HPET before kexec

Hi, I've faced problem:
I have two x86_64 kernels with HPET enabled:
kernel 1 - with PM enabled,
kernel 2 - with PM disabled.
When I execute kernel 2 from kernel 1 on pentiumd based PC, kernel 2 hangs
during boot:
[email protected]:~#
[email protected]:~# ./kexec.sh ./ko_bzImage_x86_64_nopm
+ kexec -l ././ko_bzImage_x86_64_nopm '--command-line=kdb=on kdb=early
apic=debug nmi_watchdog=0 console=ttyS0
,115200 ip=bootp root=/dev/nfs rw'
+ kexec -e
md: stopping all md devices.
sd 1:0:0:0: [sda] Synchronizing SCSI cache
ACPI: PCI interrupt for device 0000:01:00.0 disabled
Starting new kernel
Linux version 2.6.23-rc2 ([email protected]) (gcc version 4.2.0
(MontaVista 4.2.0-8.0.0.0703430 2
007-06-22)) #3 SMP PREEMPT Mon Aug 20 20:26:17 MSD 2007
Command line: kdb=on kdb=early apic=debug nmi_watchdog=0 console=ttyS0,115200
ip=bootp root=/dev/nfs rw
BIOS-provided physical RAM map:
BIOS-e820: 0000000000000100 - 000000000009fc00 (usable)
BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
BIOS-e820: 0000000000100000 - 000000001f790000 (usable)
BIOS-e820: 000000001f790000 - 000000001f79e000 (ACPI data)
BIOS-e820: 000000001f79e000 - 000000001f7e0000 (ACPI NVS)
BIOS-e820: 000000001f7e0000 - 000000001f800000 (reserved)
BIOS-e820: 00000000ffb80000 - 0000000100000000 (reserved)
end_pfn_map = 1048576
DMI 2.3 present.
Zone PFN ranges:
DMA 1 -> 4096
DMA32 4096 -> 1048576
Normal 1048576 -> 1048576
Movable zone start PFN for each node
early_node_map[2] active PFN ranges
0: 1 -> 159
0: 256 -> 128912
Intel MultiProcessor Specification v1.4
MPTABLE: OEM ID: INTEL MPTABLE: Product ID: MPTABLE: APIC at: 0xFEE00000
Processor #0 (Bootup-CPU)
Processor #1
I/O APIC #2 at 0xFEC00000.
Setting APIC routing to flat
Processors: 2
mapped APIC to ffffffffff5fb000 ( fee00000)
mapped IOAPIC to ffffffffff5fa000 (00000000fec00000)
Allocating PCI resources starting at 20000000 (gap: 1f800000:e0380000)
PERCPU: Allocating 31696 bytes of per cpu data
Built 1 zonelists in Zone order. Total pages: 125250
Kernel command line: kdb=on kdb=early apic=debug nmi_watchdog=0
console=ttyS0,115200 ip=bootp root=/dev/nfs rw
Initializing CPU#0
PID hash table entries: 2048 (order: 11, 16384 bytes)
time.c: Detected 3000.006 MHz processor.
Console: colour VGA+ 80x25
console [ttyS0] enabled
Dentry cache hash table entries: 65536 (order: 7, 524288 bytes)
Inode-cache hash table entries: 32768 (order: 6, 262144 bytes)
Checking aperture...
Memory: 500048k/515648k available (4577k kernel code, 15136k reserved, 1755k
data, 280k init)
-------------------------------- kernel hangs here --------------------------

Root cause:
When kernel 1 switches to the Local APIC timer, it disables HPET.
When kernel 1 executes kernel 2, HPET isn't reenabled early during boot, because
PM and ACPI are disabled and kernel 2 doesn't search for HPET in ACPI tables.
HPET is disabled, so the kernel enables the PIT timer, but it doesn't work on pentiumd
(don't know why).
With HPET disabled and the PIT not working, IRQ0 isn't triggered and jiffies isn't
incremented, so the kernel hangs in calibrate_delay().

How solved:
I reenable HPET timer in machine_kexec() before switching to kernel 2.

Also, on some machines I can reproduce the bug with the i386 kernel from kernel.org, so
the i386 kernel might need to be fixed too; I'm adding lkml to cc. Thanks.

Patch against patch-2.6.23-rc2-rt2.

Signed-off-by: Konstantin Baydarov <[email protected]>

Index: linux-2.6.23-rc2-kexec-pm/arch/i386/kernel/hpet.c
===================================================================
--- linux-2.6.23-rc2-kexec-pm.orig/arch/i386/kernel/hpet.c
+++ linux-2.6.23-rc2-kexec-pm/arch/i386/kernel/hpet.c
@@ -443,6 +443,22 @@ static __init int hpet_late_init(void)
}
fs_initcall(hpet_late_init);

+#ifdef CONFIG_KEXEC
+/*
+ * reenable HPET timer
+ */
+int hpet_reenable(void)
+{
+ if (!is_hpet_capable())
+ return -1;
+
+ if (hpet_clockevent.mode != CLOCK_EVT_MODE_PERIODIC)
+ hpet_legacy_set_mode(CLOCK_EVT_MODE_PERIODIC, &hpet_clockevent);
+
+ return 0;
+}
+#endif
+
#ifdef CONFIG_HPET_EMULATE_RTC

/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET
Index: linux-2.6.23-rc2-kexec-pm/arch/x86_64/kernel/machine_kexec.c
===================================================================
--- linux-2.6.23-rc2-kexec-pm.orig/arch/x86_64/kernel/machine_kexec.c
+++ linux-2.6.23-rc2-kexec-pm/arch/x86_64/kernel/machine_kexec.c
@@ -14,6 +14,7 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/io.h>
+#include <asm/hpet.h>

#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
static u64 kexec_pgd[512] PAGE_ALIGNED;
@@ -183,6 +184,10 @@ NORET_TYPE void machine_kexec(struct kim
unsigned long page_list[PAGES_NR];
void *control_page;

+#ifdef CONFIG_HPET_TIMER
+ hpet_reenable();
+#endif
+
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();

Index: linux-2.6.23-rc2-kexec-pm/arch/i386/kernel/machine_kexec.c
===================================================================
--- linux-2.6.23-rc2-kexec-pm.orig/arch/i386/kernel/machine_kexec.c
+++ linux-2.6.23-rc2-kexec-pm/arch/i386/kernel/machine_kexec.c
@@ -19,6 +19,7 @@
#include <asm/cpufeature.h>
#include <asm/desc.h>
#include <asm/system.h>
+#include <asm/hpet.h>

#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
static u32 kexec_pgd[1024] PAGE_ALIGNED;
@@ -106,6 +107,10 @@ NORET_TYPE void machine_kexec(struct kim
unsigned long page_list[PAGES_NR];
void *control_page;

+#ifdef CONFIG_HPET_TIMER
+ hpet_reenable();
+#endif
+
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();

Index: linux-2.6.23-rc2-kexec-pm/include/asm-i386/hpet.h
===================================================================
--- linux-2.6.23-rc2-kexec-pm.orig/include/asm-i386/hpet.h
+++ linux-2.6.23-rc2-kexec-pm/include/asm-i386/hpet.h
@@ -67,6 +67,9 @@ extern unsigned long hpet_address;
extern unsigned long force_hpet_address;
extern int is_hpet_enabled(void);
extern int hpet_enable(void);
+#ifdef CONFIG_KEXEC
+extern int hpet_reenable(void);
+#endif
extern unsigned long hpet_readl(unsigned long a);
extern void force_hpet_resume(void);


2007-08-20 17:33:04

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

Konstantin Baydarov <[email protected]> writes:

> Hi, I've faced problem:
> I have two x86_64 kernels with HPET enabled:

Is this for a standard kernel or for a RT kernel?

-Andi

2007-08-20 21:10:52

by Konstantin Baydarov

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

Konstantin Baydarov wrote:
> Hi, I've faced problem:
> I have two x86_64 kernels with HPET enabled:
> kernel 1 - with PM enabled,
> kernel 2 - with PM disabled.
>
Forgot to say that it's RT kernel, version is 2.6.23-rc2-rt2.

2007-08-20 21:34:58

by Daniel Walker

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

On Tue, 2007-08-21 at 01:10 +0400, Konstantin Baydarov wrote:
> Konstantin Baydarov wrote:
> > Hi, I've faced problem:
> > I have two x86_64 kernels with HPET enabled:
> > kernel 1 - with PM enabled,
> > kernel 2 - with PM disabled.
> >
> Forgot to say that it's RT kernel, version is 2.6.23-rc2-rt2.

It was in your original email .. Are you saying that it is not a
problem in plain 2.6.23-rc2? I thought I read that it was a problem on
i386 ..

Daniel

2007-08-21 12:55:41

by Konstantin Baydarov

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

On Mon, 20 Aug 2007 14:31:04 -0700
Daniel Walker <[email protected]> wrote:

> On Tue, 2007-08-21 at 01:10 +0400, Konstantin Baydarov wrote:
>
> It was in your original email .. Are you saying this it is not a
> problem in plain 2.6.23-rc2? I thought I read that it was a problem on
> i386 ..
>
> Daniel
>

I can reproduce bug with plain 2.6.23-rc3 i386 kernel, without RT patch,
on pentiumd.
So I fixed this issue the same way as I fixed RT kernel. The side effect
of this fix is that when kernel 2 executes both Local APIC timer and HPET
are enabled and produce timer interrupts:
[email protected]:~# cat /proc/interrupts
CPU0 CPU1
0: 5643 0 IO-APIC-edge timer
1: 8 0 IO-APIC-edge i8042
2: 0 0 XT-PIC-XT cascade
4: 303 181 IO-APIC-edge serial
6: 3 0 IO-APIC-edge floppy
12: 4 0 IO-APIC-edge i8042
14: 11 0 IO-APIC-edge ide0
17: 5701 16 IO-APIC-fasteoi uhci_hcd:usb3, eth0
18: 0 0 IO-APIC-fasteoi uhci_hcd:usb4
19: 0 0 IO-APIC-fasteoi uhci_hcd:usb5
20: 38 0 IO-APIC-fasteoi ehci_hcd:usb1, uhci_hcd:usb2
23: 3 0 IO-APIC-fasteoi libata
NMI: 0 0
LOC: 5525 5489
ERR: 0
MIS: 0
[email protected]:~#

IRQ0 and LOC values are practically the same.

Also I didn't see this bug on [email protected], maybe I should
resend it?

Patch against 2.6.23-rc3.

Signed-off-by: Konstantin Baydarov <[email protected]>

arch/i386/kernel/hpet.c | 15 +++++++++++++++
arch/i386/kernel/machine_kexec.c | 5 +++++
include/asm-i386/hpet.h | 3 +++
3 files changed, 23 insertions(+)

Index: linux-2.6.23-rc3-i386/arch/i386/kernel/hpet.c
===================================================================
--- linux-2.6.23-rc3-i386.orig/arch/i386/kernel/hpet.c
+++ linux-2.6.23-rc3-i386/arch/i386/kernel/hpet.c
@@ -340,6 +340,21 @@ out_nohpet:
return 0;
}

+#ifdef CONFIG_KEXEC
+/*
+ * reenable HPET timer
+ */
+int hpet_reenable(void)
+{
+ if (!is_hpet_capable())
+ return -1;
+
+ if (hpet_clockevent.mode != CLOCK_EVT_MODE_PERIODIC)
+ hpet_set_mode(CLOCK_EVT_MODE_PERIODIC, &hpet_clockevent);
+
+ return 0;
+}
+#endif

#ifdef CONFIG_HPET_EMULATE_RTC

Index: linux-2.6.23-rc3-i386/arch/i386/kernel/machine_kexec.c
===================================================================
--- linux-2.6.23-rc3-i386.orig/arch/i386/kernel/machine_kexec.c
+++ linux-2.6.23-rc3-i386/arch/i386/kernel/machine_kexec.c
@@ -19,6 +19,7 @@
#include <asm/cpufeature.h>
#include <asm/desc.h>
#include <asm/system.h>
+#include <asm/hpet.h>

#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
static u32 kexec_pgd[1024] PAGE_ALIGNED;
@@ -109,6 +110,10 @@ NORET_TYPE void machine_kexec(struct kim
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();

+#ifdef CONFIG_HPET_TIMER
+ hpet_reenable();
+#endif
+
control_page = page_address(image->control_code_page);
memcpy(control_page, relocate_kernel, PAGE_SIZE);

Index: linux-2.6.23-rc3-i386/include/asm-i386/hpet.h
===================================================================
--- linux-2.6.23-rc3-i386.orig/include/asm-i386/hpet.h
+++ linux-2.6.23-rc3-i386/include/asm-i386/hpet.h
@@ -66,6 +66,9 @@
extern unsigned long hpet_address;
extern int is_hpet_enabled(void);
extern int hpet_enable(void);
+#ifdef CONFIG_KEXEC
+extern int hpet_reenable(void);
+#endif

#ifdef CONFIG_HPET_EMULATE_RTC

2007-08-23 09:08:46

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

On Mon, Aug 20, 2007 at 08:55:30PM +0400, Konstantin Baydarov wrote:
> Hi, I've faced problem:
> I have two x86_64 kernels with HPET enabled:
> kernel 1 - with PM enabled,
> kernel 2 - with PM disabled.
> When I execute kernel 2 from kernel 1 on pentiumd based PC, kernel 2 hangs
> during boot:
> [email protected]:~#
> [email protected]:~# ./kexec.sh ./ko_bzImage_x86_64_nopm
> + kexec -l ././ko_bzImage_x86_64_nopm '--command-line=kdb=on kdb=early
> apic=debug nmi_watchdog=0 console=ttyS0
> ,115200 ip=bootp root=/dev/nfs rw'
> + kexec -e
> md: stopping all md devices.
> sd 1:0:0:0: [sda] Synchronizing SCSI cache
> ACPI: PCI interrupt for device 0000:01:00.0 disabled
> Starting new kernel
> Linux version 2.6.23-rc2 ([email protected]) (gcc version 4.2.0
> (MontaVista 4.2.0-8.0.0.0703430 2
> 007-06-22)) #3 SMP PREEMPT Mon Aug 20 20:26:17 MSD 2007
> Command line: kdb=on kdb=early apic=debug nmi_watchdog=0 console=ttyS0,115200
> ip=bootp root=/dev/nfs rw
> BIOS-provided physical RAM map:
> BIOS-e820: 0000000000000100 - 000000000009fc00 (usable)
> BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
> BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
> BIOS-e820: 0000000000100000 - 000000001f790000 (usable)
> BIOS-e820: 000000001f790000 - 000000001f79e000 (ACPI data)
> BIOS-e820: 000000001f79e000 - 000000001f7e0000 (ACPI NVS)
> BIOS-e820: 000000001f7e0000 - 000000001f800000 (reserved)
> BIOS-e820: 00000000ffb80000 - 0000000100000000 (reserved)
> end_pfn_map = 1048576
> DMI 2.3 present.
> Zone PFN ranges:
> DMA 1 -> 4096
> DMA32 4096 -> 1048576
> Normal 1048576 -> 1048576
> Movable zone start PFN for each node
> early_node_map[2] active PFN ranges
> 0: 1 -> 159
> 0: 256 -> 128912
> Intel MultiProcessor Specification v1.4
> MPTABLE: OEM ID: INTEL MPTABLE: Product ID: MPTABLE: APIC at: 0xFEE00000
> Processor #0 (Bootup-CPU)
> Processor #1
> I/O APIC #2 at 0xFEC00000.
> Setting APIC routing to flat
> Processors: 2
> mapped APIC to ffffffffff5fb000 ( fee00000)
> mapped IOAPIC to ffffffffff5fa000 (00000000fec00000)
> Allocating PCI resources starting at 20000000 (gap: 1f800000:e0380000)
> PERCPU: Allocating 31696 bytes of per cpu data
> Built 1 zonelists in Zone order. Total pages: 125250
> Kernel command line: kdb=on kdb=early apic=debug nmi_watchdog=0
> console=ttyS0,115200 ip=bootp root=/dev/nfs rw
> Initializing CPU#0
> PID hash table entries: 2048 (order: 11, 16384 bytes)
> time.c: Detected 3000.006 MHz processor.
> Console: colour VGA+ 80x25
> console [ttyS0] enabled
> Dentry cache hash table entries: 65536 (order: 7, 524288 bytes)
> Inode-cache hash table entries: 32768 (order: 6, 262144 bytes)
> Checking aperture...
> Memory: 500048k/515648k available (4577k kernel code, 15136k reserved, 1755k
> data, 280k init)
> -------------------------------- kernel hangs here --------------------------
>
> Root case:
> When kernel 1 switches to Local APIC timer It disables HPET.
> When kernel 1 executes kernel 2, HPET isn't reenabled early during boot, because
> PM and ACPI is disabled and kernel 2 doesn't search for HPET in ACPI tables.
> HPET is disabled, so kernel enables PIT timer, but it doesn't work on pentiumd
> (don't know why).

Does your kernel 2 boot normally? I mean through BIOS and boot-loader?
This explanation seems to be suggesting that because PM and ACPI is disabled,
kernel 2 does not search for HPET. If this is the case, this kernel will
not boot even through normal boot-loader and will try to use PIT instead?

If so, it is not an kexec issue at all.

Thanks
Vivek

2007-08-23 11:43:34

by Konstantin Baydarov

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

On Thu, 23 Aug 2007 14:38:45 +0530
Vivek Goyal <[email protected]> wrote:
> Does your kernel 2 boot normally? I mean through BIOS and boot-loader?
> This explanation seems to be suggesting that because PM and ACPI is
> disabled, kernel 2 does not search for HPET. If this is the case,
> this kernel will not boot even through normal boot-loader and will
> try to use PIT instead?
>
> If so, it is not an kexec issue at all.
>
> Thanks
> Vivek

kernel 2 boots normally through BIOS and boot-loader. I agree with your
explanation.
It seems that kernel 2 can't enable PIT, when it is executed by kexec.
As kernel 1 disabled HPET(IRQ0 source) and PIT is "broken"(or isn't
enabled) IRQ0 are not triggered at all, and kernel 2 hangs.
Since kernel 2 boots normally through BIOS and boot-loader,
additional code is needed (in Linux kernel init code) for PIT or ACPI
or APIC initialization, I mean the same code as executed at the BIOS stage.
I agree that it's not a kexec issue. But can we use my fix as a
workaround (to make kexec work) until PIT is fixed?
Everything above is correct for the i386/x86_64 RT (2.6.23-rc2-rt2) kernel
and for the i386 "plain" (2.6.23-rc3) kernel. The bug isn't reproduced in the 2.6.23-rc3
x86_64 kernel, because x86_64 code never disables HPET. So on every
boot (initiated by kexec or BIOS) IRQ0 is triggered.

2007-08-27 05:37:14

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

On Thu, Aug 23, 2007 at 03:49:36PM +0400, Konstantin Baydarov wrote:
> On Thu, 23 Aug 2007 14:38:45 +0530
> Vivek Goyal <[email protected]> wrote:
> > Does your kernel 2 boot normally? I mean through BIOS and boot-loader?
> > This explanation seems to be suggesting that because PM and ACPI is
> > disabled, kernel 2 does not search for HPET. If this is the case,
> > this kernel will not boot even through normal boot-loader and will
> > try to use PIT instead?
> >
> > If so, it is not an kexec issue at all.
> >
> > Thanks
> > Vivek
>
> kernel 2 boots normally through BIOS and boot-loader. I agree with your
> explanation.
> It seems that kernel 2 can't enable PIT, when it is executed by kexec.
> As kernel 1 disabled HPET(IRQ0 source) and PIT is "broken"(or isn't
> enabled) IRQ0 are not triggered at all, and kernel 2 hangs.

My hunch is that PIT might be disabled here. Because when first kernel
must have detected HPET and enabled it, then it might have disabled
PIT and second kernel never sees the interrupts from PIT. But it should
have seen the interrupts from HPET as no body disabled it?

I think kexec does not enable/disable any timer devices. It just tries
to bring LAPIC/IOAPIC into a state so that the next kernel still sees
timer interrupts before the next kernel sets up LAPIC and IOAPIC.

> As kernel 2 boots normally through BIOS and boot-loader, than
> additional code needed(in Linux kernel init code) for PIT or ACPI
> or APIC initialization, I mean the same code as executed on BIOS stage.

I am not sure which initial code you are referring to? During kexec, we
will run kernel initial code (except real mode code). But one thing to find
out here will be how does BIOS pass the control to the OS when HPET device is
present. Does it enable the HPET and put LAPIC and IOAPIC in virtual wire
mode or it enables PIT and then later kernel switches from PIT to HPET?

> I agree that it's not an kexec issue. But can we use my fix as a
> workaround(to make kexec work) until PIT will be fixed?
> Everything above is correct for i386/x86_64 RT(2.6.23-rc2-rt2) kernel
> and for i368 "plain"(2.6.23-rc3) kernel. Bug isn't reproduced in 2.6.23-rc3
> x86_64 kernel, because x86_64 code never disables HPET. So on every
> boot(initiated by kexec or BIOS) IRQ0 are triggered.

I think we should dive little deeper to find out the root cause of the problem
instead of putting the intermediate patch. These timer issues are tricky
ones and we have already solved few of these.

Going back to your original mail where you specify root cause.

- You mentioned that first kernel disables HPET while enabling Local APIC
timer. Can you please point me to where exactly it happens? I had thought local
APIC timer and HPET serve different purposes and co-exist.

- You also mentioned that kernel tries to setup PIT as it does not find
HPET in second kernel. Where exactly does it do that? I think we need to
then go deeper to find out why PIT is not working now? Is it disabled? or
LAPIC/IOAPIC have not been setup properly and PIT interrupts never reach
CPU?

Thanks
Vivek

2007-08-27 18:26:42

by Pallipadi, Venkatesh

[permalink] [raw]
Subject: RE: [PATCH] kexec: reenable HPET before kexec


>
>I think we should dive little deeper to find out the root
>cause of the problem
>instead of putting the intermediate patch. These timer issues
>are tricky
>ones and we have already solved few of these.
>
>Going back to your original mail where you specify root cause.
>
>- You mentioned that first kernel disables HPET while enabling
>Local APIC
>timer. Can you please point me when exactly it happens. I had
>thought local
>APIC timer and HPET server different purpose and co-exist.
>
>- You also mentioned that kernel tries to setup PIT as it does not find
>HPET in second kernel. Where exactly does it do that? I think
>we need to
>then go deeper to find out why PIT is not working now? Is it
>disabled? or
>LAPIC/IOAPIC have not been setup properly and PIT interrupts
>never reach
>CPU?
>

- Another thing to try is to disable HPET and boot with PIT in the first
kernel. Just to check whether PIT never works on this platform or the
first kernel is doing something to stop PIT. You can try "hpet=disable"
boot option for that.

Thanks,
Venki

2007-08-27 18:33:39

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

"Pallipadi, Venkatesh" <[email protected]> writes:

> - Another thing to try is to disable HPET and boot with PIT in the first
> kernel. Just to check whether PIT never works on this platform or the
> first kernel is doing something to stop PIT. You can try "hpet=disable"
> boot option for that.

I don't know what the original patch looked like. It doesn't
seem to have made it to any mailing lists to which I'm subscribed.
But I'm wondering if there is a bug in the shutdown routines for
the pit or the hpet that is causing problems. As this is
a normal kexec those routines will get called.

Eric

2007-08-28 12:52:25

by Konstantin Baydarov

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

On Mon, 27 Aug 2007 11:26:29 -0700
"Pallipadi, Venkatesh" <[email protected]> wrote:

>
>
> - Another thing to try is to disable HPET and boot with PIT in the
> first kernel. Just to check whether PIT never works on this platform
> or the first kernel is doing something to stop PIT. You can try
> "hpet=disable" boot option for that.
>
> Thanks,
> Venki

I've tried kernel 1 with HPET disabled - it boots fine, PIT works!
Then I made additional investigations and found out that PIT won't work
in kernel 2 if bit HPET_CFG_LEGACY is set.
Bit HPET_CFG_LEGACY is set by hpet_enable_int() during HPET
initialization, so if this bit is cleared in machine_kexec() kernel 2
boots fine.
I can't explain this magic, maybe someone can explain this. Thanks.

Here is new version of workaround for 2.6.23-rc3

Signed-off-by: Konstantin Baydarov <[email protected]>

arch/i386/kernel/hpet.c | 12 ++++++++++++
arch/i386/kernel/machine_kexec.c | 6 ++++++
include/asm-i386/hpet.h | 3 +++
3 files changed, 21 insertions(+)

Index: linux-2.6.23-rc3/arch/i386/kernel/hpet.c
===================================================================
--- linux-2.6.23-rc3.orig/arch/i386/kernel/hpet.c
+++ linux-2.6.23-rc3/arch/i386/kernel/hpet.c
@@ -149,6 +149,18 @@ static void hpet_enable_int(void)
hpet_legacy_int_enabled = 1;
}

+#ifdef CONFIG_KEXEC
+void hpet_disable_int(void)
+{
+ unsigned long cfg = hpet_readl(HPET_CFG);
+
+ cfg &= ~HPET_CFG_LEGACY;
+ hpet_writel(cfg, HPET_CFG);
+ hpet_legacy_int_enabled = 0;
+
+}
+#endif
+
static void hpet_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt)
{
Index: linux-2.6.23-rc3/arch/i386/kernel/machine_kexec.c
===================================================================
--- linux-2.6.23-rc3.orig/arch/i386/kernel/machine_kexec.c
+++ linux-2.6.23-rc3/arch/i386/kernel/machine_kexec.c
@@ -19,6 +19,7 @@
#include <asm/cpufeature.h>
#include <asm/desc.h>
#include <asm/system.h>
+#include <asm/hpet.h>

#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
static u32 kexec_pgd[1024] PAGE_ALIGNED;
@@ -109,6 +110,11 @@ NORET_TYPE void machine_kexec(struct kim
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();

+#ifdef CONFIG_HPET_TIMER
+ /* Without this PIT won't work in executed kernel */
+ hpet_disable_int();
+#endif
+
control_page = page_address(image->control_code_page);
memcpy(control_page, relocate_kernel, PAGE_SIZE);

Index: linux-2.6.23-rc3/include/asm-i386/hpet.h
===================================================================
--- linux-2.6.23-rc3.orig/include/asm-i386/hpet.h
+++ linux-2.6.23-rc3/include/asm-i386/hpet.h
@@ -66,6 +66,9 @@
extern unsigned long hpet_address;
extern int is_hpet_enabled(void);
extern int hpet_enable(void);
+#ifdef CONFIG_KEXEC
+extern void hpet_disable_int(void);
+#endif

#ifdef CONFIG_HPET_EMULATE_RTC

2007-08-30 07:37:16

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

Konstantin Baydarov <[email protected]> writes:

> On Mon, 27 Aug 2007 11:26:29 -0700
> "Pallipadi, Venkatesh" <[email protected]> wrote:
>
>>
>>
>> - Another thing to try is to disable HPET and boot with PIT in the
>> first kernel. Just to check whether PIT never works on this platform
>> or the first kernel is doing something to stop PIT. You can try
>> "hpet=disable" boot option for that.
>>
>> Thanks,
>> Venki
>
> I've tried kernel 1 with HPET disabled - it boots fine, PIT works!
> Then I made additional investigations and found out that PIT won't work
> in kernel 2 if bit HPET_CFG_LEGACY is set.
> Bit HPET_CFG_LEGACY is set by hpet_enable_int() during HPET
> initialization, so if this bit is cleared in machine_kexec() kernel 2
> boots fine.
> I can't explain this magic, maybe someone can explain this. Thanks.
>
> Here is new version of workaround for 2.6.23-rc3

Ok. It looks like you understand this issue.

Can you please try calling hpet_disable_int from
hpet_set_mode under CLOCK_EVT_MODE_SHUTDOWN. I haven't
traced the clock event methods all of the way through
but as a first approximation I think that will get
things called at the appropriate time without needing
to patch machine_kexec, which is very much the wrong
place to call any hpet code from.

We may also need to make the hpet initialization more
robust so we can do something sane in the kexec on panic
case, where we deliberately don't run any shutdown methods.

Eric

2007-08-30 16:01:37

by Konstantin Baydarov

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

On Thu, 30 Aug 2007 01:32:30 -0600
[email protected] (Eric W. Biederman) wrote:
>
> Ok. It looks like you understand this issue.
>
> Can you please try calling hpet_disable_int from
> hpet_set_mode under CLOCK_EVT_MODE_SHUTDOWN. I haven't
> traced the clock event methods all of the way through
> but as a first approximation I think that will get
> things called at the appropriate time with out needing
> to patch machine_kexec. Which is very much the wrong
> place to add call any hpet code from.
>
> We may also need to make the hpet initialization more
> robust so we can do something sane in the kexec on panic
> case, where we deliberately don't run any shutdown methods.
>
> Eric

Eric, actually calling hpet_disable_int() under
CLOCK_EVT_MODE_SHUTDOWN is not enough, because
HPET might not be shutdown at all (we might want to use HPET and don't
want to use LAPIC timer or in some cases HPET is used as broadcast
device with LAPIC timers enabled). So, somehow, we should call
hpet_set_mode with CLOCK_EVT_MODE_SHUTDOWN as an argument before
machine_kexec. To solve this I've added timekeeping_shutdown() to
timekeeping.c that calls clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND,
NULL). Function timekeeping_shutdown() is called from sysdev_shutdown().
Also If we are adding hpet_disable_int() under
CLOCK_EVT_MODE_SHUTDOWN we have to add hpet_enable_int under
CLOCK_EVT_MODE_PERIODIC and CLOCK_EVT_MODE_ONESHOT.
Also I've added call of hpet_disable_int() to machine_crash_shutdown()
because, as you said, sysdev_shutdown() won't be called on crash before
machine_kexec().
Also, as we have to make hpet_disable_int() global and call it from
machine_crash_shutdown(), I suggest not to add hpet_disable_int() under
CLOCK_EVT_MODE_SHUTDOWN. If we don't add hpet_disable_int() under
CLOCK_EVT_MODE_SHUTDOWN, the patch will be smaller because we don't
have to call clockevents_notify() and we don't have to add
hpet_enable_int. We just have to add hpet_disable_int() call to
timekeeping_shutdown(). But it's just suggestion - attached patch adds
hpet_disable_int() under CLOCK_EVT_MODE_SHUTDOWN.

Here is new version of fix. It still against kernel 2.6.23-rc3. Thanks.

Signed-off-by: Konstantin Baydarov <[email protected]>

arch/i386/kernel/crash.c | 4 ++++
arch/i386/kernel/hpet.c | 33 +++++++++++++++++++++++++++++++++
include/asm-i386/hpet.h | 3 +++
kernel/time/timekeeping.c | 13 +++++++++++++
4 files changed, 53 insertions(+)

Index: linux-2.6.23-rc3/arch/i386/kernel/hpet.c
===================================================================
--- linux-2.6.23-rc3.orig/arch/i386/kernel/hpet.c
+++ linux-2.6.23-rc3/arch/i386/kernel/hpet.c
@@ -144,11 +144,35 @@ static void hpet_enable_int(void)
{
unsigned long cfg = hpet_readl(HPET_CFG);

+#ifdef CONFIG_KEXEC
+ if (hpet_legacy_int_enabled)
+ return;
+#endif
+
cfg |= HPET_CFG_LEGACY;
hpet_writel(cfg, HPET_CFG);
hpet_legacy_int_enabled = 1;
}

+#ifdef CONFIG_KEXEC
+void hpet_disable_int(void)
+{
+ unsigned long cfg;
+
+ if (!hpet_legacy_int_enabled)
+ return;
+
+ if (!is_hpet_capable())
+ return;
+
+ cfg = hpet_readl(HPET_CFG);
+ cfg &= ~HPET_CFG_LEGACY;
+ hpet_writel(cfg, HPET_CFG);
+ hpet_legacy_int_enabled = 0;
+
+}
+#endif
+
static void hpet_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt)
{
@@ -157,6 +181,9 @@ static void hpet_set_mode(enum clock_eve

switch(mode) {
case CLOCK_EVT_MODE_PERIODIC:
+#ifdef CONFIG_KEXEC
+ hpet_enable_int();
+#endif
delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult;
delta >>= hpet_clockevent.shift;
now = hpet_readl(HPET_COUNTER);
@@ -176,6 +203,9 @@ static void hpet_set_mode(enum clock_eve
break;

case CLOCK_EVT_MODE_ONESHOT:
+#ifdef CONFIG_KEXEC
+ hpet_enable_int();
+#endif
cfg = hpet_readl(HPET_T0_CFG);
cfg &= ~HPET_TN_PERIODIC;
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
@@ -187,6 +217,9 @@ static void hpet_set_mode(enum clock_eve
cfg = hpet_readl(HPET_T0_CFG);
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_T0_CFG);
+#ifdef CONFIG_KEXEC
+ hpet_disable_int();
+#endif
break;

case CLOCK_EVT_MODE_RESUME:
Index: linux-2.6.23-rc3/include/asm-i386/hpet.h
===================================================================
--- linux-2.6.23-rc3.orig/include/asm-i386/hpet.h
+++ linux-2.6.23-rc3/include/asm-i386/hpet.h
@@ -66,6 +66,9 @@
extern unsigned long hpet_address;
extern int is_hpet_enabled(void);
extern int hpet_enable(void);
+#ifdef CONFIG_KEXEC
+extern void hpet_disable_int(void);
+#endif

#ifdef CONFIG_HPET_EMULATE_RTC

Index: linux-2.6.23-rc3/arch/i386/kernel/crash.c
===================================================================
--- linux-2.6.23-rc3.orig/arch/i386/kernel/crash.c
+++ linux-2.6.23-rc3/arch/i386/kernel/crash.c
@@ -24,6 +24,7 @@
#include <asm/apic.h>
#include <linux/kdebug.h>
#include <asm/smp.h>
+#include <asm/hpet.h>

#include <mach_ipi.h>

@@ -128,6 +129,9 @@ void machine_crash_shutdown(struct pt_re

/* Make a note of crashing cpu. Will be used in NMI callback.*/
crashing_cpu = safe_smp_processor_id();
+#ifdef CONFIG_KEXEC
+ hpet_disable_int();
+#endif
nmi_shootdown_cpus();
lapic_shutdown();
#if defined(CONFIG_X86_IO_APIC)
Index: linux-2.6.23-rc3/kernel/time/timekeeping.c
===================================================================
--- linux-2.6.23-rc3.orig/kernel/time/timekeeping.c
+++ linux-2.6.23-rc3/kernel/time/timekeeping.c
@@ -335,8 +335,21 @@ static int timekeeping_suspend(struct sy
return 0;
}

+#ifdef CONFIG_KEXEC
+static int timekeeping_shutdown(struct sys_device *dev)
+{
+ clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
+// hpet_disable_int();
+
+ return 0;
+}
+#endif
+
/* sysfs resume/suspend bits for timekeeping */
static struct sysdev_class timekeeping_sysclass = {
+#ifdef CONFIG_KEXEC
+ .shutdown = timekeeping_shutdown,
+#endif
.resume = timekeeping_resume,
.suspend = timekeeping_suspend,
set_kset_name("timekeeping"),

2007-08-30 18:07:17

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

Konstantin Baydarov <[email protected]> writes:

> Eric, actually calling hpet_disable_int() under
> CLOCK_EVT_MODE_SHUTDOWN is not enough, because
> HPET might not be shutdown at all (we might want to use HPET and don't
> want to use LAPIC timer or in some cases HPET is used as broadcast
> device with LAPIC timers enabled). So, somehow, we should call
> hpet_set_mode with CLOCK_EVT_MODE_SHUTDOWN as an argument before
> machine_kexec. To solve this I've added timekeeping_shutdown() to
> timekeeping.c that calls clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND,
> NULL). Function timekeeping_shutdown() is called from sysdev_shutdown().

Is suspend the right thing? The fact that time keeping seems
to be reinventing the standard infrastructure for shutting
down instead of just reusing it has me a bit confused.

Regardless hooking into the shutdown is the right thing
to do here.

> Also If we are adding hpet_disable_int() under
> CLOCK_EVT_MODE_SHUTDOWN we have to add hpet_enable_int under
> CLOCK_EVT_MODE_PERIODIC and CLOCK_EVT_MODE_ONESHOT.

However that works. I just know that in the device tree we
have shutdown methods that are called on reboot and kexec
and the hpet needs one, and that is where the code belongs.

I was assuming that CLOCK_EVT_MODE_SHUTDOWN just mapped
to the shutdown method of the clock events or something else.
But if shutdown means something different in this context we
can certainly find a better place to hook into the device
tree and call shutdown methods. Especially if that will
make the code simpler.

> Also I've added call of hpet_disable_int() to machine_crash_shutdown()
> because, as you said, sysdev_shutdown() won't be called on crash before
> machine_kexec().

This is a design feature. machine_crash_shutdown is not really
supposed to disable any hardware. There is a very minimal set that we
haven't been able to figure out how to get the kernels initialization
routines to deal with properly. Which is a temporary justification
for not doing more now. If we can't find any way to make the
initialization code more robust for the hpet we can revisit this.

> Also, as we have to make hpet_disable_int() global and call it from
> machine_crash_shutdown(), I suggest not to add hpet_disable_int() under
> CLOCK_EVT_MODE_SHUTDOWN. If we don't add hpet_disable_int() under
> CLOCK_EVT_MODE_SHUTDOWN - patch will be more smaller because we don't
> have to call clockevents_notify() and we don't have to add
> hpet_enable_int. We just have to add hpet_disable_int() call to
> timekeeping_shutdown(). But it's just suggestion - attached patch adds
> hpet_disable_int() under CLOCK_EVT_MODE_SHUTDOWN.

Please remove the CONFIG_KEXEC. We need to do this on a reboot also
so we don't confuse the BIOS. BIOSes frequently, but not always,
can just reset the board to avoid complications like this, but if
we need a shutdown method we need a shutdown method. The kexec
case just exercises things more.

Please can we ignore the machine_crash_shutdown path for the moment.
And revisit it again after we have the normal case fixed.

Adding hpet_disable_int into timekeeping_shutdown at first glance
looks like a layering and maintenance violation.

Eric

> Here is new version of fix. It still against kernel 2.6.23-rc3. Thanks.
>
> Signed-off-by: Konstantin Baydarov <[email protected]>
>
> arch/i386/kernel/crash.c | 4 ++++
> arch/i386/kernel/hpet.c | 33 +++++++++++++++++++++++++++++++++
> include/asm-i386/hpet.h | 3 +++
> kernel/time/timekeeping.c | 13 +++++++++++++
> 4 files changed, 53 insertions(+)
>
> Index: linux-2.6.23-rc3/arch/i386/kernel/hpet.c
> ===================================================================
> --- linux-2.6.23-rc3.orig/arch/i386/kernel/hpet.c
> +++ linux-2.6.23-rc3/arch/i386/kernel/hpet.c
> @@ -144,11 +144,35 @@ static void hpet_enable_int(void)
> {
> unsigned long cfg = hpet_readl(HPET_CFG);
>
> +#ifdef CONFIG_KEXEC
> + if (hpet_legacy_int_enabled)
> + return;
> +#endif

Why do we need this test only for kexec?
> +
> cfg |= HPET_CFG_LEGACY;
> hpet_writel(cfg, HPET_CFG);
> hpet_legacy_int_enabled = 1;
> }
>
> +#ifdef CONFIG_KEXEC
> +void hpet_disable_int(void)
> +{
> + unsigned long cfg;
> +
> + if (!hpet_legacy_int_enabled)
> + return;
> +
> + if (!is_hpet_capable())
> + return;
> +
> + cfg = hpet_readl(HPET_CFG);
> + cfg &= ~HPET_CFG_LEGACY;
> + hpet_writel(cfg, HPET_CFG);
> + hpet_legacy_int_enabled = 0;
> +
> +}
> +#endif
> +
> static void hpet_set_mode(enum clock_event_mode mode,
> struct clock_event_device *evt)
> {
> @@ -157,6 +181,9 @@ static void hpet_set_mode(enum clock_eve
>
> switch(mode) {
> case CLOCK_EVT_MODE_PERIODIC:
> +#ifdef CONFIG_KEXEC
> + hpet_enable_int();
> +#endif
> delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult;
> delta >>= hpet_clockevent.shift;
> now = hpet_readl(HPET_COUNTER);
> @@ -176,6 +203,9 @@ static void hpet_set_mode(enum clock_eve
> break;
>
> case CLOCK_EVT_MODE_ONESHOT:
> +#ifdef CONFIG_KEXEC
> + hpet_enable_int();
> +#endif
> cfg = hpet_readl(HPET_T0_CFG);
> cfg &= ~HPET_TN_PERIODIC;
> cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
> @@ -187,6 +217,9 @@ static void hpet_set_mode(enum clock_eve
> cfg = hpet_readl(HPET_T0_CFG);
> cfg &= ~HPET_TN_ENABLE;
> hpet_writel(cfg, HPET_T0_CFG);
> +#ifdef CONFIG_KEXEC
> + hpet_disable_int();
> +#endif
> break;
>
> case CLOCK_EVT_MODE_RESUME:
> Index: linux-2.6.23-rc3/include/asm-i386/hpet.h
> ===================================================================
> --- linux-2.6.23-rc3.orig/include/asm-i386/hpet.h
> +++ linux-2.6.23-rc3/include/asm-i386/hpet.h
> @@ -66,6 +66,9 @@
> extern unsigned long hpet_address;
> extern int is_hpet_enabled(void);
> extern int hpet_enable(void);
> +#ifdef CONFIG_KEXEC
> +extern void hpet_disable_int(void);
> +#endif
>
> #ifdef CONFIG_HPET_EMULATE_RTC
>
> Index: linux-2.6.23-rc3/arch/i386/kernel/crash.c
> ===================================================================
> --- linux-2.6.23-rc3.orig/arch/i386/kernel/crash.c
> +++ linux-2.6.23-rc3/arch/i386/kernel/crash.c
> @@ -24,6 +24,7 @@
> #include <asm/apic.h>
> #include <linux/kdebug.h>
> #include <asm/smp.h>
> +#include <asm/hpet.h>
>
> #include <mach_ipi.h>
>
> @@ -128,6 +129,9 @@ void machine_crash_shutdown(struct pt_re
>
> /* Make a note of crashing cpu. Will be used in NMI callback.*/
> crashing_cpu = safe_smp_processor_id();
> +#ifdef CONFIG_KEXEC
> + hpet_disable_int();
> +#endif
> nmi_shootdown_cpus();
> lapic_shutdown();
> #if defined(CONFIG_X86_IO_APIC)
> Index: linux-2.6.23-rc3/kernel/time/timekeeping.c
> ===================================================================
> --- linux-2.6.23-rc3.orig/kernel/time/timekeeping.c
> +++ linux-2.6.23-rc3/kernel/time/timekeeping.c
> @@ -335,8 +335,21 @@ static int timekeeping_suspend(struct sy
> return 0;
> }
>
> +#ifdef CONFIG_KEXEC
> +static int timekeeping_shutdown(struct sys_device *dev)
> +{
> + clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
Hmm. Do we want to add a CLOCK_EVT_NOTIFY_SHUTDOWN?
Would that make anything simpler?
> +// hpet_disable_int();
> +
> + return 0;
> +}
> +#endif
> +
> /* sysfs resume/suspend bits for timekeeping */
> static struct sysdev_class timekeeping_sysclass = {
> +#ifdef CONFIG_KEXEC
> + .shutdown = timekeeping_shutdown,
> +#endif
> .resume = timekeeping_resume,
> .suspend = timekeeping_suspend,
> set_kset_name("timekeeping"),

2007-08-31 10:25:37

by Konstantin Baydarov

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

On Thu, 30 Aug 2007 12:04:33 -0600
[email protected] (Eric W. Biederman) wrote:

> I was assuming that CLOCK_EVT_MODE_SHUTDOWN just mapped
> to the shutdown method of the clock events or something else.
> But it shutdown means something different in this context we
> can certainly find a better place to hook into the device
> tree and call shutdown methods. Especially if that will
> make the code simpler.

Agreed. I've got rid of the timekeeping changes. Now I'm using the system device tree shutdown interface, as you suggested. I've added an HPET system device class with a shutdown method, and an HPET device, to sysdev.

> This is a design feature. machine_crash_shutdown is not really
> supposed to disable any hardware. There is a very minimal set that we
> haven't been able to figure out how to get the kernels initialization
> routines to deal with properly. Which is a temporary justification
> for not doing more now. If we can't find anyway to make the
> initialization code more robust for the hpet we can revisit this.

So you suggest checking in the HPET init code whether the HPET is present, even if the HPET is disabled on the kernel boot command line or the APIC is disabled, and, if the HPET is present but the kernel is not going to use it, disabling HPET interrupts?

> Please remove the CONFIG_KEXEC. We need to do this on a reboot also
> so we don't confuse the BIOS. BIOS's frequently but not always
> can just reset the board to avoid complications like this, but if
> we need a shutdown method we need a shutdown method. The kexec
> case just exercises things more.
Removed CONFIG_KEXEC.

So here is a new version of the fix. The patch is against 2.6.23-rc3.
Eric, please review. Thanks.

Signed-off-by: Konstantin Baydarov <[email protected]>

arch/i386/kernel/hpet.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)

Index: linux-2.6.23-rc3/arch/i386/kernel/hpet.c
===================================================================
--- linux-2.6.23-rc3.orig/arch/i386/kernel/hpet.c
+++ linux-2.6.23-rc3/arch/i386/kernel/hpet.c
@@ -144,11 +144,31 @@ static void hpet_enable_int(void)
{
unsigned long cfg = hpet_readl(HPET_CFG);

+ if (hpet_legacy_int_enabled)
+ return;
+
cfg |= HPET_CFG_LEGACY;
hpet_writel(cfg, HPET_CFG);
hpet_legacy_int_enabled = 1;
}

+static void hpet_disable_int(void)
+{
+ unsigned long cfg;
+
+ if (!hpet_legacy_int_enabled)
+ return;
+
+ if (!is_hpet_capable())
+ return;
+
+ cfg = hpet_readl(HPET_CFG);
+ cfg &= ~HPET_CFG_LEGACY;
+ hpet_writel(cfg, HPET_CFG);
+ hpet_legacy_int_enabled = 0;
+
+}
+
static void hpet_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt)
{
@@ -551,3 +571,31 @@ irqreturn_t hpet_rtc_interrupt(int irq,
return IRQ_HANDLED;
}
#endif
+
+static int hpet_shutdown(struct sys_device *dev)
+{
+ /* We need this to make PIT works in KEXECuted kernel */
+ hpet_disable_int();
+
+ return 0;
+}
+
+static struct sysdev_class hpet_sysdev_class = {
+ set_kset_name("hpet"),
+ .shutdown = hpet_shutdown,
+};
+
+static struct sys_device device_hpet = {
+ .id = 0,
+ .cls = &hpet_sysdev_class,
+};
+
+static int __init hpet_init_sysfs(void)
+{
+ int error = sysdev_class_register(&hpet_sysdev_class);
+ if (!error)
+ error = sysdev_register(&device_hpet);
+ return error;
+}
+
+device_initcall(hpet_init_sysfs);

2007-09-17 09:41:05

by Konstantin Baydarov

[permalink] [raw]
Subject: Re: [PATCH] kexec: reenable HPET before kexec

On Fri, 31 Aug 2007 14:31:51 +0400
Konstantin Baydarov <[email protected]> wrote:

> On Thu, 30 Aug 2007 12:04:33 -0600
> [email protected] (Eric W. Biederman) wrote:
>
> > I was assuming that CLOCK_EVT_MODE_SHUTDOWN just mapped
> > to the shutdown method of the clock events or something else.
> > But it shutdown means something different in this context we
> > can certainly find a better place to hook into the device
> > tree and call shutdown methods. Especially if that will
> > make the code simpler.
>
> Agree. I've got rig from timekeeping. Now I'm using system device
> tree shutdown interface, as you suggested. I've added HPET system
> device class with shutdown method and HPET device to sysdev.
>
> > This is a design feature. machine_crash_shutdown is not really
> > supposed to disable any hardware. There is a very minimal set that
> > we haven't been able to figure out how to get the kernels
> > initialization routines to deal with properly. Which is a
> > temporary justification for not doing more now. If we can't find
> > anyway to make the initialization code more robust for the hpet we
> > can revisit this.
>
> So you suggest to check if HPET is present in HPET init code even if
> HPET disabled in boot kernel command line or APIC is disabled. And if
> HPET is present and kernel not going to use it - disable HPET
> interrupts?
>
> > Please remove the CONFIG_KEXEC. We need to do this on a reboot also
> > so we don't confuse the BIOS. BIOS's frequently but not always
> > can just reset the board to avoid complications like this, but if
> > we need a shutdown method we need a shutdown method. The kexec
> > case just exercises things more.
> Removed CONFIG_KEXEC.
>
> So here is new version of fix. Patch against 2.6.23-rc3.
> Eric, review please. Thanks.
>
> Signed-off-by: Konstantin Baydarov <[email protected]>
>
> arch/i386/kernel/hpet.c | 48
> ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48
> insertions(+)
>
> Index: linux-2.6.23-rc3/arch/i386/kernel/hpet.c
> ===================================================================
> --- linux-2.6.23-rc3.orig/arch/i386/kernel/hpet.c
> +++ linux-2.6.23-rc3/arch/i386/kernel/hpet.c
> @@ -144,11 +144,31 @@ static void hpet_enable_int(void)
> {
> unsigned long cfg = hpet_readl(HPET_CFG);
>
> + if (hpet_legacy_int_enabled)
> + return;
> +
> cfg |= HPET_CFG_LEGACY;
> hpet_writel(cfg, HPET_CFG);
> hpet_legacy_int_enabled = 1;
> }
>
> +static void hpet_disable_int(void)
> +{
> + unsigned long cfg;
> +
> + if (!hpet_legacy_int_enabled)
> + return;
> +
> + if (!is_hpet_capable())
> + return;
> +
> + cfg = hpet_readl(HPET_CFG);
> + cfg &= ~HPET_CFG_LEGACY;
> + hpet_writel(cfg, HPET_CFG);
> + hpet_legacy_int_enabled = 0;
> +
> +}
> +
> static void hpet_set_mode(enum clock_event_mode mode,
> struct clock_event_device *evt)
> {
> @@ -551,3 +571,31 @@ irqreturn_t hpet_rtc_interrupt(int irq,
> return IRQ_HANDLED;
> }
> #endif
> +
> +static int hpet_shutdown(struct sys_device *dev)
> +{
> + /* We need this to make PIT works in KEXECuted kernel */
> + hpet_disable_int();
> +
> + return 0;
> +}
> +
> +static struct sysdev_class hpet_sysdev_class = {
> + set_kset_name("hpet"),
> + .shutdown = hpet_shutdown,
> +};
> +
> +static struct sys_device device_hpet = {
> + .id = 0,
> + .cls = &hpet_sysdev_class,
> +};
> +
> +static int __init hpet_init_sysfs(void)
> +{
> + int error = sysdev_class_register(&hpet_sysdev_class);
> + if (!error)
> + error = sysdev_register(&device_hpet);
> + return error;
> +}
> +
> +device_initcall(hpet_init_sysfs);
> -
> To unsubscribe from this list: send the line "unsubscribe
> linux-rt-users" in the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

Eric, can you tell me when you are going to review the latest version of the fix?
Thanks.