In general, unknown NMIs are used by hardware and firmware to notify
the OS of fatal hardware errors. So Linux should treat an unknown NMI
as a hardware error and panic upon receiving one, for better error
containment.
But there are some legacy machines which randomly send unknown
NMIs for no good reason. To support these machines, a whitelist
mechanism is provided to treat unknown NMIs as hardware errors only on
systems that are known to work.
These systems are identified via the presence of an APEI HEST or
certain PCI IDs of the host bridge. The PCI ID of the host bridge is
used instead of the DMI ID, so that the check can be done based on the
platform type instead of the motherboard. This should be simpler and
sufficient.
The method to identify the platforms was designed by Andi Kleen.
Signed-off-by: Huang Ying <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Don Zickus <[email protected]>
---
arch/x86/include/asm/nmi.h | 1
arch/x86/kernel/Makefile | 2 +
arch/x86/kernel/hwerr.c | 61 +++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kernel/traps.c | 31 +++++++++++++++++-----
drivers/acpi/apei/hest.c | 8 +++++
5 files changed, 96 insertions(+), 7 deletions(-)
create mode 100644 arch/x86/kernel/hwerr.c
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -17,6 +17,7 @@ struct ctl_table;
extern int proc_nmi_enabled(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
extern int unknown_nmi_panic;
+extern void set_unknown_nmi_as_hwerr(void);
void arch_trigger_all_cpu_backtrace(void);
#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -112,6 +112,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION)
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_OF) += devicetree.o
+obj-y += hwerr.o
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
--- /dev/null
+++ b/arch/x86/kernel/hwerr.c
@@ -0,0 +1,61 @@
+/*
+ * Hardware error architecture dependent processing
+ *
+ * Copyright 2010,2011 Intel Corp.
+ * Author: Huang Ying <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/nmi.h>
+
+/*
+ * In general, unknown NMI is used by hardware and firmware to notify
+ * fatal hardware errors to OS. So the Linux should treat unknown NMI
+ * as hardware error and go panic upon unknown NMI for better error
+ * containment.
+ *
+ * But there are some legacy machine which would randomly send unknown
+ * NMIs for no good reason. To support these systems, a white list
+ * mechanism is used to treat unknown NMI as hardware error only on
+ * some known working system.
+ *
+ * The PCI ID of host bridge instead of DMI ID is used, so that the
+ * checking can be done based on the platform instead of motherboard.
+ * This should be simpler and sufficient.
+ */
+static const
+struct pci_device_id unknown_nmi_as_hwerr_platform[] __initdata = {
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3406) },
+ { 0, }
+};
+
+int __init check_unknown_nmi_as_hwerr(void)
+{
+ struct pci_dev *dev = NULL;
+
+ for_each_pci_dev(dev) {
+ if (pci_match_id(unknown_nmi_as_hwerr_platform, dev)) {
+ pr_info("System has working NMI, will treat unknown NMI as hardware error!\n");
+ set_unknown_nmi_as_hwerr();
+ break;
+ }
+ }
+
+ return 0;
+}
+late_initcall(check_unknown_nmi_as_hwerr);
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
static int ignore_nmis;
+static int unknown_nmi_as_hwerr;
+
int unknown_nmi_panic;
/*
* Prevent NMI reason port (0x61) being accessed simultaneously, can
@@ -368,12 +370,18 @@ io_check_error(unsigned char reason, str
outb(reason, NMI_REASON_PORT);
}
+void set_unknown_nmi_as_hwerr(void)
+{
+ unknown_nmi_as_hwerr = 1;
+}
+
static notrace __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
NOTIFY_STOP)
return;
+
#ifdef CONFIG_MCA
/*
* Might actually be able to figure out what the guilty party
@@ -384,14 +392,23 @@ unknown_nmi_error(unsigned char reason,
return;
}
#endif
- pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
-
- pr_emerg("Do you have a strange power saving mode enabled?\n");
- if (unknown_nmi_panic || panic_on_unrecovered_nmi)
- panic("NMI: Not continuing");
+ /*
+ * On modern systems, unknown NMI means fatal hardware error, but
+ * this may be not true on some legacy system.
+ */
+ if (unknown_nmi_as_hwerr) {
+ panic("NMI for hardware error without error record: Not continuing\n"
+ "Please check BIOS/BMC log for further information.");
+ } else {
+ pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
+
+ pr_emerg("Do you have a strange power saving mode enabled?\n");
+ if (unknown_nmi_panic || panic_on_unrecovered_nmi)
+ panic("NMI: Not continuing");
- pr_emerg("Dazed and confused, but trying to continue\n");
+ pr_emerg("Dazed and confused, but trying to continue\n");
+ }
}
static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -35,6 +35,7 @@
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/platform_device.h>
+#include <linux/nmi.h>
#include <acpi/apei.h>
#include "apei-internal.h"
@@ -225,6 +226,13 @@ void __init acpi_hest_init(void)
if (rc)
goto err;
+ /*
+ * System has proper HEST should treat unknown NMI as fatal
+ * hardware error notification
+ */
+ pr_info(HEST_PFX "HEST is valid, will treat unknown NMI as hardware error!\n");
+ set_unknown_nmi_as_hwerr();
+
rc = hest_ghes_dev_register(ghes_count);
if (!rc) {
pr_info(HEST_PFX "Table parsing has been initialized.\n");
On Fri, May 13, 2011 at 04:23:38PM +0800, Huang Ying wrote:
> In general, unknown NMI is used by hardware and firmware to notify
> fatal hardware errors to OS. So the Linux should treat unknown NMI as
> hardware error and go panic upon unknown NMI for better error
> containment.
I have a couple of concerns about this patch. First, I don't think BIOSes
are ready for this. I have Intel Westmere boxes that say they have
valid HEST, GHES, and EINJ tables, but when I inject an error there is no
GHES record. This leaves me with an unknown NMI and a panic. Yeah, it is a
BIOS bug I guess, but I think vendors are going to be slow fixing all this
stuff (my Nehalem box is in even worse shape with this stuff).
Also, are there any known issues with x86_64 platforms with bad NMIs? RHEL
has had unknown NMIs panic on x86_64 since x86_64 first came out, and I don't
recall any exceptions we had to add to handle 'quirky' hardware.
Then for the i686 case, because the 'quirky' hardware is so old, can't we
just leave it as a kernel config option to switch between using a 'printk'
vs. a 'panic'? Or even a kernel command line option.
I figure these 'quirky' hardware machines are more the exception nowadays;
do we really need to add code to whitelist machines?
Granted I am not familiar enough with the quirky hardware (in fact I don't
think I have seen any mainly because I haven't been around long enough).
Most cases I see when trolling through the fedora bugzilla list for
unknown NMIs, is just bad firmware or acpi power configurations.
Just wondering if we could just simplify the patch somehow with better
assumptions.
Cheers,
Don
>
> But there are some legacy machine which would randomly send unknown
> NMIs for no good reason. To support these machines, a white list
> mechanism is provided to treat unknown NMI as hardware error only on
> some known working system.
>
> These systems are identified via the presentation of APEI HEST or
> some PCI ID of the host bridge. The PCI ID of host bridge instead of
> DMI ID is used, so that the checking can be done based on the platform
> type instead of motherboard. This should be simpler and sufficient.
>
> The method to identify the platforms is designed by Andi Kleen.
>
> Signed-off-by: Huang Ying <[email protected]>
> Cc: Andi Kleen <[email protected]>
> Cc: Don Zickus <[email protected]>
> ---
> arch/x86/include/asm/nmi.h | 1
> arch/x86/kernel/Makefile | 2 +
> arch/x86/kernel/hwerr.c | 61 +++++++++++++++++++++++++++++++++++++++++++++
> arch/x86/kernel/traps.c | 31 +++++++++++++++++-----
> drivers/acpi/apei/hest.c | 8 +++++
> 5 files changed, 96 insertions(+), 7 deletions(-)
> create mode 100644 arch/x86/kernel/hwerr.c
>
> --- a/arch/x86/include/asm/nmi.h
> +++ b/arch/x86/include/asm/nmi.h
> @@ -17,6 +17,7 @@ struct ctl_table;
> extern int proc_nmi_enabled(struct ctl_table *, int ,
> void __user *, size_t *, loff_t *);
> extern int unknown_nmi_panic;
> +extern void set_unknown_nmi_as_hwerr(void);
>
> void arch_trigger_all_cpu_backtrace(void);
> #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -112,6 +112,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION)
> obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
> obj-$(CONFIG_OF) += devicetree.o
>
> +obj-y += hwerr.o
> +
> ###
> # 64 bit specific files
> ifeq ($(CONFIG_X86_64),y)
> --- /dev/null
> +++ b/arch/x86/kernel/hwerr.c
> @@ -0,0 +1,61 @@
> +/*
> + * Hardware error architecture dependent processing
> + *
> + * Copyright 2010,2011 Intel Corp.
> + * Author: Huang Ying <[email protected]>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/pci.h>
> +#include <linux/init.h>
> +#include <linux/nmi.h>
> +
> +/*
> + * In general, unknown NMI is used by hardware and firmware to notify
> + * fatal hardware errors to OS. So the Linux should treat unknown NMI
> + * as hardware error and go panic upon unknown NMI for better error
> + * containment.
> + *
> + * But there are some legacy machine which would randomly send unknown
> + * NMIs for no good reason. To support these systems, a white list
> + * mechanism is used to treat unknown NMI as hardware error only on
> + * some known working system.
> + *
> + * The PCI ID of host bridge instead of DMI ID is used, so that the
> + * checking can be done based on the platform instead of motherboard.
> + * This should be simpler and sufficient.
> + */
> +static const
> +struct pci_device_id unknown_nmi_as_hwerr_platform[] __initdata = {
> + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3406) },
> + { 0, }
> +};
> +
> +int __init check_unknown_nmi_as_hwerr(void)
> +{
> + struct pci_dev *dev = NULL;
> +
> + for_each_pci_dev(dev) {
> + if (pci_match_id(unknown_nmi_as_hwerr_platform, dev)) {
> + pr_info("System has working NMI, will treat unknown NMI as hardware error!\n");
> + set_unknown_nmi_as_hwerr();
> + break;
> + }
> + }
> +
> + return 0;
> +}
> +late_initcall(check_unknown_nmi_as_hwerr);
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
>
> static int ignore_nmis;
>
> +static int unknown_nmi_as_hwerr;
> +
> int unknown_nmi_panic;
> /*
> * Prevent NMI reason port (0x61) being accessed simultaneously, can
> @@ -368,12 +370,18 @@ io_check_error(unsigned char reason, str
> outb(reason, NMI_REASON_PORT);
> }
>
> +void set_unknown_nmi_as_hwerr(void)
> +{
> + unknown_nmi_as_hwerr = 1;
> +}
> +
> static notrace __kprobes void
> unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
> {
> if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
> NOTIFY_STOP)
> return;
> +
> #ifdef CONFIG_MCA
> /*
> * Might actually be able to figure out what the guilty party
> @@ -384,14 +392,23 @@ unknown_nmi_error(unsigned char reason,
> return;
> }
> #endif
> - pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
> - reason, smp_processor_id());
> -
> - pr_emerg("Do you have a strange power saving mode enabled?\n");
> - if (unknown_nmi_panic || panic_on_unrecovered_nmi)
> - panic("NMI: Not continuing");
> + /*
> + * On modern systems, unknown NMI means fatal hardware error, but
> + * this may be not true on some legacy system.
> + */
> + if (unknown_nmi_as_hwerr) {
> + panic("NMI for hardware error without error record: Not continuing\n"
> + "Please check BIOS/BMC log for further information.");
> + } else {
> + pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
> + reason, smp_processor_id());
> +
> + pr_emerg("Do you have a strange power saving mode enabled?\n");
> + if (unknown_nmi_panic || panic_on_unrecovered_nmi)
> + panic("NMI: Not continuing");
>
> - pr_emerg("Dazed and confused, but trying to continue\n");
> + pr_emerg("Dazed and confused, but trying to continue\n");
> + }
> }
>
> static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
> --- a/drivers/acpi/apei/hest.c
> +++ b/drivers/acpi/apei/hest.c
> @@ -35,6 +35,7 @@
> #include <linux/highmem.h>
> #include <linux/io.h>
> #include <linux/platform_device.h>
> +#include <linux/nmi.h>
> #include <acpi/apei.h>
>
> #include "apei-internal.h"
> @@ -225,6 +226,13 @@ void __init acpi_hest_init(void)
> if (rc)
> goto err;
>
> + /*
> + * System has proper HEST should treat unknown NMI as fatal
> + * hardware error notification
> + */
> + pr_info(HEST_PFX "HEST is valid, will treat unknown NMI as hardware error!\n");
> + set_unknown_nmi_as_hwerr();
> +
> rc = hest_ghes_dev_register(ghes_count);
> if (!rc) {
> pr_info(HEST_PFX "Table parsing has been initialized.\n");
* Don Zickus <[email protected]> wrote:
> On Fri, May 13, 2011 at 04:23:38PM +0800, Huang Ying wrote:
> > In general, unknown NMI is used by hardware and firmware to notify
> > fatal hardware errors to OS. So the Linux should treat unknown NMI as
> > hardware error and go panic upon unknown NMI for better error
> > containment.
>
> I have a couple of concerns about this patch. One I don't think BIOSes
> are ready for this. I have Intel Westmere boxes that say they have a
> valid HEST, GHES, and EINJ table, but when I inject an error there is no
> GHES record. This leaves me with an unknown NMI and panic. Yeah, it is a
> BIOS bug I guess, but I think vendors are going to be slow fixing all this
> stuff (my Nehalem box is in even worse shape with this stuff).
Agreed, doing this is not a very good idea - we have spurious unknown NMIs
again and again, crashing the box is not a good idea.
What should be done instead is to add an event for unknown NMIs, which can then
be processed by the RAS daemon to implement policy.
By using 'active' event filters it could even be set on a system to panic the
box by default.
Thanks,
Ingo
Hi, Don,
On Fri, May 13, 2011 at 8:45 PM, Don Zickus <[email protected]> wrote:
> On Fri, May 13, 2011 at 04:23:38PM +0800, Huang Ying wrote:
>> In general, unknown NMI is used by hardware and firmware to notify
>> fatal hardware errors to OS. So the Linux should treat unknown NMI as
>> hardware error and go panic upon unknown NMI for better error
>> containment.
>
> I have a couple of concerns about this patch. One I don't think BIOSes
> are ready for this. I have Intel Westmere boxes that say they have a
> valid HEST, GHES, and EINJ table, but when I inject an error there is no
> GHES record. This leaves me with an unknown NMI and panic. Yeah, it is a
> BIOS bug I guess, but I think vendors are going to be slow fixing all this
> stuff (my Nehalem box is in even worse shape with this stuff).
Although there is no GHES record, I think the Westmere box's behavior is
acceptable: an unknown NMI is used by the BIOS to notify the OS of a
hardware error, and this is what we want to deal with in this patch.
> Also, is there any known issues with x86_64 platforms with bad NMIs? RHEL
> has had unknown NMI's panic on x86_64 since x86_64 first came out, I don't
> recall any exceptions we had to add to handle 'quirky' hardware.
>
> Then for the i686 case, because the 'quirky' hardware is so old, can't we
> just leave it a kernel config option to switch between using a 'printk'
> vs. a 'panic'? Or even a kernel command line option.
>
> I figure these 'quirky' hardware machines are more the exception nowdays,
> do we really need to add code to whitelist machines?
>
> Granted I am not familiar enough with the quirky hardware (in fact I don't
> think I have seen any mainly because I haven't been around long enough).
> Most cases I see when trolling through the fedora bugzilla list for
> unknown NMIs, is just bad firmware or acpi power configurations.
>
> Just wondering if we could just simplify the patch somehow with better
> assumptions.
So there are still unknown NMIs on real hardware now. I am afraid that
turning on panic on unknown NMI by default may not be acceptable to
some people.
Best Regards,
Huang Ying
Hi, Ingo,
On Fri, May 13, 2011 at 9:00 PM, Ingo Molnar <[email protected]> wrote:
>
> * Don Zickus <[email protected]> wrote:
>
>> On Fri, May 13, 2011 at 04:23:38PM +0800, Huang Ying wrote:
>> > In general, unknown NMI is used by hardware and firmware to notify
>> > fatal hardware errors to OS. So the Linux should treat unknown NMI as
>> > hardware error and go panic upon unknown NMI for better error
>> > containment.
>>
>> I have a couple of concerns about this patch. One I don't think BIOSes
>> are ready for this. I have Intel Westmere boxes that say they have a
>> valid HEST, GHES, and EINJ table, but when I inject an error there is no
>> GHES record. This leaves me with an unknown NMI and panic. Yeah, it is a
>> BIOS bug I guess, but I think vendors are going to be slow fixing all this
>> stuff (my Nehalem box is in even worse shape with this stuff).
>
> Agreed, doing this is not a very good idea - we have spurious unknown NMIs
> again and again, crashing the box is not a good idea.
So we use a whitelist to filter out hardware that sends spurious NMIs.
> What should be done instead is to add an event for unknown NMIs, which can then
> be processed by the RAS daemon to implement policy.
>
> By using 'active' event filters it could even be set on a system to panic the
> box by default.
If there is a real fatal hardware error, we may not have the luxury of
going from the NMI handler to the user space RAS daemon to determine what
to do. The system may explode, or bad data may go to disk, before that.
Best Regards,
Huang Ying
On Fri, May 13, 2011 at 09:17:13PM +0800, huang ying wrote:
> Hi, Don,
>
> On Fri, May 13, 2011 at 8:45 PM, Don Zickus <[email protected]> wrote:
> > On Fri, May 13, 2011 at 04:23:38PM +0800, Huang Ying wrote:
> >> In general, unknown NMI is used by hardware and firmware to notify
> >> fatal hardware errors to OS. So the Linux should treat unknown NMI as
> >> hardware error and go panic upon unknown NMI for better error
> >> containment.
> >
> > > I have a couple of concerns about this patch. One I don't think BIOSes
> > > are ready for this. I have Intel Westmere boxes that say they have a
> > > valid HEST, GHES, and EINJ table, but when I inject an error there is no
> > > GHES record. This leaves me with an unknown NMI and panic. Yeah, it is a
> > > BIOS bug I guess, but I think vendors are going to be slow fixing all this
> > > stuff (my Nehalem box is in even worse shape with this stuff).
>
> Although there is no GHES record, I think the Westmere box behavior is
> acceptable, an unknown NMI is used by BIOS to notify hardware error,
> this is what we want to deal with in this patch.
I don't think having HEST changes the situation. I agree with your
statement above, but I can also generate unknown NMIs by stressing perf.
Broken hardware usually generated NMIs; sometimes they propagated to the
cpu, other times they were swallowed by the chipset. Which means having
HEST or not having HEST doesn't improve anything nor make it any worse.
IOW I don't think we gain anything with this patch.
>
> > > Also, is there any known issues with x86_64 platforms with bad NMIs? RHEL
> > > has had unknown NMI's panic on x86_64 since x86_64 first came out, I don't
> > > recall any exceptions we had to add to handle 'quirky' hardware.
> > >
> > > Then for the i686 case, because the 'quirky' hardware is so old, can't we
> > > just leave it a kernel config option to switch between using a 'printk'
> > > vs. a 'panic'? Or even a kernel command line option.
> >
> > I figure these 'quirky' hardware machines are more the exception nowdays,
> > do we really need to add code to whitelist machines?
> >
> > Granted I am not familiar enough with the quirky hardware (in fact I don't
> > think I have seen any mainly because I haven't been around long enough).
> > Most cases I see when trolling through the fedora bugzilla list for
> > unknown NMIs, is just bad firmware or acpi power configurations.
> >
> > Just wondering if we could just simplify the patch somehow with better
> > assumptions.
>
> So there is still unknown NMIs on real hardware now. I am afraid turn
> on panic on unknown NMI by default may be not acceptable for someone.
The opposite could be said too. I think that was Ingo's point. The
policy should be left in the hands of the user or distro because there is
no right answer.
Cheers,
Don
On 05/13/2011 12:23 PM, Huang Ying wrote:
> In general, unknown NMI is used by hardware and firmware to notify
> fatal hardware errors to OS. So the Linux should treat unknown NMI as
> hardware error and go panic upon unknown NMI for better error
> containment.
>
> But there are some legacy machine which would randomly send unknown
> NMIs for no good reason. To support these machines, a white list
> mechanism is provided to treat unknown NMI as hardware error only on
> some known working system.
>
> These systems are identified via the presentation of APEI HEST or
> some PCI ID of the host bridge. The PCI ID of host bridge instead of
> DMI ID is used, so that the checking can be done based on the platform
> type instead of motherboard. This should be simpler and sufficient.
>
> The method to identify the platforms is designed by Andi Kleen.
>
> Signed-off-by: Huang Ying <[email protected]>
> Cc: Andi Kleen <[email protected]>
> Cc: Don Zickus <[email protected]>
> ---
...
Hi Ying,
just curious (regardless of the concerns Don and Ingo have) -- if there is still a need
for such semi-unknown NMI handling, maybe it's worth registering a *notifier* for it
and panicking only when the user *explicitly* specifies how to treat this class of NMIs
(via, say, a "hest-nmi-panic" boot option or something like that). Maybe such a partially
modular scheme would be better? Unless I'm missing something.
--
Cyrill
* huang ying <[email protected]> wrote:
> > What should be done instead is to add an event for unknown NMIs, which can
> > then be processed by the RAS daemon to implement policy.
> >
> > By using 'active' event filters it could even be set on a system to panic
> > the box by default.
>
> If there is real fatal hardware error, maybe we have no luxury to go from NMI
> handler to user space RAS daemon to determine what to do. System may explode,
> bad data may go to disk before that.
That is why i suggested:
> > By using 'active' event filters it could even be set on a system to panic
> > the box by default.
event filters are evaluated in the kernel, so the panic could be instantaneous,
without the event having to reach user-space.
Thanks,
Ingo
On Fri, May 13, 2011 at 05:20:33PM +0200, Ingo Molnar wrote:
>
> * huang ying <[email protected]> wrote:
>
> > > What should be done instead is to add an event for unknown NMIs, which can
> > > then be processed by the RAS daemon to implement policy.
> > >
> > > By using 'active' event filters it could even be set on a system to panic
> > > the box by default.
> >
> > If there is real fatal hardware error, maybe we have no luxury to go from NMI
> > handler to user space RAS daemon to determine what to do. System may explode,
> > bad data may go to disk before that.
>
> That is why i suggested:
>
> > > By using 'active' event filters it could even be set on a system to panic
> > > the box by default.
>
> event filters are evaluated in the kernel, so the panic could be instantaneous,
> without the event having to reach user-space.
Interesting. Question though, what do you mean by 'event filtering'? Is
that different than setting 'unknown_nmi_panic' on the command line
or via procfs?
Or are you suggesting something like registering another callback on the
die_chain that looks for DIE_NMIUNKNOWN as the event, swallows those events
and implements the policy? That way only HEST-related platforms would
register it, while others would keep the default 'Dazed and confused'
messages?
Cheers,
Don
On Fri, May 13, 2011 at 9:51 PM, Don Zickus <[email protected]> wrote:
> On Fri, May 13, 2011 at 09:17:13PM +0800, huang ying wrote:
>> Hi, Don,
>>
>> On Fri, May 13, 2011 at 8:45 PM, Don Zickus <[email protected]> wrote:
>> > On Fri, May 13, 2011 at 04:23:38PM +0800, Huang Ying wrote:
>> >> In general, unknown NMI is used by hardware and firmware to notify
>> >> fatal hardware errors to OS. So the Linux should treat unknown NMI as
>> >> hardware error and go panic upon unknown NMI for better error
>> >> containment.
>> >
>> > I have a couple of concerns about this patch. One I don't think BIOSes
>> > are ready for this. I have Intel Westmere boxes that say they have a
>> > valid HEST, GHES, and EINJ table, but when I inject an error there is no
>> > GHES record. This leaves me with an unknown NMI and panic. Yeah, it is a
>> > BIOS bug I guess, but I think vendors are going to be slow fixing all this
>> > stuff (my Nehalem box is in even worse shape with this stuff).
>>
>> Although there is no GHES record, I think the Westmere box behavior is
>> acceptable, an unknown NMI is used by BIOS to notify hardware error,
>> this is what we want to deal with in this patch.
>
> I don't think having HEST changes the situation. I agree with your
> statement above, but I can also generate unknown NMIs from stressing perf.
Yes. perf can still generate unknown NMIs. Maybe we should turn off the
panic-on-unknown-NMI logic if perf is running, and maybe add a warning to
users that if you use perf, you may lose some RAS features.
> Broken hardware usually generated NMIs, sometimes they propogated to the
> cpu, other times, the were swallowed by the chipset. Which means having
> HEST or not having HEST doesn't improve anything nor make it any worse.
>
> IOW I don't think we gain anything with this patch.
Without this patch, a real fatal hardware error may silently ruin your
disk data. But with this patch, you can panic before that. I think
this is what we gain with this patch.
>>
>> > Also, is there any known issues with x86_64 platforms with bad NMIs? RHEL
>> > has had unknown NMI's panic on x86_64 since x86_64 first came out, I don't
>> > recall any exceptions we had to add to handle 'quirky' hardware.
>> >
>> > Then for the i686 case, because the 'quirky' hardware is so old, can't we
>> > just leave it a kernel config option to switch between using a 'printk'
>> > vs. a 'panic'? Or even a kernel command line option.
>> >
>> > I figure these 'quirky' hardware machines are more the exception nowdays,
>> > do we really need to add code to whitelist machines?
>> >
>> > Granted I am not familiar enough with the quirky hardware (in fact I don't
>> > think I have seen any mainly because I haven't been around long enough).
>> > Most cases I see when trolling through the fedora bugzilla list for
>> > unknown NMIs, is just bad firmware or acpi power configurations.
>> >
>> > Just wondering if we could just simplify the patch somehow with better
>> > assumptions.
>>
>> So there is still unknown NMIs on real hardware now. I am afraid turn
>> on panic on unknown NMI by default may be not acceptable for someone.
>
> The opposite could be said too. I think that was Ingo's point. The
> policy should be left in the hands of the user or distro because there is
> no right answer.
IMHO, Linux is not X, so Linux kernel will not push all policy to user
space. And for fatal hardware error processing, there may be no
opportunity for user space to run.
Best Regards,
Huang Ying
On Fri, May 13, 2011 at 11:17 PM, Cyrill Gorcunov <[email protected]> wrote:
> On 05/13/2011 12:23 PM, Huang Ying wrote:
>> In general, unknown NMI is used by hardware and firmware to notify
>> fatal hardware errors to OS. So the Linux should treat unknown NMI as
>> hardware error and go panic upon unknown NMI for better error
>> containment.
>>
>> But there are some legacy machine which would randomly send unknown
>> NMIs for no good reason. To support these machines, a white list
>> mechanism is provided to treat unknown NMI as hardware error only on
>> some known working system.
>>
>> These systems are identified via the presentation of APEI HEST or
>> some PCI ID of the host bridge. The PCI ID of host bridge instead of
>> DMI ID is used, so that the checking can be done based on the platform
>> type instead of motherboard. This should be simpler and sufficient.
>>
>> The method to identify the platforms is designed by Andi Kleen.
>>
>> Signed-off-by: Huang Ying <[email protected]>
>> Cc: Andi Kleen <[email protected]>
>> Cc: Don Zickus <[email protected]>
>> ---
> ...
>
> Hi Ying,
>
> just curious (regardless the concerns Don and Ingo have) -- if there still a need
> for such semi-unknown nmi handling maybe it's worth to register a *notifier* for it
> and we panic only when user *explicitly* specify how to treat this class of NMIs
> (via say "hest-nmi-panic" boot option or something like that). Maybe such partially
> modular scheme would be better? If only I don't miss anything.
Hi, Cyrill,
IMHO, pushing all policy to the user is not good either. How many users
understand unknown NMIs and hardware errors clearly? It is better if we
can determine what the right behavior is.
Best Regards,
Huang Ying
On Fri, May 13, 2011 at 11:17 PM, Cyrill Gorcunov <[email protected]> wrote:
> On 05/13/2011 12:23 PM, Huang Ying wrote:
>> In general, unknown NMI is used by hardware and firmware to notify
>> fatal hardware errors to OS. So the Linux should treat unknown NMI as
>> hardware error and go panic upon unknown NMI for better error
>> containment.
>>
>> But there are some legacy machine which would randomly send unknown
>> NMIs for no good reason. To support these machines, a white list
>> mechanism is provided to treat unknown NMI as hardware error only on
>> some known working system.
>>
>> These systems are identified via the presentation of APEI HEST or
>> some PCI ID of the host bridge. The PCI ID of host bridge instead of
>> DMI ID is used, so that the checking can be done based on the platform
>> type instead of motherboard. This should be simpler and sufficient.
>>
>> The method to identify the platforms is designed by Andi Kleen.
>>
>> Signed-off-by: Huang Ying <[email protected]>
>> Cc: Andi Kleen <[email protected]>
>> Cc: Don Zickus <[email protected]>
>> ---
> ...
>
> Hi Ying,
>
> just curious (regardless the concerns Don and Ingo have) -- if there still a need
> for such semi-unknown nmi handling maybe it's worth to register a *notifier* for it
> and we panic only when user *explicitly* specify how to treat this class of NMIs
> (via say "hest-nmi-panic" boot option or something like that). Maybe such partially
> modular scheme would be better? If only I don't miss anything.
Hi, Cyrill,
IMHO, pushing all policy to the user is not good either. How many users
understand unknown NMIs and hardware errors clearly? It is better if we
can determine what the right behavior is.
Best Regards,
Huang Ying
On Fri, May 13, 2011 at 11:20 PM, Ingo Molnar <[email protected]> wrote:
>
> * huang ying <[email protected]> wrote:
>
>> > What should be done instead is to add an event for unknown NMIs, which can
>> > then be processed by the RAS daemon to implement policy.
>> >
>> > By using 'active' event filters it could even be set on a system to panic
>> > the box by default.
>>
>> If there is real fatal hardware error, maybe we have no luxury to go from NMI
>> handler to user space RAS daemon to determine what to do. System may explode,
>> bad data may go to disk before that.
>
> That is why i suggested:
>
> > > By using 'active' event filters it could even be set on a system to panic
> > > the box by default.
>
> event filters are evaluated in the kernel, so the panic could be instantaneous,
> without the event having to reach user-space.
Yes. If we do that in the kernel, it should be doable.
Do 'active' event filters differ much from the DIE_NMIUNKNOWN
notifier chain? What do we gain from the added complexity? What do
you think is the better way to decide whether to panic on an unknown
NMI or not?
Best Regards,
Huang Ying
> IMHO, Linux is not X, so Linux kernel will not push all policy to user
> space. And for fatal hardware error processing, there may be no
> opportunity for user space to run.
s/may/is/
Everyone who claims something else doesn't like your data :-)
-Andi
On 05/14/2011 04:26 AM, huang ying wrote:
> On Fri, May 13, 2011 at 11:17 PM, Cyrill Gorcunov <[email protected]> wrote:
>> On 05/13/2011 12:23 PM, Huang Ying wrote:
>>> In general, unknown NMI is used by hardware and firmware to notify
>>> fatal hardware errors to OS. So the Linux should treat unknown NMI as
>>> hardware error and go panic upon unknown NMI for better error
>>> containment.
>>>
>>> But there are some legacy machine which would randomly send unknown
>>> NMIs for no good reason. To support these machines, a white list
>>> mechanism is provided to treat unknown NMI as hardware error only on
>>> some known working system.
>>>
>>> These systems are identified via the presentation of APEI HEST or
>>> some PCI ID of the host bridge. The PCI ID of host bridge instead of
>>> DMI ID is used, so that the checking can be done based on the platform
>>> type instead of motherboard. This should be simpler and sufficient.
>>>
>>> The method to identify the platforms is designed by Andi Kleen.
>>>
>>> Signed-off-by: Huang Ying <[email protected]>
>>> Cc: Andi Kleen <[email protected]>
>>> Cc: Don Zickus <[email protected]>
>>> ---
>> ...
>>
>> Hi Ying,
>>
>> just curious (regardless the concerns Don and Ingo have) -- if there still a need
>> for such semi-unknown nmi handling maybe it's worth to register a *notifier* for it
>> and we panic only when user *explicitly* specify how to treat this class of NMIs
>> (via say "hest-nmi-panic" boot option or something like that). Maybe such partially
>> modular scheme would be better? If only I don't miss anything.
>
> Hi, Cyrill,
>
> IMHO, Pushing all policy to user is not good too. How many users
> understand unknown NMI and hardware error clearly? It is better if we
> can determine what is the right behavior.
>
> Best Regards,
> Huang Ying
Hi Ying,
yes, it is not good. But I think we *must* at least provide a way to turn this new feature off
via the command line. One reason for me is perf unknown NMIs (at the moment we seem
to have caught and cured all sources of parasitic NMIs, but there is no guarantee we won't
meet them in the future due to some code change or whatever). And bloating traps.c with
new if()'s is not that good I guess, which is why I asked if there is a way to do all the
work via notifiers ;)
--
Cyrill
On Sat, May 14, 2011 at 3:51 PM, Cyrill Gorcunov <[email protected]> wrote:
> On 05/14/2011 04:26 AM, huang ying wrote:
>> On Fri, May 13, 2011 at 11:17 PM, Cyrill Gorcunov <[email protected]> wrote:
>>> Hi Ying,
>>>
>>> just curious (regardless the concerns Don and Ingo have) -- if there still a need
>>> for such semi-unknown nmi handling maybe it's worth to register a *notifier* for it
>>> and we panic only when user *explicitly* specify how to treat this class of NMIs
>>> (via say "hest-nmi-panic" boot option or something like that). Maybe such partially
>>> modular scheme would be better? If only I don't miss anything.
>>
>> Hi, Cyrill,
>>
>> IMHO, Pushing all policy to user is not good too. How many users
>> understand unknown NMI and hardware error clearly? It is better if we
>> can determine what is the right behavior.
>>
>
> yes, is not good. But at least we *must* provide a way to turn this new feature off
> via command line I think. One of a reason for me is perf unknown nmis (at moment we seems
> to have captured and cured all parasite NMIs sources but there is no guarantee we wont
> meet them in future due to some code change or whatever). And bloating trap.c with
> new if()'s is not that good I guess, that is why I asked if there a way to do all the
> work via notifiers ;)
Yes. We should consider the perf unknown NMI issues. But compared
with pushing all the magic to the user, I think the better way is to have a
better default behavior in the kernel. For example, we can temporarily turn off
the unknown-NMI-as-hwerr logic if there is more than 1 perf
NMI event in action. Is that reasonable?
And I am not a big fan of notifiers; they make the code hard to
understand. If you have concerns about the size of traps.c, we can
move all the NMI logic to a new file.
Best Regards,
Huang Ying
On 05/15/2011 04:06 AM, huang ying wrote:
...
>>
>> yes, is not good. But at least we *must* provide a way to turn this new feature off
>> via command line I think. One of a reason for me is perf unknown nmis (at moment we seems
>> to have captured and cured all parasite NMIs sources but there is no guarantee we wont
>> meet them in future due to some code change or whatever). And bloating trap.c with
>> new if()'s is not that good I guess, that is why I asked if there a way to do all the
>> work via notifiers ;)
>
> Yes. We should consider about perf unknown NMI issues. But compared
> with pushing all magic to user, I think the better way is to have a
> better default behavior in kernel. For example, we can turn off
> unknown NMI as hwerr logic temporarily if there are more than 1 perf
> NMI events in action. Is that reasonable?
I'm personally fine even if it's enabled by default; my only concern is having
an option to disable hwerr from the boot command line.
>
> And, I am not a big fan of notifiers, that makes code hard to be
> understood. If you have concerns about the size of traps.c, we can
> move all NMI logic to a new file.
Ying, the concern is rather about the code scheme in general. Since
we have notifiers, I think the better way is to be consistent here and use
a hwerr notifier too. But that's just IMHO ;)
>
> Best Regards,
> Huang Ying
--
Cyrill
On 05/15/2011 02:34 PM, Cyrill Gorcunov wrote:
> On 05/15/2011 04:06 AM, huang ying wrote:
> ...
>>>
>>> yes, is not good. But at least we *must* provide a way to turn this new feature off
>>> via command line I think. One of a reason for me is perf unknown nmis (at moment we seems
>>> to have captured and cured all parasite NMIs sources but there is no guarantee we wont
>>> meet them in future due to some code change or whatever). And bloating trap.c with
>>> new if()'s is not that good I guess, that is why I asked if there a way to do all the
>>> work via notifiers ;)
>>
>> Yes. We should consider about perf unknown NMI issues. But compared
>> with pushing all magic to user, I think the better way is to have a
>> better default behavior in kernel. For example, we can turn off
>> unknown NMI as hwerr logic temporarily if there are more than 1 perf
>> NMI events in action. Is that reasonable?
>
> I'm personally fine even if it's enabled by default, only worried to have
> an option to disable hwerr from boot line.
Is the white list mechanism not sufficient? Can spurious unknown NMIs
occur on white-listed machines? Don't people want to protect their data?
>> And, I am not a big fan of notifiers, that makes code hard to be
>> understood. If you have concerns about the size of traps.c, we can
>> move all NMI logic to a new file.
>
> Ying, the concern is rather related to the code scheme in general. Since
> we have notifiers I think the better way to be consistent here and use
> hwerr notifier too. But it's IMHO ;)
As for whether to use notifiers or not, IMHO a rule can be:
- If it is something like a driver, then it should use a notifier
- If it is an architectural/PC de facto standard, it can sit outside of
the notifier.
I think that treating unknown NMIs as hardware errors should be part of the PC
de facto standard. Do you think so?
Best Regards,
Huang Ying
* Don Zickus <[email protected]> wrote:
> On Fri, May 13, 2011 at 05:20:33PM +0200, Ingo Molnar wrote:
> >
> > * huang ying <[email protected]> wrote:
> >
> > > > What should be done instead is to add an event for unknown NMIs, which can
> > > > then be processed by the RAS daemon to implement policy.
> > > >
> > > > By using 'active' event filters it could even be set on a system to panic
> > > > the box by default.
> > >
> > > If there is real fatal hardware error, maybe we have no luxury to go from NMI
> > > handler to user space RAS daemon to determine what to do. System may explode,
> > > bad data may go to disk before that.
> >
> > That is why i suggested:
> >
> > > > By using 'active' event filters it could even be set on a system to panic
> > > > the box by default.
> >
> > event filters are evaluated in the kernel, so the panic could be instantaneous,
> > without the event having to reach user-space.
>
> Interesting. Question though, what do you mean by 'event filtering'. Is
> that different then setting 'unknown_nmi_panic' panic on the commandline or
> procfs?
>
> Or are you suggesting something like registering another callback on the
> die_chain that looks for DIE_NMIUNKNOWN as the event, swallows them and
> implements the policy? That way only on HEST related platforms would
> register them while others would keep the default of 'Dazed and confused'
> messages?
The idea is that "event filters", which are an existing upstream feature and
which can be used in rather flexible ways:
http://lkml.org/lkml/2011/4/27/660
Could be used to trigger non-standard policy action as well - such as to panic
the box.
This would replace various very limited /debugfs and /sys event filtering hacks
(and hardcoded policies) such as arch/x86/kernel/cpu/mcheck/mce-severity.c, and
it would allow nonstandard behavior like 'panic the box on unknown NMIs' as
well.
This could be set by the RAS daemon, and it could be propagated to the kernel
boot line as well, where event filter syntax would look like this:
events=nmi::unknown:"if (reason == 0) panic();"
(Where the 'reason' field of the NMI event is the current legacy 'reason' value
there.)
The filter code would have to be modified to be able to recognize the panic()
bit, but that's desirable anyway and it is a one-time effort.
This:
events=nmi::unknown:"if (reason == 0) ignore();"
would be a possible outcome as well, on certain boxes - to skip certain events.
Thanks,
Ingo
On Mon, May 16, 2011 at 09:09:45AM +0800, Huang Ying wrote:
> > Ying, the concern is rather related to the code scheme in general. Since
> > we have notifiers I think the better way to be consistent here and use
> > hwerr notifier too. But it's IMHO ;)
>
> As for go notifiers or not. IMHO, a rule can be:
>
> - If it is something like a driver, than it should go notifier
> - If it is architectural/PC defacto standard, it can sit outside of
> notifier.
Hmm, then what do you do about perf? That is architectural and a defacto
standard, but I am not sure hardcoding that would be appropriate.
>
> I think that seeing unknown NMI as hardware error should be part of PC
> defacto standard. Do you think so?
Well after thinking about it, I would say no. And my reason is, if
vendors are really serious about using NMIs as an indicator for hardware
errors, shouldn't they be setting a bit in the memory controller/north
bridge or south bridge/IOHC for an NMI handler to read? I mean hardware
devices don't just get wired directly to the NMI pin on the cpu, right?
They generally have to go through some hub that acts as a multiplexer.
In those cases, why can't those hubs set a bit saying it detected an error
(don't PCIe bridges already do that?) and let the NMI handler read it to
confirm. This way we can leave 'unknown NMIs' as a way to say an
unclaimed NMI entered the system and we can have users set policy about
what to do, panic, printk, whatever.
But for the HEST stuff, it should be smart enough by now to trap any
hardware error, no? How does a machine that supports HEST let a hardware
error get through without detecting it? Isn't that the point? Detect a
hardware error, grab as much info about it as possible, save the error
record and then panic?
Otherwise if you just panic, then you have no idea why the machine errored
in the first place. It might be the safe thing to do in some
circumstances, but then you have to wonder why the fancy HEST enabled
server didn't catch it. Isn't that what people are spending extra money
for those Intel servers with RAS features?
Cheers,
Don
On Mon, May 16, 2011 at 01:29:34PM +0200, Ingo Molnar wrote:
> > Interesting. Question though, what do you mean by 'event filtering'. Is
> > that different then setting 'unknown_nmi_panic' panic on the commandline or
> > procfs?
> >
> > Or are you suggesting something like registering another callback on the
> > die_chain that looks for DIE_NMIUNKNOWN as the event, swallows them and
> > implements the policy? That way only on HEST related platforms would
> > register them while others would keep the default of 'Dazed and confused'
> > messages?
>
> The idea is that "event filters", which are an existing upstream feature and
> which can be used in rather flexible ways:
>
> http://lkml.org/lkml/2011/4/27/660
>
> Could be used to trigger non-standard policy action as well - such as to panic
> the box.
>
> This would replace various very limited /debugfs and /sys event filtering hacks
> (and hardcoded policies) such as arch/x86/kernel/cpu/mcheck/mce-severity.c, and
> it would allow nonstandard behavior like 'panic the box on unknown NMIs' as
> well.
>
> This could be set by the RAS daemon, and it could be propagated to the kernel
> boot line as well, where event filter syntax would look like this:
>
> events=nmi::unknown"if (reason == 0) panic();"
Wow. ok. I believe that is the most complicated kernel boot param I have
ever seen. :-) Powerful, no doubt.
So this would sorta be a meta-notifier? I guess you are saying platforms
that implement something like HEST could setup an event like that to
trigger the behaviour they want on a per-platform basis?
My only argument against it would be sorta what Ying complains about:
you start to lose track of who is hooked into the NMI. It is one
thing to search for all the users of the die_notifier to track down who is
swallowing NMIs. But looking for event users is going to be harder.
Unless the events processing has a switch to turn on logging? :-)
Cheers,
Don
>
> (Where the 'reason' field of the NMI event is the current legacy 'reason' value
> there.)
>
> The filter code would have to be modified to be able to recognize the panic()
> bit, but that's desirable anyway and it is a one-time effort.
>
> This:
>
> events=nmi::unknown:"if (reason == 0) ignore();"
>
> would be a possible outcome as well, on certain boxes - to skip certain events.
>
> Thanks,
>
> Ingo
On 05/16/2011 05:09 AM, Huang Ying wrote:
...
>>
>> I'm personally fine even if it's enabled by default, only worried to have
>> an option to disable hwerr from boot line.
>
> The white list mechanism is not sufficient? Spurious unknown NMI can
> occur on white list machines? People don't want to protect their data?
>
I suppose no, it's not sufficient considering how many CPU errata are already
out there in general. And I see no guarantee that unknown NMIs will never trigger on
white-listed machines, and I know that you know that as well ;)
>>> And, I am not a big fan of notifiers, that makes code hard to be
>>> understood. If you have concerns about the size of traps.c, we can
>>> move all NMI logic to a new file.
>>
>> Ying, the concern is rather related to the code scheme in general. Since
>> we have notifiers I think the better way to be consistent here and use
>> hwerr notifier too. But it's IMHO ;)
>
> As for go notifiers or not. IMHO, a rule can be:
>
> - If it is something like a driver, than it should go notifier
> - If it is architectural/PC defacto standard, it can sit outside of notifier.
>
> I think that seeing unknown NMI as hardware error should be part of PC
> defacto standard. Do you think so?
Ying, moving the handler into a notifier is just my opinion (IMHO); it would free the NMI handler
from such details, since with time more and more "standards" will appear. If Don, Ingo
and the x86 team are fine with your approach -- of course I'm pretty fine too ;)
>
> Best Regards,
> Huang Ying
/me Just found Don has some more concerns
--
Cyrill
On 05/16/2011 11:03 PM, Don Zickus wrote:
> On Mon, May 16, 2011 at 09:09:45AM +0800, Huang Ying wrote:
>>> Ying, the concern is rather related to the code scheme in general. Since
>>> we have notifiers I think the better way to be consistent here and use
>>> hwerr notifier too. But it's IMHO ;)
>>
>> As for go notifiers or not. IMHO, a rule can be:
>>
>> - If it is something like a driver, than it should go notifier
>> - If it is architectural/PC defacto standard, it can sit outside of
>> notifier.
>
> Hmm, then what do you do about perf? That is architectural and a defacto
> standard, but I am not sure hardcoding that would be appropriate.
Good point!
>
>>
>> I think that seeing unknown NMI as hardware error should be part of PC
>> defacto standard. Do you think so?
>
> Well after thinking about it, I would say no. And my reason is, if
> vendors are really serious about using NMIs as an indicator for hardware
> errors, shouldn't they be setting a bit in the memory controller/north
> bridge or south bridge/IOHC for an NMI handler to read? I mean hardware
UV platform has such bit iirc :)
> devices don't just get wired directly to the NMI pin on the cpu, right?
> They generally have to go through some hub that acts as a multiplexer.
>
> In those cases, why can't those hubs set a bit saying it detected an error
> (don't PCIe bridges already do that?) and let the NMI handler read it to
> confirm. This way we can leave 'unknown NMIs' as a way to say an
> unclaimed NMI entered the system and we can have users set policy about
> what to do, panic, printk, whatever.
>
> But for the HEST stuff, it should be smart enough by now to trap any
> hardware error, no? How does a machine that supports HEST let a hardware
> error get through without detecting it? Isn't that the point? Detect a
> hardware error, grab as much info about it as possible, save the error
> record and then panic?
>
> Otherwise if you just panic, then you have no idea why the machine errored
> in the first place. It might be the safe thing to do in some
> circumstances, but then you have to wonder why the fancy HEST enabled
> server didn't catch it. Isn't that what people are spending extra money
> for those Intel servers with RAS features?
>
> Cheers,
> Don
--
Cyrill
On 05/17/2011 03:03 AM, Don Zickus wrote:
> On Mon, May 16, 2011 at 09:09:45AM +0800, Huang Ying wrote:
>>> Ying, the concern is rather related to the code scheme in general. Since
>>> we have notifiers I think the better way to be consistent here and use
>>> hwerr notifier too. But it's IMHO ;)
>>
>> As for go notifiers or not. IMHO, a rule can be:
>>
>> - If it is something like a driver, than it should go notifier
>> - If it is architectural/PC defacto standard, it can sit outside of
>> notifier.
>
> Hmm, then what do you do about perf? That is architectural and a defacto
> standard, but I am not sure hardcoding that would be appropriate.
Yes. perf is architectural, so its source is not put into the drivers
directory. And I think it is a good idea to put the perf NMI handler call
directly into the system NMI handler instead of a notifier chain. Unknown
NMI as HW error is far smaller than perf. So it can be put into
the system NMI handler directly.
>> I think that seeing unknown NMI as hardware error should be part of PC
>> defacto standard. Do you think so?
>
> Well after thinking about it, I would say no. And my reason is, if
> vendors are really serious about using NMIs as an indicator for hardware
> errors, shouldn't they be setting a bit in the memory controller/north
> bridge or south bridge/IOHC for an NMI handler to read? I mean hardware
> devices don't just get wired directly to the NMI pin on the cpu, right?
> They generally have to go through some hub that acts as a multiplexer.
>
> In those cases, why can't those hubs set a bit saying it detected an error
> (don't PCIe bridges already do that?) and let the NMI handler read it to
> confirm. This way we can leave 'unknown NMIs' as a way to say an
> unclaimed NMI entered the system and we can have users set policy about
> what to do, panic, printk, whatever.
>
> But for the HEST stuff, it should be smart enough by now to trap any
> hardware error, no? How does a machine that supports HEST let a hardware
> error get through without detecting it? Isn't that the point? Detect a
> hardware error, grab as much info about it as possible, save the error
> record and then panic?
>
> Otherwise if you just panic, then you have no idea why the machine errored
> in the first place. It might be the safe thing to do in some
> circumstances, but then you have to wonder why the fancy HEST enabled
> server didn't catch it. Isn't that what people are spending extra money
> for those Intel servers with RAS features?
All you said is possible in theory. But as far as I know, Windows
treats an unknown NMI as a hardware error and displays a blue screen for
it. So some hardware OEMs use unknown NMIs to report hardware errors.
Even on machines with HEST, there may be no GHES record (just an unknown
NMI) if Windows does not tell the BIOS that it has support for GHES.
Best Regards,
Huang Ying
On 05/17/2011 03:44 AM, Cyrill Gorcunov wrote:
> On 05/16/2011 05:09 AM, Huang Ying wrote:
> ...
>>>
>>> I'm personally fine even if it's enabled by default, only worried to have
>>> an option to disable hwerr from boot line.
>>
>> The white list mechanism is not sufficient? Spurious unknown NMI can
>> occur on white list machines? People don't want to protect their data?
>>
>
> I suppose no, it's not sufficient considering how many cpu errata already
> out in general. And I see no guarantee that unknown NMIs never triggers on
> white list machines and I know that you know that as well ;)
I wrote this patch because I believe white-listed machines will not
generate unknown NMIs for no good reason ;)
If we just want to push the unknown NMI logic to user space, the existing
"unknown_nmi_panic" is sufficient.
Best Regards,
Huang Ying
On 05/16/2011 07:29 PM, Ingo Molnar wrote:
>
> * Don Zickus <[email protected]> wrote:
>
>> On Fri, May 13, 2011 at 05:20:33PM +0200, Ingo Molnar wrote:
>>>
>>> * huang ying <[email protected]> wrote:
>>>
>>>>> What should be done instead is to add an event for unknown NMIs, which can
>>>>> then be processed by the RAS daemon to implement policy.
>>>>>
>>>>> By using 'active' event filters it could even be set on a system to panic
>>>>> the box by default.
>>>>
>>>> If there is real fatal hardware error, maybe we have no luxury to go from NMI
>>>> handler to user space RAS daemon to determine what to do. System may explode,
>>>> bad data may go to disk before that.
>>>
>>> That is why i suggested:
>>>
>>> > > By using 'active' event filters it could even be set on a system to panic
>>> > > the box by default.
>>>
>>> event filters are evaluated in the kernel, so the panic could be instantaneous,
>>> without the event having to reach user-space.
>>
>> Interesting. Question though, what do you mean by 'event filtering'. Is
>> that different then setting 'unknown_nmi_panic' panic on the commandline or
>> procfs?
>>
>> Or are you suggesting something like registering another callback on the
>> die_chain that looks for DIE_NMIUNKNOWN as the event, swallows them and
>> implements the policy? That way only on HEST related platforms would
>> register them while others would keep the default of 'Dazed and confused'
>> messages?
>
> The idea is that "event filters", which are an existing upstream feature and
> which can be used in rather flexible ways:
>
> http://lkml.org/lkml/2011/4/27/660
>
> Could be used to trigger non-standard policy action as well - such as to panic
> the box.
>
> This would replace various very limited /debugfs and /sys event filtering hacks
> (and hardcoded policies) such as arch/x86/kernel/cpu/mcheck/mce-severity.c, and
> it would allow nonstandard behavior like 'panic the box on unknown NMIs' as
> well.
>
> This could be set by the RAS daemon, and it could be propagated to the kernel
> boot line as well, where event filter syntax would look like this:
>
> events=nmi::unknown"if (reason == 0) panic();"
>
> (Where the 'reason' field of the NMI event is the current legacy 'reason' value
> there.)
>
> The filter code would have to be modified to be able to recognize the panic()
> bit, but that's desirable anyway and it is a one-time effort.
>
> This:
>
> events=nmi::unknown:"if (reason == 0) ignore();"
>
> would be a possible outcome as well, on certain boxes - to skip certain events.
We can determine whether an NMI is unknown in the kernel now. If you want to
push all unknown NMI logic into user space (although I don't think that
is the best solution), is it not sufficient to just check the system in
user space (via PCI ID or DMI ID, etc.) and set the existing
"unknown_nmi_panic" accordingly?
Best Regards,
Huang Ying
* Don Zickus <[email protected]> wrote:
> On Mon, May 16, 2011 at 01:29:34PM +0200, Ingo Molnar wrote:
> > > Interesting. Question though, what do you mean by 'event filtering'. Is
> > > that different then setting 'unknown_nmi_panic' panic on the commandline or
> > > procfs?
> > >
> > > Or are you suggesting something like registering another callback on the
> > > die_chain that looks for DIE_NMIUNKNOWN as the event, swallows them and
> > > implements the policy? That way only on HEST related platforms would
> > > register them while others would keep the default of 'Dazed and confused'
> > > messages?
> >
> > The idea is that "event filters", which are an existing upstream feature and
> > which can be used in rather flexible ways:
> >
> > http://lkml.org/lkml/2011/4/27/660
> >
> > Could be used to trigger non-standard policy action as well - such as to panic
> > the box.
> >
> > This would replace various very limited /debugfs and /sys event filtering hacks
> > (and hardcoded policies) such as arch/x86/kernel/cpu/mcheck/mce-severity.c, and
> > it would allow nonstandard behavior like 'panic the box on unknown NMIs' as
> > well.
> >
> > This could be set by the RAS daemon, and it could be propagated to the kernel
> > boot line as well, where event filter syntax would look like this:
> >
> > events=nmi::unknown"if (reason == 0) panic();"
>
> Wow. ok. I believe that is the most complicated kernel boot param I have
> ever seen. :-) Powerful, no doubt.
It would not have to be typed normally - the defaults would still be sane.
> So this would sorta be a meta-notifier? I guess you are saying platforms
> that implement something like HEST could setup an event like that to trigger
> the behaviour they want on a per-platform basis?
Yeah - or if they dislike the default they could tweak the policy action in a
rather flexible way.
> My only argument against it would be sorta of what Ying complains about is
> that you start to lose track of who is hooked into the NMI. It is one thing
> to search for all the users in the die_notifier to track down who is
> swallowing NMIs. But to look for event users, is going to be harder. Unless
> the events processing has a switch to turn on logging? :-)
Yeah, all such types of filters should be printed during bootup, to make it
really clear what is happening.
We also want all the current state visible readily under /sys/events or
/events.
Thanks,
Ingo
* Huang Ying <[email protected]> wrote:
> On 05/16/2011 07:29 PM, Ingo Molnar wrote:
> >
> > * Don Zickus <[email protected]> wrote:
> >
> >> On Fri, May 13, 2011 at 05:20:33PM +0200, Ingo Molnar wrote:
> >>>
> >>> * huang ying <[email protected]> wrote:
> >>>
> >>>>> What should be done instead is to add an event for unknown NMIs, which can
> >>>>> then be processed by the RAS daemon to implement policy.
> >>>>>
> >>>>> By using 'active' event filters it could even be set on a system to panic
> >>>>> the box by default.
> >>>>
> >>>> If there is real fatal hardware error, maybe we have no luxury to go from NMI
> >>>> handler to user space RAS daemon to determine what to do. System may explode,
> >>>> bad data may go to disk before that.
> >>>
> >>> That is why i suggested:
> >>>
> >>> > > By using 'active' event filters it could even be set on a system to panic
> >>> > > the box by default.
> >>>
> >>> event filters are evaluated in the kernel, so the panic could be instantaneous,
> >>> without the event having to reach user-space.
> >>
> >> Interesting. Question though, what do you mean by 'event filtering'. Is
> >> that different then setting 'unknown_nmi_panic' panic on the commandline or
> >> procfs?
> >>
> >> Or are you suggesting something like registering another callback on the
> >> die_chain that looks for DIE_NMIUNKNOWN as the event, swallows them and
> >> implements the policy? That way only on HEST related platforms would
> >> register them while others would keep the default of 'Dazed and confused'
> >> messages?
> >
> > The idea is that "event filters", which are an existing upstream feature and
> > which can be used in rather flexible ways:
> >
> > http://lkml.org/lkml/2011/4/27/660
> >
> > Could be used to trigger non-standard policy action as well - such as to panic
> > the box.
> >
> > This would replace various very limited /debugfs and /sys event filtering hacks
> > (and hardcoded policies) such as arch/x86/kernel/cpu/mcheck/mce-severity.c, and
> > it would allow nonstandard behavior like 'panic the box on unknown NMIs' as
> > well.
> >
> > This could be set by the RAS daemon, and it could be propagated to the kernel
> > boot line as well, where event filter syntax would look like this:
> >
> > events=nmi::unknown"if (reason == 0) panic();"
> >
> > (Where the 'reason' field of the NMI event is the current legacy 'reason' value
> > there.)
> >
> > The filter code would have to be modified to be able to recognize the panic()
> > bit, but that's desirable anyway and it is a one-time effort.
> >
> > This:
> >
> > events=nmi::unknown:"if (reason == 0) ignore();"
> >
> > would be a possible outcome as well, on certain boxes - to skip certain events.
>
> We can determine whether NMI is unknown in kernel now. If you want to push
> all unknown NMI logic into user space (although I don't think that is the
> best solution), is it not sufficient that just check system in user space
> (via PCI ID or DMI ID, etc) and set existing "unknown_nmi_panic" accordingly?
yeah - no need to push the 'reason' if it's not needed.
We want the kernel defaults to be sane - i.e. this is not to 'push' anything to
user-space in a forced way, this is to make *optional*, different policy action
possible to configure.
For example the weird hack in arch/x86/kernel/cpu/mcheck/mce-severity.c would
be handled via a set of default filters. The debugfs mce-severity hack API
would go away, it could all be configured in the same way (and more) by using
persistent events and such active filters.
That's roughly the direction the MCE code should take IMO.
Thanks,
Ingo
On Tue, May 17, 2011 at 01:39:59PM +0800, Huang Ying wrote:
> On 05/17/2011 03:03 AM, Don Zickus wrote:
> > On Mon, May 16, 2011 at 09:09:45AM +0800, Huang Ying wrote:
> >>> Ying, the concern is rather related to the code scheme in general. Since
> >>> we have notifiers I think the better way to be consistent here and use
> >>> hwerr notifier too. But it's IMHO ;)
> >>
> >> As for go notifiers or not. IMHO, a rule can be:
> >>
> >> - If it is something like a driver, then it should go notifier
> >> - If it is architectural/PC defacto standard, it can sit outside of
> >> notifier.
> >
> > Hmm, then what do you do about perf? That is architectural and a defacto
> > standard, but I am not sure hardcoding that would be appropriate.
>
> Yes. perf is architectural, so its source is not put into drivers
> directory. And I think it is a good idea to put perf NMI handler call
> directly into system NMI handler instead of a notifier chain. Unknown
> NMI as HW error is far smaller than perf. So it can be put into
> system NMI handler directly.
>
> >> I think that seeing unknown NMI as hardware error should be part of PC
> >> defacto standard. Do you think so?
> >
> > Well after thinking about it, I would say no. And my reason is, if
> > vendors are really serious about using NMIs as an indicator for hardware
> > errors, shouldn't they be setting a bit in the memory controller/north
> > bridge or south bridge/IOHC for an NMI handler to read? I mean hardware
> > devices don't just get wired directly to the NMI pin on the cpu, right?
> > They generally have to go through some hub that acts as a multiplexer.
> >
> > In those cases, why can't those hubs set a bit saying it detected an error
> > (don't PCIe bridges already do that?) and let the NMI handler read it to
> > confirm. This way we can leave 'unknown NMIs' as a way to say an
> > unclaimed NMI entered the system and we can have users set policy about
> > what to do, panic, printk, whatever.
> >
> > But for the HEST stuff, it should be smart enough by now to trap any
> > hardware error, no? How does a machine that supports HEST let a hardware
> > error get through without detecting it? Isn't that the point? Detect a
> > hardware error, grab as much info about it as possible, save the error
> > record and then panic?
> >
> > Otherwise if you just panic, then you have no idea why the machine errored
> > in the first place. It might be the safe thing to do in some
> > circumstances, but then you have to wonder why the fancy HEST enabled
> > server didn't catch it. Isn't that what people are spending extra money
> > for those Intel servers with RAS features?
>
> All you said is possible in theory. But as far as I know, Windows
> thinks unknown NMI is for hardware error and displays blue screen for
Right, because as I was told, Windows doesn't use NMI for anything else.
Linux uses it for perf, hw breakpoints, kdump, watchdog, etc.
> it. So some hardware OEMs use unknown NMI to report hardware error.
Yes, I know one of those OEMs and had to restructure the NMI code to
accommodate them.
> Even on machines with HEST, there may be no GHES record (just unknown
> NMI) if Windows does not tell BIOS that it has support for GHES.
Ok, that's fine, but doesn't Linux tell the BIOS it supports GHES? Also
what would be the point of implementing HEST in your firmware if all it
does is just pass the error along to the NMI?
Ok, so I am naive and am just learning that the ACPI spec is just
'guidelines' for how stuff should work (and people rarely follow it), but
I find it hard to believe that OEMs would implement HEST as just an error
pass-through. Isn't the point of HEST trying to _determine_ what the error is?
Otherwise why bother.
Can we agree on this, that if an OEM implemented HEST properly such that a
hardware error happens it will generate a GHES record. The subsequent NMI
that follows will find that GHES record and properly panic.
If the OEM can't implement HEST properly and instead just sends the NMI
with no GHES record, how much should we care?
Cheers,
Don
Cheers,
Don
> Can we agree on this, that if an OEM implemented HEST properly such that a
> hardware error happens it will generate a GHES record. The subsequent NMI
Apparently there are some (rare) corner cases where it's hard/impossible
to do. So in those cases you will see an NMI without record.
In the "normal" error case you will see a GHES record on NMI.
-Andi
On Tue, May 17, 2011 at 09:38:47AM -0700, Andi Kleen wrote:
> > Can we agree on this, that if an OEM implemented HEST properly such that a
> > hardware error happens it will generate a GHES record. The subsequent NMI
>
> Apparently there are some (rare) corner cases where it's hard/impossible
> to do. So in those cases you will see an NMI without record.
Hmm, I would be interested to know what kind, but it doesn't matter I
guess.
>
> In the "normal" error case you will see a GHES record on NMI.
Random thought, in the Firmware first mode of HEST (which is the only way
GHES records get produced??), does an SCI happen first to jump into the
firmware for processing, then an NMI?
Cheers,
Don
> Random thought, in the Firmware first mode of HEST (which is the only way
> GHES records get produced??), does an SCI happen first to jump into the
> firmware for processing, then an NMI?
Either that or there is a separate service processor which handles it.
Presumably it depends a lot on the particular system.
-Andi
On Tue, May 17, 2011 at 11:18:59AM -0700, Andi Kleen wrote:
> > Random thought, in the Firmware first mode of HEST (which is the only way
> > GHES records get produced??), does an SCI happen first to jump into the
> > firmware for processing, then an NMI?
>
> Either that or there is a separate service processor which handles it.
> Presumably it depends a lot on the particular system.
Ah interesting. I was going to suggest somehow setting a bit when an SCI
comes in and check that bit in the unknown NMI path as a possible hint
that the NMI might be related to HEST (sorta how we flag unknown NMIs in
the perf code).
It was just an idea. Obviously a service processor will make that more
difficult. :-)
Cheers,
Don
On 05/17/2011 04:53 PM, Ingo Molnar wrote:
>
> * Huang Ying <[email protected]> wrote:
>
>> On 05/16/2011 07:29 PM, Ingo Molnar wrote:
>>>
>>> * Don Zickus <[email protected]> wrote:
>>>
>>>> On Fri, May 13, 2011 at 05:20:33PM +0200, Ingo Molnar wrote:
>>>>>
>>>>> * huang ying <[email protected]> wrote:
>>>>>
>>>>>>> What should be done instead is to add an event for unknown NMIs, which can
>>>>>>> then be processed by the RAS daemon to implement policy.
>>>>>>>
>>>>>>> By using 'active' event filters it could even be set on a system to panic
>>>>>>> the box by default.
>>>>>>
>>>>>> If there is real fatal hardware error, maybe we have no luxury to go from NMI
>>>>>> handler to user space RAS daemon to determine what to do. System may explode,
>>>>>> bad data may go to disk before that.
>>>>>
>>>>> That is why i suggested:
>>>>>
>>>>> > > By using 'active' event filters it could even be set on a system to panic
>>>>> > > the box by default.
>>>>>
>>>>> event filters are evaluated in the kernel, so the panic could be instantaneous,
>>>>> without the event having to reach user-space.
>>>>
>>>> Interesting. Question though, what do you mean by 'event filtering'. Is
>>>> that different than setting 'unknown_nmi_panic' panic on the commandline or
>>>> procfs?
>>>>
>>>> Or are you suggesting something like registering another callback on the
>>>> die_chain that looks for DIE_NMIUNKNOWN as the event, swallows them and
>>>> implements the policy? That way only on HEST related platforms would
>>>> register them while others would keep the default of 'Dazed and confused'
>>>> messages?
>>>
>>> The idea is that "event filters", which are an existing upstream feature and
>>> which can be used in rather flexible ways:
>>>
>>> http://lkml.org/lkml/2011/4/27/660
>>>
>>> Could be used to trigger non-standard policy action as well - such as to panic
>>> the box.
>>>
>>> This would replace various very limited /debugfs and /sys event filtering hacks
>>> (and hardcoded policies) such as arch/x86/kernel/cpu/mcheck/mce-severity.c, and
>>> it would allow nonstandard behavior like 'panic the box on unknown NMIs' as
>>> well.
>>>
>>> This could be set by the RAS daemon, and it could be propagated to the kernel
>>> boot line as well, where event filter syntax would look like this:
>>>
>>> events=nmi::unknown:"if (reason == 0) panic();"
>>>
>>> (Where the 'reason' field of the NMI event is the current legacy 'reason' value
>>> there.)
>>>
>>> The filter code would have to be modified to be able to recognize the panic()
>>> bit, but that's desirable anyway and it is a one-time effort.
>>>
>>> This:
>>>
>>> events=nmi::unknown:"if (reason == 0) ignore();"
>>>
>>> would be a possible outcome as well, on certain boxes - to skip certain events.
>>
>> We can determine whether NMI is unknown in kernel now. If you want to push
>> all unknown NMI logic into user space (although I don't think that is the
>> best solution), is it not sufficient that just check system in user space
>> (via PCI ID or DMI ID, etc) and set existing "unknown_nmi_panic" accordingly?
>
> yeah - no need to push the 'reason' if it's not needed.
>
> We want the kernel defaults to be sane - i.e. this is not to 'push' anything to
> user-space in a forced way, this is to make *optional*, different policy action
> possible to configure.
OK. Then, what is the proper default behavior? We think Linux kernel
should treat unknown NMI as hardware error reporting, at least on some
modern machines (via a white list). Do you agree?
Best Regards,
Huang Ying
Hi, Don,
On 05/18/2011 03:07 AM, Don Zickus wrote:
> On Tue, May 17, 2011 at 11:18:59AM -0700, Andi Kleen wrote:
>>> Random thought, in the Firmware first mode of HEST (which is the only way
>>> GHES records get produced??), does an SCI happen first to jump into the
>>> firmware for processing, then an NMI?
>>
>> Either that or there is a separate service processor which handles it.
>> Presumably it depends a lot on the particular system.
>
> Ah interesting. I was going to suggest somehow setting a bit when an SCI
> comes in and check that bit in the unknown NMI path as a possible hint
> that the NMI might be related to HEST (sorta how we flag unknown NMIs in
> the perf code).
>
> It was just an idea. Obviously a service processor will make that more
> difficult. :-)
Hmm, what's the conclusion? Do you think unknown NMI should be seen as
hardware error? At least on some white listed machines?
Best Regards,
Huang Ying
* Huang Ying <[email protected]> wrote:
> On 05/17/2011 04:53 PM, Ingo Molnar wrote:
> >
> > * Huang Ying <[email protected]> wrote:
> >
> >> On 05/16/2011 07:29 PM, Ingo Molnar wrote:
> >>>
> >>> * Don Zickus <[email protected]> wrote:
> >>>
> >>>> On Fri, May 13, 2011 at 05:20:33PM +0200, Ingo Molnar wrote:
> >>>>>
> >>>>> * huang ying <[email protected]> wrote:
> >>>>>
> >>>>>>> What should be done instead is to add an event for unknown NMIs, which can
> >>>>>>> then be processed by the RAS daemon to implement policy.
> >>>>>>>
> >>>>>>> By using 'active' event filters it could even be set on a system to panic
> >>>>>>> the box by default.
> >>>>>>
> >>>>>> If there is real fatal hardware error, maybe we have no luxury to go from NMI
> >>>>>> handler to user space RAS daemon to determine what to do. System may explode,
> >>>>>> bad data may go to disk before that.
> >>>>>
> >>>>> That is why i suggested:
> >>>>>
> >>>>> > > By using 'active' event filters it could even be set on a system to panic
> >>>>> > > the box by default.
> >>>>>
> >>>>> event filters are evaluated in the kernel, so the panic could be instantaneous,
> >>>>> without the event having to reach user-space.
> >>>>
> >>>> Interesting. Question though, what do you mean by 'event filtering'. Is
> >>>> that different than setting 'unknown_nmi_panic' panic on the commandline or
> >>>> procfs?
> >>>>
> >>>> Or are you suggesting something like registering another callback on the
> >>>> die_chain that looks for DIE_NMIUNKNOWN as the event, swallows them and
> >>>> implements the policy? That way only on HEST related platforms would
> >>>> register them while others would keep the default of 'Dazed and confused'
> >>>> messages?
> >>>
> >>> The idea is that "event filters", which are an existing upstream feature and
> >>> which can be used in rather flexible ways:
> >>>
> >>> http://lkml.org/lkml/2011/4/27/660
> >>>
> >>> Could be used to trigger non-standard policy action as well - such as to panic
> >>> the box.
> >>>
> >>> This would replace various very limited /debugfs and /sys event filtering hacks
> >>> (and hardcoded policies) such as arch/x86/kernel/cpu/mcheck/mce-severity.c, and
> >>> it would allow nonstandard behavior like 'panic the box on unknown NMIs' as
> >>> well.
> >>>
> >>> This could be set by the RAS daemon, and it could be propagated to the kernel
> >>> boot line as well, where event filter syntax would look like this:
> >>>
> >>> events=nmi::unknown:"if (reason == 0) panic();"
> >>>
> >>> (Where the 'reason' field of the NMI event is the current legacy 'reason' value
> >>> there.)
> >>>
> >>> The filter code would have to be modified to be able to recognize the panic()
> >>> bit, but that's desirable anyway and it is a one-time effort.
> >>>
> >>> This:
> >>>
> >>> events=nmi::unknown:"if (reason == 0) ignore();"
> >>>
> >>> would be a possible outcome as well, on certain boxes - to skip certain events.
> >>
> >> We can determine whether NMI is unknown in kernel now. If you want to push
> >> all unknown NMI logic into user space (although I don't think that is the
> >> best solution), is it not sufficient that just check system in user space
> >> (via PCI ID or DMI ID, etc) and set existing "unknown_nmi_panic" accordingly?
> >
> > yeah - no need to push the 'reason' if it's not needed.
> >
> > We want the kernel defaults to be sane - i.e. this is not to 'push' anything to
> > user-space in a forced way, this is to make *optional*, different policy action
> > possible to configure.
>
> OK. Then, what is the proper default behavior? We think Linux kernel
> should treat unknown NMI as hardware error reporting, at least on some
> modern machines (via a white list). Do you agree?
No, i do not agree *at all*.
We are seeing cases of spurious NMIs again and again. Crashing boxes should be
a niche thing, something you can configure if you want to but the kernel should
not default it until NMI demultiplexing becomes more robust - and i doubt it
ever will.
Thanks,
Ingo
On Fri, May 20, 2011 at 04:13:25PM +0800, Huang Ying wrote:
> Hi, Don,
>
> On 05/18/2011 03:07 AM, Don Zickus wrote:
> > On Tue, May 17, 2011 at 11:18:59AM -0700, Andi Kleen wrote:
> >>> Random thought, in the Firmware first mode of HEST (which is the only way
> >>> GHES records get produced??), does an SCI happen first to jump into the
> >>> firmware for processing, then an NMI?
> >>
> >> Either that or there is a separate service processor which handles it.
> >> Presumably it depends a lot on the particular system.
> >
> > Ah interesting. I was going to suggest somehow setting a bit when an SCI
> > comes in and check that bit in the unknown NMI path as a possible hint
> > that the NMI might be related to HEST (sorta how we flag unknown NMIs in
> > the perf code).
> >
> > It was just an idea. Obviously a service processor will make that more
> > difficult. :-)
>
> Hmm, what's the conclusion? Do you think unknown NMI should be seen as
> hardware error? At least on some white listed machines?
I still sorta have the opinion that a hardware error should be
recognizable either through a GHES record or a bit in the southbridge.
Whereas an unknown NMI is something lost and has no owner as the result of
either a buggy NMI handler or an unimplemented NMI handler.
Yeah, I can see hardware errors coming in through an unknown NMI but to me
(from what I am reading about APEI/GHES) those should be trapped
by the firmware and if they aren't then the firmware is broken. In those
cases it should be up to the OEM to provide proper firmware (even certify
them) to allow the proper experience, which includes being properly
trapped by an NMI handler.
Perhaps I am a bit naive in my belief but I am a little nervous panicking
all the time on unknown NMIs when we are still chasing missed perf NMIs on
a loaded box.
Cheers,
Don
On Thu, Jun 09, 2011 at 08:09:28AM -0400, Don Zickus wrote:
...
> Perhaps I am a bit naive in my belief but I am a little nervous panicking
> all the time on unknown NMIs when we are still chasing missed perf NMIs on
> a loaded box.
>
> Cheers,
> Don
>
Agreed, though I always vote for notifier chain (with either CONFIG_ option
or command line one) so there would be a way to turn panic off if needed.
I believe there is a way to make such an NMI handler have the highest priority
and Ying will be able to do with it whatever he wants -- panic, warning,
or even immediate reboot, whatever ;)
Cyrill
On 06/09/2011 08:09 PM, Don Zickus wrote:
> On Fri, May 20, 2011 at 04:13:25PM +0800, Huang Ying wrote:
>> Hi, Don,
>>
>> On 05/18/2011 03:07 AM, Don Zickus wrote:
>>> On Tue, May 17, 2011 at 11:18:59AM -0700, Andi Kleen wrote:
>>>>> Random thought, in the Firmware first mode of HEST (which is the only way
>>>>> GHES records get produced??), does an SCI happen first to jump into the
>>>>> firmware for processing, then an NMI?
>>>>
>>>> Either that or there is a separate service processor which handles it.
>>>> Presumably it depends a lot on the particular system.
>>>
>>> Ah interesting. I was going to suggest somehow setting a bit when an SCI
>>> comes in and check that bit in the unknown NMI path as a possible hint
>>> that the NMI might be related to HEST (sorta how we flag unknown NMIs in
>>> the perf code).
>>>
>>> It was just an idea. Obviously a service processor will make that more
>>> difficult. :-)
>>
>> Hmm, what's the conclusion? Do you think unknown NMI should be seen as
>> hardware error? At least on some white listed machines?
>
> I still sorta have the opinion that a hardware error should be
> recognizable either through a GHES record or a bit in the southbridge.
> Whereas an unknown NMI is something lost and has no owner as the result of
> either a buggy NMI handler or an unimplemented NMI handler.
>
> Yeah, I can see hardware errors coming in through an unknown NMI but to me
> (from what I am reading about with APEI/GHES) is those should be trapped
> by the firmware and if they aren't then the firmware is broken. In those
> cases it should be up to the OEM to provide proper firmware (even certify
> them) to allow the proper experience, which includes being properly
> trapped by an NMI handler.
>
> Perhaps I am a bit naive in my belief but I am a little nervous panicking
> all the time on unknown NMIs when we are still chasing missed perf NMIs on
> a loaded box.
I think things SHOULD go this way too. This just is not the reality.
Best Regards,
Huang Ying