When Firmware First is enabled, the BIOS handles errors first and then
makes them available to the kernel via Common Platform Error Record
(CPER) sections (UEFI v2.10, Appendix N). Linux parses the CPER sections
via two similar paths, ELOG and GHES.
Currently, ELOG and GHES show some inconsistencies in how they print to
the kernel log as well as in how they report to userspace via trace
events.
This short series makes these two paths behave consistently with
respect to logging and tracing.
--- Changes in v2 ---
- 0/3: rework the subject line and the cover letter.
- 1/3: no changes.
- 2/3: trace the CPER PCIe Section only if CONFIG_ACPI_APEI_PCIEAER
  is defined; the kernel test robot reported the use of two undefined
  symbols because the test for the config option was missing; rewrite
  the subject line and part of the commit message.
- 3/3: no changes.
Fabio M. De Francesco (3):
ACPI: extlog: Trace CPER Non-standard Section Body
ACPI: extlog: Trace CPER PCI Express Error Section
ACPI: extlog: Make print_extlog_rcd() log unconditionally
drivers/acpi/acpi_extlog.c | 47 ++++++++++++++++++++++++++++++++++----
drivers/ras/debugfs.c | 6 -----
include/linux/ras.h | 2 --
3 files changed, 42 insertions(+), 13 deletions(-)
--
2.45.0
Currently, extlog_print() (ELOG) reports the CPER PCIe section (UEFI
v2.10, Appendix N.2.7) only to the kernel log via print_extlog_rcd().
Instead, the analogous ghes_do_proc() (GHES) prints to the kernel log
and also calls pci_print_aer() to report via the ftrace infrastructure.
Report the CPER PCIe Error section also via the ftrace infrastructure
by calling pci_print_aer(), to make ELOG act consistently with GHES.
Cc: Dan Williams <[email protected]>
Signed-off-by: Fabio M. De Francesco <[email protected]>
---
drivers/acpi/acpi_extlog.c | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 4e62d7235d33..fb7b0c73f86a 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -131,6 +131,36 @@ static int print_extlog_rcd(const char *pfx,
return 1;
}
+static void extlog_print_pcie(struct cper_sec_pcie *pcie_err,
+ int severity)
+{
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+ struct aer_capability_regs *aer;
+ struct pci_dev *pdev;
+ unsigned int devfn;
+ unsigned int bus;
+ int aer_severity;
+ int domain;
+
+ if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
+ pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
+ aer_severity = cper_severity_to_aer(severity);
+ aer = (struct aer_capability_regs *)pcie_err->aer_info;
+ domain = pcie_err->device_id.segment;
+ bus = pcie_err->device_id.bus;
+ devfn = PCI_DEVFN(pcie_err->device_id.device,
+ pcie_err->device_id.function);
+ pdev = pci_get_domain_bus_and_slot(domain, bus, devfn);
+ if (!pdev) {
+ pr_err("no pci_dev for %04x:%02x:%02x.%x\n",
+ domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ return;
+ }
+ pci_print_aer(pdev, aer_severity, aer);
+ }
+#endif
+}
+
static int extlog_print(struct notifier_block *nb, unsigned long val,
void *data)
{
@@ -182,6 +212,10 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
if (gdata->error_data_length >= sizeof(*mem))
trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
(u8)gdata->error_severity);
+ } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
+ struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
+
+ extlog_print_pcie(pcie_err, gdata->error_severity);
} else {
void *err = acpi_hest_get_payload(gdata);
@@ -331,3 +365,4 @@ module_exit(extlog_exit);
MODULE_AUTHOR("Chen, Gong <[email protected]>");
MODULE_DESCRIPTION("Extended MCA Error Log Driver");
MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(CXL);
--
2.45.0
Make extlog_print() (ELOG) trace the "Non-standard Section Body" reported by
firmware to the OS via the Common Platform Error Record (CPER) (UEFI v2.10,
Appendix N.2.3).
Add further debug information and make ELOG logs consistent with
ghes_do_proc() (GHES).
Cc: Dan Williams <[email protected]>
Signed-off-by: Fabio M. De Francesco <[email protected]>
---
drivers/acpi/acpi_extlog.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index ca87a0939135..4e62d7235d33 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -182,6 +182,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
if (gdata->error_data_length >= sizeof(*mem))
trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
(u8)gdata->error_severity);
+ } else {
+ void *err = acpi_hest_get_payload(gdata);
+
+ trace_non_standard_event(sec_type, fru_id, fru_text,
+ gdata->error_severity, err,
+ gdata->error_data_length);
}
}
--
2.45.0
Make print_extlog_rcd() log unconditionally to report errors even if
userspace is not consuming trace events. Remove ras_userspace_consumers()
because it is no longer used.
This change makes extlog_print() (ELOG) log consistently with ghes_proc()
(GHES).
Cc: Dan Williams <[email protected]>
Signed-off-by: Fabio M. De Francesco <[email protected]>
---
drivers/acpi/acpi_extlog.c | 6 +-----
drivers/ras/debugfs.c | 6 ------
include/linux/ras.h | 2 --
3 files changed, 1 insertion(+), 13 deletions(-)
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index fb7b0c73f86a..41ee66306234 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -189,10 +189,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
tmp = (struct acpi_hest_generic_status *)elog_buf;
- if (!ras_userspace_consumers()) {
- print_extlog_rcd(NULL, tmp, cpu);
- goto out;
- }
+ print_extlog_rcd(NULL, tmp, cpu);
/* log event via trace */
err_seq++;
@@ -225,7 +222,6 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
}
}
-out:
mce->kflags |= MCE_HANDLED_EXTLOG;
return NOTIFY_OK;
}
diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c
index 42afd3de68b2..6633844acdd6 100644
--- a/drivers/ras/debugfs.c
+++ b/drivers/ras/debugfs.c
@@ -13,12 +13,6 @@ struct dentry *ras_get_debugfs_root(void)
}
EXPORT_SYMBOL_GPL(ras_get_debugfs_root);
-int ras_userspace_consumers(void)
-{
- return atomic_read(&trace_count);
-}
-EXPORT_SYMBOL_GPL(ras_userspace_consumers);
-
static int trace_show(struct seq_file *m, void *v)
{
return 0;
diff --git a/include/linux/ras.h b/include/linux/ras.h
index a64182bc72ad..98840f16b697 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -7,11 +7,9 @@
#include <linux/cper.h>
#ifdef CONFIG_DEBUG_FS
-int ras_userspace_consumers(void);
void ras_debugfs_init(void);
int ras_add_daemon_trace(void);
#else
-static inline int ras_userspace_consumers(void) { return 0; }
static inline void ras_debugfs_init(void) { }
static inline int ras_add_daemon_trace(void) { return 0; }
#endif
--
2.45.0
On Fri, May 10, 2024 at 09:00:34PM +0200, Fabio M. De Francesco wrote:
> I thought that ELOG and GHES should be modeled consistently. ghes_proc()
> prints to the console while ghes_do_proc() also uses ftrace.
ghes_proc() calls ghes_do_proc(). I have no clue what you mean here.
> If we want to make ELOG and GHES act similarly, this patch is needed.
I still don't know what the problem is.
> Please note that I was introducing the "why" part of the message. I never
> refer to this patch for the "what", and I always use an imperative tone only
> in the "what" part.
No, this is not what I meant. I mean, don't say "This patch does" or "This
commit does" or "This change ... " or whatnot as that is not necessary.
IOW:
"Make extlog_print() (ELOG) log consistently with ghes_proc() (GHES)
because... " and this is where you come in.
So let's start with the problem: what is it and why do you think it is
a problem?
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Friday, May 10, 2024 9:25:56 PM GMT+2 Borislav Petkov wrote:
> On Fri, May 10, 2024 at 09:00:34PM +0200, Fabio M. De Francesco wrote:
> > I thought that ELOG and GHES should be modeled consistently. ghes_proc()
> > prints to the console while ghes_do_proc() also uses ftrace.
>
> ghes_proc() calls ghes_do_proc(). I have no clue what you mean here.
>
My understanding is that ghes_proc() logs to the console and ghes_do_proc()
calls the tracers.
Therefore, GHES always reports errors via two different means at the same
time.
Instead, ELOG depends on the ras_userspace_consumers() check to decide
whether to call print_extlog_rcd() to print the logs. And if it prints to
the kernel log, it jumps to "out" and skips the tracers.
Why does it differ from how error reporting is done in GHES?
I thought that ELOG should be modeled on GHES, and so it should always print
to the kernel log unconditionally and then also use ftrace (no goto "out"
that skips the tracers).
(1) Is my understanding of logging and tracing in ELOG and GHES correct?
(2) If it is, does it make sense for ELOG to print to the kernel log,
unconditionally, and then call the tracers like ghes_proc() + ghes_do_proc()
do?
Fabio
Fabio M. De Francesco wrote:
> On Friday, May 10, 2024 9:25:56 PM GMT+2 Borislav Petkov wrote:
> > On Fri, May 10, 2024 at 09:00:34PM +0200, Fabio M. De Francesco wrote:
> > > I thought that ELOG and GHES should be modeled consistently. ghes_proc()
> > > prints to the console while ghes_do_proc() also uses ftrace.
> >
> > ghes_proc() calls ghes_do_proc(). I have no clue what you mean here.
> >
>
> My understanding is that ghes_proc() logs to the console and ghes_do_proc()
> calls the tracers.
>
> Therefore, GHES always reports errors via two different means at the same
> time.
>
> Instead, ELOG depends on the ras_userspace_consumers() check to decide
> whether to call print_extlog_rcd() to print the logs. And if it prints to
> the kernel log, it jumps to "out" and skips the tracers.
>
> Why does it differ from how error reporting is done in GHES?
>
> I thought that ELOG should be modeled on GHES, and so it should always print
> to the kernel log unconditionally and then also use ftrace (no goto "out"
> that skips the tracers).
>
> (1) Is my understanding of logging and tracing in ELOG and GHES correct?
> (2) If it is, does it make sense for ELOG to print to the kernel log,
> unconditionally, and then call the tracers like ghes_proc() + ghes_do_proc()
> do?
I had asked Fabio to take a look at whether it made sense to continue
with the concept of ras_userspace_consumers() especially since it seems
limited to the EXTLOG case.
In general I am finding that between OS Native and Firmware First error
reporting the logging approaches are inconsistent.
As far as I can see, rasdaemon would not even notice if the "daemon_active"
debugfs file went away [1], and it should be the case that the
tracepoints always fire whether daemon_active is open or not.
So I was expecting this removal to be a conversation starter on the
wider topic of error reporting consistency.
[1]: https://github.com/mchehab/rasdaemon/blob/master/ras-events.c#L992
On Fri, May 10, 2024 at 03:12:36PM -0700, Dan Williams wrote:
> I had asked Fabio to take a look at whether it made sense to continue
> with the concept of ras_userspace_consumers() especially since it seems
> limited to the EXTLOG case.
>
> In general I am finding that between OS Native and Firmware First error
> reporting the logging approaches are inconsistent.
>
> As far as I can see, rasdaemon would not even notice if the "daemon_active"
> debugfs file went away [1],
It tells the kernel that it is consuming the error info from the
tracepoints.
> and it should be the case that the tracepoints always fire whether
> daemon_active is open or not.
>
> So I was expecting this removal to be a conversation starter on the
> wider topic of error reporting consistency.
Yeah, and then they'll come and say: ew, we're getting error duplicates
- once logged in dmesg and once through the tracepoints.
So just like with the other thread, we have to figure out what our
scheme will be wrt hw error logging, agree on it and then make it
consistent.
Do we want to have both? Should it be configurable? Probably...
Anything else...?
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
Borislav Petkov wrote:
> On Fri, May 10, 2024 at 03:12:36PM -0700, Dan Williams wrote:
> > I had asked Fabio to take a look at whether it made sense to continue
> > with the concept of ras_userspace_consumers() especially since it seems
> > limited to the EXTLOG case.
> >
> > In general I am finding that between OS Native and Firmware First error
> > reporting the logging approaches are inconsistent.
> >
> > As far as I can see, rasdaemon would not even notice if the "daemon_active"
> > debugfs file went away [1],
>
> It tells the kernel that it is consuming the error info from the
> tracepoints.
Yes, my point though was that if it got deleted I doubt anyone would
notice. rasdaemon explicitly does not check the return from
open("daemon_active").
I am also curious about the history here. This "daemon_active" scheme is
an awkward way to detect that something is consuming the tracepoint. It
was added in v4.0, but Steven had added "tracepoint_enabled()" back in
v3.17:
7c65bbc7dcfa tracing: Add trace_<tracepoint>_enabled() function
So even if non-rasdaemon userspace was watching the extlog tracepoints
they would not fire because ras_userspace_consumers() prevents it.
I am finding it difficult to see why ras_userspace_consumers() needs to
continue to be maintained.
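If we did want to keep a "print only when nobody is watching" behavior,
the per-event helper seems like the right tool. A rough, untested sketch
against the extlog path (trace_extlog_mem_event_enabled() is the helper
that TRACE_EVENT() generates for the existing extlog tracepoint):

	/* gate the kernel log print on the tracepoint having consumers */
	if (!trace_extlog_mem_event_enabled()) {
		print_extlog_rcd(NULL, tmp, cpu);
		goto out;
	}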
> > and it should be the case that the tracepoints always fire whether
> > daemon_active is open or not.
> >
> > So I was expecting this removal to be a conversation starter on the
> > wider topic of error reporting consistency.
>
> Yeah, and then they'll come and say: ew, we're getting error duplicates
> - once logged in dmesg and once through the tracepoints.
That would be odd since there is no ras_userspace_consumers() in the
ACPI GHES path, so it is already the case that you can get duplicate
error information depending on which path triggers the error.
Tracepoints are individually configurable.
> So just like with the other thread, we have to figure out what our
> scheme will be wrt hw error logging, agree on it and then make it
> consistent.
From my perspective I want alignment between "firmware first" and "OS
Native" events and I think any movement away from kernel log messages as
a hardware error mechanism towards tracepoints is a good thing.
Recall that tracepoints can also be configured to emit to the kernel
log, so that might be a way to keep legacy kernel log message parsing
environments happy.
> Do we want to have both? Should it be configurable? Probably...
Would be great to hear from folks that have a reasons for kernel log
message error reporting to continue.
> Anything else...?
Uniformity of error response to "fatal" events, but that is mainly a
PCIe error handling concern not CPU errors.
On Sun, May 12, 2024 at 04:45:08PM -0700, Dan Williams wrote:
> Yes, my point though was that if it got deleted I doubt anyone would
> notice. rasdaemon explicitly does not check the return from
> open("daemon_active").
The intent was for userspace to open it and thus it'll increment
trace_count which then ras_userspace_consumers() reads...
> I am also curious about the history here. This "daemon_active" scheme is
> an awkward way to detect that something is consuming the tracepoint. It
> was added in v4.0, but Steven had added "tracepoint_enabled()" back in
> v3.17:
>
> 7c65bbc7dcfa tracing: Add trace_<tracepoint>_enabled() function
Ha, I usually talk to Rostedt for all things tracepoint when wondering
how we could use them for RAS purposes but I haven't this time, it
seems.
> So even if non-rasdaemon userspace was watching the extlog tracepoints
> they would not fire because ras_userspace_consumers() prevents it.
>
> I am finding it difficult to see why ras_userspace_consumers() needs to
> continue to be maintained.
Well, you still need some functionality which tests whether a userspace
daemon consumes RAS events. Whether it is something cludgy like now or
something which checks whether all RAS tracepoints have been enabled,
something's gotta be there.
> That would be odd since there is no ras_userspace_consumers() in the
> ACPI GHES path,
Probably because no one's using RAS daemon with GHES. I at least haven't
heard of anyone complaining about this yet...
> so it is already the case that you can get duplicate error information
> depending on which path triggers the error.
>
> Tracepoints are individually configurable.
Sure.
> From my perspective I want alignment between "firmware first" and "OS
> Native" events and I think any movement away from kernel log messages as
> a hardware error mechanism towards tracepoints is a good thing.
That has been the goal for a while now, yap.
Anyone who parses the kernel log for anything serious has been living
under a rock in the last decade at least. :)
> Recall that tracepoints can also be configured to emit to the kernel
> log, so that might be a way to keep legacy kernel log message parsing
> environments happy.
Ok.
> Would be great to hear from folks that have a reasons for kernel log
> message error reporting to continue.
Right, from my experience so far, you never hear anything. :-\
So if we do anything, it should be something simple that works for
almost everyone.
With RAS, everyone does their own thing. And then there's the firmware
which claims that it can do better RAS but then f*cks up on basic things
like *actually* shipping a working EINJ or whatever implementation.
So at the end of the day it is, oh, we need our drivers in the OS
because we can't fix firmware. It is harder to fix than *hardware*
:-P
> Uniformity of error response to "fatal" events, but that is mainly a
> PCIe error handling concern not CPU errors.
Sure, just make sure to keep it simple and generic.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
[ add Steven for a "deprecate emitting hardware errors with explicit printk" discussion ]
Borislav Petkov wrote:
> On Sun, May 12, 2024 at 04:45:08PM -0700, Dan Williams wrote:
> > Yes, my point though was that if it got deleted I doubt anyone would
> > notice. rasdaemon explicitly does not check the return from
> > open("daemon_active").
>
> The intent was for userspace to open it and thus it'll increment
> trace_count which then ras_userspace_consumers() reads...
>
> > I am also curious about the history here. This "daemon_active" scheme is
> > an awkward way to detect that something is consuming the tracepoint. It
> > was added in v4.0, but Steven had added "tracepoint_enabled()" back in
> > v3.17:
> >
> > 7c65bbc7dcfa tracing: Add trace_<tracepoint>_enabled() function
>
> Ha, I usually talk to Rostedt for all things tracepoint when wondering
> how we could use them for RAS purposes but I haven't this time, it
> seems.
>
> > So even if non-rasdaemon userspace was watching the extlog tracepoints
> > they would not fire because ras_userspace_consumers() prevents it.
> >
> > I am finding it difficult to see why ras_userspace_consumers() needs to
> > continue to be maintained.
>
> Well, you still need some functionality which tests whether a userspace
> daemon consumes RAS events. Whether it is something cludgy like now or
> something which checks whether all RAS tracepoints have been enabled,
> something's gotta be there.
Honest question: why? Let's deprecate that path. As an example, this
extlog path has bit-rotted and not kept pace with the same level of
error reporting that is available via ACPI GHES or OS native
tracepoints.
Given that it has not kept pace, the next question is whether the kernel
should bother to maintain the contract => "if nothing is watching
tracepoints some subset (all?) of hardware error messages will be reflected
to the kernel log".
I would point to tp_printk as a way to get tracepoints into the kernel
log. If that is too coarse-grained a replacement for print_extlog_rcd()
I would advocate spending time making tp_printk control more
fine-grained rather than perpetuate this duplicated emission path for
error info.
Something like a new annotation for tracepoints marking them as "emit to
kernel log if no-one is watching this high-priority trace event"?
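For reference, the coarse-grained tp_printk version of that exists today;
it would look something like booting with the line below (the event name
is from include/ras/ras_event.h; exact syntax quoted from memory, so
double-check the kernel-parameters documentation):

	tp_printk trace_event=ras:extlog_mem_event

...which mirrors the enabled event to printk, and IIRC the
kernel.tracepoint_printk sysctl can toggle it at runtime.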
> > That would be odd since there is no ras_userspace_consumers() in the
> > ACPI GHES path,
>
> Probably because no one's using RAS daemon with GHES. I at least haven't
> heard of anyone complaining about this yet...
Well no, there is little to complain about in that path because that
path does not play "is anybody watching" games. It always emits to the
kernel log (subject to rate limiting) and then follows up with always
emitting a tracepoint (subject to standard trace filtering).
For CXL I asked that its events deprecate the kernel log path with the
hope of not growing new userspace dependencies on kernel log parsing for
newfangled CXL errors.
[..]
> > Would be great to hear from folks that have a reasons for kernel log
> > message error reporting to continue.
>
> Right, from my experience so far, you never hear anything. :-\
>
> So if we do anything, it should be something simple that works for
> almost everyone.
>
> With RAS, everyone does their own thing. And then there's the firmware
> which claims that it can do better RAS but then f*cks up on basic things
> like *actually* shipping a working EINJ or whatever implementation.
*sad nod*
> So at the end of the day it is, oh, we need our drivers in the OS
> because we can't fix firmware. It is harder to fix than *hardware*
> :-P
At least when the firmware gets out of the way Linux has a chance to
solve user issues.
> > Uniformity of error response to "fatal" events, but that is mainly a
> > PCIe error handling concern not CPU errors.
>
> Sure, just make sure to keep it simple and generic.
Yes, tracepoints feel simple and generic to me, while kernel log messages
with rate-limiting are already a lossy proposition.
> Given that it has not kept pace, the next question is whether the kernel
> should bother to maintain the contract => "if nothing is watching
> tracepoints some subset (all?) of hardware error messages will be reflected
> to the kernel log".
I'm with you for corrected & recoverable errors. The console is a terrible
place for those logs.
But it's handy to have fatal errors go to the console. They may make it out to a
serial port, or be stashed in pstore for later retrieval. Trace event logs that
are handled by some user level daemon are just going to disappear in a
puff of reset smoke.
-Tony
Luck, Tony wrote:
> > Given that is has not kept pace the next question is whether the kernel
> > should bother to maintain the contract => "if nothing is watching
> > tracepoints some subset (all?) hardware error messages will be reflected
> > to the kernel log".
>
> I'm with you for corrected & recoverable errors. The console is a terrible
> place for those logs.
>
> But it's handy to have fatal errors go to the console. They may make it out to a
> serial port, or be stashed in pstore for later retrieval. Trace event logs that
> are handled by some user level daemon are just going to disappear in a
> puff of reset smoke.
That is true, and arguably fatal event logging to the kernel log should be
independent of whether userspace is watching. I.e. ras_userspace_consumers()
gating kernel logging is actively getting in the way when the error severity
implies userspace may not get a chance to see or process the event.
I would need help from Steven on whether ftrace would be amenable to having
a localized tracepoint_printk context to trigger emission of fatal events to
the kernel log in addition to the trace buffer.
On Thu, May 16, 2024 at 11:56:15AM -0700, Dan Williams wrote:
> Something like a new annotation for tracepoints marking them as "emit to
> kernel log if no-one is watching this high-priority trace event"?
I'm not sure anymore what we want here... :)
We want the kernel to not pay attention to whether userspace is
consuming error info from the tracepoints, and to issue errors into the
kernel log too.
So you have them in the kernel log *and* in the tracepoints.
Right?
Or only through the tracepoints?
> Well no, there is little to complain about in that path because that
> path does not play "is anybody watching" games. It always emits to the
> kernel log (subject to rate limiting) and then follows up with always
> emitting a tracepoint (subject to standard trace filtering).
>
> For CXL I asked that its events deprecate the kernel log path with the
> hope of not growing new userspace dependencies on kernel log parsing for
Yeah, kernel log string format is not an ABI so no problem.
> newfangled CXL errors.
So shuffling hw error info solely through the tracepoints is probably ok
but I am unable to rule out *all* possible cases where it would still
make sense to dump to the kernel log, be it that there's no userspace, be
it that it is a critical error and you want to dump it immediately or whatnot.
It should probably be configurable.
As in: by default all hw error info goes only through tracepoints with
the exception of fatal errors - they go both to the kernel log and
tracepoints.
And then perhaps add a "ras=dump_all_error_info_to_kernel_log_too" or
similar cmdline param.
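Roughly something like this - completely made up, just to show the shape
of it:

	static bool ras_dump_all_to_klog;

	static int __init ras_param(char *str)
	{
		if (str && !strcmp(str, "dump_all_error_info_to_kernel_log_too"))
			ras_dump_all_to_klog = true;
		return 1;
	}
	__setup("ras=", ras_param);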
> At least when the firmware gets out of the way Linux has a chance to
> solve user issues.
Yeap.
> Yes, tracepoints feel simple and generic to me, while kernel log messages
> with rate-limiting are already a lossy proposition.
Right. Btw, do bear in mind that ratelimiting of hw errors can be bad
sometimes: imagine it ratelimits exactly that one fatal error which you
absolutely should've seen... :-\
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
Fabio M. De Francesco wrote:
> Currently, extlog_print() (ELOG) reports the CPER PCIe section (UEFI
> v2.10, Appendix N.2.7) only to the kernel log via print_extlog_rcd().
> Instead, the analogous ghes_do_proc() (GHES) prints to the kernel log
> and also calls pci_print_aer() to report via the ftrace infrastructure.
>
> Report the CPER PCIe Error section also via the ftrace infrastructure
> by calling pci_print_aer(), to make ELOG act consistently with GHES.
>
> Cc: Dan Williams <[email protected]>
> Signed-off-by: Fabio M. De Francesco <[email protected]>
> ---
> drivers/acpi/acpi_extlog.c | 35 +++++++++++++++++++++++++++++++++++
> 1 file changed, 35 insertions(+)
>
> diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
> index 4e62d7235d33..fb7b0c73f86a 100644
> --- a/drivers/acpi/acpi_extlog.c
> +++ b/drivers/acpi/acpi_extlog.c
> @@ -131,6 +131,36 @@ static int print_extlog_rcd(const char *pfx,
> return 1;
> }
>
> +static void extlog_print_pcie(struct cper_sec_pcie *pcie_err,
> + int severity)
> +{
> +#ifdef CONFIG_ACPI_APEI_PCIEAER
Whenever possible do not use ifdef in C files, see "21) Conditional Compilation"
in coding style.
I suspect the issue here is that pci_print_aer() is not always defined, but the
solution there is something like:
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 4b97f38f3fcf..6ff54197480d 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -42,16 +42,20 @@ int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log *log);
#if defined(CONFIG_PCIEAER)
int pci_aer_clear_nonfatal_status(struct pci_dev *dev);
int pcie_aer_is_native(struct pci_dev *dev);
+void pci_print_aer(struct pci_dev *dev, int aer_severity,
+ struct aer_capability_regs *aer);
#else
static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
{
return -EINVAL;
}
static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; }
+static inline void pci_print_aer(struct pci_dev *dev, int aer_severity,
+ struct aer_capability_regs *aer)
+{
+}
#endif
-void pci_print_aer(struct pci_dev *dev, int aer_severity,
- struct aer_capability_regs *aer);
int cper_severity_to_aer(int cper_severity);
void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
int severity, struct aer_capability_regs *aer_regs);
> + struct aer_capability_regs *aer;
> + struct pci_dev *pdev;
> + unsigned int devfn;
> + unsigned int bus;
> + int aer_severity;
> + int domain;
> +
> + if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
> + pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
> + aer_severity = cper_severity_to_aer(severity);
> + aer = (struct aer_capability_regs *)pcie_err->aer_info;
> + domain = pcie_err->device_id.segment;
> + bus = pcie_err->device_id.bus;
> + devfn = PCI_DEVFN(pcie_err->device_id.device,
> + pcie_err->device_id.function);
> + pdev = pci_get_domain_bus_and_slot(domain, bus, devfn);
This pci_get_domain_bus_and_slot() takes a reference on @pdev; that reference
needs to be dropped after operations on @pdev are complete.
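I.e. something like (sketch):

	pdev = pci_get_domain_bus_and_slot(domain, bus, devfn);
	if (!pdev)
		return;

	pci_print_aer(pdev, aer_severity, aer);
	/* drop the reference taken by pci_get_domain_bus_and_slot() */
	pci_dev_put(pdev);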
> + if (!pdev) {
> + pr_err("no pci_dev for %04x:%02x:%02x.%x\n",
> + domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
I am not sure this error message is useful. I see this was copied from
aer_recover_work_func(). I wonder if a better approach is to just teach
pci_print_aer() to be tolerant of a NULL @pdev argument.
Most of what pci_print_aer() prints is just a device name, which can be
recreated as:
	sprintf(name, "%04x:%02x:%02x.%d", domain, bus, PCI_SLOT(devfn),
		PCI_FUNC(devfn));
For now, I would just return if a device cannot be found, and skip the print.
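If pci_print_aer() grew, say, the domain/bus/devfn as parameters (purely
illustrative, not the current signature), the NULL-@dev fallback inside it
could be as simple as:

	char name[32];

	if (dev)
		snprintf(name, sizeof(name), "%s", pci_name(dev));
	else
		snprintf(name, sizeof(name), "%04x:%02x:%02x.%d",
			 domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));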
> + }
> +#endif
> +}
> +
> static int extlog_print(struct notifier_block *nb, unsigned long val,
> void *data)
> {
> @@ -182,6 +212,10 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
> if (gdata->error_data_length >= sizeof(*mem))
> trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
> (u8)gdata->error_severity);
> + } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
> + struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
> +
> + extlog_print_pcie(pcie_err, gdata->error_severity);
> } else {
> void *err = acpi_hest_get_payload(gdata);
>
> @@ -331,3 +365,4 @@ module_exit(extlog_exit);
> MODULE_AUTHOR("Chen, Gong <[email protected]>");
> MODULE_DESCRIPTION("Extended MCA Error Log Driver");
> MODULE_LICENSE("GPL");
> +MODULE_IMPORT_NS(CXL);
This is awkward now that pci_print_aer() is consumed by a module that is not
"CXL", so perhaps we should either drop the namespacing of the pci_print_aer()
symbol or put it in its own namespace. I think the former is the way to go.