2023-09-15 22:00:44

by Shiju Jose

[permalink] [raw]
Subject: [RFC PATCH 1/1] ACPI / APEI: Fix for overwriting aer info when error status data have multiple sections

From: Shiju Jose <[email protected]>

ghes_handle_aer() lacks synchronization with aer_recover_work_func(),
so when error status data have multiple sections, aer_recover_work_func()
may use estatus data for aer_capability_regs after it has been overwritten.

The problem statement is here,
https://lore.kernel.org/all/20230901225755.GA90053@bhelgaas/

In ghes_handle_aer() allocates memory for aer_capability_regs from the
ghes_estatus_pool and copy data for aer_capability_regs from the estatus
buffer. Free the memory in aer_recover_work_func() after processing the
data using the ghes_estatus_pool_region_free() added.

Reported-by: Bjorn Helgaas <[email protected]>
Signed-off-by: Shiju Jose <[email protected]>
---
drivers/acpi/apei/ghes.c | 23 ++++++++++++++++++++++-
drivers/pci/pcie/aer.c | 10 ++++++++++
include/acpi/ghes.h | 1 +
3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index ef59d6ea16da..63ad0541db38 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -209,6 +209,20 @@ int ghes_estatus_pool_init(unsigned int num_ghes)
return -ENOMEM;
}

+/**
+ * ghes_estatus_pool_region_free - free previously allocated memory
+ * from the ghes_estatus_pool.
+ * @addr: address of memory to free.
+ * @size: size of memory to free.
+ *
+ * Returns none.
+ */
+void ghes_estatus_pool_region_free(unsigned long addr, u32 size)
+{
+ gen_pool_free(ghes_estatus_pool, addr, size);
+}
+EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
+
static int map_gen_v2(struct ghes *ghes)
{
return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
@@ -564,6 +578,7 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
unsigned int devfn;
int aer_severity;
+ u8 *aer_info;

devfn = PCI_DEVFN(pcie_err->device_id.device,
pcie_err->device_id.function);
@@ -577,11 +592,17 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
if (gdata->flags & CPER_SEC_RESET)
aer_severity = AER_FATAL;

+ aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
+ sizeof(struct aer_capability_regs));
+ if (!aer_info)
+ return;
+ memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs));
+
aer_recover_queue(pcie_err->device_id.segment,
pcie_err->device_id.bus,
devfn, aer_severity,
(struct aer_capability_regs *)
- pcie_err->aer_info);
+ aer_info);
}
#endif
}
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index e85ff946e8c8..388b614c11fd 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -29,6 +29,7 @@
#include <linux/kfifo.h>
#include <linux/slab.h>
#include <acpi/apei.h>
+#include <acpi/ghes.h>
#include <ras/ras_event.h>

#include "../pci.h"
@@ -996,6 +997,15 @@ static void aer_recover_work_func(struct work_struct *work)
continue;
}
cper_print_aer(pdev, entry.severity, entry.regs);
+ /*
+ * Memory for aer_capability_regs(entry.regs) is being allocated from the
+ * ghes_estatus_pool to protect it from overwriting when multiple sections
+ * are present in the error status. Thus free the same after processing
+ * the data.
+ */
+ ghes_estatus_pool_region_free((unsigned long)entry.regs,
+ sizeof(struct aer_capability_regs));
+
if (entry.severity == AER_NONFATAL)
pcie_do_recovery(pdev, pci_channel_io_normal,
aer_root_reset);
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 3c8bba9f1114..40d89e161076 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -78,6 +78,7 @@ static inline struct list_head *ghes_get_devices(void) { return NULL; }
#endif

int ghes_estatus_pool_init(unsigned int num_ghes);
+void ghes_estatus_pool_region_free(unsigned long addr, u32 size);

static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata)
{
--
2.34.1


2023-09-15 22:08:35

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [RFC PATCH 1/1] ACPI / APEI: Fix for overwriting aer info when error status data have multiple sections

[+cc Dave, since CXL is also fiddling with aer.c]

On Sat, Sep 16, 2023 at 01:44:35AM +0800, [email protected] wrote:
> From: Shiju Jose <[email protected]>
>
> ghes_handle_aer() lacks synchronization with aer_recover_work_func(),
> so when error status data have multiple sections, aer_recover_work_func()
> may use estatus data for aer_capability_regs after it has been overwritten.
>
> The problem statement is here,
> https://lore.kernel.org/all/20230901225755.GA90053@bhelgaas/
>
> In ghes_handle_aer() allocates memory for aer_capability_regs from the
> ghes_estatus_pool and copy data for aer_capability_regs from the estatus
> buffer. Free the memory in aer_recover_work_func() after processing the
> data using the ghes_estatus_pool_region_free() added.

Thanks for working this up! I had it on my to-do list, but obviously
had not gotten to it yet.

> Reported-by: Bjorn Helgaas <[email protected]>
> Signed-off-by: Shiju Jose <[email protected]>
> ---
> drivers/acpi/apei/ghes.c | 23 ++++++++++++++++++++++-
> drivers/pci/pcie/aer.c | 10 ++++++++++
> include/acpi/ghes.h | 1 +
> 3 files changed, 33 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index ef59d6ea16da..63ad0541db38 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -209,6 +209,20 @@ int ghes_estatus_pool_init(unsigned int num_ghes)
> return -ENOMEM;
> }
>
> +/**
> + * ghes_estatus_pool_region_free - free previously allocated memory
> + * from the ghes_estatus_pool.
> + * @addr: address of memory to free.
> + * @size: size of memory to free.
> + *
> + * Returns none.
> + */
> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size)
> +{
> + gen_pool_free(ghes_estatus_pool, addr, size);
> +}
> +EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
> +
> static int map_gen_v2(struct ghes *ghes)
> {
> return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
> @@ -564,6 +578,7 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
> pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
> unsigned int devfn;
> int aer_severity;
> + u8 *aer_info;
>
> devfn = PCI_DEVFN(pcie_err->device_id.device,
> pcie_err->device_id.function);
> @@ -577,11 +592,17 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
> if (gdata->flags & CPER_SEC_RESET)
> aer_severity = AER_FATAL;
>
> + aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
> + sizeof(struct aer_capability_regs));
> + if (!aer_info)
> + return;
> + memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs));

This is a very straightforward approach to fixing this, and it looks
pretty reasonable, although I'd rather not have to pull more GHES
stuff into aer.c (ghes.h and ghes_estatus_pool_region_free()).

What I had in mind was to put a queue of aer_capability_regs on the
PCI side that could be used by both the APEI path and the native path.

In the APEI path, platform firmware reads the error information from
the hardware, and it feeds into PCI AER via aer_recover_queue().

In the native path, Linux should be reading reads the same error
information from the hardware, but it feeds into PCI AER quite
differently, via aer_process_err_devices() and handle_error_source().

These paths are fundamentally doing the exact same thing, but the data
handling and dmesg logging are needlessly different. I'd like to see
them get a little more unified, so the native path could someday also
feed into aer_recover_queue().

Does that make any sense?

> aer_recover_queue(pcie_err->device_id.segment,
> pcie_err->device_id.bus,
> devfn, aer_severity,
> (struct aer_capability_regs *)
> - pcie_err->aer_info);
> + aer_info);
> }
> #endif
> }
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index e85ff946e8c8..388b614c11fd 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -29,6 +29,7 @@
> #include <linux/kfifo.h>
> #include <linux/slab.h>
> #include <acpi/apei.h>
> +#include <acpi/ghes.h>
> #include <ras/ras_event.h>
>
> #include "../pci.h"
> @@ -996,6 +997,15 @@ static void aer_recover_work_func(struct work_struct *work)
> continue;
> }
> cper_print_aer(pdev, entry.severity, entry.regs);
> + /*
> + * Memory for aer_capability_regs(entry.regs) is being allocated from the
> + * ghes_estatus_pool to protect it from overwriting when multiple sections
> + * are present in the error status. Thus free the same after processing
> + * the data.
> + */
> + ghes_estatus_pool_region_free((unsigned long)entry.regs,
> + sizeof(struct aer_capability_regs));
> +
> if (entry.severity == AER_NONFATAL)
> pcie_do_recovery(pdev, pci_channel_io_normal,
> aer_root_reset);
> diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
> index 3c8bba9f1114..40d89e161076 100644
> --- a/include/acpi/ghes.h
> +++ b/include/acpi/ghes.h
> @@ -78,6 +78,7 @@ static inline struct list_head *ghes_get_devices(void) { return NULL; }
> #endif
>
> int ghes_estatus_pool_init(unsigned int num_ghes);
> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size);
>
> static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata)
> {
> --
> 2.34.1
>

2023-09-18 16:15:17

by Shiju Jose

[permalink] [raw]
Subject: RE: [RFC PATCH 1/1] ACPI / APEI: Fix for overwriting aer info when error status data have multiple sections

Hi Bjorn,

>-----Original Message-----
>From: Bjorn Helgaas <[email protected]>
>Sent: 15 September 2023 23:02
>To: Shiju Jose <[email protected]>
>Cc: [email protected]; [email protected]; [email protected];
>[email protected]; [email protected]; [email protected]; linux-
>[email protected]; [email protected]; [email protected];
>Linuxarm <[email protected]>; Jonathan Cameron
><[email protected]>; tanxiaofei <[email protected]>;
>Zengtao (B) <[email protected]>; Dave Jiang <[email protected]>
>Subject: Re: [RFC PATCH 1/1] ACPI / APEI: Fix for overwriting aer info when error
>status data have multiple sections
>
>[+cc Dave, since CXL is also fiddling with aer.c]
>
>On Sat, Sep 16, 2023 at 01:44:35AM +0800, [email protected] wrote:
>> From: Shiju Jose <[email protected]>
>>
>> ghes_handle_aer() lacks synchronization with aer_recover_work_func(),
>> so when error status data have multiple sections,
>> aer_recover_work_func() may use estatus data for aer_capability_regs after it
>has been overwritten.
>>
>> The problem statement is here,
>> https://lore.kernel.org/all/20230901225755.GA90053@bhelgaas/
>>
>> In ghes_handle_aer() allocates memory for aer_capability_regs from the
>> ghes_estatus_pool and copy data for aer_capability_regs from the
>> estatus buffer. Free the memory in aer_recover_work_func() after
>> processing the data using the ghes_estatus_pool_region_free() added.
>
>Thanks for working this up! I had it on my to-do list, but obviously had not gotten
>to it yet.
>
>> Reported-by: Bjorn Helgaas <[email protected]>
>> Signed-off-by: Shiju Jose <[email protected]>
>> ---
>> drivers/acpi/apei/ghes.c | 23 ++++++++++++++++++++++-
>> drivers/pci/pcie/aer.c | 10 ++++++++++
>> include/acpi/ghes.h | 1 +
>> 3 files changed, 33 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index
>> ef59d6ea16da..63ad0541db38 100644
>> --- a/drivers/acpi/apei/ghes.c
>> +++ b/drivers/acpi/apei/ghes.c
>> @@ -209,6 +209,20 @@ int ghes_estatus_pool_init(unsigned int num_ghes)
>> return -ENOMEM;
>> }
>>
>> +/**
>> + * ghes_estatus_pool_region_free - free previously allocated memory
>> + * from the ghes_estatus_pool.
>> + * @addr: address of memory to free.
>> + * @size: size of memory to free.
>> + *
>> + * Returns none.
>> + */
>> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size) {
>> + gen_pool_free(ghes_estatus_pool, addr, size); }
>> +EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
>> +
>> static int map_gen_v2(struct ghes *ghes) {
>> return
>> apei_map_generic_address(&ghes->generic_v2->read_ack_register);
>> @@ -564,6 +578,7 @@ static void ghes_handle_aer(struct
>acpi_hest_generic_data *gdata)
>> pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
>> unsigned int devfn;
>> int aer_severity;
>> + u8 *aer_info;
>>
>> devfn = PCI_DEVFN(pcie_err->device_id.device,
>> pcie_err->device_id.function);
>> @@ -577,11 +592,17 @@ static void ghes_handle_aer(struct
>acpi_hest_generic_data *gdata)
>> if (gdata->flags & CPER_SEC_RESET)
>> aer_severity = AER_FATAL;
>>
>> + aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
>> + sizeof(struct
>aer_capability_regs));
>> + if (!aer_info)
>> + return;
>> + memcpy(aer_info, pcie_err->aer_info, sizeof(struct
>> +aer_capability_regs));
>
>This is a very straightforward approach to fixing this, and it looks pretty
>reasonable, although I'd rather not have to pull more GHES stuff into aer.c
>(ghes.h and ghes_estatus_pool_region_free()).
>
>What I had in mind was to put a queue of aer_capability_regs on the PCI side
>that could be used by both the APEI path and the native path.
>
>In the APEI path, platform firmware reads the error information from the
>hardware, and it feeds into PCI AER via aer_recover_queue().
>
>In the native path, Linux should be reading reads the same error information
>from the hardware, but it feeds into PCI AER quite differently, via
>aer_process_err_devices() and handle_error_source().
>
>These paths are fundamentally doing the exact same thing, but the data
>handling and dmesg logging are needlessly different. I'd like to see them get a
>little more unified, so the native path could someday also feed into
>aer_recover_queue().
>
>Does that make any sense?

Thanks for letting us know.
Make sense, solution with in the AER looks better.

>
>> aer_recover_queue(pcie_err->device_id.segment,
>> pcie_err->device_id.bus,
>> devfn, aer_severity,
>> (struct aer_capability_regs *)
>> - pcie_err->aer_info);
>> + aer_info);
>> }
>> #endif
>> }
>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index
>> e85ff946e8c8..388b614c11fd 100644
>> --- a/drivers/pci/pcie/aer.c
>> +++ b/drivers/pci/pcie/aer.c
>> @@ -29,6 +29,7 @@
>> #include <linux/kfifo.h>
>> #include <linux/slab.h>
>> #include <acpi/apei.h>
>> +#include <acpi/ghes.h>
>> #include <ras/ras_event.h>
>>
>> #include "../pci.h"
>> @@ -996,6 +997,15 @@ static void aer_recover_work_func(struct work_struct
>*work)
>> continue;
>> }
>> cper_print_aer(pdev, entry.severity, entry.regs);
>> + /*
>> + * Memory for aer_capability_regs(entry.regs) is being
>allocated from the
>> + * ghes_estatus_pool to protect it from overwriting when
>multiple sections
>> + * are present in the error status. Thus free the same after
>processing
>> + * the data.
>> + */
>> + ghes_estatus_pool_region_free((unsigned long)entry.regs,
>> + sizeof(struct aer_capability_regs));
>> +
>> if (entry.severity == AER_NONFATAL)
>> pcie_do_recovery(pdev, pci_channel_io_normal,
>> aer_root_reset);
>> diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index
>> 3c8bba9f1114..40d89e161076 100644
>> --- a/include/acpi/ghes.h
>> +++ b/include/acpi/ghes.h
>> @@ -78,6 +78,7 @@ static inline struct list_head
>> *ghes_get_devices(void) { return NULL; } #endif
>>
>> int ghes_estatus_pool_init(unsigned int num_ghes);
>> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size);
>>
>> static inline int acpi_hest_get_version(struct acpi_hest_generic_data
>> *gdata) {
>> --
>> 2.34.1
>>

Thanks,
Shiju

2023-09-18 18:36:58

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [RFC PATCH 1/1] ACPI / APEI: Fix for overwriting aer info when error status data have multiple sections

On Mon, Sep 18, 2023 at 8:16 PM Bjorn Helgaas <[email protected]> wrote:
>
> [+to Rafael, probably the logical place to apply this]
>
> On Mon, Sep 18, 2023 at 01:07:45PM +0000, Shiju Jose wrote:
> > >-----Original Message-----
> > >From: Bjorn Helgaas <[email protected]>
> > > ...
>
> > >On Sat, Sep 16, 2023 at 01:44:35AM +0800, [email protected] wrote:
> > >> From: Shiju Jose <[email protected]>
> > >>
> > >> ghes_handle_aer() lacks synchronization with
> > >> aer_recover_work_func(), so when error status data have multiple
> > >> sections, aer_recover_work_func() may use estatus data for
> > >> aer_capability_regs after it has been overwritten.
> > >>
> > >> The problem statement is here,
> > >> https://lore.kernel.org/all/20230901225755.GA90053@bhelgaas/
> > >>
> > >> In ghes_handle_aer() allocates memory for aer_capability_regs from the
> > >> ghes_estatus_pool and copy data for aer_capability_regs from the
> > >> estatus buffer. Free the memory in aer_recover_work_func() after
> > >> processing the data using the ghes_estatus_pool_region_free() added.
> > >
> > >Thanks for working this up! I had it on my to-do list, but
> > >obviously had not gotten to it yet.
> > >
> > >> Reported-by: Bjorn Helgaas <[email protected]>
> > >> Signed-off-by: Shiju Jose <[email protected]>
> > >> ---
> > >> drivers/acpi/apei/ghes.c | 23 ++++++++++++++++++++++-
> > >> drivers/pci/pcie/aer.c | 10 ++++++++++
> > >> include/acpi/ghes.h | 1 +
> > >> 3 files changed, 33 insertions(+), 1 deletion(-)
> > >>
> > >> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index
> > >> ef59d6ea16da..63ad0541db38 100644
> > >> --- a/drivers/acpi/apei/ghes.c
> > >> +++ b/drivers/acpi/apei/ghes.c
> > >> @@ -209,6 +209,20 @@ int ghes_estatus_pool_init(unsigned int num_ghes)
> > >> return -ENOMEM;
> > >> }
> > >>
> > >> +/**
> > >> + * ghes_estatus_pool_region_free - free previously allocated memory
> > >> + * from the ghes_estatus_pool.
> > >> + * @addr: address of memory to free.
> > >> + * @size: size of memory to free.
> > >> + *
> > >> + * Returns none.
> > >> + */
> > >> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size) {
> > >> + gen_pool_free(ghes_estatus_pool, addr, size); }
> > >> +EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
> > >> +
> > >> static int map_gen_v2(struct ghes *ghes) {
> > >> return
> > >> apei_map_generic_address(&ghes->generic_v2->read_ack_register);
> > >> @@ -564,6 +578,7 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
> > >> pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
> > >> unsigned int devfn;
> > >> int aer_severity;
> > >> + u8 *aer_info;
> > >>
> > >> devfn = PCI_DEVFN(pcie_err->device_id.device,
> > >> pcie_err->device_id.function);
> > >> @@ -577,11 +592,17 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
> > >> if (gdata->flags & CPER_SEC_RESET)
> > >> aer_severity = AER_FATAL;
> > >>
> > >> + aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
> > >> + sizeof(struct aer_capability_regs));
> > >> + if (!aer_info)
> > >> + return;
> > >> + memcpy(aer_info, pcie_err->aer_info, sizeof(struct
> > >> +aer_capability_regs));
> > >
> > >This is a very straightforward approach to fixing this, and it looks pretty
> > >reasonable, although I'd rather not have to pull more GHES stuff into aer.c
> > >(ghes.h and ghes_estatus_pool_region_free()).
> > >
> > >What I had in mind was to put a queue of aer_capability_regs on the PCI side
> > >that could be used by both the APEI path and the native path.
> > >
> > >In the APEI path, platform firmware reads the error information from the
> > >hardware, and it feeds into PCI AER via aer_recover_queue().
> > >
> > >In the native path, Linux should be reading reads the same error information
> > >from the hardware, but it feeds into PCI AER quite differently, via
> > >aer_process_err_devices() and handle_error_source().
> > >
> > >These paths are fundamentally doing the exact same thing, but the data
> > >handling and dmesg logging are needlessly different. I'd like to see them get a
> > >little more unified, so the native path could someday also feed into
> > >aer_recover_queue().
> > >
> > >Does that make any sense?
> >
> > Thanks for letting us know.
> > Make sense, solution with in the AER looks better.
>
> Thanks for considering this. The AER rework I have in mind is a
> longer-term project, so unless you have time and interest in doing
> that, I think we should apply your patch since it's all ready to go.
>
> You pointed to the problem statement, but I think it would be good to
> include a brief outline directly in the commit log, e.g., something
> like this:

Agreed.

> ghes_handle_aer() passes AER data to the PCI core for logging and
> recovery by calling aer_recover_queue() with a pointer to struct
> aer_capability_regs.
>
> The problem was that aer_recover_queue() queues the pointer directly
> without copying the aer_capability_regs data. The pointer was to
> the ghes->estatus buffer, which could be reused before
> aer_recover_work_func() reads the data.
>
> To avoid this problem, allocate a new aer_capability_regs structure
> from the ghes_estatus_pool, copy the AER data from the ghes->estatus
> buffer into it, pass a pointer to the new struct to
> aer_recover_queue(), and free it after aer_recover_work_func() has
> processed it.
>
> What's your take on this, Rafael?

I can apply it if the changelog is updated along the lines of the above.

> The biggest change is in
> drivers/acpi, so if you want to take it, feel free to add my:
>
> Acked-by: Bjorn Helgaas <[email protected]>

Thanks!

2023-09-18 19:00:01

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [RFC PATCH 1/1] ACPI / APEI: Fix for overwriting aer info when error status data have multiple sections

[+to Rafael, probably the logical place to apply this]

On Mon, Sep 18, 2023 at 01:07:45PM +0000, Shiju Jose wrote:
> >-----Original Message-----
> >From: Bjorn Helgaas <[email protected]>
> > ...

> >On Sat, Sep 16, 2023 at 01:44:35AM +0800, [email protected] wrote:
> >> From: Shiju Jose <[email protected]>
> >>
> >> ghes_handle_aer() lacks synchronization with
> >> aer_recover_work_func(), so when error status data have multiple
> >> sections, aer_recover_work_func() may use estatus data for
> >> aer_capability_regs after it has been overwritten.
> >>
> >> The problem statement is here,
> >> https://lore.kernel.org/all/20230901225755.GA90053@bhelgaas/
> >>
> >> In ghes_handle_aer() allocates memory for aer_capability_regs from the
> >> ghes_estatus_pool and copy data for aer_capability_regs from the
> >> estatus buffer. Free the memory in aer_recover_work_func() after
> >> processing the data using the ghes_estatus_pool_region_free() added.
> >
> >Thanks for working this up! I had it on my to-do list, but
> >obviously had not gotten to it yet.
> >
> >> Reported-by: Bjorn Helgaas <[email protected]>
> >> Signed-off-by: Shiju Jose <[email protected]>
> >> ---
> >> drivers/acpi/apei/ghes.c | 23 ++++++++++++++++++++++-
> >> drivers/pci/pcie/aer.c | 10 ++++++++++
> >> include/acpi/ghes.h | 1 +
> >> 3 files changed, 33 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index
> >> ef59d6ea16da..63ad0541db38 100644
> >> --- a/drivers/acpi/apei/ghes.c
> >> +++ b/drivers/acpi/apei/ghes.c
> >> @@ -209,6 +209,20 @@ int ghes_estatus_pool_init(unsigned int num_ghes)
> >> return -ENOMEM;
> >> }
> >>
> >> +/**
> >> + * ghes_estatus_pool_region_free - free previously allocated memory
> >> + * from the ghes_estatus_pool.
> >> + * @addr: address of memory to free.
> >> + * @size: size of memory to free.
> >> + *
> >> + * Returns none.
> >> + */
> >> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size) {
> >> + gen_pool_free(ghes_estatus_pool, addr, size); }
> >> +EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
> >> +
> >> static int map_gen_v2(struct ghes *ghes) {
> >> return
> >> apei_map_generic_address(&ghes->generic_v2->read_ack_register);
> >> @@ -564,6 +578,7 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
> >> pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
> >> unsigned int devfn;
> >> int aer_severity;
> >> + u8 *aer_info;
> >>
> >> devfn = PCI_DEVFN(pcie_err->device_id.device,
> >> pcie_err->device_id.function);
> >> @@ -577,11 +592,17 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
> >> if (gdata->flags & CPER_SEC_RESET)
> >> aer_severity = AER_FATAL;
> >>
> >> + aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
> >> + sizeof(struct aer_capability_regs));
> >> + if (!aer_info)
> >> + return;
> >> + memcpy(aer_info, pcie_err->aer_info, sizeof(struct
> >> +aer_capability_regs));
> >
> >This is a very straightforward approach to fixing this, and it looks pretty
> >reasonable, although I'd rather not have to pull more GHES stuff into aer.c
> >(ghes.h and ghes_estatus_pool_region_free()).
> >
> >What I had in mind was to put a queue of aer_capability_regs on the PCI side
> >that could be used by both the APEI path and the native path.
> >
> >In the APEI path, platform firmware reads the error information from the
> >hardware, and it feeds into PCI AER via aer_recover_queue().
> >
> >In the native path, Linux should be reading reads the same error information
> >from the hardware, but it feeds into PCI AER quite differently, via
> >aer_process_err_devices() and handle_error_source().
> >
> >These paths are fundamentally doing the exact same thing, but the data
> >handling and dmesg logging are needlessly different. I'd like to see them get a
> >little more unified, so the native path could someday also feed into
> >aer_recover_queue().
> >
> >Does that make any sense?
>
> Thanks for letting us know.
> Make sense, solution with in the AER looks better.

Thanks for considering this. The AER rework I have in mind is a
longer-term project, so unless you have time and interest in doing
that, I think we should apply your patch since it's all ready to go.

You pointed to the problem statement, but I think it would be good to
include a brief outline directly in the commit log, e.g., something
like this:

ghes_handle_aer() passes AER data to the PCI core for logging and
recovery by calling aer_recover_queue() with a pointer to struct
aer_capability_regs.

The problem was that aer_recover_queue() queues the pointer directly
without copying the aer_capability_regs data. The pointer was to
the ghes->estatus buffer, which could be reused before
aer_recover_work_func() reads the data.

To avoid this problem, allocate a new aer_capability_regs structure
from the ghes_estatus_pool, copy the AER data from the ghes->estatus
buffer into it, pass a pointer to the new struct to
aer_recover_queue(), and free it after aer_recover_work_func() has
processed it.

What's your take on this, Rafael? The biggest change is in
drivers/acpi, so if you want to take it, feel free to add my:

Acked-by: Bjorn Helgaas <[email protected]>

> >> aer_recover_queue(pcie_err->device_id.segment,
> >> pcie_err->device_id.bus,
> >> devfn, aer_severity,
> >> (struct aer_capability_regs *)
> >> - pcie_err->aer_info);
> >> + aer_info);
> >> }
> >> #endif
> >> }
> >> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index
> >> e85ff946e8c8..388b614c11fd 100644
> >> --- a/drivers/pci/pcie/aer.c
> >> +++ b/drivers/pci/pcie/aer.c
> >> @@ -29,6 +29,7 @@
> >> #include <linux/kfifo.h>
> >> #include <linux/slab.h>
> >> #include <acpi/apei.h>
> >> +#include <acpi/ghes.h>
> >> #include <ras/ras_event.h>
> >>
> >> #include "../pci.h"
> >> @@ -996,6 +997,15 @@ static void aer_recover_work_func(struct work_struct
> >*work)
> >> continue;
> >> }
> >> cper_print_aer(pdev, entry.severity, entry.regs);
> >> + /*
> >> + * Memory for aer_capability_regs(entry.regs) is being
> >allocated from the
> >> + * ghes_estatus_pool to protect it from overwriting when
> >multiple sections
> >> + * are present in the error status. Thus free the same after
> >processing
> >> + * the data.
> >> + */
> >> + ghes_estatus_pool_region_free((unsigned long)entry.regs,
> >> + sizeof(struct aer_capability_regs));
> >> +
> >> if (entry.severity == AER_NONFATAL)
> >> pcie_do_recovery(pdev, pci_channel_io_normal,
> >> aer_root_reset);
> >> diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index
> >> 3c8bba9f1114..40d89e161076 100644
> >> --- a/include/acpi/ghes.h
> >> +++ b/include/acpi/ghes.h
> >> @@ -78,6 +78,7 @@ static inline struct list_head
> >> *ghes_get_devices(void) { return NULL; } #endif
> >>
> >> int ghes_estatus_pool_init(unsigned int num_ghes);
> >> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size);
> >>
> >> static inline int acpi_hest_get_version(struct acpi_hest_generic_data
> >> *gdata) {
> >> --
> >> 2.34.1
> >>
>
> Thanks,
> Shiju