2021-11-04 10:57:32

by Xuesong Chen

[permalink] [raw]
Subject: [PATCH v5 0/4] PCI MCFG consolidation and APEI resource filtering

The issue that commit d91525eb8ee6 ("ACPI, EINJ: Enhance error injection tolerance
level") fixed on x86 also happens on our own ARM64 platform. We first sent a
patch[1] that tried to fix it in an arch-specific way, as x86 does, but following
the suggestion from Lorenzo Pieralisi and Catalin Marinas, we can consolidate the
PCI MCFG part and then fix it in a more common way. That is the purpose of this
patch series.

[1] https://marc.info/?l=linux-arm-kernel&m=163108478627166&w=2

---
Change from v4 to v5:
- Fix the "no previous prototype for 'remove_quirk_mcfg_res'" warning
reported by the kernel test robot.

Change from v3 to v4:
- Add a new patch (patch #3) to address the quirk ECAM access issue. Because
the normal ECAM config space can be accessed in a lockless way, we don't
need mutual exclusion with the EINJ action. But quirky ECAM implementations
may break this rule and corrupt the configuration access, so reserve their
MCFG address regions in this case to prevent that from happening.

- Add another patch (patch #4) to log a message for each parsed PCI MCFG
entry, per the suggestion from Bjorn Helgaas. Example output on ARM64:
ACPI: MCFG entry for domain 0000 [bus 00-0f] at [mem 0x50000000-0x50ffffff] (base 0x50000000)

- Commit message updated with more details of patch #2

Change from v2 to v3:
- Address the comments of Lorenzo Pieralisi about the CONFIG_PCI
dependency issue in the APEI module (patch #2)

Change from v1 to v2:
- Fix the "undefined reference to `pci_mmcfg_list'" build error in case
of PCI_CONFIG=n, reported by the kernel test robot

Xuesong Chen (4):
PCI: MCFG: Consolidate the separate PCI MCFG table entry list
ACPI: APEI: Filter the PCI MCFG address with an arch-agnostic method
ACPI: APEI: Reserve the MCFG address for quirk ECAM implementation
PCI: MCFG: Add the MCFG entry parse log message

arch/x86/include/asm/pci_x86.h | 17 +----------
arch/x86/pci/mmconfig-shared.c | 30 -------------------
drivers/acpi/apei/apei-base.c | 68 ++++++++++++++++++++++++++++++++----------
drivers/acpi/pci_mcfg.c | 46 +++++++++++++++-------------
drivers/pci/pci.c | 2 ++
drivers/pci/quirks.c | 2 ++
include/linux/pci.h | 18 +++++++++++
7 files changed, 101 insertions(+), 82 deletions(-)

--
2.9.5



2021-11-04 10:59:27

by Xuesong Chen

[permalink] [raw]
Subject: [PATCH v5 1/4] PCI: MCFG: Consolidate the separate PCI MCFG table entry list

The PCI MCFG entry list is duplicated between x86 and other arches like
ARM64 in the current implementation. Consolidating this list variable
removes the unnecessary duplication and also serves other purposes; for
example, we can remove some of the arch-specific code in the APEI/EINJ
module and re-implement it in a more common, arch-agnostic way.

To reduce the redundancy, this patch:
- Moves the "struct pci_mmcfg_region" definition from
arch/x86/include/asm/pci_x86.h to include/linux/pci.h, where it
can be shared across arches.

- Moves pci_mmcfg_list (a list of pci_mmcfg_region structs) from
arch/x86/pci/mmconfig-shared.c to drivers/pci/pci.c, where it can
be shared across arches.

- On x86 (which does not enable CONFIG_ACPI_MCFG), pci_mmcfg_list is
built in arch/x86/pci/mmconfig-shared.c as before.

- Removes the "struct mcfg_entry" from drivers/acpi/pci_mcfg.c.

- Replaces pci_mcfg_list (previously a list of mcfg_entry structs)
in drivers/acpi/pci_mcfg.c with the newly-shared pci_mmcfg_list (a
list of pci_mmcfg_region structs).

- On ARM64 (which does enable CONFIG_ACPI_MCFG), pci_mmcfg_list is
built in drivers/acpi/pci_mcfg.c.

Signed-off-by: Xuesong Chen <[email protected]>
---
arch/x86/include/asm/pci_x86.h | 17 +----------------
arch/x86/pci/mmconfig-shared.c | 2 --
drivers/acpi/pci_mcfg.c | 35 ++++++++++++++---------------------
drivers/pci/pci.c | 2 ++
include/linux/pci.h | 17 +++++++++++++++++
5 files changed, 34 insertions(+), 39 deletions(-)

diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 490411d..1f4257c 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -146,20 +146,7 @@ extern int pci_legacy_init(void);
extern void pcibios_fixup_irqs(void);

/* pci-mmconfig.c */
-
-/* "PCI MMCONFIG %04x [bus %02x-%02x]" */
-#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2)
-
-struct pci_mmcfg_region {
- struct list_head list;
- struct resource res;
- u64 address;
- char __iomem *virt;
- u16 segment;
- u8 start_bus;
- u8 end_bus;
- char name[PCI_MMCFG_RESOURCE_NAME_LEN];
-};
+struct pci_mmcfg_region;

extern int __init pci_mmcfg_arch_init(void);
extern void __init pci_mmcfg_arch_free(void);
@@ -174,8 +161,6 @@ extern struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start,

extern struct list_head pci_mmcfg_list;

-#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20)
-
/*
* On AMD Fam10h CPUs, all PCI MMIO configuration space accesses must use
* %eax. No other source or target registers may be used. The following
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 758cbfe..0b961fe6 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -31,8 +31,6 @@ static bool pci_mmcfg_arch_init_failed;
static DEFINE_MUTEX(pci_mmcfg_lock);
#define pci_mmcfg_lock_held() lock_is_held(&(pci_mmcfg_lock).dep_map)

-LIST_HEAD(pci_mmcfg_list);
-
static void __init pci_mmconfig_remove(struct pci_mmcfg_region *cfg)
{
if (cfg->res.parent)
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index 53cab97..6ce467f 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -13,14 +13,7 @@
#include <linux/pci-acpi.h>
#include <linux/pci-ecam.h>

-/* Structure to hold entries from the MCFG table */
-struct mcfg_entry {
- struct list_head list;
- phys_addr_t addr;
- u16 segment;
- u8 bus_start;
- u8 bus_end;
-};
+extern struct list_head pci_mmcfg_list;

#ifdef CONFIG_PCI_QUIRKS
struct mcfg_fixup {
@@ -214,16 +207,13 @@ static void pci_mcfg_apply_quirks(struct acpi_pci_root *root,
#endif
}

-/* List to save MCFG entries */
-static LIST_HEAD(pci_mcfg_list);
-
int pci_mcfg_lookup(struct acpi_pci_root *root, struct resource *cfgres,
const struct pci_ecam_ops **ecam_ops)
{
const struct pci_ecam_ops *ops = &pci_generic_ecam_ops;
struct resource *bus_res = &root->secondary;
u16 seg = root->segment;
- struct mcfg_entry *e;
+ struct pci_mmcfg_region *e;
struct resource res;

/* Use address from _CBA if present, otherwise lookup MCFG */
@@ -233,10 +223,10 @@ int pci_mcfg_lookup(struct acpi_pci_root *root, struct resource *cfgres,
/*
* We expect the range in bus_res in the coverage of MCFG bus range.
*/
- list_for_each_entry(e, &pci_mcfg_list, list) {
- if (e->segment == seg && e->bus_start <= bus_res->start &&
- e->bus_end >= bus_res->end) {
- root->mcfg_addr = e->addr;
+ list_for_each_entry(e, &pci_mmcfg_list, list) {
+ if (e->segment == seg && e->start_bus <= bus_res->start &&
+ e->end_bus >= bus_res->end) {
+ root->mcfg_addr = e->address;
}

}
@@ -268,7 +258,7 @@ static __init int pci_mcfg_parse(struct acpi_table_header *header)
{
struct acpi_table_mcfg *mcfg;
struct acpi_mcfg_allocation *mptr;
- struct mcfg_entry *e, *arr;
+ struct pci_mmcfg_region *e, *arr;
int i, n;

if (header->length < sizeof(struct acpi_table_mcfg))
@@ -285,10 +275,13 @@ static __init int pci_mcfg_parse(struct acpi_table_header *header)

for (i = 0, e = arr; i < n; i++, mptr++, e++) {
e->segment = mptr->pci_segment;
- e->addr = mptr->address;
- e->bus_start = mptr->start_bus_number;
- e->bus_end = mptr->end_bus_number;
- list_add(&e->list, &pci_mcfg_list);
+ e->address = mptr->address;
+ e->start_bus = mptr->start_bus_number;
+ e->end_bus = mptr->end_bus_number;
+ e->res.start = e->address + PCI_MMCFG_BUS_OFFSET(e->start_bus);
+ e->res.end = e->address + PCI_MMCFG_BUS_OFFSET(e->end_bus + 1) - 1;
+ e->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ list_add(&e->list, &pci_mmcfg_list);
}

#ifdef CONFIG_PCI_QUIRKS
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index f2cd111..29ddfdd 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -47,6 +47,8 @@ EXPORT_SYMBOL(isa_dma_bridge_buggy);
int pci_pci_problems;
EXPORT_SYMBOL(pci_pci_problems);

+LIST_HEAD(pci_mmcfg_list);
+
unsigned int pci_pm_d3hot_delay;

static void pci_pme_list_scan(struct work_struct *work);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c8afbee..af8dcc8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -55,6 +55,23 @@
#define PCI_RESET_PROBE true
#define PCI_RESET_DO_RESET false

+#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20)
+
+/* "PCI MMCONFIG %04x [bus %02x-%02x]" */
+#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2)
+
+/* pci mcfg region */
+struct pci_mmcfg_region {
+ struct list_head list;
+ struct resource res;
+ u64 address;
+ char __iomem *virt;
+ u16 segment;
+ u8 start_bus;
+ u8 end_bus;
+ char name[PCI_MMCFG_RESOURCE_NAME_LEN];
+};
+
/*
* The PCI interface treats multi-function devices as independent
* devices. The slot/function address of each device is encoded
--
2.9.5


2021-11-04 10:59:57

by Xuesong Chen

[permalink] [raw]
Subject: [PATCH v5 2/4] ACPI: APEI: Filter the PCI MCFG address with an arch-agnostic method

Commit d91525eb8ee6 ("ACPI, EINJ: Enhance error injection tolerance
level") fixes the issue that ACPI/APEI cannot access the PCI MCFG
address on the x86 platform, but this issue can also happen on other
architectures. For instance, we got the error message below on an ARM64
platform:
...
APEI: Can not request [mem 0x50100000-0x50100003] for APEI EINJ Trigger registers
...

The above register range is within the MCFG area. When the hardware
implementation of ECAM is correct, PCI ECAM accesses the configuration
space atomically, which means we don't need mutual exclusion with the
EINJ action. Thus we can safely exclude the MCFG address regions from
the resources that APEI requests, just like the x86 fix does.

Since all the MCFG resources are now saved in pci_mmcfg_list, which is
shared across different arches, we can filter the MCFG resources out of
the APEI resources with apei_resources_sub(...) in a more common,
arch-agnostic way, which benefits all APEI-dependent platforms.

Signed-off-by: Xuesong Chen <[email protected]>
---
arch/x86/pci/mmconfig-shared.c | 28 --------------------------
drivers/acpi/apei/apei-base.c | 45 ++++++++++++++++++++++++++++--------------
2 files changed, 30 insertions(+), 43 deletions(-)

diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 0b961fe6..12f7d96 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -605,32 +605,6 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)
return 0;
}

-#ifdef CONFIG_ACPI_APEI
-extern int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size,
- void *data), void *data);
-
-static int pci_mmcfg_for_each_region(int (*func)(__u64 start, __u64 size,
- void *data), void *data)
-{
- struct pci_mmcfg_region *cfg;
- int rc;
-
- if (list_empty(&pci_mmcfg_list))
- return 0;
-
- list_for_each_entry(cfg, &pci_mmcfg_list, list) {
- rc = func(cfg->res.start, resource_size(&cfg->res), data);
- if (rc)
- return rc;
- }
-
- return 0;
-}
-#define set_apei_filter() (arch_apei_filter_addr = pci_mmcfg_for_each_region)
-#else
-#define set_apei_filter()
-#endif
-
static void __init __pci_mmcfg_init(int early)
{
pci_mmcfg_reject_broken(early);
@@ -665,8 +639,6 @@ void __init pci_mmcfg_early_init(void)
else
acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
__pci_mmcfg_init(1);
-
- set_apei_filter();
}
}

diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index c7fdb12..daae75a 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -21,6 +21,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/slab.h>
#include <linux/io.h>
@@ -448,13 +449,34 @@ static int apei_get_nvs_resources(struct apei_resources *resources)
return acpi_nvs_for_each_region(apei_get_res_callback, resources);
}

-int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size,
- void *data), void *data);
-static int apei_get_arch_resources(struct apei_resources *resources)
+#ifdef CONFIG_PCI
+extern struct list_head pci_mmcfg_list;
+static int apei_filter_mcfg_addr(struct apei_resources *res,
+ struct apei_resources *mcfg_res)
+{
+ int rc = 0;
+ struct pci_mmcfg_region *cfg;
+
+ if (list_empty(&pci_mmcfg_list))
+ return 0;
+
+ apei_resources_init(mcfg_res);
+ list_for_each_entry(cfg, &pci_mmcfg_list, list) {
+ rc = apei_res_add(&mcfg_res->iomem, cfg->res.start, resource_size(&cfg->res));
+ if (rc)
+ return rc;
+ }

+ /* filter the mcfg resource from current APEI's */
+ return apei_resources_sub(res, mcfg_res);
+}
+#else
+static inline int apei_filter_mcfg_addr(struct apei_resources *res,
+ struct apei_resources *mcfg_res)
{
- return arch_apei_filter_addr(apei_get_res_callback, resources);
+ return 0;
}
+#endif

/*
* IO memory/port resource management mechanism is used to check
@@ -486,15 +508,9 @@ int apei_resources_request(struct apei_resources *resources,
if (rc)
goto nvs_res_fini;

- if (arch_apei_filter_addr) {
- apei_resources_init(&arch_res);
- rc = apei_get_arch_resources(&arch_res);
- if (rc)
- goto arch_res_fini;
- rc = apei_resources_sub(resources, &arch_res);
- if (rc)
- goto arch_res_fini;
- }
+ rc = apei_filter_mcfg_addr(resources, &arch_res);
+ if (rc)
+ goto arch_res_fini;

rc = -EINVAL;
list_for_each_entry(res, &resources->iomem, list) {
@@ -544,8 +560,7 @@ int apei_resources_request(struct apei_resources *resources,
release_mem_region(res->start, res->end - res->start);
}
arch_res_fini:
- if (arch_apei_filter_addr)
- apei_resources_fini(&arch_res);
+ apei_resources_fini(&arch_res);
nvs_res_fini:
apei_resources_fini(&nvs_resources);
return rc;
--
2.9.5


2021-11-04 11:00:34

by Xuesong Chen

[permalink] [raw]
Subject: [PATCH v5 3/4] ACPI: APEI: Reserve the MCFG address for quirk ECAM implementation

On some platforms, the hardware ECAM implementation is not generic
as expected, so the atomicity of PCI configuration access is lost.
In this case, we need to reserve those quirk MCFG address regions
when filtering the normal MCFG resources, to make sure that mutual
exclusion still works between MCFG configuration access and EINJ
operations.

Signed-off-by: Xuesong Chen <[email protected]>
---
drivers/acpi/apei/apei-base.c | 25 ++++++++++++++++++++++++-
drivers/acpi/pci_mcfg.c | 8 ++++++++
drivers/pci/quirks.c | 2 ++
include/linux/pci.h | 1 +
4 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index daae75a..dbb0fa4 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -450,6 +450,23 @@ static int apei_get_nvs_resources(struct apei_resources *resources)
}

#ifdef CONFIG_PCI
+static int remove_quirk_mcfg_res(struct apei_resources *mcfg_res)
+{
+#ifdef CONFIG_PCI_QUIRKS
+ int rc = 0;
+ struct apei_resources quirk_res;
+
+ apei_resources_init(&quirk_res);
+ rc = apei_res_add(&quirk_res.iomem, pci_quirk_mcfg_res.start,
+ resource_size(&pci_quirk_mcfg_res));
+ if (rc)
+ return rc;
+
+ return apei_resources_sub(mcfg_res, &quirk_res);
+#else
+ return 0;
+#endif
+}
extern struct list_head pci_mmcfg_list;
static int apei_filter_mcfg_addr(struct apei_resources *res,
struct apei_resources *mcfg_res)
@@ -462,11 +479,17 @@ static int apei_filter_mcfg_addr(struct apei_resources *res,

apei_resources_init(mcfg_res);
list_for_each_entry(cfg, &pci_mmcfg_list, list) {
- rc = apei_res_add(&mcfg_res->iomem, cfg->res.start, resource_size(&cfg->res));
+ rc = apei_res_add(&mcfg_res->iomem, cfg->res.start,
+ resource_size(&cfg->res));
if (rc)
return rc;
}

+ /* remove the pci quirk mcfg resource if any from the mcfg_res */
+ rc = remove_quirk_mcfg_res(mcfg_res);
+ if (rc)
+ return rc;
+
/* filter the mcfg resource from current APEI's */
return apei_resources_sub(res, mcfg_res);
}
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index 6ce467f..b5ab866 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -26,6 +26,8 @@ struct mcfg_fixup {
struct resource cfgres;
};

+static bool pci_quirk_matched;
+
#define MCFG_BUS_RANGE(start, end) DEFINE_RES_NAMED((start), \
((end) - (start) + 1), \
NULL, IORESOURCE_BUS)
@@ -195,6 +197,7 @@ static void pci_mcfg_apply_quirks(struct acpi_pci_root *root,

for (i = 0, f = mcfg_quirks; i < ARRAY_SIZE(mcfg_quirks); i++, f++) {
if (pci_mcfg_quirk_matches(f, segment, bus_range)) {
+ pci_quirk_matched = true;
if (f->cfgres.start)
*cfgres = f->cfgres;
if (f->ops)
@@ -251,6 +254,11 @@ int pci_mcfg_lookup(struct acpi_pci_root *root, struct resource *cfgres,

*cfgres = res;
*ecam_ops = ops;
+#ifdef CONFIG_PCI_QUIRKS
+ if (pci_quirk_matched)
+ pci_quirk_mcfg_res = res;
+#endif
+
return 0;
}

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 1dd426f..1e2fa1c 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -32,6 +32,8 @@
#include <asm/dma.h> /* isa_dma_bridge_buggy */
#include "pci.h"

+struct resource pci_quirk_mcfg_res;
+
static ktime_t fixup_debug_start(struct pci_dev *dev,
void (*fn)(struct pci_dev *dev))
{
diff --git a/include/linux/pci.h b/include/linux/pci.h
index af8dcc8..763a1b2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2104,6 +2104,7 @@ enum pci_fixup_pass {
suspend_late##hook, vendor, device, PCI_ANY_ID, 0, hook)

#ifdef CONFIG_PCI_QUIRKS
+extern struct resource pci_quirk_mcfg_res;
void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
#else
static inline void pci_fixup_device(enum pci_fixup_pass pass,
--
2.9.5


2021-11-04 11:00:54

by Xuesong Chen

[permalink] [raw]
Subject: [PATCH v5 4/4] PCI: MCFG: Add the MCFG entry parse log message

Log each parsed MCFG entry, to be consistent with x86's MMCONFIG and to
ease the dissection of the PCI MCFG entry parsing process.

Signed-off-by: Xuesong Chen <[email protected]>
---
drivers/acpi/pci_mcfg.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index b5ab866..99c9bf5 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -290,6 +290,9 @@ static __init int pci_mcfg_parse(struct acpi_table_header *header)
e->res.end = e->address + PCI_MMCFG_BUS_OFFSET(e->end_bus + 1) - 1;
e->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
list_add(&e->list, &pci_mmcfg_list);
+ pr_info("MCFG entry for domain %04x [bus %02x-%02x] at %pR "
+ "(base %#lx)\n", e->segment, e->start_bus,
+ e->end_bus, &e->res, (unsigned long)e->address);
}

#ifdef CONFIG_PCI_QUIRKS
--
2.9.5