2014-11-19 16:05:33

by Tomasz Nowicki

[permalink] [raw]
Subject: [PATCH 0/6] PCI: MMCONFIG clean up

MMCFG ACPI table has no arch dependencies so it can be used across all
architectures. Currently MMCONFIG related code resides in arch/x86 directories.
This patch set is goint to isolate non-architecure specific code and make
it accessible from drivers/pci/ directory.

Tomasz Nowicki (6):
x86, acpi, pci: Reorder logic of pci_mmconfig_insert() function
x86, acpi, pci: Move arch-agnostic MMCFG code out of arch/x86/
directory
x86, acpi, pci: Move PCI config space accessors.
x86, acpi, pci: mmconfig_{32,64}.c code refactoring - remove code
duplication.
x86, acpi, pci: mmconfig_64.c becomes default implementation for arch
agnostic low-level direct PCI config space accessors via MMCONFIG.
pci, acpi: Share ACPI PCI config space accessors.

arch/x86/include/asm/pci.h | 42 +++++
arch/x86/include/asm/pci_x86.h | 72 --------
arch/x86/pci/Makefile | 5 +-
arch/x86/pci/acpi.c | 1 +
arch/x86/pci/init.c | 1 +
arch/x86/pci/mmconfig-shared.c | 242 ++++---------------------
arch/x86/pci/mmconfig_32.c | 11 +-
arch/x86/pci/mmconfig_64.c | 153 ----------------
drivers/acpi/Makefile | 1 +
drivers/acpi/bus.c | 1 +
drivers/acpi/mmconfig.c | 396 +++++++++++++++++++++++++++++++++++++++++
include/linux/mmconfig.h | 62 +++++++
include/linux/pci.h | 8 -
13 files changed, 541 insertions(+), 454 deletions(-)
delete mode 100644 arch/x86/pci/mmconfig_64.c
create mode 100644 drivers/acpi/mmconfig.c
create mode 100644 include/linux/mmconfig.h

--
1.9.1


2014-11-19 16:05:40

by Tomasz Nowicki

[permalink] [raw]
Subject: [PATCH 2/6] x86, acpi, pci: Move arch-agnostic MMCFG code out of arch/x86/ directory

MMCFG table seems to be architecture independent and it makes sense
to share common code across all architectures. The ones that may need
architectural specific actions have default prototype (__weak).

Signed-off-by: Tomasz Nowicki <[email protected]>
Tested-by: Hanjun Guo <[email protected]>
---
arch/x86/include/asm/pci_x86.h | 29 -----
arch/x86/pci/acpi.c | 1 +
arch/x86/pci/init.c | 1 +
arch/x86/pci/mmconfig-shared.c | 200 +---------------------------------
arch/x86/pci/mmconfig_32.c | 1 +
arch/x86/pci/mmconfig_64.c | 1 +
drivers/acpi/Makefile | 1 +
drivers/acpi/bus.c | 1 +
drivers/acpi/mmconfig.c | 242 +++++++++++++++++++++++++++++++++++++++++
include/linux/mmconfig.h | 58 ++++++++++
include/linux/pci.h | 8 --
11 files changed, 308 insertions(+), 235 deletions(-)
create mode 100644 drivers/acpi/mmconfig.c
create mode 100644 include/linux/mmconfig.h

diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index fa1195d..caba141 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -121,35 +121,6 @@ extern int __init pcibios_init(void);
extern int pci_legacy_init(void);
extern void pcibios_fixup_irqs(void);

-/* pci-mmconfig.c */
-
-/* "PCI MMCONFIG %04x [bus %02x-%02x]" */
-#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2)
-
-struct pci_mmcfg_region {
- struct list_head list;
- struct resource res;
- u64 address;
- char __iomem *virt;
- u16 segment;
- u8 start_bus;
- u8 end_bus;
- char name[PCI_MMCFG_RESOURCE_NAME_LEN];
-};
-
-extern int __init pci_mmcfg_arch_init(void);
-extern void __init pci_mmcfg_arch_free(void);
-extern int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
-extern void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg);
-extern int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
- phys_addr_t addr);
-extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end);
-extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
-
-extern struct list_head pci_mmcfg_list;
-
-#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20)
-
/*
* AMD Fam10h CPUs are buggy, and cannot access MMIO config space
* on their northbrige except through the * %eax register. As such, you MUST
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index cfd1b13..6d11131 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -4,6 +4,7 @@
#include <linux/irq.h>
#include <linux/dmi.h>
#include <linux/slab.h>
+#include <linux/mmconfig.h>
#include <asm/numa.h>
#include <asm/pci_x86.h>

diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index adb62aa..b4a55df 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -1,5 +1,6 @@
#include <linux/pci.h>
#include <linux/init.h>
+#include <linux/mmconfig.h>
#include <asm/pci_x86.h>
#include <asm/x86_init.h>

diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index ac24e1c..b397544 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/rculist.h>
+#include <linux/mmconfig.h>
#include <asm/e820.h>
#include <asm/pci_x86.h>
#include <asm/acpi.h>
@@ -27,103 +28,6 @@
/* Indicate if the mmcfg resources have been placed into the resource table. */
static bool pci_mmcfg_running_state;
static bool pci_mmcfg_arch_init_failed;
-static DEFINE_MUTEX(pci_mmcfg_lock);
-
-LIST_HEAD(pci_mmcfg_list);
-
-static void __init pci_mmconfig_remove(struct pci_mmcfg_region *cfg)
-{
- if (cfg->res.parent)
- release_resource(&cfg->res);
- list_del(&cfg->list);
- kfree(cfg);
-}
-
-static void __init free_all_mmcfg(void)
-{
- struct pci_mmcfg_region *cfg, *tmp;
-
- pci_mmcfg_arch_free();
- list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list)
- pci_mmconfig_remove(cfg);
-}
-
-static void list_add_sorted(struct pci_mmcfg_region *new)
-{
- struct pci_mmcfg_region *cfg;
-
- /* keep list sorted by segment and starting bus number */
- list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) {
- if (cfg->segment > new->segment ||
- (cfg->segment == new->segment &&
- cfg->start_bus >= new->start_bus)) {
- list_add_tail_rcu(&new->list, &cfg->list);
- return;
- }
- }
- list_add_tail_rcu(&new->list, &pci_mmcfg_list);
-}
-
-static struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
- int end, u64 addr)
-{
- struct pci_mmcfg_region *new;
- struct resource *res;
-
- if (addr == 0)
- return NULL;
-
- new = kzalloc(sizeof(*new), GFP_KERNEL);
- if (!new)
- return NULL;
-
- new->address = addr;
- new->segment = segment;
- new->start_bus = start;
- new->end_bus = end;
-
- res = &new->res;
- res->start = addr + PCI_MMCFG_BUS_OFFSET(start);
- res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN,
- "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end);
- res->name = new->name;
-
- return new;
-}
-
-static struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start,
- int end, u64 addr)
-{
- struct pci_mmcfg_region *new;
-
- new = pci_mmconfig_alloc(segment, start, end, addr);
- if (new) {
- mutex_lock(&pci_mmcfg_lock);
- list_add_sorted(new);
- mutex_unlock(&pci_mmcfg_lock);
-
- pr_info(PREFIX
- "MMCONFIG for domain %04x [bus %02x-%02x] at %pR "
- "(base %#lx)\n",
- segment, start, end, &new->res, (unsigned long)addr);
- }
-
- return new;
-}
-
-struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
-{
- struct pci_mmcfg_region *cfg;
-
- list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
- if (cfg->segment == segment &&
- cfg->start_bus <= bus && bus <= cfg->end_bus)
- return cfg;
-
- return NULL;
-}

static const char *__init pci_mmcfg_e7520(void)
{
@@ -543,7 +447,7 @@ static void __init pci_mmcfg_reject_broken(int early)
}
}

-static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
+int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
struct acpi_mcfg_allocation *cfg)
{
int year;
@@ -566,50 +470,6 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
return -EINVAL;
}

-static int __init pci_parse_mcfg(struct acpi_table_header *header)
-{
- struct acpi_table_mcfg *mcfg;
- struct acpi_mcfg_allocation *cfg_table, *cfg;
- unsigned long i;
- int entries;
-
- if (!header)
- return -EINVAL;
-
- mcfg = (struct acpi_table_mcfg *)header;
-
- /* how many config structures do we have */
- free_all_mmcfg();
- entries = 0;
- i = header->length - sizeof(struct acpi_table_mcfg);
- while (i >= sizeof(struct acpi_mcfg_allocation)) {
- entries++;
- i -= sizeof(struct acpi_mcfg_allocation);
- }
- if (entries == 0) {
- pr_err(PREFIX "MMCONFIG has no entries\n");
- return -ENODEV;
- }
-
- cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1];
- for (i = 0; i < entries; i++) {
- cfg = &cfg_table[i];
- if (acpi_mcfg_check_entry(mcfg, cfg)) {
- free_all_mmcfg();
- return -ENODEV;
- }
-
- if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number,
- cfg->end_bus_number, cfg->address) == NULL) {
- pr_warn(PREFIX "no memory for MCFG entries\n");
- free_all_mmcfg();
- return -ENOMEM;
- }
- }
-
- return 0;
-}
-
static void __init __pci_mmcfg_init(int early)
{
pci_mmcfg_reject_broken(early);
@@ -692,39 +552,6 @@ static int __init pci_mmcfg_late_insert_resources(void)
*/
late_initcall(pci_mmcfg_late_insert_resources);

-static int __init pci_mmconfig_inject(struct pci_mmcfg_region *cfg)
-{
- struct pci_mmcfg_region *cfg_conflict;
- int err = 0;
-
- mutex_lock(&pci_mmcfg_lock);
- cfg_conflict = pci_mmconfig_lookup(cfg->segment, cfg->start_bus);
- if (cfg_conflict) {
- if (cfg_conflict->end_bus < cfg->end_bus)
- pr_info(FW_INFO "MMCONFIG for "
- "domain %04x [bus %02x-%02x] "
- "only partially covers this bridge\n",
- cfg_conflict->segment, cfg_conflict->start_bus,
- cfg_conflict->end_bus);
- err = -EEXIST;
- goto out;
- }
-
- if (pci_mmcfg_arch_map(cfg)) {
- pr_warn("fail to map MMCONFIG %pR.\n", &cfg->res);
- err = -ENOMEM;
- goto out;
- } else {
- list_add_sorted(cfg);
- pr_info("MMCONFIG at %pR (base %#lx)\n",
- &cfg->res, (unsigned long)cfg->address);
-
- }
-out:
- mutex_unlock(&pci_mmcfg_lock);
- return err;
-}
-
/* Add MMCFG information for host bridges */
int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
phys_addr_t addr)
@@ -773,26 +600,3 @@ error:
kfree(cfg);
return rc;
}
-
-/* Delete MMCFG information for host bridges */
-int pci_mmconfig_delete(u16 seg, u8 start, u8 end)
-{
- struct pci_mmcfg_region *cfg;
-
- mutex_lock(&pci_mmcfg_lock);
- list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
- if (cfg->segment == seg && cfg->start_bus == start &&
- cfg->end_bus == end) {
- list_del_rcu(&cfg->list);
- synchronize_rcu();
- pci_mmcfg_arch_unmap(cfg);
- if (cfg->res.parent)
- release_resource(&cfg->res);
- mutex_unlock(&pci_mmcfg_lock);
- kfree(cfg);
- return 0;
- }
- mutex_unlock(&pci_mmcfg_lock);
-
- return -ENOENT;
-}
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index 43984bc..d774672 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -12,6 +12,7 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
+#include <linux/mmconfig.h>
#include <asm/e820.h>
#include <asm/pci_x86.h>

diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index bea5249..1209596 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -10,6 +10,7 @@
#include <linux/acpi.h>
#include <linux/bitmap.h>
#include <linux/rcupdate.h>
+#include <linux/mmconfig.h>
#include <asm/e820.h>
#include <asm/pci_x86.h>

diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index c8a16e1..debacb5 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_ACPI_BUTTON) += button.o
obj-$(CONFIG_ACPI_FAN) += fan.o
obj-$(CONFIG_ACPI_VIDEO) += video.o
obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o
+obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
obj-$(CONFIG_ACPI_PROCESSOR) += processor.o
obj-y += container.o
obj-$(CONFIG_ACPI_THERMAL) += thermal.o
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index c412fdb..6d5412ab 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -41,6 +41,7 @@
#include <acpi/apei.h>
#include <linux/dmi.h>
#include <linux/suspend.h>
+#include <linux/mmconfig.h>

#include "internal.h"

diff --git a/drivers/acpi/mmconfig.c b/drivers/acpi/mmconfig.c
new file mode 100644
index 0000000..d62dccda
--- /dev/null
+++ b/drivers/acpi/mmconfig.c
@@ -0,0 +1,242 @@
+/*
+ * Arch agnostic low-level direct PCI config space access via MMCONFIG
+ *
+ * Per-architecture code takes care of the mappings, region validation and
+ * accesses themselves.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/mutex.h>
+#include <linux/rculist.h>
+#include <linux/mmconfig.h>
+
+#define PREFIX "PCI: "
+
+static DEFINE_MUTEX(pci_mmcfg_lock);
+
+LIST_HEAD(pci_mmcfg_list);
+
+static void __init pci_mmconfig_remove(struct pci_mmcfg_region *cfg)
+{
+ if (cfg->res.parent)
+ release_resource(&cfg->res);
+ list_del(&cfg->list);
+ kfree(cfg);
+}
+
+void __init free_all_mmcfg(void)
+{
+ struct pci_mmcfg_region *cfg, *tmp;
+
+ pci_mmcfg_arch_free();
+ list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list)
+ pci_mmconfig_remove(cfg);
+}
+
+void list_add_sorted(struct pci_mmcfg_region *new)
+{
+ struct pci_mmcfg_region *cfg;
+
+ /* keep list sorted by segment and starting bus number */
+ list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) {
+ if (cfg->segment > new->segment ||
+ (cfg->segment == new->segment &&
+ cfg->start_bus >= new->start_bus)) {
+ list_add_tail_rcu(&new->list, &cfg->list);
+ return;
+ }
+ }
+ list_add_tail_rcu(&new->list, &pci_mmcfg_list);
+}
+
+struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
+ int end, u64 addr)
+{
+ struct pci_mmcfg_region *new;
+ struct resource *res;
+
+ if (addr == 0)
+ return NULL;
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ new->address = addr;
+ new->segment = segment;
+ new->start_bus = start;
+ new->end_bus = end;
+
+ res = &new->res;
+ res->start = addr + PCI_MMCFG_BUS_OFFSET(start);
+ res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1;
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN,
+ "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end);
+ res->name = new->name;
+
+ return new;
+}
+
+struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start,
+ int end, u64 addr)
+{
+ struct pci_mmcfg_region *new;
+
+ new = pci_mmconfig_alloc(segment, start, end, addr);
+ if (new) {
+ mutex_lock(&pci_mmcfg_lock);
+ list_add_sorted(new);
+ mutex_unlock(&pci_mmcfg_lock);
+
+ pr_info(PREFIX
+ "MMCONFIG for domain %04x [bus %02x-%02x] at %pR "
+ "(base %#lx)\n",
+ segment, start, end, &new->res, (unsigned long)addr);
+ }
+
+ return new;
+}
+
+int __init pci_mmconfig_inject(struct pci_mmcfg_region *cfg)
+{
+ struct pci_mmcfg_region *cfg_conflict;
+ int err = 0;
+
+ mutex_lock(&pci_mmcfg_lock);
+ cfg_conflict = pci_mmconfig_lookup(cfg->segment, cfg->start_bus);
+ if (cfg_conflict) {
+ if (cfg_conflict->end_bus < cfg->end_bus)
+ pr_info(FW_INFO "MMCONFIG for "
+ "domain %04x [bus %02x-%02x] "
+ "only partially covers this bridge\n",
+ cfg_conflict->segment, cfg_conflict->start_bus,
+ cfg_conflict->end_bus);
+ err = -EEXIST;
+ goto out;
+ }
+
+ if (pci_mmcfg_arch_map(cfg)) {
+ pr_warn("fail to map MMCONFIG %pR.\n", &cfg->res);
+ err = -ENOMEM;
+ goto out;
+ } else {
+ list_add_sorted(cfg);
+ pr_info("MMCONFIG at %pR (base %#lx)\n",
+ &cfg->res, (unsigned long)cfg->address);
+
+ }
+out:
+ mutex_unlock(&pci_mmcfg_lock);
+ return err;
+}
+
+struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
+{
+ struct pci_mmcfg_region *cfg;
+
+ list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
+ if (cfg->segment == segment &&
+ cfg->start_bus <= bus && bus <= cfg->end_bus)
+ return cfg;
+
+ return NULL;
+}
+
+int __init __weak acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
+ struct acpi_mcfg_allocation *cfg)
+{
+ return 0;
+}
+
+int __init pci_parse_mcfg(struct acpi_table_header *header)
+{
+ struct acpi_table_mcfg *mcfg;
+ struct acpi_mcfg_allocation *cfg_table, *cfg;
+ unsigned long i;
+ int entries;
+
+ if (!header)
+ return -EINVAL;
+
+ mcfg = (struct acpi_table_mcfg *)header;
+
+ /* how many config structures do we have */
+ free_all_mmcfg();
+ entries = 0;
+ i = header->length - sizeof(struct acpi_table_mcfg);
+ while (i >= sizeof(struct acpi_mcfg_allocation)) {
+ entries++;
+ i -= sizeof(struct acpi_mcfg_allocation);
+ }
+ if (entries == 0) {
+ pr_err(PREFIX "MMCONFIG has no entries\n");
+ return -ENODEV;
+ }
+
+ cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1];
+ for (i = 0; i < entries; i++) {
+ cfg = &cfg_table[i];
+ if (acpi_mcfg_check_entry(mcfg, cfg)) {
+ free_all_mmcfg();
+ return -ENODEV;
+ }
+
+ if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number,
+ cfg->end_bus_number, cfg->address) == NULL) {
+ pr_warn(PREFIX "no memory for MCFG entries\n");
+ free_all_mmcfg();
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+/* Delete MMCFG information for host bridges */
+int pci_mmconfig_delete(u16 seg, u8 start, u8 end)
+{
+ struct pci_mmcfg_region *cfg;
+
+ mutex_lock(&pci_mmcfg_lock);
+ list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
+ if (cfg->segment == seg && cfg->start_bus == start &&
+ cfg->end_bus == end) {
+ list_del_rcu(&cfg->list);
+ synchronize_rcu();
+ pci_mmcfg_arch_unmap(cfg);
+ if (cfg->res.parent)
+ release_resource(&cfg->res);
+ mutex_unlock(&pci_mmcfg_lock);
+ kfree(cfg);
+ return 0;
+ }
+ mutex_unlock(&pci_mmcfg_lock);
+
+ return -ENOENT;
+}
+
+void __init __weak pci_mmcfg_early_init(void)
+{
+
+}
+
+void __init __weak pci_mmcfg_late_init(void)
+{
+ struct pci_mmcfg_region *cfg;
+
+ acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
+
+ if (list_empty(&pci_mmcfg_list))
+ return;
+
+ if (!pci_mmcfg_arch_init())
+ free_all_mmcfg();
+
+ list_for_each_entry(cfg, &pci_mmcfg_list, list)
+ insert_resource(&iomem_resource, &cfg->res);
+}
diff --git a/include/linux/mmconfig.h b/include/linux/mmconfig.h
new file mode 100644
index 0000000..6ccd1ee
--- /dev/null
+++ b/include/linux/mmconfig.h
@@ -0,0 +1,58 @@
+#ifndef __MMCONFIG_H
+#define __MMCONFIG_H
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/acpi.h>
+
+#ifdef CONFIG_PCI_MMCONFIG
+/* "PCI MMCONFIG %04x [bus %02x-%02x]" */
+#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2)
+
+struct pci_mmcfg_region {
+ struct list_head list;
+ struct resource res;
+ u64 address;
+ char __iomem *virt;
+ u16 segment;
+ u8 start_bus;
+ u8 end_bus;
+ char name[PCI_MMCFG_RESOURCE_NAME_LEN];
+};
+
+void pci_mmcfg_early_init(void);
+void pci_mmcfg_late_init(void);
+struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
+
+int pci_parse_mcfg(struct acpi_table_header *header);
+struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
+ int end, u64 addr);
+int pci_mmconfig_inject(struct pci_mmcfg_region *cfg);
+struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start,
+ int end, u64 addr);
+void list_add_sorted(struct pci_mmcfg_region *new);
+int acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
+ struct acpi_mcfg_allocation *cfg);
+void free_all_mmcfg(void);
+int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
+ phys_addr_t addr);
+int pci_mmconfig_delete(u16 seg, u8 start, u8 end);
+
+/* Arch specific calls */
+int pci_mmcfg_arch_init(void);
+void pci_mmcfg_arch_free(void);
+int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
+void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg);
+
+extern struct list_head pci_mmcfg_list;
+
+#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20)
+#else /* CONFIG_PCI_MMCONFIG */
+static inline void pci_mmcfg_late_init(void) { }
+static inline void pci_mmcfg_early_init(void) { }
+static inline void *pci_mmconfig_lookup(int segment, int bus)
+{ return NULL; }
+#endif /* CONFIG_PCI_MMCONFIG */
+
+#endif /* __KERNEL__ */
+#endif /* __MMCONFIG_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6afba72..0a8b82e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1658,14 +1658,6 @@ void pcibios_release_device(struct pci_dev *dev);
extern struct dev_pm_ops pcibios_pm_ops;
#endif

-#ifdef CONFIG_PCI_MMCONFIG
-void __init pci_mmcfg_early_init(void);
-void __init pci_mmcfg_late_init(void);
-#else
-static inline void pci_mmcfg_early_init(void) { }
-static inline void pci_mmcfg_late_init(void) { }
-#endif
-
int pci_ext_cfg_avail(void);

void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar);
--
1.9.1

2014-11-19 16:05:46

by Tomasz Nowicki

[permalink] [raw]
Subject: [PATCH 5/6] x86, acpi, pci: mmconfig_64.c becomes default implementation for arch agnostic low-level direct PCI config space accessors via MMCONFIG.

Note that x86 32bits machines still have its own low-level direct
PCI config space accessors.

Signed-off-by: Tomasz Nowicki <[email protected]>
---
arch/x86/pci/Makefile | 5 +-
arch/x86/pci/mmconfig_64.c | 152 ---------------------------------------------
drivers/acpi/mmconfig.c | 134 +++++++++++++++++++++++++++++++++++++++
3 files changed, 138 insertions(+), 153 deletions(-)
delete mode 100644 arch/x86/pci/mmconfig_64.c

diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 5c6fc35..35c765b 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -1,7 +1,10 @@
obj-y := i386.o init.o

obj-$(CONFIG_PCI_BIOS) += pcbios.o
-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o
+obj-$(CONFIG_PCI_MMCONFIG) += direct.o mmconfig-shared.o
+ifeq ($(BITS),32)
+obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_32.o
+endif
obj-$(CONFIG_PCI_DIRECT) += direct.o
obj-$(CONFIG_PCI_OLPC) += olpc.o
obj-$(CONFIG_PCI_XEN) += xen.o
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
deleted file mode 100644
index ff2c50c..0000000
--- a/arch/x86/pci/mmconfig_64.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * mmconfig.c - Low-level direct PCI config space access via MMCONFIG
- *
- * This is an 64bit optimized version that always keeps the full mmconfig
- * space mapped. This allows lockless config space operation.
- */
-
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/acpi.h>
-#include <linux/bitmap.h>
-#include <linux/rcupdate.h>
-#include <linux/mmconfig.h>
-#include <asm/e820.h>
-#include <asm/pci_x86.h>
-
-#define PREFIX "PCI: "
-
-static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
-{
- struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);
-
- if (cfg && cfg->virt)
- return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12));
- return NULL;
-}
-
-int pci_mmcfg_read(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 *value)
-{
- char __iomem *addr;
-
- /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
- if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) {
-err: *value = -1;
- return -EINVAL;
- }
-
- rcu_read_lock();
- addr = pci_dev_base(seg, bus, devfn);
- if (!addr) {
- rcu_read_unlock();
- goto err;
- }
-
- switch (len) {
- case 1:
- *value = mmio_config_readb(addr + reg);
- break;
- case 2:
- *value = mmio_config_readw(addr + reg);
- break;
- case 4:
- *value = mmio_config_readl(addr + reg);
- break;
- }
- rcu_read_unlock();
-
- return 0;
-}
-
-int pci_mmcfg_write(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 value)
-{
- char __iomem *addr;
-
- /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
- if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095)))
- return -EINVAL;
-
- rcu_read_lock();
- addr = pci_dev_base(seg, bus, devfn);
- if (!addr) {
- rcu_read_unlock();
- return -EINVAL;
- }
-
- switch (len) {
- case 1:
- mmio_config_writeb(addr + reg, value);
- break;
- case 2:
- mmio_config_writew(addr + reg, value);
- break;
- case 4:
- mmio_config_writel(addr + reg, value);
- break;
- }
- rcu_read_unlock();
-
- return 0;
-}
-
-const struct pci_raw_ops pci_mmcfg = {
- .read = pci_mmcfg_read,
- .write = pci_mmcfg_write,
-};
-
-static void __iomem *mcfg_ioremap(struct pci_mmcfg_region *cfg)
-{
- void __iomem *addr;
- u64 start, size;
- int num_buses;
-
- start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus);
- num_buses = cfg->end_bus - cfg->start_bus + 1;
- size = PCI_MMCFG_BUS_OFFSET(num_buses);
- addr = ioremap_nocache(start, size);
- if (addr)
- addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus);
- return addr;
-}
-
-int __init pci_mmcfg_arch_init(void)
-{
- struct pci_mmcfg_region *cfg;
-
- list_for_each_entry(cfg, &pci_mmcfg_list, list)
- if (pci_mmcfg_arch_map(cfg)) {
- pci_mmcfg_arch_free();
- return 0;
- }
-
- return 1;
-}
-
-void __init pci_mmcfg_arch_free(void)
-{
- struct pci_mmcfg_region *cfg;
-
- list_for_each_entry(cfg, &pci_mmcfg_list, list)
- pci_mmcfg_arch_unmap(cfg);
-}
-
-int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
-{
- cfg->virt = mcfg_ioremap(cfg);
- if (!cfg->virt) {
- pr_err(PREFIX "can't map MMCONFIG at %pR\n", &cfg->res);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg)
-{
- if (cfg && cfg->virt) {
- iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus));
- cfg->virt = NULL;
- }
-}
diff --git a/drivers/acpi/mmconfig.c b/drivers/acpi/mmconfig.c
index d62dccda..c0ad05f 100644
--- a/drivers/acpi/mmconfig.c
+++ b/drivers/acpi/mmconfig.c
@@ -12,14 +12,148 @@

#include <linux/mutex.h>
#include <linux/rculist.h>
+#include <linux/pci.h>
#include <linux/mmconfig.h>

+#include <asm/pci.h>
+
#define PREFIX "PCI: "

static DEFINE_MUTEX(pci_mmcfg_lock);

LIST_HEAD(pci_mmcfg_list);

+static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus,
+ unsigned int devfn)
+{
+ struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);
+
+ if (cfg && cfg->virt)
+ return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12));
+ return NULL;
+}
+
+int __weak pci_mmcfg_read(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 *value)
+{
+ char __iomem *addr;
+
+ /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
+ if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) {
+err: *value = -1;
+ return -EINVAL;
+ }
+
+ rcu_read_lock();
+ addr = pci_dev_base(seg, bus, devfn);
+ if (!addr) {
+ rcu_read_unlock();
+ goto err;
+ }
+
+ switch (len) {
+ case 1:
+ *value = mmio_config_readb(addr + reg);
+ break;
+ case 2:
+ *value = mmio_config_readw(addr + reg);
+ break;
+ case 4:
+ *value = mmio_config_readl(addr + reg);
+ break;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+int __weak pci_mmcfg_write(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 value)
+{
+ char __iomem *addr;
+
+ /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
+ if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095)))
+ return -EINVAL;
+
+ rcu_read_lock();
+ addr = pci_dev_base(seg, bus, devfn);
+ if (!addr) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
+ switch (len) {
+ case 1:
+ mmio_config_writeb(addr + reg, value);
+ break;
+ case 2:
+ mmio_config_writew(addr + reg, value);
+ break;
+ case 4:
+ mmio_config_writel(addr + reg, value);
+ break;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+static void __iomem *mcfg_ioremap(struct pci_mmcfg_region *cfg)
+{
+ void __iomem *addr;
+ u64 start, size;
+ int num_buses;
+
+ start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus);
+ num_buses = cfg->end_bus - cfg->start_bus + 1;
+ size = PCI_MMCFG_BUS_OFFSET(num_buses);
+ addr = ioremap_nocache(start, size);
+ if (addr)
+ addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus);
+ return addr;
+}
+
+int __init __weak pci_mmcfg_arch_init(void)
+{
+ struct pci_mmcfg_region *cfg;
+
+ list_for_each_entry(cfg, &pci_mmcfg_list, list)
+ if (pci_mmcfg_arch_map(cfg)) {
+ pci_mmcfg_arch_free();
+ return 0;
+ }
+
+ return 1;
+}
+
+void __init __weak pci_mmcfg_arch_free(void)
+{
+ struct pci_mmcfg_region *cfg;
+
+ list_for_each_entry(cfg, &pci_mmcfg_list, list)
+ pci_mmcfg_arch_unmap(cfg);
+}
+
+int __weak pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
+{
+ cfg->virt = mcfg_ioremap(cfg);
+ if (!cfg->virt) {
+ pr_err(PREFIX "can't map MMCONFIG at %pR\n", &cfg->res);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void __weak pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg)
+{
+ if (cfg && cfg->virt) {
+ iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus));
+ cfg->virt = NULL;
+ }
+}
+
static void __init pci_mmconfig_remove(struct pci_mmcfg_region *cfg)
{
if (cfg->res.parent)
--
1.9.1

2014-11-19 16:06:03

by Tomasz Nowicki

[permalink] [raw]
Subject: [PATCH 6/6] pci, acpi: Share ACPI PCI config space accessors.

MMCFG can be used perfectly for all architectures which support ACPI.
ACPI mandates MMCFG to describe PCI config space ranges which means
we should use MMCONFIG accessors by default.

Signed-off-by: Tomasz Nowicki <[email protected]>
Tested-by: Hanjun Guo <[email protected]>
---
drivers/acpi/mmconfig.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)

diff --git a/drivers/acpi/mmconfig.c b/drivers/acpi/mmconfig.c
index c0ad05f..c9c6e05 100644
--- a/drivers/acpi/mmconfig.c
+++ b/drivers/acpi/mmconfig.c
@@ -23,6 +23,26 @@ static DEFINE_MUTEX(pci_mmcfg_lock);

LIST_HEAD(pci_mmcfg_list);

+/*
+ * raw_pci_read/write - ACPI PCI config space accessors.
+ *
+ * ACPI spec defines MMCFG as the way we can access PCI config space,
+ * so let MMCFG be default (__weak).
+ *
+ * If platform needs more fancy stuff, should provides its own implementation.
+ */
+int __weak raw_pci_read(unsigned int domain, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 *val)
+{
+ return pci_mmcfg_read(domain, bus, devfn, reg, len, val);
+}
+
+int __weak raw_pci_write(unsigned int domain, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 val)
+{
+ return pci_mmcfg_write(domain, bus, devfn, reg, len, val);
+}
+
static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus,
unsigned int devfn)
{
--
1.9.1

2014-11-19 16:06:46

by Tomasz Nowicki

[permalink] [raw]
Subject: [PATCH 4/6] x86, acpi, pci: mmconfig_{32,64}.c code refactoring - remove code duplication.

mmconfig_64.c version is going to be default implementation for arch
agnostic low-level direct PCI config space accessors via MMCONFIG.
However, now it initialize raw_pci_ext_ops pointer which is used in
x86 specific code only. Moreover, mmconfig_32.c is doing the same thing
at the same time.

Move it to mmconfig_shared.c so it becomes common for both and
mmconfig_64.c turns out to be purely arch agnostic.

Signed-off-by: Tomasz Nowicki <[email protected]>
Tested-by: Hanjun Guo <[email protected]>
---
arch/x86/pci/mmconfig-shared.c | 10 ++++++++--
arch/x86/pci/mmconfig_32.c | 10 ++--------
arch/x86/pci/mmconfig_64.c | 6 ++----
include/linux/mmconfig.h | 4 ++++
4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index b397544..e42004c 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -29,6 +29,11 @@
static bool pci_mmcfg_running_state;
static bool pci_mmcfg_arch_init_failed;

+const struct pci_raw_ops pci_mmcfg = {
+ .read = pci_mmcfg_read,
+ .write = pci_mmcfg_write,
+};
+
static const char *__init pci_mmcfg_e7520(void)
{
u32 win;
@@ -486,9 +491,10 @@ static void __init __pci_mmcfg_init(int early)
}
}

- if (pci_mmcfg_arch_init())
+ if (pci_mmcfg_arch_init()) {
+ raw_pci_ext_ops = &pci_mmcfg;
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
- else {
+ } else {
free_all_mmcfg();
pci_mmcfg_arch_init_failed = true;
}
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index d774672..c0106a6 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -50,7 +50,7 @@ static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn)
}
}

-static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
+int pci_mmcfg_read(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 *value)
{
unsigned long flags;
@@ -89,7 +89,7 @@ err: *value = -1;
return 0;
}

-static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
+int pci_mmcfg_write(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 value)
{
unsigned long flags;
@@ -126,15 +126,9 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
return 0;
}

-const struct pci_raw_ops pci_mmcfg = {
- .read = pci_mmcfg_read,
- .write = pci_mmcfg_write,
-};
-
int __init pci_mmcfg_arch_init(void)
{
printk(KERN_INFO "PCI: Using MMCONFIG for extended config space\n");
- raw_pci_ext_ops = &pci_mmcfg;
return 1;
}

diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index 1209596..ff2c50c 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -25,7 +25,7 @@ static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned i
return NULL;
}

-static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
+int pci_mmcfg_read(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 *value)
{
char __iomem *addr;
@@ -59,7 +59,7 @@ err: *value = -1;
return 0;
}

-static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
+int pci_mmcfg_write(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 value)
{
char __iomem *addr;
@@ -121,8 +121,6 @@ int __init pci_mmcfg_arch_init(void)
return 0;
}

- raw_pci_ext_ops = &pci_mmcfg;
-
return 1;
}

diff --git a/include/linux/mmconfig.h b/include/linux/mmconfig.h
index 6ccd1ee..ae8ec83 100644
--- a/include/linux/mmconfig.h
+++ b/include/linux/mmconfig.h
@@ -43,6 +43,10 @@ int pci_mmcfg_arch_init(void);
void pci_mmcfg_arch_free(void);
int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg);
+int pci_mmcfg_read(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 *value);
+int pci_mmcfg_write(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 value);

extern struct list_head pci_mmcfg_list;

--
1.9.1

2014-11-19 16:07:16

by Tomasz Nowicki

[permalink] [raw]
Subject: [PATCH 3/6] x86, acpi, pci: Move PCI config space accessors.

We are going to use mmio_config_{} name convention across all architectures.
Currently it belongs to asm/pci_x86.h header which should be included
only for x86 specific files. From now on, those accessors are in asm/pci.h
header which can be included in non-architecture code much easier.

Signed-off-by: Tomasz Nowicki <[email protected]>
Tested-by: Hanjun Guo <[email protected]>
---
arch/x86/include/asm/pci.h | 42 +++++++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/pci_x86.h | 43 ------------------------------------------
2 files changed, 42 insertions(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 0892ea0..5ba3720 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -71,6 +71,48 @@ void pcibios_set_master(struct pci_dev *dev);
struct irq_routing_table *pcibios_get_irq_routing_table(void);
int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);

+/*
+ * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
+ * on their northbrige except through the * %eax register. As such, you MUST
+ * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
+ * accessor functions.
+ * In fact just use pci_config_*, nothing else please.
+ */
+static inline unsigned char mmio_config_readb(void __iomem *pos)
+{
+ u8 val;
+ asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
+ return val;
+}
+
+static inline unsigned short mmio_config_readw(void __iomem *pos)
+{
+ u16 val;
+ asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
+ return val;
+}
+
+static inline unsigned int mmio_config_readl(void __iomem *pos)
+{
+ u32 val;
+ asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
+ return val;
+}
+
+static inline void mmio_config_writeb(void __iomem *pos, u8 val)
+{
+ asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
+}
+
+static inline void mmio_config_writew(void __iomem *pos, u16 val)
+{
+ asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
+}
+
+static inline void mmio_config_writel(void __iomem *pos, u32 val)
+{
+ asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
+}

#define HAVE_PCI_MMAP
extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index caba141..42e7332 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -121,49 +121,6 @@ extern int __init pcibios_init(void);
extern int pci_legacy_init(void);
extern void pcibios_fixup_irqs(void);

-/*
- * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
- * on their northbrige except through the * %eax register. As such, you MUST
- * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
- * accessor functions.
- * In fact just use pci_config_*, nothing else please.
- */
-static inline unsigned char mmio_config_readb(void __iomem *pos)
-{
- u8 val;
- asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
- return val;
-}
-
-static inline unsigned short mmio_config_readw(void __iomem *pos)
-{
- u16 val;
- asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
- return val;
-}
-
-static inline unsigned int mmio_config_readl(void __iomem *pos)
-{
- u32 val;
- asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
- return val;
-}
-
-static inline void mmio_config_writeb(void __iomem *pos, u8 val)
-{
- asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
-}
-
-static inline void mmio_config_writew(void __iomem *pos, u16 val)
-{
- asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
-}
-
-static inline void mmio_config_writel(void __iomem *pos, u32 val)
-{
- asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
-}
-
#ifdef CONFIG_PCI
# ifdef CONFIG_ACPI
# define x86_default_pci_init pci_acpi_init
--
1.9.1

2014-11-19 16:08:42

by Tomasz Nowicki

[permalink] [raw]
Subject: [PATCH 1/6] x86, acpi, pci: Reorder logic of pci_mmconfig_insert() function

This patch is the first step of MMCONFIG refactoring process.

Code that uses pci_mmcfg_lock will be moved to common file and become
accessible for all architectures. pci_mmconfig_insert() cannot be moved
so easily since it is mixing generic mmcfg code with x86 specific logic
inside of mutual exclusive block guarded by pci_mmcfg_lock.

To get rid of that constraint we reorder actions as fallow:
1. mmconfig entry allocation can be done at first, does not need lock
2. insertion to iomem_resource has its own lock, no need to wrap it into mutex
3. insertion to mmconfig list can be done as the final stage in separate
function (candidate for further factoring)

Signed-off-by: Tomasz Nowicki <[email protected]>
Tested-by: Hanjun Guo <[email protected]>
---
arch/x86/pci/mmconfig-shared.c | 100 ++++++++++++++++++++++-------------------
1 file changed, 54 insertions(+), 46 deletions(-)

diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 326198a..ac24e1c 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -692,6 +692,39 @@ static int __init pci_mmcfg_late_insert_resources(void)
*/
late_initcall(pci_mmcfg_late_insert_resources);

+static int __init pci_mmconfig_inject(struct pci_mmcfg_region *cfg)
+{
+ struct pci_mmcfg_region *cfg_conflict;
+ int err = 0;
+
+ mutex_lock(&pci_mmcfg_lock);
+ cfg_conflict = pci_mmconfig_lookup(cfg->segment, cfg->start_bus);
+ if (cfg_conflict) {
+ if (cfg_conflict->end_bus < cfg->end_bus)
+ pr_info(FW_INFO "MMCONFIG for "
+ "domain %04x [bus %02x-%02x] "
+ "only partially covers this bridge\n",
+ cfg_conflict->segment, cfg_conflict->start_bus,
+ cfg_conflict->end_bus);
+ err = -EEXIST;
+ goto out;
+ }
+
+ if (pci_mmcfg_arch_map(cfg)) {
+ pr_warn("fail to map MMCONFIG %pR.\n", &cfg->res);
+ err = -ENOMEM;
+ goto out;
+ } else {
+ list_add_sorted(cfg);
+ pr_info("MMCONFIG at %pR (base %#lx)\n",
+ &cfg->res, (unsigned long)cfg->address);
+
+ }
+out:
+ mutex_unlock(&pci_mmcfg_lock);
+ return err;
+}
+
/* Add MMCFG information for host bridges */
int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
phys_addr_t addr)
@@ -703,66 +736,41 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed)
return -ENODEV;

- if (start > end)
+ if (start > end || !addr)
return -EINVAL;

- mutex_lock(&pci_mmcfg_lock);
- cfg = pci_mmconfig_lookup(seg, start);
- if (cfg) {
- if (cfg->end_bus < end)
- dev_info(dev, FW_INFO
- "MMCONFIG for "
- "domain %04x [bus %02x-%02x] "
- "only partially covers this bridge\n",
- cfg->segment, cfg->start_bus, cfg->end_bus);
- mutex_unlock(&pci_mmcfg_lock);
- return -EEXIST;
- }
-
- if (!addr) {
- mutex_unlock(&pci_mmcfg_lock);
- return -EINVAL;
- }
-
rc = -EBUSY;
cfg = pci_mmconfig_alloc(seg, start, end, addr);
if (cfg == NULL) {
dev_warn(dev, "fail to add MMCONFIG (out of memory)\n");
- rc = -ENOMEM;
+ return -ENOMEM;
} else if (!pci_mmcfg_check_reserved(dev, cfg, 0)) {
dev_warn(dev, FW_BUG "MMCONFIG %pR isn't reserved\n",
&cfg->res);
- } else {
- /* Insert resource if it's not in boot stage */
- if (pci_mmcfg_running_state)
- tmp = insert_resource_conflict(&iomem_resource,
- &cfg->res);
-
- if (tmp) {
- dev_warn(dev,
- "MMCONFIG %pR conflicts with "
- "%s %pR\n",
- &cfg->res, tmp->name, tmp);
- } else if (pci_mmcfg_arch_map(cfg)) {
- dev_warn(dev, "fail to map MMCONFIG %pR.\n",
- &cfg->res);
- } else {
- list_add_sorted(cfg);
- dev_info(dev, "MMCONFIG at %pR (base %#lx)\n",
- &cfg->res, (unsigned long)addr);
- cfg = NULL;
- rc = 0;
- }
+ goto error;
}

- if (cfg) {
- if (cfg->res.parent)
- release_resource(&cfg->res);
- kfree(cfg);
+ /* Insert resource if it's not in boot stage */
+ if (pci_mmcfg_running_state)
+ tmp = insert_resource_conflict(&iomem_resource, &cfg->res);
+
+ if (tmp) {
+ dev_warn(dev,
+ "MMCONFIG %pR conflicts with %s %pR\n",
+ &cfg->res, tmp->name, tmp);
+ goto error;
}

- mutex_unlock(&pci_mmcfg_lock);
+ rc = pci_mmconfig_inject(cfg);
+ if (rc)
+ goto error;
+
+ return 0;

+error:
+ if (cfg->res.parent)
+ release_resource(&cfg->res);
+ kfree(cfg);
return rc;
}

--
1.9.1

2014-11-19 16:20:58

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH 6/6] pci, acpi: Share ACPI PCI config space accessors.

On Wednesday 19 November 2014 17:04:51 Tomasz Nowicki wrote:
> +/*
> + * raw_pci_read/write - ACPI PCI config space accessors.
> + *
> + * ACPI spec defines MMCFG as the way we can access PCI config space,
> + * so let MMCFG be default (__weak).
> + *
> + * If platform needs more fancy stuff, should provides its own implementation.
> + */
> +int __weak raw_pci_read(unsigned int domain, unsigned int bus,
> + unsigned int devfn, int reg, int len, u32 *val)
> +{
> + return pci_mmcfg_read(domain, bus, devfn, reg, len, val);
> +}
> +
> +int __weak raw_pci_write(unsigned int domain, unsigned int bus,
> + unsigned int devfn, int reg, int len, u32 val)
> +{
> + return pci_mmcfg_write(domain, bus, devfn, reg, len, val);
> +}
> +
>

I think it would be better to avoid __weak functions here, as they tend
to be hard to follow when trying to understand the code.

How about using a Kconfig symbol like this:

#ifdef CONFIG_ARCH_RAW_PCI_READWRITE
int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 *val);
int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 val);
#else
static inline int raw_pci_read(unsigned int domain, unsigned int bus,
unsigned int devfn, int reg, int len, u32 *val)
{
return pci_mmcfg_read(domain, bus, devfn, reg, len, val);
}

static inline int raw_pci_write(unsigned int domain, unsigned int bus,
unsigned int devfn, int reg, int len, u32 val)
{
return pci_mmcfg_write(domain, bus, devfn, reg, len, val);
}
#endif

Same thing for the weak symbols in patch 5.

Arnd

2014-11-19 16:24:30

by Tomasz Nowicki

[permalink] [raw]
Subject: Re: [PATCH 6/6] pci, acpi: Share ACPI PCI config space accessors.

On 19.11.2014 17:19, Arnd Bergmann wrote:
> On Wednesday 19 November 2014 17:04:51 Tomasz Nowicki wrote:
>> +/*
>> + * raw_pci_read/write - ACPI PCI config space accessors.
>> + *
>> + * ACPI spec defines MMCFG as the way we can access PCI config space,
>> + * so let MMCFG be default (__weak).
>> + *
>> + * If platform needs more fancy stuff, should provides its own implementation.
>> + */
>> +int __weak raw_pci_read(unsigned int domain, unsigned int bus,
>> + unsigned int devfn, int reg, int len, u32 *val)
>> +{
>> + return pci_mmcfg_read(domain, bus, devfn, reg, len, val);
>> +}
>> +
>> +int __weak raw_pci_write(unsigned int domain, unsigned int bus,
>> + unsigned int devfn, int reg, int len, u32 val)
>> +{
>> + return pci_mmcfg_write(domain, bus, devfn, reg, len, val);
>> +}
>> +
>>
>
> I think it would be better to avoid __weak functions here, as they tend
> to be hard to follow when trying to understand the code.
>
> How about using a Kconfig symbol like this:
>
> #ifdef CONFIG_ARCH_RAW_PCI_READWRITE
> int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
> int reg, int len, u32 *val);
> int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
> int reg, int len, u32 val);
> #else
> static inline int raw_pci_read(unsigned int domain, unsigned int bus,
> unsigned int devfn, int reg, int len, u32 *val)
> {
> return pci_mmcfg_read(domain, bus, devfn, reg, len, val);
> }
>
> static inline int raw_pci_write(unsigned int domain, unsigned int bus,
> unsigned int devfn, int reg, int len, u32 val)
> {
> return pci_mmcfg_write(domain, bus, devfn, reg, len, val);
> }
> #endif
>
> Same thing for the weak symbols in patch 5.
>

It makes sense to me, thanks!

Tomasz

2014-11-20 22:26:57

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH 6/6] pci, acpi: Share ACPI PCI config space accessors.

On Wed, Nov 19, 2014 at 05:19:20PM +0100, Arnd Bergmann wrote:
> On Wednesday 19 November 2014 17:04:51 Tomasz Nowicki wrote:
> > +/*
> > + * raw_pci_read/write - ACPI PCI config space accessors.
> > + *
> > + * ACPI spec defines MMCFG as the way we can access PCI config space,
> > + * so let MMCFG be default (__weak).
> > + *
> > + * If platform needs more fancy stuff, should provides its own implementation.
> > + */
> > +int __weak raw_pci_read(unsigned int domain, unsigned int bus,
> > + unsigned int devfn, int reg, int len, u32 *val)
> > +{
> > + return pci_mmcfg_read(domain, bus, devfn, reg, len, val);
> > +}
> > +
> > +int __weak raw_pci_write(unsigned int domain, unsigned int bus,
> > + unsigned int devfn, int reg, int len, u32 val)
> > +{
> > + return pci_mmcfg_write(domain, bus, devfn, reg, len, val);
> > +}
> > +
> >
>
> I think it would be better to avoid __weak functions here, as they tend
> to be hard to follow when trying to understand the code.

That's interesting. I would have said exactly the opposite -- I think the
extra Kconfiggery is harder to follow than weak/strong functions :)

But consistency is better than my personal opinion. Is there a consensus
that we should use the Kconfig strategy instead of __weak?

> How about using a Kconfig symbol like this:
>
> #ifdef CONFIG_ARCH_RAW_PCI_READWRITE
> int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
> int reg, int len, u32 *val);
> int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
> int reg, int len, u32 val);
> #else
> static inline int raw_pci_read(unsigned int domain, unsigned int bus,
> unsigned int devfn, int reg, int len, u32 *val)
> {
> return pci_mmcfg_read(domain, bus, devfn, reg, len, val);
> }
>
> static inline int raw_pci_write(unsigned int domain, unsigned int bus,
> unsigned int devfn, int reg, int len, u32 val)
> {
> return pci_mmcfg_write(domain, bus, devfn, reg, len, val);
> }
> #endif

2014-11-21 04:00:23

by Myron Stowe

[permalink] [raw]
Subject: Re: [PATCH 6/6] pci, acpi: Share ACPI PCI config space accessors.

On Thu, Nov 20, 2014 at 3:26 PM, Bjorn Helgaas <[email protected]> wrote:
> On Wed, Nov 19, 2014 at 05:19:20PM +0100, Arnd Bergmann wrote:
>> On Wednesday 19 November 2014 17:04:51 Tomasz Nowicki wrote:
>> > +/*
>> > + * raw_pci_read/write - ACPI PCI config space accessors.
>> > + *
>> > + * ACPI spec defines MMCFG as the way we can access PCI config space,
>> > + * so let MMCFG be default (__weak).
>> > + *
>> > + * If platform needs more fancy stuff, should provides its own implementation.
>> > + */
>> > +int __weak raw_pci_read(unsigned int domain, unsigned int bus,
>> > + unsigned int devfn, int reg, int len, u32 *val)
>> > +{
>> > + return pci_mmcfg_read(domain, bus, devfn, reg, len, val);
>> > +}
>> > +
>> > +int __weak raw_pci_write(unsigned int domain, unsigned int bus,
>> > + unsigned int devfn, int reg, int len, u32 val)
>> > +{
>> > + return pci_mmcfg_write(domain, bus, devfn, reg, len, val);
>> > +}
>> > +
>> >
>>
>> I think it would be better to avoid __weak functions here, as they tend
>> to be hard to follow when trying to understand the code.
>
> That's interesting. I would have said exactly the opposite -- I think the
> extra Kconfiggery is harder to follow than weak/strong functions :)
>
> But consistency is better than my personal opinion. Is there a consensus
> that we should use the Kconfig strategy instead of __weak?

I too find weak/strong functions easier to follow than "Kconfiggery" (nice term
invention there).

>
>> How about using a Kconfig symbol like this:
>>
>> #ifdef CONFIG_ARCH_RAW_PCI_READWRITE
>> int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
>> int reg, int len, u32 *val);
>> int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
>> int reg, int len, u32 val);
>> #else
>> static inline int raw_pci_read(unsigned int domain, unsigned int bus,
>> unsigned int devfn, int reg, int len, u32 *val)
>> {
>> return pci_mmcfg_read(domain, bus, devfn, reg, len, val);
>> }
>>
>> static inline int raw_pci_write(unsigned int domain, unsigned int bus,
>> unsigned int devfn, int reg, int len, u32 val)
>> {
>> return pci_mmcfg_write(domain, bus, devfn, reg, len, val);
>> }
>> #endif
> --
> To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2014-11-21 12:26:30

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH 6/6] pci, acpi: Share ACPI PCI config space accessors.

On Thursday 20 November 2014 21:00:17 Myron Stowe wrote:
> On Thu, Nov 20, 2014 at 3:26 PM, Bjorn Helgaas <[email protected]> wrote:
> > On Wed, Nov 19, 2014 at 05:19:20PM +0100, Arnd Bergmann wrote:
> >> On Wednesday 19 November 2014 17:04:51 Tomasz Nowicki wrote:
> >> > +/*
> >> > + * raw_pci_read/write - ACPI PCI config space accessors.
> >> > + *
> >> > + * ACPI spec defines MMCFG as the way we can access PCI config space,
> >> > + * so let MMCFG be default (__weak).
> >> > + *
> >> > + * If platform needs more fancy stuff, should provides its own implementation.
> >> > + */
> >> > +int __weak raw_pci_read(unsigned int domain, unsigned int bus,
> >> > + unsigned int devfn, int reg, int len, u32 *val)
> >> > +{
> >> > + return pci_mmcfg_read(domain, bus, devfn, reg, len, val);
> >> > +}
> >> > +
> >> > +int __weak raw_pci_write(unsigned int domain, unsigned int bus,
> >> > + unsigned int devfn, int reg, int len, u32 val)
> >> > +{
> >> > + return pci_mmcfg_write(domain, bus, devfn, reg, len, val);
> >> > +}
> >> > +
> >> >
> >>
> >> I think it would be better to avoid __weak functions here, as they tend
> >> to be hard to follow when trying to understand the code.
> >
> > That's interesting. I would have said exactly the opposite -- I think the
> > extra Kconfiggery is harder to follow than weak/strong functions
> >
> > But consistency is better than my personal opinion. Is there a consensus
> > that we should use the Kconfig strategy instead of __weak?
>
> I too find weak/strong functions easier to follow than "Kconfiggery" (nice term
> invention there).

I don't think there is a universal consensus, but the majority of
maintainers seems to avoid them for the same reasons that I think
__weak is problematic.

We have some uses of __weak in the core kernel, but there is
basically none in drivers outside of PCI, and the most common
uses are all providing an empty __weak function that can be
overridden with a function that actually does something, unlike
the code above.

My pragmatic approach so far has been to advocate __weak for
drivers/pci patches but discourage it elsewhere when I review
patches, in order to maintain consistency. I also think it
would be nice to change the way that PCI handles architecture
specific overrides in the process of unifying the host bridge
handling.

I wouldn't use Kconfig symbols in most cases though. My preferred
choice would be to turn a lot of the __weak symbols into function
pointers within a per-hostbridge structure. As an example, we could
replace pcibios_add_device() with a pointer in pci_host_bridge->ops
that gets set by all the architectures and host drivers that currently
override it, and replace the one caller with

if (pci_host_bridge->ops->add_device)
pci_host_bridge->ops->add_device(dev);

Arnd

2014-11-21 18:08:31

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH 6/6] pci, acpi: Share ACPI PCI config space accessors.

On Fri, Nov 21, 2014 at 01:24:52PM +0100, Arnd Bergmann wrote:
> On Thursday 20 November 2014 21:00:17 Myron Stowe wrote:
> > On Thu, Nov 20, 2014 at 3:26 PM, Bjorn Helgaas <[email protected]> wrote:
> > > On Wed, Nov 19, 2014 at 05:19:20PM +0100, Arnd Bergmann wrote:
> > >> On Wednesday 19 November 2014 17:04:51 Tomasz Nowicki wrote:
> > >> > +/*
> > >> > + * raw_pci_read/write - ACPI PCI config space accessors.
> > >> > + *
> > >> > + * ACPI spec defines MMCFG as the way we can access PCI config space,
> > >> > + * so let MMCFG be default (__weak).
> > >> > + *
> > >> > + * If platform needs more fancy stuff, should provides its own implementation.
> > >> > + */
> > >> > +int __weak raw_pci_read(unsigned int domain, unsigned int bus,
> > >> > + unsigned int devfn, int reg, int len, u32 *val)
> > >> > +{
> > >> > + return pci_mmcfg_read(domain, bus, devfn, reg, len, val);
> > >> > +}
> > >> > +
> > >> > +int __weak raw_pci_write(unsigned int domain, unsigned int bus,
> > >> > + unsigned int devfn, int reg, int len, u32 val)
> > >> > +{
> > >> > + return pci_mmcfg_write(domain, bus, devfn, reg, len, val);
> > >> > +}
> > >> > +
> > >> >
> > >>
> > >> I think it would be better to avoid __weak functions here, as they tend
> > >> to be hard to follow when trying to understand the code.
> > >
> > > That's interesting. I would have said exactly the opposite -- I think the
> > > extra Kconfiggery is harder to follow than weak/strong functions
> > >
> > > But consistency is better than my personal opinion. Is there a consensus
> > > that we should use the Kconfig strategy instead of __weak?
> >
> > I too find weak/strong functions easier to follow than "Kconfiggery" (nice term
> > invention there).
>
> I don't think there is a universal consensus, but the majority of
> maintainers seems to avoid them for the same reasons that I think
> __weak is problematic.
>
> We have some uses of __weak in the core kernel, but there is
> basically none in drivers outside of PCI, and the most common
> uses are all providing an empty __weak function that can be
> overridden with a function that actually does something, unlike
> the code above.

One thing I like better about __weak (when used correctly) is that you have
exactly one declaration, and the role of each definition (weak default
implementation or strong override) is obvious from looking at it.

In your #ifdef example, the extern declaration and the inline definition
are never compiled together, so you have to repeat the signature and the
compiler doesn't enforce that they match. So you end up with the extern
and the inline in one file, a #define in an arch header file or Kconfig,
and an arch definition in a third file.

But it's certainly true that everybody knows how #ifdef works, and the fact
that __weak on a declaration affects all in-scope definitions is definitely
a land mine (multiple weak definitions with no strong one is a disaster).

> My pragmatic approach so far has been to advocate __weak for
> drivers/pci patches but discourage it elsewhere when I review
> patches, in order to maintain consistency. I also think it
> would be nice to change the way that PCI handles architecture
> specific overrides in the process of unifying the host bridge
> handling.
>
> I wouldn't use Kconfig symbols in most cases though. My preferred
> choice would be to turn a lot of the __weak symbols into function
> pointers within a per-hostbridge structure. As an example, we could
> replace pcibios_add_device() with a pointer in pci_host_bridge->ops
> that gets set by all the architectures and host drivers that currently
> override it, and replace the one caller with
>
> if (pci_host_bridge->ops->add_device)
> pci_host_bridge->ops->add_device(dev);

I definitely agree with this part, but I think it's orthogonal to the
__weak question. In this case, we'd like to support multiple host bridges,
each with a different flavor of add_device(). We can't do that at all with
either __weak or #ifdef.

Bjorn

2014-11-24 10:43:44

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH 6/6] pci, acpi: Share ACPI PCI config space accessors.

On Friday 21 November 2014 11:08:25 Bjorn Helgaas wrote:
> On Fri, Nov 21, 2014 at 01:24:52PM +0100, Arnd Bergmann wrote:
> > On Thursday 20 November 2014 21:00:17 Myron Stowe wrote:
> > > On Thu, Nov 20, 2014 at 3:26 PM, Bjorn Helgaas <[email protected]> wrote:
>
> > > > That's interesting. I would have said exactly the opposite -- I think the
> > > > extra Kconfiggery is harder to follow than weak/strong functions
> > > >
> > > > But consistency is better than my personal opinion. Is there a consensus
> > > > that we should use the Kconfig strategy instead of __weak?
> > >
> > > I too find weak/strong functions easier to follow than "Kconfiggery" (nice term
> > > invention there).
> >
> > I don't think there is a universal consensus, but the majority of
> > maintainers seems to avoid them for the same reasons that I think
> > __weak is problematic.
> >
> > We have some uses of __weak in the core kernel, but there is
> > basically none in drivers outside of PCI, and the most common
> > uses are all providing an empty __weak function that can be
> > overridden with a function that actually does something, unlike
> > the code above.
>
> One thing I like better about __weak (when used correctly) is that you have
> exactly one declaration, and the role of each definition (weak default
> implementation or strong override) is obvious from looking at it.

Right.

> In your #ifdef example, the extern declaration and the inline definition
> are never compiled together, so you have to repeat the signature and the
> compiler doesn't enforce that they match. So you end up with the extern
> and the inline in one file, a #define in an arch header file or Kconfig,
> and an arch definition in a third file.
>
> But it's certainly true that everybody knows how #ifdef works, and the fact
> that __weak on a declaration affects all in-scope definitions is definitely
> a land mine (multiple weak definitions with no strong one is a disaster).
>
> > My pragmatic approach so far has been to advocate __weak for
> > drivers/pci patches but discourage it elsewhere when I review
> > patches, in order to maintain consistency. I also think it
> > would be nice to change the way that PCI handles architecture
> > specific overrides in the process of unifying the host bridge
> > handling.
> >
> > I wouldn't use Kconfig symbols in most cases though. My preferred
> > choice would be to turn a lot of the __weak symbols into function
> > pointers within a per-hostbridge structure. As an example, we could
> > replace pcibios_add_device() with a pointer in pci_host_bridge->ops
> > that gets set by all the architectures and host drivers that currently
> > override it, and replace the one caller with
> >
> > if (pci_host_bridge->ops->add_device)
> > pci_host_bridge->ops->add_device(dev);
>
> I definitely agree with this part, but I think it's orthogonal to the
> __weak question. In this case, we'd like to support multiple host bridges,
> each with a different flavor of add_device(). We can't do that at all with
> either __weak or #ifdef.

What we currently have though is a a __weak definition of add_device,
which some architectures override, and some of them (ARM in particular)
by implementing their own abstraction. I suspect for the majority of
what we currently define as __weak functions, we could use a similar
approach and kill off the global symbols entirely.

Arnd

2014-12-08 07:14:08

by Tomasz Nowicki

[permalink] [raw]
Subject: Re: [PATCH 6/6] pci, acpi: Share ACPI PCI config space accessors.

W dniu 24.11.2014 o 11:41, Arnd Bergmann pisze:
> On Friday 21 November 2014 11:08:25 Bjorn Helgaas wrote:
>> On Fri, Nov 21, 2014 at 01:24:52PM +0100, Arnd Bergmann wrote:
>>> On Thursday 20 November 2014 21:00:17 Myron Stowe wrote:
>>>> On Thu, Nov 20, 2014 at 3:26 PM, Bjorn Helgaas <[email protected]> wrote:
>>
>>>>> That's interesting. I would have said exactly the opposite -- I think the
>>>>> extra Kconfiggery is harder to follow than weak/strong functions
>>>>>
>>>>> But consistency is better than my personal opinion. Is there a consensus
>>>>> that we should use the Kconfig strategy instead of __weak?
>>>>
>>>> I too find weak/strong functions easier to follow than "Kconfiggery" (nice term
>>>> invention there).
>>>
>>> I don't think there is a universal consensus, but the majority of
>>> maintainers seems to avoid them for the same reasons that I think
>>> __weak is problematic.
>>>
>>> We have some uses of __weak in the core kernel, but there is
>>> basically none in drivers outside of PCI, and the most common
>>> uses are all providing an empty __weak function that can be
>>> overridden with a function that actually does something, unlike
>>> the code above.
>>
>> One thing I like better about __weak (when used correctly) is that you have
>> exactly one declaration, and the role of each definition (weak default
>> implementation or strong override) is obvious from looking at it.
>
> Right.
>
>> In your #ifdef example, the extern declaration and the inline definition
>> are never compiled together, so you have to repeat the signature and the
>> compiler doesn't enforce that they match. So you end up with the extern
>> and the inline in one file, a #define in an arch header file or Kconfig,
>> and an arch definition in a third file.
>>
>> But it's certainly true that everybody knows how #ifdef works, and the fact
>> that __weak on a declaration affects all in-scope definitions is definitely
>> a land mine (multiple weak definitions with no strong one is a disaster).
>>
>>> My pragmatic approach so far has been to advocate __weak for
>>> drivers/pci patches but discourage it elsewhere when I review
>>> patches, in order to maintain consistency. I also think it
>>> would be nice to change the way that PCI handles architecture
>>> specific overrides in the process of unifying the host bridge
>>> handling.
>>>
>>> I wouldn't use Kconfig symbols in most cases though. My preferred
>>> choice would be to turn a lot of the __weak symbols into function
>>> pointers within a per-hostbridge structure. As an example, we could
>>> replace pcibios_add_device() with a pointer in pci_host_bridge->ops
>>> that gets set by all the architectures and host drivers that currently
>>> override it, and replace the one caller with
>>>
>>> if (pci_host_bridge->ops->add_device)
>>> pci_host_bridge->ops->add_device(dev);
>>
>> I definitely agree with this part, but I think it's orthogonal to the
>> __weak question. In this case, we'd like to support multiple host bridges,
>> each with a different flavor of add_device(). We can't do that at all with
>> either __weak or #ifdef.
>
> What we currently have though is a a __weak definition of add_device,
> which some architectures override, and some of them (ARM in particular)
> by implementing their own abstraction. I suspect for the majority of
> what we currently define as __weak functions, we could use a similar
> approach and kill off the global symbols entirely.

What would be next steps regarding this patch set? I am not sure we have
reached a consensus on weak vs #ifdef choice.

Regards,
Tomasz

2014-12-09 21:50:29

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH 6/6] pci, acpi: Share ACPI PCI config space accessors.

On Mon, Dec 8, 2014 at 12:13 AM, Tomasz Nowicki
<[email protected]> wrote:

> What would be next steps regarding this patch set? I am not sure we have
> reached a consensus on weak vs #ifdef choice.

I work through the list at
https://patchwork.ozlabs.org/project/linux-pci/list/ in FIFO order, so
you don't need to do anything else except poke me to move faster :)

The weak/ifdef question is relatively minor and easily changed, so I
wouldn't worry about it yet.

Bjorn

2014-12-10 06:16:36

by Tomasz Nowicki

[permalink] [raw]
Subject: Re: [PATCH 6/6] pci, acpi: Share ACPI PCI config space accessors.

W dniu 09.12.2014 o 22:50, Bjorn Helgaas pisze:
> On Mon, Dec 8, 2014 at 12:13 AM, Tomasz Nowicki
> <[email protected]> wrote:
>
>> What would be next steps regarding this patch set? I am not sure we have
>> reached a consensus on weak vs #ifdef choice.
>
> I work through the list at
> https://patchwork.ozlabs.org/project/linux-pci/list/ in FIFO order, so
> you don't need to do anything else except poke me to move faster :)

Awesome! I guess I am good at poke people so you can count on me :)

Regards,
Tomasz


2014-12-10 23:17:56

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86, acpi, pci: Move PCI config space accessors.

On Wed, Nov 19, 2014 at 05:04:48PM +0100, Tomasz Nowicki wrote:
> We are going to use mmio_config_{} name convention across all architectures.

mmio_config_*() are workarounds for an AMD Fam10h defect [1]. I would prefer
not to extend these names to other architectures, because they should be
able to use readb()/writeb()/etc. directly, as we did on x86 before
3320ad994afb ("x86: Work around mmio config space quirk on AMD Fam10h").

In fact, it would be nice if we could use readb()/writeb()/etc. on x86, and
only use mmio_config_*() when we're on AMD Fam10h. Maybe there could be a
"struct pci_raw_ops pci_mmcfg_amd_fam10h" that used mmio_config_*(), and we
could set raw_pci_ext_ops to those ops instead of the default ones when
we're on AMD Fam10h. Then x86 should be able to use the generic
readb()-based ops on most platforms.

Bjorn

[1] 3320ad994afb ("x86: Work around mmio config space quirk on AMD Fam10h")

Somewhere in this process, I'd like to update the AMD Fam10h comment to fix
the typos and reference the spec that talks about this, i.e., the "BIOS and
Kernel Developer's Guide (BKDG) For AMD Family 10h Processors," rev 3.48,
sec 2.11.1, "MMIO Configuration Coding Requirements."

That can be a separate patch since it's only cosmetic.

> Currently it belongs to asm/pci_x86.h header which should be included
> only for x86 specific files. From now on, those accessors are in asm/pci.h
> header which can be included in non-architecture code much easier.
>
> Signed-off-by: Tomasz Nowicki <[email protected]>
> Tested-by: Hanjun Guo <[email protected]>
> ---
> arch/x86/include/asm/pci.h | 42 +++++++++++++++++++++++++++++++++++++++++
> arch/x86/include/asm/pci_x86.h | 43 ------------------------------------------
> 2 files changed, 42 insertions(+), 43 deletions(-)
>
> diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
> index 0892ea0..5ba3720 100644
> --- a/arch/x86/include/asm/pci.h
> +++ b/arch/x86/include/asm/pci.h
> @@ -71,6 +71,48 @@ void pcibios_set_master(struct pci_dev *dev);
> struct irq_routing_table *pcibios_get_irq_routing_table(void);
> int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
>
> +/*
> + * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
> + * on their northbrige except through the * %eax register. As such, you MUST
> + * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
> + * accessor functions.
> + * In fact just use pci_config_*, nothing else please.
> + */
> +static inline unsigned char mmio_config_readb(void __iomem *pos)
> +{
> + u8 val;
> + asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
> + return val;
> +}
> +
> +static inline unsigned short mmio_config_readw(void __iomem *pos)
> +{
> + u16 val;
> + asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
> + return val;
> +}
> +
> +static inline unsigned int mmio_config_readl(void __iomem *pos)
> +{
> + u32 val;
> + asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
> + return val;
> +}
> +
> +static inline void mmio_config_writeb(void __iomem *pos, u8 val)
> +{
> + asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
> +}
> +
> +static inline void mmio_config_writew(void __iomem *pos, u16 val)
> +{
> + asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
> +}
> +
> +static inline void mmio_config_writel(void __iomem *pos, u32 val)
> +{
> + asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
> +}
>
> #define HAVE_PCI_MMAP
> extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
> diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
> index caba141..42e7332 100644
> --- a/arch/x86/include/asm/pci_x86.h
> +++ b/arch/x86/include/asm/pci_x86.h
> @@ -121,49 +121,6 @@ extern int __init pcibios_init(void);
> extern int pci_legacy_init(void);
> extern void pcibios_fixup_irqs(void);
>
> -/*
> - * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
> - * on their northbrige except through the * %eax register. As such, you MUST
> - * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
> - * accessor functions.
> - * In fact just use pci_config_*, nothing else please.
> - */
> -static inline unsigned char mmio_config_readb(void __iomem *pos)
> -{
> - u8 val;
> - asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
> - return val;
> -}
> -
> -static inline unsigned short mmio_config_readw(void __iomem *pos)
> -{
> - u16 val;
> - asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
> - return val;
> -}
> -
> -static inline unsigned int mmio_config_readl(void __iomem *pos)
> -{
> - u32 val;
> - asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
> - return val;
> -}
> -
> -static inline void mmio_config_writeb(void __iomem *pos, u8 val)
> -{
> - asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
> -}
> -
> -static inline void mmio_config_writew(void __iomem *pos, u16 val)
> -{
> - asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
> -}
> -
> -static inline void mmio_config_writel(void __iomem *pos, u32 val)
> -{
> - asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
> -}
> -
> #ifdef CONFIG_PCI
> # ifdef CONFIG_ACPI
> # define x86_default_pci_init pci_acpi_init
> --
> 1.9.1
>

2014-12-10 23:36:02

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH 2/6] x86, acpi, pci: Move arch-agnostic MMCFG code out of arch/x86/ directory

On Wed, Nov 19, 2014 at 05:04:47PM +0100, Tomasz Nowicki wrote:
> MMCFG table seems to be architecture independent and it makes sense
> to share common code across all architectures. The ones that may need
> architectural specific actions have default prototype (__weak).

We have kind of a stew of abbreviations here: MCFG, MMCFG, MMCONFIG. In
addition, the "MMCONFIG" term comes is used by ACPI but not by the PCI
specs. The PCIe spec uses "ECAM" (Enhanced Configuration Access Mechanism)
instead. If we're going to make this architecture-independent (which I
think is a great idea), I sort of hate to expose it using terms that only
mean something to ACPI folks.

So I want to moot the idea of replacing the "pci_mmcfg" and "pci_mmconfig"
prefixes with something more generic, like "pci_ecam". I'm a little bit
conflicted about this, because it would mean a fair amount of churn, but on
the other hand, I do think good names are a huge aid to understanding.

I'm mostly concerned about the things exposed outside of x86. Things that
are x86-specific or ACPI-specific could be left alone, so maybe it wouldn't
be a huge change.

I noticed you made some functions __weak; I'd like to see that change split
into a separate patch, with this one being basically mechanical code
movement with no functional change. That way we can discuss the need for
and merits of that approach separately. A mmcfg->ecam rename (if we do
that) should also be its own separate patch.

Bjorn

> Signed-off-by: Tomasz Nowicki <[email protected]>
> Tested-by: Hanjun Guo <[email protected]>
> ---
> arch/x86/include/asm/pci_x86.h | 29 -----
> arch/x86/pci/acpi.c | 1 +
> arch/x86/pci/init.c | 1 +
> arch/x86/pci/mmconfig-shared.c | 200 +---------------------------------
> arch/x86/pci/mmconfig_32.c | 1 +
> arch/x86/pci/mmconfig_64.c | 1 +
> drivers/acpi/Makefile | 1 +
> drivers/acpi/bus.c | 1 +
> drivers/acpi/mmconfig.c | 242 +++++++++++++++++++++++++++++++++++++++++
> include/linux/mmconfig.h | 58 ++++++++++
> include/linux/pci.h | 8 --
> 11 files changed, 308 insertions(+), 235 deletions(-)
> create mode 100644 drivers/acpi/mmconfig.c
> create mode 100644 include/linux/mmconfig.h
>
> diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
> index fa1195d..caba141 100644
> --- a/arch/x86/include/asm/pci_x86.h
> +++ b/arch/x86/include/asm/pci_x86.h
> @@ -121,35 +121,6 @@ extern int __init pcibios_init(void);
> extern int pci_legacy_init(void);
> extern void pcibios_fixup_irqs(void);
>
> -/* pci-mmconfig.c */
> -
> -/* "PCI MMCONFIG %04x [bus %02x-%02x]" */
> -#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2)
> -
> -struct pci_mmcfg_region {
> - struct list_head list;
> - struct resource res;
> - u64 address;
> - char __iomem *virt;
> - u16 segment;
> - u8 start_bus;
> - u8 end_bus;
> - char name[PCI_MMCFG_RESOURCE_NAME_LEN];
> -};
> -
> -extern int __init pci_mmcfg_arch_init(void);
> -extern void __init pci_mmcfg_arch_free(void);
> -extern int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
> -extern void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg);
> -extern int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
> - phys_addr_t addr);
> -extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end);
> -extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
> -
> -extern struct list_head pci_mmcfg_list;
> -
> -#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20)
> -
> /*
> * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
> * on their northbrige except through the * %eax register. As such, you MUST
> diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
> index cfd1b13..6d11131 100644
> --- a/arch/x86/pci/acpi.c
> +++ b/arch/x86/pci/acpi.c
> @@ -4,6 +4,7 @@
> #include <linux/irq.h>
> #include <linux/dmi.h>
> #include <linux/slab.h>
> +#include <linux/mmconfig.h>
> #include <asm/numa.h>
> #include <asm/pci_x86.h>
>
> diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
> index adb62aa..b4a55df 100644
> --- a/arch/x86/pci/init.c
> +++ b/arch/x86/pci/init.c
> @@ -1,5 +1,6 @@
> #include <linux/pci.h>
> #include <linux/init.h>
> +#include <linux/mmconfig.h>
> #include <asm/pci_x86.h>
> #include <asm/x86_init.h>
>
> diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
> index ac24e1c..b397544 100644
> --- a/arch/x86/pci/mmconfig-shared.c
> +++ b/arch/x86/pci/mmconfig-shared.c
> @@ -18,6 +18,7 @@
> #include <linux/slab.h>
> #include <linux/mutex.h>
> #include <linux/rculist.h>
> +#include <linux/mmconfig.h>
> #include <asm/e820.h>
> #include <asm/pci_x86.h>
> #include <asm/acpi.h>
> @@ -27,103 +28,6 @@
> /* Indicate if the mmcfg resources have been placed into the resource table. */
> static bool pci_mmcfg_running_state;
> static bool pci_mmcfg_arch_init_failed;
> -static DEFINE_MUTEX(pci_mmcfg_lock);
> -
> -LIST_HEAD(pci_mmcfg_list);
> -
> -static void __init pci_mmconfig_remove(struct pci_mmcfg_region *cfg)
> -{
> - if (cfg->res.parent)
> - release_resource(&cfg->res);
> - list_del(&cfg->list);
> - kfree(cfg);
> -}
> -
> -static void __init free_all_mmcfg(void)
> -{
> - struct pci_mmcfg_region *cfg, *tmp;
> -
> - pci_mmcfg_arch_free();
> - list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list)
> - pci_mmconfig_remove(cfg);
> -}
> -
> -static void list_add_sorted(struct pci_mmcfg_region *new)
> -{
> - struct pci_mmcfg_region *cfg;
> -
> - /* keep list sorted by segment and starting bus number */
> - list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) {
> - if (cfg->segment > new->segment ||
> - (cfg->segment == new->segment &&
> - cfg->start_bus >= new->start_bus)) {
> - list_add_tail_rcu(&new->list, &cfg->list);
> - return;
> - }
> - }
> - list_add_tail_rcu(&new->list, &pci_mmcfg_list);
> -}
> -
> -static struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
> - int end, u64 addr)
> -{
> - struct pci_mmcfg_region *new;
> - struct resource *res;
> -
> - if (addr == 0)
> - return NULL;
> -
> - new = kzalloc(sizeof(*new), GFP_KERNEL);
> - if (!new)
> - return NULL;
> -
> - new->address = addr;
> - new->segment = segment;
> - new->start_bus = start;
> - new->end_bus = end;
> -
> - res = &new->res;
> - res->start = addr + PCI_MMCFG_BUS_OFFSET(start);
> - res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1;
> - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> - snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN,
> - "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end);
> - res->name = new->name;
> -
> - return new;
> -}
> -
> -static struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start,
> - int end, u64 addr)
> -{
> - struct pci_mmcfg_region *new;
> -
> - new = pci_mmconfig_alloc(segment, start, end, addr);
> - if (new) {
> - mutex_lock(&pci_mmcfg_lock);
> - list_add_sorted(new);
> - mutex_unlock(&pci_mmcfg_lock);
> -
> - pr_info(PREFIX
> - "MMCONFIG for domain %04x [bus %02x-%02x] at %pR "
> - "(base %#lx)\n",
> - segment, start, end, &new->res, (unsigned long)addr);
> - }
> -
> - return new;
> -}
> -
> -struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
> -{
> - struct pci_mmcfg_region *cfg;
> -
> - list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
> - if (cfg->segment == segment &&
> - cfg->start_bus <= bus && bus <= cfg->end_bus)
> - return cfg;
> -
> - return NULL;
> -}
>
> static const char *__init pci_mmcfg_e7520(void)
> {
> @@ -543,7 +447,7 @@ static void __init pci_mmcfg_reject_broken(int early)
> }
> }
>
> -static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
> +int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
> struct acpi_mcfg_allocation *cfg)
> {
> int year;
> @@ -566,50 +470,6 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
> return -EINVAL;
> }
>
> -static int __init pci_parse_mcfg(struct acpi_table_header *header)
> -{
> - struct acpi_table_mcfg *mcfg;
> - struct acpi_mcfg_allocation *cfg_table, *cfg;
> - unsigned long i;
> - int entries;
> -
> - if (!header)
> - return -EINVAL;
> -
> - mcfg = (struct acpi_table_mcfg *)header;
> -
> - /* how many config structures do we have */
> - free_all_mmcfg();
> - entries = 0;
> - i = header->length - sizeof(struct acpi_table_mcfg);
> - while (i >= sizeof(struct acpi_mcfg_allocation)) {
> - entries++;
> - i -= sizeof(struct acpi_mcfg_allocation);
> - }
> - if (entries == 0) {
> - pr_err(PREFIX "MMCONFIG has no entries\n");
> - return -ENODEV;
> - }
> -
> - cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1];
> - for (i = 0; i < entries; i++) {
> - cfg = &cfg_table[i];
> - if (acpi_mcfg_check_entry(mcfg, cfg)) {
> - free_all_mmcfg();
> - return -ENODEV;
> - }
> -
> - if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number,
> - cfg->end_bus_number, cfg->address) == NULL) {
> - pr_warn(PREFIX "no memory for MCFG entries\n");
> - free_all_mmcfg();
> - return -ENOMEM;
> - }
> - }
> -
> - return 0;
> -}
> -
> static void __init __pci_mmcfg_init(int early)
> {
> pci_mmcfg_reject_broken(early);
> @@ -692,39 +552,6 @@ static int __init pci_mmcfg_late_insert_resources(void)
> */
> late_initcall(pci_mmcfg_late_insert_resources);
>
> -static int __init pci_mmconfig_inject(struct pci_mmcfg_region *cfg)
> -{
> - struct pci_mmcfg_region *cfg_conflict;
> - int err = 0;
> -
> - mutex_lock(&pci_mmcfg_lock);
> - cfg_conflict = pci_mmconfig_lookup(cfg->segment, cfg->start_bus);
> - if (cfg_conflict) {
> - if (cfg_conflict->end_bus < cfg->end_bus)
> - pr_info(FW_INFO "MMCONFIG for "
> - "domain %04x [bus %02x-%02x] "
> - "only partially covers this bridge\n",
> - cfg_conflict->segment, cfg_conflict->start_bus,
> - cfg_conflict->end_bus);
> - err = -EEXIST;
> - goto out;
> - }
> -
> - if (pci_mmcfg_arch_map(cfg)) {
> - pr_warn("fail to map MMCONFIG %pR.\n", &cfg->res);
> - err = -ENOMEM;
> - goto out;
> - } else {
> - list_add_sorted(cfg);
> - pr_info("MMCONFIG at %pR (base %#lx)\n",
> - &cfg->res, (unsigned long)cfg->address);
> -
> - }
> -out:
> - mutex_unlock(&pci_mmcfg_lock);
> - return err;
> -}
> -
> /* Add MMCFG information for host bridges */
> int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
> phys_addr_t addr)
> @@ -773,26 +600,3 @@ error:
> kfree(cfg);
> return rc;
> }
> -
> -/* Delete MMCFG information for host bridges */
> -int pci_mmconfig_delete(u16 seg, u8 start, u8 end)
> -{
> - struct pci_mmcfg_region *cfg;
> -
> - mutex_lock(&pci_mmcfg_lock);
> - list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
> - if (cfg->segment == seg && cfg->start_bus == start &&
> - cfg->end_bus == end) {
> - list_del_rcu(&cfg->list);
> - synchronize_rcu();
> - pci_mmcfg_arch_unmap(cfg);
> - if (cfg->res.parent)
> - release_resource(&cfg->res);
> - mutex_unlock(&pci_mmcfg_lock);
> - kfree(cfg);
> - return 0;
> - }
> - mutex_unlock(&pci_mmcfg_lock);
> -
> - return -ENOENT;
> -}
> diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
> index 43984bc..d774672 100644
> --- a/arch/x86/pci/mmconfig_32.c
> +++ b/arch/x86/pci/mmconfig_32.c
> @@ -12,6 +12,7 @@
> #include <linux/pci.h>
> #include <linux/init.h>
> #include <linux/rcupdate.h>
> +#include <linux/mmconfig.h>
> #include <asm/e820.h>
> #include <asm/pci_x86.h>
>
> diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
> index bea5249..1209596 100644
> --- a/arch/x86/pci/mmconfig_64.c
> +++ b/arch/x86/pci/mmconfig_64.c
> @@ -10,6 +10,7 @@
> #include <linux/acpi.h>
> #include <linux/bitmap.h>
> #include <linux/rcupdate.h>
> +#include <linux/mmconfig.h>
> #include <asm/e820.h>
> #include <asm/pci_x86.h>
>
> diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
> index c8a16e1..debacb5 100644
> --- a/drivers/acpi/Makefile
> +++ b/drivers/acpi/Makefile
> @@ -69,6 +69,7 @@ obj-$(CONFIG_ACPI_BUTTON) += button.o
> obj-$(CONFIG_ACPI_FAN) += fan.o
> obj-$(CONFIG_ACPI_VIDEO) += video.o
> obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o
> +obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
> obj-$(CONFIG_ACPI_PROCESSOR) += processor.o
> obj-y += container.o
> obj-$(CONFIG_ACPI_THERMAL) += thermal.o
> diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
> index c412fdb..6d5412ab 100644
> --- a/drivers/acpi/bus.c
> +++ b/drivers/acpi/bus.c
> @@ -41,6 +41,7 @@
> #include <acpi/apei.h>
> #include <linux/dmi.h>
> #include <linux/suspend.h>
> +#include <linux/mmconfig.h>
>
> #include "internal.h"
>
> diff --git a/drivers/acpi/mmconfig.c b/drivers/acpi/mmconfig.c
> new file mode 100644
> index 0000000..d62dccda
> --- /dev/null
> +++ b/drivers/acpi/mmconfig.c
> @@ -0,0 +1,242 @@
> +/*
> + * Arch agnostic low-level direct PCI config space access via MMCONFIG
> + *
> + * Per-architecture code takes care of the mappings, region validation and
> + * accesses themselves.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */
> +
> +#include <linux/mutex.h>
> +#include <linux/rculist.h>
> +#include <linux/mmconfig.h>
> +
> +#define PREFIX "PCI: "
> +
> +static DEFINE_MUTEX(pci_mmcfg_lock);
> +
> +LIST_HEAD(pci_mmcfg_list);
> +
> +static void __init pci_mmconfig_remove(struct pci_mmcfg_region *cfg)
> +{
> + if (cfg->res.parent)
> + release_resource(&cfg->res);
> + list_del(&cfg->list);
> + kfree(cfg);
> +}
> +
> +void __init free_all_mmcfg(void)
> +{
> + struct pci_mmcfg_region *cfg, *tmp;
> +
> + pci_mmcfg_arch_free();
> + list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list)
> + pci_mmconfig_remove(cfg);
> +}
> +
> +void list_add_sorted(struct pci_mmcfg_region *new)
> +{
> + struct pci_mmcfg_region *cfg;
> +
> + /* keep list sorted by segment and starting bus number */
> + list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) {
> + if (cfg->segment > new->segment ||
> + (cfg->segment == new->segment &&
> + cfg->start_bus >= new->start_bus)) {
> + list_add_tail_rcu(&new->list, &cfg->list);
> + return;
> + }
> + }
> + list_add_tail_rcu(&new->list, &pci_mmcfg_list);
> +}
> +
> +struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
> + int end, u64 addr)
> +{
> + struct pci_mmcfg_region *new;
> + struct resource *res;
> +
> + if (addr == 0)
> + return NULL;
> +
> + new = kzalloc(sizeof(*new), GFP_KERNEL);
> + if (!new)
> + return NULL;
> +
> + new->address = addr;
> + new->segment = segment;
> + new->start_bus = start;
> + new->end_bus = end;
> +
> + res = &new->res;
> + res->start = addr + PCI_MMCFG_BUS_OFFSET(start);
> + res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1;
> + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> + snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN,
> + "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end);
> + res->name = new->name;
> +
> + return new;
> +}
> +
> +struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start,
> + int end, u64 addr)
> +{
> + struct pci_mmcfg_region *new;
> +
> + new = pci_mmconfig_alloc(segment, start, end, addr);
> + if (new) {
> + mutex_lock(&pci_mmcfg_lock);
> + list_add_sorted(new);
> + mutex_unlock(&pci_mmcfg_lock);
> +
> + pr_info(PREFIX
> + "MMCONFIG for domain %04x [bus %02x-%02x] at %pR "
> + "(base %#lx)\n",
> + segment, start, end, &new->res, (unsigned long)addr);
> + }
> +
> + return new;
> +}
> +
> +int __init pci_mmconfig_inject(struct pci_mmcfg_region *cfg)
> +{
> + struct pci_mmcfg_region *cfg_conflict;
> + int err = 0;
> +
> + mutex_lock(&pci_mmcfg_lock);
> + cfg_conflict = pci_mmconfig_lookup(cfg->segment, cfg->start_bus);
> + if (cfg_conflict) {
> + if (cfg_conflict->end_bus < cfg->end_bus)
> + pr_info(FW_INFO "MMCONFIG for "
> + "domain %04x [bus %02x-%02x] "
> + "only partially covers this bridge\n",
> + cfg_conflict->segment, cfg_conflict->start_bus,
> + cfg_conflict->end_bus);
> + err = -EEXIST;
> + goto out;
> + }
> +
> + if (pci_mmcfg_arch_map(cfg)) {
> + pr_warn("fail to map MMCONFIG %pR.\n", &cfg->res);
> + err = -ENOMEM;
> + goto out;
> + } else {
> + list_add_sorted(cfg);
> + pr_info("MMCONFIG at %pR (base %#lx)\n",
> + &cfg->res, (unsigned long)cfg->address);
> +
> + }
> +out:
> + mutex_unlock(&pci_mmcfg_lock);
> + return err;
> +}
> +
> +struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
> +{
> + struct pci_mmcfg_region *cfg;
> +
> + list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
> + if (cfg->segment == segment &&
> + cfg->start_bus <= bus && bus <= cfg->end_bus)
> + return cfg;
> +
> + return NULL;
> +}
> +
> +int __init __weak acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
> + struct acpi_mcfg_allocation *cfg)
> +{
> + return 0;
> +}
> +
> +int __init pci_parse_mcfg(struct acpi_table_header *header)
> +{
> + struct acpi_table_mcfg *mcfg;
> + struct acpi_mcfg_allocation *cfg_table, *cfg;
> + unsigned long i;
> + int entries;
> +
> + if (!header)
> + return -EINVAL;
> +
> + mcfg = (struct acpi_table_mcfg *)header;
> +
> + /* how many config structures do we have */
> + free_all_mmcfg();
> + entries = 0;
> + i = header->length - sizeof(struct acpi_table_mcfg);
> + while (i >= sizeof(struct acpi_mcfg_allocation)) {
> + entries++;
> + i -= sizeof(struct acpi_mcfg_allocation);
> + }
> + if (entries == 0) {
> + pr_err(PREFIX "MMCONFIG has no entries\n");
> + return -ENODEV;
> + }
> +
> + cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1];
> + for (i = 0; i < entries; i++) {
> + cfg = &cfg_table[i];
> + if (acpi_mcfg_check_entry(mcfg, cfg)) {
> + free_all_mmcfg();
> + return -ENODEV;
> + }
> +
> + if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number,
> + cfg->end_bus_number, cfg->address) == NULL) {
> + pr_warn(PREFIX "no memory for MCFG entries\n");
> + free_all_mmcfg();
> + return -ENOMEM;
> + }
> + }
> +
> + return 0;
> +}
> +
> +/* Delete MMCFG information for host bridges */
> +int pci_mmconfig_delete(u16 seg, u8 start, u8 end)
> +{
> + struct pci_mmcfg_region *cfg;
> +
> + mutex_lock(&pci_mmcfg_lock);
> + list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
> + if (cfg->segment == seg && cfg->start_bus == start &&
> + cfg->end_bus == end) {
> + list_del_rcu(&cfg->list);
> + synchronize_rcu();
> + pci_mmcfg_arch_unmap(cfg);
> + if (cfg->res.parent)
> + release_resource(&cfg->res);
> + mutex_unlock(&pci_mmcfg_lock);
> + kfree(cfg);
> + return 0;
> + }
> + mutex_unlock(&pci_mmcfg_lock);
> +
> + return -ENOENT;
> +}
> +
> +void __init __weak pci_mmcfg_early_init(void)
> +{
> +
> +}
> +
> +void __init __weak pci_mmcfg_late_init(void)
> +{
> + struct pci_mmcfg_region *cfg;
> +
> + acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
> +
> + if (list_empty(&pci_mmcfg_list))
> + return;
> +
> + if (!pci_mmcfg_arch_init())
> + free_all_mmcfg();
> +
> + list_for_each_entry(cfg, &pci_mmcfg_list, list)
> + insert_resource(&iomem_resource, &cfg->res);
> +}
> diff --git a/include/linux/mmconfig.h b/include/linux/mmconfig.h
> new file mode 100644
> index 0000000..6ccd1ee
> --- /dev/null
> +++ b/include/linux/mmconfig.h
> @@ -0,0 +1,58 @@
> +#ifndef __MMCONFIG_H
> +#define __MMCONFIG_H
> +#ifdef __KERNEL__
> +
> +#include <linux/types.h>
> +#include <linux/acpi.h>
> +
> +#ifdef CONFIG_PCI_MMCONFIG
> +/* "PCI MMCONFIG %04x [bus %02x-%02x]" */
> +#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2)
> +
> +struct pci_mmcfg_region {
> + struct list_head list;
> + struct resource res;
> + u64 address;
> + char __iomem *virt;
> + u16 segment;
> + u8 start_bus;
> + u8 end_bus;
> + char name[PCI_MMCFG_RESOURCE_NAME_LEN];
> +};
> +
> +void pci_mmcfg_early_init(void);
> +void pci_mmcfg_late_init(void);
> +struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
> +
> +int pci_parse_mcfg(struct acpi_table_header *header);
> +struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
> + int end, u64 addr);
> +int pci_mmconfig_inject(struct pci_mmcfg_region *cfg);
> +struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start,
> + int end, u64 addr);
> +void list_add_sorted(struct pci_mmcfg_region *new);
> +int acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
> + struct acpi_mcfg_allocation *cfg);
> +void free_all_mmcfg(void);
> +int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
> + phys_addr_t addr);
> +int pci_mmconfig_delete(u16 seg, u8 start, u8 end);
> +
> +/* Arch specific calls */
> +int pci_mmcfg_arch_init(void);
> +void pci_mmcfg_arch_free(void);
> +int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
> +void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg);
> +
> +extern struct list_head pci_mmcfg_list;
> +
> +#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20)
> +#else /* CONFIG_PCI_MMCONFIG */
> +static inline void pci_mmcfg_late_init(void) { }
> +static inline void pci_mmcfg_early_init(void) { }
> +static inline void *pci_mmconfig_lookup(int segment, int bus)
> +{ return NULL; }
> +#endif /* CONFIG_PCI_MMCONFIG */
> +
> +#endif /* __KERNEL__ */
> +#endif /* __MMCONFIG_H */
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 6afba72..0a8b82e 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1658,14 +1658,6 @@ void pcibios_release_device(struct pci_dev *dev);
> extern struct dev_pm_ops pcibios_pm_ops;
> #endif
>
> -#ifdef CONFIG_PCI_MMCONFIG
> -void __init pci_mmcfg_early_init(void);
> -void __init pci_mmcfg_late_init(void);
> -#else
> -static inline void pci_mmcfg_early_init(void) { }
> -static inline void pci_mmcfg_late_init(void) { }
> -#endif
> -
> int pci_ext_cfg_avail(void);
>
> void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar);
> --
> 1.9.1
>

2014-12-10 23:55:07

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH 2/6] x86, acpi, pci: Move arch-agnostic MMCFG code out of arch/x86/ directory

On Wed, Nov 19, 2014 at 05:04:47PM +0100, Tomasz Nowicki wrote:
> MMCFG table seems to be architecture independent and it makes sense
> to share common code across all architectures. The ones that may need
> architectural specific actions have default prototype (__weak).
>
> Signed-off-by: Tomasz Nowicki <[email protected]>
> Tested-by: Hanjun Guo <[email protected]>
> ---
> arch/x86/include/asm/pci_x86.h | 29 -----
> arch/x86/pci/acpi.c | 1 +
> arch/x86/pci/init.c | 1 +
> arch/x86/pci/mmconfig-shared.c | 200 +---------------------------------
> arch/x86/pci/mmconfig_32.c | 1 +
> arch/x86/pci/mmconfig_64.c | 1 +
> drivers/acpi/Makefile | 1 +
> drivers/acpi/bus.c | 1 +
> drivers/acpi/mmconfig.c | 242 +++++++++++++++++++++++++++++++++++++++++
> include/linux/mmconfig.h | 58 ++++++++++
> include/linux/pci.h | 8 --
> 11 files changed, 308 insertions(+), 235 deletions(-)
> create mode 100644 drivers/acpi/mmconfig.c
> create mode 100644 include/linux/mmconfig.h
> ...

Much of the code you're moving to drivers/acpi/mmconfig.c is not actually
ACPI-specific and would have to be duplicated for a non-ACPI architecture
that supports ECAM. Could that code be moved somewhere like
drivers/pci/ecam.c, where it could be shared?

Bjorn

> diff --git a/drivers/acpi/mmconfig.c b/drivers/acpi/mmconfig.c
> new file mode 100644
> index 0000000..d62dccda
> --- /dev/null
> +++ b/drivers/acpi/mmconfig.c
> @@ -0,0 +1,242 @@
> +/*
> + * Arch agnostic low-level direct PCI config space access via MMCONFIG
> + *
> + * Per-architecture code takes care of the mappings, region validation and
> + * accesses themselves.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */
> +
> +#include <linux/mutex.h>
> +#include <linux/rculist.h>
> +#include <linux/mmconfig.h>
> +
> +#define PREFIX "PCI: "
> +
> +static DEFINE_MUTEX(pci_mmcfg_lock);
> +
> +LIST_HEAD(pci_mmcfg_list);
> +
> +static void __init pci_mmconfig_remove(struct pci_mmcfg_region *cfg)
> +{
> + if (cfg->res.parent)
> + release_resource(&cfg->res);
> + list_del(&cfg->list);
> + kfree(cfg);
> +}
> +
> +void __init free_all_mmcfg(void)
> +{
> + struct pci_mmcfg_region *cfg, *tmp;
> +
> + pci_mmcfg_arch_free();
> + list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list)
> + pci_mmconfig_remove(cfg);
> +}
> +
> +void list_add_sorted(struct pci_mmcfg_region *new)
> +{
> + struct pci_mmcfg_region *cfg;
> +
> + /* keep list sorted by segment and starting bus number */
> + list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) {
> + if (cfg->segment > new->segment ||
> + (cfg->segment == new->segment &&
> + cfg->start_bus >= new->start_bus)) {
> + list_add_tail_rcu(&new->list, &cfg->list);
> + return;
> + }
> + }
> + list_add_tail_rcu(&new->list, &pci_mmcfg_list);
> +}
> +
> +struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
> + int end, u64 addr)
> +{
> + struct pci_mmcfg_region *new;
> + struct resource *res;
> +
> + if (addr == 0)
> + return NULL;
> +
> + new = kzalloc(sizeof(*new), GFP_KERNEL);
> + if (!new)
> + return NULL;
> +
> + new->address = addr;
> + new->segment = segment;
> + new->start_bus = start;
> + new->end_bus = end;
> +
> + res = &new->res;
> + res->start = addr + PCI_MMCFG_BUS_OFFSET(start);
> + res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1;
> + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> + snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN,
> + "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end);
> + res->name = new->name;
> +
> + return new;
> +}
> +
> +struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start,
> + int end, u64 addr)
> +{
> + struct pci_mmcfg_region *new;
> +
> + new = pci_mmconfig_alloc(segment, start, end, addr);
> + if (new) {
> + mutex_lock(&pci_mmcfg_lock);
> + list_add_sorted(new);
> + mutex_unlock(&pci_mmcfg_lock);
> +
> + pr_info(PREFIX
> + "MMCONFIG for domain %04x [bus %02x-%02x] at %pR "
> + "(base %#lx)\n",
> + segment, start, end, &new->res, (unsigned long)addr);
> + }
> +
> + return new;
> +}
> +
> +int __init pci_mmconfig_inject(struct pci_mmcfg_region *cfg)
> +{
> + struct pci_mmcfg_region *cfg_conflict;
> + int err = 0;
> +
> + mutex_lock(&pci_mmcfg_lock);
> + cfg_conflict = pci_mmconfig_lookup(cfg->segment, cfg->start_bus);
> + if (cfg_conflict) {
> + if (cfg_conflict->end_bus < cfg->end_bus)
> + pr_info(FW_INFO "MMCONFIG for "
> + "domain %04x [bus %02x-%02x] "
> + "only partially covers this bridge\n",
> + cfg_conflict->segment, cfg_conflict->start_bus,
> + cfg_conflict->end_bus);
> + err = -EEXIST;
> + goto out;
> + }
> +
> + if (pci_mmcfg_arch_map(cfg)) {
> + pr_warn("fail to map MMCONFIG %pR.\n", &cfg->res);
> + err = -ENOMEM;
> + goto out;
> + } else {
> + list_add_sorted(cfg);
> + pr_info("MMCONFIG at %pR (base %#lx)\n",
> + &cfg->res, (unsigned long)cfg->address);
> +
> + }
> +out:
> + mutex_unlock(&pci_mmcfg_lock);
> + return err;
> +}
> +
> +struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
> +{
> + struct pci_mmcfg_region *cfg;
> +
> + list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
> + if (cfg->segment == segment &&
> + cfg->start_bus <= bus && bus <= cfg->end_bus)
> + return cfg;
> +
> + return NULL;
> +}
> +
> +int __init __weak acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
> + struct acpi_mcfg_allocation *cfg)
> +{
> + return 0;
> +}
> +
> +int __init pci_parse_mcfg(struct acpi_table_header *header)
> +{
> + struct acpi_table_mcfg *mcfg;
> + struct acpi_mcfg_allocation *cfg_table, *cfg;
> + unsigned long i;
> + int entries;
> +
> + if (!header)
> + return -EINVAL;
> +
> + mcfg = (struct acpi_table_mcfg *)header;
> +
> + /* how many config structures do we have */
> + free_all_mmcfg();
> + entries = 0;
> + i = header->length - sizeof(struct acpi_table_mcfg);
> + while (i >= sizeof(struct acpi_mcfg_allocation)) {
> + entries++;
> + i -= sizeof(struct acpi_mcfg_allocation);
> + }
> + if (entries == 0) {
> + pr_err(PREFIX "MMCONFIG has no entries\n");
> + return -ENODEV;
> + }
> +
> + cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1];
> + for (i = 0; i < entries; i++) {
> + cfg = &cfg_table[i];
> + if (acpi_mcfg_check_entry(mcfg, cfg)) {
> + free_all_mmcfg();
> + return -ENODEV;
> + }
> +
> + if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number,
> + cfg->end_bus_number, cfg->address) == NULL) {
> + pr_warn(PREFIX "no memory for MCFG entries\n");
> + free_all_mmcfg();
> + return -ENOMEM;
> + }
> + }
> +
> + return 0;
> +}
> +
> +/* Delete MMCFG information for host bridges */
> +int pci_mmconfig_delete(u16 seg, u8 start, u8 end)
> +{
> + struct pci_mmcfg_region *cfg;
> +
> + mutex_lock(&pci_mmcfg_lock);
> + list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
> + if (cfg->segment == seg && cfg->start_bus == start &&
> + cfg->end_bus == end) {
> + list_del_rcu(&cfg->list);
> + synchronize_rcu();
> + pci_mmcfg_arch_unmap(cfg);
> + if (cfg->res.parent)
> + release_resource(&cfg->res);
> + mutex_unlock(&pci_mmcfg_lock);
> + kfree(cfg);
> + return 0;
> + }
> + mutex_unlock(&pci_mmcfg_lock);
> +
> + return -ENOENT;
> +}
> +
> +void __init __weak pci_mmcfg_early_init(void)
> +{
> +
> +}
> +
> +void __init __weak pci_mmcfg_late_init(void)
> +{
> + struct pci_mmcfg_region *cfg;
> +
> + acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
> +
> + if (list_empty(&pci_mmcfg_list))
> + return;
> +
> + if (!pci_mmcfg_arch_init())
> + free_all_mmcfg();
> +
> + list_for_each_entry(cfg, &pci_mmcfg_list, list)
> + insert_resource(&iomem_resource, &cfg->res);
> +}
> diff --git a/include/linux/mmconfig.h b/include/linux/mmconfig.h
> new file mode 100644
> index 0000000..6ccd1ee
> --- /dev/null
> +++ b/include/linux/mmconfig.h
> @@ -0,0 +1,58 @@
> +#ifndef __MMCONFIG_H
> +#define __MMCONFIG_H
> +#ifdef __KERNEL__
> +
> +#include <linux/types.h>
> +#include <linux/acpi.h>
> +
> +#ifdef CONFIG_PCI_MMCONFIG
> +/* "PCI MMCONFIG %04x [bus %02x-%02x]" */
> +#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2)
> +
> +struct pci_mmcfg_region {
> + struct list_head list;
> + struct resource res;
> + u64 address;
> + char __iomem *virt;
> + u16 segment;
> + u8 start_bus;
> + u8 end_bus;
> + char name[PCI_MMCFG_RESOURCE_NAME_LEN];
> +};
> +
> +void pci_mmcfg_early_init(void);
> +void pci_mmcfg_late_init(void);
> +struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
> +
> +int pci_parse_mcfg(struct acpi_table_header *header);
> +struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
> + int end, u64 addr);
> +int pci_mmconfig_inject(struct pci_mmcfg_region *cfg);
> +struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start,
> + int end, u64 addr);
> +void list_add_sorted(struct pci_mmcfg_region *new);
> +int acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
> + struct acpi_mcfg_allocation *cfg);
> +void free_all_mmcfg(void);
> +int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
> + phys_addr_t addr);
> +int pci_mmconfig_delete(u16 seg, u8 start, u8 end);
> +
> +/* Arch specific calls */
> +int pci_mmcfg_arch_init(void);
> +void pci_mmcfg_arch_free(void);
> +int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
> +void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg);
> +
> +extern struct list_head pci_mmcfg_list;
> +
> +#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20)
> +#else /* CONFIG_PCI_MMCONFIG */
> +static inline void pci_mmcfg_late_init(void) { }
> +static inline void pci_mmcfg_early_init(void) { }
> +static inline void *pci_mmconfig_lookup(int segment, int bus)
> +{ return NULL; }
> +#endif /* CONFIG_PCI_MMCONFIG */
> +
> +#endif /* __KERNEL__ */
> +#endif /* __MMCONFIG_H */
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 6afba72..0a8b82e 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1658,14 +1658,6 @@ void pcibios_release_device(struct pci_dev *dev);
> extern struct dev_pm_ops pcibios_pm_ops;
> #endif
>
> -#ifdef CONFIG_PCI_MMCONFIG
> -void __init pci_mmcfg_early_init(void);
> -void __init pci_mmcfg_late_init(void);
> -#else
> -static inline void pci_mmcfg_early_init(void) { }
> -static inline void pci_mmcfg_late_init(void) { }
> -#endif
> -
> int pci_ext_cfg_avail(void);
>
> void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar);
> --
> 1.9.1
>

2014-12-12 14:55:45

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [Linaro-acpi] [PATCH 2/6] x86, acpi, pci: Move arch-agnostic MMCFG code out of arch/x86/ directory

On Wednesday 10 December 2014 16:55:00 Bjorn Helgaas wrote:
> On Wed, Nov 19, 2014 at 05:04:47PM +0100, Tomasz Nowicki wrote:
> > MMCFG table seems to be architecture independent and it makes sense
> > to share common code across all architectures. The ones that may need
> > architectural specific actions have default prototype (__weak).
> >
> > Signed-off-by: Tomasz Nowicki <[email protected]>
> > Tested-by: Hanjun Guo <[email protected]>
> > ---
> > arch/x86/include/asm/pci_x86.h | 29 -----
> > arch/x86/pci/acpi.c | 1 +
> > arch/x86/pci/init.c | 1 +
> > arch/x86/pci/mmconfig-shared.c | 200 +---------------------------------
> > arch/x86/pci/mmconfig_32.c | 1 +
> > arch/x86/pci/mmconfig_64.c | 1 +
> > drivers/acpi/Makefile | 1 +
> > drivers/acpi/bus.c | 1 +
> > drivers/acpi/mmconfig.c | 242 +++++++++++++++++++++++++++++++++++++++++
> > include/linux/mmconfig.h | 58 ++++++++++
> > include/linux/pci.h | 8 --
> > 11 files changed, 308 insertions(+), 235 deletions(-)
> > create mode 100644 drivers/acpi/mmconfig.c
> > create mode 100644 include/linux/mmconfig.h
> > ...
>
> Much of the code you're moving to drivers/acpi/mmconfig.c is not actually
> ACPI-specific and would have to be duplicated for a non-ACPI architecture
> that supports ECAM. Could that code be moved somewhere like
> drivers/pci/ecam.c, where it could be shared?

We have an implementation of ECAM in drivers/pci/host/pci-host-generic.c,
which today is ARM32 specific but should be migrated to the new generic
infrastructure we are adding for ARM64, after which that driver becomes
very simple.

I suppose we could either make it use drivers/pci/ecam.c then, or have
the config space code from pci-host-generic be shared between ACPI and
DT based platforms. The former approach is probably nicer in the long
run, and it allows sharing with other drivers that use ECAM but are not
completely generic otherwise.

Arnd

2015-02-02 20:43:04

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH 0/6] PCI: MMCONFIG clean up

On Wed, Nov 19, 2014 at 05:04:45PM +0100, Tomasz Nowicki wrote:
> MMCFG ACPI table has no arch dependencies so it can be used across all
> architectures. Currently MMCONFIG related code resides in arch/x86 directories.
> This patch set is goint to isolate non-architecure specific code and make
> it accessible from drivers/pci/ directory.
>
> Tomasz Nowicki (6):
> x86, acpi, pci: Reorder logic of pci_mmconfig_insert() function
> x86, acpi, pci: Move arch-agnostic MMCFG code out of arch/x86/
> directory
> x86, acpi, pci: Move PCI config space accessors.
> x86, acpi, pci: mmconfig_{32,64}.c code refactoring - remove code
> duplication.
> x86, acpi, pci: mmconfig_64.c becomes default implementation for arch
> agnostic low-level direct PCI config space accessors via MMCONFIG.
> pci, acpi: Share ACPI PCI config space accessors.

Hi Tomasz,

I'm just checking to make sure we aren't deadlocked here, with me waiting
for you and you waiting for me. I gave you some comments about
abbreviations (MCFG/MMCFG/MMCONFIG/ECAM), weak functions, code placement
(drivers/acpi vs. drivers/pci), and the mmio_config_*() naming, so I've
been waiting to continue those discussions. But maybe you're waiting for
me, too?

I think this sort of cleanup is a great idea and I hope we can make some
progress on it. If it's easier to do it in small pieces, e.g., starting
out by moving code and renaming things with no functional changes, that
would be great with me.

Bjorn

2015-02-03 07:42:11

by Tomasz Nowicki

[permalink] [raw]
Subject: Re: [PATCH 0/6] PCI: MMCONFIG clean up

On 02.02.2015 21:42, Bjorn Helgaas wrote:
> On Wed, Nov 19, 2014 at 05:04:45PM +0100, Tomasz Nowicki wrote:
>> MMCFG ACPI table has no arch dependencies so it can be used across all
>> architectures. Currently MMCONFIG related code resides in arch/x86 directories.
>> This patch set is goint to isolate non-architecure specific code and make
>> it accessible from drivers/pci/ directory.
>>
>> Tomasz Nowicki (6):
>> x86, acpi, pci: Reorder logic of pci_mmconfig_insert() function
>> x86, acpi, pci: Move arch-agnostic MMCFG code out of arch/x86/
>> directory
>> x86, acpi, pci: Move PCI config space accessors.
>> x86, acpi, pci: mmconfig_{32,64}.c code refactoring - remove code
>> duplication.
>> x86, acpi, pci: mmconfig_64.c becomes default implementation for arch
>> agnostic low-level direct PCI config space accessors via MMCONFIG.
>> pci, acpi: Share ACPI PCI config space accessors.
>
> Hi Tomasz,
>
> I'm just checking to make sure we aren't deadlocked here, with me waiting
> for you and you waiting for me. I gave you some comments about
> abbreviations (MCFG/MMCFG/MMCONFIG/ECAM), weak functions, code placement
> (drivers/acpi vs. drivers/pci), and the mmio_config_*() naming, so I've
> been waiting to continue those discussions. But maybe you're waiting for
> me, too?
>
> I think this sort of cleanup is a great idea and I hope we can make some
> progress on it. If it's easier to do it in small pieces, e.g., starting
> out by moving code and renaming things with no functional changes, that
> would be great with me.

Hi Bjorn,

It is not deadlock, it is rather me suffering with lack of time :)

I definitely want to make that cleanup happen and was about to start
working on it again. Thanks for remanding me, patches/comments should
come up soon.

Regards,
Tomasz

2015-02-03 09:30:12

by Tomasz Nowicki

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86, acpi, pci: Move PCI config space accessors.

On 11.12.2014 00:17, Bjorn Helgaas wrote:
> On Wed, Nov 19, 2014 at 05:04:48PM +0100, Tomasz Nowicki wrote:
>> We are going to use mmio_config_{} name convention across all architectures.
>
> mmio_config_*() are workarounds for an AMD Fam10h defect [1]. I would prefer
> not to extend these names to other architectures, because they should be
> able to use readb()/writeb()/etc. directly, as we did on x86 before
> 3320ad994afb ("x86: Work around mmio config space quirk on AMD Fam10h").
>
> In fact, it would be nice if we could use readb()/writeb()/etc. on x86, and
> only use mmio_config_*() when we're on AMD Fam10h. Maybe there could be a
> "struct pci_raw_ops pci_mmcfg_amd_fam10h" that used mmio_config_*(), and we
> could set raw_pci_ext_ops to those ops instead of the default ones when
> we're on AMD Fam10h. Then x86 should be able to use the generic
> readb()-based ops on most platforms.
That's good point, I will readb()/writeb() instead.

>
> Bjorn
>
> [1] 3320ad994afb ("x86: Work around mmio config space quirk on AMD Fam10h")
>
> Somewhere in this process, I'd like to update the AMD Fam10h comment to fix
> the typos and reference the spec that talks about this, i.e., the "BIOS and
> Kernel Developer's Guide (BKDG) For AMD Family 10h Processors," rev 3.48,
> sec 2.11.1, "MMIO Configuration Coding Requirements."
>
> That can be a separate patch since it's only cosmetic.

Sure, will do.

Regards,
Tomasz

>
>> Currently it belongs to asm/pci_x86.h header which should be included
>> only for x86 specific files. From now on, those accessors are in asm/pci.h
>> header which can be included in non-architecture code much easier.
>>
>> Signed-off-by: Tomasz Nowicki <[email protected]>
>> Tested-by: Hanjun Guo <[email protected]>
>> ---
>> arch/x86/include/asm/pci.h | 42 +++++++++++++++++++++++++++++++++++++++++
>> arch/x86/include/asm/pci_x86.h | 43 ------------------------------------------
>> 2 files changed, 42 insertions(+), 43 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
>> index 0892ea0..5ba3720 100644
>> --- a/arch/x86/include/asm/pci.h
>> +++ b/arch/x86/include/asm/pci.h
>> @@ -71,6 +71,48 @@ void pcibios_set_master(struct pci_dev *dev);
>> struct irq_routing_table *pcibios_get_irq_routing_table(void);
>> int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
>>
>> +/*
>> + * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
>> + * on their northbrige except through the * %eax register. As such, you MUST
>> + * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
>> + * accessor functions.
>> + * In fact just use pci_config_*, nothing else please.
>> + */
>> +static inline unsigned char mmio_config_readb(void __iomem *pos)
>> +{
>> + u8 val;
>> + asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
>> + return val;
>> +}
>> +
>> +static inline unsigned short mmio_config_readw(void __iomem *pos)
>> +{
>> + u16 val;
>> + asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
>> + return val;
>> +}
>> +
>> +static inline unsigned int mmio_config_readl(void __iomem *pos)
>> +{
>> + u32 val;
>> + asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
>> + return val;
>> +}
>> +
>> +static inline void mmio_config_writeb(void __iomem *pos, u8 val)
>> +{
>> + asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
>> +}
>> +
>> +static inline void mmio_config_writew(void __iomem *pos, u16 val)
>> +{
>> + asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
>> +}
>> +
>> +static inline void mmio_config_writel(void __iomem *pos, u32 val)
>> +{
>> + asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
>> +}
>>
>> #define HAVE_PCI_MMAP
>> extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
>> diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
>> index caba141..42e7332 100644
>> --- a/arch/x86/include/asm/pci_x86.h
>> +++ b/arch/x86/include/asm/pci_x86.h
>> @@ -121,49 +121,6 @@ extern int __init pcibios_init(void);
>> extern int pci_legacy_init(void);
>> extern void pcibios_fixup_irqs(void);
>>
>> -/*
>> - * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
>> - * on their northbrige except through the * %eax register. As such, you MUST
>> - * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
>> - * accessor functions.
>> - * In fact just use pci_config_*, nothing else please.
>> - */
>> -static inline unsigned char mmio_config_readb(void __iomem *pos)
>> -{
>> - u8 val;
>> - asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
>> - return val;
>> -}
>> -
>> -static inline unsigned short mmio_config_readw(void __iomem *pos)
>> -{
>> - u16 val;
>> - asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
>> - return val;
>> -}
>> -
>> -static inline unsigned int mmio_config_readl(void __iomem *pos)
>> -{
>> - u32 val;
>> - asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
>> - return val;
>> -}
>> -
>> -static inline void mmio_config_writeb(void __iomem *pos, u8 val)
>> -{
>> - asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
>> -}
>> -
>> -static inline void mmio_config_writew(void __iomem *pos, u16 val)
>> -{
>> - asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
>> -}
>> -
>> -static inline void mmio_config_writel(void __iomem *pos, u32 val)
>> -{
>> - asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
>> -}
>> -
>> #ifdef CONFIG_PCI
>> # ifdef CONFIG_ACPI
>> # define x86_default_pci_init pci_acpi_init
>> --
>> 1.9.1
>>

2015-02-17 13:03:10

by Tomasz Nowicki

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86, acpi, pci: Move PCI config space accessors.

On 11.12.2014 00:17, Bjorn Helgaas wrote:
> On Wed, Nov 19, 2014 at 05:04:48PM +0100, Tomasz Nowicki wrote:
>> We are going to use mmio_config_{} name convention across all architectures.
>
> mmio_config_*() are workarounds for an AMD Fam10h defect [1]. I would prefer
> not to extend these names to other architectures, because they should be
> able to use readb()/writeb()/etc. directly, as we did on x86 before
> 3320ad994afb ("x86: Work around mmio config space quirk on AMD Fam10h").
>
> In fact, it would be nice if we could use readb()/writeb()/etc. on x86, and
> only use mmio_config_*() when we're on AMD Fam10h. Maybe there could be a
> "struct pci_raw_ops pci_mmcfg_amd_fam10h" that used mmio_config_*(), and we
> could set raw_pci_ext_ops to those ops instead of the default ones when
> we're on AMD Fam10h. Then x86 should be able to use the generic
> readb()-based ops on most platforms.

While I do agree we should use readb()/writeb()... methods, I am not
sure there is a nice way to use mmio_config_*() exclusively for AMD
Fam10h. For x86, right now, we have three PCI config accessors sets:
mmconfig_32.c, mmconfig_64.c, numachip.c and each are different. So one
pci_mmcfg_amd_fam10h pci_raw_ops is not enough. I am thinking of having
additional structure (integrated with "struct pci_mmcfg_region") that
keeps MMIO accessors where readb()/writeb() would be the default one.
For AMD Fam10h case we could tweak it as necessary. What do you thing Bjorn?

Tomasz

>
> Bjorn
>
> [1] 3320ad994afb ("x86: Work around mmio config space quirk on AMD Fam10h")
>
> Somewhere in this process, I'd like to update the AMD Fam10h comment to fix
> the typos and reference the spec that talks about this, i.e., the "BIOS and
> Kernel Developer's Guide (BKDG) For AMD Family 10h Processors," rev 3.48,
> sec 2.11.1, "MMIO Configuration Coding Requirements."
>
> That can be a separate patch since it's only cosmetic.
>
>> Currently it belongs to asm/pci_x86.h header which should be included
>> only for x86 specific files. From now on, those accessors are in asm/pci.h
>> header which can be included in non-architecture code much easier.
>>
>> Signed-off-by: Tomasz Nowicki <[email protected]>
>> Tested-by: Hanjun Guo <[email protected]>
>> ---
>> arch/x86/include/asm/pci.h | 42 +++++++++++++++++++++++++++++++++++++++++
>> arch/x86/include/asm/pci_x86.h | 43 ------------------------------------------
>> 2 files changed, 42 insertions(+), 43 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
>> index 0892ea0..5ba3720 100644
>> --- a/arch/x86/include/asm/pci.h
>> +++ b/arch/x86/include/asm/pci.h
>> @@ -71,6 +71,48 @@ void pcibios_set_master(struct pci_dev *dev);
>> struct irq_routing_table *pcibios_get_irq_routing_table(void);
>> int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
>>
>> +/*
>> + * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
>> + * on their northbrige except through the * %eax register. As such, you MUST
>> + * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
>> + * accessor functions.
>> + * In fact just use pci_config_*, nothing else please.
>> + */
>> +static inline unsigned char mmio_config_readb(void __iomem *pos)
>> +{
>> + u8 val;
>> + asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
>> + return val;
>> +}
>> +
>> +static inline unsigned short mmio_config_readw(void __iomem *pos)
>> +{
>> + u16 val;
>> + asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
>> + return val;
>> +}
>> +
>> +static inline unsigned int mmio_config_readl(void __iomem *pos)
>> +{
>> + u32 val;
>> + asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
>> + return val;
>> +}
>> +
>> +static inline void mmio_config_writeb(void __iomem *pos, u8 val)
>> +{
>> + asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
>> +}
>> +
>> +static inline void mmio_config_writew(void __iomem *pos, u16 val)
>> +{
>> + asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
>> +}
>> +
>> +static inline void mmio_config_writel(void __iomem *pos, u32 val)
>> +{
>> + asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
>> +}
>>
>> #define HAVE_PCI_MMAP
>> extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
>> diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
>> index caba141..42e7332 100644
>> --- a/arch/x86/include/asm/pci_x86.h
>> +++ b/arch/x86/include/asm/pci_x86.h
>> @@ -121,49 +121,6 @@ extern int __init pcibios_init(void);
>> extern int pci_legacy_init(void);
>> extern void pcibios_fixup_irqs(void);
>>
>> -/*
>> - * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
>> - * on their northbrige except through the * %eax register. As such, you MUST
>> - * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
>> - * accessor functions.
>> - * In fact just use pci_config_*, nothing else please.
>> - */
>> -static inline unsigned char mmio_config_readb(void __iomem *pos)
>> -{
>> - u8 val;
>> - asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
>> - return val;
>> -}
>> -
>> -static inline unsigned short mmio_config_readw(void __iomem *pos)
>> -{
>> - u16 val;
>> - asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
>> - return val;
>> -}
>> -
>> -static inline unsigned int mmio_config_readl(void __iomem *pos)
>> -{
>> - u32 val;
>> - asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
>> - return val;
>> -}
>> -
>> -static inline void mmio_config_writeb(void __iomem *pos, u8 val)
>> -{
>> - asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
>> -}
>> -
>> -static inline void mmio_config_writew(void __iomem *pos, u16 val)
>> -{
>> - asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
>> -}
>> -
>> -static inline void mmio_config_writel(void __iomem *pos, u32 val)
>> -{
>> - asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
>> -}
>> -
>> #ifdef CONFIG_PCI
>> # ifdef CONFIG_ACPI
>> # define x86_default_pci_init pci_acpi_init
>> --
>> 1.9.1
>>

2015-02-18 18:27:47

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86, acpi, pci: Move PCI config space accessors.

On Tue, Feb 17, 2015 at 02:03:01PM +0100, Tomasz Nowicki wrote:
> On 11.12.2014 00:17, Bjorn Helgaas wrote:
> >On Wed, Nov 19, 2014 at 05:04:48PM +0100, Tomasz Nowicki wrote:
> >>We are going to use mmio_config_{} name convention across all architectures.
> >
> >mmio_config_*() are workarounds for an AMD Fam10h defect [1]. I would prefer
> >not to extend these names to other architectures, because they should be
> >able to use readb()/writeb()/etc. directly, as we did on x86 before
> >3320ad994afb ("x86: Work around mmio config space quirk on AMD Fam10h").
> >
> >In fact, it would be nice if we could use readb()/writeb()/etc. on x86, and
> >only use mmio_config_*() when we're on AMD Fam10h. Maybe there could be a
> >"struct pci_raw_ops pci_mmcfg_amd_fam10h" that used mmio_config_*(), and we
> >could set raw_pci_ext_ops to those ops instead of the default ones when
> >we're on AMD Fam10h. Then x86 should be able to use the generic
> >readb()-based ops on most platforms.
>
> While I do agree we should use readb()/writeb()... methods, I am not
> sure there is a nice way to use mmio_config_*() exclusively for AMD
> Fam10h. For x86, right now, we have three PCI config accessors sets:
> mmconfig_32.c, mmconfig_64.c, numachip.c and each are different. So
> one pci_mmcfg_amd_fam10h pci_raw_ops is not enough. I am thinking of
> having additional structure (integrated with "struct
> pci_mmcfg_region") that keeps MMIO accessors where readb()/writeb()
> would be the default one. For AMD Fam10h case we could tweak it as
> necessary. What do you thing Bjorn?

It's OK if we use mmio_config_*() for all x86 (not just AMD Fam10h); that's
what we do today. It'd be *nicer* if we could use the workaround only for
Fam10h, but if it complicates the code, don't bother.

My main point is that I think your v1 posting requires every arch to
implement mmio_config_*(), and they will all be the same. If an arch
doesn't require a workaround, it shouldn't have to supply anything and we
should default to readb/writeb/etc.

> >[1] 3320ad994afb ("x86: Work around mmio config space quirk on AMD Fam10h")
> >
> >Somewhere in this process, I'd like to update the AMD Fam10h comment to fix
> >the typos and reference the spec that talks about this, i.e., the "BIOS and
> >Kernel Developer's Guide (BKDG) For AMD Family 10h Processors," rev 3.48,
> >sec 2.11.1, "MMIO Configuration Coding Requirements."
> >
> >That can be a separate patch since it's only cosmetic.
> >
> >>Currently it belongs to asm/pci_x86.h header which should be included
> >>only for x86 specific files. From now on, those accessors are in asm/pci.h
> >>header which can be included in non-architecture code much easier.
> >>
> >>Signed-off-by: Tomasz Nowicki <[email protected]>
> >>Tested-by: Hanjun Guo <[email protected]>
> >>---
> >> arch/x86/include/asm/pci.h | 42 +++++++++++++++++++++++++++++++++++++++++
> >> arch/x86/include/asm/pci_x86.h | 43 ------------------------------------------
> >> 2 files changed, 42 insertions(+), 43 deletions(-)
> >>
> >>diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
> >>index 0892ea0..5ba3720 100644
> >>--- a/arch/x86/include/asm/pci.h
> >>+++ b/arch/x86/include/asm/pci.h
> >>@@ -71,6 +71,48 @@ void pcibios_set_master(struct pci_dev *dev);
> >> struct irq_routing_table *pcibios_get_irq_routing_table(void);
> >> int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
> >>
> >>+/*
> >>+ * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
> >>+ * on their northbrige except through the * %eax register. As such, you MUST
> >>+ * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
> >>+ * accessor functions.
> >>+ * In fact just use pci_config_*, nothing else please.
> >>+ */
> >>+static inline unsigned char mmio_config_readb(void __iomem *pos)
> >>+{
> >>+ u8 val;
> >>+ asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
> >>+ return val;
> >>+}
> >>+
> >>+static inline unsigned short mmio_config_readw(void __iomem *pos)
> >>+{
> >>+ u16 val;
> >>+ asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
> >>+ return val;
> >>+}
> >>+
> >>+static inline unsigned int mmio_config_readl(void __iomem *pos)
> >>+{
> >>+ u32 val;
> >>+ asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
> >>+ return val;
> >>+}
> >>+
> >>+static inline void mmio_config_writeb(void __iomem *pos, u8 val)
> >>+{
> >>+ asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
> >>+}
> >>+
> >>+static inline void mmio_config_writew(void __iomem *pos, u16 val)
> >>+{
> >>+ asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
> >>+}
> >>+
> >>+static inline void mmio_config_writel(void __iomem *pos, u32 val)
> >>+{
> >>+ asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
> >>+}
> >>
> >> #define HAVE_PCI_MMAP
> >> extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
> >>diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
> >>index caba141..42e7332 100644
> >>--- a/arch/x86/include/asm/pci_x86.h
> >>+++ b/arch/x86/include/asm/pci_x86.h
> >>@@ -121,49 +121,6 @@ extern int __init pcibios_init(void);
> >> extern int pci_legacy_init(void);
> >> extern void pcibios_fixup_irqs(void);
> >>
> >>-/*
> >>- * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
> >>- * on their northbrige except through the * %eax register. As such, you MUST
> >>- * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
> >>- * accessor functions.
> >>- * In fact just use pci_config_*, nothing else please.
> >>- */
> >>-static inline unsigned char mmio_config_readb(void __iomem *pos)
> >>-{
> >>- u8 val;
> >>- asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
> >>- return val;
> >>-}
> >>-
> >>-static inline unsigned short mmio_config_readw(void __iomem *pos)
> >>-{
> >>- u16 val;
> >>- asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
> >>- return val;
> >>-}
> >>-
> >>-static inline unsigned int mmio_config_readl(void __iomem *pos)
> >>-{
> >>- u32 val;
> >>- asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
> >>- return val;
> >>-}
> >>-
> >>-static inline void mmio_config_writeb(void __iomem *pos, u8 val)
> >>-{
> >>- asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
> >>-}
> >>-
> >>-static inline void mmio_config_writew(void __iomem *pos, u16 val)
> >>-{
> >>- asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
> >>-}
> >>-
> >>-static inline void mmio_config_writel(void __iomem *pos, u32 val)
> >>-{
> >>- asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
> >>-}
> >>-
> >> #ifdef CONFIG_PCI
> >> # ifdef CONFIG_ACPI
> >> # define x86_default_pci_init pci_acpi_init
> >>--
> >>1.9.1
> >>

2015-02-18 19:03:20

by Rob Herring

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86, acpi, pci: Move PCI config space accessors.

On Wed, Feb 18, 2015 at 12:27 PM, Bjorn Helgaas <[email protected]> wrote:
> On Tue, Feb 17, 2015 at 02:03:01PM +0100, Tomasz Nowicki wrote:
>> On 11.12.2014 00:17, Bjorn Helgaas wrote:
>> >On Wed, Nov 19, 2014 at 05:04:48PM +0100, Tomasz Nowicki wrote:
>> >>We are going to use mmio_config_{} name convention across all architectures.
>> >
>> >mmio_config_*() are workarounds for an AMD Fam10h defect [1]. I would prefer
>> >not to extend these names to other architectures, because they should be
>> >able to use readb()/writeb()/etc. directly, as we did on x86 before
>> >3320ad994afb ("x86: Work around mmio config space quirk on AMD Fam10h").
>> >
>> >In fact, it would be nice if we could use readb()/writeb()/etc. on x86, and
>> >only use mmio_config_*() when we're on AMD Fam10h. Maybe there could be a
>> >"struct pci_raw_ops pci_mmcfg_amd_fam10h" that used mmio_config_*(), and we
>> >could set raw_pci_ext_ops to those ops instead of the default ones when
>> >we're on AMD Fam10h. Then x86 should be able to use the generic
>> >readb()-based ops on most platforms.
>>
>> While I do agree we should use readb()/writeb()... methods, I am not
>> sure there is a nice way to use mmio_config_*() exclusively for AMD
>> Fam10h. For x86, right now, we have three PCI config accessors sets:
>> mmconfig_32.c, mmconfig_64.c, numachip.c and each are different. So
>> one pci_mmcfg_amd_fam10h pci_raw_ops is not enough. I am thinking of
>> having additional structure (integrated with "struct
>> pci_mmcfg_region") that keeps MMIO accessors where readb()/writeb()
>> would be the default one. For AMD Fam10h case we could tweak it as
>> necessary. What do you thing Bjorn?
>
> It's OK if we use mmio_config_*() for all x86 (not just AMD Fam10h); that's
> what we do today. It'd be *nicer* if we could use the workaround only for
> Fam10h, but if it complicates the code, don't bother.
>
> My main point is that I think your v1 posting requires every arch to
> implement mmio_config_*(), and they will all be the same. If an arch
> doesn't require a workaround, it shouldn't have to supply anything and we
> should default to readb/writeb/etc.

Perhaps the abstraction needs to move up a level to pci_ops functions.
Then x86 can use the generic ones I added and AMD can use custom ones.

BTW, there are some cases on MIPS that need special accessors. It's
mainly a function of whether the accessors need to do endian swapping
or not.

Rob