To allow lockless access to the whole PCI configuration space the mmconfig
based accessor functions need to be propagated to the pci_root_ops.
Unfortunatly this cannot be done before the PCI subsystem initialization
happens even if mmconfig access is already available. The reason is that
some of the special platform PCI implementations must be able to overrule
that setting before further accesses happen.
The earliest possible point is after x86_init.pci.init() has been run. This
is at a point in the boot process where nothing actually uses the PCI
devices so the accessor function pointers can be updated lockless w/o risk.
The switch to full ECAM mode depends on the availability of mmconfig and
unchanged default accessors.
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/pci_x86.h | 15 +++++++--------
arch/x86/pci/common.c | 16 ++++++++++++++++
arch/x86/pci/legacy.c | 1 +
arch/x86/pci/mmconfig-shared.c | 30 ++++++++++++++++++++++++++++++
4 files changed, 54 insertions(+), 8 deletions(-)
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -46,20 +46,14 @@ enum pci_bf_sort_state {
pci_dmi_bf,
};
-/* pci-i386.c */
-
void pcibios_resource_survey(void);
void pcibios_set_cache_line_size(void);
-/* pci-pc.c */
-
extern int pcibios_last_bus;
extern struct pci_ops pci_root_ops;
void pcibios_scan_specific_bus(int busn);
-/* pci-irq.c */
-
struct irq_info {
u8 bus, devfn; /* Bus, device and function */
struct {
@@ -120,11 +114,10 @@ extern void __init dmi_check_skip_isa_al
extern int __init pci_acpi_init(void);
extern void __init pcibios_irq_init(void);
extern int __init pcibios_init(void);
+extern void __init pcibios_select_ops(void);
extern int pci_legacy_init(void);
extern void pcibios_fixup_irqs(void);
-/* pci-mmconfig.c */
-
/* "PCI MMCONFIG %04x [bus %02x-%02x]" */
#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2)
@@ -139,6 +132,12 @@ struct pci_mmcfg_region {
char name[PCI_MMCFG_RESOURCE_NAME_LEN];
};
+#ifdef CONFIG_PCI_MMCONFIG
+extern void __init pci_mmcfg_select_ops(void);
+#else
+static inline void pci_mmcfg_select_ops(void) { }
+#endif
+
extern int __init pci_mmcfg_arch_init(void);
extern void __init pci_mmcfg_arch_free(void);
extern int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -157,6 +157,22 @@ static void pcibios_fixup_device_resourc
}
/*
+ * Called after the last possible modification to raw_pci_[ext_]ops.
+ *
+ * Verify that root_pci_ops have not been overwritten by any implementation
+ * of x86_init.pci.arch_init() and x86_init.pci.init().
+ *
+ * If not, let the mmconfig code decide whether the ops can be switched
+ * over to the ECAM accessor functions.
+ */
+void __init pcibios_select_ops(void)
+{
+ if (pci_root_ops.read != pci_read || pci_root_ops.write != pci_write)
+ return;
+ pci_mmcfg_select_ops();
+}
+
+/*
* Called after each bus is probed, but before its children
* are examined.
*/
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -65,6 +65,7 @@ static int __init pci_subsys_init(void)
}
}
+ pcibios_select_ops();
pcibios_fixup_peer_bridges();
x86_init.pci.init_irq();
pcibios_init();
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -822,3 +822,33 @@ int pci_mmconfig_delete(u16 seg, u8 star
return -ENOENT;
}
+
+static int pci_ecam_read(struct pci_bus *bus, unsigned int devfn, int reg,
+ int size, u32 *value)
+{
+ return pci_mmcfg_read(pci_domain_nr(bus), bus->number, devfn, reg,
+ size, value);
+}
+
+static int pci_ecam_write(struct pci_bus *bus, unsigned int devfn, int reg,
+ int size, u32 value)
+{
+ return pci_mmcfg_write(pci_domain_nr(bus), bus->number, devfn, reg,
+ size, value);
+}
+
+void __init pci_mmcfg_select_ops(void)
+{
+ if (raw_pci_ext_ops != &pci_mmcfg)
+ return;
+
+ /*
+ * The pointer to root_pci_ops has been handed in to ACPI already
+ * and is already set in the busses.
+ *
+ * Switch the functions over to ECAM for all config space accesses.
+ */
+ pci_root_ops.read = pci_ecam_read;
+ pci_root_ops.write = pci_ecam_write;
+ pr_info("PCI: Switch to ECAM configuration mode\n");
+}
> + /*
> + * The pointer to root_pci_ops has been handed in to ACPI already
> + * and is already set in the busses.
> + *
> + * Switch the functions over to ECAM for all config space accesses.
> + */
> + pci_root_ops.read = pci_ecam_read;
> + pci_root_ops.write = pci_ecam_write;
> + pr_info("PCI: Switch to ECAM configuration mode\n");
That patch is fine, but it's generally called MMCONFIG (don't know
where this ECAM term comes from). So please use MMCONFIG or MCFG everywhere,
not ECAM.
-Andi
On Thu, 16 Mar 2017, Andi Kleen wrote:
> > + /*
> > + * The pointer to root_pci_ops has been handed in to ACPI already
> > + * and is already set in the busses.
> > + *
> > + * Switch the functions over to ECAM for all config space accesses.
> > + */
> > + pci_root_ops.read = pci_ecam_read;
> > + pci_root_ops.write = pci_ecam_write;
> > + pr_info("PCI: Switch to ECAM configuration mode\n");
>
> That patch is fine, but it's generally called MMCONFIG (don't know
> where this ECAM term comes from).
ECAM is the official name for the memory mapped configuration mechanism
according to the PCI express specification.
> So please use MMCONFIG or MCFG everywhere, not ECAM.
While I prefer using names which match specifications, I let Bjorn decide
on that one.
Thanks,
tglx
[+cc linux-pci]
On Fri, Mar 17, 2017 at 07:15:21AM +0100, Thomas Gleixner wrote:
> On Thu, 16 Mar 2017, Andi Kleen wrote:
>
> > > + /*
> > > + * The pointer to root_pci_ops has been handed in to ACPI already
> > > + * and is already set in the busses.
> > > + *
> > > + * Switch the functions over to ECAM for all config space accesses.
> > > + */
> > > + pci_root_ops.read = pci_ecam_read;
> > > + pci_root_ops.write = pci_ecam_write;
> > > + pr_info("PCI: Switch to ECAM configuration mode\n");
> >
> > That patch is fine, but it's generally called MMCONFIG (don't know
> > where this ECAM term comes from).
>
> ECAM is the official name for the memory mapped configuration mechanism
> according to the PCI express specification.
>
> > So please use MMCONFIG or MCFG everywhere, not ECAM.
>
> While I prefer using names which match specifications, I let Bjorn decide
> on that one.
I've definitely seen "MMCONFIG" used many places, especially
internally. I've been trying to use names that correspond to the
public specs in an arch-independent way, so here's my thinking:
"MCFG" is the ACPI table name and appears in the PCI Firmware spec.
The x86-specific MCFG parser is called arch/x86/pci/mmconfig-shared.c,
but we named the arch-independent parser drivers/acpi/pci_mcfg.c.
"MMCONFIG" appears only in an implementation note in the PCI Firmware
spec r3.2, sec 4.1.4. The x86 ECAM/MMCONFIG/MCFG code is in
arch/x86/pci/mmconfig*. I wouldn't name it that way today because I
don't see "MMCONFIG" used much in public specs, but it doesn't seem
worth changing to me.
The PCIe spec doesn't mention MCFG or MMCONFIG, but it uses "ECAM" many
times, so that's what we used for the arch-independent code in
drivers/pci/ecam.c.
Bottom line, I'm fine with the names in this patch as-is.
Bjorn
Commit-ID: 5d381c2e053918bd67c2d1cc50fc73c35bd547f7
Gitweb: http://git.kernel.org/tip/5d381c2e053918bd67c2d1cc50fc73c35bd547f7
Author: Thomas Gleixner <[email protected]>
AuthorDate: Thu, 16 Mar 2017 22:50:09 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Wed, 28 Jun 2017 22:32:57 +0200
x86/PCI/mmcfg: Switch to ECAM config mode if possible
To allow lockless access to the whole PCI configuration space the mmconfig
based accessor functions need to be propagated to the pci_root_ops.
Unfortunatly this cannot be done before the PCI subsystem initialization
happens even if mmconfig access is already available. The reason is that
some of the special platform PCI implementations must be able to overrule
that setting before further accesses happen.
The earliest possible point is after x86_init.pci.init() has been run. This
is at a point in the boot process where nothing actually uses the PCI
devices so the accessor function pointers can be updated lockless w/o risk.
The switch to full ECAM mode depends on the availability of mmconfig and
unchanged default accessors.
Signed-off-by: Thomas Gleixner <[email protected]>
Acked-by: Bjorn Helgaas <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/pci_x86.h | 15 +++++++--------
arch/x86/pci/common.c | 16 ++++++++++++++++
arch/x86/pci/legacy.c | 1 +
arch/x86/pci/mmconfig-shared.c | 30 ++++++++++++++++++++++++++++++
4 files changed, 54 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 9f1b21f..ad518a9 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -48,20 +48,14 @@ enum pci_bf_sort_state {
pci_dmi_bf,
};
-/* pci-i386.c */
-
void pcibios_resource_survey(void);
void pcibios_set_cache_line_size(void);
-/* pci-pc.c */
-
extern int pcibios_last_bus;
extern struct pci_ops pci_root_ops;
void pcibios_scan_specific_bus(int busn);
-/* pci-irq.c */
-
struct irq_info {
u8 bus, devfn; /* Bus, device and function */
struct {
@@ -122,11 +116,10 @@ extern void __init dmi_check_skip_isa_align(void);
extern int __init pci_acpi_init(void);
extern void __init pcibios_irq_init(void);
extern int __init pcibios_init(void);
+extern void __init pcibios_select_ops(void);
extern int pci_legacy_init(void);
extern void pcibios_fixup_irqs(void);
-/* pci-mmconfig.c */
-
/* "PCI MMCONFIG %04x [bus %02x-%02x]" */
#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2)
@@ -141,6 +134,12 @@ struct pci_mmcfg_region {
char name[PCI_MMCFG_RESOURCE_NAME_LEN];
};
+#ifdef CONFIG_PCI_MMCONFIG
+extern void __init pci_mmcfg_select_ops(void);
+#else
+static inline void pci_mmcfg_select_ops(void) { }
+#endif
+
extern int __init pci_mmcfg_arch_init(void);
extern void __init pci_mmcfg_arch_free(void);
extern int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index cfd1a89..81e4d21 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -157,6 +157,22 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev)
}
/*
+ * Called after the last possible modification to raw_pci_[ext_]ops.
+ *
+ * Verify that root_pci_ops have not been overwritten by any implementation
+ * of x86_init.pci.arch_init() and x86_init.pci.init().
+ *
+ * If not, let the mmconfig code decide whether the ops can be switched
+ * over to the ECAM accessor functions.
+ */
+void __init pcibios_select_ops(void)
+{
+ if (pci_root_ops.read != pci_read || pci_root_ops.write != pci_write)
+ return;
+ pci_mmcfg_select_ops();
+}
+
+/*
* Called after each bus is probed, but before its children
* are examined.
*/
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 1cb01ab..80ea40e 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -65,6 +65,7 @@ static int __init pci_subsys_init(void)
}
}
+ pcibios_select_ops();
pcibios_fixup_peer_bridges();
x86_init.pci.init_irq();
pcibios_init();
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index d1b47d5..6af6351 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -816,3 +816,33 @@ int pci_mmconfig_delete(u16 seg, u8 start, u8 end)
return -ENOENT;
}
+
+static int pci_ecam_read(struct pci_bus *bus, unsigned int devfn, int reg,
+ int size, u32 *value)
+{
+ return pci_mmcfg_read(pci_domain_nr(bus), bus->number, devfn, reg,
+ size, value);
+}
+
+static int pci_ecam_write(struct pci_bus *bus, unsigned int devfn, int reg,
+ int size, u32 value)
+{
+ return pci_mmcfg_write(pci_domain_nr(bus), bus->number, devfn, reg,
+ size, value);
+}
+
+void __init pci_mmcfg_select_ops(void)
+{
+ if (raw_pci_ext_ops != &pci_mmcfg)
+ return;
+
+ /*
+ * The pointer to root_pci_ops has been handed in to ACPI already
+ * and is already set in the busses.
+ *
+ * Switch the functions over to ECAM for all config space accesses.
+ */
+ pci_root_ops.read = pci_ecam_read;
+ pci_root_ops.write = pci_ecam_write;
+ pr_info("PCI: Switch to ECAM configuration mode\n");
+}
Commit-ID: b5b0f00c760b6e9673ab79b88ede2f3c7a039f74
Gitweb: http://git.kernel.org/tip/b5b0f00c760b6e9673ab79b88ede2f3c7a039f74
Author: Thomas Gleixner <[email protected]>
AuthorDate: Thu, 16 Mar 2017 22:50:09 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 29 Jun 2017 08:41:54 +0200
x86/PCI/mmcfg: Switch to ECAM config mode if possible
To allow lockless access to the whole PCI configuration space the mmconfig
based accessor functions need to be propagated to the pci_root_ops.
Unfortunatly this cannot be done before the PCI subsystem initialization
happens even if mmconfig access is already available. The reason is that
some of the special platform PCI implementations must be able to overrule
that setting before further accesses happen.
The earliest possible point is after x86_init.pci.init() has been run. This
is at a point in the boot process where nothing actually uses the PCI
devices so the accessor function pointers can be updated lockless w/o risk.
The switch to full ECAM mode depends on the availability of mmconfig and
unchanged default accessors.
Signed-off-by: Thomas Gleixner <[email protected]>
Acked-by: Bjorn Helgaas <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/pci_x86.h | 20 ++++++++++++--------
arch/x86/pci/common.c | 16 ++++++++++++++++
arch/x86/pci/legacy.c | 1 +
arch/x86/pci/mmconfig-shared.c | 30 ++++++++++++++++++++++++++++++
arch/x86/pci/mmconfig_32.c | 8 ++++----
arch/x86/pci/mmconfig_64.c | 8 ++++----
6 files changed, 67 insertions(+), 16 deletions(-)
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 9f1b21f..65e1303 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -48,20 +48,14 @@ enum pci_bf_sort_state {
pci_dmi_bf,
};
-/* pci-i386.c */
-
void pcibios_resource_survey(void);
void pcibios_set_cache_line_size(void);
-/* pci-pc.c */
-
extern int pcibios_last_bus;
extern struct pci_ops pci_root_ops;
void pcibios_scan_specific_bus(int busn);
-/* pci-irq.c */
-
struct irq_info {
u8 bus, devfn; /* Bus, device and function */
struct {
@@ -122,11 +116,10 @@ extern void __init dmi_check_skip_isa_align(void);
extern int __init pci_acpi_init(void);
extern void __init pcibios_irq_init(void);
extern int __init pcibios_init(void);
+extern void __init pcibios_select_ops(void);
extern int pci_legacy_init(void);
extern void pcibios_fixup_irqs(void);
-/* pci-mmconfig.c */
-
/* "PCI MMCONFIG %04x [bus %02x-%02x]" */
#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2)
@@ -141,6 +134,12 @@ struct pci_mmcfg_region {
char name[PCI_MMCFG_RESOURCE_NAME_LEN];
};
+#ifdef CONFIG_PCI_MMCONFIG
+extern void __init pci_mmcfg_select_ops(void);
+#else
+static inline void pci_mmcfg_select_ops(void) { }
+#endif
+
extern int __init pci_mmcfg_arch_init(void);
extern void __init pci_mmcfg_arch_free(void);
extern int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
@@ -197,6 +196,11 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val)
asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
}
+int pci_mmcfg_read(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 *value);
+int pci_mmcfg_write(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 value);
+
#ifdef CONFIG_PCI
# ifdef CONFIG_ACPI
# define x86_default_pci_init pci_acpi_init
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index cfd1a89..81e4d21 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -157,6 +157,22 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev)
}
/*
+ * Called after the last possible modification to raw_pci_[ext_]ops.
+ *
+ * Verify that root_pci_ops have not been overwritten by any implementation
+ * of x86_init.pci.arch_init() and x86_init.pci.init().
+ *
+ * If not, let the mmconfig code decide whether the ops can be switched
+ * over to the ECAM accessor functions.
+ */
+void __init pcibios_select_ops(void)
+{
+ if (pci_root_ops.read != pci_read || pci_root_ops.write != pci_write)
+ return;
+ pci_mmcfg_select_ops();
+}
+
+/*
* Called after each bus is probed, but before its children
* are examined.
*/
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 1cb01ab..80ea40e 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -65,6 +65,7 @@ static int __init pci_subsys_init(void)
}
}
+ pcibios_select_ops();
pcibios_fixup_peer_bridges();
x86_init.pci.init_irq();
pcibios_init();
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index d1b47d5..6af6351 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -816,3 +816,33 @@ int pci_mmconfig_delete(u16 seg, u8 start, u8 end)
return -ENOENT;
}
+
+static int pci_ecam_read(struct pci_bus *bus, unsigned int devfn, int reg,
+ int size, u32 *value)
+{
+ return pci_mmcfg_read(pci_domain_nr(bus), bus->number, devfn, reg,
+ size, value);
+}
+
+static int pci_ecam_write(struct pci_bus *bus, unsigned int devfn, int reg,
+ int size, u32 value)
+{
+ return pci_mmcfg_write(pci_domain_nr(bus), bus->number, devfn, reg,
+ size, value);
+}
+
+void __init pci_mmcfg_select_ops(void)
+{
+ if (raw_pci_ext_ops != &pci_mmcfg)
+ return;
+
+ /*
+ * The pointer to root_pci_ops has been handed in to ACPI already
+ * and is already set in the busses.
+ *
+ * Switch the functions over to ECAM for all config space accesses.
+ */
+ pci_root_ops.read = pci_ecam_read;
+ pci_root_ops.write = pci_ecam_write;
+ pr_info("PCI: Switch to ECAM configuration mode\n");
+}
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index 3e9e166..d0975f1 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -49,8 +49,8 @@ static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn)
}
}
-static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 *value)
+int pci_mmcfg_read(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 *value)
{
unsigned long flags;
u32 base;
@@ -88,8 +88,8 @@ err: *value = -1;
return 0;
}
-static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 value)
+int pci_mmcfg_write(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 value)
{
unsigned long flags;
u32 base;
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index f1c1aa0..64949dd 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -24,8 +24,8 @@ static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned i
return NULL;
}
-static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 *value)
+int pci_mmcfg_read(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 *value)
{
char __iomem *addr;
@@ -58,8 +58,8 @@ err: *value = -1;
return 0;
}
-static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 value)
+int pci_mmcfg_write(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 value)
{
char __iomem *addr;
On Wed, Jun 28, 2017 at 11:45 PM, tip-bot for Thomas Gleixner
<[email protected]> wrote:
> Commit-ID: b5b0f00c760b6e9673ab79b88ede2f3c7a039f74
> Gitweb: http://git.kernel.org/tip/b5b0f00c760b6e9673ab79b88ede2f3c7a039f74
> Author: Thomas Gleixner <[email protected]>
> AuthorDate: Thu, 16 Mar 2017 22:50:09 +0100
> Committer: Thomas Gleixner <[email protected]>
> CommitDate: Thu, 29 Jun 2017 08:41:54 +0200
>
> x86/PCI/mmcfg: Switch to ECAM config mode if possible
>
> To allow lockless access to the whole PCI configuration space the mmconfig
> based accessor functions need to be propagated to the pci_root_ops.
>
> Unfortunatly this cannot be done before the PCI subsystem initialization
> happens even if mmconfig access is already available. The reason is that
> some of the special platform PCI implementations must be able to overrule
> that setting before further accesses happen.
>
> The earliest possible point is after x86_init.pci.init() has been run. This
> is at a point in the boot process where nothing actually uses the PCI
> devices so the accessor function pointers can be updated lockless w/o risk.
>
> The switch to full ECAM mode depends on the availability of mmconfig and
> unchanged default accessors.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> Acked-by: Bjorn Helgaas <[email protected]>
> Cc: Andi Kleen <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Stephane Eranian <[email protected]>
> Cc: Borislav Petkov <[email protected]>
> Cc: [email protected]
> Link: http://lkml.kernel.org/r/[email protected]
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> arch/x86/include/asm/pci_x86.h | 20 ++++++++++++--------
> arch/x86/pci/common.c | 16 ++++++++++++++++
> arch/x86/pci/legacy.c | 1 +
> arch/x86/pci/mmconfig-shared.c | 30 ++++++++++++++++++++++++++++++
> arch/x86/pci/mmconfig_32.c | 8 ++++----
> arch/x86/pci/mmconfig_64.c | 8 ++++----
> 6 files changed, 67 insertions(+), 16 deletions(-)
>
> /*
> + * Called after the last possible modification to raw_pci_[ext_]ops.
> + *
> + * Verify that root_pci_ops have not been overwritten by any implementation
> + * of x86_init.pci.arch_init() and x86_init.pci.init().
> + *
> + * If not, let the mmconfig code decide whether the ops can be switched
> + * over to the ECAM accessor functions.
> + */
> +void __init pcibios_select_ops(void)
> +{
> + if (pci_root_ops.read != pci_read || pci_root_ops.write != pci_write)
> + return;
> + pci_mmcfg_select_ops();
> +}
> +
> +/*
> * Called after each bus is probed, but before its children
> * are examined.
> */
> diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
> index 1cb01ab..80ea40e 100644
> --- a/arch/x86/pci/legacy.c
> +++ b/arch/x86/pci/legacy.c
> @@ -65,6 +65,7 @@ static int __init pci_subsys_init(void)
> }
> }
>
> + pcibios_select_ops();
> pcibios_fixup_peer_bridges();
> x86_init.pci.init_irq();
> pcibios_init();
> diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
> index d1b47d5..6af6351 100644
> --- a/arch/x86/pci/mmconfig-shared.c
> +++ b/arch/x86/pci/mmconfig-shared.c
> @@ -816,3 +816,33 @@ int pci_mmconfig_delete(u16 seg, u8 start, u8 end)
>
> return -ENOENT;
> }
> +
> +static int pci_ecam_read(struct pci_bus *bus, unsigned int devfn, int reg,
> + int size, u32 *value)
> +{
> + return pci_mmcfg_read(pci_domain_nr(bus), bus->number, devfn, reg,
> + size, value);
> +}
> +
> +static int pci_ecam_write(struct pci_bus *bus, unsigned int devfn, int reg,
> + int size, u32 value)
> +{
> + return pci_mmcfg_write(pci_domain_nr(bus), bus->number, devfn, reg,
> + size, value);
> +}
> +
> +void __init pci_mmcfg_select_ops(void)
> +{
> + if (raw_pci_ext_ops != &pci_mmcfg)
> + return;
> +
> + /*
> + * The pointer to root_pci_ops has been handed in to ACPI already
> + * and is already set in the busses.
> + *
> + * Switch the functions over to ECAM for all config space accesses.
> + */
> + pci_root_ops.read = pci_ecam_read;
> + pci_root_ops.write = pci_ecam_write;
> + pr_info("PCI: Switch to ECAM configuration mode\n");
> +}
Hi Thomas,
Would this patch actually void the commit:
commit a0ca9909609470ad779b9b9cc68ce96e975afff7
Author: Ivan Kokshaysky <[email protected]>
Date: Mon Jan 14 17:31:09 2008 -0500
PCI x86: always use conf1 to access config space below 256 bytes
Thanks to Loic Prylli <[email protected]>, who originally proposed
this idea.
Always using legacy configuration mechanism for the legacy config space
and extended mechanism (mmconf) for the extended config space is
a simple and very logical approach. It's supposed to resolve all
known mmconf problems. It still allows per-device quirks (tweaking
dev->cfg_size). It also allows to get rid of mmconf fallback code.
Signed-off-by: Ivan Kokshaysky <[email protected]>
Signed-off-by: Matthew Wilcox <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
> Hi Thomas,
>
> Would this patch actually void the commit:
Yes it does. My origina; patches were opt-in for this reason, but Thomas doesn't
believe in historical or other people's experience.
But MCFG problems were a long time ago and noone uses these systems anymore,
so perhaps he is right.
-Andi
>
> commit a0ca9909609470ad779b9b9cc68ce96e975afff7
> Author: Ivan Kokshaysky <[email protected]>
> Date: Mon Jan 14 17:31:09 2008 -0500
>
> PCI x86: always use conf1 to access config space below 256 bytes
>
> Thanks to Loic Prylli <[email protected]>, who originally proposed
> this idea.
>
> Always using legacy configuration mechanism for the legacy config space
> and extended mechanism (mmconf) for the extended config space is
> a simple and very logical approach. It's supposed to resolve all
> known mmconf problems. It still allows per-device quirks (tweaking
> dev->cfg_size). It also allows to get rid of mmconf fallback code.
>
> Signed-off-by: Ivan Kokshaysky <[email protected]>
> Signed-off-by: Matthew Wilcox <[email protected]>
> Signed-off-by: Linus Torvalds <[email protected]>
On Thu, 29 Jun 2017, Andi Kleen wrote:
> > Would this patch actually void the commit:
>
> Yes it does. My origina; patches were opt-in for this reason, but Thomas doesn't
> believe in historical or other people's experience.
I did not realize that particular wreckage.
> But MCFG problems were a long time ago and noone uses these systems anymore,
> so perhaps he is right.
The obvious solution to this is to force type 1 for older machines, i.e. <=
K8. Some day we should stop supporting 15+ years old crap just because we
can.
Thanks,
tglx
On Fri, Jun 30, 2017 at 7:30 AM, Thomas Gleixner <[email protected]> wrote:
>> But MCFG problems were a long time ago and noone uses these systems anymore,
>> so perhaps he is right.
>
> The obvious solution to this is to force type 1 for older machines, i.e. <=
> K8. Some day we should stop supporting 15+ years old crap just because we
> can.
No.
The fact is, type 1 is the *good* thing. It's the standard thing that
has worked pretty much forever, and that is not just tested, but has
good semantics.
The new stuff is the crazy crap. It's crazy crap in so many ways:
- non-deterministic memory addresses found in firmware tables that
have had bugs
- using MMIO means that there are lots of basic issues with
fundamental things like write gathering and ordering
The right thing to do is to just admit that the extended mmcfg isn't
actually the rigth thjing to do by default, and only use it when you
have to.
"Newer" does not always mean "better", and there's been lots of bad
hardware (and bad firmware).
So use the "enhanced" one for stuff above the 256-byte limit. Not for
basic probing.
Anmd don't think that it should be phased out just because it's old.
Old is often *good*. Old means stable. Old means tested. Old means
simple. Those are all *good* thing.
Linus
On Fri, 30 Jun 2017, Linus Torvalds wrote:
> So use the "enhanced" one for stuff above the 256-byte limit. Not for
> basic probing.
The probing itself uses type1 unconditionally. It just switches over when
mmcfg is the default and no other quirk has been applied.
Nevertheless I zap that commit in question. The rest of the series is still
valid and does not affect that. The main goal of distangling the stuff from
the global PCI lock and avoiding pointless nested locking is still reached.
Thanks,
tglx
On Fri, Jun 30, 2017 at 10:16:15AM -0700, Linus Torvalds wrote:
> On Fri, Jun 30, 2017 at 7:30 AM, Thomas Gleixner <[email protected]> wrote:
> >> But MCFG problems were a long time ago and noone uses these systems anymore,
> >> so perhaps he is right.
> >
> > The obvious solution to this is to force type 1 for older machines, i.e. <=
> > K8. Some day we should stop supporting 15+ years old crap just because we
> > can.
>
> No.
>
> The fact is, type 1 is the *good* thing. It's the standard thing that
> has worked pretty much forever, and that is not just tested, but has
> good semantics.
Fully agreed.
Also, config space accesses supposed to be *safe* rather than *fast*.
There was quite a heated discussion on this very subject back in 2008.
Thought that I have a full copy of that, but no, surprisingly...
Google still gives a sufficient part of that:
http://linux-kernel.2935.n7.nabble.com/Patch-v2-Make-PCI-extended-config-space-MMCONFIG-a-driver-opt-in-td243861i60.html
Ivan.