2004-01-29 11:34:39

by Durairaj, Sundarapandian

[permalink] [raw]
Subject: RE: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

Hi All,

Thanks for the comments.

Please review this updated patch and send your comments.

Thanks,
Sundar

Note:
This is the patch for PCI Express Enhanced Configuration against the
2.6.0-test11 kernel, following up on the earlier patches from Vladimir
([email protected]), Harinarayanan ([email protected])
and myself.
I tested it on our i386 platform.

This patch also implements a mechanism for the kernel to find the
chipset-specific mmcfg base address. The kernel detects the base
address through the ACPI MCFG table entry, and the PCI subsystem is
initialized based on that address.

diff -Naur linux-2.6.0/arch/i386/Kconfig
linux_pciexpress/arch/i386/Kconfig
--- linux-2.6.0/arch/i386/Kconfig 2003-12-18 08:28:16.000000000
+0530
+++ linux_pciexpress/arch/i386/Kconfig 2004-01-29 16:50:56.000000000
+0530
@@ -1020,6 +1020,18 @@

endchoice

+config PCI_EXPRESS
+ bool "PCI_EXPRESS (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && PCI
+ select ACPI_BOOT
+ help
+ PCI Express is the next generation PCI architecture that
supports
+ the configuration space size of 4K bytes. With this option,
+ Linux will first attempt to access the configuration space
through
+ enhanced config access mechanism (will work only on
+ PCI Express based system) otherwise other standard PCI access
+ mechanism will be used.
+
config PCI_BIOS
bool
depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
diff -Naur linux-2.6.0/arch/i386/kernel/acpi/boot.c
linux_pciexpress/arch/i386/kernel/acpi/boot.c
--- linux-2.6.0/arch/i386/kernel/acpi/boot.c 2003-12-18
08:29:29.000000000 +0530
+++ linux_pciexpress/arch/i386/kernel/acpi/boot.c 2004-01-29
16:14:43.000000000 +0530
@@ -93,6 +93,27 @@
return ((unsigned char *) base + offset);
}

+#ifdef CONFIG_PCI_EXPRESS
+static int __init acpi_parse_mcfg
+ (unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_mcfg *mcfg = NULL;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ mcfg = (struct acpi_table_mcfg *) __acpi_map_table
+ (phys_addr, size);
+ if (!mcfg) {
+ printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+ return -ENODEV;
+ }
+ if (mcfg->base_address)
+ mmcfg_base_address = mcfg->base_address;
+
+ return 0;
+}
+#endif /* CONFIG_PCI_EXPRESS */

#ifdef CONFIG_X86_LOCAL_APIC

@@ -508,6 +529,20 @@

#endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */

+#ifdef CONFIG_PCI_EXPRESS
+ result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+ if (!result) {
+ printk(KERN_WARNING PREFIX "MCFG not present\n");
+ return 0;
+ } else if (result < 0) {
+ printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+ return result;
+ } else if (result > 1) {
+ printk(KERN_WARNING PREFIX
+ "Multiple MCFG tables exist\n");
+ }
+#endif /* CONFIG_PCI_EXPRESS */
+
#ifdef CONFIG_X86_LOCAL_APIC
if (acpi_lapic && acpi_ioapic) {
smp_found_config = 1;
diff -Naur linux-2.6.0/arch/i386/pci/common.c
linux_pciexpress/arch/i386/pci/common.c
--- linux-2.6.0/arch/i386/pci/common.c 2003-12-18 08:28:46.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/common.c 2004-01-29
16:14:45.000000000 +0530
@@ -19,7 +19,8 @@
extern void pcibios_sort(void);
#endif

-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 |
PCI_PROBE_CONF2;
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 |
PCI_PROBE_CONF2
+ | PCI_PROBE_ENHANCED;

int pcibios_last_bus = -1;
struct pci_bus *pci_root_bus = NULL;
@@ -197,6 +198,12 @@
return NULL;
}
#endif
+#ifdef CONFIG_PCI_EXPRESS
+ else if (!strcmp(str, "nopciexpress")) {
+ pci_probe &= ~PCI_PROBE_ENHANCED;
+ return NULL;
+ }
+#endif
#ifdef CONFIG_ACPI_PCI
else if (!strcmp(str, "noacpi")) {
pci_probe |= PCI_NO_ACPI_ROUTING;
diff -Naur linux-2.6.0/arch/i386/pci/direct.c
linux_pciexpress/arch/i386/pci/direct.c
--- linux-2.6.0/arch/i386/pci/direct.c 2003-12-18 08:28:28.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/direct.c 2004-01-29
16:14:45.000000000 +0530
@@ -167,6 +167,60 @@
};


+#ifdef CONFIG_PCI_EXPRESS
+/*
+ * We map full Page size on each PCI Express request. Incidentally
that's
+ * the size we have for config space too in PCI Express devices.
+ * On PCI Express capable platform, at the time of kernel
initialization
+ * the OS would have scanned for MCFG table and set this variable to
+ * appropriate value. If PCI Express not supported the variable will
+ * have 0 value
+ */
+u32 mmcfg_base_address;
+
+/*
+ * Variable used to store the virtual address of fixed PTE
+ */
+char *mmcfg_virt_addr;
+
+/*
+ * Variable used to store the base address of the last PCI Express
device
+ * accessed.
+ */
+u32 pcie_last_accessed_device;
+
+static int pci_express_conf_read(int seg, int bus,
+ int devfn, int reg, int len, u32 *value)
+{
+ if (!value || (bus > 255) || (devfn > 255) || (reg > 4095)) {
+ printk(KERN_ERR "%s: Invalid Parameter\n",
+ __FUNCTION__);
+ return -EINVAL;
+ }
+ pci_express_read(bus, devfn, reg, len, value);
+
+ return 0;
+}
+
+static int pci_express_conf_write(int seg, int bus,
+ int devfn, int reg, int len, u32 value)
+{
+ if ((bus > 255) || (devfn > 255) || (reg > 4095)) {
+ printk(KERN_ERR "%s: Invalid Parameter\n",
+ __FUNCTION__);
+ return -EINVAL;
+ }
+ pci_express_write(bus, devfn, reg, len, value);
+
+ return 0;
+}
+
+static struct pci_raw_ops pci_express_conf = {
+ .read = pci_express_conf_read,
+ .write = pci_express_conf_write,
+};
+#endif /* CONFIG_PCI_EXPRESS */
+
/*
* Before we decide to use direct hardware access mechanisms, we try to
do some
* trivial checks to ensure it at least _seems_ to be working -- we
just test
@@ -244,7 +298,30 @@
static int __init pci_direct_init(void)
{
struct resource *region, *region2;
+
+#ifdef CONFIG_PCI_EXPRESS
+ if ((pci_probe & PCI_PROBE_ENHANCED) == 0)
+ goto type1;
+ /*
+ * Check if platform we are running is PCI Express capable
+ */
+ if (mmcfg_base_address == 0) {
+ printk(KERN_INFO
+ "MCFG table entry is not found in ACPI
tables....\n"
+ "Not enabling Enhanced Configuration....\n");
+ goto type1;
+ }

+ /* Calculate the virtual address of the PTE */
+ mmcfg_virt_addr = (char *)fix_to_virt(FIX_PCIE_MCFG);
+
+ if (pci_sanity_check(&pci_express_conf)) {
+ printk(KERN_INFO "PCI: Using config type PCIExp\n");
+ raw_pci_ops = &pci_express_conf;
+ return 0;
+ }
+type1:
+#endif /* CONFIG_PCI_EXPRESS */
if ((pci_probe & PCI_PROBE_CONF1) == 0)
goto type2;
region = request_region(0xCF8, 8, "PCI conf1");
diff -Naur linux-2.6.0/arch/i386/pci/Makefile
linux_pciexpress/arch/i386/pci/Makefile
--- linux-2.6.0/arch/i386/pci/Makefile 2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/Makefile 2004-01-29
16:14:45.000000000 +0530
@@ -2,6 +2,7 @@

obj-$(CONFIG_PCI_BIOS) += pcbios.o
obj-$(CONFIG_PCI_DIRECT) += direct.o
+obj-$(CONFIG_PCI_EXPRESS) += direct.o

pci-y := fixup.o
pci-$(CONFIG_ACPI_PCI) += acpi.o
diff -Naur linux-2.6.0/arch/i386/pci/pci.h
linux_pciexpress/arch/i386/pci/pci.h
--- linux-2.6.0/arch/i386/pci/pci.h 2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/pci.h 2004-01-29
16:14:45.000000000 +0530
@@ -15,6 +15,11 @@
#define PCI_PROBE_BIOS 0x0001
#define PCI_PROBE_CONF1 0x0002
#define PCI_PROBE_CONF2 0x0004
+#ifdef CONFIG_PCI_EXPRESS
+#define PCI_PROBE_ENHANCED 0x0008
+#else
+#define PCI_PROBE_ENHANCED 0x0
+#endif
#define PCI_NO_SORT 0x0100
#define PCI_BIOS_SORT 0x0200
#define PCI_NO_CHECKS 0x0400
diff -Naur linux-2.6.0/drivers/acpi/tables.c
linux_pciexpress/drivers/acpi/tables.c
--- linux-2.6.0/drivers/acpi/tables.c 2003-12-18 08:28:46.000000000
+0530
+++ linux_pciexpress/drivers/acpi/tables.c 2004-01-29
16:14:08.000000000 +0530
@@ -58,6 +58,7 @@
[ACPI_SSDT] = "SSDT",
[ACPI_SPMI] = "SPMI",
[ACPI_HPET] = "HPET",
+ [ACPI_MCFG] = "MCFG",
};

/* System Description Table (RSDT/XSDT) */
diff -Naur linux-2.6.0/drivers/pci/pci.c
linux_pciexpress/drivers/pci/pci.c
--- linux-2.6.0/drivers/pci/pci.c 2003-12-18 08:28:38.000000000
+0530
+++ linux_pciexpress/drivers/pci/pci.c 2004-01-29 16:13:58.000000000
+0530
@@ -90,6 +90,7 @@
* %PCI_CAP_ID_CHSWP CompactPCI HotSwap
*
* %PCI_CAP_ID_PCIX PCI-X
+ * %PCI_CAP_ID_EXP PCI-EXP
*/
int
pci_find_capability(struct pci_dev *dev, int cap)
diff -Naur linux-2.6.0/drivers/pci/probe.c
linux_pciexpress/drivers/pci/probe.c
--- linux-2.6.0/drivers/pci/probe.c 2003-12-18 08:29:06.000000000
+0530
+++ linux_pciexpress/drivers/pci/probe.c 2004-01-29
16:13:58.000000000 +0530
@@ -17,6 +17,8 @@

#define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */
#define CARDBUS_RESERVE_BUSNR 3
+#define PCI_CFG_SPACE_SIZE 256
+#define PCI_CFG_SPACE_EXP_SIZE 4096

/* Ugh. Need to stop exporting this to modules. */
LIST_HEAD(pci_root_buses);
@@ -479,6 +481,21 @@
kfree(pci_dev);
}

+/*
+ * pci_cfg_space_size - get the configuration space size of the PCI
device
+ */
+static int pci_cfg_space_size(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_EXPRESS
+ /* Find whether the device is PCI Express device */
+ int is_pci_express_dev =
+ pci_find_capability(dev, PCI_CAP_ID_EXP);
+ if (is_pci_express_dev)
+ return PCI_CFG_SPACE_EXP_SIZE;
+#endif
+ return PCI_CFG_SPACE_SIZE;
+}
+
/*
* Read the config data for a PCI device, sanity-check it
* and fill in the dev structure...
@@ -515,6 +532,7 @@
dev->multifunction = !!(hdr_type & 0x80);
dev->vendor = l & 0xffff;
dev->device = (l >> 16) & 0xffff;
+ dev->cfg_size = pci_cfg_space_size(dev);

/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
set this higher, assuming the system even supports it. */
diff -Naur linux-2.6.0/drivers/pci/proc.c
linux_pciexpress/drivers/pci/proc.c
--- linux-2.6.0/drivers/pci/proc.c 2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/drivers/pci/proc.c 2004-01-29 16:13:58.000000000
+0530
@@ -16,14 +16,15 @@
#include <asm/uaccess.h>
#include <asm/byteorder.h>

-#define PCI_CFG_SPACE_SIZE 256
-
static int proc_initialized; /* = 0 */

static loff_t
proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
{
loff_t new = -1;
+ const struct inode *ino = file->f_dentry->d_inode;
+ const struct proc_dir_entry *dp = PDE(ino);
+ struct pci_dev *dev = dp->data;

lock_kernel();
switch (whence) {
@@ -34,11 +35,11 @@
new = file->f_pos + off;
break;
case 2:
- new = PCI_CFG_SPACE_SIZE + off;
+ new = dev->cfg_size + off;
break;
}
unlock_kernel();
- if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+ if (new < 0 || new > dev->cfg_size)
return -EINVAL;
return (file->f_pos = new);
}
@@ -59,7 +60,7 @@
*/

if (capable(CAP_SYS_ADMIN))
- size = PCI_CFG_SPACE_SIZE;
+ size = dev->cfg_size;
else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
size = 128;
else
@@ -133,13 +134,14 @@
struct pci_dev *dev = dp->data;
int pos = *ppos;
int cnt;
+ int size = dev->cfg_size;

- if (pos >= PCI_CFG_SPACE_SIZE)
+ if (pos >= size)
return 0;
- if (nbytes >= PCI_CFG_SPACE_SIZE)
- nbytes = PCI_CFG_SPACE_SIZE;
- if (pos + nbytes > PCI_CFG_SPACE_SIZE)
- nbytes = PCI_CFG_SPACE_SIZE - pos;
+ if (nbytes >= size)
+ nbytes = size;
+ if (pos + nbytes > size)
+ nbytes = size - pos;
cnt = nbytes;

if (!access_ok(VERIFY_READ, buf, cnt))
@@ -401,7 +403,7 @@
return -ENOMEM;
e->proc_fops = &proc_bus_pci_operations;
e->data = dev;
- e->size = PCI_CFG_SPACE_SIZE;
+ e->size = dev->cfg_size;

return 0;
}
diff -Naur linux-2.6.0/include/asm-i386/fixmap.h
linux_pciexpress/include/asm-i386/fixmap.h
--- linux-2.6.0/include/asm-i386/fixmap.h 2003-12-18
08:28:06.000000000 +0530
+++ linux_pciexpress/include/asm-i386/fixmap.h 2004-01-29
16:15:38.000000000 +0530
@@ -67,6 +67,9 @@
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings
*/
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#endif
+#ifdef CONFIG_PCI_EXPRESS
+ FIX_PCIE_MCFG,
+#endif
#ifdef CONFIG_ACPI_BOOT
FIX_ACPI_BEGIN,
FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
diff -Naur linux-2.6.0/include/asm-i386/pci.h
linux_pciexpress/include/asm-i386/pci.h
--- linux-2.6.0/include/asm-i386/pci.h 2003-12-18 08:28:47.000000000
+0530
+++ linux_pciexpress/include/asm-i386/pci.h 2004-01-29
16:15:39.000000000 +0530
@@ -96,4 +96,69 @@
/* generic pci stuff */
#include <asm-generic/pci.h>

+#ifdef CONFIG_PCI_EXPRESS
+extern spinlock_t pci_config_lock;
+
+/*
+ * Variable used to store the base address of the last PCI Express
device
+ * accessed.
+ */
+extern u32 pcie_last_accessed_device;
+
+/*
+ * Variable used to store the base address of the chipset
+ */
+extern u32 mmcfg_base_address;
+
+/*
+ * Variable used to store the virtual address of fixed PTE
+ */
+extern char *mmcfg_virt_addr;
+
+static inline void pci_exp_set_dev_base(int bus, int devfn)
+{
+ u32 dev_base =
+ mmcfg_base_address | (bus << 20) | (devfn << 12);
+ if (dev_base != pcie_last_accessed_device) {
+ pcie_last_accessed_device = dev_base;
+ set_fixmap(FIX_PCIE_MCFG, dev_base);
+ }
+}
+
+static inline void pci_express_read(int bus, int devfn, int reg,
+ int len, u32 *value)
+{
+ pci_exp_set_dev_base(bus, devfn);
+ switch (len) {
+ case 1:
+ *value = (u8)readb(mmcfg_virt_addr + reg);
+ break;
+ case 2:
+ *value = (u16)readw(mmcfg_virt_addr + reg);
+ break;
+ case 4:
+ *value = (u32)readl(mmcfg_virt_addr + reg);
+ break;
+ }
+}
+
+static inline void pci_express_write(int bus, int devfn, int reg,
+ int len, u32 value)
+{
+ pci_exp_set_dev_base(bus, devfn);
+ switch (len) {
+ case 1:
+ writeb(value, mmcfg_virt_addr + reg);
+ break;
+ case 2:
+ writew(value, mmcfg_virt_addr + reg);
+ break;
+ case 4:
+ writel(value, mmcfg_virt_addr + reg);
+ break;
+ }
+ /* Dummy read to flush PCI write */
+ readl(mmcfg_virt_addr);
+}
+#endif /* CONFIG_PCI_EXPRESS */
#endif /* __i386_PCI_H */
diff -Naur linux-2.6.0/include/linux/acpi.h
linux_pciexpress/include/linux/acpi.h
--- linux-2.6.0/include/linux/acpi.h 2003-12-18 08:27:58.000000000
+0530
+++ linux_pciexpress/include/linux/acpi.h 2004-01-29
16:15:20.000000000 +0530
@@ -317,6 +317,13 @@
char ec_id[0];
} __attribute__ ((packed));

+struct acpi_table_mcfg {
+ struct acpi_table_header header;
+ u8 reserved[8];
+ u32 base_address;
+ u32 base_reserved;
+} __attribute__ ((packed));
+
/* Table Handlers */

enum acpi_table_id {
@@ -338,6 +345,7 @@
ACPI_SSDT,
ACPI_SPMI,
ACPI_HPET,
+ ACPI_MCFG,
ACPI_TABLE_COUNT
};

@@ -437,4 +445,7 @@

#endif /*!CONFIG_ACPI_INTERPRETER*/

+#ifdef CONFIG_PCI_EXPRESS
+extern u32 mmcfg_base_address;
+#endif
#endif /*_LINUX_ACPI_H*/
diff -Naur linux-2.6.0/include/linux/pci.h
linux_pciexpress/include/linux/pci.h
--- linux-2.6.0/include/linux/pci.h 2003-12-18 08:28:49.000000000
+0530
+++ linux_pciexpress/include/linux/pci.h 2004-01-29
16:43:01.000000000 +0530
@@ -198,6 +198,7 @@
#define PCI_CAP_ID_MSI 0x05 /* Message Signalled
Interrupts */
#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */
#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */
+#define PCI_CAP_ID_EXP 0x10 /* PCI-EXPANDED */
#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list
*/
#define PCI_CAP_FLAGS 2 /* Capability defined flags (16
bits) */
#define PCI_CAP_SIZEOF 4
@@ -424,6 +425,7 @@
#define PCI_NAME_HALF __stringify(20) /* less than half to handle slop
*/
char pretty_name[PCI_NAME_SIZE]; /* pretty name
for users to see */
#endif
+ int cfg_size;
};

#define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)


2004-01-29 15:09:50

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

On Thu, Jan 29, 2004 at 05:02:39PM +0530, Durairaj, Sundarapandian wrote:
> Please review this updated patch and send your comments.

Here's a rewrite of Sundarapandian Durairaj's patch for accessing extended
PCI configuration space. Changes of note:

- Forward-ported to 2.6.2-rc2
- Renamed most of the 'Express' to 'MMCONFIG' since that is what we're
actually doing (and it would seem to be the same for PCI-X 2.0)
- Separate out the mmconfig accesses into their own file rather than lumping
them in with direct.c. Inline the bits from include/asm-i386/pci.h.
- Request the memory region we're going to use for MMCONFIG accesses.
- Remove the EXPERIMENTAL tag.
- Add support in sysfs for the extended config space.
- Use i_size in proc_bus_pci_lseek().
- Move cfg_size to where it will pack better in pci_dev.

Comments? I don't have any x86 PCI-E hardware, so it's not even
slightly tested.

Greg, I'm not entirely keen on the sysfs patch I did, but I don't see
a better way of doing it right now.


diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/Kconfig pciexp-2.6/arch/i386/Kconfig
--- linus-2.6/arch/i386/Kconfig 2004-01-27 21:05:17.000000000 -0500
+++ pciexp-2.6/arch/i386/Kconfig 2004-01-29 09:16:22.000000000 -0500
@@ -1030,12 +1030,16 @@ config PCI_GOBIOS
PCI-based systems don't have any BIOS at all. Linux can also try to
detect the PCI hardware directly without using the BIOS.

- With this option, you can specify how Linux should detect the PCI
- devices. If you choose "BIOS", the BIOS will be used, if you choose
- "Direct", the BIOS won't be used, and if you choose "Any", the
- kernel will try the direct access method and falls back to the BIOS
- if that doesn't work. If unsure, go with the default, which is
- "Any".
+ With this option, you can specify how Linux should detect the
+ PCI devices. If you choose "BIOS", the BIOS will be used,
+ if you choose "Direct", the BIOS won't be used, and if you
+ choose "MMConfig", then PCI Express MMCONFIG will be used.
+ If you choose "Any", the kernel will try MMCONFIG, then the
+ direct access method and falls back to the BIOS if that doesn't
+ work. If unsure, go with the default, which is "Any".
+
+config PCI_GOMMCONFIG
+ bool "MMConfig"

config PCI_GODIRECT
bool "Direct"
@@ -1055,6 +1059,12 @@ config PCI_DIRECT
depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
default y

+config PCI_MMCONFIG
+ bool
+ depends on PCI && (PCI_GOMMCONFIG || PCI_GOANY)
+ select ACPI_BOOT
+ default y
+
config PCI_USE_VECTOR
bool "Vector-based interrupt indexing"
depends on X86_LOCAL_APIC && X86_IO_APIC
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/kernel/acpi/boot.c pciexp-2.6/arch/i386/kernel/acpi/boot.c
--- linus-2.6/arch/i386/kernel/acpi/boot.c 2004-01-07 18:02:42.000000000 -0500
+++ pciexp-2.6/arch/i386/kernel/acpi/boot.c 2004-01-29 09:13:51.000000000 -0500
@@ -95,6 +95,27 @@ char *__acpi_map_table(unsigned long phy
}


+#ifdef CONFIG_PCI_MMCONFIG
+static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_mcfg *mcfg;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
+ if (!mcfg) {
+ printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+ return -ENODEV;
+ }
+
+ if (mcfg->base_address)
+ pci_mmcfg_base_addr = mcfg->base_address;
+
+ return 0;
+}
+#endif /* CONFIG_PCI_MMCONFIG */
+
#ifdef CONFIG_X86_LOCAL_APIC

static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -515,6 +536,19 @@ acpi_boot_init (void)

#endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */

+#ifdef CONFIG_PCI_MMCONFIG
+ result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+ if (!result) {
+ printk(KERN_WARNING PREFIX "MCFG not present\n");
+ return 0;
+ } else if (result < 0) {
+ printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+ return result;
+ } else if (result > 1) {
+ printk(KERN_WARNING PREFIX "Multiple MCFG tables exist\n");
+ }
+#endif /* CONFIG_PCI_MMCONFIG */
+
#ifdef CONFIG_X86_LOCAL_APIC
if (acpi_lapic && acpi_ioapic) {
smp_found_config = 1;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/Makefile pciexp-2.6/arch/i386/pci/Makefile
--- linus-2.6/arch/i386/pci/Makefile 2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/Makefile 2004-01-29 08:11:28.000000000 -0500
@@ -1,6 +1,7 @@
obj-y := i386.o

obj-$(CONFIG_PCI_BIOS) += pcbios.o
+obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
obj-$(CONFIG_PCI_DIRECT) += direct.o

pci-y := fixup.o
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/common.c pciexp-2.6/arch/i386/pci/common.c
--- linus-2.6/arch/i386/pci/common.c 2003-09-08 17:41:32.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/common.c 2004-01-29 08:11:19.000000000 -0500
@@ -19,7 +19,8 @@
extern void pcibios_sort(void);
#endif

-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
+ PCI_PROBE_MMCONF;

int pcibios_last_bus = -1;
struct pci_bus *pci_root_bus = NULL;
@@ -197,6 +198,12 @@ char * __devinit pcibios_setup(char *st
return NULL;
}
#endif
+#ifdef CONFIG_PCI_MMCONFIG
+ else if (!strcmp(str, "nommconf")) {
+ pci_probe &= ~PCI_PROBE_MMCONF;
+ return NULL;
+ }
+#endif
#ifdef CONFIG_ACPI_PCI
else if (!strcmp(str, "noacpi")) {
pci_probe |= PCI_NO_ACPI_ROUTING;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/mmconfig.c pciexp-2.6/arch/i386/pci/mmconfig.c
--- linus-2.6/arch/i386/pci/mmconfig.c 1969-12-31 19:00:00.000000000 -0500
+++ pciexp-2.6/arch/i386/pci/mmconfig.c 2004-01-29 09:14:34.000000000 -0500
@@ -0,0 +1,116 @@
+/*
+ * mmconfig.c - Low-level direct PCI config space access via MMCONFIG
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include "pci.h"
+
+/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
+u32 pci_mmcfg_base_addr;
+
+/* The virtual address of the fixed PTE */
+static char *mmcfg_virt_addr;
+
+/* The base address of the last MMCONFIG device accessed */
+static u32 mmcfg_last_accessed_device;
+
+/*
+ * Functions for accessing PCI configuration space with MMCONFIG accesses
+ */
+
+static inline void pci_exp_set_dev_base(int bus, int devfn)
+{
+ u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12);
+ if (dev_base != mmcfg_last_accessed_device) {
+ mmcfg_last_accessed_device = dev_base;
+ set_fixmap(FIX_PCIE_MCFG, dev_base);
+ }
+}
+
+static int pci_mmcfg_read(int seg, int bus, int devfn, int reg, int len, u32 *value)
+{
+ unsigned long flags;
+
+ if (!value || (bus > 255) || (devfn > 255) || (reg > 4095))
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ pci_exp_set_dev_base(bus, devfn);
+
+ switch (len) {
+ case 1:
+ *value = readb(mmcfg_virt_addr + reg);
+ break;
+ case 2:
+ *value = readw(mmcfg_virt_addr + reg);
+ break;
+ case 4:
+ *value = readl(mmcfg_virt_addr + reg);
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_mmcfg_write(int seg, int bus, int devfn, int reg, int len, u32 value)
+{
+ unsigned long flags;
+
+ if ((bus > 255) || (devfn > 255) || (reg > 4095))
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ pci_exp_set_dev_base(bus, devfn);
+
+ switch (len) {
+ case 1:
+ writeb(value, mmcfg_virt_addr + reg);
+ break;
+ case 2:
+ writew(value, mmcfg_virt_addr + reg);
+ break;
+ case 4:
+ writel(value, mmcfg_virt_addr + reg);
+ break;
+ }
+
+ /* Dummy read to flush PCI write */
+ readl(mmcfg_virt_addr);
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static struct pci_raw_ops pci_mmcfg = {
+ .read = pci_mmcfg_read,
+ .write = pci_mmcfg_write,
+};
+
+static int __init pci_mmcfg_init(void)
+{
+ struct resource *region;
+
+ if ((pci_probe & PCI_PROBE_MMCONF) == 0)
+ goto out;
+ if (!pci_mmcfg_base_addr)
+ goto out;
+ region = request_mem_region(pci_mmcfg_base_addr, 256 * 1024 * 1024,
+ "PCI MMCONFIG");
+ if (!region)
+ goto out;
+
+ printk(KERN_INFO "PCI: Using MMCONFIG\n");
+ raw_pci_ops = &pci_mmcfg;
+ pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
+
+ out:
+ return 0;
+}
+
+arch_initcall(pci_mmcfg_init);
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/pci.h pciexp-2.6/arch/i386/pci/pci.h
--- linus-2.6/arch/i386/pci/pci.h 2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/pci.h 2004-01-29 08:14:48.000000000 -0500
@@ -15,6 +15,9 @@
#define PCI_PROBE_BIOS 0x0001
#define PCI_PROBE_CONF1 0x0002
#define PCI_PROBE_CONF2 0x0004
+#define PCI_PROBE_MMCONF 0x0008
+#define PCI_PROBE_MASK 0x000f
+
#define PCI_NO_SORT 0x0100
#define PCI_BIOS_SORT 0x0200
#define PCI_NO_CHECKS 0x0400
diff -urpNX build-tools/dontdiff linus-2.6/drivers/acpi/tables.c pciexp-2.6/drivers/acpi/tables.c
--- linus-2.6/drivers/acpi/tables.c 2003-10-08 16:52:16.000000000 -0400
+++ pciexp-2.6/drivers/acpi/tables.c 2004-01-29 08:22:52.000000000 -0500
@@ -58,6 +58,7 @@ static char *acpi_table_signatures[ACPI_
[ACPI_SSDT] = "SSDT",
[ACPI_SPMI] = "SPMI",
[ACPI_HPET] = "HPET",
+ [ACPI_MCFG] = "MCFG",
};

/* System Description Table (RSDT/XSDT) */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci-sysfs.c pciexp-2.6/drivers/pci/pci-sysfs.c
--- linus-2.6/drivers/pci/pci-sysfs.c 2003-08-22 22:46:57.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci-sysfs.c 2004-01-29 09:30:43.000000000 -0500
@@ -71,7 +71,7 @@ pci_read_config(struct kobject *kobj, ch

/* Several chips lock up trying to read undefined config space */
if (capable(CAP_SYS_ADMIN)) {
- size = 256;
+ size = dev->cfg_size;
} else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
size = 128;
}
@@ -123,10 +123,10 @@ pci_write_config(struct kobject *kobj, c
unsigned int size = count;
loff_t init_off = off;

- if (off > 256)
+ if (off > dev->cfg_size)
return 0;
- if (off + count > 256) {
- size = 256 - off;
+ if (off + count > dev->cfg_size) {
+ size = dev->cfg_size - off;
count = size;
}

@@ -166,6 +166,16 @@ static struct bin_attribute pci_config_a
.write = pci_write_config,
};

+static struct bin_attribute pcie_config_attr = {
+ .attr = {
+ .name = "config",
+ .mode = S_IRUGO | S_IWUSR,
+ },
+ .size = 4096,
+ .read = pci_read_config,
+ .write = pci_write_config,
+};
+
void pci_create_sysfs_dev_files (struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
@@ -178,5 +188,9 @@ void pci_create_sysfs_dev_files (struct
device_create_file (dev, &dev_attr_class);
device_create_file (dev, &dev_attr_irq);
device_create_file (dev, &dev_attr_resource);
- sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+ if (pdev->cfg_size < 4096) {
+ sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+ } else {
+ sysfs_create_bin_file(&dev->kobj, &pcie_config_attr);
+ }
}
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci.c pciexp-2.6/drivers/pci/pci.c
--- linus-2.6/drivers/pci/pci.c 2003-10-08 16:52:35.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci.c 2004-01-29 08:23:57.000000000 -0500
@@ -90,6 +90,8 @@ pci_max_busnr(void)
* %PCI_CAP_ID_CHSWP CompactPCI HotSwap
*
* %PCI_CAP_ID_PCIX PCI-X
+ *
+ * %PCI_CAP_ID_EXP PCI Express
*/
int
pci_find_capability(struct pci_dev *dev, int cap)
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/probe.c pciexp-2.6/drivers/pci/probe.c
--- linus-2.6/drivers/pci/probe.c 2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/probe.c 2004-01-29 08:59:46.000000000 -0500
@@ -17,6 +17,8 @@

#define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */
#define CARDBUS_RESERVE_BUSNR 3
+#define PCI_CFG_SPACE_SIZE 256
+#define PCI_CFG_SPACE_EXP_SIZE 4096

/* Ugh. Need to stop exporting this to modules. */
LIST_HEAD(pci_root_buses);
@@ -479,6 +481,20 @@ static void pci_release_dev(struct devic
kfree(pci_dev);
}

+/**
+ * pci_cfg_space_size - get the configuration space size of the PCI device
+ */
+static int pci_cfg_space_size(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_MMCONFIG
+ /* Find whether the device is PCI Express */
+ int is_pci_express_dev = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ if (is_pci_express_dev)
+ return PCI_CFG_SPACE_EXP_SIZE;
+#endif
+ return PCI_CFG_SPACE_SIZE;
+}
+
/*
* Read the config data for a PCI device, sanity-check it
* and fill in the dev structure...
@@ -515,6 +531,7 @@ pci_scan_device(struct pci_bus *bus, int
dev->multifunction = !!(hdr_type & 0x80);
dev->vendor = l & 0xffff;
dev->device = (l >> 16) & 0xffff;
+ dev->cfg_size = pci_cfg_space_size(dev);

/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
set this higher, assuming the system even supports it. */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/proc.c pciexp-2.6/drivers/pci/proc.c
--- linus-2.6/drivers/pci/proc.c 2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/proc.c 2004-01-29 08:38:49.000000000 -0500
@@ -16,16 +16,15 @@
#include <asm/uaccess.h>
#include <asm/byteorder.h>

-#define PCI_CFG_SPACE_SIZE 256
-
static int proc_initialized; /* = 0 */

static loff_t
proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
{
loff_t new = -1;
+ struct inode *inode = file->f_dentry->d_inode;

- down(&file->f_dentry->d_inode->i_sem);
+ down(&inode->i_sem);
switch (whence) {
case 0:
new = off;
@@ -34,14 +33,14 @@ proc_bus_pci_lseek(struct file *file, lo
new = file->f_pos + off;
break;
case 2:
- new = PCI_CFG_SPACE_SIZE + off;
+ new = inode->i_size + off;
break;
}
- if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+ if (new < 0 || new > inode->i_size)
new = -EINVAL;
else
file->f_pos = new;
- up(&file->f_dentry->d_inode->i_sem);
+ up(&inode->i_sem);
return new;
}

@@ -61,7 +60,7 @@ proc_bus_pci_read(struct file *file, cha
*/

if (capable(CAP_SYS_ADMIN))
- size = PCI_CFG_SPACE_SIZE;
+ size = dev->cfg_size;
else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
size = 128;
else
@@ -134,14 +133,15 @@ proc_bus_pci_write(struct file *file, co
const struct proc_dir_entry *dp = PDE(ino);
struct pci_dev *dev = dp->data;
int pos = *ppos;
+ int size = dev->cfg_size;
int cnt;

- if (pos >= PCI_CFG_SPACE_SIZE)
+ if (pos >= size)
return 0;
- if (nbytes >= PCI_CFG_SPACE_SIZE)
- nbytes = PCI_CFG_SPACE_SIZE;
- if (pos + nbytes > PCI_CFG_SPACE_SIZE)
- nbytes = PCI_CFG_SPACE_SIZE - pos;
+ if (nbytes >= size)
+ nbytes = size;
+ if (pos + nbytes > size)
+ nbytes = size - pos;
cnt = nbytes;

if (!access_ok(VERIFY_READ, buf, cnt))
@@ -403,7 +403,7 @@ int pci_proc_attach_device(struct pci_de
return -ENOMEM;
e->proc_fops = &proc_bus_pci_operations;
e->data = dev;
- e->size = PCI_CFG_SPACE_SIZE;
+ e->size = dev->cfg_size;

return 0;
}
diff -urpNX build-tools/dontdiff linus-2.6/include/asm-i386/fixmap.h pciexp-2.6/include/asm-i386/fixmap.h
--- linus-2.6/include/asm-i386/fixmap.h 2003-07-29 13:01:54.000000000 -0400
+++ pciexp-2.6/include/asm-i386/fixmap.h 2004-01-29 08:40:21.000000000 -0500
@@ -71,6 +71,9 @@ enum fixed_addresses {
FIX_ACPI_BEGIN,
FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
#endif
+#ifdef CONFIG_PCI_MMCONFIG
+ FIX_PCIE_MCFG,
+#endif
__end_of_permanent_fixed_addresses,
/* temporary boot-time mappings, used before ioremap() is functional */
#define NR_FIX_BTMAPS 16
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/acpi.h pciexp-2.6/include/linux/acpi.h
--- linus-2.6/include/linux/acpi.h 2003-10-08 16:53:03.000000000 -0400
+++ pciexp-2.6/include/linux/acpi.h 2004-01-29 08:46:48.000000000 -0500
@@ -317,6 +317,15 @@ struct acpi_table_ecdt {
char ec_id[0];
} __attribute__ ((packed));

+/* PCI MMCONFIG */
+
+struct acpi_table_mcfg {
+ struct acpi_table_header header;
+ u8 reserved[8];
+ u32 base_address;
+ u32 base_reserved;
+} __attribute__ ((packed));
+
/* Table Handlers */

enum acpi_table_id {
@@ -338,6 +347,7 @@ enum acpi_table_id {
ACPI_SSDT,
ACPI_SPMI,
ACPI_HPET,
+ ACPI_MCFG,
ACPI_TABLE_COUNT
};

@@ -369,6 +379,8 @@ void acpi_numa_arch_fixup(void);

extern int acpi_mp_config;

+extern u32 pci_mmcfg_base_addr;
+
#else /*!CONFIG_ACPI_BOOT*/

#define acpi_mp_config 0
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/pci.h pciexp-2.6/include/linux/pci.h
--- linus-2.6/include/linux/pci.h 2004-01-27 21:05:48.000000000 -0500
+++ pciexp-2.6/include/linux/pci.h 2004-01-29 09:13:20.000000000 -0500
@@ -410,6 +410,8 @@ struct pci_dev {
unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE];
unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE];

+ int cfg_size; /* Size of configuration space */
+
/*
* Instead of touching interrupt line and base address registers
* directly, use the values stored here. They might be different!

--
"Next the statesmen will invent cheap lies, putting the blame upon
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince
himself that the war is just, and will thank God for the better sleep
he enjoys after this process of grotesque self-deception." -- Mark Twain

2004-01-29 15:59:47

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

On Thu, Jan 29, 2004 at 03:09:25PM +0000, Matthew Wilcox wrote:
> On Thu, Jan 29, 2004 at 05:02:39PM +0530, Durairaj, Sundarapandian wrote:
> > Please review this updated patch and send your comments.
>
> Here's a rewrite of Sundarapandian Durairaj's patch for accessing extended
> PCI configuration space. Changes of note:

Brian Gerst spotted a bug -- I'd forgotten to initialise mmcfg_virt_addr.
Updated patch:

diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/Kconfig pciexp-2.6/arch/i386/Kconfig
--- linus-2.6/arch/i386/Kconfig 2004-01-27 21:05:17.000000000 -0500
+++ pciexp-2.6/arch/i386/Kconfig 2004-01-29 09:16:22.000000000 -0500
@@ -1030,12 +1030,16 @@ config PCI_GOBIOS
PCI-based systems don't have any BIOS at all. Linux can also try to
detect the PCI hardware directly without using the BIOS.

- With this option, you can specify how Linux should detect the PCI
- devices. If you choose "BIOS", the BIOS will be used, if you choose
- "Direct", the BIOS won't be used, and if you choose "Any", the
- kernel will try the direct access method and falls back to the BIOS
- if that doesn't work. If unsure, go with the default, which is
- "Any".
+ With this option, you can specify how Linux should detect the
+ PCI devices. If you choose "BIOS", the BIOS will be used,
+ if you choose "Direct", the BIOS won't be used, and if you
+ choose "MMConfig", then PCI Express MMCONFIG will be used.
+ If you choose "Any", the kernel will try MMCONFIG, then the
+ direct access method, and fall back to the BIOS if that doesn't
+ work. If unsure, go with the default, which is "Any".
+
+config PCI_GOMMCONFIG
+ bool "MMConfig"

config PCI_GODIRECT
bool "Direct"
@@ -1055,6 +1059,12 @@ config PCI_DIRECT
depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
default y

+config PCI_MMCONFIG
+ bool
+ depends on PCI && (PCI_GOMMCONFIG || PCI_GOANY)
+ select ACPI_BOOT
+ default y
+
config PCI_USE_VECTOR
bool "Vector-based interrupt indexing"
depends on X86_LOCAL_APIC && X86_IO_APIC
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/kernel/acpi/boot.c pciexp-2.6/arch/i386/kernel/acpi/boot.c
--- linus-2.6/arch/i386/kernel/acpi/boot.c 2004-01-07 18:02:42.000000000 -0500
+++ pciexp-2.6/arch/i386/kernel/acpi/boot.c 2004-01-29 09:13:51.000000000 -0500
@@ -95,6 +95,27 @@ char *__acpi_map_table(unsigned long phy
}


+#ifdef CONFIG_PCI_MMCONFIG
+static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_mcfg *mcfg;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
+ if (!mcfg) {
+ printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+ return -ENODEV;
+ }
+
+ if (mcfg->base_address)
+ pci_mmcfg_base_addr = mcfg->base_address;
+
+ return 0;
+}
+#endif /* CONFIG_PCI_MMCONFIG */
+
#ifdef CONFIG_X86_LOCAL_APIC

static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -515,6 +536,19 @@ acpi_boot_init (void)

#endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */

+#ifdef CONFIG_PCI_MMCONFIG
+ result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+ if (!result) {
+ printk(KERN_WARNING PREFIX "MCFG not present\n");
+ return 0;
+ } else if (result < 0) {
+ printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+ return result;
+ } else if (result > 1) {
+ printk(KERN_WARNING PREFIX "Multiple MCFG tables exist\n");
+ }
+#endif /* CONFIG_PCI_MMCONFIG */
+
#ifdef CONFIG_X86_LOCAL_APIC
if (acpi_lapic && acpi_ioapic) {
smp_found_config = 1;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/Makefile pciexp-2.6/arch/i386/pci/Makefile
--- linus-2.6/arch/i386/pci/Makefile 2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/Makefile 2004-01-29 08:11:28.000000000 -0500
@@ -1,6 +1,7 @@
obj-y := i386.o

obj-$(CONFIG_PCI_BIOS) += pcbios.o
+obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
obj-$(CONFIG_PCI_DIRECT) += direct.o

pci-y := fixup.o
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/common.c pciexp-2.6/arch/i386/pci/common.c
--- linus-2.6/arch/i386/pci/common.c 2003-09-08 17:41:32.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/common.c 2004-01-29 08:11:19.000000000 -0500
@@ -19,7 +19,8 @@
extern void pcibios_sort(void);
#endif

-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
+ PCI_PROBE_MMCONF;

int pcibios_last_bus = -1;
struct pci_bus *pci_root_bus = NULL;
@@ -197,6 +198,12 @@ char * __devinit pcibios_setup(char *st
return NULL;
}
#endif
+#ifdef CONFIG_PCI_MMCONFIG
+ else if (!strcmp(str, "nommconf")) {
+ pci_probe &= ~PCI_PROBE_MMCONF;
+ return NULL;
+ }
+#endif
#ifdef CONFIG_ACPI_PCI
else if (!strcmp(str, "noacpi")) {
pci_probe |= PCI_NO_ACPI_ROUTING;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/mmconfig.c pciexp-2.6/arch/i386/pci/mmconfig.c
--- linus-2.6/arch/i386/pci/mmconfig.c 1969-12-31 19:00:00.000000000 -0500
+++ pciexp-2.6/arch/i386/pci/mmconfig.c 2004-01-29 10:50:30.000000000 -0500
@@ -0,0 +1,119 @@
+/*
+ * mmconfig.c - Low-level direct PCI config space access via MMCONFIG
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include "pci.h"
+
+/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
+u32 pci_mmcfg_base_addr;
+
+/* The virtual address of the fixed PTE */
+static unsigned long mmcfg_virt_addr;
+
+/* The base address of the last MMCONFIG device accessed */
+static u32 mmcfg_last_accessed_device;
+
+/*
+ * Functions for accessing PCI configuration space with MMCONFIG accesses
+ */
+
+static inline void pci_exp_set_dev_base(int bus, int devfn)
+{
+ u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12);
+ if (dev_base != mmcfg_last_accessed_device) {
+ mmcfg_last_accessed_device = dev_base;
+ set_fixmap(FIX_PCIE_MCFG, dev_base);
+ }
+}
+
+static int pci_mmcfg_read(int seg, int bus, int devfn, int reg, int len, u32 *value)
+{
+ unsigned long flags;
+
+ if (!value || (bus > 255) || (devfn > 255) || (reg > 4095))
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ pci_exp_set_dev_base(bus, devfn);
+
+ switch (len) {
+ case 1:
+ *value = readb(mmcfg_virt_addr + reg);
+ break;
+ case 2:
+ *value = readw(mmcfg_virt_addr + reg);
+ break;
+ case 4:
+ *value = readl(mmcfg_virt_addr + reg);
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_mmcfg_write(int seg, int bus, int devfn, int reg, int len, u32 value)
+{
+ unsigned long flags;
+
+ if ((bus > 255) || (devfn > 255) || (reg > 4095))
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ pci_exp_set_dev_base(bus, devfn);
+
+ switch (len) {
+ case 1:
+ writeb(value, mmcfg_virt_addr + reg);
+ break;
+ case 2:
+ writew(value, mmcfg_virt_addr + reg);
+ break;
+ case 4:
+ writel(value, mmcfg_virt_addr + reg);
+ break;
+ }
+
+ /* Dummy read to flush PCI write */
+ readl(mmcfg_virt_addr);
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static struct pci_raw_ops pci_mmcfg = {
+ .read = pci_mmcfg_read,
+ .write = pci_mmcfg_write,
+};
+
+static int __init pci_mmcfg_init(void)
+{
+ struct resource *region;
+
+ if ((pci_probe & PCI_PROBE_MMCONF) == 0)
+ goto out;
+ if (!pci_mmcfg_base_addr)
+ goto out;
+ region = request_mem_region(pci_mmcfg_base_addr, 256 * 1024 * 1024,
+ "PCI MMCONFIG");
+ if (!region)
+ goto out;
+
+ /* Calculate the virtual address of the PTE */
+ mmcfg_virt_addr = fix_to_virt(FIX_PCIE_MCFG);
+
+ printk(KERN_INFO "PCI: Using MMCONFIG\n");
+ raw_pci_ops = &pci_mmcfg;
+ pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
+
+ out:
+ return 0;
+}
+
+arch_initcall(pci_mmcfg_init);
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/pci.h pciexp-2.6/arch/i386/pci/pci.h
--- linus-2.6/arch/i386/pci/pci.h 2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/pci.h 2004-01-29 08:14:48.000000000 -0500
@@ -15,6 +15,9 @@
#define PCI_PROBE_BIOS 0x0001
#define PCI_PROBE_CONF1 0x0002
#define PCI_PROBE_CONF2 0x0004
+#define PCI_PROBE_MMCONF 0x0008
+#define PCI_PROBE_MASK 0x000f
+
#define PCI_NO_SORT 0x0100
#define PCI_BIOS_SORT 0x0200
#define PCI_NO_CHECKS 0x0400
diff -urpNX build-tools/dontdiff linus-2.6/drivers/acpi/tables.c pciexp-2.6/drivers/acpi/tables.c
--- linus-2.6/drivers/acpi/tables.c 2003-10-08 16:52:16.000000000 -0400
+++ pciexp-2.6/drivers/acpi/tables.c 2004-01-29 08:22:52.000000000 -0500
@@ -58,6 +58,7 @@ static char *acpi_table_signatures[ACPI_
[ACPI_SSDT] = "SSDT",
[ACPI_SPMI] = "SPMI",
[ACPI_HPET] = "HPET",
+ [ACPI_MCFG] = "MCFG",
};

/* System Description Table (RSDT/XSDT) */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci-sysfs.c pciexp-2.6/drivers/pci/pci-sysfs.c
--- linus-2.6/drivers/pci/pci-sysfs.c 2003-08-22 22:46:57.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci-sysfs.c 2004-01-29 09:30:43.000000000 -0500
@@ -71,7 +71,7 @@ pci_read_config(struct kobject *kobj, ch

/* Several chips lock up trying to read undefined config space */
if (capable(CAP_SYS_ADMIN)) {
- size = 256;
+ size = dev->cfg_size;
} else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
size = 128;
}
@@ -123,10 +123,10 @@ pci_write_config(struct kobject *kobj, c
unsigned int size = count;
loff_t init_off = off;

- if (off > 256)
+ if (off > dev->cfg_size)
return 0;
- if (off + count > 256) {
- size = 256 - off;
+ if (off + count > dev->cfg_size) {
+ size = dev->cfg_size - off;
count = size;
}

@@ -166,6 +166,16 @@ static struct bin_attribute pci_config_a
.write = pci_write_config,
};

+static struct bin_attribute pcie_config_attr = {
+ .attr = {
+ .name = "config",
+ .mode = S_IRUGO | S_IWUSR,
+ },
+ .size = 4096,
+ .read = pci_read_config,
+ .write = pci_write_config,
+};
+
void pci_create_sysfs_dev_files (struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
@@ -178,5 +188,9 @@ void pci_create_sysfs_dev_files (struct
device_create_file (dev, &dev_attr_class);
device_create_file (dev, &dev_attr_irq);
device_create_file (dev, &dev_attr_resource);
- sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+ if (pdev->cfg_size < 4096) {
+ sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+ } else {
+ sysfs_create_bin_file(&dev->kobj, &pcie_config_attr);
+ }
}
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci.c pciexp-2.6/drivers/pci/pci.c
--- linus-2.6/drivers/pci/pci.c 2003-10-08 16:52:35.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci.c 2004-01-29 08:23:57.000000000 -0500
@@ -90,6 +90,8 @@ pci_max_busnr(void)
* %PCI_CAP_ID_CHSWP CompactPCI HotSwap
*
* %PCI_CAP_ID_PCIX PCI-X
+ *
+ * %PCI_CAP_ID_EXP PCI Express
*/
int
pci_find_capability(struct pci_dev *dev, int cap)
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/probe.c pciexp-2.6/drivers/pci/probe.c
--- linus-2.6/drivers/pci/probe.c 2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/probe.c 2004-01-29 08:59:46.000000000 -0500
@@ -17,6 +17,8 @@

#define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */
#define CARDBUS_RESERVE_BUSNR 3
+#define PCI_CFG_SPACE_SIZE 256
+#define PCI_CFG_SPACE_EXP_SIZE 4096

/* Ugh. Need to stop exporting this to modules. */
LIST_HEAD(pci_root_buses);
@@ -479,6 +481,20 @@ static void pci_release_dev(struct devic
kfree(pci_dev);
}

+/**
+ * pci_cfg_space_size - get the configuration space size of the PCI device
+ */
+static int pci_cfg_space_size(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_MMCONFIG
+ /* Find whether the device is PCI Express */
+ int is_pci_express_dev = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ if (is_pci_express_dev)
+ return PCI_CFG_SPACE_EXP_SIZE;
+#endif
+ return PCI_CFG_SPACE_SIZE;
+}
+
/*
* Read the config data for a PCI device, sanity-check it
* and fill in the dev structure...
@@ -515,6 +531,7 @@ pci_scan_device(struct pci_bus *bus, int
dev->multifunction = !!(hdr_type & 0x80);
dev->vendor = l & 0xffff;
dev->device = (l >> 16) & 0xffff;
+ dev->cfg_size = pci_cfg_space_size(dev);

/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
set this higher, assuming the system even supports it. */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/proc.c pciexp-2.6/drivers/pci/proc.c
--- linus-2.6/drivers/pci/proc.c 2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/proc.c 2004-01-29 08:38:49.000000000 -0500
@@ -16,16 +16,15 @@
#include <asm/uaccess.h>
#include <asm/byteorder.h>

-#define PCI_CFG_SPACE_SIZE 256
-
static int proc_initialized; /* = 0 */

static loff_t
proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
{
loff_t new = -1;
+ struct inode *inode = file->f_dentry->d_inode;

- down(&file->f_dentry->d_inode->i_sem);
+ down(&inode->i_sem);
switch (whence) {
case 0:
new = off;
@@ -34,14 +33,14 @@ proc_bus_pci_lseek(struct file *file, lo
new = file->f_pos + off;
break;
case 2:
- new = PCI_CFG_SPACE_SIZE + off;
+ new = inode->i_size + off;
break;
}
- if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+ if (new < 0 || new > inode->i_size)
new = -EINVAL;
else
file->f_pos = new;
- up(&file->f_dentry->d_inode->i_sem);
+ up(&inode->i_sem);
return new;
}

@@ -61,7 +60,7 @@ proc_bus_pci_read(struct file *file, cha
*/

if (capable(CAP_SYS_ADMIN))
- size = PCI_CFG_SPACE_SIZE;
+ size = dev->cfg_size;
else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
size = 128;
else
@@ -134,14 +133,15 @@ proc_bus_pci_write(struct file *file, co
const struct proc_dir_entry *dp = PDE(ino);
struct pci_dev *dev = dp->data;
int pos = *ppos;
+ int size = dev->cfg_size;
int cnt;

- if (pos >= PCI_CFG_SPACE_SIZE)
+ if (pos >= size)
return 0;
- if (nbytes >= PCI_CFG_SPACE_SIZE)
- nbytes = PCI_CFG_SPACE_SIZE;
- if (pos + nbytes > PCI_CFG_SPACE_SIZE)
- nbytes = PCI_CFG_SPACE_SIZE - pos;
+ if (nbytes >= size)
+ nbytes = size;
+ if (pos + nbytes > size)
+ nbytes = size - pos;
cnt = nbytes;

if (!access_ok(VERIFY_READ, buf, cnt))
@@ -403,7 +403,7 @@ int pci_proc_attach_device(struct pci_de
return -ENOMEM;
e->proc_fops = &proc_bus_pci_operations;
e->data = dev;
- e->size = PCI_CFG_SPACE_SIZE;
+ e->size = dev->cfg_size;

return 0;
}
diff -urpNX build-tools/dontdiff linus-2.6/include/asm-i386/fixmap.h pciexp-2.6/include/asm-i386/fixmap.h
--- linus-2.6/include/asm-i386/fixmap.h 2003-07-29 13:01:54.000000000 -0400
+++ pciexp-2.6/include/asm-i386/fixmap.h 2004-01-29 08:40:21.000000000 -0500
@@ -71,6 +71,9 @@ enum fixed_addresses {
FIX_ACPI_BEGIN,
FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
#endif
+#ifdef CONFIG_PCI_MMCONFIG
+ FIX_PCIE_MCFG,
+#endif
__end_of_permanent_fixed_addresses,
/* temporary boot-time mappings, used before ioremap() is functional */
#define NR_FIX_BTMAPS 16
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/acpi.h pciexp-2.6/include/linux/acpi.h
--- linus-2.6/include/linux/acpi.h 2003-10-08 16:53:03.000000000 -0400
+++ pciexp-2.6/include/linux/acpi.h 2004-01-29 08:46:48.000000000 -0500
@@ -317,6 +317,15 @@ struct acpi_table_ecdt {
char ec_id[0];
} __attribute__ ((packed));

+/* PCI MMCONFIG */
+
+struct acpi_table_mcfg {
+ struct acpi_table_header header;
+ u8 reserved[8];
+ u32 base_address;
+ u32 base_reserved;
+} __attribute__ ((packed));
+
/* Table Handlers */

enum acpi_table_id {
@@ -338,6 +347,7 @@ enum acpi_table_id {
ACPI_SSDT,
ACPI_SPMI,
ACPI_HPET,
+ ACPI_MCFG,
ACPI_TABLE_COUNT
};

@@ -369,6 +379,8 @@ void acpi_numa_arch_fixup(void);

extern int acpi_mp_config;

+extern u32 pci_mmcfg_base_addr;
+
#else /*!CONFIG_ACPI_BOOT*/

#define acpi_mp_config 0
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/pci.h pciexp-2.6/include/linux/pci.h
--- linus-2.6/include/linux/pci.h 2004-01-27 21:05:48.000000000 -0500
+++ pciexp-2.6/include/linux/pci.h 2004-01-29 09:13:20.000000000 -0500
@@ -410,6 +410,8 @@ struct pci_dev {
unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE];
unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE];

+ int cfg_size; /* Size of configuration space */
+
/*
* Instead of touching interrupt line and base address registers
* directly, use the values stored here. They might be different!

--
"Next the statesmen will invent cheap lies, putting the blame upon
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince
himself that the war is just, and will thank God for the better sleep
he enjoys after this process of grotesque self-deception." -- Mark Twain

2004-01-29 16:06:00

by Linus Torvalds

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11



On Thu, 29 Jan 2004, Matthew Wilcox wrote:
>
> Brian Gerst spotted a bug -- I'd forgotten to initialise mmcfg_virt_addr.

The compiler _should_ entirely compile away "fix_to_virt(xxx)", so by
creating a variable for the value, you're actually making code generation
worse. You might as well have

#define mmcfg_virt_addr (fix_to_virt(FIX_PCIE_MCFG))

instead.

That said, this patch looks perfectly acceptable to me. With some testing,
I'd take it through Greg or -mm.

Linus

2004-01-29 16:43:47

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

On Thu, Jan 29, 2004 at 08:05:52AM -0800, Linus Torvalds wrote:
> The compiler _should_ entirely compile away "fix_to_virt(xxx)", so by
> creating a variable for the value, you're actually making code generation
> worse. You might as well have
>
> #define mmcfg_virt_addr (fix_to_virt(FIX_PCIE_MCFG))
>
> instead.

Ahh, I missed the comment towards the top of fixmap.h that this is a
constant address. You're so smart sometimes ;-)

> That said, this patch looks perfectly acceptable to me. With some testing,
> I'd take it through Greg or -mm.

Cool. Here's the final version for testing then.

diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/Kconfig pciexp-2.6/arch/i386/Kconfig
--- linus-2.6/arch/i386/Kconfig 2004-01-27 21:05:17.000000000 -0500
+++ pciexp-2.6/arch/i386/Kconfig 2004-01-29 09:16:22.000000000 -0500
@@ -1030,12 +1030,16 @@ config PCI_GOBIOS
PCI-based systems don't have any BIOS at all. Linux can also try to
detect the PCI hardware directly without using the BIOS.

- With this option, you can specify how Linux should detect the PCI
- devices. If you choose "BIOS", the BIOS will be used, if you choose
- "Direct", the BIOS won't be used, and if you choose "Any", the
- kernel will try the direct access method and falls back to the BIOS
- if that doesn't work. If unsure, go with the default, which is
- "Any".
+ With this option, you can specify how Linux should detect the
+ PCI devices. If you choose "BIOS", the BIOS will be used,
+ if you choose "Direct", the BIOS won't be used, and if you
+ choose "MMConfig", then PCI Express MMCONFIG will be used.
+ If you choose "Any", the kernel will try MMCONFIG, then the
+ direct access method, and fall back to the BIOS if that doesn't
+ work. If unsure, go with the default, which is "Any".
+
+config PCI_GOMMCONFIG
+ bool "MMConfig"

config PCI_GODIRECT
bool "Direct"
@@ -1055,6 +1059,12 @@ config PCI_DIRECT
depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
default y

+config PCI_MMCONFIG
+ bool
+ depends on PCI && (PCI_GOMMCONFIG || PCI_GOANY)
+ select ACPI_BOOT
+ default y
+
config PCI_USE_VECTOR
bool "Vector-based interrupt indexing"
depends on X86_LOCAL_APIC && X86_IO_APIC
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/kernel/acpi/boot.c pciexp-2.6/arch/i386/kernel/acpi/boot.c
--- linus-2.6/arch/i386/kernel/acpi/boot.c 2004-01-07 18:02:42.000000000 -0500
+++ pciexp-2.6/arch/i386/kernel/acpi/boot.c 2004-01-29 09:13:51.000000000 -0500
@@ -95,6 +95,27 @@ char *__acpi_map_table(unsigned long phy
}


+#ifdef CONFIG_PCI_MMCONFIG
+static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_mcfg *mcfg;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
+ if (!mcfg) {
+ printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+ return -ENODEV;
+ }
+
+ if (mcfg->base_address)
+ pci_mmcfg_base_addr = mcfg->base_address;
+
+ return 0;
+}
+#endif /* CONFIG_PCI_MMCONFIG */
+
#ifdef CONFIG_X86_LOCAL_APIC

static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -515,6 +536,19 @@ acpi_boot_init (void)

#endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */

+#ifdef CONFIG_PCI_MMCONFIG
+ result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+ if (!result) {
+ printk(KERN_WARNING PREFIX "MCFG not present\n");
+ return 0;
+ } else if (result < 0) {
+ printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+ return result;
+ } else if (result > 1) {
+ printk(KERN_WARNING PREFIX "Multiple MCFG tables exist\n");
+ }
+#endif /* CONFIG_PCI_MMCONFIG */
+
#ifdef CONFIG_X86_LOCAL_APIC
if (acpi_lapic && acpi_ioapic) {
smp_found_config = 1;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/Makefile pciexp-2.6/arch/i386/pci/Makefile
--- linus-2.6/arch/i386/pci/Makefile 2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/Makefile 2004-01-29 08:11:28.000000000 -0500
@@ -1,6 +1,7 @@
obj-y := i386.o

obj-$(CONFIG_PCI_BIOS) += pcbios.o
+obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
obj-$(CONFIG_PCI_DIRECT) += direct.o

pci-y := fixup.o
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/common.c pciexp-2.6/arch/i386/pci/common.c
--- linus-2.6/arch/i386/pci/common.c 2003-09-08 17:41:32.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/common.c 2004-01-29 08:11:19.000000000 -0500
@@ -19,7 +19,8 @@
extern void pcibios_sort(void);
#endif

-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
+ PCI_PROBE_MMCONF;

int pcibios_last_bus = -1;
struct pci_bus *pci_root_bus = NULL;
@@ -197,6 +198,12 @@ char * __devinit pcibios_setup(char *st
return NULL;
}
#endif
+#ifdef CONFIG_PCI_MMCONFIG
+ else if (!strcmp(str, "nommconf")) {
+ pci_probe &= ~PCI_PROBE_MMCONF;
+ return NULL;
+ }
+#endif
#ifdef CONFIG_ACPI_PCI
else if (!strcmp(str, "noacpi")) {
pci_probe |= PCI_NO_ACPI_ROUTING;
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/mmconfig.c pciexp-2.6/arch/i386/pci/mmconfig.c
--- linus-2.6/arch/i386/pci/mmconfig.c 1969-12-31 19:00:00.000000000 -0500
+++ pciexp-2.6/arch/i386/pci/mmconfig.c 2004-01-29 11:37:09.000000000 -0500
@@ -0,0 +1,115 @@
+/*
+ * mmconfig.c - Low-level direct PCI config space access via MMCONFIG
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include "pci.h"
+
+/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
+u32 pci_mmcfg_base_addr;
+
+#define mmcfg_virt_addr (fix_to_virt(FIX_PCIE_MCFG))
+
+/* The base address of the last MMCONFIG device accessed */
+static u32 mmcfg_last_accessed_device;
+
+/*
+ * Functions for accessing PCI configuration space with MMCONFIG accesses
+ */
+
+static inline void pci_exp_set_dev_base(int bus, int devfn)
+{
+ u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12);
+ if (dev_base != mmcfg_last_accessed_device) {
+ mmcfg_last_accessed_device = dev_base;
+ set_fixmap(FIX_PCIE_MCFG, dev_base);
+ }
+}
+
+static int pci_mmcfg_read(int seg, int bus, int devfn, int reg, int len, u32 *value)
+{
+ unsigned long flags;
+
+ if (!value || (bus > 255) || (devfn > 255) || (reg > 4095))
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ pci_exp_set_dev_base(bus, devfn);
+
+ switch (len) {
+ case 1:
+ *value = readb(mmcfg_virt_addr + reg);
+ break;
+ case 2:
+ *value = readw(mmcfg_virt_addr + reg);
+ break;
+ case 4:
+ *value = readl(mmcfg_virt_addr + reg);
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static int pci_mmcfg_write(int seg, int bus, int devfn, int reg, int len, u32 value)
+{
+ unsigned long flags;
+
+ if ((bus > 255) || (devfn > 255) || (reg > 4095))
+ return -EINVAL;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ pci_exp_set_dev_base(bus, devfn);
+
+ switch (len) {
+ case 1:
+ writeb(value, mmcfg_virt_addr + reg);
+ break;
+ case 2:
+ writew(value, mmcfg_virt_addr + reg);
+ break;
+ case 4:
+ writel(value, mmcfg_virt_addr + reg);
+ break;
+ }
+
+ /* Dummy read to flush PCI write */
+ readl(mmcfg_virt_addr);
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return 0;
+}
+
+static struct pci_raw_ops pci_mmcfg = {
+ .read = pci_mmcfg_read,
+ .write = pci_mmcfg_write,
+};
+
+static int __init pci_mmcfg_init(void)
+{
+ struct resource *region;
+
+ if ((pci_probe & PCI_PROBE_MMCONF) == 0)
+ goto out;
+ if (!pci_mmcfg_base_addr)
+ goto out;
+ region = request_mem_region(pci_mmcfg_base_addr, 256 * 1024 * 1024,
+ "PCI MMCONFIG");
+ if (!region)
+ goto out;
+
+ printk(KERN_INFO "PCI: Using MMCONFIG\n");
+ raw_pci_ops = &pci_mmcfg;
+ pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
+
+ out:
+ return 0;
+}
+
+arch_initcall(pci_mmcfg_init);
diff -urpNX build-tools/dontdiff linus-2.6/arch/i386/pci/pci.h pciexp-2.6/arch/i386/pci/pci.h
--- linus-2.6/arch/i386/pci/pci.h 2003-07-29 13:00:27.000000000 -0400
+++ pciexp-2.6/arch/i386/pci/pci.h 2004-01-29 08:14:48.000000000 -0500
@@ -15,6 +15,9 @@
#define PCI_PROBE_BIOS 0x0001
#define PCI_PROBE_CONF1 0x0002
#define PCI_PROBE_CONF2 0x0004
+#define PCI_PROBE_MMCONF 0x0008
+#define PCI_PROBE_MASK 0x000f
+
#define PCI_NO_SORT 0x0100
#define PCI_BIOS_SORT 0x0200
#define PCI_NO_CHECKS 0x0400
diff -urpNX build-tools/dontdiff linus-2.6/drivers/acpi/tables.c pciexp-2.6/drivers/acpi/tables.c
--- linus-2.6/drivers/acpi/tables.c 2003-10-08 16:52:16.000000000 -0400
+++ pciexp-2.6/drivers/acpi/tables.c 2004-01-29 08:22:52.000000000 -0500
@@ -58,6 +58,7 @@ static char *acpi_table_signatures[ACPI_
[ACPI_SSDT] = "SSDT",
[ACPI_SPMI] = "SPMI",
[ACPI_HPET] = "HPET",
+ [ACPI_MCFG] = "MCFG",
};

/* System Description Table (RSDT/XSDT) */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci-sysfs.c pciexp-2.6/drivers/pci/pci-sysfs.c
--- linus-2.6/drivers/pci/pci-sysfs.c 2003-08-22 22:46:57.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci-sysfs.c 2004-01-29 09:30:43.000000000 -0500
@@ -71,7 +71,7 @@ pci_read_config(struct kobject *kobj, ch

/* Several chips lock up trying to read undefined config space */
if (capable(CAP_SYS_ADMIN)) {
- size = 256;
+ size = dev->cfg_size;
} else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
size = 128;
}
@@ -123,10 +123,10 @@ pci_write_config(struct kobject *kobj, c
unsigned int size = count;
loff_t init_off = off;

- if (off > 256)
+ if (off > dev->cfg_size)
return 0;
- if (off + count > 256) {
- size = 256 - off;
+ if (off + count > dev->cfg_size) {
+ size = dev->cfg_size - off;
count = size;
}

@@ -166,6 +166,16 @@ static struct bin_attribute pci_config_a
.write = pci_write_config,
};

+static struct bin_attribute pcie_config_attr = {
+ .attr = {
+ .name = "config",
+ .mode = S_IRUGO | S_IWUSR,
+ },
+ .size = 4096,
+ .read = pci_read_config,
+ .write = pci_write_config,
+};
+
void pci_create_sysfs_dev_files (struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
@@ -178,5 +188,9 @@ void pci_create_sysfs_dev_files (struct
device_create_file (dev, &dev_attr_class);
device_create_file (dev, &dev_attr_irq);
device_create_file (dev, &dev_attr_resource);
- sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+ if (pdev->cfg_size < 4096) {
+ sysfs_create_bin_file(&dev->kobj, &pci_config_attr);
+ } else {
+ sysfs_create_bin_file(&dev->kobj, &pcie_config_attr);
+ }
}
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/pci.c pciexp-2.6/drivers/pci/pci.c
--- linus-2.6/drivers/pci/pci.c 2003-10-08 16:52:35.000000000 -0400
+++ pciexp-2.6/drivers/pci/pci.c 2004-01-29 08:23:57.000000000 -0500
@@ -90,6 +90,8 @@ pci_max_busnr(void)
* %PCI_CAP_ID_CHSWP CompactPCI HotSwap
*
* %PCI_CAP_ID_PCIX PCI-X
+ *
+ * %PCI_CAP_ID_EXP PCI Express
*/
int
pci_find_capability(struct pci_dev *dev, int cap)
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/probe.c pciexp-2.6/drivers/pci/probe.c
--- linus-2.6/drivers/pci/probe.c 2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/probe.c 2004-01-29 08:59:46.000000000 -0500
@@ -17,6 +17,8 @@

#define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */
#define CARDBUS_RESERVE_BUSNR 3
+#define PCI_CFG_SPACE_SIZE 256
+#define PCI_CFG_SPACE_EXP_SIZE 4096

/* Ugh. Need to stop exporting this to modules. */
LIST_HEAD(pci_root_buses);
@@ -479,6 +481,20 @@ static void pci_release_dev(struct devic
kfree(pci_dev);
}

+/**
+ * pci_cfg_space_size - get the configuration space size of the PCI device
+ */
+static int pci_cfg_space_size(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_MMCONFIG
+ /* Find whether the device is PCI Express */
+ int is_pci_express_dev = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ if (is_pci_express_dev)
+ return PCI_CFG_SPACE_EXP_SIZE;
+#endif
+ return PCI_CFG_SPACE_SIZE;
+}
+
/*
* Read the config data for a PCI device, sanity-check it
* and fill in the dev structure...
@@ -515,6 +531,7 @@ pci_scan_device(struct pci_bus *bus, int
dev->multifunction = !!(hdr_type & 0x80);
dev->vendor = l & 0xffff;
dev->device = (l >> 16) & 0xffff;
+ dev->cfg_size = pci_cfg_space_size(dev);

/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
set this higher, assuming the system even supports it. */
diff -urpNX build-tools/dontdiff linus-2.6/drivers/pci/proc.c pciexp-2.6/drivers/pci/proc.c
--- linus-2.6/drivers/pci/proc.c 2004-01-07 18:02:53.000000000 -0500
+++ pciexp-2.6/drivers/pci/proc.c 2004-01-29 08:38:49.000000000 -0500
@@ -16,16 +16,15 @@
#include <asm/uaccess.h>
#include <asm/byteorder.h>

-#define PCI_CFG_SPACE_SIZE 256
-
static int proc_initialized; /* = 0 */

static loff_t
proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
{
loff_t new = -1;
+ struct inode *inode = file->f_dentry->d_inode;

- down(&file->f_dentry->d_inode->i_sem);
+ down(&inode->i_sem);
switch (whence) {
case 0:
new = off;
@@ -34,14 +33,14 @@ proc_bus_pci_lseek(struct file *file, lo
new = file->f_pos + off;
break;
case 2:
- new = PCI_CFG_SPACE_SIZE + off;
+ new = inode->i_size + off;
break;
}
- if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+ if (new < 0 || new > inode->i_size)
new = -EINVAL;
else
file->f_pos = new;
- up(&file->f_dentry->d_inode->i_sem);
+ up(&inode->i_sem);
return new;
}

@@ -61,7 +60,7 @@ proc_bus_pci_read(struct file *file, cha
*/

if (capable(CAP_SYS_ADMIN))
- size = PCI_CFG_SPACE_SIZE;
+ size = dev->cfg_size;
else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
size = 128;
else
@@ -134,14 +133,15 @@ proc_bus_pci_write(struct file *file, co
const struct proc_dir_entry *dp = PDE(ino);
struct pci_dev *dev = dp->data;
int pos = *ppos;
+ int size = dev->cfg_size;
int cnt;

- if (pos >= PCI_CFG_SPACE_SIZE)
+ if (pos >= size)
return 0;
- if (nbytes >= PCI_CFG_SPACE_SIZE)
- nbytes = PCI_CFG_SPACE_SIZE;
- if (pos + nbytes > PCI_CFG_SPACE_SIZE)
- nbytes = PCI_CFG_SPACE_SIZE - pos;
+ if (nbytes >= size)
+ nbytes = size;
+ if (pos + nbytes > size)
+ nbytes = size - pos;
cnt = nbytes;

if (!access_ok(VERIFY_READ, buf, cnt))
@@ -403,7 +403,7 @@ int pci_proc_attach_device(struct pci_de
return -ENOMEM;
e->proc_fops = &proc_bus_pci_operations;
e->data = dev;
- e->size = PCI_CFG_SPACE_SIZE;
+ e->size = dev->cfg_size;

return 0;
}
diff -urpNX build-tools/dontdiff linus-2.6/include/asm-i386/fixmap.h pciexp-2.6/include/asm-i386/fixmap.h
--- linus-2.6/include/asm-i386/fixmap.h 2003-07-29 13:01:54.000000000 -0400
+++ pciexp-2.6/include/asm-i386/fixmap.h 2004-01-29 08:40:21.000000000 -0500
@@ -71,6 +71,9 @@ enum fixed_addresses {
FIX_ACPI_BEGIN,
FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
#endif
+#ifdef CONFIG_PCI_MMCONFIG
+ FIX_PCIE_MCFG,
+#endif
__end_of_permanent_fixed_addresses,
/* temporary boot-time mappings, used before ioremap() is functional */
#define NR_FIX_BTMAPS 16
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/acpi.h pciexp-2.6/include/linux/acpi.h
--- linus-2.6/include/linux/acpi.h 2003-10-08 16:53:03.000000000 -0400
+++ pciexp-2.6/include/linux/acpi.h 2004-01-29 08:46:48.000000000 -0500
@@ -317,6 +317,15 @@ struct acpi_table_ecdt {
char ec_id[0];
} __attribute__ ((packed));

+/* PCI MMCONFIG */
+
+struct acpi_table_mcfg {
+ struct acpi_table_header header;
+ u8 reserved[8];
+ u32 base_address;
+ u32 base_reserved;
+} __attribute__ ((packed));
+
/* Table Handlers */

enum acpi_table_id {
@@ -338,6 +347,7 @@ enum acpi_table_id {
ACPI_SSDT,
ACPI_SPMI,
ACPI_HPET,
+ ACPI_MCFG,
ACPI_TABLE_COUNT
};

@@ -369,6 +379,8 @@ void acpi_numa_arch_fixup(void);

extern int acpi_mp_config;

+extern u32 pci_mmcfg_base_addr;
+
#else /*!CONFIG_ACPI_BOOT*/

#define acpi_mp_config 0
diff -urpNX build-tools/dontdiff linus-2.6/include/linux/pci.h pciexp-2.6/include/linux/pci.h
--- linus-2.6/include/linux/pci.h 2004-01-27 21:05:48.000000000 -0500
+++ pciexp-2.6/include/linux/pci.h 2004-01-29 09:13:20.000000000 -0500
@@ -410,6 +410,8 @@ struct pci_dev {
unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE];
unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE];

+ int cfg_size; /* Size of configuration space */
+
/*
* Instead of touching interrupt line and base address registers
* directly, use the values stored here. They might be different!

--
"Next the statesmen will invent cheap lies, putting the blame upon
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince
himself that the war is just, and will thank God for the better sleep
he enjoys after this process of grotesque self-deception." -- Mark Twain

2004-01-29 16:53:33

by Linus Torvalds

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11



On Thu, 29 Jan 2004, Matthew Wilcox wrote:
>
> Ahh, I missed the comment towards the top of fixmap.h that this is a
> constant address. You're so smart sometimes ;-)

Hey, you'd better verify that the compiler doesn't do anything stupid (but
the good news is that if it doesn't inline the thing properly and do all
the constant folding, you should get a link-time failure about
"__this_fixmap_does_not_exist", so we should be fairly safe).

Linus

2004-01-29 18:11:15

by Greg KH

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

On Thu, Jan 29, 2004 at 08:05:52AM -0800, Linus Torvalds wrote:
>
> That said, this patch looks perfectly acceptable to me. With some testing,
> I'd take it through Greg or -mm.

It's looking much better. But I _really_ want to actually test this on
real hardware. As no one is shipping PCI Express hardware yet, there is
no rush to get this patch into the kernel tree.

Bill Irwin and I are working on getting some PCI Express hardware to
test this patch out on. We've been promised some for a while, hopefully
it turns up soon...

thanks,

greg k-h

2004-01-30 16:33:19

by Greg KH

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

On Thu, Jan 29, 2004 at 10:09:52AM -0800, Greg KH wrote:
> On Thu, Jan 29, 2004 at 08:05:52AM -0800, Linus Torvalds wrote:
> >
> > That said, this patch looks perfectly acceptable to me. With some testing,
> > I'd take it through Greg or -mm.
>
> It's looking much better. But I _really_ want to actually test this on
> real hardware. As no one is shipping PCI Express hardware yet, there is
> no rush to get this patch into the kernel tree.

Also, can someone from Intel test out Matthew's patch to make sure it
works properly for them on their hardware? It's much cleaner than the
last patch submitted by you all :)

thanks,

greg k-h

2004-01-31 22:05:20

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

Matthew Wilcox <[email protected]> writes:

> On Thu, Jan 29, 2004 at 08:05:52AM -0800, Linus Torvalds wrote:
> > The compiler _should_ entirely compile away "fix_to_virt(xxx)", so by
> > creating a variable for the value, you're actually making code generation
> > worse. You might as well have
> >
> > #define mmcfg_virt_addr (fix_to_virt(FIX_PCIE_MCFG))
> >
> > instead.
>
> Ahh, I missed the comment towards the top of fixmap.h that this is a
> constant address. You're so smart sometimes ;-)
>
> > That said, this patch looks perfectly acceptable to me. With some testing,
> > I'd take it through Greg or -mm.
>
> Cool. Here's the final version for testing then.

Is it really safe to treat the base address as a u32? I know
if I was doing the BIOS and that address was tied to a 32bit BAR I
would be extremely tempted to put those 256M of address space above
4G. Putting something like that below 4G leads to 1/2 Gig of memory
missing.

You can also put the memory above 4G on most Intel chipsets, but I'd
rather have my memory down low where my legacy OS could get to it
than have my PCI extended configuration space down low where
nothing really needs it.

Point being I don't think it is safe to assume the BIOS always puts
the extended PCI configuration space below 4G.

Eric

2004-02-01 04:41:06

by Grant Grundler

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

On Sat, Jan 31, 2004 at 02:57:29PM -0700, Eric W. Biederman wrote:
> Is it really safe to treat the base address as a u32?

Sorry...I missed this in the code...but the following confuses me:

> I know
> if I was doing the BIOS and that address was tied to a 32bit BAR I
> would be extremely tempted to put those 256M of address space above
> 4G.

Uhmm, how can one put a 32-bit BAR above 4G?
Did you mean a 64-bit BAR?

> Point being I don't think it is safe to assume the BIOS always puts
> the extended PCI configuration space below 4G.

Where MMCONFIG lives is orthogonal to where the BARs point.
I'm pretty sure I missed the point...sorry.

grant