2004-01-22 10:23:34

by Durairaj, Sundarapandian

[permalink] [raw]
Subject: RE: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

Hi All,

I am reposting the updated patch after incorporating the review
comments.

This is the patch on PCI Express Enhanced configuration for 2.6.0 test11
kernel following up to the Vladimir ([email protected]) and
Harinarayanan ([email protected]) and my previous
patches .
I tested it on our i386 platform.

This patch also implements a mechanism for the kernel to find the
chipset specific mmcfg base address. The kernel will detect the base
address of the chipset through the ACPI table entry and based on that
the PCI subsystem will be initialized.

Please review this and send in your comments.

Thanks,
Sundar

diff -Naur linux-2.6.0/arch/i386/Kconfig
linux_pciexpress/arch/i386/Kconfig
--- linux-2.6.0/arch/i386/Kconfig 2003-12-18 08:28:16.000000000
+0530
+++ linux_pciexpress/arch/i386/Kconfig 2004-01-12 14:28:38.000000000
+0530
@@ -959,7 +959,7 @@
endmenu


-menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
+menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA, PCI_EXPRESS)"

config X86_VISWS_APIC
bool
@@ -976,6 +976,18 @@
depends on SMP && !(X86_VISWS || X86_VOYAGER)
default y

+config PCI_EXPRESS
+ bool "PCI_EXPRESS (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && ACPI_BOOT
+ help
+ PCI Express extends the configuration space from 256 bytes to
+ 4k bytes. It also defines an enhanced configuration mechanism
+ to acces the extended configuration space.
+ With this option, you can specify that Linux will first
attempt
+ to access the pci configuration space through enhanced config
+ access mechanism (Will work only on PCI Express based system)
+ otherwise the pci direct mechanism will be used.
+
config PCI
bool "PCI support" if !X86_VISWS
depends on !X86_VOYAGER
diff -Naur linux-2.6.0/arch/i386/kernel/acpi/boot.c
linux_pciexpress/arch/i386/kernel/acpi/boot.c
--- linux-2.6.0/arch/i386/kernel/acpi/boot.c 2003-12-18
08:29:29.000000000 +0530
+++ linux_pciexpress/arch/i386/kernel/acpi/boot.c 2004-01-12
14:14:22.000000000 +0530
@@ -93,6 +93,29 @@
return ((unsigned char *) base + offset);
}

+#ifdef CONFIG_PCI_EXPRESS
+extern u32 mmcfg_base_address;
+static int __init acpi_parse_mcfg
+ (unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_mcfg *mcfg = NULL;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ mcfg = (struct acpi_table_mcfg *) __acpi_map_table
+ (phys_addr, size);
+ if (!mcfg) {
+ printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+ return -ENODEV;
+ }
+ if (mcfg->base_address)
+ mmcfg_base_address = (u32)mcfg->base_address;
+ printk(KERN_INFO PREFIX "Local mcfg address %p\n",
+ mcfg->base_address);
+ return 0;
+}
+#endif /* CONFIG_PCI_EXPRESS*/

#ifdef CONFIG_X86_LOCAL_APIC

@@ -508,6 +531,22 @@

#endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */

+#ifdef CONFIG_PCI_EXPRESS
+ result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+ if (!result) {
+ printk(KERN_WARNING PREFIX "MCFG not present\n");
+ return 0;
+ }
+ else if (result < 0) {
+ printk(KERN_ERR PREFIX "Error parsing MCFG\n");
+ return result;
+ }
+ else if (result > 1) {
+ printk(KERN_WARNING PREFIX \
+ "Multiple MCFG tables exist\n");
+ }
+#endif /*CONFIG_PCI_EXPRESS*/
+
#ifdef CONFIG_X86_LOCAL_APIC
if (acpi_lapic && acpi_ioapic) {
smp_found_config = 1;
diff -Naur linux-2.6.0/arch/i386/pci/common.c
linux_pciexpress/arch/i386/pci/common.c
--- linux-2.6.0/arch/i386/pci/common.c 2003-12-18 08:28:46.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/common.c 2004-01-22
11:54:42.000000000 +0530
@@ -19,7 +19,8 @@
extern void pcibios_sort(void);
#endif

-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 |
PCI_PROBE_CONF2;
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 |
PCI_PROBE_CONF2
+ | PCI_PROBE_ENHANCED;

int pcibios_last_bus = -1;
struct pci_bus *pci_root_bus = NULL;
@@ -197,6 +198,12 @@
return NULL;
}
#endif
+#ifdef CONFIG_PCI_EXPRESS
+ else if (!strcmp(str, "no_pcie")) {
+ pci_probe &= !PCI_PROBE_ENHANCED;
+ return NULL;
+ }
+#endif
#ifdef CONFIG_ACPI_PCI
else if (!strcmp(str, "noacpi")) {
pci_probe |= PCI_NO_ACPI_ROUTING;
diff -Naur linux-2.6.0/arch/i386/pci/direct.c
linux_pciexpress/arch/i386/pci/direct.c
--- linux-2.6.0/arch/i386/pci/direct.c 2003-12-18 08:28:28.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/direct.c 2004-01-22
12:07:37.000000000 +0530
@@ -168,6 +168,71 @@


/*
+ *We map full Page size on each request. Incidently that's the size we
+ *have for config space too.
+ */
+#ifdef CONFIG_PCI_EXPRESS
+/*
+ *On PCI Express capable platform, at the time of kernel initialization
+ *the os would have scanned for mcfg table and set this variable to
+ *appropriate value. If PCI Express not supported the variable will
+ * have 0 value
+ */
+u32 mmcfg_base_address;
+
+/*
+ *Variable used to store the virtual address of fixed PTE
+ */
+char * mmcfg_virt_addr;
+
+static int pci_express_conf_read(int seg, int bus,
+ int devfn, int reg, int len, u32 *value)
+{
+ if (!value || ((u32)bus > 255) || ((u32)devfn > 255)
+ || ((u32)reg > 4095)){
+ printk(KERN_ERR "pci_express_conf_read: \
+ Invalid Parameter\n");
+ return -EINVAL;
+ }
+
+ /* Shoot misalligned transaction now */
+ if (reg & (len-1)){
+ printk(KERN_ERR "pci_express_conf_read: \
+ misalligned transaction\n");
+ return -EINVAL;
+ }
+ pci_express_read(bus, devfn, reg, len, value);
+
+ return 0;
+}
+
+static int pci_express_conf_write(int seg, int bus,
+ int devfn, int reg, int len, u32 value)
+{
+ if (((u32)bus > 255) || ((u32)devfn > 255)
+ || ((u32)reg > 4095)){
+ printk(KERN_ERR "pci_express_conf_write: \
+ Invalid Parameter\n");
+ return -EINVAL;
+ }
+
+ /* Shoot misalligned transaction now */
+ if (reg & (len-1)){
+ printk(KERN_ERR "pci_express_conf_write: \
+ misalligned transaction\n");
+ return -EINVAL;
+ }
+ pci_express_write(bus, devfn, reg, len, value);
+ return 0;
+}
+
+static struct pci_raw_ops pci_express_conf = {
+ .read = pci_express_conf_read,
+ .write = pci_express_conf_write,
+};
+#endif /* CONFIG_PCI_EXPRESS */
+
+/*
* Before we decide to use direct hardware access mechanisms, we try to
do some
* trivial checks to ensure it at least _seems_ to be working -- we
just test
* whether bus 00 contains a host bridge (this is similar to checking
@@ -244,7 +309,31 @@
static int __init pci_direct_init(void)
{
struct resource *region, *region2;
+
+#ifdef CONFIG_PCI_EXPRESS
+ if ((pci_probe & PCI_PROBE_ENHANCED) == 0)
+ goto type1;
+ /*
+ *Check if platform we are running is pci express capable
+ */
+ if (mmcfg_base_address == 0){
+ printk(KERN_INFO
+ "MCFG table entry is not found in ACPI
tables....\n \
+ PCI Express not supported in this platform....\n
\
+ Not enabling Enhanced Configuration....\n");
+ goto type1;
+ }

+ /* Calculate the virtual address of the PTE */
+ mmcfg_virt_addr = (char *) (fix_to_virt(FIX_PCIE_MCFG));
+
+ if (pci_sanity_check(&pci_express_conf)) {
+ printk(KERN_INFO "PCI:Using config type PCIExp\n");
+ raw_pci_ops = &pci_express_conf;
+ return 0;
+ }
+type1:
+#endif /* CONFIG_PCI_EXPRESS */
if ((pci_probe & PCI_PROBE_CONF1) == 0)
goto type2;
region = request_region(0xCF8, 8, "PCI conf1");
diff -Naur linux-2.6.0/arch/i386/pci/Makefile
linux_pciexpress/arch/i386/pci/Makefile
--- linux-2.6.0/arch/i386/pci/Makefile 2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/Makefile 2004-01-12
13:38:55.000000000 +0530
@@ -2,6 +2,7 @@

obj-$(CONFIG_PCI_BIOS) += pcbios.o
obj-$(CONFIG_PCI_DIRECT) += direct.o
+obj-$(CONFIG_PCI_EXPRES) += direct.o

pci-y := fixup.o
pci-$(CONFIG_ACPI_PCI) += acpi.o
diff -Naur linux-2.6.0/arch/i386/pci/pci.h
linux_pciexpress/arch/i386/pci/pci.h
--- linux-2.6.0/arch/i386/pci/pci.h 2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/arch/i386/pci/pci.h 2004-01-12
13:38:55.000000000 +0530
@@ -15,6 +15,11 @@
#define PCI_PROBE_BIOS 0x0001
#define PCI_PROBE_CONF1 0x0002
#define PCI_PROBE_CONF2 0x0004
+#ifdef CONFIG_PCI_EXPRESS
+#define PCI_PROBE_ENHANCED 0x0008
+#else
+#define PCI_PROBE_ENHANCED 0x0
+#endif
#define PCI_NO_SORT 0x0100
#define PCI_BIOS_SORT 0x0200
#define PCI_NO_CHECKS 0x0400
diff -Naur linux-2.6.0/drivers/acpi/tables.c
linux_pciexpress/drivers/acpi/tables.c
--- linux-2.6.0/drivers/acpi/tables.c 2003-12-18 08:28:46.000000000
+0530
+++ linux_pciexpress/drivers/acpi/tables.c 2004-01-12
13:38:20.000000000 +0530
@@ -58,6 +58,7 @@
[ACPI_SSDT] = "SSDT",
[ACPI_SPMI] = "SPMI",
[ACPI_HPET] = "HPET",
+ [ACPI_MCFG] = "MCFG",
};

/* System Description Table (RSDT/XSDT) */
diff -Naur linux-2.6.0/drivers/pci/pci.c
linux_pciexpress/drivers/pci/pci.c
--- linux-2.6.0/drivers/pci/pci.c 2003-12-18 08:28:38.000000000
+0530
+++ linux_pciexpress/drivers/pci/pci.c 2004-01-12 13:38:06.000000000
+0530
@@ -90,6 +90,8 @@
* %PCI_CAP_ID_CHSWP CompactPCI HotSwap
*
* %PCI_CAP_ID_PCIX PCI-X
+ * %PCI_CAP_ID_EXP PCI-EXP
+
*/
int
pci_find_capability(struct pci_dev *dev, int cap)
diff -Naur linux-2.6.0/drivers/pci/proc.c
linux_pciexpress/drivers/pci/proc.c
--- linux-2.6.0/drivers/pci/proc.c 2003-12-18 08:28:57.000000000
+0530
+++ linux_pciexpress/drivers/pci/proc.c 2004-01-12 14:36:27.000000000
+0530
@@ -17,13 +17,30 @@
#include <asm/byteorder.h>

#define PCI_CFG_SPACE_SIZE 256
+#define PCI_CFG_SPACE_EXP_SIZE 4096

static int proc_initialized; /* = 0 */

+static int pci_cfg_space_size (struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_EXPRESS
+ /* Find whether the device is PCI Express device */
+ int is_pci_express_dev =
+ pci_find_capability(dev, PCI_CAP_ID_EXP);
+ if (is_pci_express_dev)
+ return PCI_CFG_SPACE_EXP_SIZE;
+ else
+#endif
+ return PCI_CFG_SPACE_SIZE;
+}
+
static loff_t
proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
{
loff_t new = -1;
+ const struct inode *ino = file->f_dentry->d_inode;
+ const struct proc_dir_entry *dp = PDE(ino);
+ struct pci_dev *dev = dp->data;

lock_kernel();
switch (whence) {
@@ -34,11 +51,11 @@
new = file->f_pos + off;
break;
case 2:
- new = PCI_CFG_SPACE_SIZE + off;
+ new = pci_cfg_space_size(dev) + off;
break;
}
unlock_kernel();
- if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+ if (new < 0 || new > pci_cfg_space_size(dev))
return -EINVAL;
return (file->f_pos = new);
}
@@ -59,7 +76,7 @@
*/

if (capable(CAP_SYS_ADMIN))
- size = PCI_CFG_SPACE_SIZE;
+ size = pci_cfg_space_size (dev);
else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
size = 128;
else
@@ -134,12 +151,14 @@
int pos = *ppos;
int cnt;

- if (pos >= PCI_CFG_SPACE_SIZE)
+ int size;
+ size = pci_cfg_space_size(dev);
+ if (pos >= size)
return 0;
- if (nbytes >= PCI_CFG_SPACE_SIZE)
- nbytes = PCI_CFG_SPACE_SIZE;
- if (pos + nbytes > PCI_CFG_SPACE_SIZE)
- nbytes = PCI_CFG_SPACE_SIZE - pos;
+ if (nbytes >= size)
+ nbytes = size;
+ if (pos + nbytes > size)
+ nbytes = size - pos;
cnt = nbytes;

if (!access_ok(VERIFY_READ, buf, cnt))
@@ -384,6 +403,7 @@
struct pci_bus *bus = dev->bus;
struct proc_dir_entry *de, *e;
char name[16];
+ int size;

if (!proc_initialized)
return -EACCES;
@@ -401,7 +421,8 @@
return -ENOMEM;
e->proc_fops = &proc_bus_pci_operations;
e->data = dev;
- e->size = PCI_CFG_SPACE_SIZE;
+ size = pci_cfg_space_size(dev);
+ e->size = size;

return 0;
}
diff -Naur linux-2.6.0/include/asm-i386/fixmap.h
linux_pciexpress/include/asm-i386/fixmap.h
--- linux-2.6.0/include/asm-i386/fixmap.h 2003-12-18
08:28:06.000000000 +0530
+++ linux_pciexpress/include/asm-i386/fixmap.h 2004-01-12
13:40:19.000000000 +0530
@@ -67,6 +67,9 @@
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings
*/
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#endif
+#ifdef CONFIG_PCI_EXPRESS
+ FIX_PCIE_MCFG,
+#endif
#ifdef CONFIG_ACPI_BOOT
FIX_ACPI_BEGIN,
FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
diff -Naur linux-2.6.0/include/asm-i386/pci.h
linux_pciexpress/include/asm-i386/pci.h
--- linux-2.6.0/include/asm-i386/pci.h 2003-12-18 08:28:47.000000000
+0530
+++ linux_pciexpress/include/asm-i386/pci.h 2004-01-12
14:39:42.000000000 +0530
@@ -96,4 +96,68 @@
/* generic pci stuff */
#include <asm-generic/pci.h>

+#ifdef CONFIG_PCI_EXPRESS
+/*
+ *Variable used to store the base address of the last pciexpress device
+ *accessed.
+ */
+static u32 pcie_last_accessed_device;
+
+extern u32 mmcfg_base_address;
+extern spinlock_t pci_config_lock;
+extern char * mmcfg_virt_addr;
+
+static __inline__ void pci_exp_set_dev_base (int bus, int devfn)
+{
+ u32 dev_base =
+ mmcfg_base_address | (bus << 20) | (devfn << 12);
+ if (dev_base != pcie_last_accessed_device){
+ pcie_last_accessed_device = dev_base;
+ set_fixmap (FIX_PCIE_MCFG, dev_base);
+ }
+}
+
+static __inline__ void pci_express_read(int bus, int devfn, int reg,
+ int len, u32 *value)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&pci_config_lock, flags);
+ pci_exp_set_dev_base(bus, devfn);
+ switch (len) {
+ case 1:
+ *value = (u8)readb((unsigned long) mmcfg_virt_addr +
reg);
+ break;
+ case 2:
+ *value = (u16)readw((unsigned long) mmcfg_virt_addr +
reg);
+ break;
+ case 4:
+ *value = (u32)readl((unsigned long) mmcfg_virt_addr +
reg);
+ break;
+ }
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+
+static __inline__ void pci_express_write(int bus, int devfn, int reg,
+ int len, u32 value)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+ pci_exp_set_dev_base(bus, devfn);
+ switch (len) {
+ case 1:
+ writeb(value,(unsigned long)mmcfg_virt_addr +
reg);
+ break;
+ case 2:
+ writew(value,(unsigned long)mmcfg_virt_addr +
reg);
+ break;
+ case 4:
+ writel(value,(unsigned long)mmcfg_virt_addr +
reg);
+ break;
+ }
+ /* Dummy read to flush PCI write */
+ readl (mmcfg_virt_addr);
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+#endif /* CONFIG_PCI_EXPRESS */
#endif /* __i386_PCI_H */
diff -Naur linux-2.6.0/include/linux/acpi.h
linux_pciexpress/include/linux/acpi.h
--- linux-2.6.0/include/linux/acpi.h 2003-12-18 08:27:58.000000000
+0530
+++ linux_pciexpress/include/linux/acpi.h 2004-01-12
14:43:23.000000000 +0530
@@ -317,6 +317,12 @@
char ec_id[0];
} __attribute__ ((packed));

+struct acpi_table_mcfg {
+ struct acpi_table_header header;
+ u8 reserved[8];
+ u64 base_address;
+} __attribute__ ((packed));
+
/* Table Handlers */

enum acpi_table_id {
@@ -338,6 +344,7 @@
ACPI_SSDT,
ACPI_SPMI,
ACPI_HPET,
+ ACPI_MCFG,
ACPI_TABLE_COUNT
};

diff -Naur linux-2.6.0/include/linux/pci.h
linux_pciexpress/include/linux/pci.h
--- linux-2.6.0/include/linux/pci.h 2003-12-18 08:28:49.000000000
+0530
+++ linux_pciexpress/include/linux/pci.h 2004-01-12
13:40:01.000000000 +0530
@@ -198,6 +198,7 @@
#define PCI_CAP_ID_MSI 0x05 /* Message Signalled
Interrupts */
#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */
#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */
+#define PCI_CAP_ID_EXP 0x10 /* PCI-Express*/
#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list
*/
#define PCI_CAP_FLAGS 2 /* Capability defined flags (16
bits) */
#define PCI_CAP_SIZEOF 4


2004-01-22 18:24:40

by Alan

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

On Iau, 2004-01-22 at 13:12, Andi Kleen wrote:
> > +#ifdef CONFIG_PCI_EXPRESS
> > + else if (!strcmp(str, "no_pcie")) {
>
> Would "no_pciexp" be better? no_pcie looks nearly like a typo.

Other "nofoo" generally don't use "_" (Linux kernel really needs an
actual policy document for such stuff tho)

2004-01-22 16:40:53

by Grant Grundler

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

On Thu, Jan 22, 2004 at 03:51:22PM +0530, Durairaj, Sundarapandian wrote:
> I tested it on our i386 platform.

Any chance Intel can test this on an IA64 box?

...
> /*
> + *We map full Page size on each request. Incidently that's the size we
> + *have for config space too.
> + */

"full Page size" != 4k on several architectures.
PCI Express is going to be implemented on ia64 and power as well.

...
> diff -Naur linux-2.6.0/include/asm-i386/pci.h
...
> +#ifdef CONFIG_PCI_EXPRESS
> +/*
> + *Variable used to store the base address of the last pciexpress device
> + *accessed.
> + */
> +static u32 pcie_last_accessed_device;

Andi is right - this is a definite no-no.

grant

2004-01-22 13:13:20

by Andi Kleen

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

On Thu, Jan 22, 2004 at 03:51:22PM +0530, Durairaj, Sundarapandian wrote:
> Please review this and send in your comments.

Looks better now. Still a few nitpicks.

> + access mechanism (Will work only on PCI Express based system)
> + otherwise the pci direct mechanism will be used.

Is that true? It won't use PCI BIOS anymore? If true this looks not
right.

> return ((unsigned char *) base + offset);
> }
>
> +#ifdef CONFIG_PCI_EXPRESS
> +extern u32 mmcfg_base_address;

Please put that into some header.

> +{
> + struct acpi_table_mcfg *mcfg = NULL;
> +
> + if (!phys_addr || !size)
> + return -EINVAL;
> +
> + mcfg = (struct acpi_table_mcfg *) __acpi_map_table
> + (phys_addr, size);
> + if (!mcfg) {
> + printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
> + return -ENODEV;
> + }
> + if (mcfg->base_address)
> + mmcfg_base_address = (u32)mcfg->base_address;
> + printk(KERN_INFO PREFIX "Local mcfg address %p\n",
> + mcfg->base_address);

Better drop that printk. It's probably not needed and ACPI is already
too noisy.


> + }
> + else if (result < 0) {
> + printk(KERN_ERR PREFIX "Error parsing MCFG\n");
> + return result;
> + }
> + else if (result > 1) {
> + printk(KERN_WARNING PREFIX \

The \ is not needed.

> return NULL;
> }
> #endif
> +#ifdef CONFIG_PCI_EXPRESS
> + else if (!strcmp(str, "no_pcie")) {

Would "no_pciexp" be better? no_pcie looks nearly like a typo.

> + /* Shoot misalligned transaction now */
> + if (reg & (len-1)){
> + printk(KERN_ERR "pci_express_conf_read: \
> + misalligned transaction\n");

misaligned is spelled with one l only (occurs a few more times)

> +#ifdef CONFIG_PCI_EXPRESS
> + if ((pci_probe & PCI_PROBE_ENHANCED) == 0)
> + goto type1;
> + /*
> + *Check if platform we are running is pci express capable

Please always add a space between the * and the text (occurs also a few
more times)

> + */
> + if (mmcfg_base_address == 0){
> + printk(KERN_INFO
> + "MCFG table entry is not found in ACPI
> tables....\n \
> + PCI Express not supported in this platform....\n

on this platform

> +#ifdef CONFIG_PCI_EXPRESS
> +/*
> + *Variable used to store the base address of the last pciexpress device
> + *accessed.
> + */
> +static u32 pcie_last_accessed_device;

static in a header is a bad idea. Make this a global, defined in some file.

> +static __inline__ void pci_exp_set_dev_base (int bus, int devfn)
> +{
> + u32 dev_base =
> + mmcfg_base_address | (bus << 20) | (devfn << 12);
> + if (dev_base != pcie_last_accessed_device){
> + pcie_last_accessed_device = dev_base;
> + set_fixmap (FIX_PCIE_MCFG, dev_base);
> + }
> +}
> +
> +static __inline__ void pci_express_read(int bus, int devfn, int reg,
> + int len, u32 *value)
> +{
> + unsigned long flags;
> + spin_lock_irqsave(&pci_config_lock, flags);
> + pci_exp_set_dev_base(bus, devfn);

You could share/uninline the read/write functions when you made the interface
something like

void *map_addr = pci_exp_map_dev_base(bus, devfn);

... use map_addr... for the access

Having them inline doesn't make much sense anyways because they should
be accessed using function pointers.

> + /* Dummy read to flush PCI write */
> + readl (mmcfg_virt_addr);
> + spin_unlock_irqrestore(&pci_config_lock, flags);

And move the spin lock/unlock into a inline too. Then an 64bit
implementation can just define it as a dummy (not needed when
everything is statically mapped)

-Andi

2004-01-22 10:44:23

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

"Durairaj, Sundarapandian" <[email protected]> wrote:
>
> This is the patch on PCI Express Enhanced configuration for 2.6.0 test11
> ...
> Please review this and send in your comments.

A bit of triviata:

> diff -Naur linux-2.6.0/arch/i386/Kconfig
> linux_pciexpress/arch/i386/Kconfig
> --- linux-2.6.0/arch/i386/Kconfig 2003-12-18 08:28:16.000000000
> +0530

Your mailer wordwrapped it. This seems to be a favourite pastime at Intel
;) You need to struggle with your email client for a while, or give up and
use attachments.

> +++ linux_pciexpress/arch/i386/kernel/acpi/boot.c 2004-01-12
> 14:14:22.000000000 +0530
> @@ -93,6 +93,29 @@
> return ((unsigned char *) base + offset);
> }
>
> +#ifdef CONFIG_PCI_EXPRESS
> +extern u32 mmcfg_base_address;

extern declarations should go in .h files, not in .c.

> +static int __init acpi_parse_mcfg
> + (unsigned long phys_addr, unsigned long size)
> +{
> + struct acpi_table_mcfg *mcfg = NULL;
> +
> + if (!phys_addr || !size)
> + return -EINVAL;
> +
> + mcfg = (struct acpi_table_mcfg *) __acpi_map_table
> + (phys_addr, size);
> + if (!mcfg) {
> + printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
> + return -ENODEV;
> + }
> + if (mcfg->base_address)
> + mmcfg_base_address = (u32)mcfg->base_address;

Is it OK to chop this u64 down to u32? If so, why was it u64?

> +#ifdef CONFIG_PCI_EXPRESS
> + else if (!strcmp(str, "no_pcie")) {
> + pci_probe &= !PCI_PROBE_ENHANCED;
> + return NULL;
> + }

should that be a `!' operator? Or `~'?

> /*
> + *We map full Page size on each request. Incidently that's the size we
> + *have for config space too.
> + */

Conventionally we put a space after the "*" in comments.


> +/*
> + *Variable used to store the virtual address of fixed PTE
> + */
> +char * mmcfg_virt_addr;

But we don't put spaces after this sort of asterisk.

> +
> +static int pci_express_conf_read(int seg, int bus,
> + int devfn, int reg, int len, u32 *value)
> +{
> + if (!value || ((u32)bus > 255) || ((u32)devfn > 255)
> + || ((u32)reg > 4095)){

If you're casting these so as to catch negative ints the cast should be to
`unsigned', not `u32'. Or, better, make this function simply take unsigned
args if negative values are nonsensical.


> + /* Shoot misalligned transaction now */

Spellling error here.

> + if (reg & (len-1)){

Space before the {

> + printk(KERN_ERR "pci_express_conf_read: \
> + misalligned transaction\n");

This string has a bunch of tabs in the middle.

Remove the `\' and do

printk(KERN_ERR "pci_express_conf_read: "
"misalligned transaction\n");


> +static int pci_express_conf_write(int seg, int bus,
> + int devfn, int reg, int len, u32 value)
> +{
> + if (((u32)bus > 255) || ((u32)devfn > 255)
> + || ((u32)reg > 4095)){

See above.

> + printk(KERN_ERR "pci_express_conf_write: \
> + Invalid Parameter\n");

tabs

> + /* Shoot misalligned transaction now */

spelling

> + if (reg & (len-1)){
> + printk(KERN_ERR "pci_express_conf_write: \
> + misalligned transaction\n");

tabs

> + if (mmcfg_base_address == 0){

space

> + printk(KERN_INFO
> + "MCFG table entry is not found in ACPI
> tables....\n \
> + PCI Express not supported in this platform....\n
> \
> + Not enabling Enhanced Configuration....\n");

Use compile-time string concatentation here as well.

> static int proc_initialized; /* = 0 */

The comment here isn't really needed: initalisaton of BSS is kernel
folklore.

>
> +static int pci_cfg_space_size (struct pci_dev *dev)

No space before the opening parenthesis.

> + size = pci_cfg_space_size (dev);

Ditto

> +extern u32 mmcfg_base_address;
> +extern spinlock_t pci_config_lock;
> +extern char * mmcfg_virt_addr;

These should all be in header files.

> +
> +static __inline__ void pci_exp_set_dev_base (int bus, int devfn)
> +static __inline__ void pci_express_read(int bus, int devfn, int reg,

`inline', not __inline__

> + int len, u32 *value)
> +{
> + unsigned long flags;
> + spin_lock_irqsave(&pci_config_lock, flags);
> + pci_exp_set_dev_base(bus, devfn);
> + switch (len) {
> + case 1:
> + *value = (u8)readb((unsigned long) mmcfg_virt_addr +
> reg);
> + break;
> + case 2:
> + *value = (u16)readw((unsigned long) mmcfg_virt_addr +
> reg);
> + break;
> + case 4:
> + *value = (u32)readl((unsigned long) mmcfg_virt_addr +
> reg);

Are these casts needed?

> +static __inline__ void pci_express_write(int bus, int devfn, int reg,

inline

> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&pci_config_lock, flags);
> + pci_exp_set_dev_base(bus, devfn);
> + switch (len) {
> + case 1:
> + writeb(value,(unsigned long)mmcfg_virt_addr +
> reg);
> + break;
> + case 2:
> + writew(value,(unsigned long)mmcfg_virt_addr +
> reg);
> + break;
> + case 4:
> + writel(value,(unsigned long)mmcfg_virt_addr +
> reg);
> + break;
> + }

Are these casts needed?

2004-01-22 17:00:28

by Greg KH

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

On Thu, Jan 22, 2004 at 03:51:22PM +0530, Durairaj, Sundarapandian wrote:
> Hi All,
>
> I am reposting the updated patch after incorporating the review
> comments.

Hm, looks like you have lots of comments already, so I'll wait for your
next revision before making any more about the code.

> This is the patch on PCI Express Enhanced configuration for 2.6.0 test11
> kernel following up to the Vladimir ([email protected]) and
> Harinarayanan ([email protected]) and my previous
> patches .
> I tested it on our i386 platform.

If I could ask, exactly what chipset was this tested on? I'm waist deep
in chipset specs right now, so it would be nice to see if I could try to
test this code out on something that both you have, and have not, tested
it on.

thanks,

greg k-h

2004-01-22 19:46:36

by Randy.Dunlap

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

On Thu, 22 Jan 2004 18:21:04 +0000 Alan Cox <[email protected]> wrote:

| On Iau, 2004-01-22 at 13:12, Andi Kleen wrote:
| > > +#ifdef CONFIG_PCI_EXPRESS
| > > + else if (!strcmp(str, "no_pcie")) {
| >
| > Would "no_pciexp" be better? no_pcie looks nearly like a typo.
|
| Other "nofoo" generally don't use "_" (Linux kernel really needs an
| actual policy document for such stuff tho)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Right, let's keep it consistent, like "nopciexp".

--
~Randy

2004-01-22 11:09:26

by Martin Mares

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

Hello!

> I am reposting the updated patch after incorporating the review comments.

Looks good, but there are still some places to polish (in addition to
Andrew's comments):

> + to access the pci configuration space through enhanced config
> + access mechanism (Will work only on PCI Express based system)

"pci" should be "PCI".

> + mcfg = (struct acpi_table_mcfg *) __acpi_map_table
> + (phys_addr, size);

Wrapping long lines is good, but you seem to overdo it.

> + printk(KERN_INFO PREFIX "Local mcfg address %p\n",
> + mcfg->base_address);

Again -- you should be consistent in usign caps: "MCFG", not "mcfg".

> +#ifdef CONFIG_PCI_EXPRESS
> + else if (!strcmp(str, "no_pcie")) {

Why "no_pcie" with an underscore when existing switches ("noacpi", "nobios"
etc.) don't have one?

> + if (mmcfg_base_address == 0){
> + printk(KERN_INFO
> + "MCFG table entry is not found in ACPI
> tables....\n \
> + PCI Express not supported in this platform....\n
> \
> + Not enabling Enhanced Configuration....\n");
> + goto type1;
> + }

Why printing such a enormous banner for reporting a trivial error?
One line is enough.

> + printk(KERN_INFO "PCI:Using config type PCIExp\n");

"PCI:Using" -> "PCI: Using".

> obj-$(CONFIG_PCI_DIRECT) += direct.o
> +obj-$(CONFIG_PCI_EXPRES) += direct.o

PCI_EXPRES -> PCI_EXPRESS

Also, linking the same object twice doesn't look right.

> +static int pci_cfg_space_size (struct pci_dev *dev)
> +{
> +#ifdef CONFIG_PCI_EXPRESS
> + /* Find whether the device is PCI Express device */
> + int is_pci_express_dev =
> + pci_find_capability(dev, PCI_CAP_ID_EXP);
> + if (is_pci_express_dev)
> + return PCI_CFG_SPACE_EXP_SIZE;
> + else
> +#endif
> + return PCI_CFG_SPACE_SIZE;
> +}

We really shouldn't scan the capability list during each access to /proc/bus/pci.
Better calculate the configuration space size when probing the device
and put it to struct pci_dev.

> +#ifdef CONFIG_PCI_EXPRESS
> +/*
> + *Variable used to store the base address of the last pciexpress device
> + *accessed.
> + */
> +static u32 pcie_last_accessed_device;

Header files should not contain static variables.

> +static __inline__ void pci_express_read(int bus, int devfn, int reg,
> + int len, u32 *value)

Why is this inline?

> + u64 base_address;

If the base_address is 64-bit and you stuff it in a 32-bit variable, you
should check the upper 32 bits and in case they are non-zero, print an error
message.

Have a nice fortnight
--
Martin `MJ' Mares <[email protected]> http://atrey.karlin.mff.cuni.cz/~mj/
Faculty of Math and Physics, Charles University, Prague, Czech Rep., Earth
Entropy isn't what it used to be.

2004-01-23 19:22:05

by Pavel Machek

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

Hi!

> | On Iau, 2004-01-22 at 13:12, Andi Kleen wrote:
> | > > +#ifdef CONFIG_PCI_EXPRESS
> | > > + else if (!strcmp(str, "no_pcie")) {
> | >
> | > Would "no_pciexp" be better? no_pcie looks nearly like a typo.
> |
> | Other "nofoo" generally don't use "_" (Linux kernel really needs an
> | actual policy document for such stuff tho)
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>
> Right, let's keep it consistent, like "nopciexp".

I'd call it "noexpress". pciexp sounds like PCI exception, PCI
expected or something...
Pavel
--
When do you have a heart between your knees?
[Johanka's followup: and *two* hearts?]

2004-01-23 19:31:15

by Martin Mares

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

Hi!

> I'd call it "noexpress". pciexp sounds like PCI exception, PCI
> expected or something...

Well, "noexpress" sounds like it does nothing in common with PCI,
which is more misleading than the connotations you mention.

Have a nice fortnight
--
Martin `MJ' Mares <[email protected]> http://atrey.karlin.mff.cuni.cz/~mj/
Faculty of Math and Physics, Charles University, Prague, Czech Rep., Earth
P.C.M.C.I.A. stands for `People Can't Memorize Computer Industry Acronyms'

2004-01-23 20:04:49

by Stefan Smietanowski

[permalink] [raw]
Subject: Re: [patch] PCI Express Enhanced Config Patch - 2.6.0-test11

Martin Mares wrote:

> Hi!
>
>
>>I'd call it "noexpress". pciexp sounds like PCI exception, PCI
>>expected or something...
>
>
> Well, "noexpress" sounds like it does nothing in common with PCI,
> which is more misleading than the connotations you mention.

nopciexpress sounds better than but it's sorta long...

// Stefan