Hi Greg,
I've updated the series of patches for PCI legacy I/O port free driver
to be applied to 2.6.16-mm1. The previous version of patches conflicts
with some changes to e1000 driver. I also confirmed the updated one
can be applied to 2.6.16-git7 though I got some warnings.
I'm attaching the brief description below about what the problem I'm
trying to solve is.
Thanks,
Kenji Kaneshige
Brief Description
~~~~~~~~~~~~~~~~~
I encountered a problem that some PCI devices don't work on my system
which have huge number of PCI devices.
It is mandatory for all PCI device drivers to enable the device by
calling pci_enable_device() which enables all regions probed from the
device's BARs. If pci_enable_device() failes to enable any regions
probed from BARs, it returns as error. On the large servers, I/O port
resource could not be assigned to all PCI devices because it is
limited (64KB on Intel Architecture[1]) and it would be fragmented
(I/O base register of PCI-to-PCI bridge will usually be aligned to a
4KB boundary[2]). In this case, the devices which have no I/O port
resource assigned don't work because pci_enable_device() for those
devices failes. This is what happened on my machine.
---
[1]: Some machines support 64KB I/O port space per PCI segment.
[2]: Some P2P bridges support optional 1KB aligned I/O base.
Here, there are many PCI devices that provide both I/O port and MMIO
interface, and some of those devices can be handled without using I/O
port interface. The reason why such devices provide I/O port interface
is for compatibility to legacy OSs. So this kind of devices should
work even if enough I/O port resources are not assigned. The "PCI
Local Bus Specification Revision 3.0" also mentions about this topic
(Please see p.44, "IMPLEMENTATION NOTE"). On the current linux,
unfortunately, this kind of devices don't work if I/O port resources
are not assigned, because pci_enable_device() for those devices fails.
This patch adds the following changes into generic PCI code especially
for PCI legacy free drivers.
- Moved the following two things from pci_enable_device() into
pci_enable_device_bars(). By this change, we can use
pci_enable_device_bars() to enable only the specific regions.
o Call pci_fixup_device() on the device
o Set dev->is_enabled
- Added new field 'bars_enabled' into struct pci_device to
remember which BARs already enabled. This new field is
initialized at pci_enable_device_bars() time and cleared
at pci_disable_device() time.
- Changed pci_request_regions()/pci_release_regions() to
request/release only the regions which have already been
enabled.
- Added helper routine pci_select_bars() which makes proper mask
of BARs from the specified resource type. This would be very
helpful for users of pci_enable_device_bars().
Signed-off-by: Kenji Kaneshige <[email protected]>
---
drivers/pci/pci-driver.c | 3 ++-
drivers/pci/pci.c | 30 +++++++++++++++++++++++-------
include/linux/pci.h | 12 ++++++++++++
3 files changed, 37 insertions(+), 8 deletions(-)
Index: linux-2.6.16-mm1/drivers/pci/pci-driver.c
===================================================================
--- linux-2.6.16-mm1.orig/drivers/pci/pci-driver.c 2006-03-23 20:04:06.000000000 +0900
+++ linux-2.6.16-mm1/drivers/pci/pci-driver.c 2006-03-23 20:04:11.000000000 +0900
@@ -293,7 +293,8 @@
pci_restore_state(pci_dev);
/* if the device was enabled before suspend, reenable */
if (pci_dev->is_enabled)
- retval = pci_enable_device(pci_dev);
+ retval = pci_enable_device_bars(pci_dev,
+ pci_dev->bars_enabled);
/* if the device was busmaster before the suspend, make it busmaster again */
if (pci_dev->is_busmaster)
pci_set_master(pci_dev);
Index: linux-2.6.16-mm1/drivers/pci/pci.c
===================================================================
--- linux-2.6.16-mm1.orig/drivers/pci/pci.c 2006-03-23 20:04:06.000000000 +0900
+++ linux-2.6.16-mm1/drivers/pci/pci.c 2006-03-23 20:04:11.000000000 +0900
@@ -493,6 +493,9 @@
err = pcibios_enable_device(dev, bars);
if (err < 0)
return err;
+ pci_fixup_device(pci_fixup_enable, dev);
+ dev->is_enabled = 1;
+ dev->bars_enabled = bars;
return 0;
}
@@ -510,8 +513,6 @@
int err = pci_enable_device_bars(dev, (1 << PCI_NUM_RESOURCES) - 1);
if (err)
return err;
- pci_fixup_device(pci_fixup_enable, dev);
- dev->is_enabled = 1;
return 0;
}
@@ -546,6 +547,7 @@
pcibios_disable_device(dev);
dev->is_enabled = 0;
+ dev->bars_enabled = 0;
}
/**
@@ -628,6 +630,12 @@
{
if (pci_resource_len(pdev, bar) == 0)
return;
+ if (!(pdev->bars_enabled & (1 << bar))) {
+ dev_warn(&pdev->dev,
+ "Trying to release region #%d that is not enabled\n",
+ bar + 1);
+ return;
+ }
if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
release_region(pci_resource_start(pdev, bar),
pci_resource_len(pdev, bar));
@@ -654,7 +662,12 @@
{
if (pci_resource_len(pdev, bar) == 0)
return 0;
-
+ if (!(pdev->bars_enabled & (1 << bar))) {
+ dev_warn(&pdev->dev,
+ "Trying to request region #%d that is not enabled\n",
+ bar + 1);
+ goto err_out;
+ }
if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) {
if (!request_region(pci_resource_start(pdev, bar),
pci_resource_len(pdev, bar), res_name))
@@ -692,7 +705,8 @@
int i;
for (i = 0; i < 6; i++)
- pci_release_region(pdev, i);
+ if (pdev->bars_enabled & (1 << i))
+ pci_release_region(pdev, i);
}
/**
@@ -713,13 +727,15 @@
int i;
for (i = 0; i < 6; i++)
- if(pci_request_region(pdev, i, res_name))
- goto err_out;
+ if (pdev->bars_enabled & (1 << i))
+ if(pci_request_region(pdev, i, res_name))
+ goto err_out;
return 0;
err_out:
while(--i >= 0)
- pci_release_region(pdev, i);
+ if (pdev->bars_enabled & (1 << i))
+ pci_release_region(pdev, i);
return -EBUSY;
}
Index: linux-2.6.16-mm1/include/linux/pci.h
===================================================================
--- linux-2.6.16-mm1.orig/include/linux/pci.h 2006-03-23 20:04:06.000000000 +0900
+++ linux-2.6.16-mm1/include/linux/pci.h 2006-03-23 20:04:11.000000000 +0900
@@ -169,6 +169,7 @@
struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */
int rom_attr_enabled; /* has display of the rom attribute been enabled? */
struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
+ int bars_enabled; /* BARs enabled */
};
#define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)
@@ -732,6 +733,17 @@
}
#endif /* HAVE_ARCH_PCI_RESOURCE_TO_USER */
+/*
+ * This helper routine makes bar mask from the type of resource.
+ */
+static inline int pci_select_bars(struct pci_dev *dev, unsigned long flags)
+{
+ int i, bars = 0;
+ for (i = 0; i < PCI_NUM_RESOURCES; i++)
+ if (pci_resource_flags(dev, i) & flags)
+ bars |= (1 << i);
+ return bars;
+}
/*
* The world is not perfect and supplies us with broken PCI devices.
This patch adds the description about legacy I/O port free driver into
Documentation/pci.txt.
Signed-off-by: Kenji Kaneshige <[email protected]>
Documentation/pci.txt | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 67 insertions(+)
Index: linux-2.6.16-mm1/Documentation/pci.txt
===================================================================
--- linux-2.6.16-mm1.orig/Documentation/pci.txt 2006-03-23 20:04:06.000000000 +0900
+++ linux-2.6.16-mm1/Documentation/pci.txt 2006-03-23 20:04:12.000000000 +0900
@@ -269,3 +269,70 @@
pci_find_device() Superseded by pci_get_device()
pci_find_subsys() Superseded by pci_get_subsys()
pci_find_slot() Superseded by pci_get_slot()
+
+
+9. Legacy I/O port free driver
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Large servers may not be able to provide I/O port resources to all PCI
+devices. I/O Port space is only 64KB on Intel Architecture[1] and is
+likely also fragmented since the I/O base register of PCI-to-PCI
+bridge will usually be aligned to a 4KB boundary[2]. On such systems,
+pci_enable_device() and pci_request_regions() will fail when
+attempting to enable I/O Port regions that don't have I/O Port
+resources assigned.
+
+Fortunately, many PCI devices which request I/O Port resources also
+provide access to the same registers via MMIO BARs. These devices can
+be handled without using I/O port space and the drivers typically
+offer a CONFIG_ option to only use MMIO regions
+(e.g. CONFIG_TULIP_MMIO). PCI devices typically provide I/O port
+interface for legacy OSs and will work when I/O port resources are not
+assigned. The "PCI Local Bus Specification Revision 3.0" discusses
+this on p.44, "IMPLEMENTATION NOTE".
+
+If your PCI device driver doesn't need I/O port resources assigned to
+I/O Port BARs, you should use pci_enable_device_bars() instead of
+pci_enable_device() in order not to enable I/O port regions for the
+corresponding devices.
+
+[1] Some systems support 64KB I/O port space per PCI segment.
+[2] Some PCI-to-PCI bridges support optional 1KB aligned I/O base.
+
+
+10. MMIO Space and "Write Posting"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Converting a driver from using I/O Port space to using MMIO space
+often requires some additional changes. Specifically, "write posting"
+needs to be handled. Most drivers (e.g. tg3, acenic, sym53c8xx_2)
+already do. I/O Port space guarantees write transactions reach the PCI
+device before the CPU can continue. Writes to MMIO space allow to CPU
+continue before the transaction reaches the PCI device. HW weenies
+call this "Write Posting" because the write completion is "posted" to
+the CPU before the transaction has reached it's destination.
+
+Thus, timing sensitive code should add readl() where the CPU is
+expected to wait before doing other work. The classic "bit banging"
+sequence works fine for I/O Port space:
+
+ for (i=8; --i; val >>= 1) {
+ outb(val & 1, ioport_reg); /* write bit */
+ udelay(10);
+ }
+
+The same sequence for MMIO space should be:
+
+ for (i=8; --i; val >>= 1) {
+ writeb(val & 1, mmio_reg); /* write bit */
+ readb(safe_mmio_reg); /* flush posted write */
+ udelay(10);
+ }
+
+It is important that "safe_mmio_reg" not have any side effects that
+interferes with the correct operation of the device.
+
+Another case to watch out for is when resetting a PCI device. Use PCI
+Configuration space reads to flush the writel(). This will gracefully
+handle the PCI master abort on all platforms if the PCI device is
+expected to not respond to a readl(). Most x86 platforms will allow
+MMIO reads to master abort (aka "Soft Fail") and return garbage
+(e.g. ~0). But many RISC platforms will crash (aka "Hard Fail").
This patch makes Intel e1000 driver legacy I/O port free.
Signed-off-by: Kenji Kaneshige <[email protected]>
---
drivers/net/e1000/e1000.h | 6 +-
drivers/net/e1000/e1000_main.c | 123 ++++++++++++++++++++++-------------------
2 files changed, 71 insertions(+), 58 deletions(-)
Index: linux-2.6.16-mm1/drivers/net/e1000/e1000.h
===================================================================
--- linux-2.6.16-mm1.orig/drivers/net/e1000/e1000.h 2006-03-23 20:04:06.000000000 +0900
+++ linux-2.6.16-mm1/drivers/net/e1000/e1000.h 2006-03-23 20:04:13.000000000 +0900
@@ -77,8 +77,9 @@
#define BAR_1 1
#define BAR_5 5
-#define INTEL_E1000_ETHERNET_DEVICE(device_id) {\
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)}
+#define E1000_NO_IOPORT (1 << 0)
+#define INTEL_E1000_ETHERNET_DEVICE(device_id, flags) {\
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id), .driver_data = flags}
struct e1000_adapter;
@@ -338,6 +339,7 @@
#ifdef NETIF_F_TSO
boolean_t tso_force;
#endif
+ int bars; /* BARs to be enabled */
};
Index: linux-2.6.16-mm1/drivers/net/e1000/e1000_main.c
===================================================================
--- linux-2.6.16-mm1.orig/drivers/net/e1000/e1000_main.c 2006-03-23 20:04:06.000000000 +0900
+++ linux-2.6.16-mm1/drivers/net/e1000/e1000_main.c 2006-03-23 20:04:13.000000000 +0900
@@ -86,54 +86,54 @@
* {PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)}
*/
static struct pci_device_id e1000_pci_tbl[] = {
- INTEL_E1000_ETHERNET_DEVICE(0x1000),
- INTEL_E1000_ETHERNET_DEVICE(0x1001),
- INTEL_E1000_ETHERNET_DEVICE(0x1004),
- INTEL_E1000_ETHERNET_DEVICE(0x1008),
- INTEL_E1000_ETHERNET_DEVICE(0x1009),
- INTEL_E1000_ETHERNET_DEVICE(0x100C),
- INTEL_E1000_ETHERNET_DEVICE(0x100D),
- INTEL_E1000_ETHERNET_DEVICE(0x100E),
- INTEL_E1000_ETHERNET_DEVICE(0x100F),
- INTEL_E1000_ETHERNET_DEVICE(0x1010),
- INTEL_E1000_ETHERNET_DEVICE(0x1011),
- INTEL_E1000_ETHERNET_DEVICE(0x1012),
- INTEL_E1000_ETHERNET_DEVICE(0x1013),
- INTEL_E1000_ETHERNET_DEVICE(0x1014),
- INTEL_E1000_ETHERNET_DEVICE(0x1015),
- INTEL_E1000_ETHERNET_DEVICE(0x1016),
- INTEL_E1000_ETHERNET_DEVICE(0x1017),
- INTEL_E1000_ETHERNET_DEVICE(0x1018),
- INTEL_E1000_ETHERNET_DEVICE(0x1019),
- INTEL_E1000_ETHERNET_DEVICE(0x101A),
- INTEL_E1000_ETHERNET_DEVICE(0x101D),
- INTEL_E1000_ETHERNET_DEVICE(0x101E),
- INTEL_E1000_ETHERNET_DEVICE(0x1026),
- INTEL_E1000_ETHERNET_DEVICE(0x1027),
- INTEL_E1000_ETHERNET_DEVICE(0x1028),
- INTEL_E1000_ETHERNET_DEVICE(0x105E),
- INTEL_E1000_ETHERNET_DEVICE(0x105F),
- INTEL_E1000_ETHERNET_DEVICE(0x1060),
- INTEL_E1000_ETHERNET_DEVICE(0x1075),
- INTEL_E1000_ETHERNET_DEVICE(0x1076),
- INTEL_E1000_ETHERNET_DEVICE(0x1077),
- INTEL_E1000_ETHERNET_DEVICE(0x1078),
- INTEL_E1000_ETHERNET_DEVICE(0x1079),
- INTEL_E1000_ETHERNET_DEVICE(0x107A),
- INTEL_E1000_ETHERNET_DEVICE(0x107B),
- INTEL_E1000_ETHERNET_DEVICE(0x107C),
- INTEL_E1000_ETHERNET_DEVICE(0x107D),
- INTEL_E1000_ETHERNET_DEVICE(0x107E),
- INTEL_E1000_ETHERNET_DEVICE(0x107F),
- INTEL_E1000_ETHERNET_DEVICE(0x108A),
- INTEL_E1000_ETHERNET_DEVICE(0x108B),
- INTEL_E1000_ETHERNET_DEVICE(0x108C),
- INTEL_E1000_ETHERNET_DEVICE(0x1096),
- INTEL_E1000_ETHERNET_DEVICE(0x1098),
- INTEL_E1000_ETHERNET_DEVICE(0x1099),
- INTEL_E1000_ETHERNET_DEVICE(0x109A),
- INTEL_E1000_ETHERNET_DEVICE(0x10B5),
- INTEL_E1000_ETHERNET_DEVICE(0x10B9),
+ INTEL_E1000_ETHERNET_DEVICE(0x1000, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x1001, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x1004, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x1008, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1009, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x100C, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x100D, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x100E, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x100F, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1010, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1011, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1012, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1013, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1014, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x1015, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1016, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1017, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1018, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1019, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x101A, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x101D, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x101E, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1026, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x1027, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x1028, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x105E, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x105F, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x1060, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x1075, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x1076, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1077, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1078, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1079, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x107A, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x107B, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x107C, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x107D, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x107E, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x107F, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x108A, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x108B, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x108C, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x1096, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1098, 0),
+ INTEL_E1000_ETHERNET_DEVICE(0x1099, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x109A, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x10B5, E1000_NO_IOPORT),
+ INTEL_E1000_ETHERNET_DEVICE(0x10B9, 0),
/* required last entry */
{0,}
};
@@ -619,7 +619,14 @@
int i, err, pci_using_dac;
uint16_t eeprom_data;
uint16_t eeprom_apme_mask = E1000_EEPROM_APME;
- if ((err = pci_enable_device(pdev)))
+ int bars;
+
+ if (ent->driver_data & E1000_NO_IOPORT)
+ bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ else
+ bars = pci_select_bars(pdev, IORESOURCE_MEM | IORESOURCE_IO);
+
+ if ((err = pci_enable_device_bars(pdev, bars)))
return err;
if (!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK))) {
@@ -652,6 +659,7 @@
adapter->pdev = pdev;
adapter->hw.back = adapter;
adapter->msg_enable = (1 << debug) - 1;
+ adapter->bars = bars;
mmio_start = pci_resource_start(pdev, BAR_0);
mmio_len = pci_resource_len(pdev, BAR_0);
@@ -662,12 +670,15 @@
goto err_ioremap;
}
- for (i = BAR_1; i <= BAR_5; i++) {
- if (pci_resource_len(pdev, i) == 0)
- continue;
- if (pci_resource_flags(pdev, i) & IORESOURCE_IO) {
- adapter->hw.io_base = pci_resource_start(pdev, i);
- break;
+ if (!(ent->driver_data & E1000_NO_IOPORT)) {
+ for (i = BAR_1; i <= BAR_5; i++) {
+ if (pci_resource_len(pdev, i) == 0)
+ continue;
+ if (pci_resource_flags(pdev, i) & IORESOURCE_IO) {
+ adapter->hw.io_base =
+ pci_resource_start(pdev, i);
+ break;
+ }
}
}
@@ -4575,7 +4586,7 @@
if (retval)
DPRINTK(PROBE, ERR, "Error in setting power state\n");
e1000_pci_restore_state(adapter);
- ret_val = pci_enable_device(pdev);
+ ret_val = pci_enable_device_bars(pdev, adapter->bars);
pci_set_master(pdev);
retval = pci_enable_wake(pdev, PCI_D3hot, 0);
This patch makes Emulex lpfc driver legacy I/O port free.
Signed-off-by: Kenji Kaneshige <[email protected]>
---
drivers/scsi/lpfc/lpfc_init.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletion(-)
Index: linux-2.6.16-mm1/drivers/scsi/lpfc/lpfc_init.c
===================================================================
--- linux-2.6.16-mm1.orig/drivers/scsi/lpfc/lpfc_init.c 2006-03-23 20:04:05.000000000 +0900
+++ linux-2.6.16-mm1/drivers/scsi/lpfc/lpfc_init.c 2006-03-23 20:04:14.000000000 +0900
@@ -1434,8 +1434,9 @@
int error = -ENODEV, retval;
int i;
uint16_t iotag;
+ int bars = pci_select_bars(pdev, IORESOURCE_MEM);
- if (pci_enable_device(pdev))
+ if (pci_enable_device_bars(pdev, bars))
goto out;
if (pci_request_regions(pdev, LPFC_DRIVER_NAME))
goto out_disable_device;
Kenji Kaneshige <[email protected]> wrote:
>
> +/*
> + * This helper routine makes bar mask from the type of resource.
> + */
> +static inline int pci_select_bars(struct pci_dev *dev, unsigned long flags)
> +{
> + int i, bars = 0;
> + for (i = 0; i < PCI_NUM_RESOURCES; i++)
> + if (pci_resource_flags(dev, i) & flags)
> + bars |= (1 << i);
> + return bars;
> +}
Can we please uninline this?
On Fri, Mar 24, 2006 at 02:32:25PM +0900, Kenji Kaneshige wrote:
> This patch adds the description about legacy I/O port free driver into
> Documentation/pci.txt.
I very much appreciate Kenji has pursued this.
I edited much of the original text and wrote some parts
of the "MMIO Write Posting" chapter. Errors in those
bits are most likely mine.
Please add my S-o-B line to the commit log in case the
language needs further editing. I already see one typo
("to CPU continue" -> "the CPU to continue") and will
submit a patch if anyone has more.
thanks,
grant
Signed-off-by: Grant Grundler <[email protected]>
> Signed-off-by: Kenji Kaneshige <[email protected]>
>
> Documentation/pci.txt | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 67 insertions(+)
>
> Index: linux-2.6.16-mm1/Documentation/pci.txt
> ===================================================================
> --- linux-2.6.16-mm1.orig/Documentation/pci.txt 2006-03-23 20:04:06.000000000 +0900
> +++ linux-2.6.16-mm1/Documentation/pci.txt 2006-03-23 20:04:12.000000000 +0900
> @@ -269,3 +269,70 @@
> pci_find_device() Superseded by pci_get_device()
> pci_find_subsys() Superseded by pci_get_subsys()
> pci_find_slot() Superseded by pci_get_slot()
> +
> +
> +9. Legacy I/O port free driver
> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> +Large servers may not be able to provide I/O port resources to all PCI
> +devices. I/O Port space is only 64KB on Intel Architecture[1] and is
> +likely also fragmented since the I/O base register of PCI-to-PCI
> +bridge will usually be aligned to a 4KB boundary[2]. On such systems,
> +pci_enable_device() and pci_request_regions() will fail when
> +attempting to enable I/O Port regions that don't have I/O Port
> +resources assigned.
> +
> +Fortunately, many PCI devices which request I/O Port resources also
> +provide access to the same registers via MMIO BARs. These devices can
> +be handled without using I/O port space and the drivers typically
> +offer a CONFIG_ option to only use MMIO regions
> +(e.g. CONFIG_TULIP_MMIO). PCI devices typically provide I/O port
> +interface for legacy OSs and will work when I/O port resources are not
> +assigned. The "PCI Local Bus Specification Revision 3.0" discusses
> +this on p.44, "IMPLEMENTATION NOTE".
> +
> +If your PCI device driver doesn't need I/O port resources assigned to
> +I/O Port BARs, you should use pci_enable_device_bars() instead of
> +pci_enable_device() in order not to enable I/O port regions for the
> +corresponding devices.
> +
> +[1] Some systems support 64KB I/O port space per PCI segment.
> +[2] Some PCI-to-PCI bridges support optional 1KB aligned I/O base.
> +
> +
> +10. MMIO Space and "Write Posting"
> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> +Converting a driver from using I/O Port space to using MMIO space
> +often requires some additional changes. Specifically, "write posting"
> +needs to be handled. Most drivers (e.g. tg3, acenic, sym53c8xx_2)
> +already do. I/O Port space guarantees write transactions reach the PCI
> +device before the CPU can continue. Writes to MMIO space allow to CPU
> +continue before the transaction reaches the PCI device. HW weenies
> +call this "Write Posting" because the write completion is "posted" to
> +the CPU before the transaction has reached it's destination.
> +
> +Thus, timing sensitive code should add readl() where the CPU is
> +expected to wait before doing other work. The classic "bit banging"
> +sequence works fine for I/O Port space:
> +
> + for (i=8; --i; val >>= 1) {
> + outb(val & 1, ioport_reg); /* write bit */
> + udelay(10);
> + }
> +
> +The same sequence for MMIO space should be:
> +
> + for (i=8; --i; val >>= 1) {
> + writeb(val & 1, mmio_reg); /* write bit */
> + readb(safe_mmio_reg); /* flush posted write */
> + udelay(10);
> + }
> +
> +It is important that "safe_mmio_reg" not have any side effects that
> +interferes with the correct operation of the device.
> +
> +Another case to watch out for is when resetting a PCI device. Use PCI
> +Configuration space reads to flush the writel(). This will gracefully
> +handle the PCI master abort on all platforms if the PCI device is
> +expected to not respond to a readl(). Most x86 platforms will allow
> +MMIO reads to master abort (aka "Soft Fail") and return garbage
> +(e.g. ~0). But many RISC platforms will crash (aka "Hard Fail").
Andrew Morton wrote:
> Kenji Kaneshige <[email protected]> wrote:
>
>>+/*
>> + * This helper routine makes bar mask from the type of resource.
>> + */
>> +static inline int pci_select_bars(struct pci_dev *dev, unsigned long flags)
>> +{
>> + int i, bars = 0;
>> + for (i = 0; i < PCI_NUM_RESOURCES; i++)
>> + if (pci_resource_flags(dev, i) & flags)
>> + bars |= (1 << i);
>> + return bars;
>> +}
>
>
> Can we please uninline this?
>
OK. Here is an updated one.
Thanks,
Kenji Kaneshige
This patch adds the following changes into generic PCI code especially
for PCI legacy free drivers.
- Moved the following two things from pci_enable_device() into
pci_enable_device_bars(). By this change, we can use
pci_enable_device_bars() to enable only the specific regions.
o Call pci_fixup_device() on the device
o Set dev->is_enabled
- Added new field 'bars_enabled' into struct pci_device to
remember which BARs already enabled. This new field is
initialized at pci_enable_device_bars() time and cleared
at pci_disable_device() time.
- Changed pci_request_regions()/pci_release_regions() to
request/release only the regions which have already been
enabled.
- Added helper routine pci_select_bars() which makes proper mask
of BARs from the specified resource type. This would be very
helpful for users of pci_enable_device_bars().
Signed-off-by: Kenji Kaneshige <[email protected]>
---
drivers/pci/pci-driver.c | 3 ++-
drivers/pci/pci.c | 47 ++++++++++++++++++++++++++++++++++++++++-------
include/linux/pci.h | 2 ++
3 files changed, 44 insertions(+), 8 deletions(-)
Index: linux-2.6.16-mm1/drivers/pci/pci-driver.c
===================================================================
--- linux-2.6.16-mm1.orig/drivers/pci/pci-driver.c 2006-03-27 11:58:23.000000000 +0900
+++ linux-2.6.16-mm1/drivers/pci/pci-driver.c 2006-03-27 11:58:25.000000000 +0900
@@ -293,7 +293,8 @@
pci_restore_state(pci_dev);
/* if the device was enabled before suspend, reenable */
if (pci_dev->is_enabled)
- retval = pci_enable_device(pci_dev);
+ retval = pci_enable_device_bars(pci_dev,
+ pci_dev->bars_enabled);
/* if the device was busmaster before the suspend, make it busmaster again */
if (pci_dev->is_busmaster)
pci_set_master(pci_dev);
Index: linux-2.6.16-mm1/drivers/pci/pci.c
===================================================================
--- linux-2.6.16-mm1.orig/drivers/pci/pci.c 2006-03-27 11:58:23.000000000 +0900
+++ linux-2.6.16-mm1/drivers/pci/pci.c 2006-03-27 13:33:32.000000000 +0900
@@ -493,6 +493,9 @@
err = pcibios_enable_device(dev, bars);
if (err < 0)
return err;
+ pci_fixup_device(pci_fixup_enable, dev);
+ dev->is_enabled = 1;
+ dev->bars_enabled = bars;
return 0;
}
@@ -510,8 +513,6 @@
int err = pci_enable_device_bars(dev, (1 << PCI_NUM_RESOURCES) - 1);
if (err)
return err;
- pci_fixup_device(pci_fixup_enable, dev);
- dev->is_enabled = 1;
return 0;
}
@@ -546,6 +547,7 @@
pcibios_disable_device(dev);
dev->is_enabled = 0;
+ dev->bars_enabled = 0;
}
/**
@@ -628,6 +630,12 @@
{
if (pci_resource_len(pdev, bar) == 0)
return;
+ if (!(pdev->bars_enabled & (1 << bar))) {
+ dev_warn(&pdev->dev,
+ "Trying to release region #%d that is not enabled\n",
+ bar + 1);
+ return;
+ }
if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
release_region(pci_resource_start(pdev, bar),
pci_resource_len(pdev, bar));
@@ -654,7 +662,12 @@
{
if (pci_resource_len(pdev, bar) == 0)
return 0;
-
+ if (!(pdev->bars_enabled & (1 << bar))) {
+ dev_warn(&pdev->dev,
+ "Trying to request region #%d that is not enabled\n",
+ bar + 1);
+ goto err_out;
+ }
if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) {
if (!request_region(pci_resource_start(pdev, bar),
pci_resource_len(pdev, bar), res_name))
@@ -692,7 +705,8 @@
int i;
for (i = 0; i < 6; i++)
- pci_release_region(pdev, i);
+ if (pdev->bars_enabled & (1 << i))
+ pci_release_region(pdev, i);
}
/**
@@ -713,13 +727,15 @@
int i;
for (i = 0; i < 6; i++)
- if(pci_request_region(pdev, i, res_name))
- goto err_out;
+ if (pdev->bars_enabled & (1 << i))
+ if(pci_request_region(pdev, i, res_name))
+ goto err_out;
return 0;
err_out:
while(--i >= 0)
- pci_release_region(pdev, i);
+ if (pdev->bars_enabled & (1 << i))
+ pci_release_region(pdev, i);
return -EBUSY;
}
@@ -894,6 +910,22 @@
}
#endif
+/**
+ * pci_select_bars - Make BAR mask from the type of resource
+ * @pdev: the PCI device for which BAR mask is made
+ * @flags: resource type mask to be selected
+ *
+ * This helper routine makes bar mask from the type of resource.
+ */
+int pci_select_bars(struct pci_dev *dev, unsigned long flags)
+{
+ int i, bars = 0;
+ for (i = 0; i < PCI_NUM_RESOURCES; i++)
+ if (pci_resource_flags(dev, i) & flags)
+ bars |= (1 << i);
+ return bars;
+}
+
static int __devinit pci_init(void)
{
struct pci_dev *dev = NULL;
@@ -951,6 +983,7 @@
EXPORT_SYMBOL(pci_set_consistent_dma_mask);
EXPORT_SYMBOL(pci_assign_resource);
EXPORT_SYMBOL(pci_find_parent_resource);
+EXPORT_SYMBOL(pci_select_bars);
EXPORT_SYMBOL(pci_set_power_state);
EXPORT_SYMBOL(pci_save_state);
Index: linux-2.6.16-mm1/include/linux/pci.h
===================================================================
--- linux-2.6.16-mm1.orig/include/linux/pci.h 2006-03-27 11:58:23.000000000 +0900
+++ linux-2.6.16-mm1/include/linux/pci.h 2006-03-27 12:13:43.000000000 +0900
@@ -169,6 +169,7 @@
struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */
int rom_attr_enabled; /* has display of the rom attribute been enabled? */
struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
+ int bars_enabled; /* BARs enabled */
};
#define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)
@@ -497,6 +498,7 @@
void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno);
int pci_assign_resource(struct pci_dev *dev, int i);
void pci_restore_bars(struct pci_dev *dev);
+int pci_select_bars(struct pci_dev *dev, unsigned long flags);
/* ROM control related routines */
void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size);