Some PCI adapters (eg. ipr scsi adapters) have an exposure today in that
they issue BIST to the adapter to reset the card. If, during the time
it takes to complete BIST, userspace attempts to access PCI config space,
the host bus bridge will master abort the access since the ipr adapter
does not respond on the PCI bus for a brief period of time when running BIST.
On PPC64 hardware, this master abort results in the host PCI bridge
isolating that PCI device from the rest of the system, making the device
unusable until Linux is rebooted. This patch is an attempt to close that
exposure by introducing some blocking code in the PCI code. When blocked,
writes will be humored and reads will return the cached value. Ben
Herrenschmidt has also mentioned that he plans to use this in PPC power
management.
Signed-off-by: Brian King <[email protected]>
---
linux-2.6.10-rc2-bk4-bjking1/drivers/pci/access.c | 104 ++++++++++++++++++++++
linux-2.6.10-rc2-bk4-bjking1/include/linux/pci.h | 37 ++-----
2 files changed, 115 insertions(+), 26 deletions(-)
diff -puN include/linux/pci.h~pci_block_config_io_during_bist include/linux/pci.h
--- linux-2.6.10-rc2-bk4/include/linux/pci.h~pci_block_config_io_during_bist 2004-11-19 13:31:29.000000000 -0600
+++ linux-2.6.10-rc2-bk4-bjking1/include/linux/pci.h 2004-11-19 13:31:29.000000000 -0600
@@ -535,7 +535,8 @@ struct pci_dev {
/* keep track of device state */
unsigned int is_enabled:1; /* pci_enable_device has been called */
unsigned int is_busmaster:1; /* device is busmaster */
-
+ unsigned int block_cfg_access:1; /* config space access is blocked */
+
u32 saved_config_space[16]; /* config space saved at suspend time */
struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */
int rom_attr_enabled; /* has display of the rom attribute been enabled? */
@@ -750,31 +751,12 @@ int pci_bus_read_config_dword (struct pc
int pci_bus_write_config_byte (struct pci_bus *bus, unsigned int devfn, int where, u8 val);
int pci_bus_write_config_word (struct pci_bus *bus, unsigned int devfn, int where, u16 val);
int pci_bus_write_config_dword (struct pci_bus *bus, unsigned int devfn, int where, u32 val);
-
-static inline int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val)
-{
- return pci_bus_read_config_byte (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_read_config_word(struct pci_dev *dev, int where, u16 *val)
-{
- return pci_bus_read_config_word (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_read_config_dword(struct pci_dev *dev, int where, u32 *val)
-{
- return pci_bus_read_config_dword (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_write_config_byte(struct pci_dev *dev, int where, u8 val)
-{
- return pci_bus_write_config_byte (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_write_config_word(struct pci_dev *dev, int where, u16 val)
-{
- return pci_bus_write_config_word (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_write_config_dword(struct pci_dev *dev, int where, u32 val)
-{
- return pci_bus_write_config_dword (dev->bus, dev->devfn, where, val);
-}
+int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val);
+int pci_read_config_word(struct pci_dev *dev, int where, u16 *val);
+int pci_read_config_dword(struct pci_dev *dev, int where, u32 *val);
+int pci_write_config_byte(struct pci_dev *dev, int where, u8 val);
+int pci_write_config_word(struct pci_dev *dev, int where, u16 val);
+int pci_write_config_dword(struct pci_dev *dev, int where, u32 val);
int pci_enable_device(struct pci_dev *dev);
int pci_enable_device_bars(struct pci_dev *dev, int mask);
@@ -870,6 +852,9 @@ extern void pci_disable_msix(struct pci_
extern void msi_remove_pci_irq_vectors(struct pci_dev *dev);
#endif
+extern int pci_start_bist(struct pci_dev *dev);
+extern void pci_block_config_access(struct pci_dev *dev);
+extern void pci_unblock_config_access(struct pci_dev *dev);
#endif /* CONFIG_PCI */
/* Include architecture-dependent settings and functions */
diff -puN drivers/pci/access.c~pci_block_config_io_during_bist drivers/pci/access.c
--- linux-2.6.10-rc2-bk4/drivers/pci/access.c~pci_block_config_io_during_bist 2004-11-19 13:31:29.000000000 -0600
+++ linux-2.6.10-rc2-bk4-bjking1/drivers/pci/access.c 2004-11-19 14:16:25.000000000 -0600
@@ -8,6 +8,7 @@
*/
static spinlock_t pci_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t access_lock = SPIN_LOCK_UNLOCKED;
/*
* Wrappers for all PCI configuration access functions. They just check
@@ -60,3 +61,106 @@ EXPORT_SYMBOL(pci_bus_read_config_dword)
EXPORT_SYMBOL(pci_bus_write_config_byte);
EXPORT_SYMBOL(pci_bus_write_config_word);
EXPORT_SYMBOL(pci_bus_write_config_dword);
+
+#define PCI_READ_CONFIG(size,type) \
+int pci_read_config_##size \
+ (struct pci_dev *dev, int pos, type *val) \
+{ \
+ unsigned long flags; \
+ int ret = 0; \
+ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
+ spin_lock_irqsave(&access_lock, flags); \
+ if (!dev->block_cfg_access) \
+ ret = pci_bus_read_config_##size(dev->bus, dev->devfn, pos, val); \
+ else if (pos < sizeof(dev->saved_config_space)) \
+ *val = (type)dev->saved_config_space[pos/sizeof(dev->saved_config_space[0])]; \
+ else \
+ *val = -1; \
+ spin_unlock_irqrestore(&access_lock, flags); \
+ return ret; \
+}
+
+#define PCI_WRITE_CONFIG(size,type) \
+int pci_write_config_##size \
+ (struct pci_dev *dev, int pos, type val) \
+{ \
+ unsigned long flags; \
+ int ret = 0; \
+ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
+ spin_lock_irqsave(&access_lock, flags); \
+ if (!dev->block_cfg_access) \
+ ret = pci_bus_write_config_##size(dev->bus, dev->devfn, pos, val); \
+ spin_unlock_irqrestore(&access_lock, flags); \
+ return ret; \
+}
+
+PCI_READ_CONFIG(byte, u8)
+PCI_READ_CONFIG(word, u16)
+PCI_READ_CONFIG(dword, u32)
+PCI_WRITE_CONFIG(byte, u8)
+PCI_WRITE_CONFIG(word, u16)
+PCI_WRITE_CONFIG(dword, u32)
+
+/**
+ * pci_block_config_access - Block PCI config reads/writes
+ * @dev: pci device struct
+ *
+ * This function blocks any PCI config accesses from occurring.
+ * When blocked, any writes will be humored and reads will return
+ * the data last saved using pci_save_state for the first 64 bytes
+ * of config space and return ff's for all other config reads.
+ *
+ * Return value:
+ * nothing
+ **/
+void pci_block_config_access(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&access_lock, flags);
+ dev->block_cfg_access = 1;
+ spin_unlock_irqrestore(&access_lock, flags);
+}
+
+/**
+ * pci_unblock_config_access - Unblock PCI config reads/writes
+ * @dev: pci device struct
+ *
+ * This function allows PCI config accesses to resume.
+ *
+ * Return value:
+ * nothing
+ **/
+void pci_unblock_config_access(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&access_lock, flags);
+ dev->block_cfg_access = 0;
+ spin_unlock_irqrestore(&access_lock, flags);
+}
+
+/**
+ * pci_start_bist - Start BIST on a PCI device
+ * @dev: pci device struct
+ *
+ * This function allows a device driver to start BIST
+ * when PCI config accesses are disabled.
+ *
+ * Return value:
+ * nothing
+ **/
+int pci_start_bist(struct pci_dev *dev)
+{
+ return pci_bus_write_config_byte(dev->bus, dev->devfn, PCI_BIST, PCI_BIST_START);
+}
+
+EXPORT_SYMBOL(pci_read_config_byte);
+EXPORT_SYMBOL(pci_read_config_word);
+EXPORT_SYMBOL(pci_read_config_dword);
+EXPORT_SYMBOL(pci_write_config_byte);
+EXPORT_SYMBOL(pci_write_config_word);
+EXPORT_SYMBOL(pci_write_config_dword);
+EXPORT_SYMBOL(pci_start_bist);
+EXPORT_SYMBOL(pci_block_config_access);
+EXPORT_SYMBOL(pci_unblock_config_access);
_
On Fri, Nov 19, 2004 at 02:23:22PM -0600, [email protected] wrote:
> -static inline int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val)
> -{
> - return pci_bus_read_config_byte (dev->bus, dev->devfn, where, val);
> -}
Well, as much as I despise this patch, you should at least get it
correct :)
You need to block the pci_bus_* functions too, otherwise the parts of
the kernel that use them will stomp all over your device, right?
thanks,
greg k-h
Greg KH wrote:
> On Fri, Nov 19, 2004 at 02:23:22PM -0600, [email protected] wrote:
>
>>-static inline int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val)
>>-{
>>- return pci_bus_read_config_byte (dev->bus, dev->devfn, where, val);
>>-}
>
>
> Well, as much as I despise this patch, you should at least get it
> correct :)
>
> You need to block the pci_bus_* functions too, otherwise the parts of
> the kernel that use them will stomp all over your device, right?
I thought about that when writing up this patch, but decided against it.
I figured it was overkill and was going to make the patch more complicated
than it needed to be to solve the main problem I have seen, which is
userspace code, usually hotplug/coldplug scripts, reading config space
when an adapter is running BIST.
If you think there are usages of the pci_bus_* functions in the
kernel after the adapter device driver gets loaded, from callers other
than adapter device drivers and userspace APIs, I would have to agree
with you. I was hoping to keep this patch as simple as possible.
Having to protect the pci_bus_* functions requires a lookup in these
functions to find the pci_dev to get the saved_config_space, which
I was hoping to avoid.
Ben - do you have any concerns with this limitation for the use you have
for this set of APIs?
--
Brian King
eServer Storage I/O
IBM Linux Technology Center
On Fri, 2004-11-19 at 13:32 -0800, Greg KH wrote:
> On Fri, Nov 19, 2004 at 02:23:22PM -0600, [email protected] wrote:
> > -static inline int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val)
> > -{
> > - return pci_bus_read_config_byte (dev->bus, dev->devfn, where, val);
> > -}
>
> Well, as much as I despise this patch, you should at least get it
> correct :)
>
> You need to block the pci_bus_* functions too, otherwise the parts of
> the kernel that use them will stomp all over your device, right?
The real issue is how to do the BIST write in fact once locked ... An
option would be to have an "atomic" write BIST & lock device, which
would do the whole operation with the spinlock.
I agree with Greg, the blocking should be done in the bus functions in
drivers/pci/access.c, and the helper that does the BIST thing should use
the low level callback directly within the lock.
Ben.
On Fri, 2004-11-19 at 16:25 -0600, Brian King wrote:
> I thought about that when writing up this patch, but decided against it.
> I figured it was overkill and was going to make the patch more complicated
> than it needed to be to solve the main problem I have seen, which is
> userspace code, usually hotplug/coldplug scripts, reading config space
> when an adapter is running BIST.
How so ? Why would it be more complicated to do the workaround in
drivers/pci/access.c macros instead and not touch all the wrappers ? It
would actually make a much smaller patch...
> If you think there are usages of the pci_bus_* functions in the
> kernel after the adapter device driver gets loaded, from callers other
> than adapter device drivers and userspace APIs, I would have to agree
> with you. I was hoping to keep this patch as simple as possible.
>
> Having to protect the pci_bus_* functions requires a lookup in these
> functions to find the pci_dev to get the saved_config_space, which
> I was hoping to avoid.
>
> Ben - do you have any concerns with this limitation for the use you have
> for this set of APIs?
If we ever endup rescanning the bus segment, indeed... I'd rather play
safe, it's easy to move the blocking to the bus access functions and
have the BIST function use the low level bus callbacks directly.
Ben.
Benjamin Herrenschmidt wrote:
> On Fri, 2004-11-19 at 16:25 -0600, Brian King wrote:
>
>
>>I thought about that when writing up this patch, but decided against it.
>>I figured it was overkill and was going to make the patch more complicated
>>than it needed to be to solve the main problem I have seen, which is
>>userspace code, usually hotplug/coldplug scripts, reading config space
>>when an adapter is running BIST.
>
>
> How so ? Why would it be more complicated to do the workaround in
> drivers/pci/access.c macros instead and not touch all the wrappers ? It
> would actually make a much smaller patch...
I guess what I was having difficulty with was how to go from bus/devfn
to pci_dev in the bus macros (to access the saved_config_space) and do this
safely at interrupt level. The spinlock protecting the devices list on the
pci_bus struct is never acquired with irqsave and all the existing
functions to search for a given pci device are not callable from
interrupt context.
--
Brian King
eServer Storage I/O
IBM Linux Technology Center
> I guess what I was having difficulty with was how to go from bus/devfn
> to pci_dev in the bus macros (to access the saved_config_space) and do this
> safely at interrupt level. The spinlock protecting the devices list on the
> pci_bus struct is never acquired with irqsave and all the existing
> functions to search for a given pci device are not callable from
> interrupt context.
Hrm... true, that is a problem...
Ben.
On Gwe, 2004-11-19 at 20:23, [email protected] wrote:
> +#define PCI_READ_CONFIG(size,type) \
> +int pci_read_config_##size \
> + (struct pci_dev *dev, int pos, type *val) \
> +{ \
> + unsigned long flags; \
> + int ret = 0; \
> + if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
> + spin_lock_irqsave(&access_lock, flags); \
> + if (!dev->block_cfg_access) \
> + ret = pci_bus_read_config_##size(dev->bus, dev->devfn, pos, val); \
> + else if (pos < sizeof(dev->saved_config_space)) \
> + *val = (type)dev->saved_config_space[pos/sizeof(dev->saved_config_space[0])]; \
> + else \
> + *val = -1; \
> + spin_unlock_irqrestore(&access_lock, flags); \
> + return ret; \
> +}
Several vendors (for good or for bad) require configuration space is
touched from interrupts on fast paths. This change will _really_ hurt
random PC class machines so please make it more sensible in its
condition handling.
To start with you can do something like
if(unlikely(dev->designed_badly)) {
slow_spinlock_path
}
/* Designed less badly 8) */
existing code path
Even better, put that code in your private debug tree. Replace the
locked cases with BUG() and fix the driver to get its internal locking
right in this situation.
It seems wrong to put expensive checks in core code paths when you could
just as easily provide
my_device_is_stupid_pci_read_config_byte()
and equivalent lock taking functions that wrap the existing ones and are
locked against the reset path without hurting sane computing devices
(and PC's).
Alan
> Even better, put that code in your private debug tree. Replace the
> locked cases with BUG() and fix the driver to get its internal locking
> right in this situation.
>
> It seems wrong to put expensive checks in core code paths when you could
> just as easily provide
>
> my_device_is_stupid_pci_read_config_byte()
>
> and equivalent lock taking functions that wrap the existing ones and are
> locked against the reset path without hurting sane computing devices
> (and PC's).
Unfortunately, Alan, the cases where it matters aren't a driver with bad
locking or some something that can be fixed at the driver level. There
are already 2 uses of the above:
- The device he's working on, which sometimes need to trigger a BIST
(built-in self test). During this operation, the device stops responding
on the PCI bus, which can be sort-of fatal if anything (userland playing
with /sys/bus/pci/* for example) touches the config space.
- On Macs, I can turn the clock of some PCI devices on/off for power
management (and I do). However, when such a device is powered off, it
will not respond to config cycles neither, resulting in all-1's reads on
some HW setups or even in deadlock iirc on the G5. We need to "cloack"
them properly while the kernel still has the pci_dev entry for them
since they are just locally power managed by their driver , while
retaining userland visibility in /proc/pci or /sysfs or things like
kudzu stops finding them.
Also, the "Mac" case here (power management) is something I've seen
doable in a variety of embedded setups.
I would add: Config space accesses are slow anyways. They are even
horribly slow. They are worse than IO accesses. I _VERY_MUCH_ doubt that
a test of a variable member of pci_dev like the above would have any
noticeable impact here.
Ben.
On Sad, 2004-11-20 at 07:09, Benjamin Herrenschmidt wrote:
> Unfortunately, Alan, the cases where it matters aren't a driver with bad
> locking or some something that can be fixed at the driver level. There
> are already 2 uses of the above:
That doesn't mean it is the right implementation. Most devices don't
need
this check so might as well have a fast path. You can at least reduce
the cost by setting a flag on devices that potentially have this problem
(or a PCI_ANY PCI_ANY quirk for platforms with it globally)
> - The device he's working on, which sometimes need to trigger a BIST
> (built-in self test). During this operation, the device stops responding
> on the PCI bus, which can be sort-of fatal if anything (userland playing
> with /sys/bus/pci/* for example) touches the config space.
That will be fun given some laptop SMM touches config space.
> I would add: Config space accesses are slow anyways. They are even
> horribly slow. They are worse than IO accesses. I _VERY_MUCH_ doubt that
> a test of a variable member of pci_dev like the above would have any
> noticeable impact here.
Some of the Intel CPU's are very bad at lock handling so it is an issue.
Also most PCI config accesses nowdays go to onboard devices whose
behaviour may well be quite different to PCI anyway. PCI has become a
device management API.
I dislike the "Hey it sucks, lets make it suck more" approach when it
seems easy to do the job well.
Alan
On Sat, 2004-11-20 at 12:42 +0000, Alan Cox wrote:
> On Sad, 2004-11-20 at 07:09, Benjamin Herrenschmidt wrote:
> > Unfortunately, Alan, the cases where it matters aren't a driver with bad
> > locking or some something that can be fixed at the driver level. There
> > are already 2 uses of the above:
>
> That doesn't mean it is the right implementation. Most devices don't
> need
> this check so might as well have a fast path. You can at least reduce
> the cost by setting a flag on devices that potentially have this problem
> (or a PCI_ANY PCI_ANY quirk for platforms with it globally)
Oh, that's I agree (about the implentation beeing maybe sub-optimal),
especially the possibility to avoid the lock...
> > - The device he's working on, which sometimes need to trigger a BIST
> > (built-in self test). During this operation, the device stops responding
> > on the PCI bus, which can be sort-of fatal if anything (userland playing
> > with /sys/bus/pci/* for example) touches the config space.
>
> That will be fun given some laptop SMM touches config space.
None of the affected setups has something as broken-by-design as SMM
BIOSes :)
> > I would add: Config space accesses are slow anyways. They are even
> > horribly slow. They are worse than IO accesses. I _VERY_MUCH_ doubt that
> > a test of a variable member of pci_dev like the above would have any
> > noticeable impact here.
>
> Some of the Intel CPU's are very bad at lock handling so it is an issue.
Yah, we alraedy have a lock in the config space code, so I think the
solution here is to avoid the double locks by doing things a bit
differently.
> Also most PCI config accesses nowdays go to onboard devices whose
> behaviour may well be quite different to PCI anyway. PCI has become a
> device management API.
>
> I dislike the "Hey it sucks, lets make it suck more" approach when it
> seems easy to do the job well.
I don't understand your statements above.
Ben.
Some PCI adapters (eg. ipr scsi adapters) have an exposure today in that
they issue BIST to the adapter to reset the card. If, during the time
it takes to complete BIST, userspace attempts to access PCI config space,
the host bus bridge will master abort the access since the ipr adapter
does not respond on the PCI bus for a brief period of time when running BIST.
On PPC64 hardware, this master abort results in the host PCI bridge
isolating that PCI device from the rest of the system, making the device
unusable until Linux is rebooted. This patch is an attempt to close that
exposure by introducing some blocking code in the PCI code. When blocked,
writes will be humored and reads will return the cached value. Ben
Herrenschmidt has also mentioned that he plans to use this in PPC power
management.
Signed-off-by: Brian King <[email protected]>
---
---
linux-2.6.10-rc2-bk5-bjking1/drivers/pci/access.c | 103 ++++++++++++++++++++++
linux-2.6.10-rc2-bk5-bjking1/include/linux/pci.h | 37 ++-----
2 files changed, 114 insertions(+), 26 deletions(-)
diff -puN drivers/pci/access.c~pci_block_config_io_during_bist drivers/pci/access.c
--- linux-2.6.10-rc2-bk5/drivers/pci/access.c~pci_block_config_io_during_bist 2004-11-20 16:07:19.000000000 -0600
+++ linux-2.6.10-rc2-bk5-bjking1/drivers/pci/access.c 2004-11-20 16:40:32.000000000 -0600
@@ -60,3 +60,106 @@ EXPORT_SYMBOL(pci_bus_read_config_dword)
EXPORT_SYMBOL(pci_bus_write_config_byte);
EXPORT_SYMBOL(pci_bus_write_config_word);
EXPORT_SYMBOL(pci_bus_write_config_dword);
+
+#define PCI_READ_CONFIG(size,type) \
+int pci_read_config_##size \
+ (struct pci_dev *dev, int pos, type *val) \
+{ \
+ unsigned long flags; \
+ int ret = 0; \
+ u32 data = -1; \
+ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
+ spin_lock_irqsave(&pci_lock, flags); \
+ if (likely(!dev->block_cfg_access)) \
+ ret = dev->bus->ops->read(dev->bus, dev->devfn, pos, sizeof(type), &data); \
+ else if (pos < sizeof(dev->saved_config_space)) \
+ data = dev->saved_config_space[pos/sizeof(dev->saved_config_space[0])]; \
+ spin_unlock_irqrestore(&pci_lock, flags); \
+ *val = (type)data; \
+ return ret; \
+}
+
+#define PCI_WRITE_CONFIG(size,type) \
+int pci_write_config_##size \
+ (struct pci_dev *dev, int pos, type val) \
+{ \
+ unsigned long flags; \
+ int ret = 0; \
+ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
+ spin_lock_irqsave(&pci_lock, flags); \
+ if (likely(!dev->block_cfg_access)) \
+ ret = dev->bus->ops->write(dev->bus, dev->devfn, pos, sizeof(type), val); \
+ spin_unlock_irqrestore(&pci_lock, flags); \
+ return ret; \
+}
+
+PCI_READ_CONFIG(byte, u8)
+PCI_READ_CONFIG(word, u16)
+PCI_READ_CONFIG(dword, u32)
+PCI_WRITE_CONFIG(byte, u8)
+PCI_WRITE_CONFIG(word, u16)
+PCI_WRITE_CONFIG(dword, u32)
+
+/**
+ * pci_block_config_access - Block PCI config reads/writes
+ * @dev: pci device struct
+ *
+ * This function blocks any PCI config accesses from occurring.
+ * When blocked, any writes will be humored and reads will return
+ * the data last saved using pci_save_state for the first 64 bytes
+ * of config space and return ff's for all other config reads.
+ *
+ * Return value:
+ * nothing
+ **/
+void pci_block_config_access(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_lock, flags);
+ dev->block_cfg_access = 1;
+ spin_unlock_irqrestore(&pci_lock, flags);
+}
+
+/**
+ * pci_unblock_config_access - Unblock PCI config reads/writes
+ * @dev: pci device struct
+ *
+ * This function allows PCI config accesses to resume.
+ *
+ * Return value:
+ * nothing
+ **/
+void pci_unblock_config_access(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_lock, flags);
+ dev->block_cfg_access = 0;
+ spin_unlock_irqrestore(&pci_lock, flags);
+}
+
+/**
+ * pci_start_bist - Start BIST on a PCI device
+ * @dev: pci device struct
+ *
+ * This function allows a device driver to start BIST
+ * when PCI config accesses are disabled.
+ *
+ * Return value:
+ * nothing
+ **/
+int pci_start_bist(struct pci_dev *dev)
+{
+ return pci_bus_write_config_byte(dev->bus, dev->devfn, PCI_BIST, PCI_BIST_START);
+}
+
+EXPORT_SYMBOL(pci_read_config_byte);
+EXPORT_SYMBOL(pci_read_config_word);
+EXPORT_SYMBOL(pci_read_config_dword);
+EXPORT_SYMBOL(pci_write_config_byte);
+EXPORT_SYMBOL(pci_write_config_word);
+EXPORT_SYMBOL(pci_write_config_dword);
+EXPORT_SYMBOL(pci_start_bist);
+EXPORT_SYMBOL(pci_block_config_access);
+EXPORT_SYMBOL(pci_unblock_config_access);
diff -puN include/linux/pci.h~pci_block_config_io_during_bist include/linux/pci.h
--- linux-2.6.10-rc2-bk5/include/linux/pci.h~pci_block_config_io_during_bist 2004-11-20 16:07:19.000000000 -0600
+++ linux-2.6.10-rc2-bk5-bjking1/include/linux/pci.h 2004-11-20 16:07:19.000000000 -0600
@@ -535,7 +535,8 @@ struct pci_dev {
/* keep track of device state */
unsigned int is_enabled:1; /* pci_enable_device has been called */
unsigned int is_busmaster:1; /* device is busmaster */
-
+ unsigned int block_cfg_access:1; /* config space access is blocked */
+
u32 saved_config_space[16]; /* config space saved at suspend time */
struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */
int rom_attr_enabled; /* has display of the rom attribute been enabled? */
@@ -750,31 +751,12 @@ int pci_bus_read_config_dword (struct pc
int pci_bus_write_config_byte (struct pci_bus *bus, unsigned int devfn, int where, u8 val);
int pci_bus_write_config_word (struct pci_bus *bus, unsigned int devfn, int where, u16 val);
int pci_bus_write_config_dword (struct pci_bus *bus, unsigned int devfn, int where, u32 val);
-
-static inline int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val)
-{
- return pci_bus_read_config_byte (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_read_config_word(struct pci_dev *dev, int where, u16 *val)
-{
- return pci_bus_read_config_word (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_read_config_dword(struct pci_dev *dev, int where, u32 *val)
-{
- return pci_bus_read_config_dword (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_write_config_byte(struct pci_dev *dev, int where, u8 val)
-{
- return pci_bus_write_config_byte (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_write_config_word(struct pci_dev *dev, int where, u16 val)
-{
- return pci_bus_write_config_word (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_write_config_dword(struct pci_dev *dev, int where, u32 val)
-{
- return pci_bus_write_config_dword (dev->bus, dev->devfn, where, val);
-}
+int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val);
+int pci_read_config_word(struct pci_dev *dev, int where, u16 *val);
+int pci_read_config_dword(struct pci_dev *dev, int where, u32 *val);
+int pci_write_config_byte(struct pci_dev *dev, int where, u8 val);
+int pci_write_config_word(struct pci_dev *dev, int where, u16 val);
+int pci_write_config_dword(struct pci_dev *dev, int where, u32 val);
int pci_enable_device(struct pci_dev *dev);
int pci_enable_device_bars(struct pci_dev *dev, int mask);
@@ -870,6 +852,9 @@ extern void pci_disable_msix(struct pci_
extern void msi_remove_pci_irq_vectors(struct pci_dev *dev);
#endif
+extern int pci_start_bist(struct pci_dev *dev);
+extern void pci_block_config_access(struct pci_dev *dev);
+extern void pci_unblock_config_access(struct pci_dev *dev);
#endif /* CONFIG_PCI */
/* Include architecture-dependent settings and functions */
_
On Sat, 2004-11-20 at 17:38 -0600, Brian King wrote:
> Alan Cox wrote:
> > Some of the Intel CPU's are very bad at lock handling so it is an issue.
> > Also most PCI config accesses nowdays go to onboard devices whose
> > behaviour may well be quite different to PCI anyway. PCI has become a
> > device management API.
>
> Does this following patch address your issues with this patch, Alan?
>
> It still doesn't address Greg's issue about making this apply to the
> pci_bus_* functions as well, but I'm not sure of a good way to do that
> due to the reasons given earlier.
Looks good to me, I don't sure we actually have to deal with pci_bus_*
functions, do we ? When are they called ?
> +void pci_block_config_access(struct pci_dev *dev)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&pci_lock, flags);
> + dev->block_cfg_access = 1;
> + spin_unlock_irqrestore(&pci_lock, flags);
> +}
Shouldn't we save the config space here ?
Ben.
Benjamin Herrenschmidt wrote:
>>It still doesn't address Greg's issue about making this apply to the
>>pci_bus_* functions as well, but I'm not sure of a good way to do that
>>due to the reasons given earlier.
>
>
> Looks good to me, I don't sure we actually have to deal with pci_bus_*
> functions, do we ? When are they called ?
For what we are trying to solve, which is blocking userspace config
accesses, I don't think we do. Greg - are you ok with this?
>>+void pci_block_config_access(struct pci_dev *dev)
>>+{
>>+ unsigned long flags;
>>+
>>+ spin_lock_irqsave(&pci_lock, flags);
>>+ dev->block_cfg_access = 1;
>>+ spin_unlock_irqrestore(&pci_lock, flags);
>>+}
>
>
> Shouldn't we save the config space here ?
I thought about that when coding this up and thought it would
be better to simply have the function do what it advertises and no
more. Seems strange that a function called pci_block_config_access
would go and do a bunch of pci config accesses, but we can
certainly add it if you like.
-Brian
--
Brian King
eServer Storage I/O
IBM Linux Technology Center
> I thought about that when coding this up and thought it would
> be better to simply have the function do what it advertises and no
> more. Seems strange that a function called pci_block_config_access
> would go and do a bunch of pci config accesses, but we can
> certainly add it if you like.
Well, considering that when blocked, reads return data from the cache,
it makes sense to fill the cache when blocking ... or we'll end up
returning crap.
Ben.
Some PCI adapters (eg. ipr scsi adapters) have an exposure today in that
they issue BIST to the adapter to reset the card. If, during the time
it takes to complete BIST, userspace attempts to access PCI config space,
the host bus bridge will master abort the access since the ipr adapter
does not respond on the PCI bus for a brief period of time when running BIST.
On PPC64 hardware, this master abort results in the host PCI bridge
isolating that PCI device from the rest of the system, making the device
unusable until Linux is rebooted. This patch is an attempt to close that
exposure by introducing some blocking code in the PCI code. When blocked,
writes will be humored and reads will return the cached value. Ben
Herrenschmidt has also mentioned that he plans to use this in PPC power
management.
Signed-off-by: Brian King <[email protected]>
---
---
linux-2.6.10-rc2-bk5-bjking1/drivers/pci/access.c | 104 ++++++++++++++++++++++
linux-2.6.10-rc2-bk5-bjking1/include/linux/pci.h | 37 ++-----
2 files changed, 115 insertions(+), 26 deletions(-)
diff -puN drivers/pci/access.c~pci_block_config_io_during_bist drivers/pci/access.c
--- linux-2.6.10-rc2-bk5/drivers/pci/access.c~pci_block_config_io_during_bist 2004-11-20 16:07:19.000000000 -0600
+++ linux-2.6.10-rc2-bk5-bjking1/drivers/pci/access.c 2004-11-21 10:42:10.000000000 -0600
@@ -60,3 +60,107 @@ EXPORT_SYMBOL(pci_bus_read_config_dword)
EXPORT_SYMBOL(pci_bus_write_config_byte);
EXPORT_SYMBOL(pci_bus_write_config_word);
EXPORT_SYMBOL(pci_bus_write_config_dword);
+
+#define PCI_READ_CONFIG(size,type) \
+int pci_read_config_##size \
+ (struct pci_dev *dev, int pos, type *val) \
+{ \
+ unsigned long flags; \
+ int ret = 0; \
+ u32 data = -1; \
+ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
+ spin_lock_irqsave(&pci_lock, flags); \
+ if (likely(!dev->block_cfg_access)) \
+ ret = dev->bus->ops->read(dev->bus, dev->devfn, pos, sizeof(type), &data); \
+ else if (pos < sizeof(dev->saved_config_space)) \
+ data = dev->saved_config_space[pos/sizeof(dev->saved_config_space[0])]; \
+ spin_unlock_irqrestore(&pci_lock, flags); \
+ *val = (type)data; \
+ return ret; \
+}
+
+#define PCI_WRITE_CONFIG(size,type) \
+int pci_write_config_##size \
+ (struct pci_dev *dev, int pos, type val) \
+{ \
+ unsigned long flags; \
+ int ret = 0; \
+ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
+ spin_lock_irqsave(&pci_lock, flags); \
+ if (likely(!dev->block_cfg_access)) \
+ ret = dev->bus->ops->write(dev->bus, dev->devfn, pos, sizeof(type), val); \
+ spin_unlock_irqrestore(&pci_lock, flags); \
+ return ret; \
+}
+
+PCI_READ_CONFIG(byte, u8)
+PCI_READ_CONFIG(word, u16)
+PCI_READ_CONFIG(dword, u32)
+PCI_WRITE_CONFIG(byte, u8)
+PCI_WRITE_CONFIG(word, u16)
+PCI_WRITE_CONFIG(dword, u32)
+
+/**
+ * pci_block_config_access - Block PCI config reads/writes
+ * @dev: pci device struct
+ *
+ * This function blocks any PCI config accesses from occurring.
+ * When blocked, any writes will be humored and reads will return
+ * the data saved using pci_save_state for the first 64 bytes
+ * of config space and return ff's for all other config reads.
+ *
+ * Return value:
+ * nothing
+ **/
+void pci_block_config_access(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ pci_save_state(dev);
+ spin_lock_irqsave(&pci_lock, flags);
+ dev->block_cfg_access = 1;
+ spin_unlock_irqrestore(&pci_lock, flags);
+}
+
+/**
+ * pci_unblock_config_access - Unblock PCI config reads/writes
+ * @dev: pci device struct
+ *
+ * This function allows PCI config accesses to resume.
+ *
+ * Return value:
+ * nothing
+ **/
+void pci_unblock_config_access(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_lock, flags);
+ dev->block_cfg_access = 0;
+ spin_unlock_irqrestore(&pci_lock, flags);
+}
+
+/**
+ * pci_start_bist - Start BIST on a PCI device
+ * @dev: pci device struct
+ *
+ * This function allows a device driver to start BIST
+ * when PCI config accesses are disabled.
+ *
+ * Return value:
+ * nothing
+ **/
+int pci_start_bist(struct pci_dev *dev)
+{
+ return pci_bus_write_config_byte(dev->bus, dev->devfn, PCI_BIST, PCI_BIST_START);
+}
+
+EXPORT_SYMBOL(pci_read_config_byte);
+EXPORT_SYMBOL(pci_read_config_word);
+EXPORT_SYMBOL(pci_read_config_dword);
+EXPORT_SYMBOL(pci_write_config_byte);
+EXPORT_SYMBOL(pci_write_config_word);
+EXPORT_SYMBOL(pci_write_config_dword);
+EXPORT_SYMBOL(pci_start_bist);
+EXPORT_SYMBOL(pci_block_config_access);
+EXPORT_SYMBOL(pci_unblock_config_access);
diff -puN include/linux/pci.h~pci_block_config_io_during_bist include/linux/pci.h
--- linux-2.6.10-rc2-bk5/include/linux/pci.h~pci_block_config_io_during_bist 2004-11-20 16:07:19.000000000 -0600
+++ linux-2.6.10-rc2-bk5-bjking1/include/linux/pci.h 2004-11-20 16:07:19.000000000 -0600
@@ -535,7 +535,8 @@ struct pci_dev {
/* keep track of device state */
unsigned int is_enabled:1; /* pci_enable_device has been called */
unsigned int is_busmaster:1; /* device is busmaster */
-
+ unsigned int block_cfg_access:1; /* config space access is blocked */
+
u32 saved_config_space[16]; /* config space saved at suspend time */
struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */
int rom_attr_enabled; /* has display of the rom attribute been enabled? */
@@ -750,31 +751,12 @@ int pci_bus_read_config_dword (struct pc
int pci_bus_write_config_byte (struct pci_bus *bus, unsigned int devfn, int where, u8 val);
int pci_bus_write_config_word (struct pci_bus *bus, unsigned int devfn, int where, u16 val);
int pci_bus_write_config_dword (struct pci_bus *bus, unsigned int devfn, int where, u32 val);
-
-static inline int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val)
-{
- return pci_bus_read_config_byte (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_read_config_word(struct pci_dev *dev, int where, u16 *val)
-{
- return pci_bus_read_config_word (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_read_config_dword(struct pci_dev *dev, int where, u32 *val)
-{
- return pci_bus_read_config_dword (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_write_config_byte(struct pci_dev *dev, int where, u8 val)
-{
- return pci_bus_write_config_byte (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_write_config_word(struct pci_dev *dev, int where, u16 val)
-{
- return pci_bus_write_config_word (dev->bus, dev->devfn, where, val);
-}
-static inline int pci_write_config_dword(struct pci_dev *dev, int where, u32 val)
-{
- return pci_bus_write_config_dword (dev->bus, dev->devfn, where, val);
-}
+int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val);
+int pci_read_config_word(struct pci_dev *dev, int where, u16 *val);
+int pci_read_config_dword(struct pci_dev *dev, int where, u32 *val);
+int pci_write_config_byte(struct pci_dev *dev, int where, u8 val);
+int pci_write_config_word(struct pci_dev *dev, int where, u16 val);
+int pci_write_config_dword(struct pci_dev *dev, int where, u32 val);
int pci_enable_device(struct pci_dev *dev);
int pci_enable_device_bars(struct pci_dev *dev, int mask);
@@ -870,6 +852,9 @@ extern void pci_disable_msix(struct pci_
extern void msi_remove_pci_irq_vectors(struct pci_dev *dev);
#endif
+extern int pci_start_bist(struct pci_dev *dev);
+extern void pci_block_config_access(struct pci_dev *dev);
+extern void pci_unblock_config_access(struct pci_dev *dev);
#endif /* CONFIG_PCI */
/* Include architecture-dependent settings and functions */
_
Alan Cox writes:
> That doesn't mean it is the right implementation. Most devices don't
> need
> this check so might as well have a fast path. You can at least reduce
> the cost by setting a flag on devices that potentially have this problem
But that's exactly what Brian's later patch does! Did you actually
read the patch? All that we are doing is testing one bit in the
struct pci_dev to see whether to do the actual access or not. Or do
you want one bit to tell us whether to go and look at another bit to
see whether to do the access? :)
> (or a PCI_ANY PCI_ANY quirk for platforms with it globally)
How would you use a quirk to block config space accesses? The two are
unrelated.
> > - The device he's working on, which sometimes need to trigger a BIST
> > (built-in self test). During this operation, the device stops responding
> > on the PCI bus, which can be sort-of fatal if anything (userland playing
> > with /sys/bus/pci/* for example) touches the config space.
>
> That will be fun given some laptop SMM touches config space.
Well, I can see that that would limit your power management options,
but fortunately we don't have SMM on the machines where we need this
config access blocking.
> Some of the Intel CPU's are very bad at lock handling so it is an issue.
There is no extra locking introduced by Brian's patch. Config
accesses were already taking the pci_lock and that hasn't changed.
> I dislike the "Hey it sucks, lets make it suck more" approach when it
> seems easy to do the job well.
Perhaps you could expand on what you mean by "do the job well"?
Paul.
On Iau, 2004-11-25 at 22:00, Paul Mackerras wrote:
> read the patch? All that we are doing is testing one bit in the
> struct pci_dev to see whether to do the actual access or not. Or do
> you want one bit to tell us whether to go and look at another bit to
> see whether to do the access? :)
Rereading the newer patch and following the pci_lock move properly this
time it looks fine now.
On Sad, 2004-11-20 at 23:38, Brian King wrote:
> Alan Cox wrote:
> > Some of the Intel CPU's are very bad at lock handling so it is an issue.
> > Also most PCI config accesses nowdays go to onboard devices whose
> > behaviour may well be quite different to PCI anyway. PCI has become a
> > device management API.
>
> Does this following patch address your issues with this patch, Alan?
Doesn't seem related to it