Hello,
[this patch series touches a few subsystems; hopefully I got all
the right maintainers]
Recently, Matthew Wilcox sent out the following mail about PCI
slots:
http://marc.info/?l=linux-pci&m=119432330418980&w=2
The following patch series is a rough first cut at implementing
the ideas he outlined, namely, that PCI slots are physical
objects that we care about, independent of their hotplug
capabilities.
We introduce a struct pci_slot as a first-class citizen, and turn
hotplug_slot into a subsidiary structure. A brief glimpse at the
potential for cleanup is given, as we modify the existing hotplug
drivers and remove the multiple get_address() methods. Certainly
more cleanup can be done with this new structure.
Additionally, we introduce an ACPI PCI slot driver, which will
detect all physical PCI slots in the system (as described by
ACPI). De-coupling the notion of a physical slot from its hotplug
capability allows users to understand the physical geography of
their machines, whether their slots are hotpluggable or not.
This patch series builds heavily on Willy's prior work (with a
bit of the typical refresh needed when aiming at the moving
target of the kernel). The ACPI PCI slot driver is new.
I have tested this series on both ia64 (hp rx6600) and x86 (hp
dl380g). On ia64, system firmware provides _SUN methods for the
PCI slots and we get entries in /sys/bus/pci/slots/. On my x86
machine, firmware didn't seem to provide a _SUN, so we don't get
any entries in /sys/bus/pci/slots/, but nothing really bad
happens either. ;)
Comments/feedback are appreciated.
Thanks.
/ac
[Please note, I got Rick Jones' email screwed up in the 0/5
email; it's corrected above.]
From: Alex Chiang <[email protected]>
Rename the slot to be the contents of the 'path' sysfs attribute, and
delete the attribute. The mapping from pci address to slot name is
supposed to be done through the 'address' file, which will be provided
automatically later in this series of patches.
Signed-off-by: Alex Chiang <[email protected]>
Signed-off-by: Matthew Wilcox <[email protected]>
---
drivers/pci/hotplug/sgi_hotplug.c | 27 +--------------------------
1 files changed, 1 insertions(+), 26 deletions(-)
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index ef07c36..e1e7c9d 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -91,21 +91,6 @@ static struct hotplug_slot_ops sn_hotplug_slot_ops = {
static DEFINE_MUTEX(sn_hotplug_mutex);
-static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot,
- char *buf)
-{
- int retval = -ENOENT;
- struct slot *slot = bss_hotplug_slot->private;
-
- if (!slot)
- return retval;
-
- retval = sprintf (buf, "%s\n", slot->physical_path);
- return retval;
-}
-
-static struct hotplug_slot_attribute sn_slot_path_attr = __ATTR_RO(path);
-
static int sn_pci_slot_valid(struct pci_bus *pci_bus, int device)
{
struct pcibus_info *pcibus_info;
@@ -173,18 +158,10 @@ static int sn_hp_slot_private_alloc(struct hotplug_slot *bss_hotplug_slot,
return -ENOMEM;
bss_hotplug_slot->private = slot;
- bss_hotplug_slot->name = kmalloc(SN_SLOT_NAME_SIZE, GFP_KERNEL);
- if (!bss_hotplug_slot->name) {
- kfree(bss_hotplug_slot->private);
- return -ENOMEM;
- }
+ bss_hotplug_slot->name = slot->physical_path;
slot->device_num = device;
slot->pci_bus = pci_bus;
- sprintf(bss_hotplug_slot->name, "%04x:%02x:%02x",
- pci_domain_nr(pci_bus),
- ((u16)pcibus_info->pbi_buscommon.bs_persist_busnum),
- device + 1);
sn_generate_path(pci_bus, slot->physical_path);
@@ -203,8 +180,6 @@ static struct hotplug_slot * sn_hp_destroy(void)
bss_hotplug_slot = slot->hotplug_slot;
list_del(&((struct slot *)bss_hotplug_slot->private)->
hp_list);
- sysfs_remove_file(&bss_hotplug_slot->kobj,
- &sn_slot_path_attr.attr);
break;
}
return bss_hotplug_slot;
--
1.5.3.1.1.g1e61
Register one slot per slot, rather than one slot per function.
Change the name of the slot to fake%d instead of the pci address.
Signed-off-by: Alex Chiang <[email protected]>
Signed-off-by: Matthew Wilcox <[email protected]>
---
drivers/pci/hotplug/fakephp.c | 75 +++++++++++++++--------------------------
1 files changed, 27 insertions(+), 48 deletions(-)
diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c
index 027f686..828335e 100644
--- a/drivers/pci/hotplug/fakephp.c
+++ b/drivers/pci/hotplug/fakephp.c
@@ -93,6 +93,7 @@ static int add_slot(struct pci_dev *dev)
struct dummy_slot *dslot;
struct hotplug_slot *slot;
int retval = -ENOMEM;
+ static int count = 1;
slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
if (!slot)
@@ -106,7 +107,8 @@ static int add_slot(struct pci_dev *dev)
slot->info->max_bus_speed = PCI_SPEED_UNKNOWN;
slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN;
- slot->name = &dev->dev.bus_id[0];
+ slot->name = kmalloc(8, GFP_KERNEL);
+ sprintf(slot->name, "fake%d", count++);
dbg("slot->name = %s\n", slot->name);
dslot = kmalloc(sizeof(struct dummy_slot), GFP_KERNEL);
@@ -141,17 +143,17 @@ error:
static int __init pci_scan_buses(void)
{
struct pci_dev *dev = NULL;
- int retval = 0;
+ int lastslot = 0;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
- retval = add_slot(dev);
- if (retval) {
- pci_dev_put(dev);
- break;
- }
+ if (PCI_FUNC(dev->devfn) > 0 &&
+ lastslot == PCI_SLOT(dev->devfn))
+ continue;
+ lastslot = PCI_SLOT(dev->devfn);
+ add_slot(dev);
}
- return retval;
+ return 0;
}
static void remove_slot(struct dummy_slot *dslot)
@@ -275,23 +277,9 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
return -ENODEV;
}
-/* find the hotplug_slot for the pci_dev */
-static struct hotplug_slot *get_slot_from_dev(struct pci_dev *dev)
-{
- struct dummy_slot *dslot;
-
- list_for_each_entry(dslot, &slot_list, node) {
- if (dslot->dev == dev)
- return dslot->slot;
- }
- return NULL;
-}
-
-
static int disable_slot(struct hotplug_slot *slot)
{
struct dummy_slot *dslot;
- struct hotplug_slot *hslot;
struct pci_dev *dev;
int func;
@@ -301,36 +289,27 @@ static int disable_slot(struct hotplug_slot *slot)
dbg("%s - physical_slot = %s\n", __FUNCTION__, slot->name);
- /* don't disable bridged devices just yet, we can't handle them easily... */
- if (dslot->dev->subordinate) {
- err("Can't remove PCI devices with other PCI devices behind it yet.\n");
- return -ENODEV;
- }
- /* search for subfunctions and disable them first */
- if (!(dslot->dev->devfn & 7)) {
- for (func = 1; func < 8; func++) {
- dev = pci_get_slot(dslot->dev->bus,
- dslot->dev->devfn + func);
- if (dev) {
- hslot = get_slot_from_dev(dev);
- if (hslot)
- disable_slot(hslot);
- else {
- err("Hotplug slot not found for subfunction of PCI device\n");
- return -ENODEV;
- }
- pci_dev_put(dev);
- } else
- dbg("No device in slot found\n");
+ for (func = 7; func >= 0; func--) {
+ dev = pci_get_slot(dslot->dev->bus, dslot->dev->devfn + func);
+ if (!dev)
+ continue;
+
+ /* don't disable bridged devices just yet, we can't handle
+ * them easily... */
+ if (dev->subordinate) {
+ err("Can't remove PCI devices with other PCI devices behind it yet.\n");
+ return -ENODEV;
}
- }
- /* remove the device from the pci core */
- pci_remove_bus_device(dslot->dev);
- /* blow away this sysfs entry and other parts. */
- remove_slot(dslot);
+ /* remove the device from the pci core */
+ pci_remove_bus_device(dslot->dev);
+ /* blow away this sysfs entry and other parts. */
+ remove_slot(dslot);
+
+ pci_dev_put(dev);
+ }
return 0;
}
--
1.5.3.1.1.g1e61
Introduce struct pci_slot
- Make pci_slot the primary sysfs entity. hotplug_slot becomes a
subsidiary structure.
- Change the prototype of pci_hp_register() to take the bus and
slot number (on parent bus) as parameters.
- Remove all the ->get_address methods since this functionality is
now handled by pci_slot directly.
Signed-off-by: Alex Chiang <[email protected]>
Signed-off-by: Matthew Wilcox <[email protected]>
---
drivers/pci/Makefile | 2 +-
drivers/pci/hotplug/acpiphp.h | 1 -
drivers/pci/hotplug/acpiphp_core.c | 23 +---
drivers/pci/hotplug/acpiphp_glue.c | 16 --
drivers/pci/hotplug/acpiphp_ibm.c | 5 +-
drivers/pci/hotplug/cpci_hotplug_core.c | 2 +-
drivers/pci/hotplug/cpqphp_core.c | 4 +-
drivers/pci/hotplug/fakephp.c | 2 +-
drivers/pci/hotplug/ibmphp_ebda.c | 3 +-
drivers/pci/hotplug/pci_hotplug_core.c | 237 +++++++++++--------------------
drivers/pci/hotplug/pciehp_core.c | 22 +---
drivers/pci/hotplug/rpadlpar_sysfs.c | 4 +-
drivers/pci/hotplug/sgi_hotplug.c | 7 +-
drivers/pci/hotplug/shpchp_core.c | 17 +--
drivers/pci/pci.h | 13 ++
drivers/pci/slot.c | 157 ++++++++++++++++++++
include/linux/pci.h | 14 ++
include/linux/pci_hotplug.h | 12 +--
18 files changed, 293 insertions(+), 248 deletions(-)
create mode 100644 drivers/pci/slot.c
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 5550556..12f0b2d 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -2,7 +2,7 @@
# Makefile for the PCI bus specific drivers.
#
-obj-y += access.o bus.o probe.o remove.o pci.o quirks.o \
+obj-y += access.o bus.o probe.o remove.o pci.o quirks.o slot.o \
pci-driver.o search.o pci-sysfs.o rom.o setup-res.o
obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
index f6cc0c5..ab46189 100644
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -216,7 +216,6 @@ extern u8 acpiphp_get_power_status (struct acpiphp_slot *slot);
extern u8 acpiphp_get_attention_status (struct acpiphp_slot *slot);
extern u8 acpiphp_get_latch_status (struct acpiphp_slot *slot);
extern u8 acpiphp_get_adapter_status (struct acpiphp_slot *slot);
-extern u32 acpiphp_get_address (struct acpiphp_slot *slot);
/* variables */
extern int acpiphp_debug;
diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index a0ca63a..34b8d0b 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -70,7 +70,6 @@ static int disable_slot (struct hotplug_slot *slot);
static int set_attention_status (struct hotplug_slot *slot, u8 value);
static int get_power_status (struct hotplug_slot *slot, u8 *value);
static int get_attention_status (struct hotplug_slot *slot, u8 *value);
-static int get_address (struct hotplug_slot *slot, u32 *value);
static int get_latch_status (struct hotplug_slot *slot, u8 *value);
static int get_adapter_status (struct hotplug_slot *slot, u8 *value);
@@ -83,7 +82,6 @@ static struct hotplug_slot_ops acpi_hotplug_slot_ops = {
.get_attention_status = get_attention_status,
.get_latch_status = get_latch_status,
.get_adapter_status = get_adapter_status,
- .get_address = get_address,
};
@@ -279,23 +277,6 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
return 0;
}
-
-/**
- * get_address - get pci address of a slot
- * @hotplug_slot: slot to get status
- * @value: pointer to struct pci_busdev (seg, bus, dev)
- */
-static int get_address(struct hotplug_slot *hotplug_slot, u32 *value)
-{
- struct slot *slot = hotplug_slot->private;
-
- dbg("%s - physical_slot = %s\n", __FUNCTION__, hotplug_slot->name);
-
- *value = acpiphp_get_address(slot->acpi_slot);
-
- return 0;
-}
-
static int __init init_acpi(void)
{
int retval;
@@ -362,7 +343,9 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
acpiphp_slot->slot = slot;
snprintf(slot->name, sizeof(slot->name), "%u", slot->acpi_slot->sun);
- retval = pci_hp_register(slot->hotplug_slot);
+ retval = pci_hp_register(slot->hotplug_slot,
+ acpiphp_slot->bridge->pci_bus,
+ acpiphp_slot->device);
if (retval) {
err("pci_hp_register failed with error %d\n", retval);
goto error_hpslot;
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 1e125b5..02a2fd7 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -1874,19 +1874,3 @@ u8 acpiphp_get_adapter_status(struct acpiphp_slot *slot)
return (sta == 0) ? 0 : 1;
}
-
-
-/*
- * pci address (seg/bus/dev)
- */
-u32 acpiphp_get_address(struct acpiphp_slot *slot)
-{
- u32 address;
- struct pci_bus *pci_bus = slot->bridge->pci_bus;
-
- address = (pci_domain_nr(pci_bus) << 16) |
- (pci_bus->number << 8) |
- slot->device;
-
- return address;
-}
diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c
index 56829f8..927b1fb 100644
--- a/drivers/pci/hotplug/acpiphp_ibm.c
+++ b/drivers/pci/hotplug/acpiphp_ibm.c
@@ -35,6 +35,7 @@
#include <linux/moduleparam.h>
#include "acpiphp.h"
+extern struct kset pci_slots_subsys;
#define DRIVER_VERSION "1.0.1"
#define DRIVER_AUTHOR "Irene Zubarev <[email protected]>, Vernon Mauery <[email protected]>"
@@ -428,7 +429,7 @@ static int __init ibm_acpiphp_init(void)
int retval = 0;
acpi_status status;
struct acpi_device *device;
- struct kobject *sysdir = &pci_hotplug_slots_subsys.kobj;
+ struct kobject *sysdir = &pci_slots_subsys.kobj;
dbg("%s\n", __FUNCTION__);
@@ -475,7 +476,7 @@ init_return:
static void __exit ibm_acpiphp_exit(void)
{
acpi_status status;
- struct kobject *sysdir = &pci_hotplug_slots_subsys.kobj;
+ struct kobject *sysdir = &pci_slots_subsys.kobj;
dbg("%s\n", __FUNCTION__);
diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c
index ed4d44e..aa47b80 100644
--- a/drivers/pci/hotplug/cpci_hotplug_core.c
+++ b/drivers/pci/hotplug/cpci_hotplug_core.c
@@ -285,7 +285,7 @@ cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last)
info->attention_status = cpci_get_attention_status(slot);
dbg("registering slot %s", slot->hotplug_slot->name);
- status = pci_hp_register(slot->hotplug_slot);
+ status = pci_hp_register(slot->hotplug_slot, bus, i);
if (status) {
err("pci_hp_register failed with error %d", status);
goto error_name;
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index a96b739..67f6a0c 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -434,7 +434,9 @@ static int ctrl_slot_setup(struct controller *ctrl,
slot->bus, slot->device,
slot->number, ctrl->slot_device_offset,
slot_number);
- result = pci_hp_register(hotplug_slot);
+ result = pci_hp_register(hotplug_slot,
+ ctrl->pci_dev->subordinate,
+ slot->device);
if (result) {
err("pci_hp_register failed with error %d\n", result);
goto error_name;
diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c
index 828335e..07597b7 100644
--- a/drivers/pci/hotplug/fakephp.c
+++ b/drivers/pci/hotplug/fakephp.c
@@ -119,7 +119,7 @@ static int add_slot(struct pci_dev *dev)
slot->release = &dummy_release;
slot->private = dslot;
- retval = pci_hp_register(slot);
+ retval = pci_hp_register(slot, dev->bus, PCI_SLOT(dev->devfn));
if (retval) {
err("pci_hp_register failed with error %d\n", retval);
goto error_dslot;
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 600ed7b..eb7a1c0 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -1000,7 +1000,8 @@ static int __init ebda_rsrc_controller (void)
tmp_slot = list_entry (list, struct slot, ibm_slot_list);
snprintf (tmp_slot->hotplug_slot->name, 30, "%s", create_file_name (tmp_slot));
- pci_hp_register (tmp_slot->hotplug_slot);
+ pci_hp_register(tmp_slot->hotplug_slot,
+ pci_find_bus(0, tmp_slot->bus), tmp_slot->device);
}
print_ebda_hpc ();
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 01c351c..96c21e2 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -40,6 +40,7 @@
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
#include <asm/uaccess.h>
+#include "../pci.h"
#define MY_NAME "pci_hotplug"
@@ -61,43 +62,6 @@ static int debug;
static LIST_HEAD(pci_hotplug_slot_list);
-struct kset pci_hotplug_slots_subsys;
-
-static ssize_t hotplug_slot_attr_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct hotplug_slot *slot = to_hotplug_slot(kobj);
- struct hotplug_slot_attribute *attribute = to_hotplug_attr(attr);
- return attribute->show ? attribute->show(slot, buf) : -EIO;
-}
-
-static ssize_t hotplug_slot_attr_store(struct kobject *kobj,
- struct attribute *attr, const char *buf, size_t len)
-{
- struct hotplug_slot *slot = to_hotplug_slot(kobj);
- struct hotplug_slot_attribute *attribute = to_hotplug_attr(attr);
- return attribute->store ? attribute->store(slot, buf, len) : -EIO;
-}
-
-static struct sysfs_ops hotplug_slot_sysfs_ops = {
- .show = hotplug_slot_attr_show,
- .store = hotplug_slot_attr_store,
-};
-
-static void hotplug_slot_release(struct kobject *kobj)
-{
- struct hotplug_slot *slot = to_hotplug_slot(kobj);
- if (slot->release)
- slot->release(slot);
-}
-
-static struct kobj_type hotplug_slot_ktype = {
- .sysfs_ops = &hotplug_slot_sysfs_ops,
- .release = &hotplug_slot_release,
-};
-
-decl_subsys_name(pci_hotplug_slots, slots, &hotplug_slot_ktype, NULL);
-
/* these strings match up with the values in pci_bus_speed */
static char *pci_bus_speed_strings[] = {
"33 MHz PCI", /* 0x00 */
@@ -151,16 +115,15 @@ GET_STATUS(power_status, u8)
GET_STATUS(attention_status, u8)
GET_STATUS(latch_status, u8)
GET_STATUS(adapter_status, u8)
-GET_STATUS(address, u32)
GET_STATUS(max_bus_speed, enum pci_bus_speed)
GET_STATUS(cur_bus_speed, enum pci_bus_speed)
-static ssize_t power_read_file (struct hotplug_slot *slot, char *buf)
+static ssize_t power_read_file(struct pci_slot *slot, char *buf)
{
int retval;
u8 value;
- retval = get_power_status (slot, &value);
+ retval = get_power_status(slot->hotplug, &value);
if (retval)
goto exit;
retval = sprintf (buf, "%d\n", value);
@@ -168,9 +131,10 @@ exit:
return retval;
}
-static ssize_t power_write_file (struct hotplug_slot *slot, const char *buf,
+static ssize_t power_write_file(struct pci_slot *pci_slot, const char *buf,
size_t count)
{
+ struct hotplug_slot *slot = pci_slot->hotplug;
unsigned long lpower;
u8 power;
int retval = 0;
@@ -206,29 +170,30 @@ exit:
return count;
}
-static struct hotplug_slot_attribute hotplug_slot_attr_power = {
+static struct pci_slot_attribute hotplug_slot_attr_power = {
.attr = {.name = "power", .mode = S_IFREG | S_IRUGO | S_IWUSR},
.show = power_read_file,
.store = power_write_file
};
-static ssize_t attention_read_file (struct hotplug_slot *slot, char *buf)
+static ssize_t attention_read_file(struct pci_slot *slot, char *buf)
{
int retval;
u8 value;
- retval = get_attention_status (slot, &value);
+ retval = get_attention_status(slot->hotplug, &value);
if (retval)
goto exit;
- retval = sprintf (buf, "%d\n", value);
+ retval = sprintf(buf, "%d\n", value);
exit:
return retval;
}
-static ssize_t attention_write_file (struct hotplug_slot *slot, const char *buf,
+static ssize_t attention_write_file(struct pci_slot *slot, const char *buf,
size_t count)
{
+ struct hotplug_slot_ops *ops = slot->hotplug->ops;
unsigned long lattention;
u8 attention;
int retval = 0;
@@ -237,13 +202,13 @@ static ssize_t attention_write_file (struct hotplug_slot *slot, const char *buf,
attention = (u8)(lattention & 0xff);
dbg (" - attention = %d\n", attention);
- if (!try_module_get(slot->ops->owner)) {
+ if (!try_module_get(ops->owner)) {
retval = -ENODEV;
goto exit;
}
- if (slot->ops->set_attention_status)
- retval = slot->ops->set_attention_status(slot, attention);
- module_put(slot->ops->owner);
+ if (ops->set_attention_status)
+ retval = ops->set_attention_status(slot->hotplug, attention);
+ module_put(ops->owner);
exit:
if (retval)
@@ -251,18 +216,18 @@ exit:
return count;
}
-static struct hotplug_slot_attribute hotplug_slot_attr_attention = {
+static struct pci_slot_attribute hotplug_slot_attr_attention = {
.attr = {.name = "attention", .mode = S_IFREG | S_IRUGO | S_IWUSR},
.show = attention_read_file,
.store = attention_write_file
};
-static ssize_t latch_read_file (struct hotplug_slot *slot, char *buf)
+static ssize_t latch_read_file(struct pci_slot *slot, char *buf)
{
int retval;
u8 value;
- retval = get_latch_status (slot, &value);
+ retval = get_latch_status(slot->hotplug, &value);
if (retval)
goto exit;
retval = sprintf (buf, "%d\n", value);
@@ -271,17 +236,17 @@ exit:
return retval;
}
-static struct hotplug_slot_attribute hotplug_slot_attr_latch = {
+static struct pci_slot_attribute hotplug_slot_attr_latch = {
.attr = {.name = "latch", .mode = S_IFREG | S_IRUGO},
.show = latch_read_file,
};
-static ssize_t presence_read_file (struct hotplug_slot *slot, char *buf)
+static ssize_t presence_read_file(struct pci_slot *slot, char *buf)
{
int retval;
u8 value;
- retval = get_adapter_status (slot, &value);
+ retval = get_adapter_status(slot->hotplug, &value);
if (retval)
goto exit;
retval = sprintf (buf, "%d\n", value);
@@ -290,42 +255,20 @@ exit:
return retval;
}
-static struct hotplug_slot_attribute hotplug_slot_attr_presence = {
+static struct pci_slot_attribute hotplug_slot_attr_presence = {
.attr = {.name = "adapter", .mode = S_IFREG | S_IRUGO},
.show = presence_read_file,
};
-static ssize_t address_read_file (struct hotplug_slot *slot, char *buf)
-{
- int retval;
- u32 address;
-
- retval = get_address (slot, &address);
- if (retval)
- goto exit;
- retval = sprintf (buf, "%04x:%02x:%02x\n",
- (address >> 16) & 0xffff,
- (address >> 8) & 0xff,
- address & 0xff);
-
-exit:
- return retval;
-}
-
-static struct hotplug_slot_attribute hotplug_slot_attr_address = {
- .attr = {.name = "address", .mode = S_IFREG | S_IRUGO},
- .show = address_read_file,
-};
-
static char *unknown_speed = "Unknown bus speed";
-static ssize_t max_bus_speed_read_file (struct hotplug_slot *slot, char *buf)
+static ssize_t max_bus_speed_read_file(struct pci_slot *slot, char *buf)
{
char *speed_string;
int retval;
enum pci_bus_speed value;
- retval = get_max_bus_speed (slot, &value);
+ retval = get_max_bus_speed(slot->hotplug, &value);
if (retval)
goto exit;
@@ -340,18 +283,18 @@ exit:
return retval;
}
-static struct hotplug_slot_attribute hotplug_slot_attr_max_bus_speed = {
+static struct pci_slot_attribute hotplug_slot_attr_max_bus_speed = {
.attr = {.name = "max_bus_speed", .mode = S_IFREG | S_IRUGO},
.show = max_bus_speed_read_file,
};
-static ssize_t cur_bus_speed_read_file (struct hotplug_slot *slot, char *buf)
+static ssize_t cur_bus_speed_read_file(struct pci_slot *slot, char *buf)
{
char *speed_string;
int retval;
enum pci_bus_speed value;
- retval = get_cur_bus_speed (slot, &value);
+ retval = get_cur_bus_speed(slot->hotplug, &value);
if (retval)
goto exit;
@@ -366,14 +309,15 @@ exit:
return retval;
}
-static struct hotplug_slot_attribute hotplug_slot_attr_cur_bus_speed = {
+static struct pci_slot_attribute hotplug_slot_attr_cur_bus_speed = {
.attr = {.name = "cur_bus_speed", .mode = S_IFREG | S_IRUGO},
.show = cur_bus_speed_read_file,
};
-static ssize_t test_write_file (struct hotplug_slot *slot, const char *buf,
+static ssize_t test_write_file(struct pci_slot *pci_slot, const char *buf,
size_t count)
{
+ struct hotplug_slot *slot = pci_slot->hotplug;
unsigned long ltest;
u32 test;
int retval = 0;
@@ -396,13 +340,14 @@ exit:
return count;
}
-static struct hotplug_slot_attribute hotplug_slot_attr_test = {
+static struct pci_slot_attribute hotplug_slot_attr_test = {
.attr = {.name = "test", .mode = S_IFREG | S_IRUGO | S_IWUSR},
.store = test_write_file
};
-static int has_power_file (struct hotplug_slot *slot)
+static int has_power_file(struct pci_slot *pci_slot)
{
+ struct hotplug_slot *slot = pci_slot->hotplug;
if ((!slot) || (!slot->ops))
return -ENODEV;
if ((slot->ops->enable_slot) ||
@@ -412,8 +357,9 @@ static int has_power_file (struct hotplug_slot *slot)
return -ENOENT;
}
-static int has_attention_file (struct hotplug_slot *slot)
+static int has_attention_file(struct pci_slot *pci_slot)
{
+ struct hotplug_slot *slot = pci_slot->hotplug;
if ((!slot) || (!slot->ops))
return -ENODEV;
if ((slot->ops->set_attention_status) ||
@@ -422,8 +368,9 @@ static int has_attention_file (struct hotplug_slot *slot)
return -ENOENT;
}
-static int has_latch_file (struct hotplug_slot *slot)
+static int has_latch_file (struct pci_slot *pci_slot)
{
+ struct hotplug_slot *slot = pci_slot->hotplug;
if ((!slot) || (!slot->ops))
return -ENODEV;
if (slot->ops->get_latch_status)
@@ -431,8 +378,9 @@ static int has_latch_file (struct hotplug_slot *slot)
return -ENOENT;
}
-static int has_adapter_file (struct hotplug_slot *slot)
+static int has_adapter_file (struct pci_slot *pci_slot)
{
+ struct hotplug_slot *slot = pci_slot->hotplug;
if ((!slot) || (!slot->ops))
return -ENODEV;
if (slot->ops->get_adapter_status)
@@ -440,17 +388,9 @@ static int has_adapter_file (struct hotplug_slot *slot)
return -ENOENT;
}
-static int has_address_file (struct hotplug_slot *slot)
-{
- if ((!slot) || (!slot->ops))
- return -ENODEV;
- if (slot->ops->get_address)
- return 0;
- return -ENOENT;
-}
-
-static int has_max_bus_speed_file (struct hotplug_slot *slot)
+static int has_max_bus_speed_file (struct pci_slot *pci_slot)
{
+ struct hotplug_slot *slot = pci_slot->hotplug;
if ((!slot) || (!slot->ops))
return -ENODEV;
if (slot->ops->get_max_bus_speed)
@@ -458,8 +398,9 @@ static int has_max_bus_speed_file (struct hotplug_slot *slot)
return -ENOENT;
}
-static int has_cur_bus_speed_file (struct hotplug_slot *slot)
+static int has_cur_bus_speed_file (struct pci_slot *pci_slot)
{
+ struct hotplug_slot *slot = pci_slot->hotplug;
if ((!slot) || (!slot->ops))
return -ENODEV;
if (slot->ops->get_cur_bus_speed)
@@ -467,8 +408,9 @@ static int has_cur_bus_speed_file (struct hotplug_slot *slot)
return -ENOENT;
}
-static int has_test_file (struct hotplug_slot *slot)
+static int has_test_file (struct pci_slot *pci_slot)
{
+ struct hotplug_slot *slot = pci_slot->hotplug;
if ((!slot) || (!slot->ops))
return -ENODEV;
if (slot->ops->hardware_test)
@@ -476,7 +418,7 @@ static int has_test_file (struct hotplug_slot *slot)
return -ENOENT;
}
-static int fs_add_slot (struct hotplug_slot *slot)
+static int fs_add_slot(struct pci_slot *slot)
{
int retval = 0;
@@ -507,13 +449,6 @@ static int fs_add_slot (struct hotplug_slot *slot)
goto exit_adapter;
}
- if (has_address_file(slot) == 0) {
- retval = sysfs_create_file(&slot->kobj,
- &hotplug_slot_attr_address.attr);
- if (retval)
- goto exit_address;
- }
-
if (has_max_bus_speed_file(slot) == 0) {
retval = sysfs_create_file(&slot->kobj,
&hotplug_slot_attr_max_bus_speed.attr);
@@ -546,10 +481,6 @@ exit_cur_speed:
sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_max_bus_speed.attr);
exit_max_speed:
- if (has_address_file(slot) == 0)
- sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_address.attr);
-
-exit_address:
if (has_adapter_file(slot) == 0)
sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_presence.attr);
@@ -569,7 +500,7 @@ exit:
return retval;
}
-static void fs_remove_slot (struct hotplug_slot *slot)
+static void fs_remove_slot (struct pci_slot *slot)
{
if (has_power_file(slot) == 0)
sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_power.attr);
@@ -583,9 +514,6 @@ static void fs_remove_slot (struct hotplug_slot *slot)
if (has_adapter_file(slot) == 0)
sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_presence.attr);
- if (has_address_file(slot) == 0)
- sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_address.attr);
-
if (has_max_bus_speed_file(slot) == 0)
sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_max_bus_speed.attr);
@@ -609,6 +537,13 @@ static struct hotplug_slot *get_slot_from_name (const char *name)
return NULL;
}
+static void hotplug_release(struct pci_slot *slot)
+{
+ struct hotplug_slot *hotplug = slot->hotplug;
+ hotplug->release(hotplug);
+ kfree(slot);
+}
+
/**
* pci_hp_register - register a hotplug_slot with the PCI hotplug subsystem
* @slot: pointer to the &struct hotplug_slot to register
@@ -618,8 +553,9 @@ static struct hotplug_slot *get_slot_from_name (const char *name)
*
* Returns 0 if successful, anything else for an error.
*/
-int pci_hp_register (struct hotplug_slot *slot)
+int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr)
{
+ struct pci_slot *pci_slot;
int result;
if (slot == NULL)
@@ -632,18 +568,15 @@ int pci_hp_register (struct hotplug_slot *slot)
return -EINVAL;
}
- kobject_set_name(&slot->kobj, "%s", slot->name);
- kobj_set_kset_s(slot, pci_hotplug_slots_subsys);
+ pci_slot = pci_create_slot(bus, slot_nr, slot->name, hotplug_release);
+ if (IS_ERR(pci_slot))
+ return PTR_ERR(pci_slot);
+ slot->pci_slot = pci_slot;
+ pci_slot->hotplug = slot;
- /* this can fail if we have already registered a slot with the same name */
- if (kobject_register(&slot->kobj)) {
- err("Unable to register kobject");
- return -EINVAL;
- }
-
- list_add (&slot->slot_list, &pci_hotplug_slot_list);
+ list_add(&slot->slot_list, &pci_hotplug_slot_list);
- result = fs_add_slot (slot);
+ result = fs_add_slot(pci_slot);
dbg ("Added slot %s to the list\n", slot->name);
return result;
}
@@ -657,22 +590,23 @@ int pci_hp_register (struct hotplug_slot *slot)
*
* Returns 0 if successful, anything else for an error.
*/
-int pci_hp_deregister (struct hotplug_slot *slot)
+int pci_hp_deregister(struct hotplug_slot *hotplug)
{
struct hotplug_slot *temp;
+ struct pci_slot *slot;
- if (slot == NULL)
+ if (!hotplug)
return -ENODEV;
- temp = get_slot_from_name (slot->name);
- if (temp != slot) {
+ temp = get_slot_from_name(hotplug->name);
+ if (temp != hotplug)
return -ENODEV;
- }
- list_del (&slot->slot_list);
- fs_remove_slot (slot);
- dbg ("Removed slot %s from the list\n", slot->name);
- kobject_unregister(&slot->kobj);
+ list_del(&hotplug->slot_list);
+
+ slot = hotplug->pci_slot;
+ fs_remove_slot(slot);
+ dbg ("Removed slot %s from the list\n", hotplug->name);
return 0;
}
@@ -686,13 +620,15 @@ int pci_hp_deregister (struct hotplug_slot *slot)
*
* Returns 0 if successful, anything else for an error.
*/
-int __must_check pci_hp_change_slot_info(struct hotplug_slot *slot,
+int __must_check pci_hp_change_slot_info(struct hotplug_slot *hotplug,
struct hotplug_slot_info *info)
{
- if ((slot == NULL) || (info == NULL))
+ struct pci_slot *slot;
+ if (!hotplug || !info)
return -ENODEV;
+ slot = hotplug->pci_slot;
- memcpy (slot->info, info, sizeof (struct hotplug_slot_info));
+ memcpy (hotplug->info, info, sizeof (struct hotplug_slot_info));
return 0;
}
@@ -701,31 +637,21 @@ static int __init pci_hotplug_init (void)
{
int result;
- kobj_set_kset_s(&pci_hotplug_slots_subsys, pci_bus_type.subsys);
- result = subsystem_register(&pci_hotplug_slots_subsys);
- if (result) {
- err("Register subsys with error %d\n", result);
- goto exit;
- }
result = cpci_hotplug_init(debug);
if (result) {
err ("cpci_hotplug_init with error %d\n", result);
- goto err_subsys;
+ goto err_cpci;
}
info (DRIVER_DESC " version: " DRIVER_VERSION "\n");
- goto exit;
-
-err_subsys:
- subsystem_unregister(&pci_hotplug_slots_subsys);
-exit:
+
+err_cpci:
return result;
}
static void __exit pci_hotplug_exit (void)
{
cpci_hotplug_exit();
- subsystem_unregister(&pci_hotplug_slots_subsys);
}
module_init(pci_hotplug_init);
@@ -737,7 +663,6 @@ MODULE_LICENSE("GPL");
module_param(debug, bool, 0644);
MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
-EXPORT_SYMBOL_GPL(pci_hotplug_slots_subsys);
EXPORT_SYMBOL_GPL(pci_hp_register);
EXPORT_SYMBOL_GPL(pci_hp_deregister);
EXPORT_SYMBOL_GPL(pci_hp_change_slot_info);
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 6462ac3..8fee402 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -69,7 +69,6 @@ static int get_power_status (struct hotplug_slot *slot, u8 *value);
static int get_attention_status (struct hotplug_slot *slot, u8 *value);
static int get_latch_status (struct hotplug_slot *slot, u8 *value);
static int get_adapter_status (struct hotplug_slot *slot, u8 *value);
-static int get_address (struct hotplug_slot *slot, u32 *value);
static int get_max_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value);
static int get_cur_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value);
@@ -82,7 +81,6 @@ static struct hotplug_slot_ops pciehp_hotplug_slot_ops = {
.get_attention_status = get_attention_status,
.get_latch_status = get_latch_status,
.get_adapter_status = get_adapter_status,
- .get_address = get_address,
.get_max_bus_speed = get_max_bus_speed,
.get_cur_bus_speed = get_cur_bus_speed,
};
@@ -245,14 +243,16 @@ static int init_slots(struct controller *ctrl)
dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x "
"slot_device_offset=%x\n", slot->bus, slot->device,
slot->hp_slot, slot->number, ctrl->slot_device_offset);
- retval = pci_hp_register(hotplug_slot);
+ retval = pci_hp_register(hotplug_slot,
+ ctrl->pci_dev->subordinate,
+ slot->number);
if (retval) {
err ("pci_hp_register failed with error %d\n", retval);
goto error_info;
}
/* create additional sysfs entries */
if (EMI(ctrl->ctrlcap)) {
- retval = sysfs_create_file(&hotplug_slot->kobj,
+ retval = sysfs_create_file(&hotplug_slot->pci_slot->kobj,
&hotplug_slot_attr_lock.attr);
if (retval) {
pci_hp_deregister(hotplug_slot);
@@ -285,7 +285,7 @@ static void cleanup_slots(struct controller *ctrl)
slot = list_entry(tmp, struct slot, slot_list);
list_del(&slot->slot_list);
if (EMI(ctrl->ctrlcap))
- sysfs_remove_file(&slot->hotplug_slot->kobj,
+ sysfs_remove_file(&slot->hotplug_slot->pci_slot->kobj,
&hotplug_slot_attr_lock.attr);
cancel_delayed_work(&slot->work);
flush_scheduled_work();
@@ -387,18 +387,6 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
return 0;
}
-static int get_address(struct hotplug_slot *hotplug_slot, u32 *value)
-{
- struct slot *slot = hotplug_slot->private;
- struct pci_bus *bus = slot->ctrl->pci_dev->subordinate;
-
- dbg("%s - physical_slot = %s\n", __FUNCTION__, hotplug_slot->name);
-
- *value = (pci_domain_nr(bus) << 16) | (slot->bus << 8) | slot->device;
-
- return 0;
-}
-
static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value)
{
struct slot *slot = hotplug_slot->private;
diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c
index a080fed..3663507 100644
--- a/drivers/pci/hotplug/rpadlpar_sysfs.c
+++ b/drivers/pci/hotplug/rpadlpar_sysfs.c
@@ -23,6 +23,8 @@
#define MAX_DRC_NAME_LEN 64
+extern struct kset pci_slots_subsys;
+
/* Store return code of dlpar operation in attribute struct */
struct dlpar_io_attr {
int rc;
@@ -130,7 +132,7 @@ struct kobj_type ktype_dlpar_io = {
struct kset dlpar_io_kset = {
.kobj = {.ktype = &ktype_dlpar_io,
- .parent = &pci_hotplug_slots_subsys.kobj},
+ .parent = &pci_slots_subsys.kobj},
.ktype = &ktype_dlpar_io,
};
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index e1e7c9d..cc74602 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -625,12 +625,7 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus)
bss_hotplug_slot->ops = &sn_hotplug_slot_ops;
bss_hotplug_slot->release = &sn_release_slot;
- rc = pci_hp_register(bss_hotplug_slot);
- if (rc)
- goto register_err;
-
- rc = sysfs_create_file(&bss_hotplug_slot->kobj,
- &sn_slot_path_attr.attr);
+ rc = pci_hp_register(bss_hotplug_slot, pci_bus, device);
if (rc)
goto register_err;
}
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index 80dec97..22c4d2e 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -65,7 +65,6 @@ static int get_power_status (struct hotplug_slot *slot, u8 *value);
static int get_attention_status (struct hotplug_slot *slot, u8 *value);
static int get_latch_status (struct hotplug_slot *slot, u8 *value);
static int get_adapter_status (struct hotplug_slot *slot, u8 *value);
-static int get_address (struct hotplug_slot *slot, u32 *value);
static int get_max_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value);
static int get_cur_bus_speed (struct hotplug_slot *slot, enum pci_bus_speed *value);
@@ -78,7 +77,6 @@ static struct hotplug_slot_ops shpchp_hotplug_slot_ops = {
.get_attention_status = get_attention_status,
.get_latch_status = get_latch_status,
.get_adapter_status = get_adapter_status,
- .get_address = get_address,
.get_max_bus_speed = get_max_bus_speed,
.get_cur_bus_speed = get_cur_bus_speed,
};
@@ -152,7 +150,8 @@ static int init_slots(struct controller *ctrl)
dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x "
"slot_device_offset=%x\n", slot->bus, slot->device,
slot->hp_slot, slot->number, ctrl->slot_device_offset);
- retval = pci_hp_register(slot->hotplug_slot);
+ retval = pci_hp_register(slot->hotplug_slot,
+ ctrl->pci_dev->subordinate, slot->device);
if (retval) {
err("pci_hp_register failed with error %d\n", retval);
goto error_info;
@@ -277,18 +276,6 @@ static int get_adapter_status (struct hotplug_slot *hotplug_slot, u8 *value)
return 0;
}
-static int get_address (struct hotplug_slot *hotplug_slot, u32 *value)
-{
- struct slot *slot = get_slot(hotplug_slot);
- struct pci_bus *bus = slot->ctrl->pci_dev->subordinate;
-
- dbg("%s - physical_slot = %s\n", __FUNCTION__, hotplug_slot->name);
-
- *value = (pci_domain_nr(bus) << 16) | (slot->bus << 8) | slot->device;
-
- return 0;
-}
-
static int get_max_bus_speed (struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value)
{
struct slot *slot = get_slot(hotplug_slot);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index fc87e14..4bcfabf 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -91,3 +91,16 @@ pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
}
struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev);
+
+/* PCI slot sysfs helper code */
+#define to_pci_slot(s) container_of(s, struct pci_slot, kobj)
+
+extern struct kset pci_slots_subsys;
+
+struct pci_slot_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct pci_slot *, char *);
+ ssize_t (*store)(struct pci_slot *, const char *, size_t);
+};
+#define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr)
+
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
new file mode 100644
index 0000000..5d830d4
--- /dev/null
+++ b/drivers/pci/slot.c
@@ -0,0 +1,157 @@
+/*
+ * drivers/pci/slot.c
+ * Copyright (c) 2006 Matthew Wilcox <[email protected]>
+ * Copyright (c) 2006 Hewlett-Packard Company
+ */
+
+#include <linux/kobject.h>
+#include <linux/pci.h>
+#include "pci.h"
+
+struct kset pci_slots_subsys;
+
+static ssize_t pci_slot_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct pci_slot *slot = to_pci_slot(kobj);
+ struct pci_slot_attribute *attribute = to_pci_slot_attr(attr);
+ return attribute->show ? attribute->show(slot, buf) : -EIO;
+}
+
+static ssize_t pci_slot_attr_store(struct kobject *kobj,
+ struct attribute *attr, const char *buf, size_t len)
+{
+ struct pci_slot *slot = to_pci_slot(kobj);
+ struct pci_slot_attribute *attribute = to_pci_slot_attr(attr);
+ return attribute->store ? attribute->store(slot, buf, len) : -EIO;
+}
+
+static struct sysfs_ops pci_slot_sysfs_ops = {
+ .show = pci_slot_attr_show,
+ .store = pci_slot_attr_store,
+};
+
+static void pci_slot_release(struct kobject *kobj)
+{
+ struct pci_slot *slot = to_pci_slot(kobj);
+ slot->release(slot);
+ kfree(slot);
+}
+
+static struct kobj_type pci_slot_ktype = {
+ .sysfs_ops = &pci_slot_sysfs_ops,
+ .release = &pci_slot_release,
+};
+
+decl_subsys_name(pci_slots, slots, &pci_slot_ktype, NULL);
+
+static ssize_t address_read_file(struct pci_slot *slot, char *buf)
+{
+ return sprintf(buf, "%04x:%02x:%02x\n", pci_domain_nr(slot->bus),
+ slot->bus->number, slot->number);
+}
+
+static struct pci_slot_attribute pci_slot_attr_address = {
+ .attr = { .name = "address", .mode = S_IFREG | S_IRUGO },
+ .show = address_read_file,
+};
+
+static void remove_sysfs_files(struct pci_slot *slot)
+{
+ sysfs_remove_file(&slot->kobj, &pci_slot_attr_address.attr);
+}
+
+static int create_sysfs_files(struct pci_slot *slot)
+{
+ int result;
+
+ result = sysfs_create_file(&slot->kobj, &pci_slot_attr_address.attr);
+
+ return result;
+}
+
+struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
+ const char *name, void (*release)(struct pci_slot *))
+{
+ struct pci_slot *slot;
+ int err;
+
+ down_write(&pci_bus_sem);
+
+ /* If we've already created this slot, return -EEXIST. The same slot
+ * may be described twice (eg, by ACPI and PCIe) */
+ for (slot = parent->slot; slot; slot = slot->next) {
+ if (slot->number != slot_nr)
+ continue;
+ slot = ERR_PTR(-EEXIST);
+ goto out;
+ }
+
+ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+ if (!slot) {
+ slot = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ slot->bus = parent;
+ slot->number = slot_nr;
+ slot->release = release;
+
+ kobject_set_name(&slot->kobj, "%s", name);
+ kobj_set_kset_s(slot, pci_slots_subsys);
+ if (kobject_register(&slot->kobj)) {
+ printk(KERN_ERR "Unable to register kobject %s", name);
+ err = -EINVAL;
+ goto err;
+ }
+
+ err = create_sysfs_files(slot);
+ if (err)
+ goto unregister;
+
+ slot->next = parent->slot;
+ parent->slot = slot;
+
+ out:
+ up_write(&pci_bus_sem);
+ return slot;
+
+ unregister:
+ kobject_unregister(&slot->kobj);
+ err:
+ kfree(slot);
+ slot = ERR_PTR(err);
+ goto out;
+}
+EXPORT_SYMBOL_GPL(pci_create_slot);
+
+int pci_destroy_slot(struct pci_slot *slot)
+{
+ struct pci_slot **pprev;
+
+ for (pprev = &slot->bus->slot; *pprev; pprev = &(*pprev)->next) {
+ if (*pprev != slot)
+ continue;
+ *pprev = slot->next;
+ }
+
+ remove_sysfs_files(slot);
+ kobject_unregister(&slot->kobj);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pci_destroy_slot);
+
+static int pci_slot_init(void)
+{
+ int result;
+
+ kobj_set_kset_s(&pci_slots_subsys, pci_bus_type.subsys);
+ result = subsystem_register(&pci_slots_subsys);
+ if (result)
+ printk(KERN_ERR "PCI: Slot initialisation failure (%d)",
+ result);
+ return result;
+}
+
+subsys_initcall(pci_slot_init);
+EXPORT_SYMBOL_GPL(pci_slots_subsys);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0dd93bb..a075667 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -129,6 +129,16 @@ struct pci_cap_saved_state {
u32 data[0];
};
+/* pci_slot represents a physical slot */
+struct pci_slot {
+ struct pci_bus *bus; /* The bus this slot is on */
+ struct pci_slot *next; /* Next slot on this bus */
+ struct hotplug_slot *hotplug; /* Hotplug info (migrate over time) */
+ unsigned char number; /* PCI_SLOT(pci_dev->devfn) */
+ struct kobject kobj;
+ void (*release)(struct pci_slot *);
+};
+
/*
* The pci_dev structure is used to describe PCI devices.
*/
@@ -140,6 +150,7 @@ struct pci_dev {
void *sysdata; /* hook for sys-specific extension */
struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */
+ struct pci_slot *slot; /* Physical slot this device is in */
unsigned int devfn; /* encoded device & function index */
unsigned short vendor;
@@ -261,6 +272,7 @@ struct pci_bus {
struct list_head children; /* list of child buses */
struct list_head devices; /* list of devices on this bus */
struct pci_dev *self; /* bridge device as seen by parent */
+ struct pci_slot *slot; /* First physical slot on this bus */
struct resource *resource[PCI_BUS_NUM_RESOURCES];
/* address space routed to this bus */
@@ -470,6 +482,8 @@ static inline struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *s
}
struct pci_bus *pci_create_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata);
struct pci_bus * pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr);
+struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
+ const char *name, void (*release)(struct pci_slot *));
int pci_scan_slot(struct pci_bus *bus, int devfn);
struct pci_dev * pci_scan_single_device(struct pci_bus *bus, int devfn);
void pci_device_add(struct pci_dev *dev, struct pci_bus *bus);
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index ab4cb6e..bb36c59 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -95,9 +95,6 @@ struct hotplug_slot_attribute {
* @get_adapter_status: Called to get see if an adapter is present in the slot or not.
* If this field is NULL, the value passed in the struct hotplug_slot_info
* will be used when this value is requested by a user.
- * @get_address: Called to get pci address of a slot.
- * If this field is NULL, the value passed in the struct hotplug_slot_info
- * will be used when this value is requested by a user.
* @get_max_bus_speed: Called to get the max bus speed for a slot.
* If this field is NULL, the value passed in the struct hotplug_slot_info
* will be used when this value is requested by a user.
@@ -120,7 +117,6 @@ struct hotplug_slot_ops {
int (*get_attention_status) (struct hotplug_slot *slot, u8 *value);
int (*get_latch_status) (struct hotplug_slot *slot, u8 *value);
int (*get_adapter_status) (struct hotplug_slot *slot, u8 *value);
- int (*get_address) (struct hotplug_slot *slot, u32 *value);
int (*get_max_bus_speed) (struct hotplug_slot *slot, enum pci_bus_speed *value);
int (*get_cur_bus_speed) (struct hotplug_slot *slot, enum pci_bus_speed *value);
};
@@ -140,7 +136,6 @@ struct hotplug_slot_info {
u8 attention_status;
u8 latch_status;
u8 adapter_status;
- u32 address;
enum pci_bus_speed max_bus_speed;
enum pci_bus_speed cur_bus_speed;
};
@@ -166,15 +161,14 @@ struct hotplug_slot {
/* Variables below this are for use only by the hotplug pci core. */
struct list_head slot_list;
- struct kobject kobj;
+ struct pci_slot *pci_slot;
};
#define to_hotplug_slot(n) container_of(n, struct hotplug_slot, kobj)
-extern int pci_hp_register (struct hotplug_slot *slot);
-extern int pci_hp_deregister (struct hotplug_slot *slot);
+extern int pci_hp_register(struct hotplug_slot *, struct pci_bus *, int nr);
+extern int pci_hp_deregister(struct hotplug_slot *slot);
extern int __must_check pci_hp_change_slot_info (struct hotplug_slot *slot,
struct hotplug_slot_info *info);
-extern struct kset pci_hotplug_slots_subsys;
/* PCI Setting Record (Type 0) */
struct hpp_type0 {
--
1.5.3.1.1.g1e61
Detect all physical PCI slots as described by ACPI, and create
entries in /sys/bus/pci/slots/.
Not all physical slots are hotpluggable, and the acpiphp module
does not detect them. Now we know the physical PCI geography of
our system, without caring about hotplug.
Signed-off-by: Alex Chiang <[email protected]>
---
Looking to get comments on this patch, as it could use some
work.
I borrowed a lot of code from acpiphp_glue.c. If your work is
identifiable and feel that pci_slot.c should have your (c),
please let me know.
More importantly, in register_slot(), we are seeing a lot of
noisy output during boot, as acpi_get_pci_id() complains quite
often about not finding an ACPI-PCI binding on devices we pass
in. I'm not quite sure what is the best way to detect whether
we've already added a slot, so that we can avoid calling
acpi_get_pci_id() so often.
drivers/acpi/Makefile | 3 +-
drivers/acpi/pci_slot.c | 168 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 170 insertions(+), 1 deletions(-)
create mode 100644 drivers/acpi/pci_slot.c
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 54e3ab0..f6caec8 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -47,7 +47,8 @@ obj-$(CONFIG_ACPI_FAN) += fan.o
obj-$(CONFIG_ACPI_DOCK) += dock.o
obj-$(CONFIG_ACPI_BAY) += bay.o
obj-$(CONFIG_ACPI_VIDEO) += video.o
-obj-y += pci_root.o pci_link.o pci_irq.o pci_bind.o
+obj-y += pci_root.o pci_link.o pci_irq.o pci_bind.o \
+ pci_slot.o
obj-$(CONFIG_ACPI_POWER) += power.o
obj-$(CONFIG_ACPI_PROCESSOR) += processor.o
obj-$(CONFIG_ACPI_CONTAINER) += container.o
diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c
new file mode 100644
index 0000000..a49a14e
--- /dev/null
+++ b/drivers/acpi/pci_slot.c
@@ -0,0 +1,168 @@
+/*
+ * pci_slot.c - ACPI PCI Slot Driver
+ *
+ * The code here is heavily leveraged from the acpiphp module.
+ * Thanks to Matthew Wilcox <[email protected]> for much guidance.
+ *
+ * Copyright (C) 2007 Alex Chiang <[email protected]>
+ * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/acpi.h>
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+
+#define _COMPONENT ACPI_PCI_COMPONENT
+ACPI_MODULE_NAME("pci_slot");
+
+#define MY_NAME "pci_slot"
+#define err(format, arg...) printk(KERN_ERR "%s: " format , MY_NAME , ## arg)
+#define info(format, arg...) printk(KERN_INFO "%s: " format , MY_NAME , ## arg)
+
+static int acpi_pci_slot_add(acpi_handle handle);
+static void acpi_pci_slot_remove(acpi_handle handle);
+
+static struct acpi_pci_driver acpi_pci_slot_driver = {
+ .add = acpi_pci_slot_add,
+ .remove = acpi_pci_slot_remove,
+};
+
+/*
+ * register_slot - callback function to discover / create physical PCI slots
+ * @handle: any device underneath an acpi_pci_root (sometimes it's a slot
+ * device, sometimes not)
+ * @context: struct pci_bus
+ * The possible error conditions are non-fatal, so we always return
+ * AE_OK, as to not terminate our namespace walk prematurely.
+ */
+static acpi_status
+register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
+{
+ int device;
+ unsigned long adr, sun;
+ acpi_status status;
+ char name[KOBJ_NAME_LEN];
+
+ struct acpi_pci_id id;
+ struct pci_slot *pci_slot;
+ struct pci_bus *pci_bus = context;
+
+ status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr);
+ if (ACPI_FAILURE(status))
+ return AE_OK;
+ device = (adr >> 16) & 0xffff;
+
+ /*
+ * If we get AE_NOT_FOUND, that means we're looking at an
+ * ACPI SxFy object which we're not conerned with, so just return.
+ *
+ * Otherwise, If we can find an ACPI context for this device, use it.
+ */
+ status = acpi_get_pci_id(handle, &id);
+ if (status == AE_NOT_FOUND)
+ return AE_OK;
+ if (ACPI_SUCCESS(status)) {
+ pci_bus = pci_find_bus(id.segment, id.bus);
+ device = id.device;
+ }
+
+ /* No _SUN == not a slot == bail */
+ status = acpi_evaluate_integer(handle, "_SUN", NULL, &sun);
+ if (ACPI_FAILURE(status))
+ return AE_OK;
+
+ snprintf(name, sizeof(name), "%u", (u32)sun);
+ pci_slot = pci_create_slot(pci_bus, device, name);
+ if (IS_ERR(pci_slot)) {
+ err("pci_create_slot returned %ld\n", PTR_ERR(pci_slot));
+ return AE_OK;
+ }
+
+ return AE_OK;
+}
+
+#define ACPI_STA_FUNCTIONING (0x00000008)
+
+/*
+ * acpi_pci_slot_add - walk namespace under a PCI root bridge
+ * @handle: points to an acpi_pci_root
+ */
+static int
+acpi_pci_slot_add(acpi_handle handle)
+{
+ int seg, bus;
+ unsigned long tmp;
+ acpi_status status;
+ acpi_handle dummy_handle;
+ struct pci_bus *pci_bus;
+
+ /* If the bridge doesn't have _STA, we assume it is always there */
+ status = acpi_get_handle(handle, "_STA", &dummy_handle);
+ if (ACPI_SUCCESS(status)) {
+ status = acpi_evaluate_integer(handle, "_STA", NULL, &tmp);
+ if (ACPI_FAILURE(status)) {
+ info("%s: _STA evaluation failure\n", __FUNCTION__);
+ return 0;
+ }
+ if ((tmp & ACPI_STA_FUNCTIONING) == 0)
+ /* don't register this object */
+ return 0;
+ }
+
+ status = acpi_evaluate_integer(handle, "_SEG", NULL, &tmp);
+ seg = ACPI_SUCCESS(status) ? tmp : 0;
+
+ status = acpi_evaluate_integer(handle, "_BBN", NULL, &tmp);
+ bus = ACPI_SUCCESS(status) ? tmp : 0;
+
+ pci_bus = pci_find_bus(seg, bus);
+ if (!pci_bus)
+ return 0;
+
+ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, ACPI_UINT32_MAX,
+ register_slot, pci_bus, NULL);
+
+ return status;
+}
+
+static int __init
+acpi_pci_slot_init(void)
+{
+ acpi_pci_register_driver(&acpi_pci_slot_driver);
+ return 0;
+}
+
+/*
+ * acpi_pci_slot_remove and acpi_pci_slot_exit are empty for now, since
+ * /sys/bus/pci/slots/ entries shouldn't ever really go away.
+ */
+static void
+acpi_pci_slot_remove(acpi_handle handle)
+{
+}
+
+static void __exit
+acpi_pci_slot_exit(void)
+{
+}
+
+module_init(acpi_pci_slot_init);
+module_exit(acpi_pci_slot_exit);
--
1.5.3.1.1.g1e61
Change the semantics of pci_create_slot() such that it does not
require a hotplug release() method when creating a slot. Now, we
can use this interface to create a pci_slot for any physical PCI
slots, not just hotpluggable ones.
Add new pci_slot_add_hotplug() interface so that various PCI hotplug
drivers can add hotplug release() methods to pre-existing physical
slots.
Signed-off-by: Alex Chiang <[email protected]>
---
drivers/pci/hotplug/pci_hotplug_core.c | 2 +-
drivers/pci/slot.c | 39 +++++++++++++++++++++++++++-----
include/linux/pci.h | 4 ++-
3 files changed, 37 insertions(+), 8 deletions(-)
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 96c21e2..91afa8e 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -568,7 +568,7 @@ int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr)
return -EINVAL;
}
- pci_slot = pci_create_slot(bus, slot_nr, slot->name, hotplug_release);
+ pci_slot = pci_slot_add_hotplug(bus, slot_nr, hotplug_release);
if (IS_ERR(pci_slot))
return PTR_ERR(pci_slot);
slot->pci_slot = pci_slot;
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 5d830d4..3e91491 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -70,20 +70,48 @@ static int create_sysfs_files(struct pci_slot *slot)
return result;
}
+struct pci_slot *pci_slot_add_hotplug(struct pci_bus *parent, int slot_nr,
+ void (*release)(struct pci_slot *))
+{
+ struct pci_slot *slot;
+ int found = 0;
+
+ down_write(&pci_bus_sem);
+
+ /* This slot should have already been created, so look for it. If
+ * we can't find it, return -EEXIST.
+ */
+ for (slot = parent->slot; slot; slot = slot->next)
+ if (slot->number == slot_nr) {
+ found = 1;
+ break;
+ }
+
+ if (!found) {
+ slot = ERR_PTR(-EEXIST);
+ goto out;
+ }
+
+ slot->release = release;
+ out:
+ up_write(&pci_bus_sem);
+ return slot;
+}
+EXPORT_SYMBOL_GPL(pci_slot_add_hotplug);
+
struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
- const char *name, void (*release)(struct pci_slot *))
+ const char *name)
{
struct pci_slot *slot;
int err;
down_write(&pci_bus_sem);
- /* If we've already created this slot, return -EEXIST. The same slot
- * may be described twice (eg, by ACPI and PCIe) */
+ /* If we've already created this slot, just return. */
for (slot = parent->slot; slot; slot = slot->next) {
- if (slot->number != slot_nr)
+ if (slot->number != slot_nr) {
continue;
- slot = ERR_PTR(-EEXIST);
+ }
goto out;
}
@@ -95,7 +123,6 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
slot->bus = parent;
slot->number = slot_nr;
- slot->release = release;
kobject_set_name(&slot->kobj, "%s", name);
kobj_set_kset_s(slot, pci_slots_subsys);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a075667..62116a5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -483,7 +483,9 @@ static inline struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *s
struct pci_bus *pci_create_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata);
struct pci_bus * pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr);
struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
- const char *name, void (*release)(struct pci_slot *));
+ const char *name);
+struct pci_slot *pci_slot_add_hotplug(struct pci_bus *parent, int slot_nr,
+ void (*release)(struct pci_slot *));
int pci_scan_slot(struct pci_bus *bus, int devfn);
struct pci_dev * pci_scan_single_device(struct pci_bus *bus, int devfn);
void pci_device_add(struct pci_dev *dev, struct pci_bus *bus);
--
1.5.3.1.1.g1e61
On Mon, Nov 12, 2007 at 05:08:53PM -0700, Alex Chiang wrote:
> Hello,
>
> [this patch series touches a few subsystems; hopefully I got all
> the right maintainers]
>
> Recently, Matthew Wilcox sent out the following mail about PCI
> slots:
>
> http://marc.info/?l=linux-pci&m=119432330418980&w=2
>
> The following patch series is a rough first cut at implementing
> the ideas he outlined, namely, that PCI slots are physical
> objects that we care about, independent of their hotplug
> capabilities.
I'm still not sold on this idea at all. I'm really betting that there
is a lot of incorrect acpi slot information floating around in machines
and odd things will show up in these slot entries.
I say this because for a long time there was no "standard" acpi entries
for hotplug slots and different companies did different things. Hence
the "odd" IBM acpi hotplug implementation as one example. If this is
going to go anywhere, you need to get IBM to agree that it works
properly with all their machines...
Also, some companies already provide userspace tools to get all of this
information about the different slots in a system and what is where,
from userspace, no kernel changes are needed. So, why add all this
extra complexity to the kernel if it is not needed?
thanks,
greg k-h
On Tue, Nov 13, 2007 at 09:01:29AM -0800, Greg KH wrote:
> I'm still not sold on this idea at all. I'm really betting that there
> is a lot of incorrect acpi slot information floating around in machines
> and odd things will show up in these slot entries.
Is that the end of the world? Instead of having no information, we'll
end up with odd information. If people complain, we can always
blacklist (indeed, won't the ACPI rolling blacklist catch the majority
of these machines?)
> I say this because for a long time there was no "standard" acpi entries
> for hotplug slots and different companies did different things. Hence
> the "odd" IBM acpi hotplug implementation as one example. If this is
> going to go anywhere, you need to get IBM to agree that it works
> properly with all their machines...
Not in terms of slot names. There were various things that ACPI didn't
specify, like attention and latches, but the description of _SUN hasn't
changed.
> Also, some companies already provide userspace tools to get all of this
> information about the different slots in a system and what is where,
> from userspace, no kernel changes are needed. So, why add all this
> extra complexity to the kernel if it is not needed?
Do you have any examples of this?
We should, IMO, be improving the way we tell users which device has
a problem. 'tulip7', 'eth4', 'pci 0000:04:1e.3' ... all the user wants
to know is which damn card it is so they can replace it. And slot name
tells them that.
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
On Tue, Nov 13, 2007 at 11:33:53AM -0700, Matthew Wilcox wrote:
> On Tue, Nov 13, 2007 at 09:01:29AM -0800, Greg KH wrote:
> > I'm still not sold on this idea at all. I'm really betting that there
> > is a lot of incorrect acpi slot information floating around in machines
> > and odd things will show up in these slot entries.
>
> Is that the end of the world? Instead of having no information, we'll
> end up with odd information. If people complain, we can always
> blacklist (indeed, won't the ACPI rolling blacklist catch the majority
> of these machines?)
I don't think the rolling blacklist will catch this, as the rest of the
ACPI code is "correct".
And yes, incorrect information exported by the kernel is not good at
all, especially if a user expects it to work properly :)
> > I say this because for a long time there was no "standard" acpi entries
> > for hotplug slots and different companies did different things. Hence
> > the "odd" IBM acpi hotplug implementation as one example. If this is
> > going to go anywhere, you need to get IBM to agree that it works
> > properly with all their machines...
>
> Not in terms of slot names. There were various things that ACPI didn't
> specify, like attention and latches, but the description of _SUN hasn't
> changed.
Ok, again, I want to see the IBM people sign off on this, after testing
on all of their machines, before I'll consider this, as I know the IBM
acpi tables are "odd".
Also, how about Dell machines? I know they are probably not expecting
this information to show up and who knows if the numbering of their
slots match up with their physical diagrams (I say this based on all of
the eth0/eth1 "issues" with Dell machines over the years...)
> > Also, some companies already provide userspace tools to get all of this
> > information about the different slots in a system and what is where,
> > from userspace, no kernel changes are needed. So, why add all this
> > extra complexity to the kernel if it is not needed?
>
> Do you have any examples of this?
IBM sells a program that does this for server rooms. It's probably part
of some Tivoli package somewhere, sorry I don't remember the name. I
did see it working many years ago and it required no kernel changes at
all to work properly.
> We should, IMO, be improving the way we tell users which device has
> a problem. 'tulip7', 'eth4', 'pci 0000:04:1e.3' ... all the user wants
> to know is which damn card it is so they can replace it. And slot name
> tells them that.
Not if the slot information is incorrect :)
That's my main concern.
thanks,
greg k-h
On Mon, Nov 12, 2007 at 05:13:36PM -0700, Alex Chiang wrote:
> + slot->name = kmalloc(8, GFP_KERNEL);
> + sprintf(slot->name, "fake%d", count++);
Please use snprintf to avoid buffer overruns!
--linas
On Tue, Nov 13, 2007 at 01:48:15PM -0600, Linas Vepstas wrote:
> On Mon, Nov 12, 2007 at 05:13:36PM -0700, Alex Chiang wrote:
> > + slot->name = kmalloc(8, GFP_KERNEL);
> > + sprintf(slot->name, "fake%d", count++);
>
> Please use snprintf to avoid buffer overruns!
Or, since kmalloc can return a 32-byte object at smallest, just allocate
32 bytes and continue using sprintf. fake%d would overflow after
999,999,999,999,999,999,999,999,999 pci devices, by which time we've
run the machine out of memory anyway.
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
On Mon, Nov 12, 2007 at 05:14:47PM -0700, Alex Chiang wrote:
> +/* pci_slot represents a physical slot */
> +struct pci_slot {
> + struct pci_bus *bus; /* The bus this slot is on */
> + struct pci_slot *next; /* Next slot on this bus */
> + struct hotplug_slot *hotplug; /* Hotplug info (migrate over time) */
How much migration do you expect? Some of it? All of it? If the
answer is "all of it", wouldn't it just be easier to rename
struct hotplug_slot, and go from there?
--linas
On Tue, Nov 13, 2007 at 01:56:46PM -0600, Linas Vepstas wrote:
> On Mon, Nov 12, 2007 at 05:14:47PM -0700, Alex Chiang wrote:
> > +/* pci_slot represents a physical slot */
> > +struct pci_slot {
> > + struct pci_bus *bus; /* The bus this slot is on */
> > + struct pci_slot *next; /* Next slot on this bus */
> > + struct hotplug_slot *hotplug; /* Hotplug info (migrate over time) */
>
> How much migration do you expect? Some of it? All of it? If the
> answer is "all of it", wouldn't it just be easier to rename
> struct hotplug_slot, and go from there?
Only the bits that make sense for non-hotpluggable slots.
I tried your suggestion first. It wasn't much fun.
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
On Tue, Nov 13, 2007 at 10:51:22AM -0800, Greg KH wrote:
> Ok, again, I want to see the IBM people sign off on this, after testing
> on all of their machines, before I'll consider this, as I know the IBM
> acpi tables are "odd".
That seems a little higher standard than patches are normally held to.
How about the patches get sent to the appropriate people at IBM (who are
they?) and if we haven't heard a NAK from them after a month, then they
get applied?
> Also, how about Dell machines? I know they are probably not expecting
> this information to show up and who knows if the numbering of their
> slots match up with their physical diagrams (I say this based on all of
> the eth0/eth1 "issues" with Dell machines over the years...)
I'm pretty sure that Windows uses the _SUN information, so I think this
is going to be well-tested.
> IBM sells a program that does this for server rooms. It's probably part
> of some Tivoli package somewhere, sorry I don't remember the name. I
> did see it working many years ago and it required no kernel changes at
> all to work properly.
Well, yes, it doesn't take kernel effort if you're willing to build
proprietary knowledge into a userspace program. This seems similar
to the argument that filesystems don't need to be in the kernel since
they can be implemented in user space. They work better in the kernel,
just like this does.
By the way, the end result of this should be a pci-hotplug system that has
much less code in the drivers, more uniformity between the presentation
to userspace, more functionality and is easier to understand.
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
On Tue, Nov 13, 2007 at 01:11:02PM -0700, Matthew Wilcox wrote:
> On Tue, Nov 13, 2007 at 10:51:22AM -0800, Greg KH wrote:
> > Ok, again, I want to see the IBM people sign off on this, after testing
> > on all of their machines, before I'll consider this, as I know the IBM
> > acpi tables are "odd".
>
> That seems a little higher standard than patches are normally held to.
> How about the patches get sent to the appropriate people at IBM (who are
> they?) and if we haven't heard a NAK from them after a month, then they
> get applied?
When you are touching code that I know is going to have problems on a
whole range of different manufacturer's machines, do you expect me to
just ignore that and let you add a feature that is going to be
incorrect?
> > Also, how about Dell machines? I know they are probably not expecting
> > this information to show up and who knows if the numbering of their
> > slots match up with their physical diagrams (I say this based on all of
> > the eth0/eth1 "issues" with Dell machines over the years...)
>
> I'm pretty sure that Windows uses the _SUN information, so I think this
> is going to be well-tested.
Again, based on the past history with such things as mis-labeled
ethernet ports, I have doubts. I can hope, but I need proof based on
the major problems we have had in the past.
It is better to not show information to users at all, then to give them
information that is incorrect, don't you think?
> > IBM sells a program that does this for server rooms. It's probably part
> > of some Tivoli package somewhere, sorry I don't remember the name. I
> > did see it working many years ago and it required no kernel changes at
> > all to work properly.
>
> Well, yes, it doesn't take kernel effort if you're willing to build
> proprietary knowledge into a userspace program. This seems similar
> to the argument that filesystems don't need to be in the kernel since
> they can be implemented in user space. They work better in the kernel,
> just like this does.
Don't mix up things like filesystems and exposing pci slots. That is
just foolish...
thanks,
greg k-h
* Greg KH <[email protected]>:
> On Mon, Nov 12, 2007 at 05:08:53PM -0700, Alex Chiang wrote:
> >
> > Recently, Matthew Wilcox sent out the following mail about
> > PCI slots:
> >
> > http://marc.info/?l=linux-pci&m=119432330418980&w=2
> >
> > The following patch series is a rough first cut at
> > implementing the ideas he outlined, namely, that PCI slots
> > are physical objects that we care about, independent of their
> > hotplug capabilities.
>
> Also, some companies already provide userspace tools to get all
> of this information about the different slots in a system and
> what is where, from userspace, no kernel changes are needed.
> So, why add all this extra complexity to the kernel if it is
> not needed?
On HP ia64 systems, that information is locked away in ACPI, and
there's no easy way to get at it. Alex Williamson tried providing
a generic dev_acpi driver, so that userspace could do whatever
they wanted to with the information:
http://lkml.org/lkml/2004/8/3/106
And that effort kinda died. I'm of mixed feelings on that driver,
since it would be really nice to get unfettered access to the
ACPI namespace, but it's pretty dangerous, since any interesting
thing you might want to do is actually a method (aka, it calls
into firmware) and who knows what side effects there might be.
So from my point of view, if ia64 customers want to know about
the slots they have in their systems, we're gonna have to do
something kernel-intrusive anyhow.
Thanks.
/ac
On Tue, Nov 13, 2007 at 09:01:29AM -0800, Greg KH wrote:
>
> Also, some companies already provide userspace tools to get all of this
> information about the different slots in a system and what is where,
> from userspace, no kernel changes are needed. So, why add all this
> extra complexity to the kernel if it is not needed?
Second that motion.... I don't get it. What are the goals of this
patch, really? Just to get a "slot geographical location" from the
kernel? I'm balancing the intellectual appeal of having a kernel
struct for representing physical objects, against the headache of
reading (debugging, modifying) code that has yet another struct
doing yet another thing. So far, the dread of future headaches
is winning.
On pseries systems, I deal with something called the "partitionable
endpoint", which I think probably usually corresponds to physical
slots, but I don't really know. The hardware guys pitch changeups
all the time. Some of these are soldered onto the planar,
so the are not physical slots. But they are, um, "hotpluggable"
in that you can dynamically add & remove them from the system
(even though you cannot physically unplug the ones that are
soldered on -- you can only assign them to other hardware partitions
(think hardware VM or hardware Xen)). So, naively, the physical
slot concept doesn't really map to what I have to work with;
it just adds one more appendix to it all, one more thing to
get confused about.
To be clear: above remarks are for the PowerPC boxes. I have
no clue about how things work on the IBM Intel-based boxes.
And Greg's original "get IBM to agree" remark is about the
Intel-based boxes.
--linas
On Tue, Nov 13, 2007 at 01:21:54PM -0700, Alex Chiang wrote:
> * Greg KH <[email protected]>:
> > On Mon, Nov 12, 2007 at 05:08:53PM -0700, Alex Chiang wrote:
> > >
> > > Recently, Matthew Wilcox sent out the following mail about
> > > PCI slots:
> > >
> > > http://marc.info/?l=linux-pci&m=119432330418980&w=2
> > >
> > > The following patch series is a rough first cut at
> > > implementing the ideas he outlined, namely, that PCI slots
> > > are physical objects that we care about, independent of their
> > > hotplug capabilities.
> >
> > Also, some companies already provide userspace tools to get all
> > of this information about the different slots in a system and
> > what is where, from userspace, no kernel changes are needed.
> > So, why add all this extra complexity to the kernel if it is
> > not needed?
>
> On HP ia64 systems, that information is locked away in ACPI, and
> there's no easy way to get at it. Alex Williamson tried providing
> a generic dev_acpi driver, so that userspace could do whatever
> they wanted to with the information:
>
> http://lkml.org/lkml/2004/8/3/106
>
> And that effort kinda died. I'm of mixed feelings on that driver,
> since it would be really nice to get unfettered access to the
> ACPI namespace, but it's pretty dangerous, since any interesting
> thing you might want to do is actually a method (aka, it calls
> into firmware) and who knows what side effects there might be.
>
> So from my point of view, if ia64 customers want to know about
> the slots they have in their systems, we're gonna have to do
> something kernel-intrusive anyhow.
Doesn't /sys/firmware/acpi give you raw access to the correct tables
already?
And isn't there some other tool that dumps the raw ACPI tables? I
thought the acpi developers used it all the time when debugging things
with users.
thanks,
greg k-h
* Greg KH <[email protected]>:
>
> Ok, again, I want to see the IBM people sign off on this, after
> testing on all of their machines, before I'll consider this, as
> I know the IBM acpi tables are "odd".
Who would be a good contact at IBM to get some eyes / machine
time on this?
> Also, how about Dell machines? I know they are probably not
> expecting this information to show up and who knows if the
> numbering of their slots match up with their physical diagrams
Who would be a good contact at Dell for the same?
I don't have as much experience with oddball firmware from
various vendors as others on this list, but given the rather
stable definition of _SUN in the ACPI spec, I'd be surprised to
see vendors abusing that method. [I fully accept the possibility
that I'm just naive ;)]
More likely, a vendor will do what the HP Proliant folks did,
that being simply omitting a _SUN method altogether.
One more thought on that -- at *worst* my patch series will do no
worse than the status quo of what the acpiphp module is doing
today. That module walks through the namespace looking for _SUN
methods, and when it finds them, it creates an entry in exactly
the same spot (/sys/bus/pci/slots/N) that my patch series does.
What this series adds beyond acpiphp is adding entries for slots
that aren't hotpluggable.
> IBM sells a program that does this for server rooms. It's
> probably part of some Tivoli package somewhere, sorry I don't
> remember the name. I did see it working many years ago and it
> required no kernel changes at all to work properly.
Like I said in an earlier email, HP ia64 systems will require a
kernel change to get this information. Whether it comes via a
generic ACPI access layer like dev_acpi, or something like this
patch series, the kernel will still get touched.
Thanks.
/ac
* Linas Vepstas <[email protected]>:
> On Tue, Nov 13, 2007 at 09:01:29AM -0800, Greg KH wrote:
> >
> > Also, some companies already provide userspace tools to get
> > all of this information about the different slots in a system
> > and what is where, from userspace, no kernel changes are
> > needed. So, why add all this extra complexity to the kernel
> > if it is not needed?
>
> Second that motion.... I don't get it. What are the goals of
> this patch, really? Just to get a "slot geographical location"
> from the kernel?
Yes, plus some general cleanups in the PCI hotplug space (more
patches queued up, pending the results of this series ;)
But to use the word "just" kinda implies that this is a trivial
feature that no one really cares about, which I'm not really sure
I agree with. Slot geographical location is important for
managability folks, who want to know which slot out of 192 (on a
big HP ia64 system, e.g.) that their failing network card might
be sitting in.
And again, we (HP ia64) need to get this information from the
kernel.
> I'm balancing the intellectual appeal of having a kernel struct
> for representing physical objects, against the headache of
> reading (debugging, modifying) code that has yet another struct
> doing yet another thing. So far, the dread of future headaches
> is winning.
Well, hopefully, the future is cleaner, rather than messier. ;)
> On pseries systems, I deal with something called the
> "partitionable endpoint", which I think probably usually
> corresponds to physical slots, but I don't really know.
>
> So, naively, the physical slot concept doesn't really map to
> what I have to work with; it just adds one more appendix to it
> all, one more thing to get confused about.
Sorry, I'm a bit ignorant about pseries -- what kind of name does
your PCI hotplug driver give to those slots? What shows up in
/sys/bus/pci/slots/?
> To be clear: above remarks are for the PowerPC boxes. I have no
> clue about how things work on the IBM Intel-based boxes. And
> Greg's original "get IBM to agree" remark is about the
> Intel-based boxes.
A split house. I have no idea how Proliants work either. :)
Thanks.
/ac
On Tue, Nov 13, 2007 at 10:51:22AM -0800, Greg KH wrote:
> On Tue, Nov 13, 2007 at 11:33:53AM -0700, Matthew Wilcox wrote:
> > On Tue, Nov 13, 2007 at 09:01:29AM -0800, Greg KH wrote:
> > > I'm still not sold on this idea at all. I'm really betting that there
> > > is a lot of incorrect acpi slot information floating around in machines
> > > and odd things will show up in these slot entries.
> >
> > Is that the end of the world? Instead of having no information, we'll
> > end up with odd information. If people complain, we can always
> > blacklist (indeed, won't the ACPI rolling blacklist catch the majority
> > of these machines?)
>
> I don't think the rolling blacklist will catch this, as the rest of the
> ACPI code is "correct".
>
> And yes, incorrect information exported by the kernel is not good at
> all, especially if a user expects it to work properly :)
>
> > > I say this because for a long time there was no "standard" acpi entries
> > > for hotplug slots and different companies did different things. Hence
> > > the "odd" IBM acpi hotplug implementation as one example. If this is
> > > going to go anywhere, you need to get IBM to agree that it works
> > > properly with all their machines...
> >
> > Not in terms of slot names. There were various things that ACPI didn't
> > specify, like attention and latches, but the description of _SUN hasn't
> > changed.
>
> Ok, again, I want to see the IBM people sign off on this, after testing
> on all of their machines, before I'll consider this, as I know the IBM
> acpi tables are "odd".
>
> Also, how about Dell machines? I know they are probably not expecting
> this information to show up and who knows if the numbering of their
> slots match up with their physical diagrams (I say this based on all of
> the eth0/eth1 "issues" with Dell machines over the years...)
The only reported _SUN problems on Dell systems were on the PE6800 and
PE6850 systems, which we've fixed with an updated BIOS several months
ago. IIRC the values weren't always unique which kind of defeated the
purpose.
The eth0/eth1 "issues" are completely separate, with no relation to
ACPI. That's purely a PCI tree depth-first discovery
vs. breadth-first discovery (where it indeed seems everyone except
Linux 2.6 uses and expects breadth-first). We looked at extending
ACPI to export device naming preference, but instead extended SMBIOS
tables because we could get that change in far easier.
The other bit that looks at slot numbers is the old PCI IRQ Routing
Table, which still exists on Dell systems.
> > We should, IMO, be improving the way we tell users which device has
> > a problem. 'tulip7', 'eth4', 'pci 0000:04:1e.3' ... all the user wants
> > to know is which damn card it is so they can replace it. And slot name
> > tells them that.
>
> Not if the slot information is incorrect :)
>
> That's my main concern.
Again, I've only seen this be incorrect on those two models of Dell
servers with outdated BIOS versions.
Thanks,
Matt
--
Matt Domsch
Linux Technology Strategist, Dell Office of the CTO
linux.dell.com & http://www.dell.com/linux
* Matt Domsch <[email protected]>:
>
> The only reported _SUN problems on Dell systems were on the
> PE6800 and PE6850 systems, which we've fixed with an updated
> BIOS several months ago. IIRC the values weren't always unique
> which kind of defeated the purpose.
FWIW, the ACPI 2.0 spec did not require uniqueness for _SUN.
(although there is a strange table that refers to _SUN as the
slot-unique ID (table 6-1 in spec v2.0b), the actual definition
of _SUN does not mention uniqueness).
Uniqueness was first required in the 3.0 spec.
/ac
On Tue, Nov 13, 2007 at 01:36:45PM -0700, Alex Chiang wrote:
> * Greg KH <[email protected]>:
> >
> > Ok, again, I want to see the IBM people sign off on this, after
> > testing on all of their machines, before I'll consider this, as
> > I know the IBM acpi tables are "odd".
>
> Who would be a good contact at IBM to get some eyes / machine
> time on this?
The current 2.4 pci hotplug maintainer? :)
> > Also, how about Dell machines? I know they are probably not
> > expecting this information to show up and who knows if the
> > numbering of their slots match up with their physical diagrams
>
> Who would be a good contact at Dell for the same?
Matt has already spoken up for this...
> I don't have as much experience with oddball firmware from
> various vendors as others on this list, but given the rather
> stable definition of _SUN in the ACPI spec, I'd be surprised to
> see vendors abusing that method. [I fully accept the possibility
> that I'm just naive ;)]
>
> More likely, a vendor will do what the HP Proliant folks did,
> that being simply omitting a _SUN method altogether.
>
> One more thought on that -- at *worst* my patch series will do no
> worse than the status quo of what the acpiphp module is doing
> today. That module walks through the namespace looking for _SUN
> methods, and when it finds them, it creates an entry in exactly
> the same spot (/sys/bus/pci/slots/N) that my patch series does.
The acpiphp module is not loaded on zillions of non-pci-hotplug
machines...
> What this series adds beyond acpiphp is adding entries for slots
> that aren't hotpluggable.
How are you going to ensure that the userspace programs that use those
sysfs files are not going to break into tiny pieces now that you have
extended this to show all pci slots?
> > IBM sells a program that does this for server rooms. It's
> > probably part of some Tivoli package somewhere, sorry I don't
> > remember the name. I did see it working many years ago and it
> > required no kernel changes at all to work properly.
>
> Like I said in an earlier email, HP ia64 systems will require a
> kernel change to get this information. Whether it comes via a
> generic ACPI access layer like dev_acpi, or something like this
> patch series, the kernel will still get touched.
And like I said, I'm pretty sure you don't need to touch the kernel
today as there are people doing this just fine from userspace without
any kernel changes needed :)
thanks,
greg k-h
On Tue, Nov 13, 2007 at 03:15:09PM -0600, Matt Domsch wrote:
> On Tue, Nov 13, 2007 at 10:51:22AM -0800, Greg KH wrote:
> > On Tue, Nov 13, 2007 at 11:33:53AM -0700, Matthew Wilcox wrote:
> > > On Tue, Nov 13, 2007 at 09:01:29AM -0800, Greg KH wrote:
> > > > I'm still not sold on this idea at all. I'm really betting that there
> > > > is a lot of incorrect acpi slot information floating around in machines
> > > > and odd things will show up in these slot entries.
> > >
> > > Is that the end of the world? Instead of having no information, we'll
> > > end up with odd information. If people complain, we can always
> > > blacklist (indeed, won't the ACPI rolling blacklist catch the majority
> > > of these machines?)
> >
> > I don't think the rolling blacklist will catch this, as the rest of the
> > ACPI code is "correct".
> >
> > And yes, incorrect information exported by the kernel is not good at
> > all, especially if a user expects it to work properly :)
> >
> > > > I say this because for a long time there was no "standard" acpi entries
> > > > for hotplug slots and different companies did different things. Hence
> > > > the "odd" IBM acpi hotplug implementation as one example. If this is
> > > > going to go anywhere, you need to get IBM to agree that it works
> > > > properly with all their machines...
> > >
> > > Not in terms of slot names. There were various things that ACPI didn't
> > > specify, like attention and latches, but the description of _SUN hasn't
> > > changed.
> >
> > Ok, again, I want to see the IBM people sign off on this, after testing
> > on all of their machines, before I'll consider this, as I know the IBM
> > acpi tables are "odd".
> >
> > Also, how about Dell machines? I know they are probably not expecting
> > this information to show up and who knows if the numbering of their
> > slots match up with their physical diagrams (I say this based on all of
> > the eth0/eth1 "issues" with Dell machines over the years...)
>
>
> The only reported _SUN problems on Dell systems were on the PE6800 and
> PE6850 systems, which we've fixed with an updated BIOS several months
> ago. IIRC the values weren't always unique which kind of defeated the
> purpose.
Heh, I can imagine :)
> The eth0/eth1 "issues" are completely separate, with no relation to
> ACPI. That's purely a PCI tree depth-first discovery
> vs. breadth-first discovery (where it indeed seems everyone except
> Linux 2.6 uses and expects breadth-first). We looked at extending
> ACPI to export device naming preference, but instead extended SMBIOS
> tables because we could get that change in far easier.
I know they are different, I was using that as an example of trying to
match up physical descriptions on a machine with the internal kernel
representation. It isn't always easy and lots of times things go wrong,
if for no other reason than no one is checking these things.
thanks,
greg k-h
On Tue, Nov 13, 2007 at 02:31:08PM -0700, Alex Chiang wrote:
> * Matt Domsch <[email protected]>:
> >
> > The only reported _SUN problems on Dell systems were on the
> > PE6800 and PE6850 systems, which we've fixed with an updated
> > BIOS several months ago. IIRC the values weren't always unique
> > which kind of defeated the purpose.
>
> FWIW, the ACPI 2.0 spec did not require uniqueness for _SUN.
> (although there is a strange table that refers to _SUN as the
> slot-unique ID (table 6-1 in spec v2.0b), the actual definition
> of _SUN does not mention uniqueness).
Does your code handle if these are not unique?
thanks,
greg k-h
On Tue, Nov 13, 2007 at 01:59:39PM -0700, Alex Chiang wrote:
>
> > On pseries systems, I deal with something called the
> > "partitionable endpoint", which I think probably usually
> > corresponds to physical slots, but I don't really know.
> >
> > So, naively, the physical slot concept doesn't really map to
> > what I have to work with; it just adds one more appendix to it
> > all, one more thing to get confused about.
>
> Sorry, I'm a bit ignorant about pseries -- what kind of name does
> your PCI hotplug driver give to those slots? What shows up in
> /sys/bus/pci/slots/?
~ # find /sys/bus/pci/slots -print
/sys/bus/pci/slots
/sys/bus/pci/slots/control
/sys/bus/pci/slots/control/remove_slot
/sys/bus/pci/slots/control/add_slot
/sys/bus/pci/slots/0001:00:02.0
/sys/bus/pci/slots/0001:00:02.0/phy_location
/sys/bus/pci/slots/0001:00:02.0/max_bus_speed
/sys/bus/pci/slots/0001:00:02.0/adapter
/sys/bus/pci/slots/0001:00:02.0/attention
/sys/bus/pci/slots/0001:00:02.0/power
/sys/bus/pci/slots/0001:00:02.6
etc.
~ # cat /sys/bus/pci/slots/0000:00:02.2/phy_location
U787A.001.DNZ00Z5-P1-C2
phy_location corresponds to the printed labels on the box,
and is also shared with the various management consoles
and serivce processors and what not that get involved
with stuff like that.
Although, please note: on these sysmes, 99% of the "useful"
info about hardware is in /proc/device-tree, and not in /sys
I doubt anyone looks at /sys/bus/pci/slots, I think they mostly
look at the open firmware device tree.
--linas
On Tue, Nov 13, 2007 at 03:41:21PM -0600, Linas Vepstas wrote:
> /sys/bus/pci/slots
> /sys/bus/pci/slots/control
> /sys/bus/pci/slots/control/remove_slot
> /sys/bus/pci/slots/control/add_slot
> /sys/bus/pci/slots/0001:00:02.0
> /sys/bus/pci/slots/0001:00:02.0/phy_location
Ugh. Almost two years ago, paulus promised me he was going to fix the
slot name for rpaphp. Guess he didn't.
This is one of the hateful things about the current design -- hotplug
drivers do too much. Instead of being just the interface between the
Linux PCI code and the hardware, they create sysfs directories, add files,
and generally have far too much freedom.
We have four different schemes currently for naming in slots/,
1. slot number. Used by cpqphp, ibmphp, acpiphp, pciehp, shpc.
2. domain:bus:dev:fn. Used by fakephp.
3a. domain:bus:dev. Used by rpaphp and sgihp.
3b. Except that rpaphp uses phy_location to present the information that
should be in the name and sgihp uses path.
... I've forgotten what cpci uses. And yenta doesn't use it.
How is anyone supposed to write sane managability tools in the presence
of such anarchy?
> ~ # cat /sys/bus/pci/slots/0000:00:02.2/phy_location
> U787A.001.DNZ00Z5-P1-C2
Right. This should look like:
# cat /sys/bus/pci/slots/U787A.001.DNZ00Z5-P1-C2/address
0000:00:02
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
On Tuesday 13 November 2007 02:30:49 pm Greg KH wrote:
> On Tue, Nov 13, 2007 at 01:36:45PM -0700, Alex Chiang wrote:
> > * Greg KH <[email protected]>:
> > > IBM sells a program that does this for server rooms. It's
> > > probably part of some Tivoli package somewhere, sorry I don't
> > > remember the name. I did see it working many years ago and it
> > > required no kernel changes at all to work properly.
Do you know how this Tivoli package works or where it gets the
information?
> > Like I said in an earlier email, HP ia64 systems will require a
> > kernel change to get this information. Whether it comes via a
> > generic ACPI access layer like dev_acpi, or something like this
> > patch series, the kernel will still get touched.
>
> And like I said, I'm pretty sure you don't need to touch the kernel
> today as there are people doing this just fine from userspace without
> any kernel changes needed :)
I think you are assuming userspace can just dump the raw ACPI tables
and extract this information from them. But I don't think that's
possible because _SUN is a method that can contain arbitrary AML.
That AML has to be *executed*, and you can't do that safely in
userspace.
Bjorn
On Tue, Nov 13, 2007 at 03:01:01PM -0700, Bjorn Helgaas wrote:
> On Tuesday 13 November 2007 02:30:49 pm Greg KH wrote:
> > On Tue, Nov 13, 2007 at 01:36:45PM -0700, Alex Chiang wrote:
> > > * Greg KH <[email protected]>:
> > > > IBM sells a program that does this for server rooms. It's
> > > > probably part of some Tivoli package somewhere, sorry I don't
> > > > remember the name. I did see it working many years ago and it
> > > > required no kernel changes at all to work properly.
>
> Do you know how this Tivoli package works or where it gets the
> information?
I was told it was just reading the ACPI tables from userspace. As I saw
it running on a machine without a modified kernel, I had no reason to
doubt this, but it might be doing something else for all I know.
What I do know is that it somehow does work without any kernel changes
needed, and is in use today by very large data centers without any
problems.
> > > Like I said in an earlier email, HP ia64 systems will require a
> > > kernel change to get this information. Whether it comes via a
> > > generic ACPI access layer like dev_acpi, or something like this
> > > patch series, the kernel will still get touched.
> >
> > And like I said, I'm pretty sure you don't need to touch the kernel
> > today as there are people doing this just fine from userspace without
> > any kernel changes needed :)
>
> I think you are assuming userspace can just dump the raw ACPI tables
> and extract this information from them. But I don't think that's
> possible because _SUN is a method that can contain arbitrary AML.
> That AML has to be *executed*, and you can't do that safely in
> userspace.
Yes, I was assuming that based on the above running code, if this is
somehow impossible to do from userspace, I don't know what the code is
doing.
thanks,
greg k-h
Greg KH wrote:
> Doesn't /sys/firmware/acpi give you raw access to the correct tables
> already?
>
> And isn't there some other tool that dumps the raw ACPI tables? I
> thought the acpi developers used it all the time when debugging things
> with users.
I'm neither an acpi developer (well I don't think that I am :) nor an
end-user, but here are the two things for which I was going to use the
information being presented by Alex's patch:
1) a not-yet, but on track to be released tool to be used by end-users
to diagnose I/O bottlenecks - the information in
/sys/bus/pci/slot/<foo>/address would be used to associated interfaces
and/or pci busses etc with something the end user would grok - the
number next to the slot.
2) I was also going to get the folks doing installers to make use of the
"end-user" slot ID. Even without going to the extreme of the
aforementioned 192 slot system, an 8 slot system with a bunch of
dual-port NICs in it (for example) is going to present this huge list of
otherwise identical entries. Even if the installers show the MAC for a
NIC (or I guess a WWN for an HBA or whatnot) that still doesn't tell one
without prior knowledge of what MACs were installed in which slot, which
slot is associated with a given ethN. Having the end-user slot ID
visible is then going to be a great help to that poor admin who is doing
the install.
rick jones
On Tue, Nov 13, 2007 at 02:51:13PM -0800, Rick Jones wrote:
> Greg KH wrote:
>> Doesn't /sys/firmware/acpi give you raw access to the correct tables
>> already?
>> And isn't there some other tool that dumps the raw ACPI tables? I
>> thought the acpi developers used it all the time when debugging things
>> with users.
>
> I'm neither an acpi developer (well I don't think that I am :) nor an
> end-user, but here are the two things for which I was going to use the
> information being presented by Alex's patch:
>
> 1) a not-yet, but on track to be released tool to be used by end-users
> to diagnose I/O bottlenecks - the information in
> /sys/bus/pci/slot/<foo>/address would be used to associated interfaces
> and/or pci busses etc with something the end user would grok - the
> number next to the slot.
>
> 2) I was also going to get the folks doing installers to make use of the
> "end-user" slot ID. Even without going to the extreme of the
> aforementioned 192 slot system, an 8 slot system with a bunch of
> dual-port NICs in it (for example) is going to present this huge list of
> otherwise identical entries. Even if the installers show the MAC for a
> NIC (or I guess a WWN for an HBA or whatnot) that still doesn't tell one
> without prior knowledge of what MACs were installed in which slot, which
> slot is associated with a given ethN. Having the end-user slot ID
> visible is then going to be a great help to that poor admin who is doing
> the install.
Why not just use the code in the linux firmware kit that does this
already today from userspace (thanks to Kristen for pointing this out to
me on irc.)?
No kernel changes needed, and you can get started on your userspace
application today, will work on all distros, no problems at all :)
thanks,
greg k-h
On Tue, 13 Nov 2007 12:26:32 -0800
Greg KH <[email protected]> wrote:
> On Tue, Nov 13, 2007 at 01:21:54PM -0700, Alex Chiang wrote:
> > * Greg KH <[email protected]>:
> > > On Mon, Nov 12, 2007 at 05:08:53PM -0700, Alex Chiang wrote:
> > > >
> > > > Recently, Matthew Wilcox sent out the following mail about
> > > > PCI slots:
> > > >
> > > > http://marc.info/?l=linux-pci&m=119432330418980&w=2
> > > >
> > > > The following patch series is a rough first cut at
> > > > implementing the ideas he outlined, namely, that PCI slots
> > > > are physical objects that we care about, independent of their
> > > > hotplug capabilities.
> > >
> > > Also, some companies already provide userspace tools to get all
> > > of this information about the different slots in a system and
> > > what is where, from userspace, no kernel changes are needed.
> > > So, why add all this extra complexity to the kernel if it is
> > > not needed?
> >
> > On HP ia64 systems, that information is locked away in ACPI, and
> > there's no easy way to get at it. Alex Williamson tried providing
> > a generic dev_acpi driver, so that userspace could do whatever
> > they wanted to with the information:
> >
> > http://lkml.org/lkml/2004/8/3/106
> >
> > And that effort kinda died. I'm of mixed feelings on that driver,
> > since it would be really nice to get unfettered access to the
> > ACPI namespace, but it's pretty dangerous, since any interesting
> > thing you might want to do is actually a method (aka, it calls
> > into firmware) and who knows what side effects there might be.
> >
> > So from my point of view, if ia64 customers want to know about
> > the slots they have in their systems, we're gonna have to do
> > something kernel-intrusive anyhow.
>
> Doesn't /sys/firmware/acpi give you raw access to the correct tables
> already?
>
> And isn't there some other tool that dumps the raw ACPI tables? I
> thought the acpi developers used it all the time when debugging things
> with users.
>
> thanks,
>
> greg k-h
>
There are - people should take a look at the Intel Linux Firmware Kit
for an example of how to parse ACPI tables in userspace - the code
is also GPL'd, so you are free to use it in another GPL application.
http://www.linuxfirmwarekit.org/download.php#source
In many DSDTs I've seen, _SUN is hardcoded anyway and can be found
by reading the DSDT from userspace. This is what the firmwarekit
does to check for duplicate _SUN in one of it's tests.
Kristen
On Tue, Nov 13, 2007 at 02:56:05PM -0800, Greg KH wrote:
> Why not just use the code in the linux firmware kit that does this
> already today from userspace (thanks to Kristen for pointing this out to
> me on irc.)?
So then we have something that works on ACPI-based machines. Who will
add support for all the other kinds of firmware and non-firmware based
slots?
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
On Tue, Nov 13, 2007 at 04:04:00PM -0700, Matthew Wilcox wrote:
> On Tue, Nov 13, 2007 at 02:56:05PM -0800, Greg KH wrote:
> > Why not just use the code in the linux firmware kit that does this
> > already today from userspace (thanks to Kristen for pointing this out to
> > me on irc.)?
>
> So then we have something that works on ACPI-based machines. Who will
> add support for all the other kinds of firmware and non-firmware based
> slots?
Well, it seems that the powerpc people do not want this at all, as they
just have "virtual" slots that they don't want people trying to root
around and find the real thing.
What other types of machines can export this kind of information? How
will you discover non-firmware based slots?
thanks,
greg k-h
On Tue, Nov 13, 2007 at 01:11:02PM -0700, Matthew Wilcox wrote:
> On Tue, Nov 13, 2007 at 10:51:22AM -0800, Greg KH wrote:
> > Ok, again, I want to see the IBM people sign off on this, after testing
> > on all of their machines, before I'll consider this, as I know the IBM
> > acpi tables are "odd".
>
> That seems a little higher standard than patches are normally held to.
> How about the patches get sent to the appropriate people at IBM (who are
> they?)
I be one of them. :) I have been involved in many (but not all)
of IBM's x86 based (IBM System x) servers with hotplug capable
PCI slots. I have mostly worked on 'acpiphp' associated issues.
> and if we haven't heard a NAK from them after a month, then they
> get applied?
I am not fundamentally opposed to this new capability but
share the same concerns that Greg and others have expressed.
So far, I have only tried the changes on one single node
system (IBM x3850) but the below NAK-worthy result supports
the idea that the changes need to be well and widely tested.
Have you possibly considered a kernel option as a kinder and
gentler way of introducing the changes?
Gary
--
Gary Hade
System x Enablement
IBM Linux Technology Center
503-578-4503 IBM T/L: 775-4503
[email protected]
http://www.ibm.com/linux/ltc
====
IBM x3850
Slots 1-2: PCI-X under PCI root bridges
Slots 3-6: PCIe under transparent P2P bridges
Slot 1: PCI-X - populated
Slot 2: PCI-X - !populated
Slot 3: PCIe - populated
Slot 4: PCIe - !populated
Slot 5: PCIe - !populated
Slot 6: PCIe - populated
result is with 2.6.24-rc2 plus all 4 proposed patches
problem: acpiphp failed to register empty PCIe slots 4 and 5
====
# dmesg
acpiphp: ACPI Hot Plug PCI Controller Driver version: 0.5
acpiphp_glue: found PCI host-bus bridge with hot-pluggable slots
acpiphp_glue: found ACPI PCI Hotplug slot 1 at PCI 0000:02:01
acpiphp: Slot [1] registered
acpiphp_glue: found PCI host-bus bridge with hot-pluggable slots
acpiphp_glue: found ACPI PCI Hotplug slot 2 at PCI 0000:06:01
acpiphp: Slot [2] registered
acpiphp_glue: found PCI-to-PCI bridge at PCI 0000:0a:00.0
acpiphp_glue: found ACPI PCI Hotplug slot 3 at PCI 0000:0b:00
acpiphp: Slot [3] registered
acpiphp_glue: found PCI-to-PCI bridge at PCI 0000:0f:00.0
acpiphp_glue: found ACPI PCI Hotplug slot 4 at PCI 0000:10:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 4 at PCI 0000:10:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 4 at PCI 0000:10:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 4 at PCI 0000:10:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 4 at PCI 0000:10:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 4 at PCI 0000:10:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 4 at PCI 0000:10:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 4 at PCI 0000:10:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found PCI-to-PCI bridge at PCI 0000:14:00.0
acpiphp_glue: found ACPI PCI Hotplug slot 5 at PCI 0000:15:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 5 at PCI 0000:15:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 5 at PCI 0000:15:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 5 at PCI 0000:15:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 5 at PCI 0000:15:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 5 at PCI 0000:15:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 5 at PCI 0000:15:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found ACPI PCI Hotplug slot 5 at PCI 0000:15:00
acpiphp: pci_hp_register failed with error -17
acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
acpiphp_glue: found PCI-to-PCI bridge at PCI 0000:19:00.0
acpiphp_glue: found ACPI PCI Hotplug slot 6 at PCI 0000:1a:00
acpiphp: Slot [6] registered
acpiphp_glue: Bus 0000:1a has 1 slot
acpiphp_glue: Bus 0000:15 has 0 slots
acpiphp_glue: Bus 0000:10 has 0 slots
acpiphp_glue: Bus 0000:0b has 1 slot
acpiphp_glue: Bus 0000:06 has 1 slot
acpiphp_glue: Bus 0000:02 has 1 slot
acpiphp_glue: Total 4 slots
# find /sys/bus/pci/slots
/sys/bus/pci/slots
/sys/bus/pci/slots/1
/sys/bus/pci/slots/1/address
/sys/bus/pci/slots/1/power
/sys/bus/pci/slots/1/attention
/sys/bus/pci/slots/1/latch
/sys/bus/pci/slots/1/adapter
/sys/bus/pci/slots/2
/sys/bus/pci/slots/2/address
/sys/bus/pci/slots/2/power
/sys/bus/pci/slots/2/attention
/sys/bus/pci/slots/2/latch
/sys/bus/pci/slots/2/adapter
/sys/bus/pci/slots/3
/sys/bus/pci/slots/3/address
/sys/bus/pci/slots/3/power
/sys/bus/pci/slots/3/attention
/sys/bus/pci/slots/3/latch
/sys/bus/pci/slots/3/adapter
/sys/bus/pci/slots/4
/sys/bus/pci/slots/4/address
/sys/bus/pci/slots/5
/sys/bus/pci/slots/5/address
/sys/bus/pci/slots/6
/sys/bus/pci/slots/6/address
/sys/bus/pci/slots/6/power
/sys/bus/pci/slots/6/attention
/sys/bus/pci/slots/6/latch
/sys/bus/pci/slots/6/adapter
Hi Greg,
* Greg KH <[email protected]>:
> On Tue, Nov 13, 2007 at 02:31:08PM -0700, Alex Chiang wrote:
> >
> > FWIW, the ACPI 2.0 spec did not require uniqueness for _SUN.
> > (although there is a strange table that refers to _SUN as the
> > slot-unique ID (table 6-1 in spec v2.0b), the actual definition
> > of _SUN does not mention uniqueness).
>
> Does your code handle if these are not unique?
I hate to punt on this, but I'm not doing anything that acpiphp
is not doing. If we get back non-unique values for _SUN, I'm
pretty sure both acpiphp and my pci_slot driver will both
continue along as far as they can, until they get an error back
from kobject_register() about trying to register an object with
an existing name.
I think I may have a machine that has non-unique values for _SUN.
If so, I'll try and hunt it down and do some testing.
Thanks for the suggestion.
/ac
On Tue, 13 Nov 2007 16:04:00 -0700
Matthew Wilcox <[email protected]> wrote:
> On Tue, Nov 13, 2007 at 02:56:05PM -0800, Greg KH wrote:
> > Why not just use the code in the linux firmware kit that does this
> > already today from userspace (thanks to Kristen for pointing this out to
> > me on irc.)?
>
> So then we have something that works on ACPI-based machines. Who will
> add support for all the other kinds of firmware and non-firmware based
> slots?
>
As far as being able to retrieve the slot number (which it seemed from
the HP manageablity application perspective is the goal here), that
information is available from userspace as well for at least standard PCI
and pcie based systems for occupied slots. For standard pci, you have
to make something up anyway - for shpchp we just use an incremental
number and combine it with the bus number to represent the slot. For
pcie, you can get this info from the slot capabilities register.
On Tue, Nov 13, 2007 at 03:33:14PM -0800, Kristen Carlson Accardi wrote:
> As far as being able to retrieve the slot number (which it seemed from
> the HP manageablity application perspective is the goal here), that
> information is available from userspace as well for at least standard PCI
> and pcie based systems for occupied slots. For standard pci, you have
> to make something up anyway - for shpchp we just use an incremental
> number and combine it with the bus number to represent the slot. For
> pcie, you can get this info from the slot capabilities register.
Ummm ... that's not what the /spec/ says. I've never worked on any shpc
machines, but the shpc driver reads the slot values from the SLOT_CONFIG
register, just like the spec says to.
I don't understand how you're supposed to get the slot number for
standard PCI for occupied slots. Could you explain?
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
Greg KH <[email protected]> writes:
>
> Also, some companies already provide userspace tools to get all of this
> information about the different slots in a system and what is where,
> from userspace, no kernel changes are needed. So, why add all this
> extra complexity to the kernel if it is not needed?
It's not only complexity. Each new sysfs entry costs memory.
Memory is not free. There should be always a good reason for those.
-Andi
Hi Gary,
* Gary Hade <[email protected]>:
> On Tue, Nov 13, 2007 at 01:11:02PM -0700, Matthew Wilcox wrote:
> > On Tue, Nov 13, 2007 at 10:51:22AM -0800, Greg KH wrote:
> > > Ok, again, I want to see the IBM people sign off on this, after testing
> > > on all of their machines, before I'll consider this, as I know the IBM
> > > acpi tables are "odd".
> >
> > That seems a little higher standard than patches are normally held to.
> > How about the patches get sent to the appropriate people at IBM (who are
> > they?)
>
> I be one of them. :) I have been involved in many (but not all)
> of IBM's x86 based (IBM System x) servers with hotplug capable
> PCI slots. I have mostly worked on 'acpiphp' associated issues.
Thanks for testing the series. It's much appreciated.
> Have you possibly considered a kernel option as a kinder and
> gentler way of introducing the changes?
That is a good idea. I will work on that.
> ====
> IBM x3850
> Slots 1-2: PCI-X under PCI root bridges
> Slots 3-6: PCIe under transparent P2P bridges
> Slot 1: PCI-X - populated
> Slot 2: PCI-X - !populated
> Slot 3: PCIe - populated
> Slot 4: PCIe - !populated
> Slot 5: PCIe - !populated
> Slot 6: PCIe - populated
>
> result is with 2.6.24-rc2 plus all 4 proposed patches
Silly question, but I have to ask. :)
I sent out 5 patches -- is this simply a typo on your part, or
did you only apply 4/5 patches?
> problem: acpiphp failed to register empty PCIe slots 4 and 5
Ok, so acpiphp wasn't going to register those slots anyway, since
they are empty. It would have bailed out after not seeing _ADR or
_EJ0 on those slots.
The acpi-pci-slot driver created those slots anyway, which is one
of the points of the patch -- to create sysfs entries even for
empty slots.
> acpiphp_glue: found PCI-to-PCI bridge at PCI 0000:0f:00.0
This is the real address of slot 4.
> acpiphp_glue: found ACPI PCI Hotplug slot 4 at PCI 0000:10:00
> acpiphp: pci_hp_register failed with error -17
> acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
[repeated 7x]
We saw this message 8x, once for each SxFy object under your p2p
bridge. I actually somewhat did expect to see this error message
(hence the RFC part of my patch ;)
I currently don't have a good way to determine if we've already
seen an empty slot under a p2p bridge, so we try to register
every SxFy object. Of course, a /sys/bus/pci/slots/4/ entry
already exists, so that's why we're getting -17 (-EEXIST).
> acpiphp_glue: found PCI-to-PCI bridge at PCI 0000:14:00.0
> acpiphp_glue: found ACPI PCI Hotplug slot 5 at PCI 0000:15:00
> acpiphp: pci_hp_register failed with error -17
> acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
Same explanation as above.
> # find /sys/bus/pci/slots
> /sys/bus/pci/slots
[snip]
> /sys/bus/pci/slots/4
> /sys/bus/pci/slots/4/address
> /sys/bus/pci/slots/5
> /sys/bus/pci/slots/5/address
Arguably, the right thing happened here. We got entries for empty
slots, and we know their addresses.
If anyone can clue me in on a better way to implement patch 4/5
in my series so that we're not seeing those multiple attempts to
register slots under p2p bridges, I'd love to hear your ideas.
Thanks again for testing.
/ac
On Tue, 13 Nov 2007, Greg KH wrote:
> On Tue, Nov 13, 2007 at 04:04:00PM -0700, Matthew Wilcox wrote:
> > On Tue, Nov 13, 2007 at 02:56:05PM -0800, Greg KH wrote:
> > > Why not just use the code in the linux firmware kit that does this
> > > already today from userspace (thanks to Kristen for pointing this out to
> > > me on irc.)?
> >
> > So then we have something that works on ACPI-based machines. Who will
> > add support for all the other kinds of firmware and non-firmware based
> > slots?
>
> Well, it seems that the powerpc people do not want this at all, as they
> just have "virtual" slots that they don't want people trying to root
> around and find the real thing.
>
> What other types of machines can export this kind of information? How
> will you discover non-firmware based slots?
Just to add my 2 cents, I've not seen any CompactPCI gear that had a
way to easily map a PCI peripheral slot to the corresponding physical
slot in the chassis. In the products I've worked on that use my CPCI
hotplug drivers, we did any required mapping in userspace based on
knowledge of the chassis layout for the particular product.
Scott
--
==============================================================================
Scott Murray, [email protected]
"Good, bad ... I'm the guy with the gun." - Ash, "Army of Darkness"
Matthew Wilcox ????????:
> On Tue, Nov 13, 2007 at 03:33:14PM -0800, Kristen Carlson Accardi wrote:
>> As far as being able to retrieve the slot number (which it seemed from
>> the HP manageablity application perspective is the goal here), that
>> information is available from userspace as well for at least standard PCI
>> and pcie based systems for occupied slots. For standard pci, you have
>> to make something up anyway - for shpchp we just use an incremental
>> number and combine it with the bus number to represent the slot. For
>> pcie, you can get this info from the slot capabilities register.
>
> Ummm ... that's not what the /spec/ says. I've never worked on any shpc
> machines, but the shpc driver reads the slot values from the SLOT_CONFIG
> register, just like the spec says to.
>
The slot number for shpc slot is like 'YYYY_XXXX'.
YYYY is the bus number, though I don't know the specific reason
why it was added.
XXXX is slot number decided according to the shpc specification,
as you said.
Thanks,
Kenji Kaneshige
Hi,
I tried the series of patches and I encountered some problems.
Since I don't have enough time to investigate them, I can only
report them at present. My environment was ia64 machine with 64
hotplug slots (shpc & pcie). Those slots can be handled by
acpiphp too, though I have not tried it yet.
- Problem(1)
Two kind of Call Traces were displayed so many times at the
boot time. Those are attached below. I guess it relate to
the ACPI Namespace definition.
- Problem(2)
Many of the shpc slot initializations failed with the
following error messages. I guess the cause is Problem(1).
shpchp: pci_hp_register failed with error -17
shpchp: shpchp: slot initialization failed
- Problem(3)
All of the pcie slot initializations failed with the
following error messages. I guess the cause is Problem(1).
pciehp: pci_hp_register failed with error -17
pciehp: pciehp: slot initialization failed
BTW, I have some comments about the patches.
- I think your patches assume that all the hotplug slots have
ACPI _SUN method regardless of the hotplug controller type,
because pci_slot.c is the only place to call pci_slot_create().
But I think there are hotplug slots without _SUN. If the
hotplug slot doesn't have _SUN, slot initialization by the
hotplug controller driver doesn't work (does it?), because
there are no slot directory.
- I think pci_slot.o should be kernel module so that users can
select to enable or disable this functionality.
Here is the Call Trace mentioned above:
Unable to register kobject 1024<3>pci_slot: pci_create_slot returned -22
ACPI: Invalid ACPI Bus context for device <NULL>
sysfs: duplicate filename '1024' can not be created
WARNING: at fs/sysfs/dir.c:424 sysfs_add_one()
Call Trace:
[<a000000100014b60>] show_stack+0x40/0xa0
sp=e000014101befaf0 bsp=e000014101be0f60
[<a000000100014bf0>] dump_stack+0x30/0x60
sp=e000014101befcc0 bsp=e000014101be0f48
[<a0000001001d6990>] sysfs_add_one+0xb0/0x240
sp=e000014101befcc0 bsp=e000014101be0f18
[<a0000001001d7900>] create_dir+0x80/0x100
sp=e000014101befcc0 bsp=e000014101be0ee0
[<a0000001001d7a30>] sysfs_create_dir+0xb0/0x100
sp=e000014101befce0 bsp=e000014101be0ec0
[<a0000001003b4f50>] kobject_add+0x250/0x460
sp=e000014101befcf0 bsp=e000014101be0e80
[<a0000001003b52d0>] kobject_register+0x50/0xa0
sp=e000014101befcf0 bsp=e000014101be0e58
[<a0000001003d6f40>] pci_create_slot+0x140/0x260
sp=e000014101befcf0 bsp=e000014101be0e10
[<a00000010043acd0>] register_slot+0x170/0x1c0
sp=e000014101befcf0 bsp=e000014101be0dc0
[<a000000100419ef0>] acpi_ns_walk_namespace+0x2b0/0x300
sp=e000014101befd20 bsp=e000014101be0d50
[<a000000100415a50>] acpi_walk_namespace+0x90/0xc0
sp=e000014101befd20 bsp=e000014101be0d00
[<a00000010043ab20>] acpi_pci_slot_add+0x1c0/0x200
sp=e000014101befd20 bsp=e000014101be0cd8
[<a0000001004367e0>] acpi_pci_register_driver+0xe0/0x160
sp=e000014101befd30 bsp=e000014101be0ca8
[<a000000100920620>] acpi_pci_slot_init+0x20/0x40
sp=e000014101befd30 bsp=e000014101be0c90
[<a0000001008f06e0>] kernel_init+0x460/0x7e0
sp=e000014101befd30 bsp=e000014101be0c48
[<a000000100013110>] kernel_thread_helper+0x30/0x60
sp=e000014101befe30 bsp=e000014101be0c20
[<a0000001000089c0>] start_kernel_thread+0x20/0x40
sp=e000014101befe30 bsp=e000014101be0c20
kobject_add failed for 1024 with -EEXIST, don't try to register things with the
same name in the same directory.
Call Trace:
[<a000000100014b60>] show_stack+0x40/0xa0
sp=e000014101befb20 bsp=e000014101be0ed8
[<a000000100014bf0>] dump_stack+0x30/0x60
sp=e000014101befcf0 bsp=e000014101be0ec0
[<a0000001003b50e0>] kobject_add+0x3e0/0x460
sp=e000014101befcf0 bsp=e000014101be0e80
[<a0000001003b52d0>] kobject_register+0x50/0xa0
sp=e000014101befcf0 bsp=e000014101be0e58
[<a0000001003d6f40>] pci_create_slot+0x140/0x260
sp=e000014101befcf0 bsp=e000014101be0e10
[<a00000010043acd0>] register_slot+0x170/0x1c0
sp=e000014101befcf0 bsp=e000014101be0dc0
[<a000000100419ef0>] acpi_ns_walk_namespace+0x2b0/0x300
sp=e000014101befd20 bsp=e000014101be0d50
[<a000000100415a50>] acpi_walk_namespace+0x90/0xc0
sp=e000014101befd20 bsp=e000014101be0d00
[<a00000010043ab20>] acpi_pci_slot_add+0x1c0/0x200
sp=e000014101befd20 bsp=e000014101be0cd8
[<a0000001004367e0>] acpi_pci_register_driver+0xe0/0x160
sp=e000014101befd30 bsp=e000014101be0ca8
[<a000000100920620>] acpi_pci_slot_init+0x20/0x40
sp=e000014101befd30 bsp=e000014101be0c90
[<a0000001008f06e0>] kernel_init+0x460/0x7e0
sp=e000014101befd30 bsp=e000014101be0c48
[<a000000100013110>] kernel_thread_helper+0x30/0x60
sp=e000014101befe30 bsp=e000014101be0c20
[<a0000001000089c0>] start_kernel_thread+0x20/0x40
sp=e000014101befe30 bsp=e000014101be0c20
Unable to register kobject 1024<3>pci_slot: pci_create_slot returned -22
Thanks,
Kenji Kaneshige
Alex Chiang ????????:
> Hello,
>
> [this patch series touches a few subsystems; hopefully I got all
> the right maintainers]
>
> Recently, Matthew Wilcox sent out the following mail about PCI
> slots:
>
> http://marc.info/?l=linux-pci&m=119432330418980&w=2
>
> The following patch series is a rough first cut at implementing
> the ideas he outlined, namely, that PCI slots are physical
> objects that we care about, independent of their hotplug
> capabilities.
>
> We introduce a struct pci_slot as a first-class citizen, and turn
> hotplug_slot into a subsidiary structure. A brief glimpse at the
> potential for cleanup is given, as we modify the existing hotplug
> drivers and remove the multiple get_address() methods. Certainly
> more cleanup can be done with this new structure.
>
> Additionally, we introduce an ACPI PCI slot driver, which will
> detect all physical PCI slots in the system (as described by
> ACPI). De-coupling the notion of a physical slot from its hotplug
> capability allows users to understand the physical geography of
> their machines, whether their slots are hotpluggable or not.
>
> This patch series builds heavily on Willy's prior work (with a
> bit of the typical refresh needed when aiming at the moving
> target of the kernel). The ACPI PCI slot driver is new.
>
> I have tested this series on both ia64 (hp rx6600) and x86 (hp
> dl380g). On ia64, system firmware provides _SUN methods for the
> PCI slots and we get entries in /sys/bus/pci/slots/. On my x86
> machine, firmware didn't seem to provide a _SUN, so we don't get
> any entries in /sys/bus/pci/slots/, but nothing really bad
> happens either. ;)
>
> Comments/feedback are appreciated.
>
> Thanks.
>
> /ac
>
>
>
Alex Chiang wrote:
> Register one slot per slot, rather than one slot per function.
> Change the name of the slot to fake%d instead of the pci address.
>
> Signed-off-by: Alex Chiang <[email protected]>
> Signed-off-by: Matthew Wilcox <[email protected]>
> ---
> drivers/pci/hotplug/fakephp.c | 75
> +++++++++++++++-------------------------- 1 files changed, 27
> insertions(+), 48 deletions(-)
>
> diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c
> index 027f686..828335e 100644
> --- a/drivers/pci/hotplug/fakephp.c
> +++ b/drivers/pci/hotplug/fakephp.c
> @@ -93,6 +93,7 @@ static int add_slot(struct pci_dev *dev)
> struct dummy_slot *dslot;
> struct hotplug_slot *slot;
> int retval = -ENOMEM;
> + static int count = 1;
>
> slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
> if (!slot)
> @@ -106,7 +107,8 @@ static int add_slot(struct pci_dev *dev)
> slot->info->max_bus_speed = PCI_SPEED_UNKNOWN;
> slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN;
>
> - slot->name = &dev->dev.bus_id[0];
> + slot->name = kmalloc(8, GFP_KERNEL);
> + sprintf(slot->name, "fake%d", count++);
> dbg("slot->name = %s\n", slot->name);
>
> dslot = kmalloc(sizeof(struct dummy_slot), GFP_KERNEL);
This is ugly. Please do it the way we already do e.g. for acpiphp: add a
char[8] to "struct dummy_slot" and just reference that here. Or better do
what this name suggests: kill fakephp completely and use dummyphp[1] instead.
Eike
1) http://opensource.sf-tec.de/kernel/
On Wed, Nov 14, 2007 at 02:07:56AM +0100, Andi Kleen wrote:
> It's not only complexity. Each new sysfs entry costs memory.
> Memory is not free. There should be always a good reason for those.
It's not a lot of memory; it's one directory and a couple of files for
each PCI slot in the system. Even huge systems have maybe 200 slots.
In order for this to take up as much as one page of ram on a typical PC
with six slots, this would have to consume 680 bytes per directory. I
don't think sysfs is that inefficient (and if it is, maybe this feature
is not where the problem is, given the 'power' directory per device, the
19 files per scsi device, the huge numbers of symlinks, etc).
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
Hi Eike,
* Rolf Eike Beer <[email protected]>:
> Alex Chiang wrote:
> > --- a/drivers/pci/hotplug/fakephp.c
> > +++ b/drivers/pci/hotplug/fakephp.c
> > @@ -93,6 +93,7 @@ static int add_slot(struct pci_dev *dev)
> > struct dummy_slot *dslot;
> > struct hotplug_slot *slot;
> > int retval = -ENOMEM;
> > + static int count = 1;
> >
> > slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
> > if (!slot)
> > @@ -106,7 +107,8 @@ static int add_slot(struct pci_dev *dev)
> > slot->info->max_bus_speed = PCI_SPEED_UNKNOWN;
> > slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN;
> >
> > - slot->name = &dev->dev.bus_id[0];
> > + slot->name = kmalloc(8, GFP_KERNEL);
> > + sprintf(slot->name, "fake%d", count++);
> > dbg("slot->name = %s\n", slot->name);
> >
> > dslot = kmalloc(sizeof(struct dummy_slot), GFP_KERNEL);
>
> This is ugly. Please do it the way we already do e.g. for
> acpiphp: add a char[8] to "struct dummy_slot" and just
> reference that here.
I took at look at the code in acpiphp you're talking about, and
I'm not sure why you think the above is "ugly". We're talking
about a runtime vs compile time storage for the name, and doing a
kmalloc/sprintf is a pretty standard idiom.
BTW, I did incorporate both Linas' and Willy's comments, by
changing the kmalloc size to an explicit 32, and using snprintf
instead.
In any case, for your particular comment, I think I'm going to
leave it alone for now, and let Greg weigh in with the final
recommendation.
Thanks for the review.
/ac
On Wed, Nov 14, 2007 at 07:17:51AM -0700, Matthew Wilcox wrote:
> On Wed, Nov 14, 2007 at 02:07:56AM +0100, Andi Kleen wrote:
> > It's not only complexity. Each new sysfs entry costs memory.
> > Memory is not free. There should be always a good reason for those.
>
> It's not a lot of memory; it's one directory and a couple of files for
> each PCI slot in the system. Even huge systems have maybe 200 slots.
> In order for this to take up as much as one page of ram on a typical PC
> with six slots, this would have to consume 680 bytes per directory. I
> don't think sysfs is that inefficient (and if it is, maybe this feature
> is not where the problem is, given the 'power' directory per device, the
> 19 files per scsi device, the huge numbers of symlinks, etc).
It becomes much more when someone does a find /sys. dentries are
expensive. They eventually can get pruned again, but it's still
costly to do that.
-Andi
Hi Gary,
* Gary Hade <[email protected]>:
>
> I am not fundamentally opposed to this new capability but share
> the same concerns that Greg and others have expressed. So far,
> I have only tried the changes on one single node system (IBM
> x3850) but the below NAK-worthy result supports the idea that
> the changes need to be well and widely tested.
Can you send me the lspci -vv and lspci -vt output from this
machine?
Thanks.
/ac
Am Mittwoch, 14. November 2007 schrieb Alex Chiang:
> Hi Eike,
>
> * Rolf Eike Beer <[email protected]>:
> > Alex Chiang wrote:
> > > --- a/drivers/pci/hotplug/fakephp.c
> > > +++ b/drivers/pci/hotplug/fakephp.c
> > > @@ -93,6 +93,7 @@ static int add_slot(struct pci_dev *dev)
> > > struct dummy_slot *dslot;
> > > struct hotplug_slot *slot;
> > > int retval = -ENOMEM;
> > > + static int count = 1;
> > >
> > > slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
> > > if (!slot)
> > > @@ -106,7 +107,8 @@ static int add_slot(struct pci_dev *dev)
> > > slot->info->max_bus_speed = PCI_SPEED_UNKNOWN;
> > > slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN;
> > >
> > > - slot->name = &dev->dev.bus_id[0];
> > > + slot->name = kmalloc(8, GFP_KERNEL);
> > > + sprintf(slot->name, "fake%d", count++);
> > > dbg("slot->name = %s\n", slot->name);
> > >
> > > dslot = kmalloc(sizeof(struct dummy_slot), GFP_KERNEL);
> >
> > This is ugly. Please do it the way we already do e.g. for
> > acpiphp: add a char[8] to "struct dummy_slot" and just
> > reference that here.
>
> I took at look at the code in acpiphp you're talking about, and
> I'm not sure why you think the above is "ugly". We're talking
> about a runtime vs compile time storage for the name, and doing a
> kmalloc/sprintf is a pretty standard idiom.
>
> BTW, I did incorporate both Linas' and Willy's comments, by
> changing the kmalloc size to an explicit 32, and using snprintf
> instead.
>
> In any case, for your particular comment, I think I'm going to
> leave it alone for now, and let Greg weigh in with the final
> recommendation.
Because we have another allocation of very small size for every slot here.
struct dummy_slot has a size of 4 pointers, that's 16 byte for 32 bit
architectures. Putting 8 byte of additional storage here would save a
complete allocation on 32 bit platforms. For 64 bit platforms the memory
usage is the same but we do less allocations (i.e. less points to fail) and
less memory fragmentation.
Btw: your code lacks a check if kmalloc() returns NULL.
Eike
On Wed, Nov 14, 2007 at 03:35:33PM +0100, Andi Kleen wrote:
> It becomes much more when someone does a find /sys. dentries are
> expensive. They eventually can get pruned again, but it's still
> costly to do that.
Again, if this is a big concern for you, there are better places to look
at for savings.
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
On Wed, Nov 14, 2007 at 08:00:25AM -0700, Matthew Wilcox wrote:
> On Wed, Nov 14, 2007 at 03:35:33PM +0100, Andi Kleen wrote:
> > It becomes much more when someone does a find /sys. dentries are
> > expensive. They eventually can get pruned again, but it's still
> > costly to do that.
>
> Again, if this is a big concern for you, there are better places to look
> at for savings.
Whoever is proposing a feature has the burden to justify that
its usefulness is larger than the overhead/cost it adds.
Doesn't seem to be the case with this one so far.
And in general ignoring overhead in new features is a pretty sad
approach. Big bloat does come in small steps with each new feature.
-Andi
Hi Eike,
* Rolf Eike Beer <[email protected]>:
> Am Mittwoch, 14. November 2007 schrieb Alex Chiang:
> > * Rolf Eike Beer <[email protected]>:
> > >
> > > This is ugly. Please do it the way we already do e.g. for
> > > acpiphp: add a char[8] to "struct dummy_slot" and just
> > > reference that here.
> >
> > I took at look at the code in acpiphp you're talking about,
> > and I'm not sure why you think the above is "ugly". We're
> > talking about a runtime vs compile time storage for the name,
> > and doing a kmalloc/sprintf is a pretty standard idiom.
> >
> > BTW, I did incorporate both Linas' and Willy's comments, by
> > changing the kmalloc size to an explicit 32, and using
> > snprintf instead.
> >
> > In any case, for your particular comment, I think I'm going
> > to leave it alone for now, and let Greg weigh in with the
> > final recommendation.
>
> Because we have another allocation of very small size for every
> slot here.
>
> struct dummy_slot has a size of 4 pointers, that's 16 byte for
> 32 bit architectures. Putting 8 byte of additional storage here
> would save a complete allocation on 32 bit platforms. For 64
> bit platforms the memory usage is the same but we do less
> allocations (i.e. less points to fail) and less memory
> fragmentation.
>
> Btw: your code lacks a check if kmalloc() returns NULL.
Good points. I'll make your suggested change for v2.
Thanks.
/ac
On Wed, Nov 14, 2007 at 04:08:01PM +0100, Andi Kleen wrote:
> Whoever is proposing a feature has the burden to justify that
> its usefulness is larger than the overhead/cost it adds.
>
> Doesn't seem to be the case with this one so far.
Huh? There are half a dozen people who think it does, and half a dozen
people who think it doesn't. Fine, you're on the side which sees no use
for it.
> And in general ignoring overhead in new features is a pretty sad
> approach. Big bloat does come in small steps with each new feature.
A very generic argument which could be used to shoot down any new feature.
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
* Andi Kleen <[email protected]>:
> On Wed, Nov 14, 2007 at 08:00:25AM -0700, Matthew Wilcox wrote:
> > On Wed, Nov 14, 2007 at 03:35:33PM +0100, Andi Kleen wrote:
> > > It becomes much more when someone does a find /sys.
> > > dentries are expensive. They eventually can get pruned
> > > again, but it's still costly to do that.
> >
> > Again, if this is a big concern for you, there are better
> > places to look at for savings.
>
> Whoever is proposing a feature has the burden to justify that
> its usefulness is larger than the overhead/cost it adds.
>
> Doesn't seem to be the case with this one so far.
I'm a bit confused where you're going with this. I thought the
main point of contention wasn't around the utility of the
patchset, it was around getting the actual information correct.
We've already mentioned several uses that this patchset adds:
1. allowing managability folks to determine which physical slot
their failing card might be sitting in (independent of hotplug
capability).
2. giving installers a method of presenting the physical slot
to the user so that the user can choose to, say, do a network
install off of the card in slot N, vs having to guess based on
MAC address or something else unfriendly
3. performance monitoring tools based on slot addresses/numbers
I believe that userspace cares more about those sorts of things
compared to consuming extra bytes when creating sysfs entries.
> And in general ignoring overhead in new features is a pretty
> sad approach. Big bloat does come in small steps with each new
> feature.
I'm kinda new to the Linux kernel, so I don't really get what
you're saying. Are you saying that the approved method of
exposing kernel information to the user via sysfs is actually
bloat? Even if that information has utility?
Feel free to educate me; I'm here to learn.
Thanks.
/ac
On Tuesday 13 November 2007 03:59:36 pm Kristen Carlson Accardi wrote:
> On Tue, 13 Nov 2007 12:26:32 -0800 Greg KH <[email protected]> wrote:
> > And isn't there some other tool that dumps the raw ACPI tables? I
> > thought the acpi developers used it all the time when debugging things
> > with users.
>
> There are - people should take a look at the Intel Linux Firmware Kit
> for an example of how to parse ACPI tables in userspace - the code
> is also GPL'd, so you are free to use it in another GPL application.
>
> http://www.linuxfirmwarekit.org/download.php#source
>
> In many DSDTs I've seen, _SUN is hardcoded anyway and can be found
> by reading the DSDT from userspace. This is what the firmwarekit
> does to check for duplicate _SUN in one of it's tests.
I see three relevant things in the firmware kit:
1) ExecuteAmlMethod() in amlpoke/amlpoke.c. This uses
/proc/acpi/hotkey/event_config to cause the kernel to
execute an AML method. This looks similar to what dev_acpi
does and is unsafe for the same reasons (the method may have
side effects that interfere with kernel drivers). The kernel
support for this was removed in 2.6.21:
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=5ee6edbcde4d3b14e4e03d4b331df1099a34aa8d
2) execute_aml_method() in acpitable.c. Similar to above.
3) parse_SUN_name() in SUN/SUN.c. This uses acpidump, acpixtract,
and iasl -d to disassemble the DSDT and SSDTs, then looks for
things like "Name (_SUN, 0x0000012C)". That works well if _SUN
merely returns a constant, and many DSDTs do that.
But _SUN can be implemented as a control method, and then we have
a problem because we can't determine the _SUN value by inspecting
the DSDT. We have to evaluate the method, and that may require
operation regions, semaphores, etc., so it can only be done in the
kernel.
So I agree that the firmware kit has a clever hack that works on much
existing x86 firmware, and it sounds like Tivoli might even rely on
it. But I don't feel good about it, and it could easily break when
some BIOS writer needs to make _SUN slightly more complicated.
Bjorn
On Tue, Nov 13, 2007 at 12:26:32PM -0800, Greg KH wrote:
> Doesn't /sys/firmware/acpi give you raw access to the correct tables
> already?
>
> And isn't there some other tool that dumps the raw ACPI tables? I
> thought the acpi developers used it all the time when debugging things
> with users.
Dumping raw ACPI tables isn't adequate - _SUN might be a complex ACPI
method with multiple reads and writes to raw hardware, and we really
don't want to do that in userspace. The only way to do this reliably is
in the kernel.
--
Matthew Garrett | [email protected]
On Wed, Nov 14, 2007 at 05:44:01PM +0000, Matthew Garrett wrote:
> On Tue, Nov 13, 2007 at 12:26:32PM -0800, Greg KH wrote:
>
> > Doesn't /sys/firmware/acpi give you raw access to the correct tables
> > already?
> >
> > And isn't there some other tool that dumps the raw ACPI tables? I
> > thought the acpi developers used it all the time when debugging things
> > with users.
>
> Dumping raw ACPI tables isn't adequate - _SUN might be a complex ACPI
> method with multiple reads and writes to raw hardware, and we really
> don't want to do that in userspace. The only way to do this reliably is
> in the kernel.
But it really isn't, as the firmware kit has proven...
thanks,
greg k-h
On Wed, Nov 14, 2007 at 10:37:08AM -0700, Bjorn Helgaas wrote:
> On Tuesday 13 November 2007 03:59:36 pm Kristen Carlson Accardi wrote:
> > On Tue, 13 Nov 2007 12:26:32 -0800 Greg KH <[email protected]> wrote:
> > > And isn't there some other tool that dumps the raw ACPI tables? I
> > > thought the acpi developers used it all the time when debugging things
> > > with users.
> >
> > There are - people should take a look at the Intel Linux Firmware Kit
> > for an example of how to parse ACPI tables in userspace - the code
> > is also GPL'd, so you are free to use it in another GPL application.
> >
> > http://www.linuxfirmwarekit.org/download.php#source
> >
> > In many DSDTs I've seen, _SUN is hardcoded anyway and can be found
> > by reading the DSDT from userspace. This is what the firmwarekit
> > does to check for duplicate _SUN in one of it's tests.
>
> I see three relevant things in the firmware kit:
>
> 1) ExecuteAmlMethod() in amlpoke/amlpoke.c. This uses
> /proc/acpi/hotkey/event_config to cause the kernel to
> execute an AML method. This looks similar to what dev_acpi
> does and is unsafe for the same reasons (the method may have
> side effects that interfere with kernel drivers). The kernel
> support for this was removed in 2.6.21:
>
> http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=5ee6edbcde4d3b14e4e03d4b331df1099a34aa8d
>
> 2) execute_aml_method() in acpitable.c. Similar to above.
>
> 3) parse_SUN_name() in SUN/SUN.c. This uses acpidump, acpixtract,
> and iasl -d to disassemble the DSDT and SSDTs, then looks for
> things like "Name (_SUN, 0x0000012C)". That works well if _SUN
> merely returns a constant, and many DSDTs do that.
>
> But _SUN can be implemented as a control method, and then we have
> a problem because we can't determine the _SUN value by inspecting
> the DSDT. We have to evaluate the method, and that may require
> operation regions, semaphores, etc., so it can only be done in the
> kernel.
>
> So I agree that the firmware kit has a clever hack that works on much
> existing x86 firmware, and it sounds like Tivoli might even rely on
> it. But I don't feel good about it, and it could easily break when
> some BIOS writer needs to make _SUN slightly more complicated.
Do you know of such BIOSes out there that do this? Will the above
scheme not work for the ia64 boxes that you know of that are out in the
world today?
thanks,
greg k-h
On Wed, Nov 14, 2007 at 09:51:51AM -0800, Greg KH wrote:
> On Wed, Nov 14, 2007 at 05:44:01PM +0000, Matthew Garrett wrote:
> > Dumping raw ACPI tables isn't adequate - _SUN might be a complex ACPI
> > method with multiple reads and writes to raw hardware, and we really
> > don't want to do that in userspace. The only way to do this reliably is
> > in the kernel.
>
> But it really isn't, as the firmware kit has proven...
The firmware toolkit will only work if the _SUN method merely returns a
hardcoded value. The spec doesn't require that that be the case, and
it's easy to imagine situations where it won't.
--
Matthew Garrett | [email protected]
On Wed, Nov 14, 2007 at 07:42:37AM -0700, Alex Chiang wrote:
> Hi Gary,
>
> * Gary Hade <[email protected]>:
> >
> > I am not fundamentally opposed to this new capability but share
> > the same concerns that Greg and others have expressed. So far,
> > I have only tried the changes on one single node system (IBM
> > x3850) but the below NAK-worthy result supports the idea that
> > the changes need to be well and widely tested.
>
> Can you send me the lspci -vv and lspci -vt output from this
> machine?
Alex, I added some slot number hints to the below `lspci -vt`
output.
I am still looking at your other message and will have some
hopefully helpful comments soon. I did apply all 5 patches,
4 was a typo.
Thanks,
Gary
--
Gary Hade
System x Enablement
IBM Linux Technology Center
503-578-4503 IBM T/L: 775-4503
[email protected]
http://www.ibm.com/linux/ltc
Script started on Wed 14 Nov 2007 03:38:14 AM PST
0;root@elm3a9:~[root@elm3a9 ~]# lspci -vt
-+-[0000:19]---00.0-[0000:1a-1d]--+-00.0 Intel Corporation 82571EB Gigabit Ethernet Controller <- slot 6
| \-00.1 Intel Corporation 82571EB Gigabit Ethernet Controller
+-[0000:14]---00.0-[0000:15-18]-- <- slot 5
+-[0000:0f]---00.0-[0000:10-13]-- <- slot 4
+-[0000:0a]---00.0-[0000:0b-0e]----00.0-[0000:0c]----04.0 Adaptec ASC-29320ALP U320 <- slot 3
+-[0000:06]---00.0 IBM Calgary PCI-X Host Bridge <- slot 2
+-[0000:02]-+-00.0 IBM Calgary PCI-X Host Bridge <- slot 1
| \-01.0 S2io Inc. Xframe II 10Gbps Ethernet
+-[0000:01]-+-00.0 IBM Calgary PCI-X Host Bridge
| +-01.0 Broadcom Corporation NetXtreme BCM5704 Gigabit Ethernet
| +-01.1 Broadcom Corporation NetXtreme BCM5704 Gigabit Ethernet
| \-02.0 Adaptec AIC-9410W SAS (Razor ASIC non-RAID)
\-[0000:00]-+-00.0 IBM Calgary PCI-X Host Bridge
+-01.0 ATI Technologies Inc Radeon RV100 QY [Radeon 7000/VE]
+-03.0 NEC Corporation USB
+-03.1 NEC Corporation USB
+-03.2 NEC Corporation USB 2.0
+-0f.0 Broadcom CSB6 South Bridge
+-0f.1 Broadcom CSB6 RAID/IDE Controller
\-0f.3 Broadcom GCLE-2 Host Bridge
0;root@elm3a9:~[root@elm3a9 ~]# lspci -vvv
00:00.0 Host bridge: IBM Calgary PCI-X Host Bridge (rev 04)
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=slow >TAbort- <TAbort- <MAbort+ >SERR- <PERR-
Latency: 240
Capabilities: [60] PCI-X non-bridge device
Command: DPERE- ERO- RBC=512 OST=2
Status: Dev=00:00.0 64bit+ 133MHz+ SCD- USC- DC=bridge DMMRBC=512 DMOST=2 DMCRS=8 RSCEM- 266MHz+ 533MHz-
00:01.0 VGA compatible controller: ATI Technologies Inc Radeon RV100 QY [Radeon 7000/VE] (prog-if 00 [VGA])
Subsystem: IBM IBM eServer xSeries server mainboard
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping+ SERR+ FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 240 (2000ns min), Cache Line Size: 64 bytes
Interrupt: pin A routed to IRQ 5
Region 0: Memory at e8000000 (32-bit, prefetchable) [size=128M]
Region 1: I/O ports at 1800 [size=256]
Region 2: Memory at f1c00000 (32-bit, non-prefetchable) [size=64K]
[virtual] Expansion ROM at f1c20000 [disabled] [size=128K]
Capabilities: [50] Power Management version 2
Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
Status: D0 PME-Enable- DSel=0 DScale=0 PME-
00:03.0 USB Controller: NEC Corporation USB (rev 43) (prog-if 10 [OHCI])
Subsystem: NEC Corporation Hama USB 2.0 CardBus
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 240 (250ns min, 10500ns max), Cache Line Size: 64 bytes
Interrupt: pin A routed to IRQ 20
Region 0: Memory at f1c10000 (32-bit, non-prefetchable) [size=4K]
Capabilities: [40] Power Management version 2
Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold-)
Status: D0 PME-Enable- DSel=0 DScale=0 PME+
00:03.1 USB Controller: NEC Corporation USB (rev 43) (prog-if 10 [OHCI])
Subsystem: NEC Corporation Hama USB 2.0 CardBus
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 240 (250ns min, 10500ns max), Cache Line Size: 64 bytes
Interrupt: pin B routed to IRQ 20
Region 0: Memory at f1c11000 (32-bit, non-prefetchable) [size=4K]
Capabilities: [40] Power Management version 2
Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold-)
Status: D0 PME-Enable- DSel=0 DScale=0 PME-
00:03.2 USB Controller: NEC Corporation USB 2.0 (rev 04) (prog-if 20 [EHCI])
Subsystem: NEC Corporation USB 2.0
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 240 (4000ns min, 8500ns max), Cache Line Size: 128 bytes
Interrupt: pin C routed to IRQ 20
Region 0: Memory at f1c12000 (32-bit, non-prefetchable) [size=256]
Capabilities: [40] Power Management version 2
Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold-)
Status: D0 PME-Enable- DSel=0 DScale=0 PME-
00:0f.0 Host bridge: Broadcom CSB6 South Bridge (rev a0)
Subsystem: Broadcom Unknown device 0201
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 240
00:0f.1 IDE interface: Broadcom CSB6 RAID/IDE Controller (rev a0) (prog-if 82 [Master PriP])
Subsystem: Broadcom Unknown device 0212
Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 64
Region 0: I/O ports at 01f0 [size=8]
Region 1: I/O ports at 03f4 [size=1]
Region 2: I/O ports at 0170 [size=8]
Region 3: I/O ports at 0374 [size=1]
Region 4: I/O ports at 0700 [size=16]
00:0f.3 ISA bridge: Broadcom GCLE-2 Host Bridge
Subsystem: Broadcom Unknown device 0230
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 0
01:00.0 Host bridge: IBM Calgary PCI-X Host Bridge (rev 04)
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=slow >TAbort- <TAbort- <MAbort+ >SERR- <PERR-
Latency: 240
Capabilities: [60] PCI-X non-bridge device
Command: DPERE- ERO- RBC=512 OST=2
Status: Dev=01:00.0 64bit+ 133MHz+ SCD- USC- DC=bridge DMMRBC=512 DMOST=2 DMCRS=8 RSCEM- 266MHz+ 533MHz-
01:01.0 Ethernet controller: Broadcom Corporation NetXtreme BCM5704 Gigabit Ethernet (rev 10)
Subsystem: IBM Unknown device 02e7
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 240 (16000ns min), Cache Line Size: 64 bytes
Interrupt: pin A routed to IRQ 24
Region 0: Memory at f1dc0000 (64-bit, non-prefetchable) [size=64K]
Capabilities: [40] PCI-X non-bridge device
Command: DPERE- ERO- RBC=2048 OST=1
Status: Dev=01:01.0 64bit+ 133MHz+ SCD- USC- DC=simple DMMRBC=2048 DMOST=1 DMCRS=16 RSCEM- 266MHz- 533MHz-
Capabilities: [48] Power Management version 2
Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot+,D3cold+)
Status: D0 PME-Enable+ DSel=0 DScale=1 PME-
Capabilities: [50] Vital Product Data
Capabilities: [58] Message Signalled Interrupts: 64bit+ Queue=0/3 Enable-
Address: 00000000fee00000 Data: 4056
01:01.1 Ethernet controller: Broadcom Corporation NetXtreme BCM5704 Gigabit Ethernet (rev 10)
Subsystem: IBM Unknown device 02e7
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 240 (16000ns min), Cache Line Size: 64 bytes
Interrupt: pin B routed to IRQ 28
Region 0: Memory at f1dd0000 (64-bit, non-prefetchable) [size=64K]
Capabilities: [40] PCI-X non-bridge device
Command: DPERE- ERO- RBC=512 OST=1
Status: Dev=01:01.1 64bit+ 133MHz+ SCD- USC- DC=simple DMMRBC=2048 DMOST=1 DMCRS=16 RSCEM- 266MHz- 533MHz-
Capabilities: [48] Power Management version 2
Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot+,D3cold+)
Status: D3 PME-Enable+ DSel=0 DScale=1 PME-
Capabilities: [50] Vital Product Data
Capabilities: [58] Message Signalled Interrupts: 64bit+ Queue=0/3 Enable-
Address: 00000000fee00000 Data: 4056
01:02.0 SCSI storage controller: Adaptec AIC-9410W SAS (Razor ASIC non-RAID) (rev 08)
Subsystem: IBM Unknown device 02e7
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz+ UDF- FastB2B- ParErr- DEVSEL=slow >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 240 (32000ns min, 26750ns max), Cache Line Size: 64 bytes
Interrupt: pin A routed to IRQ 25
Region 0: Memory at f1d80000 (64-bit, non-prefetchable) [size=256K]
Region 2: Memory at f0c00000 (64-bit, prefetchable) [size=128K]
Region 4: I/O ports at 1900 [size=256]
[virtual] Expansion ROM at f1d00000 [disabled] [size=512K]
Capabilities: [40] PCI-X non-bridge device
Command: DPERE- ERO- RBC=4096 OST=8
Status: Dev=01:02.0 64bit+ 133MHz+ SCD- USC- DC=simple DMMRBC=4096 DMOST=8 DMCRS=128 RSCEM- 266MHz- 533MHz-
Capabilities: [58] Power Management version 2
Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
Status: D0 PME-Enable- DSel=0 DScale=0 PME-
Capabilities: [e0] Message Signalled Interrupts: 64bit+ Queue=0/2 Enable-
Address: 00000000fee00000 Data: 4055
02:00.0 Host bridge: IBM Calgary PCI-X Host Bridge (rev 04)
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=slow >TAbort- <TAbort- <MAbort+ >SERR- <PERR-
Latency: 240
Capabilities: [60] PCI-X non-bridge device
Command: DPERE- ERO- RBC=512 OST=2
Status: Dev=02:00.0 64bit+ 133MHz+ SCD- USC- DC=bridge DMMRBC=512 DMOST=2 DMCRS=8 RSCEM- 266MHz+ 533MHz-
02:01.0 Ethernet controller: S2io Inc. Xframe II 10Gbps Ethernet (rev 02)
Subsystem: S2io Inc. Unknown device 6020
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz+ UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 240 (63750ns min, 250ns max), Cache Line Size: 64 bytes
Interrupt: pin A routed to IRQ 18
Region 0: Memory at f0e00000 (64-bit, prefetchable) [size=32K]
Region 2: Memory at f0d00000 (64-bit, prefetchable) [size=1M]
Region 4: Memory at f0e08000 (64-bit, prefetchable) [size=2K]
[virtual] Expansion ROM at f1e00000 [disabled] [size=1M]
Capabilities: [40] Message Signalled Interrupts: 64bit+ Queue=0/5 Enable-
Address: 00000000fee00000 Data: 4056
Capabilities: [60] PCI-X non-bridge device
Command: DPERE+ ERO+ RBC=4096 OST=8
Status: Dev=02:01.0 64bit+ 133MHz+ SCD- USC- DC=simple DMMRBC=4096 DMOST=32 DMCRS=1024 RSCEM- 266MHz+ 533MHz-
Capabilities: [80] Vital Product Data
Capabilities: [90] MSI-X: Enable- Mask- TabSize=64
Vector table: BAR=4 offset=00000000
PBA: BAR=4 offset=00000400
Capabilities: [a0] Power Management version 3
Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold-)
Status: D0 PME-Enable- DSel=0 DScale=1 PME-
06:00.0 Host bridge: IBM Calgary PCI-X Host Bridge (rev 04)
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=slow >TAbort- <TAbort- <MAbort+ >SERR- <PERR-
Latency: 240
Capabilities: [60] PCI-X non-bridge device
Command: DPERE- ERO- RBC=512 OST=2
Status: Dev=06:00.0 64bit+ 133MHz+ SCD- USC- DC=bridge DMMRBC=512 DMOST=2 DMCRS=8 RSCEM- 266MHz+ 533MHz-
0a:00.0 PCI bridge: IBM CalIOC2 PCI-E Root Port (rev 01) (prog-if 01 [Subtractive decode])
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR- FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 0, Cache Line Size: 64 bytes
Bus: primary=0a, secondary=0b, subordinate=0e, sec-latency=0
I/O behind bridge: 0000f000-00000fff
Memory behind bridge: fff00000-000fffff
Prefetchable memory behind bridge: 00000000fff00000-0000000000000000
Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort+ <SERR- <PERR-
BridgeCtl: Parity+ SERR+ NoISA- VGA- MAbort- >Reset- FastB2B-
Capabilities: [40] Power Management version 3
Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold+)
Status: D0 PME-Enable- DSel=0 DScale=0 PME-
Capabilities: [80] Express Root Port (Slot+) IRQ 0
Device: Supported: MaxPayload 512 bytes, PhantFunc 0, ExtTag-
Device: Latency L0s unlimited, L1 unlimited
Device: Errors: Correctable+ Non-Fatal+ Fatal+ Unsupported+
Device: RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop+
Device: MaxPayload 256 bytes, MaxReadReq 4096 bytes
Link: Supported Speed 2.5Gb/s, Width x8, ASPM L0s L1, Port 0
Link: Latency L0s <256ns, L1 <2us
Link: ASPM Disabled RCB 128 bytes CommClk- ExtSynch-
Link: Speed 2.5Gb/s, Width x1
Slot: AtnBtn- PwrCtrl+ MRL+ AtnInd+ PwrInd+ HotPlug+ Surpise+
Slot: Number 3, PowerLimit 25.000000
Slot: Enabled AtnBtn- PwrFlt+ MRL+ PresDet- CmdCplt- HPIrq+
Slot: AttnInd Off, PwrInd On, Power-
Root: Correctable+ Non-Fatal+ Fatal+ PME+
Capabilities: [100] Advanced Error Reporting
0b:00.0 PCI bridge: PLX Technology, Inc. PEX 8114 PCI Express-to-PCI/PCI-X Bridge (rev bc) (prog-if 00 [Normal decode])
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr+ Stepping- SERR- FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 0, Cache Line Size: 64 bytes
Region 0: Memory at f2000000 (32-bit, non-prefetchable) [size=8K]
Bus: primary=0b, secondary=0c, subordinate=0c, sec-latency=240
I/O behind bridge: 00003000-00003fff
Memory behind bridge: f1f00000-f1ffffff
Prefetchable memory behind bridge: 00000000fff00000-0000000000000000
Secondary status: 66MHz+ FastB2B+ ParErr- DEVSEL=slow >TAbort- <TAbort- <MAbort- <SERR- <PERR-
BridgeCtl: Parity+ SERR+ NoISA- VGA- MAbort- >Reset- FastB2B-
Capabilities: [40] Power Management version 3
Flags: PMEClk+ DSI- D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold-)
Status: D0 PME-Enable- DSel=0 DScale=0 PME-
Capabilities: [48] Message Signalled Interrupts: 64bit+ Queue=0/0 Enable-
Address: 00000000fee00000 Data: 4052
Capabilities: [58] PCI-X bridge device
Secondary Status: 64bit+ 133MHz+ SCD- USC- SCO- SRD- Freq=133MHz
Status: Dev=0b:00.0 64bit- 133MHz- SCD- USC- SCO+ SRD-
Upstream: Capacity=16 CommitmentLimit=16
Downstream: Capacity=16 CommitmentLimit=16
Capabilities: [68] Express PCI/PCI-X Bridge IRQ 0
Device: Supported: MaxPayload 256 bytes, PhantFunc 0, ExtTag-
Device: Latency L0s <64ns, L1 <1us
Device: AtnBtn+ AtnInd+ PwrInd+
Device: Errors: Correctable+ Non-Fatal+ Fatal+ Unsupported-
Device: RlxdOrd- ExtTag- PhantFunc- AuxPwr- NoSnoop-
Device: MaxPayload 256 bytes, MaxReadReq 2048 bytes
Link: Supported Speed 2.5Gb/s, Width x4, ASPM L0s L1, Port 0
Link: Latency L0s <2us, L1 <32us
Link: ASPM Disabled CommClk- ExtSynch-
Link: Speed 2.5Gb/s, Width x1
Capabilities: [100] Device Serial Number df-0e-00-00-01-00-00-00
Capabilities: [fb4] Advanced Error Reporting
Capabilities: [138] Power Budgeting
Capabilities: [148] Virtual Channel
0c:04.0 SCSI storage controller: Adaptec ASC-29320ALP U320 (rev 10)
Subsystem: Adaptec Unknown device 0045
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz+ UDF- FastB2B- ParErr- DEVSEL=slow >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 240 (10000ns min, 6250ns max), Cache Line Size: 64 bytes
Interrupt: pin A routed to IRQ 52
Region 0: I/O ports at 3000 [disabled] [size=256]
Region 1: Memory at f1f80000 (64-bit, non-prefetchable) [size=8K]
Region 3: I/O ports at 3100 [disabled] [size=256]
[virtual] Expansion ROM at f1f00000 [disabled] [size=512K]
Capabilities: [dc] Power Management version 2
Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
Status: D0 PME-Enable- DSel=0 DScale=0 PME-
Capabilities: [a0] Message Signalled Interrupts: 64bit+ Queue=0/1 Enable-
Address: 00000000fee00000 Data: 4052
Capabilities: [94] PCI-X non-bridge device
Command: DPERE- ERO+ RBC=2048 OST=8
Status: Dev=0c:04.0 64bit+ 133MHz+ SCD- USC- DC=simple DMMRBC=2048 DMOST=8 DMCRS=64 RSCEM- 266MHz- 533MHz-
0f:00.0 PCI bridge: IBM CalIOC2 PCI-E Root Port (rev 01) (prog-if 01 [Subtractive decode])
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR- FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 0, Cache Line Size: 64 bytes
Bus: primary=0f, secondary=10, subordinate=13, sec-latency=0
I/O behind bridge: 0000f000-00000fff
Memory behind bridge: fff00000-000fffff
Prefetchable memory behind bridge: 00000000fff00000-0000000000000000
Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- <SERR- <PERR-
BridgeCtl: Parity+ SERR+ NoISA- VGA- MAbort- >Reset- FastB2B-
Capabilities: [40] Power Management version 3
Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold+)
Status: D0 PME-Enable- DSel=0 DScale=0 PME-
Capabilities: [80] Express Root Port (Slot+) IRQ 0
Device: Supported: MaxPayload 512 bytes, PhantFunc 0, ExtTag-
Device: Latency L0s unlimited, L1 unlimited
Device: Errors: Correctable+ Non-Fatal+ Fatal+ Unsupported+
Device: RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop+
Device: MaxPayload 512 bytes, MaxReadReq 4096 bytes
Link: Supported Speed 2.5Gb/s, Width x8, ASPM L0s L1, Port 1
Link: Latency L0s <256ns, L1 <2us
Link: ASPM Disabled RCB 128 bytes CommClk- ExtSynch-
Link: Speed 2.5Gb/s, Width x16
Slot: AtnBtn- PwrCtrl+ MRL+ AtnInd+ PwrInd+ HotPlug+ Surpise+
Slot: Number 4, PowerLimit 25.000000
Slot: Enabled AtnBtn- PwrFlt+ MRL+ PresDet- CmdCplt- HPIrq+
Slot: AttnInd Off, PwrInd Off, Power+
Root: Correctable+ Non-Fatal+ Fatal+ PME+
Capabilities: [100] Advanced Error Reporting
14:00.0 PCI bridge: IBM CalIOC2 PCI-E Root Port (rev 01) (prog-if 01 [Subtractive decode])
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR- FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 0, Cache Line Size: 64 bytes
Bus: primary=14, secondary=15, subordinate=18, sec-latency=0
I/O behind bridge: 0000f000-00000fff
Memory behind bridge: fff00000-000fffff
Prefetchable memory behind bridge: 00000000fff00000-0000000000000000
Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- <SERR- <PERR-
BridgeCtl: Parity+ SERR+ NoISA- VGA- MAbort- >Reset- FastB2B-
Capabilities: [40] Power Management version 3
Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold+)
Status: D0 PME-Enable- DSel=0 DScale=0 PME-
Capabilities: [80] Express Root Port (Slot+) IRQ 0
Device: Supported: MaxPayload 512 bytes, PhantFunc 0, ExtTag-
Device: Latency L0s unlimited, L1 unlimited
Device: Errors: Correctable+ Non-Fatal+ Fatal+ Unsupported+
Device: RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop+
Device: MaxPayload 512 bytes, MaxReadReq 4096 bytes
Link: Supported Speed 2.5Gb/s, Width x8, ASPM L0s L1, Port 2
Link: Latency L0s <256ns, L1 <2us
Link: ASPM Disabled RCB 128 bytes CommClk- ExtSynch-
Link: Speed 2.5Gb/s, Width x16
Slot: AtnBtn- PwrCtrl+ MRL+ AtnInd+ PwrInd+ HotPlug+ Surpise+
Slot: Number 5, PowerLimit 25.000000
Slot: Enabled AtnBtn- PwrFlt+ MRL+ PresDet- CmdCplt- HPIrq+
Slot: AttnInd Off, PwrInd Off, Power+
Root: Correctable+ Non-Fatal+ Fatal+ PME+
Capabilities: [100] Advanced Error Reporting
19:00.0 PCI bridge: IBM CalIOC2 PCI-E Root Port (rev 01) (prog-if 01 [Subtractive decode])
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR- FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 0, Cache Line Size: 64 bytes
Bus: primary=19, secondary=1a, subordinate=1d, sec-latency=0
I/O behind bridge: 0000f000-00000fff
Memory behind bridge: fff00000-000fffff
Prefetchable memory behind bridge: 00000000fff00000-0000000000000000
Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort+ <SERR- <PERR-
BridgeCtl: Parity+ SERR+ NoISA- VGA- MAbort- >Reset- FastB2B-
Capabilities: [40] Power Management version 3
Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold+)
Status: D0 PME-Enable- DSel=0 DScale=0 PME-
Capabilities: [80] Express Root Port (Slot+) IRQ 0
Device: Supported: MaxPayload 512 bytes, PhantFunc 0, ExtTag-
Device: Latency L0s unlimited, L1 unlimited
Device: Errors: Correctable+ Non-Fatal+ Fatal+ Unsupported+
Device: RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop+
Device: MaxPayload 128 bytes, MaxReadReq 4096 bytes
Link: Supported Speed 2.5Gb/s, Width x8, ASPM L0s L1, Port 3
Link: Latency L0s <256ns, L1 <2us
Link: ASPM Disabled RCB 128 bytes CommClk- ExtSynch-
Link: Speed 2.5Gb/s, Width x4
Slot: AtnBtn- PwrCtrl+ MRL+ AtnInd+ PwrInd+ HotPlug+ Surpise+
Slot: Number 6, PowerLimit 25.000000
Slot: Enabled AtnBtn- PwrFlt+ MRL+ PresDet- CmdCplt- HPIrq+
Slot: AttnInd Off, PwrInd On, Power-
Root: Correctable+ Non-Fatal+ Fatal+ PME+
Capabilities: [100] Advanced Error Reporting
1a:00.0 Ethernet controller: Intel Corporation 82571EB Gigabit Ethernet Controller (rev 06)
Subsystem: Intel Corporation PRO/1000 PT Dual Port Server Adapter
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 0, Cache Line Size: 64 bytes
Interrupt: pin A routed to IRQ 55
Region 0: Memory at f2100000 (32-bit, non-prefetchable) [size=128K]
Region 1: Memory at f2120000 (32-bit, non-prefetchable) [size=128K]
Region 2: I/O ports at 1a00 [size=32]
[virtual] Expansion ROM at f2140000 [disabled] [size=128K]
Capabilities: [c8] Power Management version 2
Flags: PMEClk- DSI+ D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
Status: D0 PME-Enable- DSel=0 DScale=1 PME-
Capabilities: [d0] Message Signalled Interrupts: 64bit+ Queue=0/0 Enable-
Address: 00000000fee00000 Data: 407b
Capabilities: [e0] Express Endpoint IRQ 0
Device: Supported: MaxPayload 256 bytes, PhantFunc 0, ExtTag-
Device: Latency L0s <512ns, L1 <64us
Device: AtnBtn- AtnInd- PwrInd-
Device: Errors: Correctable- Non-Fatal- Fatal- Unsupported-
Device: RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop+
Device: MaxPayload 128 bytes, MaxReadReq 512 bytes
Link: Supported Speed 2.5Gb/s, Width x4, ASPM L0s, Port 0
Link: Latency L0s <4us, L1 <64us
Link: ASPM Disabled RCB 64 bytes CommClk- ExtSynch-
Link: Speed 2.5Gb/s, Width x4
Capabilities: [100] Advanced Error Reporting
Capabilities: [140] Device Serial Number ce-c4-0b-ff-ff-17-15-00
1a:00.1 Ethernet controller: Intel Corporation 82571EB Gigabit Ethernet Controller (rev 06)
Subsystem: Intel Corporation PRO/1000 PT Dual Port Server Adapter
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR-
Latency: 0, Cache Line Size: 64 bytes
Interrupt: pin B routed to IRQ 59
Region 0: Memory at f2160000 (32-bit, non-prefetchable) [size=128K]
Region 1: Memory at f2180000 (32-bit, non-prefetchable) [size=128K]
Region 2: I/O ports at 1a20 [size=32]
[virtual] Expansion ROM at f21a0000 [disabled] [size=128K]
Capabilities: [c8] Power Management version 2
Flags: PMEClk- DSI+ D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
Status: D0 PME-Enable- DSel=0 DScale=1 PME-
Capabilities: [d0] Message Signalled Interrupts: 64bit+ Queue=0/0 Enable-
Address: 00000000fee00000 Data: 4083
Capabilities: [e0] Express Endpoint IRQ 0
Device: Supported: MaxPayload 256 bytes, PhantFunc 0, ExtTag-
Device: Latency L0s <512ns, L1 <64us
Device: AtnBtn- AtnInd- PwrInd-
Device: Errors: Correctable- Non-Fatal- Fatal- Unsupported-
Device: RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop+
Device: MaxPayload 128 bytes, MaxReadReq 512 bytes
Link: Supported Speed 2.5Gb/s, Width x4, ASPM L0s, Port 0
Link: Latency L0s <4us, L1 <64us
Link: ASPM Disabled RCB 64 bytes CommClk- ExtSynch-
Link: Speed 2.5Gb/s, Width x4
Capabilities: [100] Advanced Error Reporting
Capabilities: [140] Device Serial Number ce-c4-0b-ff-ff-17-15-00
0;root@elm3a9:~[root@elm3a9 ~]# exit
exit
Script done on Wed 14 Nov 2007 03:38:23 AM PST
Hi Gary,
* Gary Hade <[email protected]>:
> On Wed, Nov 14, 2007 at 07:42:37AM -0700, Alex Chiang wrote:
> > Hi Gary,
> >
> > * Gary Hade <[email protected]>:
> > >
> > > I am not fundamentally opposed to this new capability but share
> > > the same concerns that Greg and others have expressed. So far,
> > > I have only tried the changes on one single node system (IBM
> > > x3850) but the below NAK-worthy result supports the idea that
> > > the changes need to be well and widely tested.
> >
> > Can you send me the lspci -vv and lspci -vt output from this
> > machine?
>
> Alex, I added some slot number hints to the below `lspci -vt`
> output.
Thanks.
> I am still looking at your other message and will have some
> hopefully helpful comments soon. I did apply all 5 patches,
> 4 was a typo.
Actually, I just reworked my patch this morning, and believe that
I have a much cleaner implementation now that should fix a lot of
the errors you saw.
Incoming!
Thanks.
/ac
On Wed, 14 Nov 2007 18:55:21 +0900
Kenji Kaneshige <[email protected]> wrote:
> Matthew Wilcox ????????:
> > On Tue, Nov 13, 2007 at 03:33:14PM -0800, Kristen Carlson Accardi wrote:
> >> As far as being able to retrieve the slot number (which it seemed from
> >> the HP manageablity application perspective is the goal here), that
> >> information is available from userspace as well for at least standard PCI
> >> and pcie based systems for occupied slots. For standard pci, you have
> >> to make something up anyway - for shpchp we just use an incremental
> >> number and combine it with the bus number to represent the slot. For
> >> pcie, you can get this info from the slot capabilities register.
> >
> > Ummm ... that's not what the /spec/ says. I've never worked on any shpc
> > machines, but the shpc driver reads the slot values from the SLOT_CONFIG
> > register, just like the spec says to.
> >
>
> The slot number for shpc slot is like 'YYYY_XXXX'.
>
> YYYY is the bus number, though I don't know the specific reason
> why it was added.
>
> XXXX is slot number decided according to the shpc specification,
> as you said.
>
> Thanks,
> Kenji Kaneshige
>
Kenji is correct, you should listen to him, not me :).
For standard PCI slots that are not hotplug, I would think you'd need
firware support to find the slot numbers -- but I'm not sure on this.
btw - the YYYY was added to prevent duplicate slot numbers on buggy
machines btw.
Hi Greg,
* Greg KH <[email protected]>:
> On Wed, Nov 14, 2007 at 10:37:08AM -0700, Bjorn Helgaas wrote:
> >
> > So I agree that the firmware kit has a clever hack that works
> > on much existing x86 firmware, and it sounds like Tivoli
> > might even rely on it. But I don't feel good about it, and
> > it could easily break when some BIOS writer needs to make
> > _SUN slightly more complicated.
>
> Do you know of such BIOSes out there that do this? Will the
> above scheme not work for the ia64 boxes that you know of that
> are out in the world today?
Matthew and Bjorn have already been hinting at this, but I'd like
to voice my concern as well.
Even if no one is implementing _SUN as a complicated AML control
method today, that is no guarantee that prevents future firmware
vendors from doing so. The spec allows it, so in my opinion,
pointing to existing implementations and claiming that it all
just works is somewhat unfair (and scary).
If/when the first vendor implements an AML version of _SUN, the
firmware kit will break, and Tivoli will have to react.
I was initially intrigued with you and Kristen pointed out this
tool, but after thinking about it some more, I am not convinced
it is a viable alternative, due to the unknown nature of future
firmware implementations.
Thanks.
/ac
Hi Greg,
* Greg KH <[email protected]>:
> On Wed, Nov 14, 2007 at 10:37:08AM -0700, Bjorn Helgaas wrote:
> >
> > So I agree that the firmware kit has a clever hack that works
> > on much existing x86 firmware, and it sounds like Tivoli
> > might even rely on it. But I don't feel good about it, and
> > it could easily break when some BIOS writer needs to make
> > _SUN slightly more complicated.
>
> Do you know of such BIOSes out there that do this? Will the
> above scheme not work for the ia64 boxes that you know of that
> are out in the world today?
Talked to some of our ia64 firmware folks, and they confirmed
that our future platforms will implement _SUN as a control
method.
Thanks.
/ac
* Greg KH <[email protected]>:
> On Wed, Nov 14, 2007 at 10:37:08AM -0700, Bjorn Helgaas wrote:
> >
> > So I agree that the firmware kit has a clever hack that works
> > on much existing x86 firmware, and it sounds like Tivoli
> > might even rely on it. But I don't feel good about it, and
> > it could easily break when some BIOS writer needs to make
> > _SUN slightly more complicated.
>
> Do you know of such BIOSes out there that do this? Will the
> above scheme not work for the ia64 boxes that you know of that
> are out in the world today?
One last mail on this subject -- Bjorn has pointed out to me that
the Dell pe6800 and rez1850 both implement _SUN as control
methods today.
Does Tivoli run on those machines? If so, how is it getting slot
information?
Thanks.
/ac
On Wed, Nov 14, 2007 at 02:42:21PM -0700, Alex Chiang wrote:
> * Greg KH <[email protected]>:
> > On Wed, Nov 14, 2007 at 10:37:08AM -0700, Bjorn Helgaas wrote:
> > >
> > > So I agree that the firmware kit has a clever hack that works
> > > on much existing x86 firmware, and it sounds like Tivoli
> > > might even rely on it. But I don't feel good about it, and
> > > it could easily break when some BIOS writer needs to make
> > > _SUN slightly more complicated.
> >
> > Do you know of such BIOSes out there that do this? Will the
> > above scheme not work for the ia64 boxes that you know of that
> > are out in the world today?
>
> One last mail on this subject -- Bjorn has pointed out to me that
> the Dell pe6800 and rez1850 both implement _SUN as control
> methods today.
Does the firmware kit break on them?
> Does Tivoli run on those machines? If so, how is it getting slot
> information?
I have no idea, I don't work for them :)
good luck,
greg k-h
On Tue, Nov 13, 2007 at 06:37:32PM -0700, Alex Chiang wrote:
> Hi Gary,
>
> * Gary Hade <[email protected]>:
> > On Tue, Nov 13, 2007 at 01:11:02PM -0700, Matthew Wilcox wrote:
> > > On Tue, Nov 13, 2007 at 10:51:22AM -0800, Greg KH wrote:
> > > > Ok, again, I want to see the IBM people sign off on this, after testing
> > > > on all of their machines, before I'll consider this, as I know the IBM
> > > > acpi tables are "odd".
> > >
> > > That seems a little higher standard than patches are normally held to.
> > > How about the patches get sent to the appropriate people at IBM (who are
> > > they?)
> >
> > I be one of them. :) I have been involved in many (but not all)
> > of IBM's x86 based (IBM System x) servers with hotplug capable
> > PCI slots. I have mostly worked on 'acpiphp' associated issues.
>
> Thanks for testing the series. It's much appreciated.
>
> > Have you possibly considered a kernel option as a kinder and
> > gentler way of introducing the changes?
>
> That is a good idea. I will work on that.
Thanks. This will allow everyone to focus on the systems where
the changes are most beneficial and not waste a bunch of time
trying to test everywhere.
>
> > ====
> > IBM x3850
> > Slots 1-2: PCI-X under PCI root bridges
> > Slots 3-6: PCIe under transparent P2P bridges
> > Slot 1: PCI-X - populated
> > Slot 2: PCI-X - !populated
> > Slot 3: PCIe - populated
> > Slot 4: PCIe - !populated
> > Slot 5: PCIe - !populated
> > Slot 6: PCIe - populated
> >
> > result is with 2.6.24-rc2 plus all 4 proposed patches
>
> Silly question, but I have to ask. :)
Hey, this isn't a silly question. :)
>
> I sent out 5 patches -- is this simply a typo on your part, or
> did you only apply 4/5 patches?
Yes, it is just a typo. I did apply all 5 patches.
>
> > problem: acpiphp failed to register empty PCIe slots 4 and 5
>
> Ok, so acpiphp wasn't going to register those slots anyway, since
> they are empty.
No, acpiphp should (and did before your changes) register all
hotplug capable slots. All 6 slots (2 PCI-X, 4 PCIe) in that
system are hotplug capable. Emptyness shouldn't matter. If
the empty slots are not registered it is not be possible to
successfully hotplug cards to them.
Without your changes acpiphp loads with the following output.
acpiphp: ACPI Hot Plug PCI Controller Driver version: 0.5
acpiphp: Slot [1] registered
acpiphp: Slot [2] registered
acpiphp: Slot [3] registered
acpiphp: Slot [4] registered
acpiphp: Slot [5] registered
acpiphp: Slot [6] registered
With your changes I confirmed that an attempted hotplug
to a boot-time vacant PCIe slot failed as expected. The
driver saw the insertion event but didn't find anything
to enable:
acpiphp_glue: handle_hotplug_event_bridge: Bus check notify on \_SB_.VP05.CALG
acpiphp_glue: handle_hotplug_event_bridge: re-enumerating slots under \_SB_.VP05.CALG
acpiphp_glue: acpiphp_check_bridge: 0 enabled, 0 disabled
> It would have bailed out after not seeing _ADR or
> _EJ0 on those slots.
Well, both _ADR and _EJ0 exist for each of the 4 PCIe slots.
>
> The acpi-pci-slot driver created those slots anyway, which is one
> of the points of the patch -- to create sysfs entries even for
> empty slots.
>
> > acpiphp_glue: found PCI-to-PCI bridge at PCI 0000:0f:00.0
>
> This is the real address of slot 4.
No, the P2P parent bus is 0000:0f and the P2P child bus is
0000:10 so I believe the real address for slot 4 should be
0000:10:00.
kernel without your changes after loading acpiphp:
# cat /sys/bus/pci/slots/4/address
0000:10:00
kernel with your changes both before and after loading acpiphp:
# cat /sys/bus/pci/slots/4/address
0000:0f:00
>
> > acpiphp_glue: found ACPI PCI Hotplug slot 4 at PCI 0000:10:00
> > acpiphp: pci_hp_register failed with error -17
> > acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
> [repeated 7x]
>
> We saw this message 8x, once for each SxFy object under your p2p
> bridge. I actually somewhat did expect to see this error message
> (hence the RFC part of my patch ;)
>
> I currently don't have a good way to determine if we've already
> seen an empty slot under a p2p bridge, so we try to register
> every SxFy object. Of course, a /sys/bus/pci/slots/4/ entry
> already exists, so that's why we're getting -17 (-EEXIST).
Of course, this kind of confusing noise would not be acceptable
in the final version of your changes.
>
> > acpiphp_glue: found PCI-to-PCI bridge at PCI 0000:14:00.0
> > acpiphp_glue: found ACPI PCI Hotplug slot 5 at PCI 0000:15:00
> > acpiphp: pci_hp_register failed with error -17
> > acpiphp_glue: acpiphp_register_hotplug_slot failed(err code = 0xffffffef)
>
> Same explanation as above.
>
> > # find /sys/bus/pci/slots
> > /sys/bus/pci/slots
>
> [snip]
>
> > /sys/bus/pci/slots/4
> > /sys/bus/pci/slots/4/address
> > /sys/bus/pci/slots/5
> > /sys/bus/pci/slots/5/address
>
> Arguably, the right thing happened here. We got entries for empty
> slots, and we know their addresses.
No, the wrong thing happened here. I expect the slot directories
for the empty slots to look the same as they did before your changes.
This is what the slot directories for empty slots look like without
your changes.
# find /sys/bus/pci/slots/[45]
/sys/bus/pci/slots/4
/sys/bus/pci/slots/4/power
/sys/bus/pci/slots/4/attention
/sys/bus/pci/slots/4/latch
/sys/bus/pci/slots/4/adapter
/sys/bus/pci/slots/4/address
/sys/bus/pci/slots/5
/sys/bus/pci/slots/5/power
/sys/bus/pci/slots/5/attention
/sys/bus/pci/slots/5/latch
/sys/bus/pci/slots/5/adapter
/sys/bus/pci/slots/5/address
Note that with your changes the slot directories for the PCI-X
slots (slot 1 populated, slot 2 empty) look fine.
# find /sys/bus/pci/slots/[12]
/sys/bus/pci/slots/1
/sys/bus/pci/slots/1/address
/sys/bus/pci/slots/1/power
/sys/bus/pci/slots/1/attention
/sys/bus/pci/slots/1/latch
/sys/bus/pci/slots/1/adapter
/sys/bus/pci/slots/2
/sys/bus/pci/slots/2/address
/sys/bus/pci/slots/2/power
/sys/bus/pci/slots/2/attention
/sys/bus/pci/slots/2/latch
/sys/bus/pci/slots/2/adapter
>
> If anyone can clue me in on a better way to implement patch 4/5
> in my series so that we're not seeing those multiple attempts to
> register slots under p2p bridges, I'd love to hear your ideas.
At first I thought you were talking about the acpiphp
register failure messages that I reported here. Since the
new functions added with patch 4/5 are not visited when acpiphp
loads you must be talking about the ACPI complaints during boot
(see below) which are mentioned in the comment you included
in your patch. I don't have any ideas right now.
Thanks,
Gary
--
Gary Hade
System x Enablement
IBM Linux Technology Center
503-578-4503 IBM T/L: 775-4503
[email protected]
http://www.ibm.com/linux/ltc
pci_hotplug: PCI Hot Plug PCI Core version: 0.5
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device S1F1 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device S1F2 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device S1F3 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device S1F4 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device S1F5 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device S1F6 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device S1F7 [20070126]
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device E3F1 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device E3F2 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device E3F3 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device E3F4 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device E3F5 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device E3F6 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device E3F7 [20070126]
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device E6F2 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device E6F3 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device E6F4 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device E6F5 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device E6F6 [20070126]
ACPI Exception (pci_bind-0086): AE_NOT_FOUND, Invalid ACPI-PCI context for device E6F7 [20070126]
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
ACPI: Invalid ACPI Bus context for device <NULL>
Hi Gary,
[I'm gonna take the points in your mail a little out of order]
> No, acpiphp should (and did before your changes) register all
> hotplug capable slots. All 6 slots (2 PCI-X, 4 PCIe) in that
> system are hotplug capable. Emptyness shouldn't matter. If
> the empty slots are not registered it is not be possible to
> successfully hotplug cards to them.
Of course, you are right. I am not sure what I was thinking when
I wrote that email yesterday, but I pretty much got it exactly
wrong. I think I confused myself between empty/populated vs
hp/non-hp. In any case, thanks for the patient explanation.
I went back and looked at a vanilla kernel on my machine that has
the following hardware configuration:
HP rx6600
Slot 1-2: PCI-X under PCI root bridges
Slot 3-6: PCIe under transparent P2P bridges
Slot 7-10: PCI-X under PCI root bridges
Slot 1: PCI-X, !populated, !hp
Slot 2: PCI-X, populated, !hp
Slot 3: PCIe, populated, hp
Slot 4: PCIe, populated, hp
Slot 5: PCIe, !populated, hp
Slot 6: PCIe, populated, hp
Slot 7: PCI-X, !populated, hp
Slot 8: PCI-X, !populated, hp
Slot 9: PCI-X, !populated, hp
Slot 10: PCI-X, !populated, hp
On a kernel without my changes, we see:
[root@canola slots]# modprobe acpiphp
[root@canola slots]# ls
10 3 4 5 6 7 8 9
[root@canola slots]# for i in * ; do ls $i ; done
adapter address attention latch power
adapter address attention latch power
adapter address attention latch power
adapter address attention latch power
adapter address attention latch power
adapter address attention latch power
adapter address attention latch power
adapter address attention latch power
[root@canola slots]# for i in * ; do cat $i/address ; done
0000:01:02
0000:8b:00
0000:52:00
0000:c5:00
0000:10:00
0000:0a:01
0000:4a:01
0000:01:01
This confirms that you were right -- we see all the hp slots show
up, even though many of them are non-populated. Also, the correct
hp entries show up, just like you'd expect.
Ok, so all that said, after re-implementing my ACPI-PCI slot
driver, we get all the correct answers, but with the additional
appearance of slots 1 and 2 (which aren't hotpluggable):
[root@canola slots]# ls
1 10 2 3 4 5 6 7 8 9
[root@canola slots]# for i in * ; do ls $i ; done
address
adapter address attention latch power
address
adapter address attention latch power
adapter address attention latch power
adapter address attention latch power
adapter address attention latch power
adapter address attention latch power
adapter address attention latch power
adapter address attention latch power
[root@canola slots]# for i in * ; do cat $i/address ; done
0000:49:01
0000:01:02
0000:49:02
0000:8b:00
0000:52:00
0000:c5:00
0000:10:00
0000:0a:01
0000:4a:01
0000:01:01
Bonus points because all that noise during boot went away too.
Life is much better when you actually copy an algorithm correctly
rather than coming up with some half-assed solution on one's own.
;)
So if you could try out v2 of my patch series when you get access
to your machine back, I'd much appreciate it. I feel a lot better
about it compared to v1.
> > > Have you possibly considered a kernel option as a kinder
> > > and gentler way of introducing the changes?
> >
> > That is a good idea. I will work on that.
>
> Thanks. This will allow everyone to focus on the systems where
> the changes are most beneficial and not waste a bunch of time
> trying to test everywhere.
I will work on this for v3.
Thanks!
/ac
Here is some dmesg and lspci -vt output in case you want to
double-check my work:
ACPI: PCI Root Bridge [L000] (0000:00)
ACPI: PCI Root Bridge [L001] (0000:01)
ACPI: PCI Root Bridge [L002] (0000:0a)
ACPI: PCI Root Bridge [L003] (0000:0f)
ACPI: PCI Root Bridge [L004] (0000:49)
ACPI: PCI Root Bridge [L005] (0000:4a)
ACPI: PCI Root Bridge [L006] (0000:4f)
ACPI: PCI Root Bridge [L007] (0000:c4)
[root@canola slots]# lspci -vt
-+-[0000:c4]---00.0-[0000:c5-fe]--
+-[0000:4f]---00.0-[0000:50-c3]----00.0-[0000:51-c3]--+-00.0-[0000:52-8a]----00.0 Hewlett-Packard Company Smart Array Controller
| \-01.0-[0000:8b-c3]----00.0 Hewlett-Packard Company Smart Array Controller
+-[0000:49]-+-02.0 Intel Corporation 82546GB Gigabit Ethernet Controller
| \-02.1 Intel Corporation 82546GB Gigabit Ethernet Controller
+-[0000:0f]---00.0-[0000:10-48]----00.0 Hewlett-Packard Company Smart Array Controller
\-[0000:00]-+-01.0 Hewlett-Packard Company RMP-3 (Remote Management Processor)
+-01.1 Hewlett-Packard Company RMP-3 Shared Memory Driver
+-01.2 Hewlett-Packard Company Diva Serial [GSP] Multiport UART
+-02.0 NEC Corporation USB
+-02.1 NEC Corporation USB
+-02.2 NEC Corporation USB 2.0
\-04.0 ATI Technologies Inc Radeon RV100 QY [Radeon 7000/VE]
Hi Greg, Matt,
* Greg KH <[email protected]>:
> On Wed, Nov 14, 2007 at 02:42:21PM -0700, Alex Chiang wrote:
> > * Greg KH <[email protected]>:
> > > On Wed, Nov 14, 2007 at 10:37:08AM -0700, Bjorn Helgaas wrote:
> > > >
> > > > So I agree that the firmware kit has a clever hack that works
> > > > on much existing x86 firmware, and it sounds like Tivoli
> > > > might even rely on it. But I don't feel good about it, and
> > > > it could easily break when some BIOS writer needs to make
> > > > _SUN slightly more complicated.
> > >
> > > Do you know of such BIOSes out there that do this? Will the
> > > above scheme not work for the ia64 boxes that you know of that
> > > are out in the world today?
> >
> > One last mail on this subject -- Bjorn has pointed out to me that
> > the Dell pe6800 and rez1850 both implement _SUN as control
> > methods today.
>
> Does the firmware kit break on them?
I downloaded the firmware kit today and played with it. There is
a test called SUN.exe, which searches through the DSDT, looking
to verify that there are no duplicate _SUN values in the system.
The test breaks on my hp rx6600 (a currently shipping platform),
because _SUN is not a hard-coded value, but implemented as an AML
control method. From the various SSDT dumps:
Here's the _SUN for a slot:
Method (_SUN, 0, Serialized)
{
Store (\SLOT.SUN, ^_UID)
Local0
Return (Local0)
}
Here's the _SUN for a processor:
Method (_SUN, 0, NotSerialized)
{
Store (\CPUL.SUN, ^CPID)
Local0
Return (Local0)
}
So parse_SUN_name() from SUN.c is just plain broken. The test
"passes" because it doesn't find any duplicate values for _SUN,
but what's really going on is that it doesn't find *any* values
of _SUN, so of course there won't be duplicates. ;)
I looked at convincing this test to try and execute the method
using ExecuteAmlMethod() and/or execute_aml_method(), but of
course, that won't work on a modern kernel, as they depend on
/proc/acpi/hotkey/, which was removed (as Bjorn pointed out).
Matt, is there any chance you could see if the firmware kit works
or breaks on your PE6800?
I downloaded the latest tarball:
http://www.linuxfirmwarekit.org/download/firmwarekit-r3.tar.gz
And ran it in stand-alone mode. You do need to build the test
suite, but you don't need to run the entire thing -- just run the
SUN/SUN.exe that is generated. (If you get odd complaints about
not being able to find libstandalone.so, just stick it in
/usr/local/lib, modify /etc/ld.so.conf as necessary and run
ldconfig).
Please apply the following debug patch to see how many _SUN
objects the test is actually finding. On my machine, I don't find
any _SUN objects.
Thanks.
/ac
diff --git a/SUN/SUN.c b/SUN/SUN.c
index 90264a7..3b21b9c 100644
--- a/SUN/SUN.c
+++ b/SUN/SUN.c
@@ -260,6 +260,7 @@ static void parse_SUN_name(gpointer data, gpointer user_data)
int main(int argc, char **argv)
{
+ int i;
start_test("SUN", "SUN duplicate test",
"This makes sure that each SUN (Slot Unique Number) that is "
"called in the DSDT through the Name() method is unique, no "
@@ -281,6 +282,11 @@ int main(int argc, char **argv)
/* List SUN duplicates (if any) */
do_SUN_check();
+
+ while(SUN_list[i].line_num) {
+ printf("## found SUN: %s\n", SUN_list[i].SUN_hexnum);
+ i++;
+ }
finish_test("SUN");
return EXIT_SUCCESS;
On Thu, Nov 15, 2007 at 10:36:13AM -0700, Alex Chiang wrote:
< snip >
> Ok, so all that said, after re-implementing my ACPI-PCI slot
> driver, we get all the correct answers, but with the additional
> appearance of slots 1 and 2 (which aren't hotpluggable):
Yea, looks much better. Nice to see that the addresses
for slots below the p2p bridges now include the child
instead of the parent bus number. :)
< snip >
> So if you could try out v2 of my patch series when you get access
> to your machine back, I'd much appreciate it.
Will do. It still looks like I will not get the box back
until early next week. I will only be working Monday and
Tuesday due to holiday/vacation but that will hopefully not
be a problem.
> I feel a lot better about it compared to v1.
I haven't tried it yet but I feel better seeing that you
are now testing on a system having a PCI topology that
appears similar to some of our systems.
< snip >
> Here is some dmesg and lspci -vt output in case you want to
> double-check my work:
Well, I didn't see any obvious warts but I'm pretty skilled
at missing problems when looking at that kind of info.
Thanks,
Gary
--
Gary Hade
System x Enablement
IBM Linux Technology Center
503-578-4503 IBM T/L: 775-4503
[email protected]
http://www.ibm.com/linux/ltc