move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to call
assign resources several times, when there are several bridges under the slot bridge.
need to introduce pci_bridge_assign_resources there.
handle the case the first bridge that doesn't get pre-allocated big enough res from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...
pci_is_enabled must be removed in pci_setup_bridge(), otherwise update res is not
updated to bridge BAR. and we are safe to do that, because only have one bus_size
and assigned resources are called.
Signed-off-by: Yinghai Lu <[email protected]>
---
drivers/pci/hotplug/pciehp_pci.c | 29 ++++++++++++----
drivers/pci/setup-bus.c | 70 +++++++++++++++++++++++++++++++++++++--
include/linux/pci.h | 1
3 files changed, 90 insertions(+), 10 deletions(-)
Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
@@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
busnr = pci_scan_bridge(parent, dev, busnr, pass);
if (!dev->subordinate)
return -1;
- pci_bus_size_bridges(dev->subordinate);
- pci_bus_assign_resources(parent);
- pci_enable_bridges(parent);
- pci_bus_add_devices(parent);
+
return 0;
}
int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
+ struct pci_dev *bridge = p_slot->ctrl->pcie->port;
+ struct pci_bus *parent = bridge->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;
+ int retval;
dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
if (dev) {
@@ -96,12 +95,28 @@ int pciehp_configure_device(struct slot
(dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
pciehp_add_bridge(dev);
}
- pci_configure_slot(dev);
pci_dev_put(dev);
}
- pci_bus_assign_resources(parent);
+ pci_bus_size_bridges(parent);
+ pci_bridge_assign_resources(bridge);
+ retval = pci_reenable_device(bridge);
+ pci_set_master(bridge);
+ pci_enable_bridges(parent);
pci_bus_add_devices(parent);
+
+ for (fn = 0; fn < 8; fn++) {
+ dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
+ if (!dev)
+ continue;
+ if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ pci_dev_put(dev);
+ continue;
+ }
+ pci_configure_slot(dev);
+ pci_dev_put(dev);
+ }
+
return 0;
}
Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -27,6 +27,44 @@
#include <linux/slab.h>
#include "pci.h"
+static void pdev_assign_resources_sorted(struct pci_dev *dev)
+{
+ struct resource *res;
+ struct resource_list head, *list, *tmp;
+ int idx;
+ u16 class = dev->class >> 8;
+
+ head.next = NULL;
+
+ /* Don't touch classless devices or host bridges or ioapics. */
+ if (class == PCI_CLASS_NOT_DEFINED ||
+ class == PCI_CLASS_BRIDGE_HOST)
+ return;
+
+ /* Don't touch ioapic devices already enabled by firmware */
+ if (class == PCI_CLASS_SYSTEM_PIC) {
+ u16 command;
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
+ return;
+ }
+
+ pdev_sort_resources(dev, &head);
+
+ for (list = head.next; list;) {
+ res = list->res;
+ idx = res - &list->dev->resource[0];
+ if (pci_assign_resource(list->dev, idx)) {
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+}
+
static void pbus_assign_resources_sorted(const struct pci_bus *bus)
{
struct pci_dev *dev;
@@ -144,9 +182,6 @@ static void pci_setup_bridge(struct pci_
u32 l, bu, lu, io_upper16;
int pref_mem64;
- if (pci_is_enabled(bridge))
- return;
-
dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
pci_domain_nr(bus), bus->number);
@@ -550,6 +585,35 @@ void __ref pci_bus_size_bridges(struct p
}
EXPORT_SYMBOL(pci_bus_size_bridges);
+void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
+{
+ struct pci_bus *b;
+
+ pdev_assign_resources_sorted((struct pci_dev *)bridge);
+
+ b = bridge->subordinate;
+ if (!b)
+ return;
+
+ pci_bus_assign_resources(b);
+
+ switch (bridge->class >> 8) {
+ case PCI_CLASS_BRIDGE_PCI:
+ pci_setup_bridge(b);
+ break;
+
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ pci_setup_cardbus(b);
+ break;
+
+ default:
+ dev_info(&bridge->dev, "not setting up bridge for bus "
+ "%04x:%02x\n", pci_domain_nr(b), b->number);
+ break;
+ }
+}
+EXPORT_SYMBOL(pci_bridge_assign_resources);
+
void __ref pci_bus_assign_resources(const struct pci_bus *bus)
{
struct pci_bus *b;
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
int pci_vpd_truncate(struct pci_dev *dev, size_t size);
/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+void pci_bridge_assign_resources(const struct pci_dev *bridge);
void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);
* Yinghai Lu <[email protected]>:
> @@ -144,9 +182,6 @@ static void pci_setup_bridge(struct pci_
> u32 l, bu, lu, io_upper16;
> int pref_mem64;
>
> - if (pci_is_enabled(bridge))
> - return;
> -
> dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
> pci_domain_nr(bus), bus->number);
Have you tested this with the hotplug facilities I added a while
back?
Basically, what happens when you offline a bridge using
/sys/bus/pci/devices/.../remove
And then re-add it using:
/sys/bus/pci/rescan
Please try this both for bridges that are claimed by pciehp as
well as bridges that are not claimed.
Thanks.
/ac
move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.
need to introduce pci_bridge_assign_resources there.
handle the case the slot bridge that doesn't get pre-allocated big enough res
from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...
pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
update res is not updated to bridge BAR. that is bridge is enabled already for
port service.
-v2: address Alex's concern about pci remove/rescan feature about pci_setup_bridge changes.
Signed-off-by: Yinghai Lu <[email protected]>
---
drivers/pci/hotplug/pciehp_pci.c | 30 ++++++++++++----
drivers/pci/setup-bus.c | 73 +++++++++++++++++++++++++++++++++++++--
include/linux/pci.h | 1
3 files changed, 94 insertions(+), 10 deletions(-)
Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
@@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
busnr = pci_scan_bridge(parent, dev, busnr, pass);
if (!dev->subordinate)
return -1;
- pci_bus_size_bridges(dev->subordinate);
- pci_bus_assign_resources(parent);
- pci_enable_bridges(parent);
- pci_bus_add_devices(parent);
+
return 0;
}
int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
+ struct pci_dev *bridge = p_slot->ctrl->pcie->port;
+ struct pci_bus *parent = bridge->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;
+ int retval;
dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
if (dev) {
@@ -96,12 +95,29 @@ int pciehp_configure_device(struct slot
(dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
pciehp_add_bridge(dev);
}
- pci_configure_slot(dev);
pci_dev_put(dev);
}
- pci_bus_assign_resources(parent);
+ pci_bus_size_bridges(parent);
+ pci_clear_master(bridge);
+ pci_bridge_assign_resources(bridge);
+ retval = pci_reenable_device(bridge);
+ pci_set_master(bridge);
+ pci_enable_bridges(parent);
pci_bus_add_devices(parent);
+
+ for (fn = 0; fn < 8; fn++) {
+ dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
+ if (!dev)
+ continue;
+ if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ pci_dev_put(dev);
+ continue;
+ }
+ pci_configure_slot(dev);
+ pci_dev_put(dev);
+ }
+
return 0;
}
Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -27,6 +27,44 @@
#include <linux/slab.h>
#include "pci.h"
+static void pdev_assign_resources_sorted(struct pci_dev *dev)
+{
+ struct resource *res;
+ struct resource_list head, *list, *tmp;
+ int idx;
+ u16 class = dev->class >> 8;
+
+ head.next = NULL;
+
+ /* Don't touch classless devices or host bridges or ioapics. */
+ if (class == PCI_CLASS_NOT_DEFINED ||
+ class == PCI_CLASS_BRIDGE_HOST)
+ return;
+
+ /* Don't touch ioapic devices already enabled by firmware */
+ if (class == PCI_CLASS_SYSTEM_PIC) {
+ u16 command;
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
+ return;
+ }
+
+ pdev_sort_resources(dev, &head);
+
+ for (list = head.next; list;) {
+ res = list->res;
+ idx = res - &list->dev->resource[0];
+ if (pci_assign_resource(list->dev, idx)) {
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+}
+
static void pbus_assign_resources_sorted(const struct pci_bus *bus)
{
struct pci_dev *dev;
@@ -137,14 +175,14 @@ EXPORT_SYMBOL(pci_setup_cardbus);
config space writes, so it's quite possible that an I/O window of
the bridge will have some undesirable address (e.g. 0) after the
first write. Ditto 64-bit prefetchable MMIO. */
-static void pci_setup_bridge(struct pci_bus *bus)
+static void pci_setup_bridge(struct pci_bus *bus, bool check_enabled)
{
struct pci_dev *bridge = bus->self;
struct pci_bus_region region;
u32 l, bu, lu, io_upper16;
int pref_mem64;
- if (pci_is_enabled(bridge))
+ if (check_enabled && pci_is_enabled(bridge))
return;
dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
@@ -550,6 +588,35 @@ void __ref pci_bus_size_bridges(struct p
}
EXPORT_SYMBOL(pci_bus_size_bridges);
+void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
+{
+ struct pci_bus *b;
+
+ pdev_assign_resources_sorted((struct pci_dev *)bridge);
+
+ b = bridge->subordinate;
+ if (!b)
+ return;
+
+ pci_bus_assign_resources(b);
+
+ switch (bridge->class >> 8) {
+ case PCI_CLASS_BRIDGE_PCI:
+ pci_setup_bridge(b, false);
+ break;
+
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ pci_setup_cardbus(b);
+ break;
+
+ default:
+ dev_info(&bridge->dev, "not setting up bridge for bus "
+ "%04x:%02x\n", pci_domain_nr(b), b->number);
+ break;
+ }
+}
+EXPORT_SYMBOL(pci_bridge_assign_resources);
+
void __ref pci_bus_assign_resources(const struct pci_bus *bus)
{
struct pci_bus *b;
@@ -566,7 +633,7 @@ void __ref pci_bus_assign_resources(cons
switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:
- pci_setup_bridge(b);
+ pci_setup_bridge(b, true);
break;
case PCI_CLASS_BRIDGE_CARDBUS:
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
int pci_vpd_truncate(struct pci_dev *dev, size_t size);
/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+void pci_bridge_assign_resources(const struct pci_dev *bridge);
void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);
Yinghai Lu wrote:
> move out bus_size_bridges and assign resources out of pciehp_add_bridge()
> and at last do them all together one time including slot bridge, to avoid to call
> assign resources several times, when there are several bridges under the slot bridge.
>
> need to introduce pci_bridge_assign_resources there.
>
> handle the case the first bridge that doesn't get pre-allocated big enough res from FW.
> for example pcie devices need 256M, but the bridge only get preallocated 2M...
>
Though I have not looked at the patch deeply yet (sorry), I have
some questions and concerns about this change. Please correct me
if my understanding is not correct.
- Your patch doesn't seems to have the code to free resources.
If we need to expand the resource range, don't we need to free
preallocated resource before allocating the new one?
- Your patch seems to update BARs for bridge itself. I think it
would break the bridge's driver (port service driver) that if
it controls the device's capability by using IO/Mem, though I
don't know if such driver or capabilities exists now.
And one comment about pci_configure_slot(). We need to program
hotplug parameters before the device start working. So we need
to call pci_configure_slot() before calling pci_bus_add_devices().
Thanks,
Kenji Kaneshige
> pci_is_enabled must be removed in pci_setup_bridge(), otherwise update res is not
> updated to bridge BAR. and we are safe to do that, because only have one bus_size
> and assigned resources are called.
>
> Signed-off-by: Yinghai Lu <[email protected]>
>
> ---
> drivers/pci/hotplug/pciehp_pci.c | 29 ++++++++++++----
> drivers/pci/setup-bus.c | 70 +++++++++++++++++++++++++++++++++++++--
> include/linux/pci.h | 1
> 3 files changed, 90 insertions(+), 10 deletions(-)
>
> Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
> +++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
> @@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
> busnr = pci_scan_bridge(parent, dev, busnr, pass);
> if (!dev->subordinate)
> return -1;
> - pci_bus_size_bridges(dev->subordinate);
> - pci_bus_assign_resources(parent);
> - pci_enable_bridges(parent);
> - pci_bus_add_devices(parent);
> +
> return 0;
> }
>
> int pciehp_configure_device(struct slot *p_slot)
> {
> struct pci_dev *dev;
> - struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
> + struct pci_dev *bridge = p_slot->ctrl->pcie->port;
> + struct pci_bus *parent = bridge->subordinate;
> int num, fn;
> struct controller *ctrl = p_slot->ctrl;
> + int retval;
>
> dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
> if (dev) {
> @@ -96,12 +95,28 @@ int pciehp_configure_device(struct slot
> (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
> pciehp_add_bridge(dev);
> }
> - pci_configure_slot(dev);
> pci_dev_put(dev);
> }
>
> - pci_bus_assign_resources(parent);
> + pci_bus_size_bridges(parent);
> + pci_bridge_assign_resources(bridge);
> + retval = pci_reenable_device(bridge);
> + pci_set_master(bridge);
> + pci_enable_bridges(parent);
> pci_bus_add_devices(parent);
> +
> + for (fn = 0; fn < 8; fn++) {
> + dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
> + if (!dev)
> + continue;
> + if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
> + pci_dev_put(dev);
> + continue;
> + }
> + pci_configure_slot(dev);
> + pci_dev_put(dev);
> + }
> +
> return 0;
> }
>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c
> @@ -27,6 +27,44 @@
> #include <linux/slab.h>
> #include "pci.h"
>
> +static void pdev_assign_resources_sorted(struct pci_dev *dev)
> +{
> + struct resource *res;
> + struct resource_list head, *list, *tmp;
> + int idx;
> + u16 class = dev->class >> 8;
> +
> + head.next = NULL;
> +
> + /* Don't touch classless devices or host bridges or ioapics. */
> + if (class == PCI_CLASS_NOT_DEFINED ||
> + class == PCI_CLASS_BRIDGE_HOST)
> + return;
> +
> + /* Don't touch ioapic devices already enabled by firmware */
> + if (class == PCI_CLASS_SYSTEM_PIC) {
> + u16 command;
> + pci_read_config_word(dev, PCI_COMMAND, &command);
> + if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
> + return;
> + }
> +
> + pdev_sort_resources(dev, &head);
> +
> + for (list = head.next; list;) {
> + res = list->res;
> + idx = res - &list->dev->resource[0];
> + if (pci_assign_resource(list->dev, idx)) {
> + res->start = 0;
> + res->end = 0;
> + res->flags = 0;
> + }
> + tmp = list;
> + list = list->next;
> + kfree(tmp);
> + }
> +}
> +
> static void pbus_assign_resources_sorted(const struct pci_bus *bus)
> {
> struct pci_dev *dev;
> @@ -144,9 +182,6 @@ static void pci_setup_bridge(struct pci_
> u32 l, bu, lu, io_upper16;
> int pref_mem64;
>
> - if (pci_is_enabled(bridge))
> - return;
> -
> dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
> pci_domain_nr(bus), bus->number);
>
> @@ -550,6 +585,35 @@ void __ref pci_bus_size_bridges(struct p
> }
> EXPORT_SYMBOL(pci_bus_size_bridges);
>
> +void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
> +{
> + struct pci_bus *b;
> +
> + pdev_assign_resources_sorted((struct pci_dev *)bridge);
> +
> + b = bridge->subordinate;
> + if (!b)
> + return;
> +
> + pci_bus_assign_resources(b);
> +
> + switch (bridge->class >> 8) {
> + case PCI_CLASS_BRIDGE_PCI:
> + pci_setup_bridge(b);
> + break;
> +
> + case PCI_CLASS_BRIDGE_CARDBUS:
> + pci_setup_cardbus(b);
> + break;
> +
> + default:
> + dev_info(&bridge->dev, "not setting up bridge for bus "
> + "%04x:%02x\n", pci_domain_nr(b), b->number);
> + break;
> + }
> +}
> +EXPORT_SYMBOL(pci_bridge_assign_resources);
> +
> void __ref pci_bus_assign_resources(const struct pci_bus *bus)
> {
> struct pci_bus *b;
> Index: linux-2.6/include/linux/pci.h
> ===================================================================
> --- linux-2.6.orig/include/linux/pci.h
> +++ linux-2.6/include/linux/pci.h
> @@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
> int pci_vpd_truncate(struct pci_dev *dev, size_t size);
>
> /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
> +void pci_bridge_assign_resources(const struct pci_dev *bridge);
> void pci_bus_assign_resources(const struct pci_bus *bus);
> void pci_bus_size_bridges(struct pci_bus *bus);
> int pci_claim_resource(struct pci_dev *, int);
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
Kenji Kaneshige wrote:
> Yinghai Lu wrote:
>> move out bus_size_bridges and assign resources out of pciehp_add_bridge()
>> and at last do them all together one time including slot bridge, to
>> avoid to call
>> assign resources several times, when there are several bridges under
>> the slot bridge.
>>
>> need to introduce pci_bridge_assign_resources there.
>>
>> handle the case the first bridge that doesn't get pre-allocated big
>> enough res from FW.
>> for example pcie devices need 256M, but the bridge only get
>> preallocated 2M...
>>
>
> Though I have not looked at the patch deeply yet (sorry), I have
> some questions and concerns about this change. Please correct me
> if my understanding is not correct.
>
> - Your patch doesn't seems to have the code to free resources.
> If we need to expand the resource range, don't we need to free
> preallocated resource before allocating the new one?
that is done with pci_bus_size_bridges ==> pbus_size_io/pbus_size_mem ==> find_free_bus_resource ==> release_resource.
>
> - Your patch seems to update BARs for bridge itself. I think it
> would break the bridge's driver (port service driver) that if
> it controls the device's capability by using IO/Mem, though I
> don't know if such driver or capabilities exists now.
port service driver will be AER and pciehotplug.
it seems those driver are not use those BAR...
those BAR are supposed for the devices under the pcie bridge.
>
> And one comment about pci_configure_slot(). We need to program
> hotplug parameters before the device start working. So we need
> to call pci_configure_slot() before calling pci_bus_add_devices().
>
Thanks, will correct that.
YH
YH
Yinghai Lu wrote:
> Kenji Kaneshige wrote:
>> Yinghai Lu wrote:
>>> move out bus_size_bridges and assign resources out of pciehp_add_bridge()
>>> and at last do them all together one time including slot bridge, to
>>> avoid to call
>>> assign resources several times, when there are several bridges under
>>> the slot bridge.
>>>
>>> need to introduce pci_bridge_assign_resources there.
>>>
>>> handle the case the first bridge that doesn't get pre-allocated big
>>> enough res from FW.
>>> for example pcie devices need 256M, but the bridge only get
>>> preallocated 2M...
>>>
>> Though I have not looked at the patch deeply yet (sorry), I have
>> some questions and concerns about this change. Please correct me
>> if my understanding is not correct.
>>
>> - Your patch doesn't seems to have the code to free resources.
>> If we need to expand the resource range, don't we need to free
>> preallocated resource before allocating the new one?
>
> that is done with pci_bus_size_bridges ==> pbus_size_io/pbus_size_mem ==> find_free_bus_resource ==> release_resource.
>
I didn't noticed that find_free_bus_resource() was changed to call
release_resource() recently...
By the way, does this (release_resource() by find_bus_resource())
change the resource assignment by BIOS also for bridges other than
the ports with hotplug slot (switch upstreamport, for example)?
>> - Your patch seems to update BARs for bridge itself. I think it
>> would break the bridge's driver (port service driver) that if
>> it controls the device's capability by using IO/Mem, though I
>> don't know if such driver or capabilities exists now.
>
> port service driver will be AER and pciehotplug.
> it seems those driver are not use those BAR...
> those BAR are supposed for the devices under the pcie bridge.
>
I understand that there are only two port service drivers (AER and
PCIe hotplug) and both doesn't use BAR. But I still have a concern
that changing bridge's BARs might cause problems in the future. In
my understanding, what you need is expanding IO/Mem base and limit
of root or switch downstream ports. If so, I think we should only
touch IO/Mem base/limit, and should not touch bridge's BARs.
Thanks,
Kenji Kaneshige
Kenji Kaneshige wrote:
> Yinghai Lu wrote:
>> Kenji Kaneshige wrote:
>>> Yinghai Lu wrote:
>>>> move out bus_size_bridges and assign resources out of
>>>> pciehp_add_bridge()
>>>> and at last do them all together one time including slot bridge, to
>>>> avoid to call
>>>> assign resources several times, when there are several bridges under
>>>> the slot bridge.
>>>>
>>>> need to introduce pci_bridge_assign_resources there.
>>>>
>>>> handle the case the first bridge that doesn't get pre-allocated big
>>>> enough res from FW.
>>>> for example pcie devices need 256M, but the bridge only get
>>>> preallocated 2M...
>>>>
>>> Though I have not looked at the patch deeply yet (sorry), I have
>>> some questions and concerns about this change. Please correct me
>>> if my understanding is not correct.
>>>
>>> - Your patch doesn't seems to have the code to free resources.
>>> If we need to expand the resource range, don't we need to free
>>> preallocated resource before allocating the new one?
>>
>> that is done with pci_bus_size_bridges ==> pbus_size_io/pbus_size_mem
>> ==> find_free_bus_resource ==> release_resource.
>>
>
> I didn't noticed that find_free_bus_resource() was changed to call
> release_resource() recently...
>
> By the way, does this (release_resource() by find_bus_resource())
> change the resource assignment by BIOS also for bridges other than
> the ports with hotplug slot (switch upstreamport, for example)?
yes.
BIOS preallocate small range for the bridge, and leave the BAR for the device under that bridge uninitialized.
>
>>> - Your patch seems to update BARs for bridge itself. I think it
>>> would break the bridge's driver (port service driver) that if
>>> it controls the device's capability by using IO/Mem, though I
>>> don't know if such driver or capabilities exists now.
>>
>> port service driver will be AER and pciehotplug.
>> it seems those driver are not use those BAR...
>> those BAR are supposed for the devices under the pcie bridge.
>>
>
> I understand that there are only two port service drivers (AER and
> PCIe hotplug) and both doesn't use BAR. But I still have a concern
> that changing bridge's BARs might cause problems in the future. In
> my understanding, what you need is expanding IO/Mem base and limit
> of root or switch downstream ports. If so, I think we should only
> touch IO/Mem base/limit, and should not touch bridge's BARs.
those bridge BAR are for devices under that bridge. the port device is not supposed to use them.
if we don't touch the bridge's BAR, the hw will not forward the io for those devices under it.
YH
move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.
need to introduce pci_bridge_assign_resources there.
handle the case the slot bridge that doesn't get pre-allocated big enough res
from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...
pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
update res is not updated to bridge BAR. that is bridge is enabled already for
port service.
-v2: address Alex's concern about pci remove/rescan feature about pci_setup_bridge changes.
-v3: Kenji pointed out that pci_config_slot need to be called before pci_bus_add_devices()
Signed-off-by: Yinghai Lu <[email protected]>
---
drivers/pci/hotplug/pciehp_pci.c | 29 ++++++++++++---
drivers/pci/setup-bus.c | 73 +++++++++++++++++++++++++++++++++++++--
include/linux/pci.h | 1
3 files changed, 94 insertions(+), 9 deletions(-)
Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
@@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
busnr = pci_scan_bridge(parent, dev, busnr, pass);
if (!dev->subordinate)
return -1;
- pci_bus_size_bridges(dev->subordinate);
- pci_bus_assign_resources(parent);
- pci_enable_bridges(parent);
- pci_bus_add_devices(parent);
+
return 0;
}
int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
+ struct pci_dev *bridge = p_slot->ctrl->pcie->port;
+ struct pci_bus *parent = bridge->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;
+ int retval;
dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
if (dev) {
@@ -96,12 +95,30 @@ int pciehp_configure_device(struct slot
(dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
pciehp_add_bridge(dev);
}
+ pci_dev_put(dev);
+ }
+
+ pci_bus_size_bridges(parent);
+ pci_clear_master(bridge);
+ pci_bridge_assign_resources(bridge);
+ retval = pci_reenable_device(bridge);
+ pci_set_master(bridge);
+ pci_enable_bridges(parent);
+
+ for (fn = 0; fn < 8; fn++) {
+ dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
+ if (!dev)
+ continue;
+ if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ pci_dev_put(dev);
+ continue;
+ }
pci_configure_slot(dev);
pci_dev_put(dev);
}
- pci_bus_assign_resources(parent);
pci_bus_add_devices(parent);
+
return 0;
}
Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -27,6 +27,44 @@
#include <linux/slab.h>
#include "pci.h"
+static void pdev_assign_resources_sorted(struct pci_dev *dev)
+{
+ struct resource *res;
+ struct resource_list head, *list, *tmp;
+ int idx;
+ u16 class = dev->class >> 8;
+
+ head.next = NULL;
+
+ /* Don't touch classless devices or host bridges or ioapics. */
+ if (class == PCI_CLASS_NOT_DEFINED ||
+ class == PCI_CLASS_BRIDGE_HOST)
+ return;
+
+ /* Don't touch ioapic devices already enabled by firmware */
+ if (class == PCI_CLASS_SYSTEM_PIC) {
+ u16 command;
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
+ return;
+ }
+
+ pdev_sort_resources(dev, &head);
+
+ for (list = head.next; list;) {
+ res = list->res;
+ idx = res - &list->dev->resource[0];
+ if (pci_assign_resource(list->dev, idx)) {
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+}
+
static void pbus_assign_resources_sorted(const struct pci_bus *bus)
{
struct pci_dev *dev;
@@ -137,14 +175,14 @@ EXPORT_SYMBOL(pci_setup_cardbus);
config space writes, so it's quite possible that an I/O window of
the bridge will have some undesirable address (e.g. 0) after the
first write. Ditto 64-bit prefetchable MMIO. */
-static void pci_setup_bridge(struct pci_bus *bus)
+static void pci_setup_bridge(struct pci_bus *bus, bool check_enabled)
{
struct pci_dev *bridge = bus->self;
struct pci_bus_region region;
u32 l, bu, lu, io_upper16;
int pref_mem64;
- if (pci_is_enabled(bridge))
+ if (check_enabled && pci_is_enabled(bridge))
return;
dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
@@ -550,6 +588,35 @@ void __ref pci_bus_size_bridges(struct p
}
EXPORT_SYMBOL(pci_bus_size_bridges);
+void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
+{
+ struct pci_bus *b;
+
+ pdev_assign_resources_sorted((struct pci_dev *)bridge);
+
+ b = bridge->subordinate;
+ if (!b)
+ return;
+
+ pci_bus_assign_resources(b);
+
+ switch (bridge->class >> 8) {
+ case PCI_CLASS_BRIDGE_PCI:
+ pci_setup_bridge(b, false);
+ break;
+
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ pci_setup_cardbus(b);
+ break;
+
+ default:
+ dev_info(&bridge->dev, "not setting up bridge for bus "
+ "%04x:%02x\n", pci_domain_nr(b), b->number);
+ break;
+ }
+}
+EXPORT_SYMBOL(pci_bridge_assign_resources);
+
void __ref pci_bus_assign_resources(const struct pci_bus *bus)
{
struct pci_bus *b;
@@ -566,7 +633,7 @@ void __ref pci_bus_assign_resources(cons
switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:
- pci_setup_bridge(b);
+ pci_setup_bridge(b, true);
break;
case PCI_CLASS_BRIDGE_CARDBUS:
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
int pci_vpd_truncate(struct pci_dev *dev, size_t size);
/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+void pci_bridge_assign_resources(const struct pci_dev *bridge);
void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);
Yinghai Lu wrote:
> Kenji Kaneshige wrote:
>> Yinghai Lu wrote:
>>> Kenji Kaneshige wrote:
>>>> Yinghai Lu wrote:
>>>>> move out bus_size_bridges and assign resources out of
>>>>> pciehp_add_bridge()
>>>>> and at last do them all together one time including slot bridge, to
>>>>> avoid to call
>>>>> assign resources several times, when there are several bridges under
>>>>> the slot bridge.
>>>>>
>>>>> need to introduce pci_bridge_assign_resources there.
>>>>>
>>>>> handle the case the first bridge that doesn't get pre-allocated big
>>>>> enough res from FW.
>>>>> for example pcie devices need 256M, but the bridge only get
>>>>> preallocated 2M...
>>>>>
>>>> Though I have not looked at the patch deeply yet (sorry), I have
>>>> some questions and concerns about this change. Please correct me
>>>> if my understanding is not correct.
>>>>
>>>> - Your patch doesn't seems to have the code to free resources.
>>>> If we need to expand the resource range, don't we need to free
>>>> preallocated resource before allocating the new one?
>>> that is done with pci_bus_size_bridges ==> pbus_size_io/pbus_size_mem
>>> ==> find_free_bus_resource ==> release_resource.
>>>
>> I didn't noticed that find_free_bus_resource() was changed to call
>> release_resource() recently...
>>
>> By the way, does this (release_resource() by find_bus_resource())
>> change the resource assignment by BIOS also for bridges other than
>> the ports with hotplug slot (switch upstreamport, for example)?
>
> yes.
>
> BIOS preallocate small range for the bridge, and leave the BAR for the device under that bridge uninitialized.
>
Does this happen at the boot time regardless of hot-plug?
>>>> - Your patch seems to update BARs for bridge itself. I think it
>>>> would break the bridge's driver (port service driver) that if
>>>> it controls the device's capability by using IO/Mem, though I
>>>> don't know if such driver or capabilities exists now.
>>> port service driver will be AER and pciehotplug.
>>> it seems those driver are not use those BAR...
>>> those BAR are supposed for the devices under the pcie bridge.
>>>
>> I understand that there are only two port service drivers (AER and
>> PCIe hotplug) and both doesn't use BAR. But I still have a concern
>> that changing bridge's BARs might cause problems in the future. In
>> my understanding, what you need is expanding IO/Mem base and limit
>> of root or switch downstream ports. If so, I think we should only
>> touch IO/Mem base/limit, and should not touch bridge's BARs.
>
> those bridge BAR are for devices under that bridge. the port device is not supposed to use them.
>
Do you mean you touch only BARs of the devices under the bridge?
> if we don't touch the bridge's BAR, the hw will not forward the io for those devices under it.
>
I understand you need to touch I/O base/limit and Mem base/limit. But
I don't understand why you also need to update bridge's BARs. Could
you please explain a little more about it?
Just in case, my terminology "bridge's BARs" is Base Address Register
0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
(type 1) configuration space header of the bridge.
Thanks,
Kenji Kaneshige
Kenji Kaneshige wrote:
> Yinghai Lu wrote:
>> Kenji Kaneshige wrote:
>>> Yinghai Lu wrote:
>>>> Kenji Kaneshige wrote:
>>>>> Yinghai Lu wrote:
>>>>>> move out bus_size_bridges and assign resources out of
>>>>>> pciehp_add_bridge()
>>>>>> and at last do them all together one time including slot bridge, to
>>>>>> avoid to call
>>>>>> assign resources several times, when there are several bridges under
>>>>>> the slot bridge.
>>>>>>
>>>>>> need to introduce pci_bridge_assign_resources there.
>>>>>>
>>>>>> handle the case the first bridge that doesn't get pre-allocated big
>>>>>> enough res from FW.
>>>>>> for example pcie devices need 256M, but the bridge only get
>>>>>> preallocated 2M...
>>>>>>
>>>>> Though I have not looked at the patch deeply yet (sorry), I have
>>>>> some questions and concerns about this change. Please correct me
>>>>> if my understanding is not correct.
>>>>>
>>>>> - Your patch doesn't seems to have the code to free resources.
>>>>> If we need to expand the resource range, don't we need to free
>>>>> preallocated resource before allocating the new one?
>>>> that is done with pci_bus_size_bridges ==> pbus_size_io/pbus_size_mem
>>>> ==> find_free_bus_resource ==> release_resource.
>>>>
>>> I didn't noticed that find_free_bus_resource() was changed to call
>>> release_resource() recently...
>>>
>>> By the way, does this (release_resource() by find_bus_resource())
>>> change the resource assignment by BIOS also for bridges other than
>>> the ports with hotplug slot (switch upstreamport, for example)?
>>
>> yes.
>>
>> BIOS preallocate small range for the bridge, and leave the BAR for the
>> device under that bridge uninitialized.
>>
>
> Does this happen at the boot time regardless of hot-plug?
yes
>
>
>>>>> - Your patch seems to update BARs for bridge itself. I think it
>>>>> would break the bridge's driver (port service driver) that if
>>>>> it controls the device's capability by using IO/Mem, though I
>>>>> don't know if such driver or capabilities exists now.
>>>> port service driver will be AER and pciehotplug.
>>>> it seems those driver are not use those BAR...
>>>> those BAR are supposed for the devices under the pcie bridge.
>>>>
>>> I understand that there are only two port service drivers (AER and
>>> PCIe hotplug) and both doesn't use BAR. But I still have a concern
>>> that changing bridge's BARs might cause problems in the future. In
>>> my understanding, what you need is expanding IO/Mem base and limit
>>> of root or switch downstream ports. If so, I think we should only
>>> touch IO/Mem base/limit, and should not touch bridge's BARs.
>>
>> those bridge BAR are for devices under that bridge. the port device is
>> not supposed to use them.
>>
>
> Do you mean you touch only BARs of the devices under the bridge?
no. the BAR of 0x1c, 0x20, and 0x28
>
>> if we don't touch the bridge's BAR, the hw will not forward the io for
>> those devices under it.
>>
>
> I understand you need to touch I/O base/limit and Mem base/limit. But
> I don't understand why you also need to update bridge's BARs. Could
> you please explain a little more about it?
>
> Just in case, my terminology "bridge's BARs" is Base Address Register
> 0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
> (type 1) configuration space header of the bridge.
i mean 0x1c, 0x20, 0x28
did not notice that bridge device's 0x10, 0x14 are used...
if port service need to use 0x10, 0x14, and the device is enabled, we should touch 0x10, and 0x14.
YH
Yinghai Lu wrote:
> Kenji Kaneshige wrote:
>> I understand you need to touch I/O base/limit and Mem base/limit. But
>> I don't understand why you also need to update bridge's BARs. Could
>> you please explain a little more about it?
>>
>> Just in case, my terminology "bridge's BARs" is Base Address Register
>> 0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
>> (type 1) configuration space header of the bridge.
>
> i mean 0x1c, 0x20, 0x28
>
> did not notice that bridge device's 0x10, 0x14 are used...
> if port service need to use 0x10, 0x14, and the device is enabled, we should touch 0x10, and 0x14.
after check the code, if
pci_bridge_assign_resources ==> pdev_assign_resources_sorted ==> pdev_sort_resources
will not touch 0x10 and 0x14, if those resource is claimed by port service.
/* Sort resources by alignment */
void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
{
int i;
for (i = 0; i < PCI_NUM_RESOURCES; i++) {
struct resource *r;
struct resource_list *list, *tmp;
resource_size_t r_align;
r = &dev->resource[i];
if (r->flags & IORESOURCE_PCI_FIXED)
continue;
if (!(r->flags) || r->parent)
continue;
r->parent != NULL, will make it skip those two.
So -v3 should be safe.
Thanks
Yinghai Lu
move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.
need to introduce pci_bridge_assign_resources there.
handle the case the slot bridge that doesn't get pre-allocated big enough res
from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...
pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
update res is not updated to bridge BAR. that is bridge is enabled already for
port service.
-v2: address Alex's concern about pci remove/rescan feature about pci_setup_bridge changes.
-v3: Kenji pointed out that pci_config_slot need to be called before pci_bus_add_devices()
-v4: move out pci_is_enabled checkout of pci_setup_bridge()
Signed-off-by: Yinghai Lu <[email protected]>
---
drivers/pci/hotplug/pciehp_pci.c | 29 ++++++++++++---
drivers/pci/setup-bus.c | 73 ++++++++++++++++++++++++++++++++++++---
include/linux/pci.h | 1
3 files changed, 93 insertions(+), 10 deletions(-)
Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
@@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
busnr = pci_scan_bridge(parent, dev, busnr, pass);
if (!dev->subordinate)
return -1;
- pci_bus_size_bridges(dev->subordinate);
- pci_bus_assign_resources(parent);
- pci_enable_bridges(parent);
- pci_bus_add_devices(parent);
+
return 0;
}
int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
+ struct pci_dev *bridge = p_slot->ctrl->pcie->port;
+ struct pci_bus *parent = bridge->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;
+ int retval;
dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
if (dev) {
@@ -96,12 +95,30 @@ int pciehp_configure_device(struct slot
(dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
pciehp_add_bridge(dev);
}
+ pci_dev_put(dev);
+ }
+
+ pci_bus_size_bridges(parent);
+ pci_clear_master(bridge);
+ pci_bridge_assign_resources(bridge);
+ retval = pci_reenable_device(bridge);
+ pci_set_master(bridge);
+ pci_enable_bridges(parent);
+
+ for (fn = 0; fn < 8; fn++) {
+ dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
+ if (!dev)
+ continue;
+ if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ pci_dev_put(dev);
+ continue;
+ }
pci_configure_slot(dev);
pci_dev_put(dev);
}
- pci_bus_assign_resources(parent);
pci_bus_add_devices(parent);
+
return 0;
}
Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -27,6 +27,44 @@
#include <linux/slab.h>
#include "pci.h"
+static void pdev_assign_resources_sorted(struct pci_dev *dev)
+{
+ struct resource *res;
+ struct resource_list head, *list, *tmp;
+ int idx;
+ u16 class = dev->class >> 8;
+
+ head.next = NULL;
+
+ /* Don't touch classless devices or host bridges or ioapics. */
+ if (class == PCI_CLASS_NOT_DEFINED ||
+ class == PCI_CLASS_BRIDGE_HOST)
+ return;
+
+ /* Don't touch ioapic devices already enabled by firmware */
+ if (class == PCI_CLASS_SYSTEM_PIC) {
+ u16 command;
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
+ return;
+ }
+
+ pdev_sort_resources(dev, &head);
+
+ for (list = head.next; list;) {
+ res = list->res;
+ idx = res - &list->dev->resource[0];
+ if (pci_assign_resource(list->dev, idx)) {
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+}
+
static void pbus_assign_resources_sorted(const struct pci_bus *bus)
{
struct pci_dev *dev;
@@ -144,9 +182,6 @@ static void pci_setup_bridge(struct pci_
u32 l, bu, lu, io_upper16;
int pref_mem64;
- if (pci_is_enabled(bridge))
- return;
-
dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
pci_domain_nr(bus), bus->number);
@@ -550,6 +585,35 @@ void __ref pci_bus_size_bridges(struct p
}
EXPORT_SYMBOL(pci_bus_size_bridges);
+void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
+{
+ struct pci_bus *b;
+
+ pdev_assign_resources_sorted((struct pci_dev *)bridge);
+
+ b = bridge->subordinate;
+ if (!b)
+ return;
+
+ pci_bus_assign_resources(b);
+
+ switch (bridge->class >> 8) {
+ case PCI_CLASS_BRIDGE_PCI:
+ pci_setup_bridge(b);
+ break;
+
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ pci_setup_cardbus(b);
+ break;
+
+ default:
+ dev_info(&bridge->dev, "not setting up bridge for bus "
+ "%04x:%02x\n", pci_domain_nr(b), b->number);
+ break;
+ }
+}
+EXPORT_SYMBOL(pci_bridge_assign_resources);
+
void __ref pci_bus_assign_resources(const struct pci_bus *bus)
{
struct pci_bus *b;
@@ -566,7 +630,8 @@ void __ref pci_bus_assign_resources(cons
switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:
- pci_setup_bridge(b);
+ if (!pci_is_enabled(dev))
+ pci_setup_bridge(b);
break;
case PCI_CLASS_BRIDGE_CARDBUS:
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
int pci_vpd_truncate(struct pci_dev *dev, size_t size);
/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+void pci_bridge_assign_resources(const struct pci_dev *bridge);
void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);
Yinghai Lu wrote:
> Yinghai Lu wrote:
>> Kenji Kaneshige wrote:
>>> I understand you need to touch I/O base/limit and Mem base/limit. But
>>> I don't understand why you also need to update bridge's BARs. Could
>>> you please explain a little more about it?
>>>
>>> Just in case, my terminology "bridge's BARs" is Base Address Register
>>> 0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
>>> (type 1) configuration space header of the bridge.
>> i mean 0x1c, 0x20, 0x28
>>
>> did not notice that bridge device's 0x10, 0x14 are used...
>> if port service need to use 0x10, 0x14, and the device is enabled, we should touch 0x10, and 0x14.
>
> after check the code, if
>
> pci_bridge_assign_resources ==> pdev_assign_resources_sorted ==> pdev_sort_resources
>
> will not touch 0x10 and 0x14, if those resource is claimed by port service.
>
> /* Sort resources by alignment */
> void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
> {
> int i;
>
> for (i = 0; i < PCI_NUM_RESOURCES; i++) {
> struct resource *r;
> struct resource_list *list, *tmp;
> resource_size_t r_align;
>
> r = &dev->resource[i];
>
> if (r->flags & IORESOURCE_PCI_FIXED)
> continue;
>
> if (!(r->flags) || r->parent)
> continue;
>
>
> r->parent != NULL, will make it skip those two.
>
> So -v3 should be safe.
>
Thank you for the clarification.
But I still don't understand the whole picture of your set of
changes. Let me ask some questions.
In my understanding of your set of changes, if there is a PCIe
switch with some hot-plug slots and all of those slots are empty,
I/O and Memory resources assigned by BIOS are all released at
the boot time. For example, suppose the following case.
bridge(A)
|
-----------------------
| |
bridge(B) bridge(C)
| |
slot(1) slot(2)
(empty) (empty)
bridge(A): P2P bridge for switch upstream port
bridge(B): P2P bridge for switch downstream port
bridge(C): P2P bridge for switch downstream port
In the above example, I/O and Mem resource assigned to bridge(A),
bridge(B) and bridge(C) are all released at the boot time. Correct?
Then, when a adapter card is hot-added to slot(1), I/O and Mem
resources enough for enabling the hot-added adapter card is assigned
to bridge(A), bridge(B) and the adapter card. Correct?
Then, when an another adpater card is hot-added to slot(2), we
need to assign enough resource to bridge(C) and the new card.
But bridge(A) doesn't have enough resource for bridge(C) and
the new card. In addition, all bridge(A) and bridge(B) and the
adapter card on slot(1) are already working. How do you assign
resource to bridge(C) and the card on slot(2)?
Thanks,
Kenji Kaneshige
Kenji Kaneshige wrote:
> Yinghai Lu wrote:
>> Yinghai Lu wrote:
>>> Kenji Kaneshige wrote:
>>>> I understand you need to touch I/O base/limit and Mem base/limit. But
>>>> I don't understand why you also need to update bridge's BARs. Could
>>>> you please explain a little more about it?
>>>>
>>>> Just in case, my terminology "bridge's BARs" is Base Address Register
>>>> 0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
>>>> (type 1) configuration space header of the bridge.
>>> i mean 0x1c, 0x20, 0x28
>>>
>>> did not notice that bridge device's 0x10, 0x14 are used...
>>> if port service need to use 0x10, 0x14, and the device is enabled, we
>>> should touch 0x10, and 0x14.
>>
>> after check the code, if
>> pci_bridge_assign_resources ==> pdev_assign_resources_sorted ==>
>> pdev_sort_resources
>>
>> will not touch 0x10 and 0x14, if those resource is claimed by port
>> service.
>>
>> /* Sort resources by alignment */
>> void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
>> { int i;
>> for (i = 0; i < PCI_NUM_RESOURCES; i++) {
>> struct resource *r;
>> struct resource_list *list, *tmp;
>> resource_size_t r_align;
>> r = &dev->resource[i];
>> if (r->flags &
>> IORESOURCE_PCI_FIXED)
>> continue;
>> if (!(r->flags) || r->parent)
>> continue;
>>
>> r->parent != NULL, will make it skip those two.
>>
>> So -v3 should be safe.
>>
>
> Thank you for the clarification.
>
> But I still don't understand the whole picture of your set of
> changes. Let me ask some questions.
>
> In my understanding of your set of changes, if there is a PCIe
> switch with some hot-plug slots and all of those slots are empty,
> I/O and Memory resources assigned by BIOS are all released at
> the boot time. For example, suppose the following case.
>
> bridge(A)
> |
> -----------------------
> | |
> bridge(B) bridge(C)
> | |
> slot(1) slot(2)
> (empty) (empty)
>
> bridge(A): P2P bridge for switch upstream port
> bridge(B): P2P bridge for switch downstream port
> bridge(C): P2P bridge for switch downstream port
>
> In the above example, I/O and Mem resource assigned to bridge(A),
> bridge(B) and bridge(C) are all released at the boot time. Correct?
>
> Then, when a adapter card is hot-added to slot(1), I/O and Mem
> resources enough for enabling the hot-added adapter card is assigned
> to bridge(A), bridge(B) and the adapter card. Correct?
>
> Then, when an another adpater card is hot-added to slot(2), we
> need to assign enough resource to bridge(C) and the new card.
> But bridge(A) doesn't have enough resource for bridge(C) and
> the new card. In addition, all bridge(A) and bridge(B) and the
> adapter card on slot(1) are already working. How do you assign
> resource to bridge(C) and the card on slot(2)?
>
thanks, will update the patches to only handle leaf bridge, and don't touch min_size etc.
YH
On Wed, 2009-10-28 at 10:44 -0700, Yinghai Lu wrote:
> Kenji Kaneshige wrote:
> > Yinghai Lu wrote:
> >> Yinghai Lu wrote:
> >>> Kenji Kaneshige wrote:
> >>>> I understand you need to touch I/O base/limit and Mem base/limit. But
> >>>> I don't understand why you also need to update bridge's BARs. Could
> >>>> you please explain a little more about it?
> >>>>
> >>>> Just in case, my terminology "bridge's BARs" is Base Address Register
> >>>> 0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
> >>>> (type 1) configuration space header of the bridge.
> >>> i mean 0x1c, 0x20, 0x28
> >>>
> >>> did not notice that bridge device's 0x10, 0x14 are used...
> >>> if port service need to use 0x10, 0x14, and the device is enabled, we
> >>> should touch 0x10, and 0x14.
> >>
> >> after check the code, if
> >> pci_bridge_assign_resources ==> pdev_assign_resources_sorted ==>
> >> pdev_sort_resources
> >>
> >> will not touch 0x10 and 0x14, if those resource is claimed by port
> >> service.
> >>
> >> /* Sort resources by alignment */
> >> void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
> >> { int i;
> >> for (i = 0; i < PCI_NUM_RESOURCES; i++) {
> >> struct resource *r;
> >> struct resource_list *list, *tmp;
> >> resource_size_t r_align;
> >> r = &dev->resource[i];
> >> if (r->flags &
> >> IORESOURCE_PCI_FIXED)
> >> continue;
> >> if (!(r->flags) || r->parent)
> >> continue;
> >>
> >> r->parent != NULL, will make it skip those two.
> >>
> >> So -v3 should be safe.
> >>
> >
> > Thank you for the clarification.
> >
> > But I still don't understand the whole picture of your set of
> > changes. Let me ask some questions.
> >
> > In my understanding of your set of changes, if there is a PCIe
> > switch with some hot-plug slots and all of those slots are empty,
> > I/O and Memory resources assigned by BIOS are all released at
> > the boot time. For example, suppose the following case.
> >
> > bridge(A)
> > |
> > -----------------------
> > | |
> > bridge(B) bridge(C)
> > | |
> > slot(1) slot(2)
> > (empty) (empty)
> >
> > bridge(A): P2P bridge for switch upstream port
> > bridge(B): P2P bridge for switch downstream port
> > bridge(C): P2P bridge for switch downstream port
> >
> > In the above example, I/O and Mem resource assigned to bridge(A),
> > bridge(B) and bridge(C) are all released at the boot time. Correct?
> >
> > Then, when a adapter card is hot-added to slot(1), I/O and Mem
> > resources enough for enabling the hot-added adapter card is assigned
> > to bridge(A), bridge(B) and the adapter card. Correct?
> >
> > Then, when an another adpater card is hot-added to slot(2), we
> > need to assign enough resource to bridge(C) and the new card.
> > But bridge(A) doesn't have enough resource for bridge(C) and
> > the new card. In addition, all bridge(A) and bridge(B) and the
> > adapter card on slot(1) are already working. How do you assign
> > resource to bridge(C) and the card on slot(2)?
> >
>
> thanks, will update the patches to only handle leaf bridge, and don't touch min_size etc.
When you do, can you please add printks showing the changes you're
making to bridge resources? I think these events are infrequent, and
knowing about the changes is vital to debugging problems.
Bjorn
Bjorn Helgaas wrote:
> On Wed, 2009-10-28 at 10:44 -0700, Yinghai Lu wrote:
>>>
>> thanks, will update the patches to only handle leaf bridge, and don't touch min_size etc.
>
> When you do, can you please add printks showing the changes you're
> making to bridge resources? I think these events are infrequent, and
> knowing about the changes is vital to debugging problems.
sure.
[ 91.762205] pci 0000:00:07.0: resource 15 [mem 0x91800000-0x91ffffff 64bit pref] released
[ 91.771715] pci 0000:00:07.0: PCI bridge, secondary bus 0000:0e
[ 91.783297] pci 0000:00:07.0: IO window: 0x2000-0x2fff
[ 91.791564] pci 0000:00:07.0: MEM window: 0x90000000-0x917fffff
[ 91.806376] pci 0000:00:07.0: PREFETCH window: disabled
[ 91.811537] pci 0000:40:07.0: resource 15 [mem 0xb1800000-0xb1ffffff 64bit pref] released
[ 91.826760] pci 0000:40:07.0: PCI bridge, secondary bus 0000:50
[ 91.841288] pci 0000:40:07.0: IO window: 0x7000-0x7fff
[ 91.847605] pci 0000:40:07.0: MEM window: 0xb0000000-0xb17fffff
[ 91.861572] pci 0000:40:07.0: PREFETCH window: disabled
[ 91.868426] pci 0000:80:07.0: resource 15 [mem 0xd1800000-0xd1ffffff 64bit pref] released
[ 91.884552] pci 0000:80:07.0: PCI bridge, secondary bus 0000:90
[ 91.892898] pci 0000:80:07.0: IO window: 0xa000-0xafff
[ 91.905952] pci 0000:80:07.0: MEM window: 0xd0000000-0xd17fffff
[ 91.912304] pci 0000:80:07.0: PREFETCH window: disabled
[ 91.925909] pci 0000:c0:07.0: resource 15 [mem 0xf1800000-0xf1ffffff 64bit pref] released
[ 91.942305] pci 0000:c0:07.0: PCI bridge, secondary bus 0000:d0
[ 91.950273] pci 0000:c0:07.0: IO window: 0xf000-0xffff
[ 91.961687] pci 0000:c0:07.0: MEM window: 0xf0000000-0xf17fffff
[ 91.969438] pci 0000:c0:07.0: PREFETCH window: disabled
YH
Yinghai Lu <[email protected]> writes:
> Kenji Kaneshige wrote:
>> Yinghai Lu wrote:
>>> Yinghai Lu wrote:
>>>> Kenji Kaneshige wrote:
>>>>> I understand you need to touch I/O base/limit and Mem base/limit. But
>>>>> I don't understand why you also need to update bridge's BARs. Could
>>>>> you please explain a little more about it?
>>>>>
>>>>> Just in case, my terminology "bridge's BARs" is Base Address Register
>>>>> 0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
>>>>> (type 1) configuration space header of the bridge.
>>>> i mean 0x1c, 0x20, 0x28
>>>>
>>>> did not notice that bridge device's 0x10, 0x14 are used...
>>>> if port service need to use 0x10, 0x14, and the device is enabled, we
>>>> should touch 0x10, and 0x14.
>>>
>>> after check the code, if
>>> pci_bridge_assign_resources ==> pdev_assign_resources_sorted ==>
>>> pdev_sort_resources
>>>
>>> will not touch 0x10 and 0x14, if those resource is claimed by port
>>> service.
>>>
>>> /* Sort resources by alignment */
>>> void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
>>> { int i;
>>> for (i = 0; i < PCI_NUM_RESOURCES; i++) {
>>> struct resource *r;
>>> struct resource_list *list, *tmp;
>>> resource_size_t r_align;
>>> r = &dev->resource[i];
>>> if (r->flags &
>>> IORESOURCE_PCI_FIXED)
>>> continue;
>>> if (!(r->flags) || r->parent)
>>> continue;
>>>
>>> r->parent != NULL, will make it skip those two.
>>>
>>> So -v3 should be safe.
>>>
>>
>> Thank you for the clarification.
>>
>> But I still don't understand the whole picture of your set of
>> changes. Let me ask some questions.
>>
>> In my understanding of your set of changes, if there is a PCIe
>> switch with some hot-plug slots and all of those slots are empty,
>> I/O and Memory resources assigned by BIOS are all released at
>> the boot time. For example, suppose the following case.
>>
>> bridge(A)
>> |
>> -----------------------
>> | |
>> bridge(B) bridge(C)
>> | |
>> slot(1) slot(2)
>> (empty) (empty)
>>
>> bridge(A): P2P bridge for switch upstream port
>> bridge(B): P2P bridge for switch downstream port
>> bridge(C): P2P bridge for switch downstream port
>>
>> In the above example, I/O and Mem resource assigned to bridge(A),
>> bridge(B) and bridge(C) are all released at the boot time. Correct?
>>
>> Then, when a adapter card is hot-added to slot(1), I/O and Mem
>> resources enough for enabling the hot-added adapter card is assigned
>> to bridge(A), bridge(B) and the adapter card. Correct?
>>
>> Then, when an another adpater card is hot-added to slot(2), we
>> need to assign enough resource to bridge(C) and the new card.
>> But bridge(A) doesn't have enough resource for bridge(C) and
>> the new card. In addition, all bridge(A) and bridge(B) and the
>> adapter card on slot(1) are already working. How do you assign
>> resource to bridge(C) and the card on slot(2)?
>>
>
> thanks, will update the patches to only handle leaf bridge, and don't touch min_size etc.
Tell me what is your expected behavior if I plug a bridge with hotplug
slots into a leaf hotplug slot? Will you assign me enough resources so
that I can plug in additional devices?
Today I make plugging in a hotplug bridge work by having the firmware
reserve at one level and having the kernel reserve at the next level.
Windows handles the issue in theory by performing some kind of
hot-unplugging of drivers that already have assigned resources and
then replugging them. Which allows a full renumbering of busses.
We don't have the infrastructure to do that safely today.
Eric
Eric W. Biederman wrote:
> Yinghai Lu <[email protected]> writes:
>
>> Kenji Kaneshige wrote:
>>> Yinghai Lu wrote:
>>>> Yinghai Lu wrote:
>>>>> Kenji Kaneshige wrote:
>>>>>> I understand you need to touch I/O base/limit and Mem base/limit. But
>>>>>> I don't understand why you also need to update bridge's BARs. Could
>>>>>> you please explain a little more about it?
>>>>>>
>>>>>> Just in case, my terminology "bridge's BARs" is Base Address Register
>>>>>> 0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
>>>>>> (type 1) configuration space header of the bridge.
>>>>> i mean 0x1c, 0x20, 0x28
>>>>>
>>>>> did not notice that bridge device's 0x10, 0x14 are used...
>>>>> if port service need to use 0x10, 0x14, and the device is enabled, we
>>>>> should touch 0x10, and 0x14.
>>>> after check the code, if
>>>> pci_bridge_assign_resources ==> pdev_assign_resources_sorted ==>
>>>> pdev_sort_resources
>>>>
>>>> will not touch 0x10 and 0x14, if those resource is claimed by port
>>>> service.
>>>>
>>>> /* Sort resources by alignment */
>>>> void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
>>>> { int i;
>>>> for (i = 0; i < PCI_NUM_RESOURCES; i++) {
>>>> struct resource *r;
>>>> struct resource_list *list, *tmp;
>>>> resource_size_t r_align;
>>>> r = &dev->resource[i];
>>>> if (r->flags &
>>>> IORESOURCE_PCI_FIXED)
>>>> continue;
>>>> if (!(r->flags) || r->parent)
>>>> continue;
>>>>
>>>> r->parent != NULL, will make it skip those two.
>>>>
>>>> So -v3 should be safe.
>>>>
>>> Thank you for the clarification.
>>>
>>> But I still don't understand the whole picture of your set of
>>> changes. Let me ask some questions.
>>>
>>> In my understanding of your set of changes, if there is a PCIe
>>> switch with some hot-plug slots and all of those slots are empty,
>>> I/O and Memory resources assigned by BIOS are all released at
>>> the boot time. For example, suppose the following case.
>>>
>>> bridge(A)
>>> |
>>> -----------------------
>>> | |
>>> bridge(B) bridge(C)
>>> | |
>>> slot(1) slot(2)
>>> (empty) (empty)
>>>
>>> bridge(A): P2P bridge for switch upstream port
>>> bridge(B): P2P bridge for switch downstream port
>>> bridge(C): P2P bridge for switch downstream port
>>>
>>> In the above example, I/O and Mem resource assigned to bridge(A),
>>> bridge(B) and bridge(C) are all released at the boot time. Correct?
>>>
>>> Then, when a adapter card is hot-added to slot(1), I/O and Mem
>>> resources enough for enabling the hot-added adapter card is assigned
>>> to bridge(A), bridge(B) and the adapter card. Correct?
>>>
>>> Then, when an another adpater card is hot-added to slot(2), we
>>> need to assign enough resource to bridge(C) and the new card.
>>> But bridge(A) doesn't have enough resource for bridge(C) and
>>> the new card. In addition, all bridge(A) and bridge(B) and the
>>> adapter card on slot(1) are already working. How do you assign
>>> resource to bridge(C) and the card on slot(2)?
>>>
>> thanks, will update the patches to only handle leaf bridge, and don't touch min_size etc.
>
> Tell me what is your expected behavior if I plug a bridge with hotplug
> slots into a leaf hotplug slot? Will you assign me enough resources so
> that I can plug in additional devices?
no.
you need to plug device in those slots and then insert it into a leaf hotplug slot.
>
> Today I make plugging in a hotplug bridge work by having the firmware
> reserve at one level and having the kernel reserve at the next level.
>
> Windows handles the issue in theory by performing some kind of
> hot-unplugging of drivers that already have assigned resources and
> then replugging them. Which allows a full renumbering of busses.
> We don't have the infrastructure to do that safely today.
that will take some drivers offline at first ?
YH
move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.
need to introduce pci_bridge_assign_resources there.
handle the case the slot bridge that doesn't get pre-allocated big enough res
from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...
pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
update res is not updated to bridge BAR. that is bridge is enabled already for
port service.
-v2: address Alex's concern about pci remove/rescan feature about pci_setup_bridge changes.
-v3: Kenji pointed out that pci_config_slot need to be called before pci_bus_add_devices()
-v4: move out pci_is_enabled checkout of pci_setup_bridge()
Signed-off-by: Yinghai Lu <[email protected]>
---
drivers/pci/hotplug/pciehp_pci.c | 29 ++++++++++++---
drivers/pci/setup-bus.c | 73 ++++++++++++++++++++++++++++++++++++---
include/linux/pci.h | 1
3 files changed, 93 insertions(+), 10 deletions(-)
Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
@@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
busnr = pci_scan_bridge(parent, dev, busnr, pass);
if (!dev->subordinate)
return -1;
- pci_bus_size_bridges(dev->subordinate);
- pci_bus_assign_resources(parent);
- pci_enable_bridges(parent);
- pci_bus_add_devices(parent);
+
return 0;
}
int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
+ struct pci_dev *bridge = p_slot->ctrl->pcie->port;
+ struct pci_bus *parent = bridge->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;
+ int retval;
dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
if (dev) {
@@ -96,12 +95,30 @@ int pciehp_configure_device(struct slot
(dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
pciehp_add_bridge(dev);
}
+ pci_dev_put(dev);
+ }
+
+ pci_bus_size_bridges(parent);
+ pci_clear_master(bridge);
+ pci_bridge_assign_resources(bridge);
+ retval = pci_reenable_device(bridge);
+ pci_set_master(bridge);
+ pci_enable_bridges(parent);
+
+ for (fn = 0; fn < 8; fn++) {
+ dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
+ if (!dev)
+ continue;
+ if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ pci_dev_put(dev);
+ continue;
+ }
pci_configure_slot(dev);
pci_dev_put(dev);
}
- pci_bus_assign_resources(parent);
pci_bus_add_devices(parent);
+
return 0;
}
Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -27,6 +27,44 @@
#include <linux/slab.h>
#include "pci.h"
+static void pdev_assign_resources_sorted(struct pci_dev *dev)
+{
+ struct resource *res;
+ struct resource_list head, *list, *tmp;
+ int idx;
+ u16 class = dev->class >> 8;
+
+ head.next = NULL;
+
+ /* Don't touch classless devices or host bridges or ioapics. */
+ if (class == PCI_CLASS_NOT_DEFINED ||
+ class == PCI_CLASS_BRIDGE_HOST)
+ return;
+
+ /* Don't touch ioapic devices already enabled by firmware */
+ if (class == PCI_CLASS_SYSTEM_PIC) {
+ u16 command;
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
+ return;
+ }
+
+ pdev_sort_resources(dev, &head);
+
+ for (list = head.next; list;) {
+ res = list->res;
+ idx = res - &list->dev->resource[0];
+ if (pci_assign_resource(list->dev, idx)) {
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+}
+
static void pbus_assign_resources_sorted(const struct pci_bus *bus)
{
struct pci_dev *dev;
@@ -144,9 +182,6 @@ static void pci_setup_bridge(struct pci_
u32 l, bu, lu, io_upper16;
int pref_mem64;
- if (pci_is_enabled(bridge))
- return;
-
dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
pci_domain_nr(bus), bus->number);
@@ -541,6 +576,35 @@ void __ref pci_bus_size_bridges(struct p
}
EXPORT_SYMBOL(pci_bus_size_bridges);
+void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
+{
+ struct pci_bus *b;
+
+ pdev_assign_resources_sorted((struct pci_dev *)bridge);
+
+ b = bridge->subordinate;
+ if (!b)
+ return;
+
+ pci_bus_assign_resources(b);
+
+ switch (bridge->class >> 8) {
+ case PCI_CLASS_BRIDGE_PCI:
+ pci_setup_bridge(b);
+ break;
+
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ pci_setup_cardbus(b);
+ break;
+
+ default:
+ dev_info(&bridge->dev, "not setting up bridge for bus "
+ "%04x:%02x\n", pci_domain_nr(b), b->number);
+ break;
+ }
+}
+EXPORT_SYMBOL(pci_bridge_assign_resources);
+
void __ref pci_bus_assign_resources(const struct pci_bus *bus)
{
struct pci_bus *b;
@@ -557,7 +621,8 @@ void __ref pci_bus_assign_resources(cons
switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:
- pci_setup_bridge(b);
+ if (!pci_is_enabled(dev))
+ pci_setup_bridge(b);
break;
case PCI_CLASS_BRIDGE_CARDBUS:
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
int pci_vpd_truncate(struct pci_dev *dev, size_t size);
/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+void pci_bridge_assign_resources(const struct pci_dev *bridge);
void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);
Yinghai Lu <[email protected]> writes:
> Eric W. Biederman wrote:
>> Yinghai Lu <[email protected]> writes:
>>
>>> Kenji Kaneshige wrote:
>>>> Yinghai Lu wrote:
>>>>> Yinghai Lu wrote:
>>>>>> Kenji Kaneshige wrote:
>>>>>>> I understand you need to touch I/O base/limit and Mem base/limit. But
>>>>>>> I don't understand why you also need to update bridge's BARs. Could
>>>>>>> you please explain a little more about it?
>>>>>>>
>>>>>>> Just in case, my terminology "bridge's BARs" is Base Address Register
>>>>>>> 0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
>>>>>>> (type 1) configuration space header of the bridge.
>>>>>> i mean 0x1c, 0x20, 0x28
>>>>>>
>>>>>> did not notice that bridge device's 0x10, 0x14 are used...
>>>>>> if port service need to use 0x10, 0x14, and the device is enabled, we
>>>>>> should touch 0x10, and 0x14.
>>>>> after check the code, if
>>>>> pci_bridge_assign_resources ==> pdev_assign_resources_sorted ==>
>>>>> pdev_sort_resources
>>>>>
>>>>> will not touch 0x10 and 0x14, if those resource is claimed by port
>>>>> service.
>>>>>
>>>>> /* Sort resources by alignment */
>>>>> void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
>>>>> { int i;
>>>>> for (i = 0; i < PCI_NUM_RESOURCES; i++) {
>>>>> struct resource *r;
>>>>> struct resource_list *list, *tmp;
>>>>> resource_size_t r_align;
>>>>> r = &dev->resource[i];
>>>>> if (r->flags &
>>>>> IORESOURCE_PCI_FIXED)
>>>>> continue;
>>>>> if (!(r->flags) || r->parent)
>>>>> continue;
>>>>>
>>>>> r->parent != NULL, will make it skip those two.
>>>>>
>>>>> So -v3 should be safe.
>>>>>
>>>> Thank you for the clarification.
>>>>
>>>> But I still don't understand the whole picture of your set of
>>>> changes. Let me ask some questions.
>>>>
>>>> In my understanding of your set of changes, if there is a PCIe
>>>> switch with some hot-plug slots and all of those slots are empty,
>>>> I/O and Memory resources assigned by BIOS are all released at
>>>> the boot time. For example, suppose the following case.
>>>>
>>>> bridge(A)
>>>> |
>>>> -----------------------
>>>> | |
>>>> bridge(B) bridge(C)
>>>> | |
>>>> slot(1) slot(2)
>>>> (empty) (empty)
>>>>
>>>> bridge(A): P2P bridge for switch upstream port
>>>> bridge(B): P2P bridge for switch downstream port
>>>> bridge(C): P2P bridge for switch downstream port
>>>>
>>>> In the above example, I/O and Mem resource assigned to bridge(A),
>>>> bridge(B) and bridge(C) are all released at the boot time. Correct?
>>>>
>>>> Then, when a adapter card is hot-added to slot(1), I/O and Mem
>>>> resources enough for enabling the hot-added adapter card is assigned
>>>> to bridge(A), bridge(B) and the adapter card. Correct?
>>>>
>>>> Then, when an another adpater card is hot-added to slot(2), we
>>>> need to assign enough resource to bridge(C) and the new card.
>>>> But bridge(A) doesn't have enough resource for bridge(C) and
>>>> the new card. In addition, all bridge(A) and bridge(B) and the
>>>> adapter card on slot(1) are already working. How do you assign
>>>> resource to bridge(C) and the card on slot(2)?
>>>>
>>> thanks, will update the patches to only handle leaf bridge, and don't touch min_size etc.
>>
>> Tell me what is your expected behavior if I plug a bridge with hotplug
>> slots into a leaf hotplug slot? Will you assign me enough resources so
>> that I can plug in additional devices?
>
> no.
>
> you need to plug device in those slots and then insert it into a leaf hotplug slot.
Scenario.
I insert a bridge with pci hotplug slots into a leaf hotplug slot.
Which adds more leave hotplug slots.
Since the bridge itself is no longer a leaf slot it's resources will not
get reassigned.
Then I will have no resources to assign to the leaves?
>> Today I make plugging in a hotplug bridge work by having the firmware
>> reserve at one level and having the kernel reserve at the next level.
>>
>> Windows handles the issue in theory by performing some kind of
>> hot-unplugging of drivers that already have assigned resources and
>> then replugging them. Which allows a full renumbering of busses.
>> We don't have the infrastructure to do that safely today.
>
> that will take some drivers offline at first ?
I believe windows only does that for drivers that support being temporarily
disconnected from their hardware.
Eric
Eric W. Biederman wrote:
> Yinghai Lu <[email protected]> writes:
>
>> Eric W. Biederman wrote:
>>> Yinghai Lu <[email protected]> writes:
>>>
>>>> Kenji Kaneshige wrote:
>>>>> Yinghai Lu wrote:
>>>>>> Yinghai Lu wrote:
>>>>>>> Kenji Kaneshige wrote:
>>>>>>>> I understand you need to touch I/O base/limit and Mem base/limit. But
>>>>>>>> I don't understand why you also need to update bridge's BARs. Could
>>>>>>>> you please explain a little more about it?
>>>>>>>>
>>>>>>>> Just in case, my terminology "bridge's BARs" is Base Address Register
>>>>>>>> 0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
>>>>>>>> (type 1) configuration space header of the bridge.
>>>>>>> i mean 0x1c, 0x20, 0x28
>>>>>>>
>>>>>>> did not notice that bridge device's 0x10, 0x14 are used...
>>>>>>> if port service need to use 0x10, 0x14, and the device is enabled, we
>>>>>>> should touch 0x10, and 0x14.
>>>>>> after check the code, if
>>>>>> pci_bridge_assign_resources ==> pdev_assign_resources_sorted ==>
>>>>>> pdev_sort_resources
>>>>>>
>>>>>> will not touch 0x10 and 0x14, if those resource is claimed by port
>>>>>> service.
>>>>>>
>>>>>> /* Sort resources by alignment */
>>>>>> void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
>>>>>> { int i;
>>>>>> for (i = 0; i < PCI_NUM_RESOURCES; i++) {
>>>>>> struct resource *r;
>>>>>> struct resource_list *list, *tmp;
>>>>>> resource_size_t r_align;
>>>>>> r = &dev->resource[i];
>>>>>> if (r->flags &
>>>>>> IORESOURCE_PCI_FIXED)
>>>>>> continue;
>>>>>> if (!(r->flags) || r->parent)
>>>>>> continue;
>>>>>>
>>>>>> r->parent != NULL, will make it skip those two.
>>>>>>
>>>>>> So -v3 should be safe.
>>>>>>
>>>>> Thank you for the clarification.
>>>>>
>>>>> But I still don't understand the whole picture of your set of
>>>>> changes. Let me ask some questions.
>>>>>
>>>>> In my understanding of your set of changes, if there is a PCIe
>>>>> switch with some hot-plug slots and all of those slots are empty,
>>>>> I/O and Memory resources assigned by BIOS are all released at
>>>>> the boot time. For example, suppose the following case.
>>>>>
>>>>> bridge(A)
>>>>> |
>>>>> -----------------------
>>>>> | |
>>>>> bridge(B) bridge(C)
>>>>> | |
>>>>> slot(1) slot(2)
>>>>> (empty) (empty)
>>>>>
>>>>> bridge(A): P2P bridge for switch upstream port
>>>>> bridge(B): P2P bridge for switch downstream port
>>>>> bridge(C): P2P bridge for switch downstream port
>>>>>
>>>>> In the above example, I/O and Mem resource assigned to bridge(A),
>>>>> bridge(B) and bridge(C) are all released at the boot time. Correct?
>>>>>
>>>>> Then, when a adapter card is hot-added to slot(1), I/O and Mem
>>>>> resources enough for enabling the hot-added adapter card is assigned
>>>>> to bridge(A), bridge(B) and the adapter card. Correct?
>>>>>
>>>>> Then, when an another adpater card is hot-added to slot(2), we
>>>>> need to assign enough resource to bridge(C) and the new card.
>>>>> But bridge(A) doesn't have enough resource for bridge(C) and
>>>>> the new card. In addition, all bridge(A) and bridge(B) and the
>>>>> adapter card on slot(1) are already working. How do you assign
>>>>> resource to bridge(C) and the card on slot(2)?
>>>>>
>>>> thanks, will update the patches to only handle leaf bridge, and don't touch min_size etc.
>>> Tell me what is your expected behavior if I plug a bridge with hotplug
>>> slots into a leaf hotplug slot? Will you assign me enough resources so
>>> that I can plug in additional devices?
>> no.
>>
>> you need to plug device in those slots and then insert it into a leaf hotplug slot.
>
> Scenario.
>
> I insert a bridge with pci hotplug slots into a leaf hotplug slot.
> Which adds more leave hotplug slots.
>
> Since the bridge itself is no longer a leaf slot it's resources will not
> get reassigned.
>
> Then I will have no resources to assign to the leaves?
so we still have your min_size code there.
in your case:
you need plug all card in your slots on that daughter card at first, and then insert the daughter card to leaf slot in the MB.
my setup is :
system got 4 io chains. and will get slot:
00:03.0 00:05.0 00:07.0 00:09.0
40:03.0 40:05.0 40:07.0 40:09.0
80:03.0 80:05.0 80:07.0 80:09.0
c0:03.0 c0:05.0 c0:07.0 c0:09.0
those are hanged on peer root buses directly. but bios assign to them every one get 8M, if user plug one card need 256M, then it will not work.
with those two patches, could clear the resource assigned by BIOS, and get resource as needed. ( with mmio 64 bit )
YH
Yinghai Lu <[email protected]> writes:
> Eric W. Biederman wrote:
>> Yinghai Lu <[email protected]> writes:
>>
>>> Eric W. Biederman wrote:
>>>> Yinghai Lu <[email protected]> writes:
>>>>
>>>>> Kenji Kaneshige wrote:
>>>>>> Yinghai Lu wrote:
>>>>>>> Yinghai Lu wrote:
>>>>>>>> Kenji Kaneshige wrote:
>>>>>>>>> I understand you need to touch I/O base/limit and Mem base/limit. But
>>>>>>>>> I don't understand why you also need to update bridge's BARs. Could
>>>>>>>>> you please explain a little more about it?
>>>>>>>>>
>>>>>>>>> Just in case, my terminology "bridge's BARs" is Base Address Register
>>>>>>>>> 0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
>>>>>>>>> (type 1) configuration space header of the bridge.
>>>>>>>> i mean 0x1c, 0x20, 0x28
>>>>>>>>
>>>>>>>> did not notice that bridge device's 0x10, 0x14 are used...
>>>>>>>> if port service need to use 0x10, 0x14, and the device is enabled, we
>>>>>>>> should touch 0x10, and 0x14.
>>>>>>> after check the code, if
>>>>>>> pci_bridge_assign_resources ==> pdev_assign_resources_sorted ==>
>>>>>>> pdev_sort_resources
>>>>>>>
>>>>>>> will not touch 0x10 and 0x14, if those resource is claimed by port
>>>>>>> service.
>>>>>>>
>>>>>>> /* Sort resources by alignment */
>>>>>>> void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
>>>>>>> { int i;
>>>>>>> for (i = 0; i < PCI_NUM_RESOURCES; i++) {
>>>>>>> struct resource *r;
>>>>>>> struct resource_list *list, *tmp;
>>>>>>> resource_size_t r_align;
>>>>>>> r = &dev->resource[i];
>>>>>>> if (r->flags &
>>>>>>> IORESOURCE_PCI_FIXED)
>>>>>>> continue;
>>>>>>> if (!(r->flags) || r->parent)
>>>>>>> continue;
>>>>>>>
>>>>>>> r->parent != NULL, will make it skip those two.
>>>>>>>
>>>>>>> So -v3 should be safe.
>>>>>>>
>>>>>> Thank you for the clarification.
>>>>>>
>>>>>> But I still don't understand the whole picture of your set of
>>>>>> changes. Let me ask some questions.
>>>>>>
>>>>>> In my understanding of your set of changes, if there is a PCIe
>>>>>> switch with some hot-plug slots and all of those slots are empty,
>>>>>> I/O and Memory resources assigned by BIOS are all released at
>>>>>> the boot time. For example, suppose the following case.
>>>>>>
>>>>>> bridge(A)
>>>>>> |
>>>>>> -----------------------
>>>>>> | |
>>>>>> bridge(B) bridge(C)
>>>>>> | |
>>>>>> slot(1) slot(2)
>>>>>> (empty) (empty)
>>>>>>
>>>>>> bridge(A): P2P bridge for switch upstream port
>>>>>> bridge(B): P2P bridge for switch downstream port
>>>>>> bridge(C): P2P bridge for switch downstream port
>>>>>>
>>>>>> In the above example, I/O and Mem resource assigned to bridge(A),
>>>>>> bridge(B) and bridge(C) are all released at the boot time. Correct?
>>>>>>
>>>>>> Then, when a adapter card is hot-added to slot(1), I/O and Mem
>>>>>> resources enough for enabling the hot-added adapter card is assigned
>>>>>> to bridge(A), bridge(B) and the adapter card. Correct?
>>>>>>
>>>>>> Then, when an another adpater card is hot-added to slot(2), we
>>>>>> need to assign enough resource to bridge(C) and the new card.
>>>>>> But bridge(A) doesn't have enough resource for bridge(C) and
>>>>>> the new card. In addition, all bridge(A) and bridge(B) and the
>>>>>> adapter card on slot(1) are already working. How do you assign
>>>>>> resource to bridge(C) and the card on slot(2)?
>>>>>>
>>>>> thanks, will update the patches to only handle leaf bridge, and don't touch min_size etc.
>>>> Tell me what is your expected behavior if I plug a bridge with hotplug
>>>> slots into a leaf hotplug slot? Will you assign me enough resources so
>>>> that I can plug in additional devices?
>>> no.
>>>
>>> you need to plug device in those slots and then insert it into a leaf hotplug slot.
>>
>> Scenario.
>>
>> I insert a bridge with pci hotplug slots into a leaf hotplug slot.
>> Which adds more leave hotplug slots.
>>
>> Since the bridge itself is no longer a leaf slot it's resources will not
>> get reassigned.
>>
>> Then I will have no resources to assign to the leaves?
>
> so we still have your min_size code there.
>
> in your case: you need plug all card in your slots on that daughter
> card at first, and then insert the daughter card to leaf slot in the
> MB.
Operationally that is an impossibility. I would not have multiple
layers of hotplug if I only needed a single layer.
Which means your patch would cause a regression in my setup.
> my setup is :
>
> system got 4 io chains. and will get slot:
> 00:03.0 00:05.0 00:07.0 00:09.0
> 40:03.0 40:05.0 40:07.0 40:09.0
> 80:03.0 80:05.0 80:07.0 80:09.0
> c0:03.0 c0:05.0 c0:07.0 c0:09.0
>
> those are hanged on peer root buses directly. but bios assign to
> them every one get 8M, if user plug one card need 256M, then it will
> not work.
>
> with those two patches, could clear the resource assigned by BIOS,
> and get resource as needed. ( with mmio 64 bit )
Hmm.
Could you avoid reallocating resources until a pci device is plugged in
that has problems?
A lot of root bridges have important configuration registers that are
not in standard locations. Which means in general we can not reprogram
root bridges successfully from linux. At least not without code that
knows the root bridge magic.
You can almost solve your problem by simply saying: pci=hpmemsize=256M.
Which works except that allocating 4G of pci memory isn't very likely
to work.
One of the suggestions when I made my patch was to have a per port option
instead of a global minimum. That is an option for your case. But it
is not as elegant.
The truly elegant approach is to make certain the hibernate in the
drivers can handle bars being changed under them, hibernate everything
that needs renumbering and then bring them back.
Personally I think you should walk over to whomever did your firmware
and tell them they goofed.
Eric
Eric W. Biederman wrote:
> Yinghai Lu <[email protected]> writes:
>
>> Eric W. Biederman wrote:
>>> Yinghai Lu <[email protected]> writes:
>>>
>>>> Eric W. Biederman wrote:
>>>>> Yinghai Lu <[email protected]> writes:
>>>>>
>>>>>> Kenji Kaneshige wrote:
>>>>>>> Yinghai Lu wrote:
>>>>>>>> Yinghai Lu wrote:
>>>>>>>>> Kenji Kaneshige wrote:
>>>>>>>>>> I understand you need to touch I/O base/limit and Mem base/limit. But
>>>>>>>>>> I don't understand why you also need to update bridge's BARs. Could
>>>>>>>>>> you please explain a little more about it?
>>>>>>>>>>
>>>>>>>>>> Just in case, my terminology "bridge's BARs" is Base Address Register
>>>>>>>>>> 0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
>>>>>>>>>> (type 1) configuration space header of the bridge.
>>>>>>>>> i mean 0x1c, 0x20, 0x28
>>>>>>>>>
>>>>>>>>> did not notice that bridge device's 0x10, 0x14 are used...
>>>>>>>>> if port service need to use 0x10, 0x14, and the device is enabled, we
>>>>>>>>> should touch 0x10, and 0x14.
>>>>>>>> after check the code, if
>>>>>>>> pci_bridge_assign_resources ==> pdev_assign_resources_sorted ==>
>>>>>>>> pdev_sort_resources
>>>>>>>>
>>>>>>>> will not touch 0x10 and 0x14, if those resource is claimed by port
>>>>>>>> service.
>>>>>>>>
>>>>>>>> /* Sort resources by alignment */
>>>>>>>> void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
>>>>>>>> { int i;
>>>>>>>> for (i = 0; i < PCI_NUM_RESOURCES; i++) {
>>>>>>>> struct resource *r;
>>>>>>>> struct resource_list *list, *tmp;
>>>>>>>> resource_size_t r_align;
>>>>>>>> r = &dev->resource[i];
>>>>>>>> if (r->flags &
>>>>>>>> IORESOURCE_PCI_FIXED)
>>>>>>>> continue;
>>>>>>>> if (!(r->flags) || r->parent)
>>>>>>>> continue;
>>>>>>>>
>>>>>>>> r->parent != NULL, will make it skip those two.
>>>>>>>>
>>>>>>>> So -v3 should be safe.
>>>>>>>>
>>>>>>> Thank you for the clarification.
>>>>>>>
>>>>>>> But I still don't understand the whole picture of your set of
>>>>>>> changes. Let me ask some questions.
>>>>>>>
>>>>>>> In my understanding of your set of changes, if there is a PCIe
>>>>>>> switch with some hot-plug slots and all of those slots are empty,
>>>>>>> I/O and Memory resources assigned by BIOS are all released at
>>>>>>> the boot time. For example, suppose the following case.
>>>>>>>
>>>>>>> bridge(A)
>>>>>>> |
>>>>>>> -----------------------
>>>>>>> | |
>>>>>>> bridge(B) bridge(C)
>>>>>>> | |
>>>>>>> slot(1) slot(2)
>>>>>>> (empty) (empty)
>>>>>>>
>>>>>>> bridge(A): P2P bridge for switch upstream port
>>>>>>> bridge(B): P2P bridge for switch downstream port
>>>>>>> bridge(C): P2P bridge for switch downstream port
>>>>>>>
>>>>>>> In the above example, I/O and Mem resource assigned to bridge(A),
>>>>>>> bridge(B) and bridge(C) are all released at the boot time. Correct?
>>>>>>>
>>>>>>> Then, when a adapter card is hot-added to slot(1), I/O and Mem
>>>>>>> resources enough for enabling the hot-added adapter card is assigned
>>>>>>> to bridge(A), bridge(B) and the adapter card. Correct?
>>>>>>>
>>>>>>> Then, when an another adpater card is hot-added to slot(2), we
>>>>>>> need to assign enough resource to bridge(C) and the new card.
>>>>>>> But bridge(A) doesn't have enough resource for bridge(C) and
>>>>>>> the new card. In addition, all bridge(A) and bridge(B) and the
>>>>>>> adapter card on slot(1) are already working. How do you assign
>>>>>>> resource to bridge(C) and the card on slot(2)?
>>>>>>>
>>>>>> thanks, will update the patches to only handle leaf bridge, and don't touch min_size etc.
>>>>> Tell me what is your expected behavior if I plug a bridge with hotplug
>>>>> slots into a leaf hotplug slot? Will you assign me enough resources so
>>>>> that I can plug in additional devices?
>>>> no.
>>>>
>>>> you need to plug device in those slots and then insert it into a leaf hotplug slot.
>>> Scenario.
>>>
>>> I insert a bridge with pci hotplug slots into a leaf hotplug slot.
>>> Which adds more leave hotplug slots.
>>>
>>> Since the bridge itself is no longer a leaf slot it's resources will not
>>> get reassigned.
>>>
>>> Then I will have no resources to assign to the leaves?
>> so we still have your min_size code there.
>>
>> in your case: you need plug all card in your slots on that daughter
>> card at first, and then insert the daughter card to leaf slot in the
>> MB.
>
> Operationally that is an impossibility. I would not have multiple
> layers of hotplug if I only needed a single layer.
>
> Which means your patch would cause a regression in my setup.
ok, may need to compare new range size and old range size before clear it.
>
>> my setup is :
>>
>> system got 4 io chains. and will get slot:
>> 00:03.0 00:05.0 00:07.0 00:09.0
>> 40:03.0 40:05.0 40:07.0 40:09.0
>> 80:03.0 80:05.0 80:07.0 80:09.0
>> c0:03.0 c0:05.0 c0:07.0 c0:09.0
>>
>> those are hanged on peer root buses directly. but bios assign to
>> them every one get 8M, if user plug one card need 256M, then it will
>> not work.
>>
>> with those two patches, could clear the resource assigned by BIOS,
>> and get resource as needed. ( with mmio 64 bit )
>
> Hmm.
>
> Could you avoid reallocating resources until a pci device is plugged in
> that has problems?
>
> A lot of root bridges have important configuration registers that are
> not in standard locations. Which means in general we can not reprogram
> root bridges successfully from linux. At least not without code that
> knows the root bridge magic.
no one change that
>
> You can almost solve your problem by simply saying: pci=hpmemsize=256M.
> Which works except that allocating 4G of pci memory isn't very likely
> to work.
>
> One of the suggestions when I made my patch was to have a per port option
> instead of a global minimum. That is an option for your case. But it
> is not as elegant.
>
> The truly elegant approach is to make certain the hibernate in the
> drivers can handle bars being changed under them, hibernate everything
> that needs renumbering and then bring them back.
>
> Personally I think you should walk over to whomever did your firmware
> and tell them they goofed.
they said it IS Linux problem. because other os is ok.
YH
Yinghai Lu wrote:
> Eric W. Biederman wrote:
>> Yinghai Lu <[email protected]> writes:
>>
>>> Eric W. Biederman wrote:
>>>> Yinghai Lu <[email protected]> writes:
>>>>
>>>>> Eric W. Biederman wrote:
>>>>>> Yinghai Lu <[email protected]> writes:
>>>>>>
>>>>>>> Kenji Kaneshige wrote:
>>>>>>>> Yinghai Lu wrote:
>>>>>>>>> Yinghai Lu wrote:
>>>>>>>>>> Kenji Kaneshige wrote:
>>>>>>>>>>> I understand you need to touch I/O base/limit and Mem base/limit. But
>>>>>>>>>>> I don't understand why you also need to update bridge's BARs. Could
>>>>>>>>>>> you please explain a little more about it?
>>>>>>>>>>>
>>>>>>>>>>> Just in case, my terminology "bridge's BARs" is Base Address Register
>>>>>>>>>>> 0 (offset 0x10) and Base Address Register 1 (offset 0x14) in the
>>>>>>>>>>> (type 1) configuration space header of the bridge.
>>>>>>>>>> i mean 0x1c, 0x20, 0x28
>>>>>>>>>>
>>>>>>>>>> did not notice that bridge device's 0x10, 0x14 are used...
>>>>>>>>>> if port service need to use 0x10, 0x14, and the device is enabled, we
>>>>>>>>>> should touch 0x10, and 0x14.
>>>>>>>>> after check the code, if
>>>>>>>>> pci_bridge_assign_resources ==> pdev_assign_resources_sorted ==>
>>>>>>>>> pdev_sort_resources
>>>>>>>>>
>>>>>>>>> will not touch 0x10 and 0x14, if those resource is claimed by port
>>>>>>>>> service.
>>>>>>>>>
>>>>>>>>> /* Sort resources by alignment */
>>>>>>>>> void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
>>>>>>>>> { int i;
>>>>>>>>> for (i = 0; i < PCI_NUM_RESOURCES; i++) {
>>>>>>>>> struct resource *r;
>>>>>>>>> struct resource_list *list, *tmp;
>>>>>>>>> resource_size_t r_align;
>>>>>>>>> r = &dev->resource[i];
>>>>>>>>> if (r->flags &
>>>>>>>>> IORESOURCE_PCI_FIXED)
>>>>>>>>> continue;
>>>>>>>>> if (!(r->flags) || r->parent)
>>>>>>>>> continue;
>>>>>>>>>
>>>>>>>>> r->parent != NULL, will make it skip those two.
>>>>>>>>>
>>>>>>>>> So -v3 should be safe.
>>>>>>>>>
>>>>>>>> Thank you for the clarification.
>>>>>>>>
>>>>>>>> But I still don't understand the whole picture of your set of
>>>>>>>> changes. Let me ask some questions.
>>>>>>>>
>>>>>>>> In my understanding of your set of changes, if there is a PCIe
>>>>>>>> switch with some hot-plug slots and all of those slots are empty,
>>>>>>>> I/O and Memory resources assigned by BIOS are all released at
>>>>>>>> the boot time. For example, suppose the following case.
>>>>>>>>
>>>>>>>> bridge(A)
>>>>>>>> |
>>>>>>>> -----------------------
>>>>>>>> | |
>>>>>>>> bridge(B) bridge(C)
>>>>>>>> | |
>>>>>>>> slot(1) slot(2)
>>>>>>>> (empty) (empty)
>>>>>>>>
>>>>>>>> bridge(A): P2P bridge for switch upstream port
>>>>>>>> bridge(B): P2P bridge for switch downstream port
>>>>>>>> bridge(C): P2P bridge for switch downstream port
>>>>>>>>
>>>>>>>> In the above example, I/O and Mem resource assigned to bridge(A),
>>>>>>>> bridge(B) and bridge(C) are all released at the boot time. Correct?
>>>>>>>>
>>>>>>>> Then, when a adapter card is hot-added to slot(1), I/O and Mem
>>>>>>>> resources enough for enabling the hot-added adapter card is assigned
>>>>>>>> to bridge(A), bridge(B) and the adapter card. Correct?
>>>>>>>>
>>>>>>>> Then, when an another adpater card is hot-added to slot(2), we
>>>>>>>> need to assign enough resource to bridge(C) and the new card.
>>>>>>>> But bridge(A) doesn't have enough resource for bridge(C) and
>>>>>>>> the new card. In addition, all bridge(A) and bridge(B) and the
>>>>>>>> adapter card on slot(1) are already working. How do you assign
>>>>>>>> resource to bridge(C) and the card on slot(2)?
>>>>>>>>
>>>>>>> thanks, will update the patches to only handle leaf bridge, and don't touch min_size etc.
>>>>>> Tell me what is your expected behavior if I plug a bridge with hotplug
>>>>>> slots into a leaf hotplug slot? Will you assign me enough resources so
>>>>>> that I can plug in additional devices?
>>>>> no.
>>>>>
>>>>> you need to plug device in those slots and then insert it into a leaf hotplug slot.
>>>> Scenario.
>>>>
>>>> I insert a bridge with pci hotplug slots into a leaf hotplug slot.
>>>> Which adds more leave hotplug slots.
>>>>
>>>> Since the bridge itself is no longer a leaf slot it's resources will not
>>>> get reassigned.
>>>>
>>>> Then I will have no resources to assign to the leaves?
>>> so we still have your min_size code there.
>>>
>>> in your case: you need plug all card in your slots on that daughter
>>> card at first, and then insert the daughter card to leaf slot in the
>>> MB.
>> Operationally that is an impossibility. I would not have multiple
>> layers of hotplug if I only needed a single layer.
>>
>> Which means your patch would cause a regression in my setup.
>
> ok, may need to compare new range size and old range size before clear it.
after closing look up the code, it looks it will not break your setup.
1. before the patches:
a. when master card is inserted, all bridge in that card will get assigned with min_size
b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
2. after the patches: v5
a. booted up, all leaf bridge mmio get clearred.
b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
can you check those two patches in your setup to verify it?
http://patchwork.kernel.org/patch/56344/
http://patchwork.kernel.org/patch/56343/
Thanks
Yinghai Lu
Yinghai Lu wrote:
>>>
>>> Which means your patch would cause a regression in my setup.
>> ok, may need to compare new range size and old range size before clear it.
>
> after closing look up the code, it looks it will not break your setup.
>
> 1. before the patches:
> a. when master card is inserted, all bridge in that card will get assigned with min_size
> b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>
> 2. after the patches: v5
> a. booted up, all leaf bridge mmio get clearred.
> b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
> c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>
> can you check those two patches in your setup to verify it?
> http://patchwork.kernel.org/patch/56344/
> http://patchwork.kernel.org/patch/56343/
on top Jesse today's PCI tree.
YH
Yinghai Lu <[email protected]> writes:
>
> after closing look up the code, it looks it will not break your setup.
>
> 1. before the patches:
> a. when master card is inserted, all bridge in that card will get assigned with min_size
> b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>
> 2. after the patches: v5
> a. booted up, all leaf bridge mmio get clearred.
> b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
> c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>
> can you check those two patches in your setup to verify it?
I have a much simpler case I will break, as I tried something similar by accident.
AMD cpu MCP55 with one pcie port setup as hotplug.
The system only has 2GB of RAM. So plenty of space for pcie devices.
If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
Reads from the bar of the hotplugged device work
Writes to the bar of the hotplugged device, cause further writes to go to lala land.
So I had to have the firmware make the assignment, because only it knows the
details of the hidden AMD bar registers for each hypertransport chain etc.
I don't see how throwing away the work that the firmware has done in
preallocation is something that we can afford to do in general if what
the firmware has done works for us.
Eric
Yinghai Lu wrote:
>
> move out bus_size_bridges and assign resources out of pciehp_add_bridge()
> and at last do them all together one time including slot bridge, to avoid to
> call assign resources several times, when there are several bridges under the
> slot bridge.
>
> need to introduce pci_bridge_assign_resources there.
>
> handle the case the slot bridge that doesn't get pre-allocated big enough res
> from FW.
> for example pcie devices need 256M, but the bridge only get preallocated 2M...
>
> pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
> update res is not updated to bridge BAR. that is bridge is enabled already for
> port service.
>
> -v2: address Alex's concern about pci remove/rescan feature about pci_setup_bridge changes.
> -v3: Kenji pointed out that pci_config_slot need to be called before pci_bus_add_devices()
> -v4: move out pci_is_enabled checkout of pci_setup_bridge()
>
> Signed-off-by: Yinghai Lu <[email protected]>
>
> ---
> drivers/pci/hotplug/pciehp_pci.c | 29 ++++++++++++---
> drivers/pci/setup-bus.c | 73 ++++++++++++++++++++++++++++++++++++---
> include/linux/pci.h | 1
> 3 files changed, 93 insertions(+), 10 deletions(-)
>
> Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
> +++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
> @@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
> busnr = pci_scan_bridge(parent, dev, busnr, pass);
> if (!dev->subordinate)
> return -1;
> - pci_bus_size_bridges(dev->subordinate);
> - pci_bus_assign_resources(parent);
> - pci_enable_bridges(parent);
> - pci_bus_add_devices(parent);
> +
> return 0;
> }
>
> int pciehp_configure_device(struct slot *p_slot)
> {
> struct pci_dev *dev;
> - struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
> + struct pci_dev *bridge = p_slot->ctrl->pcie->port;
> + struct pci_bus *parent = bridge->subordinate;
> int num, fn;
> struct controller *ctrl = p_slot->ctrl;
> + int retval;
>
> dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
> if (dev) {
> @@ -96,12 +95,30 @@ int pciehp_configure_device(struct slot
> (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
> pciehp_add_bridge(dev);
> }
> + pci_dev_put(dev);
> + }
> +
> + pci_bus_size_bridges(parent);
> + pci_clear_master(bridge);
> + pci_bridge_assign_resources(bridge);
> + retval = pci_reenable_device(bridge);
> + pci_set_master(bridge);
> + pci_enable_bridges(parent);
> +
> + for (fn = 0; fn < 8; fn++) {
> + dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
> + if (!dev)
> + continue;
> + if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
> + pci_dev_put(dev);
> + continue;
> + }
> pci_configure_slot(dev);
> pci_dev_put(dev);
> }
>
> - pci_bus_assign_resources(parent);
> pci_bus_add_devices(parent);
> +
> return 0;
> }
>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c
> @@ -27,6 +27,44 @@
> #include <linux/slab.h>
> #include "pci.h"
>
> +static void pdev_assign_resources_sorted(struct pci_dev *dev)
> +{
> + struct resource *res;
> + struct resource_list head, *list, *tmp;
> + int idx;
> + u16 class = dev->class >> 8;
> +
> + head.next = NULL;
> +
> + /* Don't touch classless devices or host bridges or ioapics. */
> + if (class == PCI_CLASS_NOT_DEFINED ||
> + class == PCI_CLASS_BRIDGE_HOST)
> + return;
> +
> + /* Don't touch ioapic devices already enabled by firmware */
> + if (class == PCI_CLASS_SYSTEM_PIC) {
> + u16 command;
> + pci_read_config_word(dev, PCI_COMMAND, &command);
> + if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
> + return;
> + }
> +
> + pdev_sort_resources(dev, &head);
> +
> + for (list = head.next; list;) {
> + res = list->res;
> + idx = res - &list->dev->resource[0];
> + if (pci_assign_resource(list->dev, idx)) {
> + res->start = 0;
> + res->end = 0;
> + res->flags = 0;
> + }
> + tmp = list;
> + list = list->next;
> + kfree(tmp);
> + }
> +}
> +
> static void pbus_assign_resources_sorted(const struct pci_bus *bus)
> {
> struct pci_dev *dev;
> @@ -144,9 +182,6 @@ static void pci_setup_bridge(struct pci_
> u32 l, bu, lu, io_upper16;
> int pref_mem64;
>
> - if (pci_is_enabled(bridge))
> - return;
> -
> dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
> pci_domain_nr(bus), bus->number);
>
> @@ -541,6 +576,35 @@ void __ref pci_bus_size_bridges(struct p
> }
> EXPORT_SYMBOL(pci_bus_size_bridges);
>
> +void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
> +{
> + struct pci_bus *b;
> +
> + pdev_assign_resources_sorted((struct pci_dev *)bridge);
> +
> + b = bridge->subordinate;
> + if (!b)
> + return;
> +
> + pci_bus_assign_resources(b);
> +
> + switch (bridge->class >> 8) {
> + case PCI_CLASS_BRIDGE_PCI:
> + pci_setup_bridge(b);
> + break;
> +
> + case PCI_CLASS_BRIDGE_CARDBUS:
> + pci_setup_cardbus(b);
> + break;
> +
> + default:
> + dev_info(&bridge->dev, "not setting up bridge for bus "
> + "%04x:%02x\n", pci_domain_nr(b), b->number);
> + break;
> + }
> +}
> +EXPORT_SYMBOL(pci_bridge_assign_resources);
> +
> void __ref pci_bus_assign_resources(const struct pci_bus *bus)
> {
> struct pci_bus *b;
> @@ -557,7 +621,8 @@ void __ref pci_bus_assign_resources(cons
>
> switch (dev->class >> 8) {
> case PCI_CLASS_BRIDGE_PCI:
> - pci_setup_bridge(b);
> + if (!pci_is_enabled(dev))
> + pci_setup_bridge(b);
> break;
>
> case PCI_CLASS_BRIDGE_CARDBUS:
> Index: linux-2.6/include/linux/pci.h
> ===================================================================
> --- linux-2.6.orig/include/linux/pci.h
> +++ linux-2.6/include/linux/pci.h
> @@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
> int pci_vpd_truncate(struct pci_dev *dev, size_t size);
>
> /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
> +void pci_bridge_assign_resources(const struct pci_dev *bridge);
> void pci_bus_assign_resources(const struct pci_bus *bus);
> void pci_bus_size_bridges(struct pci_bus *bus);
> int pci_claim_resource(struct pci_dev *, int);
>
Does this patch work without [PATCH 2/2]? I don't understand who
releases resouces? Does find_free_bus_resource() still release
resources?
Thanks,
Kenji Kaneshige
Kenji Kaneshige wrote:
> Yinghai Lu wrote:
>>
>> move out bus_size_bridges and assign resources out of pciehp_add_bridge()
>> and at last do them all together one time including slot bridge, to
>> avoid to
>> call assign resources several times, when there are several bridges
>> under the
>> slot bridge.
>>
>> need to introduce pci_bridge_assign_resources there.
>>
>> handle the case the slot bridge that doesn't get pre-allocated big
>> enough res
>> from FW.
>> for example pcie devices need 256M, but the bridge only get
>> preallocated 2M...
>>
>> pci_setup_bridge() will take extra check_enabled for the slot bridge,
>> otherwise
>> update res is not updated to bridge BAR. that is bridge is enabled
>> already for
>> port service.
>>
>> -v2: address Alex's concern about pci remove/rescan feature about
>> pci_setup_bridge changes.
>> -v3: Kenji pointed out that pci_config_slot need to be called before
>> pci_bus_add_devices()
>> -v4: move out pci_is_enabled checkout of pci_setup_bridge()
>>
>> Signed-off-by: Yinghai Lu <[email protected]>
>>
>> ---
>> drivers/pci/hotplug/pciehp_pci.c | 29 ++++++++++++---
>> drivers/pci/setup-bus.c | 73
>> ++++++++++++++++++++++++++++++++++++---
>> include/linux/pci.h | 1 3 files changed, 93
>> insertions(+), 10 deletions(-)
>>
>> Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
>> +++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
>> @@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
>> busnr = pci_scan_bridge(parent, dev, busnr, pass);
>> if (!dev->subordinate)
>> return -1;
>> - pci_bus_size_bridges(dev->subordinate);
>> - pci_bus_assign_resources(parent);
>> - pci_enable_bridges(parent);
>> - pci_bus_add_devices(parent);
>> +
>> return 0;
>> }
>>
>> int pciehp_configure_device(struct slot *p_slot)
>> {
>> struct pci_dev *dev;
>> - struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
>> + struct pci_dev *bridge = p_slot->ctrl->pcie->port;
>> + struct pci_bus *parent = bridge->subordinate;
>> int num, fn;
>> struct controller *ctrl = p_slot->ctrl;
>> + int retval;
>>
>> dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
>> if (dev) {
>> @@ -96,12 +95,30 @@ int pciehp_configure_device(struct slot
>> (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
>> pciehp_add_bridge(dev);
>> }
>> + pci_dev_put(dev);
>> + }
>> +
>> + pci_bus_size_bridges(parent);
>> + pci_clear_master(bridge);
>> + pci_bridge_assign_resources(bridge);
>> + retval = pci_reenable_device(bridge);
>> + pci_set_master(bridge);
>> + pci_enable_bridges(parent);
>> +
>> + for (fn = 0; fn < 8; fn++) {
>> + dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
>> + if (!dev)
>> + continue;
>> + if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
>> + pci_dev_put(dev);
>> + continue;
>> + }
>> pci_configure_slot(dev);
>> pci_dev_put(dev);
>> }
>>
>> - pci_bus_assign_resources(parent);
>> pci_bus_add_devices(parent);
>> +
>> return 0;
>> }
>>
>> Index: linux-2.6/drivers/pci/setup-bus.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/pci/setup-bus.c
>> +++ linux-2.6/drivers/pci/setup-bus.c
>> @@ -27,6 +27,44 @@
>> #include <linux/slab.h>
>> #include "pci.h"
>>
>> +static void pdev_assign_resources_sorted(struct pci_dev *dev)
>> +{
>> + struct resource *res;
>> + struct resource_list head, *list, *tmp;
>> + int idx;
>> + u16 class = dev->class >> 8;
>> +
>> + head.next = NULL;
>> +
>> + /* Don't touch classless devices or host bridges or ioapics. */
>> + if (class == PCI_CLASS_NOT_DEFINED ||
>> + class == PCI_CLASS_BRIDGE_HOST)
>> + return;
>> +
>> + /* Don't touch ioapic devices already enabled by firmware */
>> + if (class == PCI_CLASS_SYSTEM_PIC) {
>> + u16 command;
>> + pci_read_config_word(dev, PCI_COMMAND, &command);
>> + if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
>> + return;
>> + }
>> +
>> + pdev_sort_resources(dev, &head);
>> +
>> + for (list = head.next; list;) {
>> + res = list->res;
>> + idx = res - &list->dev->resource[0];
>> + if (pci_assign_resource(list->dev, idx)) {
>> + res->start = 0;
>> + res->end = 0;
>> + res->flags = 0;
>> + }
>> + tmp = list;
>> + list = list->next;
>> + kfree(tmp);
>> + }
>> +}
>> +
>> static void pbus_assign_resources_sorted(const struct pci_bus *bus)
>> {
>> struct pci_dev *dev;
>> @@ -144,9 +182,6 @@ static void pci_setup_bridge(struct pci_
>> u32 l, bu, lu, io_upper16;
>> int pref_mem64;
>>
>> - if (pci_is_enabled(bridge))
>> - return;
>> -
>> dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
>> pci_domain_nr(bus), bus->number);
>>
>> @@ -541,6 +576,35 @@ void __ref pci_bus_size_bridges(struct p
>> }
>> EXPORT_SYMBOL(pci_bus_size_bridges);
>>
>> +void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
>> +{
>> + struct pci_bus *b;
>> +
>> + pdev_assign_resources_sorted((struct pci_dev *)bridge);
>> +
>> + b = bridge->subordinate;
>> + if (!b)
>> + return;
>> +
>> + pci_bus_assign_resources(b);
>> +
>> + switch (bridge->class >> 8) {
>> + case PCI_CLASS_BRIDGE_PCI:
>> + pci_setup_bridge(b);
>> + break;
>> +
>> + case PCI_CLASS_BRIDGE_CARDBUS:
>> + pci_setup_cardbus(b);
>> + break;
>> +
>> + default:
>> + dev_info(&bridge->dev, "not setting up bridge for bus "
>> + "%04x:%02x\n", pci_domain_nr(b), b->number);
>> + break;
>> + }
>> +}
>> +EXPORT_SYMBOL(pci_bridge_assign_resources);
>> +
>> void __ref pci_bus_assign_resources(const struct pci_bus *bus)
>> {
>> struct pci_bus *b;
>> @@ -557,7 +621,8 @@ void __ref pci_bus_assign_resources(cons
>>
>> switch (dev->class >> 8) {
>> case PCI_CLASS_BRIDGE_PCI:
>> - pci_setup_bridge(b);
>> + if (!pci_is_enabled(dev))
>> + pci_setup_bridge(b);
>> break;
>>
>> case PCI_CLASS_BRIDGE_CARDBUS:
>> Index: linux-2.6/include/linux/pci.h
>> ===================================================================
>> --- linux-2.6.orig/include/linux/pci.h
>> +++ linux-2.6/include/linux/pci.h
>> @@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
>> int pci_vpd_truncate(struct pci_dev *dev, size_t size);
>>
>> /* Helper functions for low-level code
>> (drivers/pci/setup-[bus,res].c) */
>> +void pci_bridge_assign_resources(const struct pci_dev *bridge);
>> void pci_bus_assign_resources(const struct pci_bus *bus);
>> void pci_bus_size_bridges(struct pci_bus *bus);
>> int pci_claim_resource(struct pci_dev *, int);
>>
>
> Does this patch work without [PATCH 2/2]? I don't understand who
> releases resouces? Does find_free_bus_resource() still release
> resources?
need to work with [2/2].
YH
Yinghai Lu wrote:
> Kenji Kaneshige wrote:
>> Yinghai Lu wrote:
>>> move out bus_size_bridges and assign resources out of pciehp_add_bridge()
>>> and at last do them all together one time including slot bridge, to
>>> avoid to
>>> call assign resources several times, when there are several bridges
>>> under the
>>> slot bridge.
>>>
>>> need to introduce pci_bridge_assign_resources there.
>>>
>>> handle the case the slot bridge that doesn't get pre-allocated big
>>> enough res
>>> from FW.
>>> for example pcie devices need 256M, but the bridge only get
>>> preallocated 2M...
>>>
>>> pci_setup_bridge() will take extra check_enabled for the slot bridge,
>>> otherwise
>>> update res is not updated to bridge BAR. that is bridge is enabled
>>> already for
>>> port service.
>>>
>>> -v2: address Alex's concern about pci remove/rescan feature about
>>> pci_setup_bridge changes.
>>> -v3: Kenji pointed out that pci_config_slot need to be called before
>>> pci_bus_add_devices()
>>> -v4: move out pci_is_enabled checkout of pci_setup_bridge()
>>>
>>> Signed-off-by: Yinghai Lu <[email protected]>
>>>
>>> ---
>>> drivers/pci/hotplug/pciehp_pci.c | 29 ++++++++++++---
>>> drivers/pci/setup-bus.c | 73
>>> ++++++++++++++++++++++++++++++++++++---
>>> include/linux/pci.h | 1 3 files changed, 93
>>> insertions(+), 10 deletions(-)
>>>
>>> Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
>>> ===================================================================
>>> --- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
>>> +++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
>>> @@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
>>> busnr = pci_scan_bridge(parent, dev, busnr, pass);
>>> if (!dev->subordinate)
>>> return -1;
>>> - pci_bus_size_bridges(dev->subordinate);
>>> - pci_bus_assign_resources(parent);
>>> - pci_enable_bridges(parent);
>>> - pci_bus_add_devices(parent);
>>> +
>>> return 0;
>>> }
>>>
>>> int pciehp_configure_device(struct slot *p_slot)
>>> {
>>> struct pci_dev *dev;
>>> - struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
>>> + struct pci_dev *bridge = p_slot->ctrl->pcie->port;
>>> + struct pci_bus *parent = bridge->subordinate;
>>> int num, fn;
>>> struct controller *ctrl = p_slot->ctrl;
>>> + int retval;
>>>
>>> dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
>>> if (dev) {
>>> @@ -96,12 +95,30 @@ int pciehp_configure_device(struct slot
>>> (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
>>> pciehp_add_bridge(dev);
>>> }
>>> + pci_dev_put(dev);
>>> + }
>>> +
>>> + pci_bus_size_bridges(parent);
>>> + pci_clear_master(bridge);
>>> + pci_bridge_assign_resources(bridge);
>>> + retval = pci_reenable_device(bridge);
>>> + pci_set_master(bridge);
>>> + pci_enable_bridges(parent);
>>> +
>>> + for (fn = 0; fn < 8; fn++) {
>>> + dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
>>> + if (!dev)
>>> + continue;
>>> + if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
>>> + pci_dev_put(dev);
>>> + continue;
>>> + }
>>> pci_configure_slot(dev);
>>> pci_dev_put(dev);
>>> }
>>>
>>> - pci_bus_assign_resources(parent);
>>> pci_bus_add_devices(parent);
>>> +
>>> return 0;
>>> }
>>>
>>> Index: linux-2.6/drivers/pci/setup-bus.c
>>> ===================================================================
>>> --- linux-2.6.orig/drivers/pci/setup-bus.c
>>> +++ linux-2.6/drivers/pci/setup-bus.c
>>> @@ -27,6 +27,44 @@
>>> #include <linux/slab.h>
>>> #include "pci.h"
>>>
>>> +static void pdev_assign_resources_sorted(struct pci_dev *dev)
>>> +{
>>> + struct resource *res;
>>> + struct resource_list head, *list, *tmp;
>>> + int idx;
>>> + u16 class = dev->class >> 8;
>>> +
>>> + head.next = NULL;
>>> +
>>> + /* Don't touch classless devices or host bridges or ioapics. */
>>> + if (class == PCI_CLASS_NOT_DEFINED ||
>>> + class == PCI_CLASS_BRIDGE_HOST)
>>> + return;
>>> +
>>> + /* Don't touch ioapic devices already enabled by firmware */
>>> + if (class == PCI_CLASS_SYSTEM_PIC) {
>>> + u16 command;
>>> + pci_read_config_word(dev, PCI_COMMAND, &command);
>>> + if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
>>> + return;
>>> + }
>>> +
>>> + pdev_sort_resources(dev, &head);
>>> +
>>> + for (list = head.next; list;) {
>>> + res = list->res;
>>> + idx = res - &list->dev->resource[0];
>>> + if (pci_assign_resource(list->dev, idx)) {
>>> + res->start = 0;
>>> + res->end = 0;
>>> + res->flags = 0;
>>> + }
>>> + tmp = list;
>>> + list = list->next;
>>> + kfree(tmp);
>>> + }
>>> +}
>>> +
>>> static void pbus_assign_resources_sorted(const struct pci_bus *bus)
>>> {
>>> struct pci_dev *dev;
>>> @@ -144,9 +182,6 @@ static void pci_setup_bridge(struct pci_
>>> u32 l, bu, lu, io_upper16;
>>> int pref_mem64;
>>>
>>> - if (pci_is_enabled(bridge))
>>> - return;
>>> -
>>> dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
>>> pci_domain_nr(bus), bus->number);
>>>
>>> @@ -541,6 +576,35 @@ void __ref pci_bus_size_bridges(struct p
>>> }
>>> EXPORT_SYMBOL(pci_bus_size_bridges);
>>>
>>> +void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
>>> +{
>>> + struct pci_bus *b;
>>> +
>>> + pdev_assign_resources_sorted((struct pci_dev *)bridge);
>>> +
>>> + b = bridge->subordinate;
>>> + if (!b)
>>> + return;
>>> +
>>> + pci_bus_assign_resources(b);
>>> +
>>> + switch (bridge->class >> 8) {
>>> + case PCI_CLASS_BRIDGE_PCI:
>>> + pci_setup_bridge(b);
>>> + break;
>>> +
>>> + case PCI_CLASS_BRIDGE_CARDBUS:
>>> + pci_setup_cardbus(b);
>>> + break;
>>> +
>>> + default:
>>> + dev_info(&bridge->dev, "not setting up bridge for bus "
>>> + "%04x:%02x\n", pci_domain_nr(b), b->number);
>>> + break;
>>> + }
>>> +}
>>> +EXPORT_SYMBOL(pci_bridge_assign_resources);
>>> +
>>> void __ref pci_bus_assign_resources(const struct pci_bus *bus)
>>> {
>>> struct pci_bus *b;
>>> @@ -557,7 +621,8 @@ void __ref pci_bus_assign_resources(cons
>>>
>>> switch (dev->class >> 8) {
>>> case PCI_CLASS_BRIDGE_PCI:
>>> - pci_setup_bridge(b);
>>> + if (!pci_is_enabled(dev))
>>> + pci_setup_bridge(b);
>>> break;
>>>
>>> case PCI_CLASS_BRIDGE_CARDBUS:
>>> Index: linux-2.6/include/linux/pci.h
>>> ===================================================================
>>> --- linux-2.6.orig/include/linux/pci.h
>>> +++ linux-2.6/include/linux/pci.h
>>> @@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
>>> int pci_vpd_truncate(struct pci_dev *dev, size_t size);
>>>
>>> /* Helper functions for low-level code
>>> (drivers/pci/setup-[bus,res].c) */
>>> +void pci_bridge_assign_resources(const struct pci_dev *bridge);
>>> void pci_bus_assign_resources(const struct pci_bus *bus);
>>> void pci_bus_size_bridges(struct pci_bus *bus);
>>> int pci_claim_resource(struct pci_dev *, int);
>>>
>> Does this patch work without [PATCH 2/2]? I don't understand who
>> releases resouces? Does find_free_bus_resource() still release
>> resources?
>
> need to work with [2/2].
>
Ok. Could you rearrange the set of patches with the right pieces and
with the right order? It's very difficult for me to understand and
review the current patches.
By the way, is release_resource() in find_free_bus_resource() already
removed?
Thanks,
Kenji Kaneshige
Kenji Kaneshige wrote:
> Yinghai Lu wrote:
>> Kenji Kaneshige wrote:
>>> Yinghai Lu wrote:
>>>> move out bus_size_bridges and assign resources out of
>>>> pciehp_add_bridge()
>>>> and at last do them all together one time including slot bridge, to
>>>> avoid to
>>>> call assign resources several times, when there are several bridges
>>>> under the
>>>> slot bridge.
>>>>
>>>> need to introduce pci_bridge_assign_resources there.
>>>>
>>>> handle the case the slot bridge that doesn't get pre-allocated big
>>>> enough res
>>>> from FW.
>>>> for example pcie devices need 256M, but the bridge only get
>>>> preallocated 2M...
>>>>
>>>> pci_setup_bridge() will take extra check_enabled for the slot bridge,
>>>> otherwise
>>>> update res is not updated to bridge BAR. that is bridge is enabled
>>>> already for
>>>> port service.
>>>>
>>>> -v2: address Alex's concern about pci remove/rescan feature about
>>>> pci_setup_bridge changes.
>>>> -v3: Kenji pointed out that pci_config_slot need to be called before
>>>> pci_bus_add_devices()
>>>> -v4: move out pci_is_enabled checkout of pci_setup_bridge()
>>>>
>>>> Signed-off-by: Yinghai Lu <[email protected]>
>>>>
>>>> ---
>>>> drivers/pci/hotplug/pciehp_pci.c | 29 ++++++++++++---
>>>> drivers/pci/setup-bus.c | 73
>>>> ++++++++++++++++++++++++++++++++++++---
>>>> include/linux/pci.h | 1 3 files changed, 93
>>>> insertions(+), 10 deletions(-)
>>>>
>>>> Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
>>>> ===================================================================
>>>> --- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
>>>> +++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
>>>> @@ -53,19 +53,18 @@ static int __ref pciehp_add_bridge(struc
>>>> busnr = pci_scan_bridge(parent, dev, busnr, pass);
>>>> if (!dev->subordinate)
>>>> return -1;
>>>> - pci_bus_size_bridges(dev->subordinate);
>>>> - pci_bus_assign_resources(parent);
>>>> - pci_enable_bridges(parent);
>>>> - pci_bus_add_devices(parent);
>>>> +
>>>> return 0;
>>>> }
>>>>
>>>> int pciehp_configure_device(struct slot *p_slot)
>>>> {
>>>> struct pci_dev *dev;
>>>> - struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
>>>> + struct pci_dev *bridge = p_slot->ctrl->pcie->port;
>>>> + struct pci_bus *parent = bridge->subordinate;
>>>> int num, fn;
>>>> struct controller *ctrl = p_slot->ctrl;
>>>> + int retval;
>>>>
>>>> dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
>>>> if (dev) {
>>>> @@ -96,12 +95,30 @@ int pciehp_configure_device(struct slot
>>>> (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
>>>> pciehp_add_bridge(dev);
>>>> }
>>>> + pci_dev_put(dev);
>>>> + }
>>>> +
>>>> + pci_bus_size_bridges(parent);
>>>> + pci_clear_master(bridge);
>>>> + pci_bridge_assign_resources(bridge);
>>>> + retval = pci_reenable_device(bridge);
>>>> + pci_set_master(bridge);
>>>> + pci_enable_bridges(parent);
>>>> +
>>>> + for (fn = 0; fn < 8; fn++) {
>>>> + dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
>>>> + if (!dev)
>>>> + continue;
>>>> + if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
>>>> + pci_dev_put(dev);
>>>> + continue;
>>>> + }
>>>> pci_configure_slot(dev);
>>>> pci_dev_put(dev);
>>>> }
>>>>
>>>> - pci_bus_assign_resources(parent);
>>>> pci_bus_add_devices(parent);
>>>> +
>>>> return 0;
>>>> }
>>>>
>>>> Index: linux-2.6/drivers/pci/setup-bus.c
>>>> ===================================================================
>>>> --- linux-2.6.orig/drivers/pci/setup-bus.c
>>>> +++ linux-2.6/drivers/pci/setup-bus.c
>>>> @@ -27,6 +27,44 @@
>>>> #include <linux/slab.h>
>>>> #include "pci.h"
>>>>
>>>> +static void pdev_assign_resources_sorted(struct pci_dev *dev)
>>>> +{
>>>> + struct resource *res;
>>>> + struct resource_list head, *list, *tmp;
>>>> + int idx;
>>>> + u16 class = dev->class >> 8;
>>>> +
>>>> + head.next = NULL;
>>>> +
>>>> + /* Don't touch classless devices or host bridges or ioapics. */
>>>> + if (class == PCI_CLASS_NOT_DEFINED ||
>>>> + class == PCI_CLASS_BRIDGE_HOST)
>>>> + return;
>>>> +
>>>> + /* Don't touch ioapic devices already enabled by firmware */
>>>> + if (class == PCI_CLASS_SYSTEM_PIC) {
>>>> + u16 command;
>>>> + pci_read_config_word(dev, PCI_COMMAND, &command);
>>>> + if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
>>>> + return;
>>>> + }
>>>> +
>>>> + pdev_sort_resources(dev, &head);
>>>> +
>>>> + for (list = head.next; list;) {
>>>> + res = list->res;
>>>> + idx = res - &list->dev->resource[0];
>>>> + if (pci_assign_resource(list->dev, idx)) {
>>>> + res->start = 0;
>>>> + res->end = 0;
>>>> + res->flags = 0;
>>>> + }
>>>> + tmp = list;
>>>> + list = list->next;
>>>> + kfree(tmp);
>>>> + }
>>>> +}
>>>> +
>>>> static void pbus_assign_resources_sorted(const struct pci_bus *bus)
>>>> {
>>>> struct pci_dev *dev;
>>>> @@ -144,9 +182,6 @@ static void pci_setup_bridge(struct pci_
>>>> u32 l, bu, lu, io_upper16;
>>>> int pref_mem64;
>>>>
>>>> - if (pci_is_enabled(bridge))
>>>> - return;
>>>> -
>>>> dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
>>>> pci_domain_nr(bus), bus->number);
>>>>
>>>> @@ -541,6 +576,35 @@ void __ref pci_bus_size_bridges(struct p
>>>> }
>>>> EXPORT_SYMBOL(pci_bus_size_bridges);
>>>>
>>>> +void __ref pci_bridge_assign_resources(const struct pci_dev *bridge)
>>>> +{
>>>> + struct pci_bus *b;
>>>> +
>>>> + pdev_assign_resources_sorted((struct pci_dev *)bridge);
>>>> +
>>>> + b = bridge->subordinate;
>>>> + if (!b)
>>>> + return;
>>>> +
>>>> + pci_bus_assign_resources(b);
>>>> +
>>>> + switch (bridge->class >> 8) {
>>>> + case PCI_CLASS_BRIDGE_PCI:
>>>> + pci_setup_bridge(b);
>>>> + break;
>>>> +
>>>> + case PCI_CLASS_BRIDGE_CARDBUS:
>>>> + pci_setup_cardbus(b);
>>>> + break;
>>>> +
>>>> + default:
>>>> + dev_info(&bridge->dev, "not setting up bridge for bus "
>>>> + "%04x:%02x\n", pci_domain_nr(b), b->number);
>>>> + break;
>>>> + }
>>>> +}
>>>> +EXPORT_SYMBOL(pci_bridge_assign_resources);
>>>> +
>>>> void __ref pci_bus_assign_resources(const struct pci_bus *bus)
>>>> {
>>>> struct pci_bus *b;
>>>> @@ -557,7 +621,8 @@ void __ref pci_bus_assign_resources(cons
>>>>
>>>> switch (dev->class >> 8) {
>>>> case PCI_CLASS_BRIDGE_PCI:
>>>> - pci_setup_bridge(b);
>>>> + if (!pci_is_enabled(dev))
>>>> + pci_setup_bridge(b);
>>>> break;
>>>>
>>>> case PCI_CLASS_BRIDGE_CARDBUS:
>>>> Index: linux-2.6/include/linux/pci.h
>>>> ===================================================================
>>>> --- linux-2.6.orig/include/linux/pci.h
>>>> +++ linux-2.6/include/linux/pci.h
>>>> @@ -756,6 +756,7 @@ ssize_t pci_write_vpd(struct pci_dev *de
>>>> int pci_vpd_truncate(struct pci_dev *dev, size_t size);
>>>>
>>>> /* Helper functions for low-level code
>>>> (drivers/pci/setup-[bus,res].c) */
>>>> +void pci_bridge_assign_resources(const struct pci_dev *bridge);
>>>> void pci_bus_assign_resources(const struct pci_bus *bus);
>>>> void pci_bus_size_bridges(struct pci_bus *bus);
>>>> int pci_claim_resource(struct pci_dev *, int);
>>>>
>>> Does this patch work without [PATCH 2/2]? I don't understand who
>>> releases resouces? Does find_free_bus_resource() still release
>>> resources?
>>
>> need to work with [2/2].
>>
>
> Ok. Could you rearrange the set of patches with the right pieces and
> with the right order? It's very difficult for me to understand and
> review the current patches.
ok
>
> By the way, is release_resource() in find_free_bus_resource() already
> removed?
YES. Jesse sent request to Linus to revert that.
YH
Eric W. Biederman wrote:
> Yinghai Lu <[email protected]> writes:
>> after closing look up the code, it looks it will not break your setup.
>>
>> 1. before the patches:
>> a. when master card is inserted, all bridge in that card will get assigned with min_size
>> b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>>
>> 2. after the patches: v5
>> a. booted up, all leaf bridge mmio get clearred.
>> b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
>> c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>>
>> can you check those two patches in your setup to verify it?
>
> I have a much simpler case I will break, as I tried something similar by accident.
which kernel version?
>
> AMD cpu MCP55 with one pcie port setup as hotplug.
> The system only has 2GB of RAM. So plenty of space for pcie devices.
one or two ht chains?
do you still have lspci -tv with it?
>
> If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
> Reads from the bar of the hotplugged device work
> Writes to the bar of the hotplugged device, cause further writes to go to lala land.
>
> So I had to have the firmware make the assignment, because only it knows the
> details of the hidden AMD bar registers for each hypertransport chain etc.
that mean kernel doesn't get peer root bus res probed properly
YH
only for index < 3
v2: Jesse doesn't like it is in find_free_bus_resource...
try to move out of pci_bus_size_bridges loop.
need to apply after:
[PATCH] pci: pciehp update the slot bridge res to get big range for pcie devices - v4
v3: add pci_setup_bridge calling after pci_bridge_release_not_used_res.
only clear release those res for x86.
v4: Bjorn want to release use dev instead of bus.
v5: Kenji pointed out it will have problem with several level bridge.
so let only handle leaf bridge.
v6: address Kenji's request (new pci_bus_release...). and change applying order
move back release to pci_assign_unassigned_resource
Signed-off-by: Yinghai Lu <[email protected]>
---
drivers/pci/setup-bus.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++-
include/linux/pci.h | 1
2 files changed, 65 insertions(+), 1 deletion(-)
Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -573,13 +573,68 @@ void __ref pci_bus_assign_resources(cons
}
EXPORT_SYMBOL(pci_bus_assign_resources);
+static void pci_bridge_release_not_used_res(struct pci_bus *bus)
+{
+ int idx;
+ bool changed = false;
+ struct pci_dev *dev;
+ struct resource *r;
+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+ /* for pci bridges res only */
+ dev = bus->self;
+ for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES + 3;
+ idx++) {
+ r = &dev->resource[idx];
+ if (!(r->flags & type_mask))
+ continue;
+ if (!r->parent)
+ continue;
+ /* if there is no child under that, we should release it */
+ if (r->child)
+ continue;
+ if (!release_resource(r)) {
+ dev_info(&dev->dev, "resource %d %pRt released\n",
+ idx, r);
+ r->flags = 0;
+ changed = true;
+ }
+ }
+
+ if (changed)
+ pci_setup_bridge(bus);
+}
+
+void __ref pci_bus_release_bridges_not_used_res(struct pci_bus *bus)
+{
+ struct pci_bus *b;
+
+ /* If the bus is cardbus, do nothing */
+ if (bus->self && (bus->self->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS)
+ return;
+
+ /* We only release the resources under the leaf bridge */
+ if (list_empty(&bus->children)) {
+ if (!pci_is_root_bus(bus))
+ pci_bridge_release_not_used_res(bus);
+ return;
+ }
+
+ /* Scan child buses if the bus is not leaf */
+ list_for_each_entry(b, &bus->children, node)
+ pci_bus_release_bridges_not_used_res(b);
+}
+EXPORT_SYMBOL(pci_bus_release_bridges_not_used_res);
+
static void pci_bus_dump_res(struct pci_bus *bus)
{
int i;
for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
struct resource *res = bus->resource[i];
- if (!res || !res->end)
+
+ if (!res || !res->end || !res->flags)
continue;
dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pRt\n", i, res);
@@ -608,6 +663,14 @@ pci_assign_unassigned_resources(void)
{
struct pci_bus *bus;
+ /*
+ * Try to release leaf bridge's resources that there is not child
+ * under it
+ */
+ list_for_each_entry(bus, &pci_root_buses, node) {
+ pci_bus_release_bridges_not_used_res(bus);
+ }
+
/* Depth first, calculate sizes and alignments of all
subordinate buses. */
list_for_each_entry(bus, &pci_root_buses, node) {
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -758,6 +758,7 @@ int pci_vpd_truncate(struct pci_dev *dev
/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
void pci_bus_assign_resources(const struct pci_bus *bus);
void pci_bus_size_bridges(struct pci_bus *bus);
+void pci_bus_release_bridges_not_used_res(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);
void pci_assign_unassigned_resources(void);
void pdev_enable_device(struct pci_dev *);
On Thu, 2009-10-29 at 01:16 -0700, Eric W. Biederman wrote:
> Yinghai Lu <[email protected]> writes:
> >
> > after closing look up the code, it looks it will not break your setup.
> >
> > 1. before the patches:
> > a. when master card is inserted, all bridge in that card will get assigned with min_size
> > b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
> >
> > 2. after the patches: v5
> > a. booted up, all leaf bridge mmio get clearred.
> > b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
> > c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
> >
> > can you check those two patches in your setup to verify it?
>
> I have a much simpler case I will break, as I tried something similar by accident.
>
> AMD cpu MCP55 with one pcie port setup as hotplug.
> The system only has 2GB of RAM. So plenty of space for pcie devices.
>
> If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
> Reads from the bar of the hotplugged device work
> Writes to the bar of the hotplugged device, cause further writes to go to lala land.
>
> So I had to have the firmware make the assignment, because only it knows the
> details of the hidden AMD bar registers for each hypertransport chain etc.
Do you mean you had to have firmware program a hot-added device, or just
that firmware had to program the apertures of the root port that was
present at boot, even though it had no devices below it?
Firmware normally supplies ACPI _CRS information that tells us how it
programmed the host bridge windows. On x86, Linux normally ignores that
and just assumes a range based on memory size. If we paid attention to
it (as with "pci=use_crs"), it's likely that we could do a better job of
doing this setup.
Or, of course, we could add a Linux driver that knows about "the hidden
AMD bar registers." But I think that should be a last resort, for when
firmware supplied incorrect _CRS information.
> I don't see how throwing away the work that the firmware has done in
> preallocation is something that we can afford to do in general if what
> the firmware has done works for us.
>
> Eric
Bjorn Helgaas <[email protected]> writes:
> On Thu, 2009-10-29 at 01:16 -0700, Eric W. Biederman wrote:
>> Yinghai Lu <[email protected]> writes:
>> >
>> > after closing look up the code, it looks it will not break your setup.
>> >
>> > 1. before the patches:
>> > a. when master card is inserted, all bridge in that card will get assigned with min_size
>> > b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>> >
>> > 2. after the patches: v5
>> > a. booted up, all leaf bridge mmio get clearred.
>> > b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
>> > c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>> >
>> > can you check those two patches in your setup to verify it?
>>
>> I have a much simpler case I will break, as I tried something similar by accident.
>>
>> AMD cpu MCP55 with one pcie port setup as hotplug.
>> The system only has 2GB of RAM. So plenty of space for pcie devices.
>>
>> If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
>> Reads from the bar of the hotplugged device work
>> Writes to the bar of the hotplugged device, cause further writes to go to lala land.
>>
>> So I had to have the firmware make the assignment, because only it knows the
>> details of the hidden AMD bar registers for each hypertransport chain etc.
>
> Do you mean you had to have firmware program a hot-added device, or just
> that firmware had to program the apertures of the root port that was
> present at boot, even though it had no devices below it?
Firmware had to program the apertures of the root port that was present
at boot, even though it had no devices below it.
> Firmware normally supplies ACPI _CRS information that tells us how it
> programmed the host bridge windows. On x86, Linux normally ignores that
> and just assumes a range based on memory size. If we paid attention to
> it (as with "pci=use_crs"), it's likely that we could do a better job of
> doing this setup.
>
> Or, of course, we could add a Linux driver that knows about "the hidden
> AMD bar registers." But I think that should be a last resort, for when
> firmware supplied incorrect _CRS information.
In this case there was no ACPI, and even if there was correct _CRS information
it would have said only those addresses routed to bars/apertures on the
root bridge was routed to the MCP55. So while it looked like we had gobs
of unallocated space we could use. In practice we did not.
Eric
On Thu, 2009-10-29 at 08:13 -0700, Eric W. Biederman wrote:
> Bjorn Helgaas <[email protected]> writes:
>
> > On Thu, 2009-10-29 at 01:16 -0700, Eric W. Biederman wrote:
> >> Yinghai Lu <[email protected]> writes:
> >> >
> >> > after closing look up the code, it looks it will not break your setup.
> >> >
> >> > 1. before the patches:
> >> > a. when master card is inserted, all bridge in that card will get assigned with min_size
> >> > b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
> >> >
> >> > 2. after the patches: v5
> >> > a. booted up, all leaf bridge mmio get clearred.
> >> > b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
> >> > c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
> >> >
> >> > can you check those two patches in your setup to verify it?
> >>
> >> I have a much simpler case I will break, as I tried something similar by accident.
> >>
> >> AMD cpu MCP55 with one pcie port setup as hotplug.
> >> The system only has 2GB of RAM. So plenty of space for pcie devices.
> >>
> >> If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
> >> Reads from the bar of the hotplugged device work
> >> Writes to the bar of the hotplugged device, cause further writes to go to lala land.
> >>
> >> So I had to have the firmware make the assignment, because only it knows the
> >> details of the hidden AMD bar registers for each hypertransport chain etc.
> >
> > Do you mean you had to have firmware program a hot-added device, or just
> > that firmware had to program the apertures of the root port that was
> > present at boot, even though it had no devices below it?
>
> Firmware had to program the apertures of the root port that was present
> at boot, even though it had no devices below it.
>
> > Firmware normally supplies ACPI _CRS information that tells us how it
> > programmed the host bridge windows. On x86, Linux normally ignores that
> > and just assumes a range based on memory size. If we paid attention to
> > it (as with "pci=use_crs"), it's likely that we could do a better job of
> > doing this setup.
> >
> > Or, of course, we could add a Linux driver that knows about "the hidden
> > AMD bar registers." But I think that should be a last resort, for when
> > firmware supplied incorrect _CRS information.
>
> In this case there was no ACPI, and even if there was correct _CRS information
> it would have said only those addresses routed to bars/apertures on the
> root bridge was routed to the MCP55. So while it looked like we had gobs
> of unallocated space we could use. In practice we did not.
I know this is a hypothetical case since you don't have ACPI, but I'm
curious about this.
I assume the magic AMD BARs only affect the host bridge, and that the
downstream root ports look like standard PCI-to-PCI bridges. If that's
the case, and if we have correct descriptions of the host bridge
apertures, Linux should theoretically be able to do as well as firmware.
But you seem to be suggesting that even with a correct host bridge
description, there's space that *looks* available but is not. I don't
understand how this can be.
Bjorn
Yinghai Lu <[email protected]> writes:
> Eric W. Biederman wrote:
>> Yinghai Lu <[email protected]> writes:
>>> after closing look up the code, it looks it will not break your setup.
>>>
>>> 1. before the patches:
>>> a. when master card is inserted, all bridge in that card will get assigned with min_size
>>> b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>>>
>>> 2. after the patches: v5
>>> a. booted up, all leaf bridge mmio get clearred.
>>> b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
>>> c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>>>
>>> can you check those two patches in your setup to verify it?
>>
>> I have a much simpler case I will break, as I tried something similar by accident.
> which kernel version?
>>
>> AMD cpu MCP55 with one pcie port setup as hotplug.
>> The system only has 2GB of RAM. So plenty of space for pcie devices.
>
> one or two ht chains?
One chain.
> do you still have lspci -tv with it?
>
>>
>> If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
>> Reads from the bar of the hotplugged device work
>> Writes to the bar of the hotplugged device, cause further writes to go to lala land.
>>
>> So I had to have the firmware make the assignment, because only it knows the
>> details of the hidden AMD bar registers for each hypertransport chain etc.
>
> that mean kernel doesn't get peer root bus res probed properly
How do you do that without having drivers for the peer root bus?
Eric
On Thu, 29 Oct 2009 02:52:00 -0700
Yinghai Lu <[email protected]> wrote:
>
> only for index < 3
>
> v2: Jesse doesn't like it is in find_free_bus_resource...
> try to move out of pci_bus_size_bridges loop.
> need to apply after:
> [PATCH] pci: pciehp update the slot bridge res to get big
> range for pcie devices - v4 v3: add pci_setup_bridge calling after
> pci_bridge_release_not_used_res. only clear release those res for x86.
> v4: Bjorn want to release use dev instead of bus.
> v5: Kenji pointed out it will have problem with several level bridge.
> so let only handle leaf bridge.
> v6: address Kenji's request (new pci_bus_release...). and change
> applying order move back release to pci_assign_unassigned_resource
>
> Signed-off-by: Yinghai Lu <[email protected]>
Starting to look better...
>
> ---
> drivers/pci/setup-bus.c | 65
> +++++++++++++++++++++++++++++++++++++++++++++++-
> include/linux/pci.h | 1 2 files changed, 65 insertions(+), 1
> deletion(-)
>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c
> @@ -573,13 +573,68 @@ void __ref pci_bus_assign_resources(cons
> }
> EXPORT_SYMBOL(pci_bus_assign_resources);
>
> +static void pci_bridge_release_not_used_res(struct pci_bus *bus)
Can we call these functions something else? Maybe
pci_bridge_release_unused() with a kdoc comment describing exactly what
it does and why?
> +{
> + int idx;
> + bool changed = false;
> + struct pci_dev *dev;
> + struct resource *r;
> + unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> + IORESOURCE_PREFETCH;
> +
> + /* for pci bridges res only */
> + dev = bus->self;
> + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES
> + 3;
> + idx++) {
> + r = &dev->resource[idx];
> + if (!(r->flags & type_mask))
> + continue;
> + if (!r->parent)
> + continue;
> + /* if there is no child under that, we should
> release it */
> + if (r->child)
> + continue;
> + if (!release_resource(r)) {
> + dev_info(&dev->dev, "resource %d %pRt
> released\n",
> + idx, r);
> + r->flags = 0;
> + changed = true;
> + }
> + }
> +
> + if (changed)
> + pci_setup_bridge(bus);
> +}
> +
> +void __ref pci_bus_release_bridges_not_used_res(struct pci_bus *bus)
Likewise, maybe pci_bus_release_unused_bridge_res or something, with
comments again.
> +{
> + struct pci_bus *b;
> +
> + /* If the bus is cardbus, do nothing */
> + if (bus->self && (bus->self->class >> 8) ==
> PCI_CLASS_BRIDGE_CARDBUS)
> + return;
> +
> + /* We only release the resources under the leaf bridge */
> + if (list_empty(&bus->children)) {
> + if (!pci_is_root_bus(bus))
> + pci_bridge_release_not_used_res(bus);
> + return;
> + }
> +
> + /* Scan child buses if the bus is not leaf */
> + list_for_each_entry(b, &bus->children, node)
> + pci_bus_release_bridges_not_used_res(b);
> +}
> +EXPORT_SYMBOL(pci_bus_release_bridges_not_used_res);
> +
> static void pci_bus_dump_res(struct pci_bus *bus)
> {
> int i;
>
> for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
> struct resource *res = bus->resource[i];
> - if (!res || !res->end)
> +
> + if (!res || !res->end || !res->flags)
> continue;
>
> dev_printk(KERN_DEBUG, &bus->dev, "resource %d
> %pRt\n", i, res); @@ -608,6 +663,14 @@
> pci_assign_unassigned_resources(void) {
> struct pci_bus *bus;
>
> + /*
> + * Try to release leaf bridge's resources that there is not
> child
> + * under it
> + */
> + list_for_each_entry(bus, &pci_root_buses, node) {
> + pci_bus_release_bridges_not_used_res(bus);
> + }
> +
Also I wonder if we need to make this a command line option that isn't
run by default?
Really, as Eric says, we need a real dynamic allocation system with the
ability to move devices around at some point. Any volunteers? :)
--
Jesse Barnes, Intel Open Source Technology Center
Eric W. Biederman wrote:
> Yinghai Lu <[email protected]> writes:
>
>> Eric W. Biederman wrote:
>>> Yinghai Lu <[email protected]> writes:
>>>> after closing look up the code, it looks it will not break your setup.
>>>>
>>>> 1. before the patches:
>>>> a. when master card is inserted, all bridge in that card will get assigned with min_size
>>>> b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>>>>
>>>> 2. after the patches: v5
>>>> a. booted up, all leaf bridge mmio get clearred.
>>>> b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
>>>> c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>>>>
>>>> can you check those two patches in your setup to verify it?
>>> I have a much simpler case I will break, as I tried something similar by accident.
>> which kernel version?
>>> AMD cpu MCP55 with one pcie port setup as hotplug.
>>> The system only has 2GB of RAM. So plenty of space for pcie devices.
>> one or two ht chains?
>
> One chain.
>
>> do you still have lspci -tv with it?
>>
>>> If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
>>> Reads from the bar of the hotplugged device work
>>> Writes to the bar of the hotplugged device, cause further writes to go to lala land.
>>>
>>> So I had to have the firmware make the assignment, because only it knows the
>>> details of the hidden AMD bar registers for each hypertransport chain etc.
>> that mean kernel doesn't get peer root bus res probed properly
>
> How do you do that without having drivers for the peer root bus?
we have amd_bus.c to handle amd k8 system with two chains. but one chain is skipped.
(wonder if need to reenable that for one chain k8 system)
another intel_bus.c is on the way to 2.6.33.
when use_crs is used, those info from pci conf space is not used but just print out for check if _CRS is right or not.
YH
Jesse Barnes wrote:
> On Thu, 29 Oct 2009 02:52:00 -0700
> Yinghai Lu <[email protected]> wrote:
>
>> only for index < 3
>>
>> v2: Jesse doesn't like it is in find_free_bus_resource...
>> try to move out of pci_bus_size_bridges loop.
>> need to apply after:
>> [PATCH] pci: pciehp update the slot bridge res to get big
>> range for pcie devices - v4 v3: add pci_setup_bridge calling after
>> pci_bridge_release_not_used_res. only clear release those res for x86.
>> v4: Bjorn want to release use dev instead of bus.
>> v5: Kenji pointed out it will have problem with several level bridge.
>> so let only handle leaf bridge.
>> v6: address Kenji's request (new pci_bus_release...). and change
>> applying order move back release to pci_assign_unassigned_resource
>>
>> Signed-off-by: Yinghai Lu <[email protected]>
>
> Starting to look better...
>
>> ---
>> drivers/pci/setup-bus.c | 65
>> +++++++++++++++++++++++++++++++++++++++++++++++-
>> include/linux/pci.h | 1 2 files changed, 65 insertions(+), 1
>> deletion(-)
>>
>> Index: linux-2.6/drivers/pci/setup-bus.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/pci/setup-bus.c
>> +++ linux-2.6/drivers/pci/setup-bus.c
>> @@ -573,13 +573,68 @@ void __ref pci_bus_assign_resources(cons
>> }
>> EXPORT_SYMBOL(pci_bus_assign_resources);
>>
>> +static void pci_bridge_release_not_used_res(struct pci_bus *bus)
>
> Can we call these functions something else? Maybe
> pci_bridge_release_unused() with a kdoc comment describing exactly what
> it does and why?
ok.
>
>> +{
>> + int idx;
>> + bool changed = false;
>> + struct pci_dev *dev;
>> + struct resource *r;
>> + unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
>> + IORESOURCE_PREFETCH;
>> +
>> + /* for pci bridges res only */
>> + dev = bus->self;
>> + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES
>> + 3;
>> + idx++) {
>> + r = &dev->resource[idx];
>> + if (!(r->flags & type_mask))
>> + continue;
>> + if (!r->parent)
>> + continue;
>> + /* if there is no child under that, we should
>> release it */
>> + if (r->child)
>> + continue;
>> + if (!release_resource(r)) {
>> + dev_info(&dev->dev, "resource %d %pRt
>> released\n",
>> + idx, r);
>> + r->flags = 0;
>> + changed = true;
>> + }
>> + }
>> +
>> + if (changed)
>> + pci_setup_bridge(bus);
>> +}
>> +
>> +void __ref pci_bus_release_bridges_not_used_res(struct pci_bus *bus)
>
> Likewise, maybe pci_bus_release_unused_bridge_res or something, with
> comments again.
ok
>
>> +{
>> + struct pci_bus *b;
>> +
>> + /* If the bus is cardbus, do nothing */
>> + if (bus->self && (bus->self->class >> 8) ==
>> PCI_CLASS_BRIDGE_CARDBUS)
>> + return;
>> +
>> + /* We only release the resources under the leaf bridge */
>> + if (list_empty(&bus->children)) {
>> + if (!pci_is_root_bus(bus))
>> + pci_bridge_release_not_used_res(bus);
>> + return;
>> + }
>> +
>> + /* Scan child buses if the bus is not leaf */
>> + list_for_each_entry(b, &bus->children, node)
>> + pci_bus_release_bridges_not_used_res(b);
>> +}
>> +EXPORT_SYMBOL(pci_bus_release_bridges_not_used_res);
>> +
>> static void pci_bus_dump_res(struct pci_bus *bus)
>> {
>> int i;
>>
>> for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
>> struct resource *res = bus->resource[i];
>> - if (!res || !res->end)
>> +
>> + if (!res || !res->end || !res->flags)
>> continue;
>>
>> dev_printk(KERN_DEBUG, &bus->dev, "resource %d
>> %pRt\n", i, res); @@ -608,6 +663,14 @@
>> pci_assign_unassigned_resources(void) {
>> struct pci_bus *bus;
>>
>> + /*
>> + * Try to release leaf bridge's resources that there is not
>> child
>> + * under it
>> + */
>> + list_for_each_entry(bus, &pci_root_buses, node) {
>> + pci_bus_release_bridges_not_used_res(bus);
>> + }
>> +
>
> Also I wonder if we need to make this a command line option that isn't
> run by default?
thinking:
1. try not to touch bridge resource and do the allocation and record the fail device
2. release leaf bridge for those failed device
3. do second around bus_size and allocation.
>
> Really, as Eric says, we need a real dynamic allocation system with the
> ability to move devices around at some point. Any volunteers? :)
that will need suspend driver and rescan allocation and wakeup driver...
YH
On Thu, 29 Oct 2009 10:10:22 -0700
Yinghai Lu <[email protected]> wrote:
> > Also I wonder if we need to make this a command line option that
> > isn't run by default?
>
> thinking:
> 1. try not to touch bridge resource and do the allocation and record
> the fail device 2. release leaf bridge for those failed device
> 3. do second around bus_size and allocation.
Yeah, that would make it more automatic at least, which would be nice.
> > Really, as Eric says, we need a real dynamic allocation system with
> > the ability to move devices around at some point. Any
> > volunteers? :)
>
> that will need suspend driver and rescan allocation and wakeup
> driver...
Right, it's fairly invasive. But it's the right direction to go long
term.
--
Jesse Barnes, Intel Open Source Technology Center
Bjorn Helgaas <[email protected]> writes:
> On Thu, 2009-10-29 at 08:13 -0700, Eric W. Biederman wrote:
>> Bjorn Helgaas <[email protected]> writes:
>>
>> > On Thu, 2009-10-29 at 01:16 -0700, Eric W. Biederman wrote:
>> >> Yinghai Lu <[email protected]> writes:
>> >> >
>> >> > after closing look up the code, it looks it will not break your setup.
>> >> >
>> >> > 1. before the patches:
>> >> > a. when master card is inserted, all bridge in that card will get assigned with min_size
>> >> > b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>> >> >
>> >> > 2. after the patches: v5
>> >> > a. booted up, all leaf bridge mmio get clearred.
>> >> > b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
>> >> > c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>> >> >
>> >> > can you check those two patches in your setup to verify it?
>> >>
>> >> I have a much simpler case I will break, as I tried something similar by accident.
>> >>
>> >> AMD cpu MCP55 with one pcie port setup as hotplug.
>> >> The system only has 2GB of RAM. So plenty of space for pcie devices.
>> >>
>> >> If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
>> >> Reads from the bar of the hotplugged device work
>> >> Writes to the bar of the hotplugged device, cause further writes to go to lala land.
>> >>
>> >> So I had to have the firmware make the assignment, because only it knows the
>> >> details of the hidden AMD bar registers for each hypertransport chain etc.
>> >
>> > Do you mean you had to have firmware program a hot-added device, or just
>> > that firmware had to program the apertures of the root port that was
>> > present at boot, even though it had no devices below it?
>>
>> Firmware had to program the apertures of the root port that was present
>> at boot, even though it had no devices below it.
>>
>> > Firmware normally supplies ACPI _CRS information that tells us how it
>> > programmed the host bridge windows. On x86, Linux normally ignores that
>> > and just assumes a range based on memory size. If we paid attention to
>> > it (as with "pci=use_crs"), it's likely that we could do a better job of
>> > doing this setup.
>> >
>> > Or, of course, we could add a Linux driver that knows about "the hidden
>> > AMD bar registers." But I think that should be a last resort, for when
>> > firmware supplied incorrect _CRS information.
>>
>> In this case there was no ACPI, and even if there was correct _CRS information
>> it would have said only those addresses routed to bars/apertures on the
>> root bridge was routed to the MCP55. So while it looked like we had gobs
>> of unallocated space we could use. In practice we did not.
>
> I know this is a hypothetical case since you don't have ACPI, but I'm
> curious about this.
>
> I assume the magic AMD BARs only affect the host bridge, and that the
> downstream root ports look like standard PCI-to-PCI bridges. If that's
> the case, and if we have correct descriptions of the host bridge
> apertures, Linux should theoretically be able to do as well as firmware.
>
> But you seem to be suggesting that even with a correct host bridge
> description, there's space that *looks* available but is not. I don't
> understand how this can be.
What I meant was simply that not all of the non-memory space was
routed down the hypertransport chain to the mcp55. If you have an
accurate description of that you should be fine.
Eric
On Thu, 2009-10-29 at 12:28 -0700, Eric W. Biederman wrote:
> Bjorn Helgaas <[email protected]> writes:
>
> > On Thu, 2009-10-29 at 08:13 -0700, Eric W. Biederman wrote:
> >> Bjorn Helgaas <[email protected]> writes:
> >>
> >> > On Thu, 2009-10-29 at 01:16 -0700, Eric W. Biederman wrote:
> >> >> Yinghai Lu <[email protected]> writes:
> >> >> >
> >> >> > after closing look up the code, it looks it will not break your setup.
> >> >> >
> >> >> > 1. before the patches:
> >> >> > a. when master card is inserted, all bridge in that card will get assigned with min_size
> >> >> > b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
> >> >> >
> >> >> > 2. after the patches: v5
> >> >> > a. booted up, all leaf bridge mmio get clearred.
> >> >> > b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
> >> >> > c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
> >> >> >
> >> >> > can you check those two patches in your setup to verify it?
> >> >>
> >> >> I have a much simpler case I will break, as I tried something similar by accident.
> >> >>
> >> >> AMD cpu MCP55 with one pcie port setup as hotplug.
> >> >> The system only has 2GB of RAM. So plenty of space for pcie devices.
> >> >>
> >> >> If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
> >> >> Reads from the bar of the hotplugged device work
> >> >> Writes to the bar of the hotplugged device, cause further writes to go to lala land.
> >> >>
> >> >> So I had to have the firmware make the assignment, because only it knows the
> >> >> details of the hidden AMD bar registers for each hypertransport chain etc.
> >> >
> >> > Do you mean you had to have firmware program a hot-added device, or just
> >> > that firmware had to program the apertures of the root port that was
> >> > present at boot, even though it had no devices below it?
> >>
> >> Firmware had to program the apertures of the root port that was present
> >> at boot, even though it had no devices below it.
> >>
> >> > Firmware normally supplies ACPI _CRS information that tells us how it
> >> > programmed the host bridge windows. On x86, Linux normally ignores that
> >> > and just assumes a range based on memory size. If we paid attention to
> >> > it (as with "pci=use_crs"), it's likely that we could do a better job of
> >> > doing this setup.
> >> >
> >> > Or, of course, we could add a Linux driver that knows about "the hidden
> >> > AMD bar registers." But I think that should be a last resort, for when
> >> > firmware supplied incorrect _CRS information.
> >>
> >> In this case there was no ACPI, and even if there was correct _CRS information
> >> it would have said only those addresses routed to bars/apertures on the
> >> root bridge was routed to the MCP55. So while it looked like we had gobs
> >> of unallocated space we could use. In practice we did not.
> >
> > I know this is a hypothetical case since you don't have ACPI, but I'm
> > curious about this.
> >
> > I assume the magic AMD BARs only affect the host bridge, and that the
> > downstream root ports look like standard PCI-to-PCI bridges. If that's
> > the case, and if we have correct descriptions of the host bridge
> > apertures, Linux should theoretically be able to do as well as firmware.
> >
> > But you seem to be suggesting that even with a correct host bridge
> > description, there's space that *looks* available but is not. I don't
> > understand how this can be.
>
> What I meant was simply that not all of the non-memory space was
> routed down the hypertransport chain to the mcp55. If you have an
> accurate description of that you should be fine.
OK, yep, that makes perfect sense. That's a good example of why I
believe we should start paying attention to the root bridge _CRS,
because that's exactly what it would tell us.
Bjorn
Yinghai Lu <[email protected]> writes:
> Eric W. Biederman wrote:
>> Yinghai Lu <[email protected]> writes:
>>
>>> Eric W. Biederman wrote:
>>>> Yinghai Lu <[email protected]> writes:
>>>>> after closing look up the code, it looks it will not break your setup.
>>>>>
>>>>> 1. before the patches:
>>>>> a. when master card is inserted, all bridge in that card will get assigned with min_size
>>>>> b. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>>>>>
>>>>> 2. after the patches: v5
>>>>> a. booted up, all leaf bridge mmio get clearred.
>>>>> b. when master card is inserted, all bridge in that card will get assigned with min_size, and master bridge will be sum of them
>>>>> c. when new cards is inserted to those slots in master card, will get assigned in the bridge size.
>>>>>
>>>>> can you check those two patches in your setup to verify it?
>>>> I have a much simpler case I will break, as I tried something similar by accident.
>>> which kernel version?
>>>> AMD cpu MCP55 with one pcie port setup as hotplug.
>>>> The system only has 2GB of RAM. So plenty of space for pcie devices.
>>> one or two ht chains?
>>
>> One chain.
>>
>>> do you still have lspci -tv with it?
>>>
>>>> If the firmware assigns nothing and linux at boot time assigns the pci mmio space:
>>>> Reads from the bar of the hotplugged device work
>>>> Writes to the bar of the hotplugged device, cause further writes to go to lala land.
>>>>
>>>> So I had to have the firmware make the assignment, because only it knows the
>>>> details of the hidden AMD bar registers for each hypertransport chain etc.
>>> that mean kernel doesn't get peer root bus res probed properly
>>
>> How do you do that without having drivers for the peer root bus?
>
> we have amd_bus.c to handle amd k8 system with two chains. but one chain is skipped.
> (wonder if need to reenable that for one chain k8 system)
I was running a 32bit kernel so this didn't kick in. That might have
helped. At least as far as recognizing the resources weren't properly
routed. If we don't setup the infrastructure so that we can reprogram
those resources I'm not certain how much good it will do in general.
> another intel_bus.c is on the way to 2.6.33.
>
> when use_crs is used, those info from pci conf space is not used but just print out for check if _CRS is right or not.
If enough space is routed and we get accurate information I am certain
that is fine. I am still worried about the change in policy though.
Only rerouting things when there is a need gives us the best chance of
working everywhere. Freeing unused resources on hotplug ports before
we plug in a device scares me, because we do something that should
but doesn't we reallocate them. If there is simply not enough room
we can do something different.
Eric
>
> If enough space is routed and we get accurate information I am certain
> that is fine. I am still worried about the change in policy though.
>
> Only rerouting things when there is a need gives us the best chance of
> working everywhere. Freeing unused resources on hotplug ports before
> we plug in a device scares me, because we do something that should
> but doesn't we reallocate them. If there is simply not enough room
> we can do something different.
sure.
will send out another version, only release those res that don't have big range to support children.
YH
only for index < 3
v2: Jesse doesn't like it is in find_free_bus_resource...
try to move out of pci_bus_size_bridges loop.
v3: add pci_setup_bridge calling after pci_bridge_release_not_used_res.
only clear release those res for x86.
v4: Bjorn want to release use dev instead of bus.
v5: Kenji pointed out it will have problem with several level bridge.
so let only handle leaf bridge.
v6: address Kenji's request (new pci_bus_release...). and change applying order
move back release to pci_assign_unassigned_resource
v7: change functions name pci_bus_release_unused_bridge_res according to Jesse
v8: address Eric's concern, only overwrite leaf bridge resource that is not big enough
need to do it in two steps, and first step recore the failed res, and don't
touch bridge res that programmed by firmware. second step will try to release
bridge resource that is too small at first.
Signed-off-by: Yinghai Lu <[email protected]>
---
drivers/pci/setup-bus.c | 258 ++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 239 insertions(+), 19 deletions(-)
Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -27,11 +27,54 @@
#include <linux/slab.h>
#include "pci.h"
-static void pbus_assign_resources_sorted(const struct pci_bus *bus)
+
+static void add_to_failed_list(struct resource_list *head, struct pci_dev *dev,
+ struct resource *res)
+{
+ struct resource_list *list = head;
+ struct resource_list *ln = list->next;
+ struct resource_list *tmp;
+
+ tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp) {
+ pr_warning("add_to_failed_list: kmalloc() failed!\n");
+ return;
+ }
+
+ tmp->next = ln;
+ tmp->res = res;
+ tmp->dev = dev;
+ list->next = tmp;
+}
+
+static void free_failed_list(struct resource_list *head)
+{
+ struct resource_list *list, *tmp;
+ struct resource *res;
+ /*
+ * Try to release leaf bridge's resources that there is no child
+ * under it
+ */
+ for (list = head->next; list;) {
+ res = list->res;
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+
+ head->next = NULL;
+}
+
+static void pbus_assign_resources_sorted(const struct pci_bus *bus,
+ struct resource_list *fail_head)
{
struct pci_dev *dev;
struct resource *res;
struct resource_list head, *list, *tmp;
+ resource_size_t size;
int idx;
head.next = NULL;
@@ -57,10 +100,22 @@ static void pbus_assign_resources_sorted
for (list = head.next; list;) {
res = list->res;
idx = res - &list->dev->resource[0];
+ /* save the size at first */
+ size = resource_size(res);
if (pci_assign_resource(list->dev, idx)) {
- res->start = 0;
- res->end = 0;
- res->flags = 0;
+ if (fail_head && !list->dev->subordinate) {
+ /*
+ * device need to keep flags and size
+ * for second try
+ */
+ res->start = 0;
+ res->end = size - 1;
+ add_to_failed_list(fail_head, list->dev, res);
+ } else {
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }
}
tmp = list;
list = list->next;
@@ -137,18 +192,12 @@ EXPORT_SYMBOL(pci_setup_cardbus);
config space writes, so it's quite possible that an I/O window of
the bridge will have some undesirable address (e.g. 0) after the
first write. Ditto 64-bit prefetchable MMIO. */
-static void pci_setup_bridge(struct pci_bus *bus)
+
+static void pci_setup_bridge_io(struct pci_bus *bus)
{
struct pci_dev *bridge = bus->self;
struct pci_bus_region region;
- u32 l, bu, lu, io_upper16;
- int pref_mem64;
-
- if (pci_is_enabled(bridge))
- return;
-
- dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
- pci_domain_nr(bus), bus->number);
+ u32 l, io_upper16;
/* Set up the top and bottom of the PCI I/O segment for this bus. */
pcibios_resource_to_bus(bridge, ®ion, bus->resource[0]);
@@ -175,7 +224,12 @@ static void pci_setup_bridge(struct pci_
pci_write_config_dword(bridge, PCI_IO_BASE, l);
/* Update upper 16 bits of I/O base/limit. */
pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, io_upper16);
-
+}
+static void pci_setup_bridge_mmio(struct pci_bus *bus)
+{
+ struct pci_dev *bridge = bus->self;
+ struct pci_bus_region region;
+ u32 l;
/* Set up the top and bottom of the PCI Memory segment
for this bus. */
pcibios_resource_to_bus(bridge, ®ion, bus->resource[1]);
@@ -191,6 +245,13 @@ static void pci_setup_bridge(struct pci_
dev_info(&bridge->dev, " MEM window: disabled\n");
}
pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);
+}
+static void pci_setup_bridge_mmio_pref(struct pci_bus *bus)
+{
+ struct pci_dev *bridge = bus->self;
+ struct pci_bus_region region;
+ u32 l, bu, lu;
+ int pref_mem64;
/* Clear out the upper 32 bits of PREF limit.
If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
@@ -226,10 +287,37 @@ static void pci_setup_bridge(struct pci_
pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
}
+}
+static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type)
+{
+ struct pci_dev *bridge = bus->self;
+
+ if (pci_is_enabled(bridge))
+ return;
+
+ dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
+ pci_domain_nr(bus), bus->number);
+
+ if (type & IORESOURCE_IO)
+ pci_setup_bridge_io(bus);
+
+ if (type & IORESOURCE_MEM)
+ pci_setup_bridge_mmio(bus);
+
+ if (type & IORESOURCE_PREFETCH)
+ pci_setup_bridge_mmio_pref(bus);
pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl);
}
+static void pci_setup_bridge(struct pci_bus *bus)
+{
+ unsigned long type = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+ __pci_setup_bridge(bus, type);
+}
+
/* Check whether the bridge supports optional I/O and
prefetchable memory ranges. If not, the respective
base/limit registers must be read-only and read as 0. */
@@ -541,19 +629,20 @@ void __ref pci_bus_size_bridges(struct p
}
EXPORT_SYMBOL(pci_bus_size_bridges);
-void __ref pci_bus_assign_resources(const struct pci_bus *bus)
+static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
+ struct resource_list *fail_head)
{
struct pci_bus *b;
struct pci_dev *dev;
- pbus_assign_resources_sorted(bus);
+ pbus_assign_resources_sorted(bus, fail_head);
list_for_each_entry(dev, &bus->devices, bus_list) {
b = dev->subordinate;
if (!b)
continue;
- pci_bus_assign_resources(b);
+ __pci_bus_assign_resources(b, fail_head);
switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:
@@ -571,15 +660,105 @@ void __ref pci_bus_assign_resources(cons
}
}
}
+
+void __ref pci_bus_assign_resources(const struct pci_bus *bus)
+{
+ __pci_bus_assign_resources(bus, NULL);
+}
EXPORT_SYMBOL(pci_bus_assign_resources);
+static void release_children_resource(struct resource *r)
+{
+ struct resource *p;
+ resource_size_t size;
+
+ p = r->child;
+ while (p) {
+ release_children_resource(p);
+ release_resource(p);
+ printk(KERN_DEBUG "PCI: release child resource %pRt\n", p);
+ /* need to restore size, and keep flags */
+ size = resource_size(p);
+ p->start = 0;
+ p->end = size - 1;
+ p = r->child;
+ }
+}
+
+static void pci_bridge_release_unused_res(struct pci_bus *bus,
+ unsigned long type)
+{
+ int idx;
+ bool changed = false;
+ struct pci_dev *dev;
+ struct resource *r;
+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+ /* for pci bridges res only */
+ dev = bus->self;
+ for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES + 3;
+ idx++) {
+ r = &dev->resource[idx];
+ if ((r->flags & type_mask) != type)
+ continue;
+ if (!r->parent)
+ continue;
+ /*
+ * if there are children under that, we should release them
+ * all
+ */
+ release_children_resource(r);
+ if (!release_resource(r)) {
+ dev_printk(KERN_DEBUG, &dev->dev,
+ "resource %d %pRt released\n", idx, r);
+ r->flags = 0;
+ changed = true;
+ }
+ }
+
+ if (changed) {
+ if (type & IORESOURCE_PREFETCH) {
+ /* avoiding touch the one without PREF */
+ type = IORESOURCE_PREFETCH;
+ }
+ __pci_setup_bridge(bus, type);
+ }
+}
+
+/*
+ * try to release pci bridge resources that is from leaf bridge,
+ * so we can allocate big new one later
+ */
+static void __ref pci_bus_release_unused_bridge_res(struct pci_bus *bus,
+ unsigned long type)
+{
+ struct pci_bus *b;
+
+ /* If the bus is cardbus, do nothing */
+ if (bus->self && (bus->self->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS)
+ return;
+
+ /* We only release the resources under the leaf bridge */
+ if (list_empty(&bus->children)) {
+ if (!pci_is_root_bus(bus))
+ pci_bridge_release_unused_res(bus, type);
+ return;
+ }
+
+ /* Scan child buses if the bus is not leaf */
+ list_for_each_entry(b, &bus->children, node)
+ pci_bus_release_unused_bridge_res(b, type);
+}
+
static void pci_bus_dump_res(struct pci_bus *bus)
{
int i;
for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
struct resource *res = bus->resource[i];
- if (!res || !res->end)
+
+ if (!res || !res->end || !res->flags)
continue;
dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pRt\n", i, res);
@@ -607,6 +786,12 @@ void __init
pci_assign_unassigned_resources(void)
{
struct pci_bus *bus;
+ bool second_tried = false;
+ struct resource_list head, *list, *tmp;
+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+again:
+ head.next = NULL;
/* Depth first, calculate sizes and alignments of all
subordinate buses. */
@@ -615,7 +800,42 @@ pci_assign_unassigned_resources(void)
}
/* Depth last, allocate resources and update the hardware. */
list_for_each_entry(bus, &pci_root_buses, node) {
- pci_bus_assign_resources(bus);
+ __pci_bus_assign_resources(bus, &head);
+ }
+
+ /* any device complain? */
+ if (!head.next)
+ goto enable_and_dump;
+
+ if (second_tried) {
+ /* still fail, don't want to try more */
+ free_failed_list(&head);
+ goto enable_and_dump;
+ }
+
+ second_tried = true;
+ printk(KERN_DEBUG "PCI: second try to assign unassigned res\n");
+
+ /*
+ * Try to release leaf bridge's resources that doesn't fit resource of
+ * child device under that bridge
+ */
+ for (list = head.next; list;) {
+ bus = list->dev->bus;
+ /* don't need to play with root bus */
+ if (!pci_is_root_bus(bus))
+ pci_bus_release_unused_bridge_res(bus,
+ list->res->flags & type_mask);
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+
+ goto again;
+
+enable_and_dump:
+ /* Depth last, update the hardware. */
+ list_for_each_entry(bus, &pci_root_buses, node) {
pci_enable_bridges(bus);
}
move out bus_size_bridges and assign resources out of pciehp_add_bridge()
and at last do them all together one time including slot bridge, to avoid to
call assign resources several times, when there are several bridges under the
slot bridge.
need to introduce pci_bridge_assign_resources there.
handle the case the slot bridge that doesn't get pre-allocated big enough res
from FW.
for example pcie devices need 256M, but the bridge only get preallocated 2M...
pci_setup_bridge() will take extra check_enabled for the slot bridge, otherwise
update res is not updated to bridge BAR. that is bridge is enabled already for
port service.
v2: address Alex's concern about pci remove/rescan feature about pci_setup_bridge changes.
v3: Kenji pointed out that pci_config_slot need to be called before pci_bus_add_devices()
v4: move out pci_is_enabled checkout of pci_setup_bridge()
v5: change the applying sequence.
v6: change the functions name according to Jesse
v8: address Eric's concern, only overwrite leaf bridge resource that is not big enough
Signed-off-by: Yinghai Lu <[email protected]>
---
drivers/pci/hotplug/pciehp_pci.c | 23 ++++--
drivers/pci/setup-bus.c | 133 +++++++++++++++++++++++++++++++++++++--
include/linux/pci.h | 1
3 files changed, 147 insertions(+), 10 deletions(-)
Index: linux-2.6/drivers/pci/hotplug/pciehp_pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_pci.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_pci.c
@@ -53,17 +53,15 @@ static int __ref pciehp_add_bridge(struc
busnr = pci_scan_bridge(parent, dev, busnr, pass);
if (!dev->subordinate)
return -1;
- pci_bus_size_bridges(dev->subordinate);
- pci_bus_assign_resources(parent);
- pci_enable_bridges(parent);
- pci_bus_add_devices(parent);
+
return 0;
}
int pciehp_configure_device(struct slot *p_slot)
{
struct pci_dev *dev;
- struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
+ struct pci_dev *bridge = p_slot->ctrl->pcie->port;
+ struct pci_bus *parent = bridge->subordinate;
int num, fn;
struct controller *ctrl = p_slot->ctrl;
@@ -96,12 +94,25 @@ int pciehp_configure_device(struct slot
(dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
pciehp_add_bridge(dev);
}
+ pci_dev_put(dev);
+ }
+
+ pci_assign_unassigned_bridge_resources(bridge);
+
+ for (fn = 0; fn < 8; fn++) {
+ dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
+ if (!dev)
+ continue;
+ if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+ pci_dev_put(dev);
+ continue;
+ }
pci_configure_slot(dev);
pci_dev_put(dev);
}
- pci_bus_assign_resources(parent);
pci_bus_add_devices(parent);
+
return 0;
}
Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -68,6 +68,56 @@ static void free_failed_list(struct reso
head->next = NULL;
}
+static void pdev_assign_resources_sorted(struct pci_dev *dev,
+ struct resource_list *fail_head)
+{
+ struct resource *res;
+ struct resource_list head, *list, *tmp;
+ int idx;
+ u16 class = dev->class >> 8;
+ resource_size_t size;
+
+ head.next = NULL;
+
+ /* Don't touch classless devices or host bridges or ioapics. */
+ if (class == PCI_CLASS_NOT_DEFINED || class == PCI_CLASS_BRIDGE_HOST)
+ return;
+
+ /* Don't touch ioapic devices already enabled by firmware */
+ if (class == PCI_CLASS_SYSTEM_PIC) {
+ u16 command;
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
+ return;
+ }
+
+ pdev_sort_resources(dev, &head);
+
+ for (list = head.next; list;) {
+ res = list->res;
+ idx = res - &list->dev->resource[0];
+ /* save the size at first */
+ size = resource_size(res);
+ if (pci_assign_resource(list->dev, idx)) {
+ if (fail_head && !list->dev->subordinate) {
+ /*
+ * device need to keep flags and size
+ * for second try
+ */
+ res->start = 0;
+ res->end = size - 1;
+ add_to_failed_list(fail_head, list->dev, res);
+ } else {
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }
+ }
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+}
static void pbus_assign_resources_sorted(const struct pci_bus *bus,
struct resource_list *fail_head)
{
@@ -292,9 +342,6 @@ static void __pci_setup_bridge(struct pc
{
struct pci_dev *bridge = bus->self;
- if (pci_is_enabled(bridge))
- return;
-
dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
pci_domain_nr(bus), bus->number);
@@ -646,7 +693,8 @@ static void __ref __pci_bus_assign_resou
switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:
- pci_setup_bridge(b);
+ if (!pci_is_enabled(dev))
+ pci_setup_bridge(b);
break;
case PCI_CLASS_BRIDGE_CARDBUS:
@@ -667,6 +715,34 @@ void __ref pci_bus_assign_resources(cons
}
EXPORT_SYMBOL(pci_bus_assign_resources);
+static void __ref __pci_bridge_assign_resources(const struct pci_dev *bridge,
+ struct resource_list *fail_head)
+{
+ struct pci_bus *b;
+
+ pdev_assign_resources_sorted((struct pci_dev *)bridge, fail_head);
+
+ b = bridge->subordinate;
+ if (!b)
+ return;
+
+ __pci_bus_assign_resources(b, fail_head);
+
+ switch (bridge->class >> 8) {
+ case PCI_CLASS_BRIDGE_PCI:
+ pci_setup_bridge(b);
+ break;
+
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ pci_setup_cardbus(b);
+ break;
+
+ default:
+ dev_info(&bridge->dev, "not setting up bridge for bus "
+ "%04x:%02x\n", pci_domain_nr(b), b->number);
+ break;
+ }
+}
static void release_children_resource(struct resource *r)
{
struct resource *p;
@@ -844,3 +920,52 @@ enable_and_dump:
pci_bus_dump_resources(bus);
}
}
+
+void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
+{
+ struct pci_bus *bus;
+ struct pci_bus *parent = bridge->subordinate;
+ bool second_tried = false;
+ struct resource_list head, *list, *tmp;
+ int retval;
+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+again:
+ head.next = NULL;
+
+ pci_bus_size_bridges(parent);
+ pci_clear_master(bridge);
+ __pci_bridge_assign_resources(bridge, &head);
+ retval = pci_reenable_device(bridge);
+ pci_set_master(bridge);
+ pci_enable_bridges(parent);
+
+ /* any device complain? */
+ if (!head.next)
+ return;
+
+ if (second_tried) {
+ /* still fail, don't want to try more */
+ free_failed_list(&head);
+ return;
+ }
+
+ second_tried = true;
+ printk(KERN_DEBUG "PCI: second try to assign unassigned res\n");
+
+ /*
+ * Try to release leaf bridge's resources that doesn't fit resource of
+ * child device under that bridge
+ */
+ for (list = head.next; list;) {
+ bus = list->dev->bus;
+ pci_bus_release_unused_bridge_res(bus,
+ list->res->flags & type_mask);
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+
+ goto again;
+}
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -760,6 +760,7 @@ void pci_bus_assign_resources(const stru
void pci_bus_size_bridges(struct pci_bus *bus);
int pci_claim_resource(struct pci_dev *, int);
void pci_assign_unassigned_resources(void);
+void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge);
void pdev_enable_device(struct pci_dev *);
void pdev_sort_resources(struct pci_dev *, struct resource_list *);
int pci_enable_resources(struct pci_dev *, int mask);
Hi Yinghai,
Sorry for the delayed response.
I'll try your patches on my machine soon (maybe in
the few days).
Thanks,
Kenji Kaneshige
Yinghai Lu wrote:
>
> only for index < 3
>
> v2: Jesse doesn't like it is in find_free_bus_resource...
> try to move out of pci_bus_size_bridges loop.
> v3: add pci_setup_bridge calling after pci_bridge_release_not_used_res.
> only clear release those res for x86.
> v4: Bjorn want to release use dev instead of bus.
> v5: Kenji pointed out it will have problem with several level bridge.
> so let only handle leaf bridge.
> v6: address Kenji's request (new pci_bus_release...). and change applying order
> move back release to pci_assign_unassigned_resource
> v7: change functions name pci_bus_release_unused_bridge_res according to Jesse
> v8: address Eric's concern, only overwrite leaf bridge resource that is not big enough
> need to do it in two steps, and first step recore the failed res, and don't
> touch bridge res that programmed by firmware. second step will try to release
> bridge resource that is too small at first.
>
> Signed-off-by: Yinghai Lu <[email protected]>
>
> ---
> drivers/pci/setup-bus.c | 258 ++++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 239 insertions(+), 19 deletions(-)
>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c
> @@ -27,11 +27,54 @@
> #include <linux/slab.h>
> #include "pci.h"
>
> -static void pbus_assign_resources_sorted(const struct pci_bus *bus)
> +
> +static void add_to_failed_list(struct resource_list *head, struct pci_dev *dev,
> + struct resource *res)
> +{
> + struct resource_list *list = head;
> + struct resource_list *ln = list->next;
> + struct resource_list *tmp;
> +
> + tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
> + if (!tmp) {
> + pr_warning("add_to_failed_list: kmalloc() failed!\n");
> + return;
> + }
> +
> + tmp->next = ln;
> + tmp->res = res;
> + tmp->dev = dev;
> + list->next = tmp;
> +}
> +
> +static void free_failed_list(struct resource_list *head)
> +{
> + struct resource_list *list, *tmp;
> + struct resource *res;
> + /*
> + * Try to release leaf bridge's resources that there is no child
> + * under it
> + */
> + for (list = head->next; list;) {
> + res = list->res;
> + res->start = 0;
> + res->end = 0;
> + res->flags = 0;
> + tmp = list;
> + list = list->next;
> + kfree(tmp);
> + }
> +
> + head->next = NULL;
> +}
> +
> +static void pbus_assign_resources_sorted(const struct pci_bus *bus,
> + struct resource_list *fail_head)
> {
> struct pci_dev *dev;
> struct resource *res;
> struct resource_list head, *list, *tmp;
> + resource_size_t size;
> int idx;
>
> head.next = NULL;
> @@ -57,10 +100,22 @@ static void pbus_assign_resources_sorted
> for (list = head.next; list;) {
> res = list->res;
> idx = res - &list->dev->resource[0];
> + /* save the size at first */
> + size = resource_size(res);
> if (pci_assign_resource(list->dev, idx)) {
> - res->start = 0;
> - res->end = 0;
> - res->flags = 0;
> + if (fail_head && !list->dev->subordinate) {
> + /*
> + * device need to keep flags and size
> + * for second try
> + */
> + res->start = 0;
> + res->end = size - 1;
> + add_to_failed_list(fail_head, list->dev, res);
> + } else {
> + res->start = 0;
> + res->end = 0;
> + res->flags = 0;
> + }
> }
> tmp = list;
> list = list->next;
> @@ -137,18 +192,12 @@ EXPORT_SYMBOL(pci_setup_cardbus);
> config space writes, so it's quite possible that an I/O window of
> the bridge will have some undesirable address (e.g. 0) after the
> first write. Ditto 64-bit prefetchable MMIO. */
> -static void pci_setup_bridge(struct pci_bus *bus)
> +
> +static void pci_setup_bridge_io(struct pci_bus *bus)
> {
> struct pci_dev *bridge = bus->self;
> struct pci_bus_region region;
> - u32 l, bu, lu, io_upper16;
> - int pref_mem64;
> -
> - if (pci_is_enabled(bridge))
> - return;
> -
> - dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
> - pci_domain_nr(bus), bus->number);
> + u32 l, io_upper16;
>
> /* Set up the top and bottom of the PCI I/O segment for this bus. */
> pcibios_resource_to_bus(bridge, ®ion, bus->resource[0]);
> @@ -175,7 +224,12 @@ static void pci_setup_bridge(struct pci_
> pci_write_config_dword(bridge, PCI_IO_BASE, l);
> /* Update upper 16 bits of I/O base/limit. */
> pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, io_upper16);
> -
> +}
> +static void pci_setup_bridge_mmio(struct pci_bus *bus)
> +{
> + struct pci_dev *bridge = bus->self;
> + struct pci_bus_region region;
> + u32 l;
> /* Set up the top and bottom of the PCI Memory segment
> for this bus. */
> pcibios_resource_to_bus(bridge, ®ion, bus->resource[1]);
> @@ -191,6 +245,13 @@ static void pci_setup_bridge(struct pci_
> dev_info(&bridge->dev, " MEM window: disabled\n");
> }
> pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);
> +}
> +static void pci_setup_bridge_mmio_pref(struct pci_bus *bus)
> +{
> + struct pci_dev *bridge = bus->self;
> + struct pci_bus_region region;
> + u32 l, bu, lu;
> + int pref_mem64;
>
> /* Clear out the upper 32 bits of PREF limit.
> If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
> @@ -226,10 +287,37 @@ static void pci_setup_bridge(struct pci_
> pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
> pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
> }
> +}
> +static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type)
> +{
> + struct pci_dev *bridge = bus->self;
> +
> + if (pci_is_enabled(bridge))
> + return;
> +
> + dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
> + pci_domain_nr(bus), bus->number);
> +
> + if (type & IORESOURCE_IO)
> + pci_setup_bridge_io(bus);
> +
> + if (type & IORESOURCE_MEM)
> + pci_setup_bridge_mmio(bus);
> +
> + if (type & IORESOURCE_PREFETCH)
> + pci_setup_bridge_mmio_pref(bus);
>
> pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl);
> }
>
> +static void pci_setup_bridge(struct pci_bus *bus)
> +{
> + unsigned long type = IORESOURCE_IO | IORESOURCE_MEM |
> + IORESOURCE_PREFETCH;
> +
> + __pci_setup_bridge(bus, type);
> +}
> +
> /* Check whether the bridge supports optional I/O and
> prefetchable memory ranges. If not, the respective
> base/limit registers must be read-only and read as 0. */
> @@ -541,19 +629,20 @@ void __ref pci_bus_size_bridges(struct p
> }
> EXPORT_SYMBOL(pci_bus_size_bridges);
>
> -void __ref pci_bus_assign_resources(const struct pci_bus *bus)
> +static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
> + struct resource_list *fail_head)
> {
> struct pci_bus *b;
> struct pci_dev *dev;
>
> - pbus_assign_resources_sorted(bus);
> + pbus_assign_resources_sorted(bus, fail_head);
>
> list_for_each_entry(dev, &bus->devices, bus_list) {
> b = dev->subordinate;
> if (!b)
> continue;
>
> - pci_bus_assign_resources(b);
> + __pci_bus_assign_resources(b, fail_head);
>
> switch (dev->class >> 8) {
> case PCI_CLASS_BRIDGE_PCI:
> @@ -571,15 +660,105 @@ void __ref pci_bus_assign_resources(cons
> }
> }
> }
> +
> +void __ref pci_bus_assign_resources(const struct pci_bus *bus)
> +{
> + __pci_bus_assign_resources(bus, NULL);
> +}
> EXPORT_SYMBOL(pci_bus_assign_resources);
>
> +static void release_children_resource(struct resource *r)
> +{
> + struct resource *p;
> + resource_size_t size;
> +
> + p = r->child;
> + while (p) {
> + release_children_resource(p);
> + release_resource(p);
> + printk(KERN_DEBUG "PCI: release child resource %pRt\n", p);
> + /* need to restore size, and keep flags */
> + size = resource_size(p);
> + p->start = 0;
> + p->end = size - 1;
> + p = r->child;
> + }
> +}
> +
> +static void pci_bridge_release_unused_res(struct pci_bus *bus,
> + unsigned long type)
> +{
> + int idx;
> + bool changed = false;
> + struct pci_dev *dev;
> + struct resource *r;
> + unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> + IORESOURCE_PREFETCH;
> +
> + /* for pci bridges res only */
> + dev = bus->self;
> + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES + 3;
> + idx++) {
> + r = &dev->resource[idx];
> + if ((r->flags & type_mask) != type)
> + continue;
> + if (!r->parent)
> + continue;
> + /*
> + * if there are children under that, we should release them
> + * all
> + */
> + release_children_resource(r);
> + if (!release_resource(r)) {
> + dev_printk(KERN_DEBUG, &dev->dev,
> + "resource %d %pRt released\n", idx, r);
> + r->flags = 0;
> + changed = true;
> + }
> + }
> +
> + if (changed) {
> + if (type & IORESOURCE_PREFETCH) {
> + /* avoiding touch the one without PREF */
> + type = IORESOURCE_PREFETCH;
> + }
> + __pci_setup_bridge(bus, type);
> + }
> +}
> +
> +/*
> + * try to release pci bridge resources that is from leaf bridge,
> + * so we can allocate big new one later
> + */
> +static void __ref pci_bus_release_unused_bridge_res(struct pci_bus *bus,
> + unsigned long type)
> +{
> + struct pci_bus *b;
> +
> + /* If the bus is cardbus, do nothing */
> + if (bus->self && (bus->self->class >> 8) == PCI_CLASS_BRIDGE_CARDBUS)
> + return;
> +
> + /* We only release the resources under the leaf bridge */
> + if (list_empty(&bus->children)) {
> + if (!pci_is_root_bus(bus))
> + pci_bridge_release_unused_res(bus, type);
> + return;
> + }
> +
> + /* Scan child buses if the bus is not leaf */
> + list_for_each_entry(b, &bus->children, node)
> + pci_bus_release_unused_bridge_res(b, type);
> +}
> +
> static void pci_bus_dump_res(struct pci_bus *bus)
> {
> int i;
>
> for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
> struct resource *res = bus->resource[i];
> - if (!res || !res->end)
> +
> + if (!res || !res->end || !res->flags)
> continue;
>
> dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pRt\n", i, res);
> @@ -607,6 +786,12 @@ void __init
> pci_assign_unassigned_resources(void)
> {
> struct pci_bus *bus;
> + bool second_tried = false;
> + struct resource_list head, *list, *tmp;
> + unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> + IORESOURCE_PREFETCH;
> +again:
> + head.next = NULL;
>
> /* Depth first, calculate sizes and alignments of all
> subordinate buses. */
> @@ -615,7 +800,42 @@ pci_assign_unassigned_resources(void)
> }
> /* Depth last, allocate resources and update the hardware. */
> list_for_each_entry(bus, &pci_root_buses, node) {
> - pci_bus_assign_resources(bus);
> + __pci_bus_assign_resources(bus, &head);
> + }
> +
> + /* any device complain? */
> + if (!head.next)
> + goto enable_and_dump;
> +
> + if (second_tried) {
> + /* still fail, don't want to try more */
> + free_failed_list(&head);
> + goto enable_and_dump;
> + }
> +
> + second_tried = true;
> + printk(KERN_DEBUG "PCI: second try to assign unassigned res\n");
> +
> + /*
> + * Try to release leaf bridge's resources that doesn't fit resource of
> + * child device under that bridge
> + */
> + for (list = head.next; list;) {
> + bus = list->dev->bus;
> + /* don't need to play with root bus */
> + if (!pci_is_root_bus(bus))
> + pci_bus_release_unused_bridge_res(bus,
> + list->res->flags & type_mask);
> + tmp = list;
> + list = list->next;
> + kfree(tmp);
> + }
> +
> + goto again;
> +
> +enable_and_dump:
> + /* Depth last, update the hardware. */
> + list_for_each_entry(bus, &pci_root_buses, node) {
> pci_enable_bridges(bus);
> }
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>