so later we could use it to release small resource before pci assign unassign res
-v2: change name to release_child_resources
Signed-off-by: Yinghai Lu <[email protected]>
---
drivers/pci/setup-bus.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 112 insertions(+), 1 deletion(-)
Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -209,7 +209,6 @@ static void pci_setup_bridge_mmio_pref(s
l = (region.start >> 16) & 0xfff0;
l |= region.end & 0xfff00000;
if (res->flags & IORESOURCE_MEM_64) {
- pref_mem64 = 1;
bu = upper_32_bits(region.start);
lu = upper_32_bits(region.end);
}
@@ -608,6 +607,118 @@ void __ref pci_bus_assign_resources(cons
}
EXPORT_SYMBOL(pci_bus_assign_resources);
+static void release_child_resources(struct resource *r)
+{
+ struct resource *p;
+ resource_size_t size;
+
+ p = r->child;
+ while (p) {
+ release_child_resources(p);
+ release_resource(p);
+ printk(KERN_DEBUG "PCI: release child resource %pR\n", p);
+ /* need to restore size, and keep flags */
+ size = resource_size(p);
+ p->start = 0;
+ p->end = size - 1;
+ p = r->child;
+ }
+}
+
+static void pci_bridge_release_unused_res(struct pci_bus *bus,
+ unsigned long type)
+{
+ int idx;
+ bool changed = false;
+ struct pci_dev *dev;
+ struct resource *r;
+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+ /* for pci bridges res only */
+ dev = bus->self;
+ for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES + 3;
+ idx++) {
+ r = &dev->resource[idx];
+ if ((r->flags & type_mask) != type)
+ continue;
+ if (!r->parent)
+ continue;
+ /*
+ * if there are children under that, we should release them
+ * all
+ */
+ release_child_resources(r);
+ if (!release_resource(r)) {
+ dev_printk(KERN_DEBUG, &dev->dev,
+ "resource %d %pR released\n", idx, r);
+ /* keep the old size */
+ r->end = resource_size(r) - 1;
+ r->start = 0;
+ r->flags = 0;
+ changed = true;
+ }
+ }
+
+ if (changed) {
+ if (type & IORESOURCE_PREFETCH) {
+ /* avoiding touch the one without PREF */
+ type = IORESOURCE_PREFETCH;
+ }
+ __pci_setup_bridge(bus, type);
+ }
+}
+
+/*
+ * try to release pci bridge resources that is from leaf bridge,
+ * so we can allocate big new one later
+ * check:
+ * 0: only release the bridge and only the bridge is leaf
+ * 1: release all down side bridge for third shoot
+ */
+static void __ref pci_bus_release_unused_bridge_res(struct pci_bus *bus,
+ unsigned long type,
+ int check_leaf)
+{
+ struct pci_dev *dev;
+ bool is_leaf_bridge = true;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ struct pci_bus *b = dev->subordinate;
+ if (!b)
+ continue;
+
+ switch (dev->class >> 8) {
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ is_leaf_bridge = false;
+ break;
+
+ case PCI_CLASS_BRIDGE_PCI:
+ default:
+ is_leaf_bridge = false;
+ if (!check_leaf)
+ pci_bus_release_unused_bridge_res(b, type,
+ check_leaf);
+ break;
+ }
+ }
+
+ /* The root bus? */
+ if (!bus->self)
+ return;
+
+ switch (bus->self->class >> 8) {
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ break;
+
+ case PCI_CLASS_BRIDGE_PCI:
+ default:
+ if ((check_leaf && is_leaf_bridge) || !check_leaf)
+ pci_bridge_release_unused_res(bus, type);
+ break;
+ }
+}
+
static void pci_bus_dump_res(struct pci_bus *bus)
{
int i;
On Fri, 18 Dec 2009, Yinghai Lu wrote:
>
> so later we could use it to release small resource before pci assign unassign res
However, I think this one is wrong.
> +static void release_child_resources(struct resource *r)
> +{
> + struct resource *p;
> + resource_size_t size;
> +
> + p = r->child;
> + while (p) {
> + release_child_resources(p);
> + release_resource(p);
So not only is this releasing resources that aren't necessarily PCI
devices, it's releasing the whole tree - regardless of how they were
allocated and initialized. That makes me nervous to begin with. It's in
the wrong file.
But the locking is crap too!
You need to hold the resource lock for the whole operation - you can't
just walk the resource tree and release them.
And once you do that, then using "release_resrouce()" is the wrong thing,
since it turns into just "__release_resource()" and you notice that that
walks the chain looking for them - which makes it pointless to have
_another_ outer loop that walks the chain to release them!
So you'd need to
- move this to kernel/resource.c
- do it all under 'write_lock(&resource_lock);'
- stop the silly double list loop, and just do it as a single loop that
does
p = old->parent->child;
old->parent = NULL;
while (p) {
struct resource *tmp = p;
p = p->sibling;
.. do whatever you do to free tmp ..
}
and it's much simpler, more efficient, has the rigth locking, and is in
the right place.
That said, it's still unclear if you can ever do this! Why would the PCI
layer be allowed to release ACPI resources int he tree, for example?
So I can see fixing the _implementation_ issues I have like above, but I'd
still be nervous about the whole concept of the patch..
Linus
On Fri, 18 Dec 2009 13:24:41 -0800 (PST)
Linus Torvalds <[email protected]> wrote:
> And once you do that, then using "release_resrouce()" is the wrong
> thing, since it turns into just "__release_resource()" and you notice
> that that walks the chain looking for them - which makes it pointless
> to have _another_ outer loop that walks the chain to release them!
>
> So you'd need to
>
> - move this to kernel/resource.c
>
> - do it all under 'write_lock(&resource_lock);'
>
> - stop the silly double list loop, and just do it as a single loop
> that does
>
> p = old->parent->child;
> old->parent = NULL;
> while (p) {
> struct resource *tmp = p;
> p = p->sibling;
>
> .. do whatever you do to free tmp ..
> }
>
> and it's much simpler, more efficient, has the rigth locking, and
> is in the right place.
>
> That said, it's still unclear if you can ever do this! Why would the
> PCI layer be allowed to release ACPI resources int he tree, for
> example?
>
> So I can see fixing the _implementation_ issues I have like above,
> but I'd still be nervous about the whole concept of the patch..
Yeah, if we roll all the way back up through some system resources we
could definitely get into trouble. Stopping the recursion when we hit
a bridge or host bridge may be a good enough heuristic?
Really I guess this is just a half measure. There are a whole set a
fixed resources that are children of the root address space, and we
shouldn't try to free or move them around at all (e.g. ACPI _CRS type
resources). Beyond that though it should be safe to free all the
moveable resources in a tree and try again if a leaf device can't get
its preferred allocation (the whole purpose of this series afaict).
--
Jesse Barnes, Intel Open Source Technology Center
Linus Torvalds wrote:
>
> On Fri, 18 Dec 2009, Yinghai Lu wrote:
>> so later we could use it to release small resource before pci assign unassign res
>
> However, I think this one is wrong.
>
>> +static void release_child_resources(struct resource *r)
>> +{
>> + struct resource *p;
>> + resource_size_t size;
>> +
>> + p = r->child;
>> + while (p) {
>> + release_child_resources(p);
>> + release_resource(p);
>
> So not only is this releasing resources that aren't necessarily PCI
> devices, it's releasing the whole tree - regardless of how they were
> allocated and initialized. That makes me nervous to begin with. It's in
> the wrong file.
>
> But the locking is crap too!
>
> You need to hold the resource lock for the whole operation - you can't
> just walk the resource tree and release them.
>
> And once you do that, then using "release_resrouce()" is the wrong thing,
> since it turns into just "__release_resource()" and you notice that that
> walks the chain looking for them - which makes it pointless to have
> _another_ outer loop that walks the chain to release them!
>
> So you'd need to
>
> - move this to kernel/resource.c
>
> - do it all under 'write_lock(&resource_lock);'
>
> - stop the silly double list loop, and just do it as a single loop that
> does
>
> p = old->parent->child;
> old->parent = NULL;
> while (p) {
> struct resource *tmp = p;
> p = p->sibling;
>
> .. do whatever you do to free tmp ..
> }
>
> and it's much simpler, more efficient, has the rigth locking, and is in
> the right place.
ok, please check attached is right or not
>
> That said, it's still unclear if you can ever do this! Why would the PCI
> layer be allowed to release ACPI resources int he tree, for example?
>
> So I can see fixing the _implementation_ issues I have like above, but I'd
> still be nervous about the whole concept of the patch..
those code are only called during early stage when pci_assign unassigned, and pcie hotplug under the pcie port.
also that is the pci_try_num is default to 1, and only be changed by pci=try=2 etc. only second try start to call
those functions.
Thanks
Yinghai
Subject: [PATCH 2/12] pci: add pci_bridge_release_unused_res and pci_bus_release_unused_bridge_res -v3
so later we could use it to release small resource before pci assign unassign res
-v2: change name to release_child_resources according to Jesse
-v3: according to Linus, move release_child_resources to resource.c
also need to put the lock around them all to avoid recursive deep.
(my test case only have one level that need to be released)
Signed-off-by: Yinghai Lu <[email protected]>
---
drivers/pci/setup-bus.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++-
include/linux/ioport.h | 1
kernel/resource.c | 30 +++++++++++++++
3 files changed, 125 insertions(+), 1 deletion(-)
Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -209,7 +209,6 @@ static void pci_setup_bridge_mmio_pref(s
l = (region.start >> 16) & 0xfff0;
l |= region.end & 0xfff00000;
if (res->flags & IORESOURCE_MEM_64) {
- pref_mem64 = 1;
bu = upper_32_bits(region.start);
lu = upper_32_bits(region.end);
}
@@ -608,6 +607,100 @@ void __ref pci_bus_assign_resources(cons
}
EXPORT_SYMBOL(pci_bus_assign_resources);
+static void pci_bridge_release_unused_res(struct pci_bus *bus,
+ unsigned long type)
+{
+ int idx;
+ bool changed = false;
+ struct pci_dev *dev;
+ struct resource *r;
+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+ /* for pci bridges res only */
+ dev = bus->self;
+ for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES + 3;
+ idx++) {
+ r = &dev->resource[idx];
+ if ((r->flags & type_mask) != type)
+ continue;
+ if (!r->parent)
+ continue;
+ /*
+ * if there are children under that, we should release them
+ * all
+ */
+ release_child_resources(r);
+ if (!release_resource(r)) {
+ dev_printk(KERN_DEBUG, &dev->dev,
+ "resource %d %pR released\n", idx, r);
+ /* keep the old size */
+ r->end = resource_size(r) - 1;
+ r->start = 0;
+ r->flags = 0;
+ changed = true;
+ }
+ }
+
+ if (changed) {
+ if (type & IORESOURCE_PREFETCH) {
+ /* avoiding touch the one without PREF */
+ type = IORESOURCE_PREFETCH;
+ }
+ __pci_setup_bridge(bus, type);
+ }
+}
+
+/*
+ * try to release pci bridge resources that is from leaf bridge,
+ * so we can allocate big new one later
+ * check:
+ * 0: only release the bridge and only the bridge is leaf
+ * 1: release all down side bridge for third shoot
+ */
+static void __ref pci_bus_release_unused_bridge_res(struct pci_bus *bus,
+ unsigned long type,
+ int check_leaf)
+{
+ struct pci_dev *dev;
+ bool is_leaf_bridge = true;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ struct pci_bus *b = dev->subordinate;
+ if (!b)
+ continue;
+
+ switch (dev->class >> 8) {
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ is_leaf_bridge = false;
+ break;
+
+ case PCI_CLASS_BRIDGE_PCI:
+ default:
+ is_leaf_bridge = false;
+ if (!check_leaf)
+ pci_bus_release_unused_bridge_res(b, type,
+ check_leaf);
+ break;
+ }
+ }
+
+ /* The root bus? */
+ if (!bus->self)
+ return;
+
+ switch (bus->self->class >> 8) {
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ break;
+
+ case PCI_CLASS_BRIDGE_PCI:
+ default:
+ if ((check_leaf && is_leaf_bridge) || !check_leaf)
+ pci_bridge_release_unused_res(bus, type);
+ break;
+ }
+}
+
static void pci_bus_dump_res(struct pci_bus *bus)
{
int i;
Index: linux-2.6/include/linux/ioport.h
===================================================================
--- linux-2.6.orig/include/linux/ioport.h
+++ linux-2.6/include/linux/ioport.h
@@ -112,6 +112,7 @@ extern struct resource iomem_resource;
extern int request_resource(struct resource *root, struct resource *new);
extern int release_resource(struct resource *new);
+void release_child_resources(struct resource *new);
extern void reserve_region_with_split(struct resource *root,
resource_size_t start, resource_size_t end,
const char *name);
Index: linux-2.6/kernel/resource.c
===================================================================
--- linux-2.6.orig/kernel/resource.c
+++ linux-2.6/kernel/resource.c
@@ -188,6 +188,36 @@ static int __release_resource(struct res
return -EINVAL;
}
+static void __release_child_resources(struct resource *r)
+{
+ struct resource *tmp, *p;
+ resource_size_t size;
+
+ p = r->child;
+ r->child = NULL;
+ while (p) {
+ tmp = p;
+ p = p->sibling;
+
+ tmp->parent = NULL;
+ tmp->sibling = NULL;
+ __release_child_resources(tmp);
+
+ printk(KERN_DEBUG "release child resource %pR\n", tmp);
+ /* need to restore size, and keep flags */
+ size = resource_size(tmp);
+ tmp->start = 0;
+ tmp->end = size - 1;
+ }
+}
+
+void release_child_resources(struct resource *r)
+{
+ write_lock(&resource_lock);
+ __release_child_resources(r);
+ write_unlock(&resource_lock);
+}
+
/**
* request_resource - request and reserve an I/O or memory resource
* @root: root resource descriptor
On Fri, 18 Dec 2009, Yinghai Lu wrote:
>
> ok, please check attached is right or not
>
> +static void __release_child_resources(struct resource *r)
> +{
> + struct resource *tmp, *p;
> + resource_size_t size;
> +
> + p = r->child;
> + r->child = NULL;
> + while (p) {
> + tmp = p;
> + p = p->sibling;
> +
> + tmp->parent = NULL;
> + tmp->sibling = NULL;
> + __release_child_resources(tmp);
> +
> + printk(KERN_DEBUG "release child resource %pR\n", tmp);
> + /* need to restore size, and keep flags */
> + size = resource_size(tmp);
> + tmp->start = 0;
> + tmp->end = size - 1;
> + }
> +}
Ok, this looks mostly right. I do worry about the alignment information:
you lose that thing for any resource that had IORESOURCE_STARTALIGN set
when you do this thing. That's pretty fundamental to the whole resource
code, I suspect we should just finally add a 'alignment' field to the
resource struct, so that alignment doesn't get lost when a resource is
allocated.
(Do a "git grep IORESOURCE_.*ALIGN" to see the kind of stuff I'm talking
about, and look at he PCI 'setup-bus.c' code that sets that STARTALIGN
thing).
So a preliminary ack on the resource.c parts. The rest I'm still a bit
dubious about, and the whole "we've lost alignment on the resources" is
probably indicative of how none of the resource code has ever really been
designed for this kind of "tear down and build back up again" behavior.
Linus
Linus Torvalds wrote:
>
> On Fri, 18 Dec 2009, Yinghai Lu wrote:
>> ok, please check attached is right or not
>>
>> +static void __release_child_resources(struct resource *r)
>> +{
>> + struct resource *tmp, *p;
>> + resource_size_t size;
>> +
>> + p = r->child;
>> + r->child = NULL;
>> + while (p) {
>> + tmp = p;
>> + p = p->sibling;
>> +
>> + tmp->parent = NULL;
>> + tmp->sibling = NULL;
>> + __release_child_resources(tmp);
>> +
>> + printk(KERN_DEBUG "release child resource %pR\n", tmp);
>> + /* need to restore size, and keep flags */
>> + size = resource_size(tmp);
>> + tmp->start = 0;
>> + tmp->end = size - 1;
>> + }
>> +}
>
> Ok, this looks mostly right. I do worry about the alignment information:
> you lose that thing for any resource that had IORESOURCE_STARTALIGN set
> when you do this thing. That's pretty fundamental to the whole resource
> code, I suspect we should just finally add a 'alignment' field to the
> resource struct, so that alignment doesn't get lost when a resource is
> allocated.
>
> (Do a "git grep IORESOURCE_.*ALIGN" to see the kind of stuff I'm talking
> about, and look at he PCI 'setup-bus.c' code that sets that STARTALIGN
> thing).
>
> So a preliminary ack on the resource.c parts. The rest I'm still a bit
> dubious about, and the whole "we've lost alignment on the resources" is
> probably indicative of how none of the resource code has ever really been
> designed for this kind of "tear down and build back up again" behavior.
arch/powerpc/kernel/pci_of_scan.c: flags |= IORESOURCE_SIZEALIGN;
drivers/pci/probe.c: res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN;
drivers/pci/probe.c: IORESOURCE_SIZEALIGN;
drivers/pci/setup-bus.c: b_res->flags |= IORESOURCE_STARTALIGN;
drivers/pci/setup-bus.c: b_res->flags |= IORESOURCE_STARTALIGN;
drivers/pci/setup-bus.c: b_res[0].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN;
drivers/pci/setup-bus.c: b_res[1].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN;
drivers/pci/setup-bus.c: b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_SIZEALIGN;
drivers/pci/setup-bus.c: b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN;
drivers/pci/setup-bus.c: b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN;
drivers/pci/setup-res.c: res->flags &= ~IORESOURCE_STARTALIGN;
drivers/staging/b3dfg/b3dfg.c: != (IORESOURCE_MEM | IORESOURCE_SIZEALIGN)) {
include/linux/ioport.h:#define IORESOURCE_SIZEALIGN 0x00020000 /* size indicates alignment */
include/linux/ioport.h:#define IORESOURCE_STARTALIGN 0x00040000 /* start field is alignment */
kernel/resource.c: switch (res->flags & (IORESOURCE_SIZEALIGN | IORESOURCE_STARTALIGN)) {
kernel/resource.c: case IORESOURCE_SIZEALIGN:
kernel/resource.c: case IORESOURCE_STARTALIGN:
looks like IORESOURCE_SIZEALIGN is only used by bridge.
and next round. pbus_size_mem will add that back again.
YH
On Fri, 2009-12-18 at 13:24 -0800, Linus Torvalds wrote:
>
> On Fri, 18 Dec 2009, Yinghai Lu wrote:
> >
> > so later we could use it to release small resource before pci assign unassign res
>
> However, I think this one is wrong.
>
> > +static void release_child_resources(struct resource *r)
> > +{
> > + struct resource *p;
> > + resource_size_t size;
> > +
> > + p = r->child;
> > + while (p) {
> > + release_child_resources(p);
> > + release_resource(p);
>
> So not only is this releasing resources that aren't necessarily PCI
> devices, it's releasing the whole tree - regardless of how they were
> allocated and initialized. ...
Help me fill in my mental picture of these resources ... This function
just takes a struct resource, so it doesn't know whether it's a PCI,
ACPI, or other resource. But in Yinghai's usage, I think we do know
that we're starting with a PCI resource. In that case, is it possible
that some child is a non-PCI resource?
The picture in my mind is that once we are downstream of a PCI host
bridge, all child resources must be PCI, and they must all conform to
the forwarding rules for PCI bridges and so on. I know there are PCI
devices with BARs at non-standard places, so Linux wouldn't know about
those resources (and that worries me a bit when we're talking about
reprogramming a bridge window that might be upstream of such a device).
But if Linux *did* learn about those non-standard resources via a quirk
or something, I would still think of those as PCI resources, not ACPI or
something else.
Anyway, I'd like to correct my mental picture if it's mistaken.
Bjorn
On Fri, 2009-12-18 at 16:26 -0800, Yinghai Lu wrote:
> Subject: [PATCH 2/12] pci: add pci_bridge_release_unused_res and pci_bus_release_unused_bridge_res -v3
>
> so later we could use it to release small resource before pci assign unassign res
>
> -v2: change name to release_child_resources according to Jesse
> -v3: according to Linus, move release_child_resources to resource.c
> also need to put the lock around them all to avoid recursive deep.
> (my test case only have one level that need to be released)
>
> Signed-off-by: Yinghai Lu <[email protected]>
>
> ---
> drivers/pci/setup-bus.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++-
> include/linux/ioport.h | 1
> kernel/resource.c | 30 +++++++++++++++
> 3 files changed, 125 insertions(+), 1 deletion(-)
>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c
> @@ -209,7 +209,6 @@ static void pci_setup_bridge_mmio_pref(s
> l = (region.start >> 16) & 0xfff0;
> l |= region.end & 0xfff00000;
> if (res->flags & IORESOURCE_MEM_64) {
> - pref_mem64 = 1;
> bu = upper_32_bits(region.start);
> lu = upper_32_bits(region.end);
> }
> @@ -608,6 +607,100 @@ void __ref pci_bus_assign_resources(cons
> }
> EXPORT_SYMBOL(pci_bus_assign_resources);
>
> +static void pci_bridge_release_unused_res(struct pci_bus *bus,
> + unsigned long type)
> +{
> + int idx;
> + bool changed = false;
> + struct pci_dev *dev;
> + struct resource *r;
> + unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> + IORESOURCE_PREFETCH;
> +
> + /* for pci bridges res only */
> + dev = bus->self;
> + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES + 3;
I *think* this magic "3" refers to the three possible windows supported
by PCI bridges (I/O, memory, prefetchable memory). I asked you before
about using something more descriptive here; do you have any ideas? I
think even PCI_BRIDGE_RESOURCE_NUM (4) would be better, because I think
that's the number of bridge windows we support (CardBus bridges have
four windows, regular PCI bridges only have three). For regular PCI
bridges, that fourth window should be NULL, so it could easily be
skipped here.
> + idx++) {
> + r = &dev->resource[idx];
> + if ((r->flags & type_mask) != type)
> + continue;
> + if (!r->parent)
> + continue;
> + /*
> + * if there are children under that, we should release them
> + * all
> + */
> + release_child_resources(r);
Sorry for my ignorance here, but is it possible there's a driver using
any of these child devices?
> + if (!release_resource(r)) {
> + dev_printk(KERN_DEBUG, &dev->dev,
> + "resource %d %pR released\n", idx, r);
> + /* keep the old size */
> + r->end = resource_size(r) - 1;
> + r->start = 0;
> + r->flags = 0;
> + changed = true;
> + }
> + }
> +
> + if (changed) {
> + if (type & IORESOURCE_PREFETCH) {
> + /* avoiding touch the one without PREF */
> + type = IORESOURCE_PREFETCH;
> + }
Again, it's probably perfectly obvious why we need to leave the
non-prefetchable window untouched, but I don't know the reason. Can you
add a comment about why that's important?
> + __pci_setup_bridge(bus, type);
> + }
> +}
> +
> +/*
> + * try to release pci bridge resources that is from leaf bridge,
> + * so we can allocate big new one later
> + * check:
> + * 0: only release the bridge and only the bridge is leaf
> + * 1: release all down side bridge for third shoot
> + */
> +static void __ref pci_bus_release_unused_bridge_res(struct pci_bus *bus,
> + unsigned long type,
> + int check_leaf)
> +{
> + struct pci_dev *dev;
> + bool is_leaf_bridge = true;
> +
> + list_for_each_entry(dev, &bus->devices, bus_list) {
> + struct pci_bus *b = dev->subordinate;
> + if (!b)
> + continue;
> +
> + switch (dev->class >> 8) {
> + case PCI_CLASS_BRIDGE_CARDBUS:
> + is_leaf_bridge = false;
> + break;
> +
> + case PCI_CLASS_BRIDGE_PCI:
> + default:
> + is_leaf_bridge = false;
> + if (!check_leaf)
> + pci_bus_release_unused_bridge_res(b, type,
> + check_leaf);
> + break;
> + }
> + }
> +
> + /* The root bus? */
> + if (!bus->self)
> + return;
> +
> + switch (bus->self->class >> 8) {
> + case PCI_CLASS_BRIDGE_CARDBUS:
> + break;
> +
> + case PCI_CLASS_BRIDGE_PCI:
Sorry for my ignorance again. Why do we have to treat CardBus bridges
so much differently? I realize their windows are programmed a bit
differently, but I don't know what the conceptual difference is as far
as a bridge window being too small to accomodate all downstream devices.
> + default:
> + if ((check_leaf && is_leaf_bridge) || !check_leaf)
> + pci_bridge_release_unused_res(bus, type);
> + break;
> + }
> +}
> +
> static void pci_bus_dump_res(struct pci_bus *bus)
> {
> int i;
Bjorn
Bjorn Helgaas wrote:
> On Fri, 2009-12-18 at 16:26 -0800, Yinghai Lu wrote:
>> Subject: [PATCH 2/12] pci: add pci_bridge_release_unused_res and pci_bus_release_unused_bridge_res -v3
>>
>> so later we could use it to release small resource before pci assign unassign res
>>
>> -v2: change name to release_child_resources according to Jesse
>> -v3: according to Linus, move release_child_resources to resource.c
>> also need to put the lock around them all to avoid recursive deep.
>> (my test case only have one level that need to be released)
>>
>> Signed-off-by: Yinghai Lu <[email protected]>
>>
>> ---
>> drivers/pci/setup-bus.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++-
>> include/linux/ioport.h | 1
>> kernel/resource.c | 30 +++++++++++++++
>> 3 files changed, 125 insertions(+), 1 deletion(-)
>>
>> Index: linux-2.6/drivers/pci/setup-bus.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/pci/setup-bus.c
>> +++ linux-2.6/drivers/pci/setup-bus.c
>> @@ -209,7 +209,6 @@ static void pci_setup_bridge_mmio_pref(s
>> l = (region.start >> 16) & 0xfff0;
>> l |= region.end & 0xfff00000;
>> if (res->flags & IORESOURCE_MEM_64) {
>> - pref_mem64 = 1;
>> bu = upper_32_bits(region.start);
>> lu = upper_32_bits(region.end);
>> }
>> @@ -608,6 +607,100 @@ void __ref pci_bus_assign_resources(cons
>> }
>> EXPORT_SYMBOL(pci_bus_assign_resources);
>>
>> +static void pci_bridge_release_unused_res(struct pci_bus *bus,
>> + unsigned long type)
>> +{
>> + int idx;
>> + bool changed = false;
>> + struct pci_dev *dev;
>> + struct resource *r;
>> + unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
>> + IORESOURCE_PREFETCH;
>> +
>> + /* for pci bridges res only */
>> + dev = bus->self;
>> + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES + 3;
>
> I *think* this magic "3" refers to the three possible windows supported
> by PCI bridges (I/O, memory, prefetchable memory). I asked you before
> about using something more descriptive here; do you have any ideas? I
> think even PCI_BRIDGE_RESOURCE_NUM (4) would be better, because I think
> that's the number of bridge windows we support (CardBus bridges have
> four windows, regular PCI bridges only have three). For regular PCI
> bridges, that fourth window should be NULL, so it could easily be
> skipped here.
ok. will try to use PCI_BRIDGE_RESOURCE_NUM
>
>> + idx++) {
>> + r = &dev->resource[idx];
>> + if ((r->flags & type_mask) != type)
>> + continue;
>> + if (!r->parent)
>> + continue;
>> + /*
>> + * if there are children under that, we should release them
>> + * all
>> + */
>> + release_child_resources(r);
>
> Sorry for my ignorance here, but is it possible there's a driver using
> any of these child devices?
Good question!.
this function is only being called
1. during pci_assign unassgined resources. during first...so no driver using those...
2. during loading pciehp device under that pcie port are assigned pci resource. at that time no resource are claimed for those devices.
>
>> + if (!release_resource(r)) {
>> + dev_printk(KERN_DEBUG, &dev->dev,
>> + "resource %d %pR released\n", idx, r);
>> + /* keep the old size */
>> + r->end = resource_size(r) - 1;
>> + r->start = 0;
>> + r->flags = 0;
>> + changed = true;
>> + }
>> + }
>> +
>> + if (changed) {
>> + if (type & IORESOURCE_PREFETCH) {
>> + /* avoiding touch the one without PREF */
>> + type = IORESOURCE_PREFETCH;
>> + }
>
> Again, it's probably perfectly obvious why we need to leave the
> non-prefetchable window untouched, but I don't know the reason. Can you
> add a comment about why that's important?
only release related type.
for example: the device under that bridge, didn't get pref mmio correctly assigned,
will only try to release bridge's pref mmio window instead of release mmio window at
same time.
>
>> + __pci_setup_bridge(bus, type);
>> + }
>> +}
>> +
>> +/*
>> + * try to release pci bridge resources that is from leaf bridge,
>> + * so we can allocate big new one later
>> + * check:
>> + * 0: only release the bridge and only the bridge is leaf
>> + * 1: release all down side bridge for third shoot
>> + */
>> +static void __ref pci_bus_release_unused_bridge_res(struct pci_bus *bus,
>> + unsigned long type,
>> + int check_leaf)
>> +{
>> + struct pci_dev *dev;
>> + bool is_leaf_bridge = true;
>> +
>> + list_for_each_entry(dev, &bus->devices, bus_list) {
>> + struct pci_bus *b = dev->subordinate;
>> + if (!b)
>> + continue;
>> +
>> + switch (dev->class >> 8) {
>> + case PCI_CLASS_BRIDGE_CARDBUS:
>> + is_leaf_bridge = false;
>> + break;
>> +
>> + case PCI_CLASS_BRIDGE_PCI:
>> + default:
>> + is_leaf_bridge = false;
>> + if (!check_leaf)
>> + pci_bus_release_unused_bridge_res(b, type,
>> + check_leaf);
>> + break;
>> + }
>> + }
>> +
>> + /* The root bus? */
>> + if (!bus->self)
>> + return;
>> +
>> + switch (bus->self->class >> 8) {
>> + case PCI_CLASS_BRIDGE_CARDBUS:
>> + break;
>> +
>> + case PCI_CLASS_BRIDGE_PCI:
>
> Sorry for my ignorance again. Why do we have to treat CardBus bridges
> so much differently? I realize their windows are programmed a bit
> differently, but I don't know what the conceptual difference is as far
> as a bridge window being too small to accomodate all downstream devices.
i didn't have those cardbus stuff around, and have no way to test them.
and looks like some special code for card bus etc handling. so could leave other
guys to play them later.
thanks for closely reading through the code.
Yinghai