2006-09-26 19:15:16

by Jeff Garzik

[permalink] [raw]
Subject: [PATCH] x86[-64] PCI domain support


The x86[-64] PCI domain effort needs to be restarted, because we've got
machines out in the field that need this in order for some devices to
work.

RHEL is shipping it now, apparently without any problems.

The 'pciseg' branch of
git://git.kernel.org/pub/scm/linux/kernel/git/jgarzik/misc-2.6.git pciseg

to receive the following updates:

arch/i386/pci/acpi.c | 25 +++++++++++++++++++++----
arch/i386/pci/common.c | 19 ++++++++++++++++---
arch/x86_64/pci/k8-bus.c | 6 +++++-
include/asm-i386/pci.h | 19 +++++++++++++++++++
include/asm-i386/topology.h | 2 +-
include/asm-x86_64/pci.h | 18 ++++++++++++++++++
include/asm-x86_64/topology.h | 2 +-
7 files changed, 81 insertions(+), 10 deletions(-)

Jeff Garzik:
[x86, PCI] pass PCI domain number to PCI config read/write hooks
[x86, PCI] Switch pci_bus::sysdata from NUMA node integer to a pointer
[x86, PCI] add PCI domain support

diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c
index b33aea8..e4f4828 100644
--- a/arch/i386/pci/acpi.c
+++ b/arch/i386/pci/acpi.c
@@ -8,20 +8,37 @@ #include "pci.h"
struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
{
struct pci_bus *bus;
+ struct pci_sysdata *sd;

+ /* Allocate per-root-bus (not per bus) arch-specific data.
+ * TODO: leak; this memory is never freed.
+ * It's arguable whether it's worth the trouble to care.
+ */
+ sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+ if (!sd) {
+ printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
+ return NULL;
+ }
+
+#ifdef CONFIG_PCI_DOMAINS
+ sd->domain = domain;
+#else
if (domain != 0) {
printk(KERN_WARNING "PCI: Multiple domains not supported\n");
return NULL;
}
+#endif /* CONFIG_PCI_DOMAINS */

- bus = pcibios_scan_root(busnum);
+ bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd);
+ if (!bus)
+ kfree(sd);
#ifdef CONFIG_ACPI_NUMA
if (bus != NULL) {
int pxm = acpi_get_pxm(device->handle);
if (pxm >= 0) {
- bus->sysdata = (void *)(unsigned long)pxm_to_node(pxm);
- printk("bus %d -> pxm %d -> node %ld\n",
- busnum, pxm, (long)(bus->sysdata));
+ sd->node = pxm_to_node(pxm);
+ printk("bus %d -> pxm %d -> node %d\n",
+ busnum, pxm, sd->node);
}
}
#endif
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index 0a362e3..21bf223 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -28,12 +28,14 @@ struct pci_raw_ops *raw_pci_ops;

static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
{
- return raw_pci_ops->read(0, bus->number, devfn, where, size, value);
+ return raw_pci_ops->read(pci_domain_nr(bus), bus->number,
+ devfn, where, size, value);
}

static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
{
- return raw_pci_ops->write(0, bus->number, devfn, where, size, value);
+ return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
+ devfn, where, size, value);
}

struct pci_ops pci_root_ops = {
@@ -150,6 +152,7 @@ #endif /* __i386__ */
struct pci_bus * __devinit pcibios_scan_root(int busnum)
{
struct pci_bus *bus = NULL;
+ struct pci_sysdata *sd;

dmi_check_system(pciprobe_dmi_table);

@@ -160,9 +163,19 @@ struct pci_bus * __devinit pcibios_scan_
}
}

+ /* Allocate per-root-bus (not per bus) arch-specific data.
+ * TODO: leak; this memory is never freed.
+ * It's arguable whether it's worth the trouble to care.
+ */
+ sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+ if (!sd) {
+ printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
+ return NULL;
+ }
+
printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum);

- return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, NULL);
+ return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd);
}

extern u8 pci_cache_line_size;
diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c
index 3acf60d..9cc813e 100644
--- a/arch/x86_64/pci/k8-bus.c
+++ b/arch/x86_64/pci/k8-bus.c
@@ -59,6 +59,8 @@ fill_mp_bus_to_cpumask(void)
j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus);
j++) {
struct pci_bus *bus;
+ struct pci_sysdata *sd;
+
long node = NODE_ID(nid);
/* Algorithm a bit dumb, but
it shouldn't matter here */
@@ -67,7 +69,9 @@ fill_mp_bus_to_cpumask(void)
continue;
if (!node_online(node))
node = 0;
- bus->sysdata = (void *)node;
+
+ sd = bus->sysdata;
+ sd->node = node;
}
}
}
diff --git a/include/asm-i386/pci.h b/include/asm-i386/pci.h
index 64b6d0b..2c8b5e9 100644
--- a/include/asm-i386/pci.h
+++ b/include/asm-i386/pci.h
@@ -3,6 +3,25 @@ #define __i386_PCI_H


#ifdef __KERNEL__
+
+struct pci_sysdata {
+ int domain; /* PCI domain */
+ int node; /* NUMA node */
+};
+
+#ifdef CONFIG_PCI_DOMAINS
+static inline int pci_domain_nr(struct pci_bus *bus)
+{
+ struct pci_sysdata *sd = bus->sysdata;
+ return sd->domain;
+}
+
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+ return pci_domain_nr(bus);
+}
+#endif /* CONFIG_PCI_DOMAINS */
+
#include <linux/mm.h> /* for struct page */

/* Can be used to override the logic in pci_scan_bus for skipping
diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h
index 6adbd9b..9234497 100644
--- a/include/asm-i386/topology.h
+++ b/include/asm-i386/topology.h
@@ -67,7 +67,7 @@ static inline int node_to_first_cpu(int
return first_cpu(mask);
}

-#define pcibus_to_node(bus) ((long) (bus)->sysdata)
+#define pcibus_to_node(bus) ((struct pci_sysdata *)((bus)->sysdata))->node
#define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus))

/* sched_domains SD_NODE_INIT for NUMAQ machines */
diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h
index 49c5e92..e7a863c 100644
--- a/include/asm-x86_64/pci.h
+++ b/include/asm-x86_64/pci.h
@@ -5,6 +5,24 @@ #include <asm/io.h>

#ifdef __KERNEL__

+struct pci_sysdata {
+ int domain; /* PCI domain */
+ int node; /* NUMA node */
+};
+
+#ifdef CONFIG_PCI_DOMAINS
+static inline int pci_domain_nr(struct pci_bus *bus)
+{
+ struct pci_sysdata *sd = bus->sysdata;
+ return sd->domain;
+}
+
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+ return pci_domain_nr(bus);
+}
+#endif /* CONFIG_PCI_DOMAINS */
+
#include <linux/mm.h> /* for struct page */

/* Can be used to override the logic in pci_scan_bus for skipping
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index 6e7a2e9..cd24646 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -22,7 +22,7 @@ #define cpu_to_node(cpu) (cpu_to_node[c
#define parent_node(node) (node)
#define node_to_first_cpu(node) (first_cpu(node_to_cpumask[node]))
#define node_to_cpumask(node) (node_to_cpumask[node])
-#define pcibus_to_node(bus) ((long)(bus->sysdata))
+#define pcibus_to_node(bus) ((struct pci_sysdata *)((bus)->sysdata))->node
#define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus));

#define numa_node_id() read_pda(nodenumber)


2006-09-26 20:23:22

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

On Tue, Sep 26, 2006 at 03:15:08PM -0400, Jeff Garzik wrote:
>
> The x86[-64] PCI domain effort needs to be restarted, because we've got
> machines out in the field that need this in order for some devices to
> work.
>
> RHEL is shipping it now, apparently without any problems.
>
> The 'pciseg' branch of
> git://git.kernel.org/pub/scm/linux/kernel/git/jgarzik/misc-2.6.git pciseg

So are the NUMA issues now taken care of properly? If so, care to send
me the patches for this so I can add them to my quilt tree?

thanks,

greg k-h

2006-09-26 20:27:29

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

On Tuesday 26 September 2006 22:23, Greg KH wrote:
> On Tue, Sep 26, 2006 at 03:15:08PM -0400, Jeff Garzik wrote:
> >
> > The x86[-64] PCI domain effort needs to be restarted, because we've got
> > machines out in the field that need this in order for some devices to
> > work.
> >
> > RHEL is shipping it now, apparently without any problems.
> >
> > The 'pciseg' branch of
> > git://git.kernel.org/pub/scm/linux/kernel/git/jgarzik/misc-2.6.git pciseg
>
> So are the NUMA issues now taken care of properly?

I assume IBM would have complained if it was broken in RHEL.

But iirc Summit has two BIOS options for this. Maybe they just recommend
to set the other one for Linux. Anyways I guess that can be all figured
out once the patch is in tree.

-Andi

2006-09-26 20:44:35

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

Greg KH wrote:
> On Tue, Sep 26, 2006 at 03:15:08PM -0400, Jeff Garzik wrote:
>> The x86[-64] PCI domain effort needs to be restarted, because we've got
>> machines out in the field that need this in order for some devices to
>> work.
>>
>> RHEL is shipping it now, apparently without any problems.
>>
>> The 'pciseg' branch of
>> git://git.kernel.org/pub/scm/linux/kernel/git/jgarzik/misc-2.6.git pciseg
>
> So are the NUMA issues now taken care of properly? If so, care to send
> me the patches for this so I can add them to my quilt tree?

Er, I just posted the combined patch for review. Can't you pull from
the above URL? It's a bit of a pain to dive in and out of git.

Jeff



2006-09-27 05:03:56

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

On Tue, Sep 26, 2006 at 04:44:28PM -0400, Jeff Garzik wrote:
> Greg KH wrote:
> >On Tue, Sep 26, 2006 at 03:15:08PM -0400, Jeff Garzik wrote:
> >>The x86[-64] PCI domain effort needs to be restarted, because we've got
> >>machines out in the field that need this in order for some devices to
> >>work.
> >>
> >>RHEL is shipping it now, apparently without any problems.
> >>
> >>The 'pciseg' branch of
> >>git://git.kernel.org/pub/scm/linux/kernel/git/jgarzik/misc-2.6.git pciseg
> >
> >So are the NUMA issues now taken care of properly? If so, care to send
> >me the patches for this so I can add them to my quilt tree?
>
> Er, I just posted the combined patch for review. Can't you pull from
> the above URL? It's a bit of a pain to dive in and out of git.

I agree, it is a pain :)

And I don't use git for patch maintenance, only for sending stuff to
Linus. So I'd have to pull this and extract out the patches and edit
the headers in order to get them into my tree in mbox form (as my
patchflow works off of emails.)

So, care to make it a bit easier for me to take these?

thanks,

greg k-h

2006-09-27 07:27:41

by Rolf Eike Beer

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

Jeff Garzik wrote:

> diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c
> index b33aea8..e4f4828 100644
> --- a/arch/i386/pci/acpi.c
> +++ b/arch/i386/pci/acpi.c
> @@ -8,20 +8,37 @@ #include "pci.h"
> struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device,
> int domain, int busnum) {
> struct pci_bus *bus;
> + struct pci_sysdata *sd;
>
> + /* Allocate per-root-bus (not per bus) arch-specific data.
> + * TODO: leak; this memory is never freed.
> + * It's arguable whether it's worth the trouble to care.
> + */
> + sd = kzalloc(sizeof(*sd), GFP_KERNEL);
> + if (!sd) {
> + printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
> + return NULL;
> + }
> +
> +#ifdef CONFIG_PCI_DOMAINS
> + sd->domain = domain;
> +#else
> if (domain != 0) {
> printk(KERN_WARNING "PCI: Multiple domains not supported\n");

kfree(sd);


> return NULL;
> }
> +#endif /* CONFIG_PCI_DOMAINS */

I would move this check to be done before the memory is allocated so we don't
need to free it.

Eike


Attachments:
(No filename) (1.01 kB)
(No filename) (189.00 B)
Download all attachments

2006-09-28 09:33:38

by Muli Ben-Yehuda

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

On Tue, Sep 26, 2006 at 03:15:08PM -0400, Jeff Garzik wrote:
>
> The x86[-64] PCI domain effort needs to be restarted, because we've got
> machines out in the field that need this in order for some devices to
> work.
>
This breaks the Calgary IOMMU, since it uses sysdata for other
purposes (going back from a bus to its IO address space). I'm looking
into it.

Cheers,
Muli

2006-09-28 09:45:58

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

Muli Ben-Yehuda wrote:
> On Tue, Sep 26, 2006 at 03:15:08PM -0400, Jeff Garzik wrote:
>> The x86[-64] PCI domain effort needs to be restarted, because we've got
>> machines out in the field that need this in order for some devices to
>> work.
>>
> This breaks the Calgary IOMMU, since it uses sysdata for other
> purposes (going back from a bus to its IO address space). I'm looking
> into it.

You'll need to modify struct pci_sysdata in
include/asm-{i386,x86_64}/pci.h to include the data that you previously
stored directly into the sysdata pointer.

Jeff



2006-09-28 22:45:59

by Muli Ben-Yehuda

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

On Thu, Sep 28, 2006 at 05:45:41AM -0400, Jeff Garzik wrote:
> Muli Ben-Yehuda wrote:
> >On Tue, Sep 26, 2006 at 03:15:08PM -0400, Jeff Garzik wrote:
> >>The x86[-64] PCI domain effort needs to be restarted, because we've got
> >>machines out in the field that need this in order for some devices to
> >>work.
> >>
> >This breaks the Calgary IOMMU, since it uses sysdata for other
> >purposes (going back from a bus to its IO address space). I'm looking
> >into it.
>
> You'll need to modify struct pci_sysdata in
> include/asm-{i386,x86_64}/pci.h to include the data that you previously
> stored directly into the sysdata pointer.

Something like this should do the trick. Note - this should not be
applied yet - after several gigabytes of network and disk activity it
takes aic94xx down. More investigation required.

diff -Naurp -X /home/muli/w/dontdiff pci-domains/arch/x86_64/kernel/pci-calgary.c linux/arch/x86_64/kernel/pci-calgary.c
--- pci-domains/arch/x86_64/kernel/pci-calgary.c 2006-09-28 13:31:14.000000000 +0300
+++ linux/arch/x86_64/kernel/pci-calgary.c 2006-09-28 13:14:38.000000000 +0300
@@ -317,7 +317,7 @@ void calgary_unmap_sg(struct device *dev
int nelems, int direction)
{
unsigned long flags;
- struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+ struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus);

if (!translate_phb(to_pci_dev(dev)))
return;
@@ -346,7 +346,7 @@ static int calgary_nontranslate_map_sg(s
int calgary_map_sg(struct device *dev, struct scatterlist *sg,
int nelems, int direction)
{
- struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+ struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus);
unsigned long flags;
unsigned long vaddr;
unsigned int npages;
@@ -400,7 +400,7 @@ dma_addr_t calgary_map_single(struct dev
dma_addr_t dma_handle = bad_dma_address;
unsigned long uaddr;
unsigned int npages;
- struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+ struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus);

uaddr = (unsigned long)vaddr;
npages = num_dma_pages(uaddr, size);
@@ -416,7 +416,7 @@ dma_addr_t calgary_map_single(struct dev
void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
size_t size, int direction)
{
- struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+ struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus);
unsigned int npages;

if (!translate_phb(to_pci_dev(dev)))
@@ -434,7 +434,7 @@ void* calgary_alloc_coherent(struct devi
unsigned int npages, order;
struct iommu_table *tbl;

- tbl = to_pci_dev(dev)->bus->self->sysdata;
+ tbl = pci_iommu(to_pci_dev(dev)->bus);

size = PAGE_ALIGN(size); /* size rounded up to full pages */
npages = size >> PAGE_SHIFT;
@@ -551,7 +551,7 @@ static void __init calgary_reserve_mem_r
limit++;

numpages = ((limit - start) >> PAGE_SHIFT);
- iommu_range_reserve(dev->sysdata, start, numpages);
+ iommu_range_reserve(pci_iommu(dev->bus), start, numpages);
}

static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
@@ -559,7 +559,7 @@ static void __init calgary_reserve_perip
void __iomem *target;
u64 low, high, sizelow;
u64 start, limit;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);
unsigned char busnum = dev->bus->number;
void __iomem *bbar = tbl->bbar;

@@ -583,7 +583,7 @@ static void __init calgary_reserve_perip
u32 val32;
u64 low, high, sizelow, sizehigh;
u64 start, limit;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);
unsigned char busnum = dev->bus->number;
void __iomem *bbar = tbl->bbar;

@@ -621,7 +621,7 @@ static void __init calgary_reserve_regio
void __iomem *bbar;
unsigned char busnum;
u64 start;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);

bbar = tbl->bbar;
busnum = dev->bus->number;
@@ -652,7 +652,7 @@ static int __init calgary_setup_tar(stru
if (ret)
return ret;

- tbl = dev->sysdata;
+ tbl = pci_iommu(dev->bus);
tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
tce_free(tbl, 0, tbl->it_size);

@@ -665,7 +665,7 @@ static int __init calgary_setup_tar(stru
/* zero out all TAR bits under sw control */
val64 &= ~TAR_SW_BITS;

- tbl = dev->sysdata;
+ tbl = pci_iommu(dev->bus);
table_phys = (u64)__pa(tbl->it_base);
val64 |= table_phys;

@@ -682,7 +682,7 @@ static int __init calgary_setup_tar(stru
static void __init calgary_free_bus(struct pci_dev *dev)
{
u64 val64;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);
void __iomem *target;
unsigned int bitmapsz;

@@ -697,7 +697,8 @@ static void __init calgary_free_bus(stru
tbl->it_map = NULL;

kfree(tbl);
- dev->sysdata = NULL;
+
+ set_pci_iommu(dev->bus, NULL);

/* Can't free bootmem allocated memory after system is up :-( */
bus_info[dev->bus->number].tce_space = NULL;
@@ -706,7 +707,7 @@ static void __init calgary_free_bus(stru
static void calgary_watchdog(unsigned long data)
{
struct pci_dev *dev = (struct pci_dev *)data;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);
void __iomem *bbar = tbl->bbar;
u32 val32;
void __iomem *target;
@@ -742,7 +743,7 @@ static void __init calgary_enable_transl
struct iommu_table *tbl;

busnum = dev->bus->number;
- tbl = dev->sysdata;
+ tbl = pci_iommu(dev->bus);
bbar = tbl->bbar;

/* enable TCE in PHB Config Register */
@@ -772,7 +773,7 @@ static void __init calgary_disable_trans
struct iommu_table *tbl;

busnum = dev->bus->number;
- tbl = dev->sysdata;
+ tbl = pci_iommu(dev->bus);
bbar = tbl->bbar;

/* disable TCE in PHB Config Register */
@@ -813,8 +814,7 @@ static inline unsigned int __init locate
static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
{
pci_dev_get(dev);
- dev->sysdata = NULL;
- dev->bus->self = dev;
+ set_pci_iommu(dev->bus, NULL);
}

static int __init calgary_init_one(struct pci_dev *dev)
diff -Naurp -X /home/muli/w/dontdiff pci-domains/arch/x86_64/kernel/tce.c linux/arch/x86_64/kernel/tce.c
--- pci-domains/arch/x86_64/kernel/tce.c 2006-09-28 13:31:14.000000000 +0300
+++ linux/arch/x86_64/kernel/tce.c 2006-09-28 13:40:52.000000000 +0300
@@ -137,9 +137,9 @@ int build_tce_table(struct pci_dev *dev,
struct iommu_table *tbl;
int ret;

- if (dev->sysdata) {
- printk(KERN_ERR "Calgary: dev %p has sysdata %p\n",
- dev, dev->sysdata);
+ if (pci_iommu(dev->bus)) {
+ printk(KERN_ERR "Calgary: dev %p has sysdata->iommu %p\n",
+ dev, pci_iommu(dev->bus));
BUG();
}

@@ -156,11 +156,7 @@ int build_tce_table(struct pci_dev *dev,

tbl->bbar = bbar;

- /*
- * NUMA is already using the bus's sysdata pointer, so we use
- * the bus's pci_dev's sysdata instead.
- */
- dev->sysdata = tbl;
+ set_pci_iommu(dev->bus, tbl);

return 0;

diff -Naurp -X /home/muli/w/dontdiff pci-domains/include/asm-x86_64/pci.h linux/include/asm-x86_64/pci.h
--- pci-domains/include/asm-x86_64/pci.h 2006-09-28 13:34:05.000000000 +0300
+++ linux/include/asm-x86_64/pci.h 2006-09-28 15:19:56.000000000 +0300
@@ -8,6 +8,7 @@
struct pci_sysdata {
int domain; /* PCI domain */
int node; /* NUMA node */
+ void* iommu; /* IOMMU private data */
};

#ifdef CONFIG_PCI_DOMAINS
@@ -23,6 +24,20 @@ static inline int pci_proc_domain(struct
}
#endif /* CONFIG_PCI_DOMAINS */

+#ifdef CONFIG_CALGARY_IOMMU
+static inline void* pci_iommu(struct pci_bus *bus)
+{
+ struct pci_sysdata *sd = bus->sysdata;
+ return sd->iommu;
+}
+
+static inline void set_pci_iommu(struct pci_bus *bus, void *val)
+{
+ struct pci_sysdata *sd = bus->sysdata;
+ sd->iommu = val;
+}
+#endif /* CONFIG_CALGARY_IOMMU */
+
#include <linux/mm.h> /* for struct page */

/* Can be used to override the logic in pci_scan_bus for skipping

2006-09-28 23:03:40

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

Muli Ben-Yehuda wrote:
> On Thu, Sep 28, 2006 at 05:45:41AM -0400, Jeff Garzik wrote:
>> Muli Ben-Yehuda wrote:
>>> On Tue, Sep 26, 2006 at 03:15:08PM -0400, Jeff Garzik wrote:
>>>> The x86[-64] PCI domain effort needs to be restarted, because we've got
>>>> machines out in the field that need this in order for some devices to
>>>> work.
>>>>
>>> This breaks the Calgary IOMMU, since it uses sysdata for other
>>> purposes (going back from a bus to its IO address space). I'm looking
>>> into it.
>> You'll need to modify struct pci_sysdata in
>> include/asm-{i386,x86_64}/pci.h to include the data that you previously
>> stored directly into the sysdata pointer.
>
> Something like this should do the trick. Note - this should not be
> applied yet - after several gigabytes of network and disk activity it
> takes aic94xx down. More investigation required.

hmmmm. What kernels did you test?

I would suggest testing

jgarzik/misc-2.6.git#master -> vanilla Linux kernel
jgarzik/misc-2.6.git#pciseg -> #master + PCI domain support
#pciseg + your patch

That should narrow down the problems. A problem with aic94xx sorta
sounds like something unrelated.


> diff -Naurp -X /home/muli/w/dontdiff pci-domains/arch/x86_64/kernel/pci-calgary.c linux/arch/x86_64/kernel/pci-calgary.c
> --- pci-domains/arch/x86_64/kernel/pci-calgary.c 2006-09-28 13:31:14.000000000 +0300
> +++ linux/arch/x86_64/kernel/pci-calgary.c 2006-09-28 13:14:38.000000000 +0300

ACK patch


2006-09-28 23:31:22

by Muli Ben-Yehuda

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

On Thu, Sep 28, 2006 at 07:03:28PM -0400, Jeff Garzik wrote:

> hmmmm. What kernels did you test?

mainline as of today + several unrelated Calgary patches I'll post
shortly + your PCI domains patch + my Calgary patch. I'll test with
iommu=off next.

> That should narrow down the problems. A problem with aic94xx sorta
> sounds like something unrelated.

Not necessarily - Calgary is an isolating IOMMU, meaning that if we
set up a mapping for aic94xx in the wrong IO space due to a Calgary
bug, aic94xx will fall over and die. Usually however this happens a
lot sooner. Also, we have code in Calgary to detect when an errant DMA
happens and it hasn't triggered in this case.

Cheers,
Muli

2006-09-29 13:40:34

by Jon Mason

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

On Fri, Sep 29, 2006 at 01:45:50AM +0300, Muli Ben-Yehuda wrote:
> On Thu, Sep 28, 2006 at 05:45:41AM -0400, Jeff Garzik wrote:
> > Muli Ben-Yehuda wrote:
> > >On Tue, Sep 26, 2006 at 03:15:08PM -0400, Jeff Garzik wrote:
> > >>The x86[-64] PCI domain effort needs to be restarted, because we've got
> > >>machines out in the field that need this in order for some devices to
> > >>work.
> > >>
> > >This breaks the Calgary IOMMU, since it uses sysdata for other
> > >purposes (going back from a bus to its IO address space). I'm looking
> > >into it.
> >
> > You'll need to modify struct pci_sysdata in
> > include/asm-{i386,x86_64}/pci.h to include the data that you previously
> > stored directly into the sysdata pointer.
>
> Something like this should do the trick. Note - this should not be
> applied yet - after several gigabytes of network and disk activity it
> takes aic94xx down. More investigation required.

Nak! The calgary code should not be affected by these patches. The
PCI domain code uses the sysdata pointer from struct pci_bus, where as
calgary uses the sysdata pointer from struct pci_dev. This should not
be an issue. Can you confirm actual breakage?

The sysdata pointer from struct pci_bus is used by NUMA, which we found
out when we tried to use it for our calgary code initially. I'm sure
someone has been checking that NUMA works with these PCI domain
patches....

Thanks,
Jon


>
> diff -Naurp -X /home/muli/w/dontdiff pci-domains/arch/x86_64/kernel/pci-calgary.c linux/arch/x86_64/kernel/pci-calgary.c
> --- pci-domains/arch/x86_64/kernel/pci-calgary.c 2006-09-28 13:31:14.000000000 +0300
> +++ linux/arch/x86_64/kernel/pci-calgary.c 2006-09-28 13:14:38.000000000 +0300
> @@ -317,7 +317,7 @@ void calgary_unmap_sg(struct device *dev
> int nelems, int direction)
> {
> unsigned long flags;
> - struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
> + struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus);
>
> if (!translate_phb(to_pci_dev(dev)))
> return;
> @@ -346,7 +346,7 @@ static int calgary_nontranslate_map_sg(s
> int calgary_map_sg(struct device *dev, struct scatterlist *sg,
> int nelems, int direction)
> {
> - struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
> + struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus);
> unsigned long flags;
> unsigned long vaddr;
> unsigned int npages;
> @@ -400,7 +400,7 @@ dma_addr_t calgary_map_single(struct dev
> dma_addr_t dma_handle = bad_dma_address;
> unsigned long uaddr;
> unsigned int npages;
> - struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
> + struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus);
>
> uaddr = (unsigned long)vaddr;
> npages = num_dma_pages(uaddr, size);
> @@ -416,7 +416,7 @@ dma_addr_t calgary_map_single(struct dev
> void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
> size_t size, int direction)
> {
> - struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
> + struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus);
> unsigned int npages;
>
> if (!translate_phb(to_pci_dev(dev)))
> @@ -434,7 +434,7 @@ void* calgary_alloc_coherent(struct devi
> unsigned int npages, order;
> struct iommu_table *tbl;
>
> - tbl = to_pci_dev(dev)->bus->self->sysdata;
> + tbl = pci_iommu(to_pci_dev(dev)->bus);
>
> size = PAGE_ALIGN(size); /* size rounded up to full pages */
> npages = size >> PAGE_SHIFT;
> @@ -551,7 +551,7 @@ static void __init calgary_reserve_mem_r
> limit++;
>
> numpages = ((limit - start) >> PAGE_SHIFT);
> - iommu_range_reserve(dev->sysdata, start, numpages);
> + iommu_range_reserve(pci_iommu(dev->bus), start, numpages);
> }
>
> static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
> @@ -559,7 +559,7 @@ static void __init calgary_reserve_perip
> void __iomem *target;
> u64 low, high, sizelow;
> u64 start, limit;
> - struct iommu_table *tbl = dev->sysdata;
> + struct iommu_table *tbl = pci_iommu(dev->bus);
> unsigned char busnum = dev->bus->number;
> void __iomem *bbar = tbl->bbar;
>
> @@ -583,7 +583,7 @@ static void __init calgary_reserve_perip
> u32 val32;
> u64 low, high, sizelow, sizehigh;
> u64 start, limit;
> - struct iommu_table *tbl = dev->sysdata;
> + struct iommu_table *tbl = pci_iommu(dev->bus);
> unsigned char busnum = dev->bus->number;
> void __iomem *bbar = tbl->bbar;
>
> @@ -621,7 +621,7 @@ static void __init calgary_reserve_regio
> void __iomem *bbar;
> unsigned char busnum;
> u64 start;
> - struct iommu_table *tbl = dev->sysdata;
> + struct iommu_table *tbl = pci_iommu(dev->bus);
>
> bbar = tbl->bbar;
> busnum = dev->bus->number;
> @@ -652,7 +652,7 @@ static int __init calgary_setup_tar(stru
> if (ret)
> return ret;
>
> - tbl = dev->sysdata;
> + tbl = pci_iommu(dev->bus);
> tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
> tce_free(tbl, 0, tbl->it_size);
>
> @@ -665,7 +665,7 @@ static int __init calgary_setup_tar(stru
> /* zero out all TAR bits under sw control */
> val64 &= ~TAR_SW_BITS;
>
> - tbl = dev->sysdata;
> + tbl = pci_iommu(dev->bus);
> table_phys = (u64)__pa(tbl->it_base);
> val64 |= table_phys;
>
> @@ -682,7 +682,7 @@ static int __init calgary_setup_tar(stru
> static void __init calgary_free_bus(struct pci_dev *dev)
> {
> u64 val64;
> - struct iommu_table *tbl = dev->sysdata;
> + struct iommu_table *tbl = pci_iommu(dev->bus);
> void __iomem *target;
> unsigned int bitmapsz;
>
> @@ -697,7 +697,8 @@ static void __init calgary_free_bus(stru
> tbl->it_map = NULL;
>
> kfree(tbl);
> - dev->sysdata = NULL;
> +
> + set_pci_iommu(dev->bus, NULL);
>
> /* Can't free bootmem allocated memory after system is up :-( */
> bus_info[dev->bus->number].tce_space = NULL;
> @@ -706,7 +707,7 @@ static void __init calgary_free_bus(stru
> static void calgary_watchdog(unsigned long data)
> {
> struct pci_dev *dev = (struct pci_dev *)data;
> - struct iommu_table *tbl = dev->sysdata;
> + struct iommu_table *tbl = pci_iommu(dev->bus);
> void __iomem *bbar = tbl->bbar;
> u32 val32;
> void __iomem *target;
> @@ -742,7 +743,7 @@ static void __init calgary_enable_transl
> struct iommu_table *tbl;
>
> busnum = dev->bus->number;
> - tbl = dev->sysdata;
> + tbl = pci_iommu(dev->bus);
> bbar = tbl->bbar;
>
> /* enable TCE in PHB Config Register */
> @@ -772,7 +773,7 @@ static void __init calgary_disable_trans
> struct iommu_table *tbl;
>
> busnum = dev->bus->number;
> - tbl = dev->sysdata;
> + tbl = pci_iommu(dev->bus);
> bbar = tbl->bbar;
>
> /* disable TCE in PHB Config Register */
> @@ -813,8 +814,7 @@ static inline unsigned int __init locate
> static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
> {
> pci_dev_get(dev);
> - dev->sysdata = NULL;
> - dev->bus->self = dev;
> + set_pci_iommu(dev->bus, NULL);
> }
>
> static int __init calgary_init_one(struct pci_dev *dev)
> diff -Naurp -X /home/muli/w/dontdiff pci-domains/arch/x86_64/kernel/tce.c linux/arch/x86_64/kernel/tce.c
> --- pci-domains/arch/x86_64/kernel/tce.c 2006-09-28 13:31:14.000000000 +0300
> +++ linux/arch/x86_64/kernel/tce.c 2006-09-28 13:40:52.000000000 +0300
> @@ -137,9 +137,9 @@ int build_tce_table(struct pci_dev *dev,
> struct iommu_table *tbl;
> int ret;
>
> - if (dev->sysdata) {
> - printk(KERN_ERR "Calgary: dev %p has sysdata %p\n",
> - dev, dev->sysdata);
> + if (pci_iommu(dev->bus)) {
> + printk(KERN_ERR "Calgary: dev %p has sysdata->iommu %p\n",
> + dev, pci_iommu(dev->bus));
> BUG();
> }
>
> @@ -156,11 +156,7 @@ int build_tce_table(struct pci_dev *dev,
>
> tbl->bbar = bbar;
>
> - /*
> - * NUMA is already using the bus's sysdata pointer, so we use
> - * the bus's pci_dev's sysdata instead.
> - */
> - dev->sysdata = tbl;
> + set_pci_iommu(dev->bus, tbl);
>
> return 0;
>
> diff -Naurp -X /home/muli/w/dontdiff pci-domains/include/asm-x86_64/pci.h linux/include/asm-x86_64/pci.h
> --- pci-domains/include/asm-x86_64/pci.h 2006-09-28 13:34:05.000000000 +0300
> +++ linux/include/asm-x86_64/pci.h 2006-09-28 15:19:56.000000000 +0300
> @@ -8,6 +8,7 @@
> struct pci_sysdata {
> int domain; /* PCI domain */
> int node; /* NUMA node */
> + void* iommu; /* IOMMU private data */
> };
>
> #ifdef CONFIG_PCI_DOMAINS
> @@ -23,6 +24,20 @@ static inline int pci_proc_domain(struct
> }
> #endif /* CONFIG_PCI_DOMAINS */
>
> +#ifdef CONFIG_CALGARY_IOMMU
> +static inline void* pci_iommu(struct pci_bus *bus)
> +{
> + struct pci_sysdata *sd = bus->sysdata;
> + return sd->iommu;
> +}
> +
> +static inline void set_pci_iommu(struct pci_bus *bus, void *val)
> +{
> + struct pci_sysdata *sd = bus->sysdata;
> + sd->iommu = val;
> +}
> +#endif /* CONFIG_CALGARY_IOMMU */
> +
> #include <linux/mm.h> /* for struct page */
>
> /* Can be used to override the logic in pci_scan_bus for skipping
>

2006-09-29 17:11:27

by Muli Ben-Yehuda

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

On Fri, Sep 29, 2006 at 08:43:31AM -0500, Jon Mason wrote:

> Nak! The calgary code should not be affected by these patches.

It is. It triggers the BUG() I put there exactly in case someone tries
to grab ->sysdata from uder us.

PCI-DMA: Using Calgary IOMMU
Calgary: dev ffff8101979a7800 has sysdata ffff81019795f620
----------- [cut here ] --------- [please bite here ] ---------
Kernel BUG at ...uli/w/iommu/calgary/linux/arch/x86_64/kernel/tce.c:143
invalid opcode: 0000 [1] SMP
CPU 1
Modules linked in:
Pid: 1, comm: swapper Not tainted 2.6.18mx #115
RIP: 0010:[<ffffffff8021b38c>] [<ffffffff8021b38c>] build_tce_table+0x2b/0x126
RSP: 0000:ffff810197c7de70 EFLAGS: 00010282
RAX: 000000000000003e RBX: 00000000ffffffc3 RCX: 0000000000000000
RDX: ffffffff8022e3ee RSI: 0000000000000000 RDI: ffff810197c67040
RBP: ffff810197c7de90 R08: 0000000000000002 R09: ffffffff8022df0a
R10: 0000000000000000 R11: ffffffff80762280 R12: ffffc20000080000
R13: ffff8101979a7800 R14: ffffc20000080000 R15: 00000000ffffffed
FS: 0000000000000000(0000) GS:ffff810197c75cc0(0000) knlGS:0000000000000000
CS: 0010 DS: 0018 ES: 0018 CR0: 000000ffff80c728db 0000000000000000 00000000000 00000
ffffffff80c9cf18 0000000000000000 0000000000000000 0000000000000000
Call Trace:
[<ffffffff80c728db>] calgary_iommu_init+0x11e/0x569
[<ffffffff80c6b6fe>] pci_iommu_init+0x9/0x17
[<ffffffff8020718d>] init+0x145/0x300
[<ffffffff805d8e51>] trace_hardirqs_on_thunk+0x35/0x37
[<ffffffff80244a46>] trace_hardirqs_on+0xfe/0x129
[<ffffffff8020a6c8>] child_rip+0xa/0x12
[<ffffffff805d95ea>] _spin_unlock_irq+0x29/0x2f
[<ffffffff80209e4d>] restore_args+0x0/0x30
[<ffffffff80207048>] init+0x0/0x300
[<ffffffff8020a6be>] child_rip+0x0/0x12

> The PCI domain code uses the sysdata pointer from struct pci_bus,
> where as calgary uses the sysdata pointer from struct pci_dev. This
> should not be an issue. Can you confirm actual breakage?

See above. This is with mainline + Jeff's PCI domains patch.

In any case, even if it wasn't necessary, I'd like to take advantage
of Jeff's sysdata changes and get rid of our hack, as having an
extensible struct hanging of ->sysdata is the right way to go
forward.

Cheers,
Muli

2006-09-29 18:21:44

by Jon Mason

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

On Fri, Sep 29, 2006 at 08:11:19PM +0300, Muli Ben-Yehuda wrote:
> On Fri, Sep 29, 2006 at 08:43:31AM -0500, Jon Mason wrote:
>
> > Nak! The calgary code should not be affected by these patches.
>
> It is. It triggers the BUG() I put there exactly in case someone tries
> to grab ->sysdata from uder us.
>
> PCI-DMA: Using Calgary IOMMU
> Calgary: dev ffff8101979a7800 has sysdata ffff81019795f620
> ----------- [cut here ] --------- [please bite here ] ---------
> Kernel BUG at ...uli/w/iommu/calgary/linux/arch/x86_64/kernel/tce.c:143
> invalid opcode: 0000 [1] SMP
> CPU 1
> Modules linked in:
> Pid: 1, comm: swapper Not tainted 2.6.18mx #115
> RIP: 0010:[<ffffffff8021b38c>] [<ffffffff8021b38c>] build_tce_table+0x2b/0x126
> RSP: 0000:ffff810197c7de70 EFLAGS: 00010282
> RAX: 000000000000003e RBX: 00000000ffffffc3 RCX: 0000000000000000
> RDX: ffffffff8022e3ee RSI: 0000000000000000 RDI: ffff810197c67040
> RBP: ffff810197c7de90 R08: 0000000000000002 R09: ffffffff8022df0a
> R10: 0000000000000000 R11: ffffffff80762280 R12: ffffc20000080000
> R13: ffff8101979a7800 R14: ffffc20000080000 R15: 00000000ffffffed
> FS: 0000000000000000(0000) GS:ffff810197c75cc0(0000) knlGS:0000000000000000
> CS: 0010 DS: 0018 ES: 0018 CR0: 000000ffff80c728db 0000000000000000 00000000000 00000
> ffffffff80c9cf18 0000000000000000 0000000000000000 0000000000000000
> Call Trace:
> [<ffffffff80c728db>] calgary_iommu_init+0x11e/0x569
> [<ffffffff80c6b6fe>] pci_iommu_init+0x9/0x17
> [<ffffffff8020718d>] init+0x145/0x300
> [<ffffffff805d8e51>] trace_hardirqs_on_thunk+0x35/0x37
> [<ffffffff80244a46>] trace_hardirqs_on+0xfe/0x129
> [<ffffffff8020a6c8>] child_rip+0xa/0x12
> [<ffffffff805d95ea>] _spin_unlock_irq+0x29/0x2f
> [<ffffffff80209e4d>] restore_args+0x0/0x30
> [<ffffffff80207048>] init+0x0/0x300
> [<ffffffff8020a6be>] child_rip+0x0/0x12
>
> > The PCI domain code uses the sysdata pointer from struct pci_bus,
> > where as calgary uses the sysdata pointer from struct pci_dev. This
> > should not be an issue. Can you confirm actual breakage?
>
> See above. This is with mainline + Jeff's PCI domains patch.
>
> In any case, even if it wasn't necessary, I'd like to take advantage
> of Jeff's sysdata changes and get rid of our hack, as having an
> extensible struct hanging of ->sysdata is the right way to go
> forward.

I'm not sure why the above is happening, but I'll buy the clean-up :-).
The patch you sent out looks fine to me too.

Thanks,
Jon

>
> Cheers,
> Muli
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

2006-09-30 09:34:27

by Muli Ben-Yehuda

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

On Fri, Sep 29, 2006 at 02:31:16AM +0300, Muli Ben-Yehuda wrote:
> On Thu, Sep 28, 2006 at 07:03:28PM -0400, Jeff Garzik wrote:
>
> > hmmmm. What kernels did you test?
>
> mainline as of today + several unrelated Calgary patches I'll post
> shortly + your PCI domains patch + my Calgary patch. I'll test with
> iommu=off next.
>
> > That should narrow down the problems. A problem with aic94xx sorta
> > sounds like something unrelated.
>
> Not necessarily - Calgary is an isolating IOMMU, meaning that if we
> set up a mapping for aic94xx in the wrong IO space due to a Calgary
> bug, aic94xx will fall over and die. Usually however this happens a
> lot sooner. Also, we have code in Calgary to detect when an errant DMA
> happens and it hasn't triggered in this case.

Ok, turns out it's neither a PCI domains nor Calgary issue, since I
can reproduce it on mainline with iommu=off. Must be an aic94xx
issue, I'll send the details to linux-scsi in a bit.

Cheers,
Muli

2006-09-30 10:03:31

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

Muli Ben-Yehuda wrote:
> On Fri, Sep 29, 2006 at 02:31:16AM +0300, Muli Ben-Yehuda wrote:
>> On Thu, Sep 28, 2006 at 07:03:28PM -0400, Jeff Garzik wrote:
>>
>>> hmmmm. What kernels did you test?
>> mainline as of today + several unrelated Calgary patches I'll post
>> shortly + your PCI domains patch + my Calgary patch. I'll test with
>> iommu=off next.
>>
>>> That should narrow down the problems. A problem with aic94xx sorta
>>> sounds like something unrelated.
>> Not necessarily - Calgary is an isolating IOMMU, meaning that if we
>> set up a mapping for aic94xx in the wrong IO space due to a Calgary
>> bug, aic94xx will fall over and die. Usually however this happens a
>> lot sooner. Also, we have code in Calgary to detect when an errant DMA
>> happens and it hasn't triggered in this case.
>
> Ok, turns out it's neither a PCI domains nor Calgary issue, since I
> can reproduce it on mainline with iommu=off. Must be an aic94xx
> issue, I'll send the details to linux-scsi in a bit.

Would you also make sure that Andrew has the necessary bits to keep
Calgary going under PCI domains? If it's a patch that sits on top of
linux-2.6.git + my patch, I can merge it into misc-2.6.git#pciseg (which
automatically goes into -mm). Otherwise, make sure -mm has the stack of
patches necessary.

Jeff



2006-09-30 10:42:53

by Muli Ben-Yehuda

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

On Sat, Sep 30, 2006 at 06:03:11AM -0400, Jeff Garzik wrote:

> Would you also make sure that Andrew has the necessary bits to keep
> Calgary going under PCI domains? If it's a patch that sits on top of
> linux-2.6.git + my patch, I can merge it into misc-2.6.git#pciseg (which
> automatically goes into -mm). Otherwise, make sure -mm has the stack of
> patches necessary.

The patch I posted earlier is all that's needed, if you could merge it
into #pciseg that would be fine. I'm pondering making one small
change though: in your pci domains patch, you have this snippet:

+#ifdef CONFIG_PCI_DOMAINS
+static inline int pci_domain_nr(struct pci_bus *bus)
+{
+ struct pci_sysdata *sd = bus->sysdata;
+ return sd->domain;
+}
+
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+ return pci_domain_nr(bus);
+}
+#endif /* CONFIG_PCI_DOMAINS */

So pci_domain_nr and pci_proc_domain are only available if
CONFIG_PCI_DOMAINS is defined. I followed suit and make pci_iommu only
available if CONFIG_CALGARY_IOMMU is defined, but perhaps it would be
better to make it unconditional, since ->sysdata will always be
available anyway. Was there a specific reason why pci_domain_nr is
only available if CONFIG_PCI_DOMAINS?

Cheers,
Muli

2006-09-30 11:12:42

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

Muli Ben-Yehuda wrote:
> So pci_domain_nr and pci_proc_domain are only available if
> CONFIG_PCI_DOMAINS is defined. I followed suit and make pci_iommu only
> available if CONFIG_CALGARY_IOMMU is defined, but perhaps it would be
> better to make it unconditional, since ->sysdata will always be
> available anyway. Was there a specific reason why pci_domain_nr is
> only available if CONFIG_PCI_DOMAINS?

Generic stubs already exist in include/linux/pci.h.

Jeff


2006-09-30 11:42:00

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] x86[-64] PCI domain support

Muli Ben-Yehuda wrote:
> The patch I posted earlier is all that's needed, if you could merge it
> into #pciseg that would be fine. I'm pondering making one small
> change though: in your pci domains patch, you have this snippet:

Would you be kind enough to resend the patch with a proper Signed-off-by
line? (and subject/description, etc. while you're at it)

Thanks,

Jeff


2006-09-30 17:51:54

by Muli Ben-Yehuda

[permalink] [raw]
Subject: [PATCH] x86-64: Calgary IOMMU: update to work with PCI domains

On Sat, Sep 30, 2006 at 07:41:50AM -0400, Jeff Garzik wrote:

> Muli Ben-Yehuda wrote:
> >The patch I posted earlier is all that's needed, if you could merge it
> >into #pciseg that would be fine. I'm pondering making one small
> >change though: in your pci domains patch, you have this snippet:
>
> Would you be kind enough to resend the patch with a proper Signed-off-by
> line? (and subject/description, etc. while you're at it)

This patch updates Calgary to work with Jeff's PCI domains code by
moving each bus's pointer to its struct iommu_table to struct
pci_sysdata, rather than stashing it in ->sysdata directly.

Signed-off-by: Muli Ben-Yehuda <[email protected]>
Signed-off-by: Jon Mason <[email protected]>

diff -Naurp -X /home/muli/w/dontdiff pci-domains/arch/x86_64/kernel/pci-calgary.c linux/arch/x86_64/kernel/pci-calgary.c
--- pci-domains/arch/x86_64/kernel/pci-calgary.c 2006-09-28 13:31:14.000000000 +0300
+++ linux/arch/x86_64/kernel/pci-calgary.c 2006-09-28 13:14:38.000000000 +0300
@@ -317,7 +317,7 @@ void calgary_unmap_sg(struct device *dev
int nelems, int direction)
{
unsigned long flags;
- struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+ struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus);

if (!translate_phb(to_pci_dev(dev)))
return;
@@ -346,7 +346,7 @@ static int calgary_nontranslate_map_sg(s
int calgary_map_sg(struct device *dev, struct scatterlist *sg,
int nelems, int direction)
{
- struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+ struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus);
unsigned long flags;
unsigned long vaddr;
unsigned int npages;
@@ -400,7 +400,7 @@ dma_addr_t calgary_map_single(struct dev
dma_addr_t dma_handle = bad_dma_address;
unsigned long uaddr;
unsigned int npages;
- struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+ struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus);

uaddr = (unsigned long)vaddr;
npages = num_dma_pages(uaddr, size);
@@ -416,7 +416,7 @@ dma_addr_t calgary_map_single(struct dev
void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
size_t size, int direction)
{
- struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+ struct iommu_table *tbl = pci_iommu(to_pci_dev(dev)->bus);
unsigned int npages;

if (!translate_phb(to_pci_dev(dev)))
@@ -434,7 +434,7 @@ void* calgary_alloc_coherent(struct devi
unsigned int npages, order;
struct iommu_table *tbl;

- tbl = to_pci_dev(dev)->bus->self->sysdata;
+ tbl = pci_iommu(to_pci_dev(dev)->bus);

size = PAGE_ALIGN(size); /* size rounded up to full pages */
npages = size >> PAGE_SHIFT;
@@ -551,7 +551,7 @@ static void __init calgary_reserve_mem_r
limit++;

numpages = ((limit - start) >> PAGE_SHIFT);
- iommu_range_reserve(dev->sysdata, start, numpages);
+ iommu_range_reserve(pci_iommu(dev->bus), start, numpages);
}

static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
@@ -559,7 +559,7 @@ static void __init calgary_reserve_perip
void __iomem *target;
u64 low, high, sizelow;
u64 start, limit;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);
unsigned char busnum = dev->bus->number;
void __iomem *bbar = tbl->bbar;

@@ -583,7 +583,7 @@ static void __init calgary_reserve_perip
u32 val32;
u64 low, high, sizelow, sizehigh;
u64 start, limit;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);
unsigned char busnum = dev->bus->number;
void __iomem *bbar = tbl->bbar;

@@ -621,7 +621,7 @@ static void __init calgary_reserve_regio
void __iomem *bbar;
unsigned char busnum;
u64 start;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);

bbar = tbl->bbar;
busnum = dev->bus->number;
@@ -652,7 +652,7 @@ static int __init calgary_setup_tar(stru
if (ret)
return ret;

- tbl = dev->sysdata;
+ tbl = pci_iommu(dev->bus);
tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
tce_free(tbl, 0, tbl->it_size);

@@ -665,7 +665,7 @@ static int __init calgary_setup_tar(stru
/* zero out all TAR bits under sw control */
val64 &= ~TAR_SW_BITS;

- tbl = dev->sysdata;
+ tbl = pci_iommu(dev->bus);
table_phys = (u64)__pa(tbl->it_base);
val64 |= table_phys;

@@ -682,7 +682,7 @@ static int __init calgary_setup_tar(stru
static void __init calgary_free_bus(struct pci_dev *dev)
{
u64 val64;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);
void __iomem *target;
unsigned int bitmapsz;

@@ -697,7 +697,8 @@ static void __init calgary_free_bus(stru
tbl->it_map = NULL;

kfree(tbl);
- dev->sysdata = NULL;
+
+ set_pci_iommu(dev->bus, NULL);

/* Can't free bootmem allocated memory after system is up :-( */
bus_info[dev->bus->number].tce_space = NULL;
@@ -706,7 +707,7 @@ static void __init calgary_free_bus(stru
static void calgary_watchdog(unsigned long data)
{
struct pci_dev *dev = (struct pci_dev *)data;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);
void __iomem *bbar = tbl->bbar;
u32 val32;
void __iomem *target;
@@ -742,7 +743,7 @@ static void __init calgary_enable_transl
struct iommu_table *tbl;

busnum = dev->bus->number;
- tbl = dev->sysdata;
+ tbl = pci_iommu(dev->bus);
bbar = tbl->bbar;

/* enable TCE in PHB Config Register */
@@ -772,7 +773,7 @@ static void __init calgary_disable_trans
struct iommu_table *tbl;

busnum = dev->bus->number;
- tbl = dev->sysdata;
+ tbl = pci_iommu(dev->bus);
bbar = tbl->bbar;

/* disable TCE in PHB Config Register */
@@ -813,8 +814,7 @@ static inline unsigned int __init locate
static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
{
pci_dev_get(dev);
- dev->sysdata = NULL;
- dev->bus->self = dev;
+ set_pci_iommu(dev->bus, NULL);
}

static int __init calgary_init_one(struct pci_dev *dev)
diff -Naurp -X /home/muli/w/dontdiff pci-domains/arch/x86_64/kernel/tce.c linux/arch/x86_64/kernel/tce.c
--- pci-domains/arch/x86_64/kernel/tce.c 2006-09-28 13:31:14.000000000 +0300
+++ linux/arch/x86_64/kernel/tce.c 2006-09-28 13:40:52.000000000 +0300
@@ -137,9 +137,9 @@ int build_tce_table(struct pci_dev *dev,
struct iommu_table *tbl;
int ret;

- if (dev->sysdata) {
- printk(KERN_ERR "Calgary: dev %p has sysdata %p\n",
- dev, dev->sysdata);
+ if (pci_iommu(dev->bus)) {
+ printk(KERN_ERR "Calgary: dev %p has sysdata->iommu %p\n",
+ dev, pci_iommu(dev->bus));
BUG();
}

@@ -156,11 +156,7 @@ int build_tce_table(struct pci_dev *dev,

tbl->bbar = bbar;

- /*
- * NUMA is already using the bus's sysdata pointer, so we use
- * the bus's pci_dev's sysdata instead.
- */
- dev->sysdata = tbl;
+ set_pci_iommu(dev->bus, tbl);

return 0;

diff -Naurp -X /home/muli/w/dontdiff pci-domains/include/asm-x86_64/pci.h linux/include/asm-x86_64/pci.h
--- pci-domains/include/asm-x86_64/pci.h 2006-09-28 13:34:05.000000000 +0300
+++ linux/include/asm-x86_64/pci.h 2006-09-28 15:19:56.000000000 +0300
@@ -8,6 +8,7 @@
struct pci_sysdata {
int domain; /* PCI domain */
int node; /* NUMA node */
+ void* iommu; /* IOMMU private data */
};

#ifdef CONFIG_PCI_DOMAINS
@@ -23,6 +24,20 @@ static inline int pci_proc_domain(struct
}
#endif /* CONFIG_PCI_DOMAINS */

+#ifdef CONFIG_CALGARY_IOMMU
+static inline void* pci_iommu(struct pci_bus *bus)
+{
+ struct pci_sysdata *sd = bus->sysdata;
+ return sd->iommu;
+}
+
+static inline void set_pci_iommu(struct pci_bus *bus, void *val)
+{
+ struct pci_sysdata *sd = bus->sysdata;
+ sd->iommu = val;
+}
+#endif /* CONFIG_CALGARY_IOMMU */
+
#include <linux/mm.h> /* for struct page */

/* Can be used to override the logic in pci_scan_bus for skipping

2006-10-01 03:42:27

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] x86-64: Calgary IOMMU: update to work with PCI domains

Muli Ben-Yehuda wrote:
> On Sat, Sep 30, 2006 at 07:41:50AM -0400, Jeff Garzik wrote:
>
>> Muli Ben-Yehuda wrote:
>>> The patch I posted earlier is all that's needed, if you could merge it
>>> into #pciseg that would be fine. I'm pondering making one small
>>> change though: in your pci domains patch, you have this snippet:
>> Would you be kind enough to resend the patch with a proper Signed-off-by
>> line? (and subject/description, etc. while you're at it)
>
> This patch updates Calgary to work with Jeff's PCI domains code by
> moving each bus's pointer to its struct iommu_table to struct
> pci_sysdata, rather than stashing it in ->sysdata directly.
>
> Signed-off-by: Muli Ben-Yehuda <[email protected]>
> Signed-off-by: Jon Mason <[email protected]>

applied, and pushed out to jgarzik/misc-2.6.git#pciseg.

thanks,

Jeff