2007-09-07 19:52:54

by Keshavamurthy, Anil S

[permalink] [raw]
Subject: [RFC][Intel-IOMMU] Fix for IOMMU early crash

Subject: [RFC][Intel-IOMMU] Fix for IOMMU early crash

Populating pci_bus->sysdata way early in the pci discovery phase
sets NON-NULL value to pci_dev->sysdata which breaks the assumption
in the Intel IOMMU driver and crashes the system.


In the drivers/pci/probe.c, pci_dev->sysdata gets a copy of
its pci_bus->sysdata which is not required as
the same can be obtained from pci_dev->bus->sysdata. More over
the left hand assignment of pci_dev->sysdata is never being used,
so their is no point is setting
pci_dev->sysdata = pci_bus->sysdata;

This patch removes sysdata from pci_dev struct and creates a new
field called sys_data which is exclusively used
by IOMMU driver to keep its per device context pointer.

Signed-off-by: Anil S Keshavamurthy <[email protected]>

---
drivers/pci/hotplug/fakephp.c | 1 -
drivers/pci/intel-iommu.c | 22 +++++++++++-----------
drivers/pci/probe.c | 1 -
include/linux/pci.h | 2 +-
4 files changed, 12 insertions(+), 14 deletions(-)

Index: work/drivers/pci/hotplug/fakephp.c
===================================================================
--- work.orig/drivers/pci/hotplug/fakephp.c 2007-09-08 12:00:20.000000000 -0700
+++ work/drivers/pci/hotplug/fakephp.c 2007-09-08 12:07:19.000000000 -0700
@@ -243,7 +243,6 @@
return;

dev->bus = (struct pci_bus*)bus;
- dev->sysdata = bus->sysdata;
for (devfn = 0; devfn < 0x100; devfn += 8) {
dev->devfn = devfn;
pci_rescan_slot(dev);
Index: work/drivers/pci/intel-iommu.c
===================================================================
--- work.orig/drivers/pci/intel-iommu.c 2007-09-08 12:00:47.000000000 -0700
+++ work/drivers/pci/intel-iommu.c 2007-09-08 12:08:20.000000000 -0700
@@ -1348,7 +1348,7 @@
list_del(&info->link);
list_del(&info->global);
if (info->dev)
- info->dev->sysdata = NULL;
+ info->dev->sys_data = NULL;
spin_unlock_irqrestore(&device_domain_lock, flags);

detach_domain_for_dev(info->domain, info->bus, info->devfn);
@@ -1361,7 +1361,7 @@

/*
* find_domain
- * Note: we use struct pci_dev->sysdata stores the info
+ * Note: we use struct pci_dev->sys_data stores the info
*/
struct dmar_domain *
find_domain(struct pci_dev *pdev)
@@ -1369,7 +1369,7 @@
struct device_domain_info *info;

/* No lock here, assumes no domain exit in normal case */
- info = pdev->sysdata;
+ info = pdev->sys_data;
if (info)
return info->domain;
return NULL;
@@ -1519,7 +1519,7 @@
}
list_add(&info->link, &domain->devices);
list_add(&info->global, &device_domain_list);
- pdev->sysdata = info;
+ pdev->sys_data = info;
spin_unlock_irqrestore(&device_domain_lock, flags);
return domain;
error:
@@ -1579,7 +1579,7 @@
static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
struct pci_dev *pdev)
{
- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->sys_data == DUMMY_DEVICE_DOMAIN_INFO)
return 0;
return iommu_prepare_identity_map(pdev, rmrr->base_address,
rmrr->end_address + 1);
@@ -1595,7 +1595,7 @@
int ret;

for_each_pci_dev(pdev) {
- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO ||
+ if (pdev->sys_data == DUMMY_DEVICE_DOMAIN_INFO ||
!IS_GFX_DEVICE(pdev))
continue;
printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
@@ -1836,7 +1836,7 @@
int prot = 0;

BUG_ON(dir == DMA_NONE);
- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->sys_data == DUMMY_DEVICE_DOMAIN_INFO)
return virt_to_bus(addr);

domain = get_valid_domain_for_dev(pdev);
@@ -1900,7 +1900,7 @@
unsigned long start_addr;
struct iova *iova;

- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->sys_data == DUMMY_DEVICE_DOMAIN_INFO)
return;
domain = find_domain(pdev);
BUG_ON(!domain);
@@ -1974,7 +1974,7 @@
size_t size = 0;
void *addr;

- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->sys_data == DUMMY_DEVICE_DOMAIN_INFO)
return;

domain = find_domain(pdev);
@@ -2032,7 +2032,7 @@
unsigned long start_addr;

BUG_ON(dir == DMA_NONE);
- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->sys_data == DUMMY_DEVICE_DOMAIN_INFO)
return intel_nontranslate_map_sg(hwdev, sg, nelems, dir);

domain = get_valid_domain_for_dev(pdev);
@@ -2234,7 +2234,7 @@
for (i = 0; i < drhd->devices_cnt; i++) {
if (!drhd->devices[i])
continue;
- drhd->devices[i]->sysdata = DUMMY_DEVICE_DOMAIN_INFO;
+ drhd->devices[i]->sys_data = DUMMY_DEVICE_DOMAIN_INFO;
}
}
}
Index: work/drivers/pci/probe.c
===================================================================
--- work.orig/drivers/pci/probe.c 2007-09-08 12:00:47.000000000 -0700
+++ work/drivers/pci/probe.c 2007-09-08 12:06:59.000000000 -0700
@@ -994,7 +994,6 @@
return NULL;

dev->bus = bus;
- dev->sysdata = bus->sysdata;
dev->dev.parent = bus->bridge;
dev->dev.bus = &pci_bus_type;
dev->devfn = devfn;
Index: work/include/linux/pci.h
===================================================================
--- work.orig/include/linux/pci.h 2007-09-08 12:00:47.000000000 -0700
+++ work/include/linux/pci.h 2007-09-08 12:29:36.000000000 -0700
@@ -130,7 +130,7 @@
struct pci_bus *bus; /* bus this device is on */
struct pci_bus *subordinate; /* bus this device bridges to */

- void *sysdata; /* hook for sys-specific extension */
+ void *sys_data; /* hook for IOMMU specific extension */
struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */

unsigned int devfn; /* encoded device & function index */


2007-09-09 11:17:04

by Muli Ben-Yehuda

[permalink] [raw]
Subject: Re: [RFC][Intel-IOMMU] Fix for IOMMU early crash

On Sat, Sep 08, 2007 at 01:05:24PM -0700, Keshavamurthy, Anil S wrote:

> Subject: [RFC][Intel-IOMMU] Fix for IOMMU early crash

This patch feels like a huge hack. See below.

> This patch removes sysdata from pci_dev struct and creates a new
> field called sys_data which is exclusively used by IOMMU driver to
> keep its per device context pointer.

Hmpf, why is this needed? with the pci_sysdata work that recently went
into mainline we have a void *iommu member in pci_sysdata which should
be all that's needed. Please elaborate if it's not enough for your
needs.

Thanks,
Muli

2007-09-09 15:31:51

by Keshavamurthy, Anil S

[permalink] [raw]
Subject: Re: [RFC][Intel-IOMMU] Fix for IOMMU early crash

On Sun, Sep 09, 2007 at 02:16:19PM +0300, Muli Ben-Yehuda wrote:
> On Sat, Sep 08, 2007 at 01:05:24PM -0700, Keshavamurthy, Anil S wrote:
>
> > Subject: [RFC][Intel-IOMMU] Fix for IOMMU early crash
>
> This patch feels like a huge hack. See below.
You seem to be jumping to conclusion without going
in detail. The pci_dev struct contains pointer to sysdata,
which in turn points to the copy of its parent's bus sysdata.
So technically speaking we can eliminate sysdata pointer from
pci_dev struct which is what one portion of this patch does.

>
> > This patch removes sysdata from pci_dev struct and creates a new
> > field called sys_data which is exclusively used by IOMMU driver to
> > keep its per device context pointer.
>
> Hmpf, why is this needed? with the pci_sysdata work that recently went
> into mainline we have a void *iommu member in pci_sysdata which should
> be all that's needed. Please elaborate if it's not enough for your
> needs.
I looked at your patch and it was not suitable because I need to store
iommu private pointer in pci_dev and not in the pci_bus. So I have added
a new member sys_data in the pci_dev struct. I can change the name from
sys_dev to iomu_priv to clear the confusion. Do let me know.


-Anil






>
> Thanks,
> Muli

2007-09-09 17:51:52

by Muli Ben-Yehuda

[permalink] [raw]
Subject: Re: [RFC][Intel-IOMMU] Fix for IOMMU early crash

On Mon, Sep 10, 2007 at 08:43:59AM -0700, Keshavamurthy, Anil S wrote:
> On Sun, Sep 09, 2007 at 02:16:19PM +0300, Muli Ben-Yehuda wrote:
> > On Sat, Sep 08, 2007 at 01:05:24PM -0700, Keshavamurthy, Anil S wrote:
> >
> > > Subject: [RFC][Intel-IOMMU] Fix for IOMMU early crash
> >
> > This patch feels like a huge hack. See below.
>
> You seem to be jumping to conclusion without going in detail. The
> pci_dev struct contains pointer to sysdata, which in turn points to
> the copy of its parent's bus sysdata. So technically speaking we
> can eliminate sysdata pointer from pci_dev struct which is what one
> portion of this patch does.

... provided nothing relies on this relationship or the existence of
the pci_dev's sysdata. Have you audited every architecture's use of
the sysdata pointers?

> > > This patch removes sysdata from pci_dev struct and creates a new
> > > field called sys_data which is exclusively used by IOMMU driver to
> > > keep its per device context pointer.
> >
> > Hmpf, why is this needed? with the pci_sysdata work that recently went
> > into mainline we have a void *iommu member in pci_sysdata which should
> > be all that's needed. Please elaborate if it's not enough for your
> > needs.

> I looked at your patch and it was not suitable because I need to
> store iommu private pointer in pci_dev

Could you elaborate on why you need this? I'm assuming it's for the
per-device IOMMU page tables?

> and not in the pci_bus. So I have added a new member sys_data in the
> pci_dev struct. I can change the name from sys_dev to iomu_priv to
> clear the confusion. Do let me know.

Well, you should be able to just use the pci_dev's ->sysdata (that's
what it's there for after all!) but you might need to make it point to
a structure if it's shared, the same way we did with the bus's
->sysdata. I agree that just having it point to the bus's ->sysdata is
not very useful *but* there may be code in the kernel that relies on
it (Calgary did until very recently...) so it would have to be audited
first.

Cheers,
Muli

2007-09-09 20:48:31

by Paul Mackerras

[permalink] [raw]
Subject: Re: [RFC][Intel-IOMMU] Fix for IOMMU early crash

Keshavamurthy, Anil S writes:

> Subject: [RFC][Intel-IOMMU] Fix for IOMMU early crash
>
> Populating pci_bus->sysdata way early in the pci discovery phase
> sets NON-NULL value to pci_dev->sysdata which breaks the assumption
> in the Intel IOMMU driver and crashes the system.
>
>
> In the drivers/pci/probe.c, pci_dev->sysdata gets a copy of
> its pci_bus->sysdata which is not required as
> the same can be obtained from pci_dev->bus->sysdata. More over
> the left hand assignment of pci_dev->sysdata is never being used,

Wrong. You needed to grep a bit more widely...

> so their is no point is setting
> pci_dev->sysdata = pci_bus->sysdata;
>
> This patch removes sysdata from pci_dev struct and creates a new
> field called sys_data which is exclusively used
> by IOMMU driver to keep its per device context pointer.

This will break powerpc, because we use the pci_dev->sysdata field to
point to a firmware device tree node. Please figure out another way
to solve your problem.

Paul.

2007-09-10 17:10:58

by Keshavamurthy, Anil S

[permalink] [raw]
Subject: Re: [RFC][Intel-IOMMU] Fix for IOMMU early crash

On Sun, Sep 09, 2007 at 08:51:40PM +0300, Muli Ben-Yehuda wrote:
> On Mon, Sep 10, 2007 at 08:43:59AM -0700, Keshavamurthy, Anil S wrote:
> > On Sun, Sep 09, 2007 at 02:16:19PM +0300, Muli Ben-Yehuda wrote:
> > > On Sat, Sep 08, 2007 at 01:05:24PM -0700, Keshavamurthy, Anil S wrote:
> > >
> > > > Subject: [RFC][Intel-IOMMU] Fix for IOMMU early crash
> > >
> > > This patch feels like a huge hack. See below.
> >
> > You seem to be jumping to conclusion without going in detail. The
> > pci_dev struct contains pointer to sysdata, which in turn points to
> > the copy of its parent's bus sysdata. So technically speaking we
> > can eliminate sysdata pointer from pci_dev struct which is what one
> > portion of this patch does.
>
> ... provided nothing relies on this relationship or the existence of
> the pci_dev's sysdata. Have you audited every architecture's use of
> the sysdata pointers?
>
> > > > This patch removes sysdata from pci_dev struct and creates a new
> > > > field called sys_data which is exclusively used by IOMMU driver to
> > > > keep its per device context pointer.
> > >
> > > Hmpf, why is this needed? with the pci_sysdata work that recently went
> > > into mainline we have a void *iommu member in pci_sysdata which should
> > > be all that's needed. Please elaborate if it's not enough for your
> > > needs.
>
> > I looked at your patch and it was not suitable because I need to
> > store iommu private pointer in pci_dev
>
> Could you elaborate on why you need this? I'm assuming it's for the
> per-device IOMMU page tables?
Yes, it is for per-device IOMMU domain information which internally holds
info about the page tables.
>
> > and not in the pci_bus. So I have added a new member sys_data in the
> > pci_dev struct. I can change the name from sys_dev to iomu_priv to
> > clear the confusion. Do let me know.
>
> Well, you should be able to just use the pci_dev's ->sysdata (that's
> what it's there for after all!) but you might need to make it point to
> a structure if it's shared, the same way we did with the bus's
> ->sysdata.
How about adding a new field as I don't care about KABI for mainline?

>I agree that just having it point to the bus's ->sysdata is
> not very useful *but* there may be code in the kernel that relies on
> it (Calgary did until very recently...) so it would have to be audited
> first.
I still wonder copying bus's->sysdata to pci_dev's-> sysdata is any useful?

-Anil

2007-09-10 17:30:40

by Keshavamurthy, Anil S

[permalink] [raw]
Subject: Re: [RFC][Intel-IOMMU] Fix for IOMMU early crash

On Mon, Sep 10, 2007 at 03:37:48AM +1000, Paul Mackerras wrote:
> Keshavamurthy, Anil S writes:
>
> > Subject: [RFC][Intel-IOMMU] Fix for IOMMU early crash
> >
> > Populating pci_bus->sysdata way early in the pci discovery phase
> > sets NON-NULL value to pci_dev->sysdata which breaks the assumption
> > in the Intel IOMMU driver and crashes the system.
> >
> >
> > In the drivers/pci/probe.c, pci_dev->sysdata gets a copy of
> > its pci_bus->sysdata which is not required as
> > the same can be obtained from pci_dev->bus->sysdata. More over
> > the left hand assignment of pci_dev->sysdata is never being used,
>
> Wrong. You needed to grep a bit more widely...
Ah..Thanks for pointing this out. sorry I had checked only i386 and x86_64.
>
> > so their is no point is setting
> > pci_dev->sysdata = pci_bus->sysdata;
> >
> > This patch removes sysdata from pci_dev struct and creates a new
> > field called sys_data which is exclusively used
> > by IOMMU driver to keep its per device context pointer.
>
> This will break powerpc, because we use the pci_dev->sysdata field to
> point to a firmware device tree node. Please figure out another way
> to solve your problem.

Yes, I agree that pci_dev->sysdata can;t be removed. Even we (IOMMU)
were dependent on this field but somehow this field is being overwritten
to point to pci_bus's->sysdata and hence IOMMU was failing. Earlier
it was overwritten to NULL and hence we were not failing but now it
is overwritten to non-NULL and hence we fail.

My therory is that we don;t need to copy pci_bus's->sysdata to
pci_dev's->sysdata. Below patch solves my problem.
Any objection to below patch?

---
drivers/pci/hotplug/fakephp.c | 1 -
drivers/pci/probe.c | 1 -
2 files changed, 2 deletions(-)

Index: work/drivers/pci/hotplug/fakephp.c
===================================================================
--- work.orig/drivers/pci/hotplug/fakephp.c 2007-09-11 10:29:30.000000000 -0700
+++ work/drivers/pci/hotplug/fakephp.c 2007-09-11 10:35:22.000000000 -0700
@@ -243,7 +243,6 @@
return;

dev->bus = (struct pci_bus*)bus;
- dev->sysdata = bus->sysdata;
for (devfn = 0; devfn < 0x100; devfn += 8) {
dev->devfn = devfn;
pci_rescan_slot(dev);
Index: work/drivers/pci/probe.c
===================================================================
--- work.orig/drivers/pci/probe.c 2007-09-11 10:29:30.000000000 -0700
+++ work/drivers/pci/probe.c 2007-09-11 10:35:22.000000000 -0700
@@ -994,7 +994,6 @@
return NULL;

dev->bus = bus;
- dev->sysdata = bus->sysdata;
dev->dev.parent = bus->bridge;
dev->dev.bus = &pci_bus_type;
dev->devfn = devfn;



2007-09-10 20:25:55

by Muli Ben-Yehuda

[permalink] [raw]
Subject: Re: [RFC][Intel-IOMMU] Fix for IOMMU early crash

On Tue, Sep 11, 2007 at 10:42:31AM -0700, Keshavamurthy, Anil S wrote:

> Yes, I agree that pci_dev->sysdata can;t be removed. Even we (IOMMU)
> were dependent on this field but somehow this field is being
> overwritten to point to pci_bus's->sysdata and hence IOMMU was
> failing. Earlier it was overwritten to NULL and hence we were not
> failing but now it is overwritten to non-NULL and hence we fail.

Do you know which commit caused that change?

> My therory is that we don;t need to copy pci_bus's->sysdata to
> pci_dev's->sysdata. Below patch solves my problem.
> Any objection to below patch?

I will give it a spin to verify it works for me, but in general I am
wary of making such changes unless we can verify (read: audit) that
they have no adverse side effects *on all architectures*.

Cheers,
Muli

2007-09-10 20:32:01

by Keshavamurthy, Anil S

[permalink] [raw]
Subject: Re: [RFC][Intel-IOMMU] Fix for IOMMU early crash

On Mon, Sep 10, 2007 at 11:25:43PM +0300, Muli Ben-Yehuda wrote:
> On Tue, Sep 11, 2007 at 10:42:31AM -0700, Keshavamurthy, Anil S wrote:
>
> > Yes, I agree that pci_dev->sysdata can;t be removed. Even we (IOMMU)
> > were dependent on this field but somehow this field is being
> > overwritten to point to pci_bus's->sysdata and hence IOMMU was
> > failing. Earlier it was overwritten to NULL and hence we were not
> > failing but now it is overwritten to non-NULL and hence we fail.
>
> Do you know which commit caused that change?
>
> > My therory is that we don;t need to copy pci_bus's->sysdata to
> > pci_dev's->sysdata. Below patch solves my problem.
> > Any objection to below patch?
>
> I will give it a spin to verify it works for me, but in general I am
> wary of making such changes unless we can verify (read: audit) that
> they have no adverse side effects *on all architectures*.
Thanks Muli for your help here. I tested on x86_64 and saw no
issues. Looking at the code, pci_dev's->sysdata becomes useless
if the intent here is to keep a copy of it's bus's->sysdata as
the same can be obtained from pci_dev->bus->sysdata.

Thanks,
Anil

2007-09-11 19:17:16

by Keshavamurthy, Anil S

[permalink] [raw]
Subject: [patch][Intel-IOMMU] Fix for IOMMU early crash

Subject: Fix IOMMU early crash

This patch avoids copying pci_bus's->sysdata to
pci_dev's->sysdata as one can easily obtain
the same through pci_dev->bus->sysdata.

Now with some recent pci_sysdata patches,
the bus's->sysdata gets populated way early
and a value of non-NULL gets copied from
bus's->sysdata to pci_dev's->sysdata
which causes IOMMU to crash way early in the
boot time as IOMMU depends on this field for its
per device IOMMU context.

Hence this patch not only makes pci_dev's->sysdata
useful and but also fixes the IOMMU early crash.

This patch needs to be applied before IOMMU patches,
so that git bisect will not fail on IOMMU code :-)

Tested on x86_64.

Please apply.

Signed-off-by: Anil S Keshavamurthy <[email protected]>

---
drivers/pci/hotplug/fakephp.c | 1 -
drivers/pci/probe.c | 1 -
2 files changed, 2 deletions(-)

Index: work/drivers/pci/hotplug/fakephp.c
===================================================================
--- work.orig/drivers/pci/hotplug/fakephp.c 2007-09-11 10:29:30.000000000 -0700
+++ work/drivers/pci/hotplug/fakephp.c 2007-09-11 10:35:22.000000000 -0700
@@ -243,7 +243,6 @@
return;

dev->bus = (struct pci_bus*)bus;
- dev->sysdata = bus->sysdata;
for (devfn = 0; devfn < 0x100; devfn += 8) {
dev->devfn = devfn;
pci_rescan_slot(dev);
Index: work/drivers/pci/probe.c
===================================================================
--- work.orig/drivers/pci/probe.c 2007-09-11 10:29:30.000000000 -0700
+++ work/drivers/pci/probe.c 2007-09-11 10:35:22.000000000 -0700
@@ -994,7 +994,6 @@
return NULL;

dev->bus = bus;
- dev->sysdata = bus->sysdata;
dev->dev.parent = bus->bridge;
dev->dev.bus = &pci_bus_type;
dev->devfn = devfn;

2007-09-11 20:59:56

by Paul Mackerras

[permalink] [raw]
Subject: Re: [patch][Intel-IOMMU] Fix for IOMMU early crash

Keshavamurthy, Anil S writes:

> Subject: Fix IOMMU early crash
>
> This patch avoids copying pci_bus's->sysdata to
> pci_dev's->sysdata as one can easily obtain
> the same through pci_dev->bus->sysdata.

At the moment this will cause ppc64 to crash, since we rely on
pci_dev->sysdata pointing to some node in the firmware device tree,
either the device's node or the node for a parent bus.

We could change the ppc64 code to use pci_dev->bus->sysdata in the
case when pci_dev->sysdata is NULL, which would fix the problem. I
think that change should be incorporated as part of this patch so that
we don't break git bisection.

In other words I don't want to see this patch applied as it stands.

Paul.

2007-09-11 21:43:44

by Keshavamurthy, Anil S

[permalink] [raw]
Subject: Re: [patch][Intel-IOMMU] Fix for IOMMU early crash

On Wed, Sep 12, 2007 at 05:48:52AM +1000, Paul Mackerras wrote:
> Keshavamurthy, Anil S writes:
>
> > Subject: Fix IOMMU early crash
> >
> > This patch avoids copying pci_bus's->sysdata to
> > pci_dev's->sysdata as one can easily obtain
> > the same through pci_dev->bus->sysdata.
>
> At the moment this will cause ppc64 to crash, since we rely on
> pci_dev->sysdata pointing to some node in the firmware device tree,
> either the device's node or the node for a parent bus.
>
> We could change the ppc64 code to use pci_dev->bus->sysdata in the
> case when pci_dev->sysdata is NULL, which would fix the problem. I
> think that change should be incorporated as part of this patch so that
> we don't break git bisection.

Why do you want to check if pci_dev->sysdata is NULL then use
pci_dev->bus->sysdata else pci_dev->sysdata? If you change this
to always use pci_dev->bus->sysdata, then you don;t have to depend
on my patch and your patch can get in independent of mine.

>
> In other words I don't want to see this patch applied as it stands.
Is it possible to post your patch anytime soon? Or feel free to combine
mine with yours and post it with your signed-off-by.

Thanks,
Anil

2007-09-11 22:12:55

by Casey Dahlin

[permalink] [raw]
Subject: [BUG:] forcedeth: MCP55 not allowing DHCP

I have an Asus Striker Extreme motherboard with two built in MCP55 GigE
interfaces. When I build with the original Fedora 7 release kernel (
ftp://ftp.belnet.be/linux/fedora/linux/releases/7/Fedora/i386/os/Fedora/kernel-2.6.21-1.3194.fc7.i686.rpm
) everything works fine. However, when I boot with any updated kernels
or any other kernel (have tried building from several points in the
linus git tree between 2.6.20 and .23-rc3, and 2.6.21.2 in -stable) I
cannot get an IP address via dhcp. There is no error in dmesg. The card
shows a link and otherwise appears to be working, but it is as if the
dhcp server has been removed from the network.

On a running system there is no indication that this is a kernel bug at
all, however by varying only the kernel the bug appears and disappears.
I've run all these tests repeatedly with no intervening updates of any
other packages.

As I said I attempted to build 2.6.21.2 ( the point of divergence
between the Fedora kernel in question and -stable ) and still the card
did not work. I will next attempt to manually build the rpm for the
release kernel. If this works I will try experimenting with the included
patches to narrow it down, but at this point I'm at a complete loss.

-Casey Dahlin

2007-09-12 01:18:17

by Keshavamurthy, Anil S

[permalink] [raw]
Subject: Re: [patch][Intel-IOMMU] Fix for IOMMU early crash

On Wed, Sep 12, 2007 at 05:48:52AM +1000, Paul Mackerras wrote:
> Keshavamurthy, Anil S writes:
>
> > Subject: Fix IOMMU early crash
> >
> > This patch avoids copying pci_bus's->sysdata to
> > pci_dev's->sysdata as one can easily obtain
> > the same through pci_dev->bus->sysdata.
>
> At the moment this will cause ppc64 to crash, since we rely on
> pci_dev->sysdata pointing to some node in the firmware device tree,
> either the device's node or the node for a parent bus.
>
> We could change the ppc64 code to use pci_dev->bus->sysdata in the
> case when pci_dev->sysdata is NULL, which would fix the problem. I
> think that change should be incorporated as part of this patch so that
> we don't break git bisection.
Can I expect the ppc64 code changes from you?
Once I get your, I will merge with mine and post it again.
>
> In other words I don't want to see this patch applied as it stands.
Yup, I agree with you.

-Anil

2007-09-14 16:31:20

by Paul Mackerras

[permalink] [raw]
Subject: Re: [patch][Intel-IOMMU] Fix for IOMMU early crash

Keshavamurthy, Anil S writes:

> Can I expect the ppc64 code changes from you?
> Once I get your, I will merge with mine and post it again.

Sure, but it will be next week, since I am travelling this week.

Paul.

2007-09-18 01:59:44

by Casey Dahlin

[permalink] [raw]
Subject: Re: [BUG:] forcedeth: MCP55 not allowing DHCP

Casey Dahlin wrote:
> I have an Asus Striker Extreme motherboard with two built in MCP55
> GigE interfaces. When I build with the original Fedora 7 release
> kernel (
> ftp://ftp.belnet.be/linux/fedora/linux/releases/7/Fedora/i386/os/Fedora/kernel-2.6.21-1.3194.fc7.i686.rpm
> ) everything works fine. However, when I boot with any updated kernels
> or any other kernel (have tried building from several points in the
> linus git tree between 2.6.20 and .23-rc3, and 2.6.21.2 in -stable) I
> cannot get an IP address via dhcp. There is no error in dmesg. The
> card shows a link and otherwise appears to be working, but it is as if
> the dhcp server has been removed from the network.
>
> On a running system there is no indication that this is a kernel bug
> at all, however by varying only the kernel the bug appears and
> disappears. I've run all these tests repeatedly with no intervening
> updates of any other packages.
>
> As I said I attempted to build 2.6.21.2 ( the point of divergence
> between the Fedora kernel in question and -stable ) and still the card
> did not work. I will next attempt to manually build the rpm for the
> release kernel. If this works I will try experimenting with the
> included patches to narrow it down, but at this point I'm at a
> complete loss.
>
> -Casey Dahlin
>

Is there any feedback to be had on this? I've gotten no reply whatsoever
from several sources now.

2007-09-25 17:11:39

by Keshavamurthy, Anil S

[permalink] [raw]
Subject: Re: [patch][Intel-IOMMU] Fix for IOMMU early crash

On Sat, Sep 15, 2007 at 02:30:40AM +1000, Paul Mackerras wrote:
> Keshavamurthy, Anil S writes:
>
> > Can I expect the ppc64 code changes from you?
> > Once I get your, I will merge with mine and post it again.
>
> Sure, but it will be next week, since I am travelling this week.
Any progress?

-Anil

2007-10-03 21:20:44

by Keshavamurthy, Anil S

[permalink] [raw]
Subject: [patch take 2][Intel-IOMMU] Fix for IOMMU early crash

Subject: [patch][Intel-IOMMU] Fix for IOMMU early crash

pci_dev's->sysdata is highly overloaded and currently
IOMMU is broken due to IOMMU code depending on this field.

This patch introduces new field in pci_dev's struct to
hold IOMMU specific per device IOMMU private data.

Signed-off-by: Anil S Keshavamurthy <[email protected]>

---
drivers/pci/intel-iommu.c | 22 +++++++++++-----------
include/linux/pci.h | 1 +
2 files changed, 12 insertions(+), 11 deletions(-)

Index: 2.6-mm/drivers/pci/intel-iommu.c
===================================================================
--- 2.6-mm.orig/drivers/pci/intel-iommu.c 2007-10-03 13:48:18.000000000 -0700
+++ 2.6-mm/drivers/pci/intel-iommu.c 2007-10-03 13:48:41.000000000 -0700
@@ -1348,7 +1348,7 @@
list_del(&info->link);
list_del(&info->global);
if (info->dev)
- info->dev->sysdata = NULL;
+ info->dev->iommu_private = NULL;
spin_unlock_irqrestore(&device_domain_lock, flags);

detach_domain_for_dev(info->domain, info->bus, info->devfn);
@@ -1361,7 +1361,7 @@

/*
* find_domain
- * Note: we use struct pci_dev->sysdata stores the info
+ * Note: we use struct pci_dev->iommu_private stores the info
*/
struct dmar_domain *
find_domain(struct pci_dev *pdev)
@@ -1369,7 +1369,7 @@
struct device_domain_info *info;

/* No lock here, assumes no domain exit in normal case */
- info = pdev->sysdata;
+ info = pdev->iommu_private;
if (info)
return info->domain;
return NULL;
@@ -1519,7 +1519,7 @@
}
list_add(&info->link, &domain->devices);
list_add(&info->global, &device_domain_list);
- pdev->sysdata = info;
+ pdev->iommu_private = info;
spin_unlock_irqrestore(&device_domain_lock, flags);
return domain;
error:
@@ -1579,7 +1579,7 @@
static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
struct pci_dev *pdev)
{
- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->iommu_private == DUMMY_DEVICE_DOMAIN_INFO)
return 0;
return iommu_prepare_identity_map(pdev, rmrr->base_address,
rmrr->end_address + 1);
@@ -1595,7 +1595,7 @@
int ret;

for_each_pci_dev(pdev) {
- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO ||
+ if (pdev->iommu_private == DUMMY_DEVICE_DOMAIN_INFO ||
!IS_GFX_DEVICE(pdev))
continue;
printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
@@ -1836,7 +1836,7 @@
int prot = 0;

BUG_ON(dir == DMA_NONE);
- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->iommu_private == DUMMY_DEVICE_DOMAIN_INFO)
return virt_to_bus(addr);

domain = get_valid_domain_for_dev(pdev);
@@ -1900,7 +1900,7 @@
unsigned long start_addr;
struct iova *iova;

- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->iommu_private == DUMMY_DEVICE_DOMAIN_INFO)
return;
domain = find_domain(pdev);
BUG_ON(!domain);
@@ -1974,7 +1974,7 @@
size_t size = 0;
void *addr;

- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->iommu_private == DUMMY_DEVICE_DOMAIN_INFO)
return;

domain = find_domain(pdev);
@@ -2032,7 +2032,7 @@
unsigned long start_addr;

BUG_ON(dir == DMA_NONE);
- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->iommu_private == DUMMY_DEVICE_DOMAIN_INFO)
return intel_nontranslate_map_sg(hwdev, sg, nelems, dir);

domain = get_valid_domain_for_dev(pdev);
@@ -2234,7 +2234,7 @@
for (i = 0; i < drhd->devices_cnt; i++) {
if (!drhd->devices[i])
continue;
- drhd->devices[i]->sysdata = DUMMY_DEVICE_DOMAIN_INFO;
+ drhd->devices[i]->iommu_private = DUMMY_DEVICE_DOMAIN_INFO;
}
}
}
Index: 2.6-mm/include/linux/pci.h
===================================================================
--- 2.6-mm.orig/include/linux/pci.h 2007-10-03 13:48:20.000000000 -0700
+++ 2.6-mm/include/linux/pci.h 2007-10-03 13:49:08.000000000 -0700
@@ -195,6 +195,7 @@
#ifdef CONFIG_PCI_MSI
struct list_head msi_list;
#endif
+ void *iommu_private; /* hook for IOMMU specific extension */
};

extern struct pci_dev *alloc_pci_dev(void);

2007-10-04 01:20:04

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [patch take 2][Intel-IOMMU] Fix for IOMMU early crash

> Index: 2.6-mm/include/linux/pci.h
> ===================================================================
> --- 2.6-mm.orig/include/linux/pci.h 2007-10-03 13:48:20.000000000 -0700
> +++ 2.6-mm/include/linux/pci.h 2007-10-03 13:49:08.000000000 -0700
> @@ -195,6 +195,7 @@
> #ifdef CONFIG_PCI_MSI
> struct list_head msi_list;
> #endif
> + void *iommu_private; /* hook for IOMMU specific extension */
> };

I'm not fan of this. That would imply that iommu stuff is specific to
PCI or that sort of thing.

Why don't you use the new struct dev_archdata mechanism ? That's what I
use on powerpc to provide optional iommu linkage to any device in the
system.

Ben.


2007-10-04 01:42:00

by Keshavamurthy, Anil S

[permalink] [raw]
Subject: Re: [patch take 2][Intel-IOMMU] Fix for IOMMU early crash

On Thu, Oct 04, 2007 at 11:19:33AM +1000, Benjamin Herrenschmidt wrote:
> > Index: 2.6-mm/include/linux/pci.h
> > ===================================================================
> > --- 2.6-mm.orig/include/linux/pci.h 2007-10-03 13:48:20.000000000 -0700
> > +++ 2.6-mm/include/linux/pci.h 2007-10-03 13:49:08.000000000 -0700
> > @@ -195,6 +195,7 @@
> > #ifdef CONFIG_PCI_MSI
> > struct list_head msi_list;
> > #endif
> > + void *iommu_private; /* hook for IOMMU specific extension */
> > };
>
> I'm not fan of this. That would imply that iommu stuff is specific to
> PCI or that sort of thing.
>
> Why don't you use the new struct dev_archdata mechanism ? That's what I
> use on powerpc to provide optional iommu linkage to any device in the
> system.
Good one. I will certainly try out your idea and will update the list
tomorrow.

-Anil

2007-10-04 03:40:09

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [patch take 2][Intel-IOMMU] Fix for IOMMU early crash


> > Why don't you use the new struct dev_archdata mechanism ? That's what I
> > use on powerpc to provide optional iommu linkage to any device in the
> > system.
> Good one. I will certainly try out your idea and will update the list
> tomorrow.

The advantage is that it allows to completely isolate the iommu code
from any dependency to PCI, which means you can implement DMA ops
support for various platform devices or other fancy things. Maybe not
the most useful in x86-land, but still ;-)

Ben.


2007-10-04 19:26:19

by Keshavamurthy, Anil S

[permalink] [raw]
Subject: Re: [patch take 2][Intel-IOMMU] Fix for IOMMU early crash

On Thu, Oct 04, 2007 at 01:39:39PM +1000, Benjamin Herrenschmidt wrote:
>
> > > Why don't you use the new struct dev_archdata mechanism ? That's what I
> > > use on powerpc to provide optional iommu linkage to any device in the
> > > system.
> > Good one. I will certainly try out your idea and will update the list
> > tomorrow.
>
> The advantage is that it allows to completely isolate the iommu code
> from any dependency to PCI, which means you can implement DMA ops
> support for various platform devices or other fancy things. Maybe not
> the most useful in x86-land, but still ;-)

Andrew,
Please delete my previous patch and add the below patch
to your MM queue.

I guess the patch name is "intel-iommu-fix-for-iommu-early-crash.patch".

Thanks,
Anil
------------------------------------------------------

Subject: [Intel-IOMMU] Fix for IOMMU early crash

pci_dev's->sysdata is highly overloaded and currently
IOMMU is broken due to IOMMU code depending on this field.

This patch introduces new field in pci_dev's dev.archdata struct to
hold IOMMU specific per device IOMMU private data.

Signed-off-by: Anil S Keshavamurthy <[email protected]>

---
drivers/pci/intel-iommu.c | 22 +++++++++++-----------
include/asm-x86_64/device.h | 3 +++
2 files changed, 14 insertions(+), 11 deletions(-)

Index: 2.6-mm/drivers/pci/intel-iommu.c
===================================================================
--- 2.6-mm.orig/drivers/pci/intel-iommu.c 2007-10-04 11:35:09.000000000 -0700
+++ 2.6-mm/drivers/pci/intel-iommu.c 2007-10-04 11:47:47.000000000 -0700
@@ -1348,7 +1348,7 @@
list_del(&info->link);
list_del(&info->global);
if (info->dev)
- info->dev->sysdata = NULL;
+ info->dev->dev.archdata.iommu = NULL;
spin_unlock_irqrestore(&device_domain_lock, flags);

detach_domain_for_dev(info->domain, info->bus, info->devfn);
@@ -1361,7 +1361,7 @@

/*
* find_domain
- * Note: we use struct pci_dev->sysdata stores the info
+ * Note: we use struct pci_dev->dev.archdata.iommu stores the info
*/
struct dmar_domain *
find_domain(struct pci_dev *pdev)
@@ -1369,7 +1369,7 @@
struct device_domain_info *info;

/* No lock here, assumes no domain exit in normal case */
- info = pdev->sysdata;
+ info = pdev->dev.archdata.iommu;
if (info)
return info->domain;
return NULL;
@@ -1519,7 +1519,7 @@
}
list_add(&info->link, &domain->devices);
list_add(&info->global, &device_domain_list);
- pdev->sysdata = info;
+ pdev->dev.archdata.iommu = info;
spin_unlock_irqrestore(&device_domain_lock, flags);
return domain;
error:
@@ -1579,7 +1579,7 @@
static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
struct pci_dev *pdev)
{
- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return 0;
return iommu_prepare_identity_map(pdev, rmrr->base_address,
rmrr->end_address + 1);
@@ -1595,7 +1595,7 @@
int ret;

for_each_pci_dev(pdev) {
- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO ||
+ if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
!IS_GFX_DEVICE(pdev))
continue;
printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
@@ -1836,7 +1836,7 @@
int prot = 0;

BUG_ON(dir == DMA_NONE);
- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return virt_to_bus(addr);

domain = get_valid_domain_for_dev(pdev);
@@ -1900,7 +1900,7 @@
unsigned long start_addr;
struct iova *iova;

- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return;
domain = find_domain(pdev);
BUG_ON(!domain);
@@ -1974,7 +1974,7 @@
size_t size = 0;
void *addr;

- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return;

domain = find_domain(pdev);
@@ -2032,7 +2032,7 @@
unsigned long start_addr;

BUG_ON(dir == DMA_NONE);
- if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
+ if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return intel_nontranslate_map_sg(hwdev, sg, nelems, dir);

domain = get_valid_domain_for_dev(pdev);
@@ -2234,7 +2234,7 @@
for (i = 0; i < drhd->devices_cnt; i++) {
if (!drhd->devices[i])
continue;
- drhd->devices[i]->sysdata = DUMMY_DEVICE_DOMAIN_INFO;
+ drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
}
}
}
Index: 2.6-mm/include/asm-x86_64/device.h
===================================================================
--- 2.6-mm.orig/include/asm-x86_64/device.h 2007-10-04 11:35:09.000000000 -0700
+++ 2.6-mm/include/asm-x86_64/device.h 2007-10-04 11:49:44.000000000 -0700
@@ -10,6 +10,9 @@
#ifdef CONFIG_ACPI
void *acpi_handle;
#endif
+#ifdef CONFIG_DMAR
+ void *iommu; /* hook for IOMMU specific extension */
+#endif
};

#endif /* _ASM_X86_64_DEVICE_H */

2007-10-05 03:08:53

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [patch take 2][Intel-IOMMU] Fix for IOMMU early crash


> Subject: [Intel-IOMMU] Fix for IOMMU early crash
>
> pci_dev's->sysdata is highly overloaded and currently
> IOMMU is broken due to IOMMU code depending on this field.
>
> This patch introduces new field in pci_dev's dev.archdata struct to
> hold IOMMU specific per device IOMMU private data.
>
> Signed-off-by: Anil S Keshavamurthy <[email protected]>

Looks good. Won't break powerpc.

Acked-by: Benjamin Herrenschmidt <[email protected]>

> ---
> drivers/pci/intel-iommu.c | 22 +++++++++++-----------
> include/asm-x86_64/device.h | 3 +++
> 2 files changed, 14 insertions(+), 11 deletions(-)
>
> Index: 2.6-mm/drivers/pci/intel-iommu.c
> ===================================================================
> --- 2.6-mm.orig/drivers/pci/intel-iommu.c 2007-10-04 11:35:09.000000000 -0700
> +++ 2.6-mm/drivers/pci/intel-iommu.c 2007-10-04 11:47:47.000000000 -0700
> @@ -1348,7 +1348,7 @@
> list_del(&info->link);
> list_del(&info->global);
> if (info->dev)
> - info->dev->sysdata = NULL;
> + info->dev->dev.archdata.iommu = NULL;
> spin_unlock_irqrestore(&device_domain_lock, flags);
>
> detach_domain_for_dev(info->domain, info->bus, info->devfn);
> @@ -1361,7 +1361,7 @@
>
> /*
> * find_domain
> - * Note: we use struct pci_dev->sysdata stores the info
> + * Note: we use struct pci_dev->dev.archdata.iommu stores the info
> */
> struct dmar_domain *
> find_domain(struct pci_dev *pdev)
> @@ -1369,7 +1369,7 @@
> struct device_domain_info *info;
>
> /* No lock here, assumes no domain exit in normal case */
> - info = pdev->sysdata;
> + info = pdev->dev.archdata.iommu;
> if (info)
> return info->domain;
> return NULL;
> @@ -1519,7 +1519,7 @@
> }
> list_add(&info->link, &domain->devices);
> list_add(&info->global, &device_domain_list);
> - pdev->sysdata = info;
> + pdev->dev.archdata.iommu = info;
> spin_unlock_irqrestore(&device_domain_lock, flags);
> return domain;
> error:
> @@ -1579,7 +1579,7 @@
> static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
> struct pci_dev *pdev)
> {
> - if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
> + if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
> return 0;
> return iommu_prepare_identity_map(pdev, rmrr->base_address,
> rmrr->end_address + 1);
> @@ -1595,7 +1595,7 @@
> int ret;
>
> for_each_pci_dev(pdev) {
> - if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO ||
> + if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
> !IS_GFX_DEVICE(pdev))
> continue;
> printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
> @@ -1836,7 +1836,7 @@
> int prot = 0;
>
> BUG_ON(dir == DMA_NONE);
> - if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
> + if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
> return virt_to_bus(addr);
>
> domain = get_valid_domain_for_dev(pdev);
> @@ -1900,7 +1900,7 @@
> unsigned long start_addr;
> struct iova *iova;
>
> - if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
> + if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
> return;
> domain = find_domain(pdev);
> BUG_ON(!domain);
> @@ -1974,7 +1974,7 @@
> size_t size = 0;
> void *addr;
>
> - if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
> + if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
> return;
>
> domain = find_domain(pdev);
> @@ -2032,7 +2032,7 @@
> unsigned long start_addr;
>
> BUG_ON(dir == DMA_NONE);
> - if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
> + if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
> return intel_nontranslate_map_sg(hwdev, sg, nelems, dir);
>
> domain = get_valid_domain_for_dev(pdev);
> @@ -2234,7 +2234,7 @@
> for (i = 0; i < drhd->devices_cnt; i++) {
> if (!drhd->devices[i])
> continue;
> - drhd->devices[i]->sysdata = DUMMY_DEVICE_DOMAIN_INFO;
> + drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
> }
> }
> }
> Index: 2.6-mm/include/asm-x86_64/device.h
> ===================================================================
> --- 2.6-mm.orig/include/asm-x86_64/device.h 2007-10-04 11:35:09.000000000 -0700
> +++ 2.6-mm/include/asm-x86_64/device.h 2007-10-04 11:49:44.000000000 -0700
> @@ -10,6 +10,9 @@
> #ifdef CONFIG_ACPI
> void *acpi_handle;
> #endif
> +#ifdef CONFIG_DMAR
> + void *iommu; /* hook for IOMMU specific extension */
> +#endif
> };
>
> #endif /* _ASM_X86_64_DEVICE_H */