2009-10-26 23:29:01

by Alex Williamson

[permalink] [raw]
Subject: [PATCH 0/5] intel-iommu fixes

This series implements several fixes for intel-iommu. The first
is to make use of the device coherent_dma_mask when allocating
coherent DMA buffers, for which we move dma_generic_alloc_coherent
out of the x86 code so we can use it by both of the current users
of intel-iommu. Next, we add a little more to detecting when a
device can't support passthrough mode. Then we reinstate RMRRs
for devices that get kicked out of passthrough mode. And finally,
a trivial printk change to be less verbose on boot.

Ideally I'd like to entertain the idea of getting these into 2.6.32
because the coherent mapping issue is actually a regression since
2.6.31 and will cause some devices to fail to initialize in passthrough
mode (cciss for one). Thanks,

Alex

---

Alex Williamson (5):
intel-iommu: Quiet unnecessary output
intel-iommu: Reinstate RMRRs if a device is removed from passthrough domain
intel-iommu: Use max_pfn to determine whether a device can passthrough
intel-iommu: Use dma_generic_alloc_coherent() for passthrough mappings
dma: create dma_generic_alloc/free_coherent()


arch/x86/include/asm/dma-mapping.h | 3 --
arch/x86/kernel/pci-dma.c | 31 --------------------
arch/x86/kernel/pci-nommu.c | 10 +++++--
drivers/pci/intel-iommu.c | 55 +++++++++++++++++++++++++++++++++---
include/linux/dma-mapping.h | 44 +++++++++++++++++++++++++++++
5 files changed, 102 insertions(+), 41 deletions(-)


2009-10-26 23:29:10

by Alex Williamson

[permalink] [raw]
Subject: [PATCH 1/5] dma: create dma_generic_alloc/free_coherent()

Move dma_generic_alloc_coherent() out of x86 code and create
corresponding dma_generic_free_coherent() for symmetry. These
can then be used by IOMMU drivers attempting to implement
passthrough mode.

Signed-off-by: Alex Williamson <[email protected]>
---

arch/x86/include/asm/dma-mapping.h | 3 --
arch/x86/kernel/pci-dma.c | 31 -------------------------
arch/x86/kernel/pci-nommu.c | 10 +++++++-
include/linux/dma-mapping.h | 44 ++++++++++++++++++++++++++++++++++++
4 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 0ee770d..e6d2c9f 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -52,9 +52,6 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
extern int dma_supported(struct device *hwdev, u64 mask);
extern int dma_set_mask(struct device *dev, u64 mask);

-extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag);
-
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
if (!dev->dma_mask)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index b2a71dc..ecd9df0 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -144,37 +144,6 @@ void __init pci_iommu_alloc(void)
pci_swiotlb_init();
}

-void *dma_generic_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag)
-{
- unsigned long dma_mask;
- struct page *page;
- dma_addr_t addr;
-
- dma_mask = dma_alloc_coherent_mask(dev, flag);
-
- flag |= __GFP_ZERO;
-again:
- page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
- if (!page)
- return NULL;
-
- addr = page_to_phys(page);
- if (addr + size > dma_mask) {
- __free_pages(page, get_order(size));
-
- if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
- flag = (flag & ~GFP_DMA32) | GFP_DMA;
- goto again;
- }
-
- return NULL;
- }
-
- *dma_addr = addr;
- return page_address(page);
-}
-
/*
* See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
* documentation.
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index a3933d4..ed9e12e 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -73,10 +73,16 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
return nents;
}

+static void *nommu_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_addr, gfp_t flag)
+{
+ return dma_generic_alloc_coherent(dev, size, dma_addr, flag);
+}
+
static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr)
{
- free_pages((unsigned long)vaddr, get_order(size));
+ dma_generic_free_coherent(dev, size, vaddr, dma_addr);
}

static void nommu_sync_single_for_device(struct device *dev,
@@ -95,7 +101,7 @@ static void nommu_sync_sg_for_device(struct device *dev,
}

struct dma_map_ops nommu_dma_ops = {
- .alloc_coherent = dma_generic_alloc_coherent,
+ .alloc_coherent = nommu_alloc_coherent,
.free_coherent = nommu_free_coherent,
.map_sg = nommu_map_sg,
.map_page = nommu_map_page,
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 91b7618..285043c 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -232,4 +232,48 @@ struct dma_attrs;

#endif /* CONFIG_HAVE_DMA_ATTRS */

+static inline void *dma_generic_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_addr, gfp_t flag)
+{
+ unsigned long dma_mask;
+ struct page *page;
+ dma_addr_t addr;
+
+ dma_mask = dev->coherent_dma_mask;
+ if (!dma_mask) {
+#ifdef CONFIG_ISA
+ dma_mask = (flag & GFP_DMA) ? DMA_BIT_MASK(24)
+ : DMA_BIT_MASK(32);
+#else
+ dma_mask = DMA_BIT_MASK(32);
+#endif
+ }
+
+ flag |= __GFP_ZERO;
+again:
+ page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
+ if (!page)
+ return NULL;
+
+ addr = page_to_phys(page);
+ if (addr + size > dma_mask) {
+ __free_pages(page, get_order(size));
+
+ if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
+ flag = (flag & ~GFP_DMA32) | GFP_DMA;
+ goto again;
+ }
+
+ return NULL;
+ }
+
+ *dma_addr = addr;
+ return page_address(page);
+}
+
+static inline void dma_generic_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_addr)
+{
+ free_pages((unsigned long)vaddr, get_order(size));
+}
#endif

2009-10-26 23:29:16

by Alex Williamson

[permalink] [raw]
Subject: [PATCH 2/5] intel-iommu: Use dma_generic_alloc_coherent() for passthrough mappings

intel_alloc_coherent() needs to follow DMA mapping convention and
make use of the coherent_dma_mask of the device. Without this,
devices may get buffers they can't use.

Signed-off-by: Alex Williamson <[email protected]>
---

drivers/pci/intel-iommu.c | 8 ++++++++
1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index b1e97e6..40be49a 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2765,6 +2765,10 @@ static void *intel_alloc_coherent(struct device *hwdev, size_t size,
void *vaddr;
int order;

+ if (iommu_no_mapping(hwdev))
+ return dma_generic_alloc_coherent(hwdev, size, dma_handle,
+ flags);
+
size = PAGE_ALIGN(size);
order = get_order(size);
flags &= ~(GFP_DMA | GFP_DMA32);
@@ -2788,6 +2792,10 @@ static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
{
int order;

+ if (iommu_no_mapping(hwdev))
+ return dma_generic_free_coherent(hwdev, size, vaddr,
+ dma_handle);
+
size = PAGE_ALIGN(size);
order = get_order(size);

2009-10-26 23:29:19

by Alex Williamson

[permalink] [raw]
Subject: [PATCH 3/5] intel-iommu: Use max_pfn to determine whether a device can passthrough

A dma_mask that's greater than 32-bit does not imply the device
can DMA anywhere in physical memory.

Signed-off-by: Alex Williamson <[email protected]>
---

drivers/pci/intel-iommu.c | 11 +++++++----
1 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 40be49a..19f10ae 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -39,6 +39,7 @@
#include <linux/sysdev.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
+#include <linux/bootmem.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include "pci.h"
@@ -2196,7 +2197,8 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
* take them out of the 1:1 domain later.
*/
if (!startup)
- return pdev->dma_mask > DMA_BIT_MASK(32);
+ return (pdev->dma_mask & (max_pfn << PAGE_SHIFT)) ==
+ (max_pfn << PAGE_SHIFT);

return 1;
}
@@ -2537,11 +2539,12 @@ static int iommu_no_mapping(struct device *dev)
return 1;
else {
/*
- * 32 bit DMA is removed from si_domain and fall back
- * to non-identity mapping.
+ * Devices that cannot support identity mapping
+ * are removed from si_domain and fall back to
+ * non-identity mapping.
*/
domain_remove_one_dev_info(si_domain, pdev);
- printk(KERN_INFO "32bit %s uses non-identity mapping\n",
+ printk(KERN_INFO "%s uses non-identity mapping\n",
pci_name(pdev));
return 0;
}

2009-10-26 23:29:44

by Alex Williamson

[permalink] [raw]
Subject: [PATCH 4/5] intel-iommu: Reinstate RMRRs if a device is removed from passthrough domain

When a device is setup for passthrough it has full access to memory
so processing the RMRRs is unnecessary. However, if we remove the device
from the si_domain, we need to reinstate the associated RMRRs.

Signed-off-by: Alex Williamson <[email protected]>
---

drivers/pci/intel-iommu.c | 33 +++++++++++++++++++++++++++++++++
1 files changed, 33 insertions(+), 0 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 19f10ae..a7f4476 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2538,6 +2538,10 @@ static int iommu_no_mapping(struct device *dev)
if (iommu_should_identity_map(pdev, 0))
return 1;
else {
+ struct dmar_rmrr_unit *rmrr;
+ struct dmar_domain *domain;
+ int i, ret;
+
/*
* Devices that cannot support identity mapping
* are removed from si_domain and fall back to
@@ -2546,6 +2550,35 @@ static int iommu_no_mapping(struct device *dev)
domain_remove_one_dev_info(si_domain, pdev);
printk(KERN_INFO "%s uses non-identity mapping\n",
pci_name(pdev));
+
+ domain = get_valid_domain_for_dev(pdev);
+ if (!domain) {
+ printk(KERN_ERR
+ "Allocating domain for %s failed",
+ pci_name(pdev));
+ return 0;
+ }
+
+ ret = domain_add_dev_info(domain, pdev,
+ CONTEXT_TT_MULTI_LEVEL);
+ if (ret) {
+ printk(KERN_ERR
+ "Attaching %s to domain failed",
+ pci_name(pdev));
+ return 0;
+ }
+
+ for_each_rmrr_units(rmrr) {
+ for (i = 0; i < rmrr->devices_cnt; i++) {
+ if (pdev != rmrr->devices[i])
+ continue;
+ ret = iommu_prepare_rmrr_dev(rmrr,
+ pdev);
+ if (ret)
+ printk(KERN_ERR
+ "IOMMU: mapping reserved region failed\n");
+ }
+ }
return 0;
}
} else {

2009-10-26 23:29:29

by Alex Williamson

[permalink] [raw]
Subject: [PATCH 5/5] intel-iommu: Quiet unnecessary output

Signed-off-by: Alex Williamson <[email protected]>
---

drivers/pci/intel-iommu.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index a7f4476..77f4b25 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1983,7 +1983,8 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
range which is reserved in E820, so which didn't get set
up to start with in si_domain */
if (domain == si_domain && hw_pass_through) {
- printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
+ printk(KERN_INFO
+ "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
pci_name(pdev), start, end);
return 0;
}

2009-10-27 00:57:54

by Paul Mundt

[permalink] [raw]
Subject: Re: [PATCH 1/5] dma: create dma_generic_alloc/free_coherent()

On Mon, Oct 26, 2009 at 05:24:58PM -0600, Alex Williamson wrote:
> Move dma_generic_alloc_coherent() out of x86 code and create
> corresponding dma_generic_free_coherent() for symmetry. These
> can then be used by IOMMU drivers attempting to implement
> passthrough mode.
>
Except that dma_generic_alloc_coherent() is only "generic" for platforms
with consistent DMA. Everyone else will need a cacheflush and potentially
a remap. It's not even obvious from looking at the consistent DMA
platforms that they'll be able to use it out of the box due to expecting
something other than page_address(), which all suggests that this is
better off remaining an x86-only routine.

This is also making changes to the DMA-API without any documentation
updates and without consulting with any other architecture people. If you
wish to make a proposal for a DMA-API addition, then this should be made
to linux-arch rather than hidden in an iommu patchset.

Beyond that, we presently have:

- dma_alloc_coherent()
- dma_alloc_noncoherent()
- dma_alloc_from_coherent()

defined in the API. Making an addition to this for your fallback case
seems workable, but it's something that will have to be handled
differently for each architecture. You might be able to get away with
something generic enough to stash in drivers/base/dma-coherent.c, but
from a first look, I wouldn't count on it.

It was bad enough when the x86-specific flush_write_buffers() snuck in to
the "generic" dma mapping code, but this particular case is much more
problematic.

2009-10-27 01:47:23

by FUJITA Tomonori

[permalink] [raw]
Subject: Re: [PATCH 1/5] dma: create dma_generic_alloc/free_coherent()

On Mon, 26 Oct 2009 17:24:58 -0600
Alex Williamson <[email protected]> wrote:

> Move dma_generic_alloc_coherent() out of x86 code and create
> corresponding dma_generic_free_coherent() for symmetry. These
> can then be used by IOMMU drivers attempting to implement
> passthrough mode.
>
> Signed-off-by: Alex Williamson <[email protected]>
> ---
>
> arch/x86/include/asm/dma-mapping.h | 3 --
> arch/x86/kernel/pci-dma.c | 31 -------------------------
> arch/x86/kernel/pci-nommu.c | 10 +++++++-
> include/linux/dma-mapping.h | 44 ++++++++++++++++++++++++++++++++++++
> 4 files changed, 52 insertions(+), 36 deletions(-)

dma_generic_alloc_coherent() is x86 specific; other sane architectures
don't need such GFP_ hack. So moving it to the generic place is not a
good idea.

2009-10-27 02:26:39

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH 1/5] dma: create dma_generic_alloc/free_coherent()

On Tue, 2009-10-27 at 09:57 +0900, Paul Mundt wrote:
> On Mon, Oct 26, 2009 at 05:24:58PM -0600, Alex Williamson wrote:
> > Move dma_generic_alloc_coherent() out of x86 code and create
> > corresponding dma_generic_free_coherent() for symmetry. These
> > can then be used by IOMMU drivers attempting to implement
> > passthrough mode.
> >
> Except that dma_generic_alloc_coherent() is only "generic" for platforms
> with consistent DMA. Everyone else will need a cacheflush and potentially
> a remap. It's not even obvious from looking at the consistent DMA
> platforms that they'll be able to use it out of the box due to expecting
> something other than page_address(), which all suggests that this is
> better off remaining an x86-only routine.
>
> This is also making changes to the DMA-API without any documentation
> updates and without consulting with any other architecture people.

Ok, thanks for the feedback. Let's forget this patch and I'll see how
amenable David is to doing something within intel-iommu since it only
supports platforms with consistent DMA. Thanks,

Alex

2009-10-27 03:35:41

by Alex Williamson

[permalink] [raw]
Subject: [PATCH v2 2/5] intel-iommu: Obey coherent_dma_mask for alloc_coherent on passthrough

intel_alloc_coherent() needs to follow DMA mapping convention and
make use of the coherent_dma_mask of the device for identity mappings.
Without this, devices may get buffers they can't use.

Signed-off-by: Alex Williamson <[email protected]>
---

v2: Abandon attempt to create a common dma_generic_alloc_coherent,
patch 1/5 is now defunct. Patches 3-5 are unchanged.

drivers/pci/intel-iommu.c | 8 ++++++--
1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index b1e97e6..272261e 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2582,7 +2582,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
BUG_ON(dir == DMA_NONE);

if (iommu_no_mapping(hwdev))
- return paddr;
+ return paddr + size > dma_mask ? 0 : paddr;

domain = get_valid_domain_for_dev(pdev);
if (!domain)
@@ -2767,7 +2767,11 @@ static void *intel_alloc_coherent(struct device *hwdev, size_t size,

size = PAGE_ALIGN(size);
order = get_order(size);
- flags &= ~(GFP_DMA | GFP_DMA32);
+
+ if (!iommu_no_mapping(hwdev))
+ flags &= ~(GFP_DMA | GFP_DMA32);
+ else if (hwdev->coherent_dma_mask != DMA_BIT_MASK(64))
+ flags |= GFP_DMA;

vaddr = (void *)__get_free_pages(flags, order);
if (!vaddr)

2009-10-27 08:15:34

by David Woodhouse

[permalink] [raw]
Subject: Re: [PATCH 4/5] intel-iommu: Reinstate RMRRs if a device is removed from passthrough domain

On Mon, 2009-10-26 at 17:25 -0600, Alex Williamson wrote:
> When a device is setup for passthrough it has full access to memory
> so processing the RMRRs is unnecessary. However, if we remove the device
> from the si_domain, we need to reinstate the associated RMRRs.
>
> Signed-off-by: Alex Williamson <[email protected]>

If your device is doing DMA to host memory autonomously, you may still
have problems with this patch -- you take it out of the si_domain and
then there's a period of time before you reapply the RMRRs, during which
its DMA may be prevented.

You want to set up the new domain first, then switch the device over to
it atomically.

--
David Woodhouse Open Source Technology Centre
[email protected] Intel Corporation

2009-10-27 15:50:31

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH 4/5] intel-iommu: Reinstate RMRRs if a device is removed from passthrough domain

On Tue, 2009-10-27 at 08:15 +0000, David Woodhouse wrote:
> On Mon, 2009-10-26 at 17:25 -0600, Alex Williamson wrote:
> > When a device is setup for passthrough it has full access to memory
> > so processing the RMRRs is unnecessary. However, if we remove the device
> > from the si_domain, we need to reinstate the associated RMRRs.
> >
> > Signed-off-by: Alex Williamson <[email protected]>
>
> If your device is doing DMA to host memory autonomously, you may still
> have problems with this patch -- you take it out of the si_domain and
> then there's a period of time before you reapply the RMRRs, during which
> its DMA may be prevented.
>
> You want to set up the new domain first, then switch the device over to
> it atomically.

Yes, good point. I'm not seeing any convenient ways to setup a new
domain for a device while it's still a member of the si_domain. It
looks like I'd need to extract parts of the get_valid_domain_for_dev()
path and ignore any bits about using the already existing domain. Is
there an easier way? Thanks,

Alex

2009-10-28 14:37:04

by David Woodhouse

[permalink] [raw]
Subject: Re: [PATCH 4/5] intel-iommu: Reinstate RMRRs if a device is removed from passthrough domain

On Tue, 2009-10-27 at 09:50 -0600, Alex Williamson wrote:
> On Tue, 2009-10-27 at 08:15 +0000, David Woodhouse wrote:
> > On Mon, 2009-10-26 at 17:25 -0600, Alex Williamson wrote:
> > > When a device is setup for passthrough it has full access to memory
> > > so processing the RMRRs is unnecessary. However, if we remove the device
> > > from the si_domain, we need to reinstate the associated RMRRs.
> > >
> > > Signed-off-by: Alex Williamson <[email protected]>
> >
> > If your device is doing DMA to host memory autonomously, you may still
> > have problems with this patch -- you take it out of the si_domain and
> > then there's a period of time before you reapply the RMRRs, during which
> > its DMA may be prevented.
> >
> > You want to set up the new domain first, then switch the device over to
> > it atomically.
>
> Yes, good point. I'm not seeing any convenient ways to setup a new
> domain for a device while it's still a member of the si_domain. It
> looks like I'd need to extract parts of the get_valid_domain_for_dev()
> path and ignore any bits about using the already existing domain.

That seems like a sane plan. That code wants cleaning up anyway, and
factoring out the 'make new domain' part of get_valid_domain_for_dev()
should be simple enough.

--
dwmw2

2009-10-28 16:06:54

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH 4/5] intel-iommu: Reinstate RMRRs if a device is removed from passthrough domain

On Wed, 2009-10-28 at 14:36 +0000, David Woodhouse wrote:
> On Tue, 2009-10-27 at 09:50 -0600, Alex Williamson wrote:
> >
> > Yes, good point. I'm not seeing any convenient ways to setup a new
> > domain for a device while it's still a member of the si_domain. It
> > looks like I'd need to extract parts of the get_valid_domain_for_dev()
> > path and ignore any bits about using the already existing domain.
>
> That seems like a sane plan. That code wants cleaning up anyway, and
> factoring out the 'make new domain' part of get_valid_domain_for_dev()
> should be simple enough.

Ok, I'll work on something along those lines. FWIW, even though I sent
these as a series, each one stands on it's own. While I think
reinstating RMRRs is the right thing to do, I don't know of any devices
that break with the current model (the device I know of with the
interesting RMRR usage shouldn't be demoted from the si_domain). So
please consider pushing patches 2, 3 & 5 upstream (particularly 2).
Thanks,

Alex

2009-10-28 20:35:43

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH 4/5] intel-iommu: Reinstate RMRRs if a device is removed from passthrough domain

On Wed, 2009-10-28 at 10:06 -0600, Alex Williamson wrote:
> On Wed, 2009-10-28 at 14:36 +0000, David Woodhouse wrote:
> > On Tue, 2009-10-27 at 09:50 -0600, Alex Williamson wrote:
> > >
> > > Yes, good point. I'm not seeing any convenient ways to setup a new
> > > domain for a device while it's still a member of the si_domain. It
> > > looks like I'd need to extract parts of the get_valid_domain_for_dev()
> > > path and ignore any bits about using the already existing domain.
> >
> > That seems like a sane plan. That code wants cleaning up anyway, and
> > factoring out the 'make new domain' part of get_valid_domain_for_dev()
> > should be simple enough.
>
> Ok, I'll work on something along those lines. FWIW, even though I sent
> these as a series, each one stands on it's own. While I think
> reinstating RMRRs is the right thing to do, I don't know of any devices
> that break with the current model (the device I know of with the
> interesting RMRR usage shouldn't be demoted from the si_domain). So
> please consider pushing patches 2, 3 & 5 upstream (particularly 2).

On IRC David brought up that VT-d requires a context entry to be marked
not present before it can be changed, making a seamless transition for
devices with RMRRs impossible. This really makes devices with RMRRs
difficult to deal with, and I'm not sure what we can do with them other
than warn when we might hit problems and prevent them from being moved
to a VM domain. The patch below does that, though I fear we may be
tweaking what devices we white-list for a while. Comments? Thanks,

Alex

commit db9778445a5347a817263db60bfb432f235411ba
Author: Alex Williamson <[email protected]>
Date: Wed Oct 28 14:10:56 2009 -0600

intel-iommu: warn/prevent operations on devices with RMRRs

RMRRs throw a wrench in our ability to change VT-d domain mapping since
they may be used for side-band platform purposes and we cannot switch
domain mappings seamlessly. RMRRs for USB devices are typical, but
anything else is likely suspect. Provide a warning when such devices
are removed from an identity map and prevent them from being assigned
to a VM.

Signed-off-by: Alex Williamson <[email protected]>

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index aa19d75..45f3485 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -49,6 +49,7 @@

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
+#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START (0xfee00000)
@@ -2517,6 +2518,25 @@ static int iommu_dummy(struct pci_dev *pdev)
return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

+static int device_has_rmrr(struct pci_dev *pdev)
+{
+ struct dmar_rmrr_unit *rmrr;
+ int i;
+
+ /*
+ * USB devices do typically have RMRRs for DOS
+ * compatibility, but can typically be ignored.
+ */
+ if (IS_USB_DEVICE(pdev))
+ return 0;
+
+ for_each_rmrr_units(rmrr)
+ for (i = 0; i < rmrr->devices_cnt; i++)
+ if (pdev == rmrr->devices[i])
+ return 1;
+ return 0;
+}
+
/* Check if the pdev needs to go through non-identity map and unmap process.*/
static int iommu_no_mapping(struct device *dev)
{
@@ -2546,6 +2566,13 @@ static int iommu_no_mapping(struct device *dev)
domain_remove_one_dev_info(si_domain, pdev);
printk(KERN_INFO "%s uses non-identity mapping\n",
pci_name(pdev));
+
+ if (device_has_rmrr(pdev))
+ printk(KERN_WARNING
+ "IOMMU: Warning RMRRs for %s not mapped\n",
+ pci_name(pdev));
+
+
return 0;
}
} else {
@@ -3544,6 +3571,13 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
int addr_width;
u64 end;

+ if (device_has_rmrr(pdev)) {
+ printk(KERN_WARNING
+ "IOMMU: Device %s requires RMRRs, cannot be attached\n",
+ pci_name(pdev));
+ return -EBUSY;
+ }
+
/* normally pdev is not mapped */
if (unlikely(domain_context_mapped(pdev))) {
struct dmar_domain *old_domain;