2013-03-08 02:36:27

by Andrew Cooks

[permalink] [raw]
Subject: [PATCH v4] Quirk for buggy dma source tags with Intel IOMMU.

This patch creates a quirk to allow the Intel IOMMU to be enabled for devices
that use incorrect tags during DMA. It is similar to the quirk for Ricoh
devices, but allows mapping multiple functions and mapping of 'ghost'
functions that do not correspond to real devices. Devices that need this
include a variety of Marvell 88SE91xx based SATA controllers. [1][2]

Changelog:
v4: Process feedback received from Alex Williamson.
* don't assume function 0 is a real device.
* exit early if no ghost functions are known, or all known functions have
been mapped.
* cleanup failure case so mapping succeeds or fails for all ghost functions
per device.
* improve comments.

v3:
* Adopt David Woodhouse's terminology by referring to the quirky functions as
'ghost' functions.
* Unmap ghost functions when device is detached from IOMMU.
* Stub function for when CONFIG_PCI_QUIRKS is not enabled.


This patch was generated against 3.9-rc1, but will also apply to 3.7.10.

Bug reports:
1. https://bugzilla.redhat.com/show_bug.cgi?id=757166
2. https://bugzilla.kernel.org/show_bug.cgi?id=42679

Signed-off-by: Andrew Cooks <[email protected]>
---
drivers/iommu/intel-iommu.c | 69 +++++++++++++++++++++++++++++++++++++++++++
drivers/pci/quirks.c | 67 +++++++++++++++++++++++++++++++++++++++++-
include/linux/pci.h | 5 +++
include/linux/pci_ids.h | 1 +
4 files changed, 141 insertions(+), 1 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 0099667..f53f3e3 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1674,6 +1674,69 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
return 0;
}

+static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn);
+
+static void unmap_ghost_dma_fn(struct pci_dev *pdev, u8 fn_map)
+{
+ u8 fn;
+ struct intel_iommu *iommu;
+
+ iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
+ pdev->devfn);
+
+ /* something must be seriously fubar if we can't lookup the iommu. */
+ BUG_ON(!iommu);
+
+ for (fn = 0; fn <= 7 && fn_map << fn; fn++) {
+ if (fn == PCI_FUNC(pdev->devfn))
+ continue;
+ if (fn_map & (1<<fn)) {
+ iommu_detach_dev(iommu,
+ pdev->bus->number,
+ PCI_DEVFN(PCI_SLOT(pdev->devfn), fn));
+ dev_dbg(&pdev->dev, "quirk; ghost func %d unmapped",
+ fn);
+ }
+ }
+}
+
+/* For quirky devices like Marvell 88SE91xx chips that use ghost functions. */
+static int map_ghost_dma_fn(struct dmar_domain *domain,
+ struct pci_dev *pdev,
+ int translation)
+{
+ u8 fn, fn_map;
+ u8 fn_mapped = 0;
+ int err = 0;
+
+ fn_map = pci_get_dma_source_map(pdev);
+
+ /* this is the common, non-quirky case. */
+ if (!fn_map)
+ return 0;
+
+ for (fn = 0; fn <= 7 && fn_map << fn; fn++) {
+ if (fn == PCI_FUNC(pdev->devfn))
+ continue;
+ if (fn_map & (1<<fn)) {
+ err = domain_context_mapping_one(domain,
+ pci_domain_nr(pdev->bus),
+ pdev->bus->number,
+ PCI_DEVFN(PCI_SLOT(pdev->devfn), fn),
+ translation);
+ if (err) {
+ dev_err(&pdev->dev,
+ "mapping ghost func %d failed", fn);
+ unmap_ghost_dma_fn(pdev, fn_mapped);
+ return err;
+ }
+ dev_dbg(&pdev->dev, "quirk; ghost func %d mapped", fn);
+ fn_mapped |= (1<<fn);
+ }
+ }
+ return 0;
+}
+
static int
domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
int translation)
@@ -1687,6 +1750,11 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
if (ret)
return ret;

+ /* quirk for undeclared/ghost pci functions */
+ ret = map_ghost_dma_fn(domain, pdev, translation);
+ if (ret)
+ return ret;
+
/* dependent device mapping */
tmp = pci_find_upstream_pcie_bridge(pdev);
if (!tmp)
@@ -3786,6 +3854,7 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
iommu_disable_dev_iotlb(info);
iommu_detach_dev(iommu, info->bus, info->devfn);
iommu_detach_dependent_devices(iommu, pdev);
+ unmap_ghost_dma_fn(pdev, pci_get_dma_source_map(pdev));
free_devinfo_mem(info);

spin_lock_irqsave(&device_domain_lock, flags);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 0369fb6..cf00acb 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3249,6 +3249,10 @@ static struct pci_dev *pci_func_0_dma_source(struct pci_dev *dev)
return pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
}

+/* Table of source functions for real devices. The DMA requests for the
+ * device are tagged with a different real function as source. This is
+ * relevant to multifunction devices.
+ */
static const struct pci_dev_dma_source {
u16 vendor;
u16 device;
@@ -3275,7 +3279,8 @@ static const struct pci_dev_dma_source {
* the device doing the DMA, but sometimes hardware is broken and will
* tag the DMA as being sourced from a different device. This function
* allows that translation. Note that the reference count of the
- * returned device is incremented on all paths.
+ * returned device is incremented on all paths. Translation is done when
+ * the device is added to an IOMMU group.
*/
struct pci_dev *pci_get_dma_source(struct pci_dev *dev)
{
@@ -3292,6 +3297,66 @@ struct pci_dev *pci_get_dma_source(struct pci_dev *dev)
return pci_dev_get(dev);
}

+/* Table of multiple (ghost) source functions. This is similar to the
+ * translated sources above, but with the following differences:
+ * 1. the device may use multiple functions as DMA sources,
+ * 2. these functions cannot be assumed to be actual devices, they're simply
+ * incorrect DMA tags.
+ * 3. the specific ghost function for a request can not always be predicted.
+ * For example, the actual device could be xx:yy.1 and it could use
+ * both 0 and 1 for different requests, with no obvious way to tell when
+ * DMA will be tagged as comming from xx.yy.0 and and when it will be tagged
+ * as comming from xx.yy.1.
+ * The bitmap contains all of the functions used in DMA tags, including the
+ * actual device.
+ * See https://bugzilla.redhat.com/show_bug.cgi?id=757166,
+ * https://bugzilla.kernel.org/show_bug.cgi?id=42679
+ * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1089768
+ */
+static const struct pci_dev_dma_multi_func_sources {
+ u16 vendor;
+ u16 device;
+ u8 func_map; /* bit map. lsb is fn 0. */
+} pci_dev_dma_multi_func_sources[] = {
+ { PCI_VENDOR_ID_MARVELL_2, 0x9123, (1<<0)|(1<<1)},
+ { PCI_VENDOR_ID_MARVELL_2, 0x9125, (1<<0)|(1<<1)},
+ { PCI_VENDOR_ID_MARVELL_2, 0x9128, (1<<0)|(1<<1)},
+ { PCI_VENDOR_ID_MARVELL_2, 0x9130, (1<<0)|(1<<1)},
+ { PCI_VENDOR_ID_MARVELL_2, 0x9143, (1<<0)|(1<<1)},
+ { PCI_VENDOR_ID_MARVELL_2, 0x9172, (1<<0)|(1<<1)},
+ { 0 }
+};
+
+/*
+ * The mapping of fake/ghost functions is used when the real device is
+ * attached to an IOMMU domain. IOMMU groups are not aware of these
+ * functions, because they're not real devices.
+ */
+u8 pci_get_dma_source_map(struct pci_dev *dev)
+{
+ const struct pci_dev_dma_multi_func_sources *i;
+
+ for (i = pci_dev_dma_multi_func_sources; i->func_map; i++) {
+ if ((i->vendor == dev->vendor ||
+ i->vendor == (u16)PCI_ANY_ID) &&
+ (i->device == dev->device ||
+ i->device == (u16)PCI_ANY_ID)) {
+ return i->func_map;
+ }
+ }
+ return 0;
+}
+
static const struct pci_dev_acs_enabled {
u16 vendor;
u16 device;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2461033..5ad3822 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1578,6 +1578,7 @@ enum pci_fixup_pass {
#ifdef CONFIG_PCI_QUIRKS
void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
struct pci_dev *pci_get_dma_source(struct pci_dev *dev);
+u8 pci_get_dma_source_map(struct pci_dev *dev);
int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
#else
static inline void pci_fixup_device(enum pci_fixup_pass pass,
@@ -1586,6 +1587,10 @@ static inline struct pci_dev *pci_get_dma_source(struct pci_dev *dev)
{
return pci_dev_get(dev);
}
+u8 pci_get_dma_source_map(struct pci_dev *dev)
+{
+ return 0;
+}
static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev,
u16 acs_flags)
{
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index f11c1c2..df57496 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1604,6 +1604,7 @@
#define PCI_SUBDEVICE_ID_KEYSPAN_SX2 0x5334

#define PCI_VENDOR_ID_MARVELL 0x11ab
+#define PCI_VENDOR_ID_MARVELL_2 0x1b4b
#define PCI_DEVICE_ID_MARVELL_GT64111 0x4146
#define PCI_DEVICE_ID_MARVELL_GT64260 0x6430
#define PCI_DEVICE_ID_MARVELL_MV64360 0x6460
--
1.7.1


2013-03-08 11:43:30

by Gaudenz Steinlin

[permalink] [raw]
Subject: Re: [PATCH v4] Quirk for buggy dma source tags with Intel IOMMU.


Hi Andrew

Andrew Cooks <[email protected]> writes:

> This patch creates a quirk to allow the Intel IOMMU to be enabled for devices
> that use incorrect tags during DMA. It is similar to the quirk for Ricoh
> devices, but allows mapping multiple functions and mapping of 'ghost'
> functions that do not correspond to real devices. Devices that need this
> include a variety of Marvell 88SE91xx based SATA controllers. [1][2]

I can confirm that this version of the patch also works for my mini-PCIe
device (88NV9143). See the my mail about it for more information. I had
to manually fix the patch because the patch utility did not understand
it. There is a formatting error in the last hunk for quirks.c (missing
space before context line) and the line count in the hunk header is
wrong (66 lines changed should be 56 lines). I hope nothing was missing
from the patch.

Tested on 3.8.2.

Gaudenz

>
> Changelog:
> v4: Process feedback received from Alex Williamson.
> * don't assume function 0 is a real device.
> * exit early if no ghost functions are known, or all known functions have
> been mapped.
> * cleanup failure case so mapping succeeds or fails for all ghost functions
> per device.
> * improve comments.
>
> v3:
> * Adopt David Woodhouse's terminology by referring to the quirky functions as
> 'ghost' functions.
> * Unmap ghost functions when device is detached from IOMMU.
> * Stub function for when CONFIG_PCI_QUIRKS is not enabled.
>
>
> This patch was generated against 3.9-rc1, but will also apply to 3.7.10.
>
> Bug reports:
> 1. https://bugzilla.redhat.com/show_bug.cgi?id=757166
> 2. https://bugzilla.kernel.org/show_bug.cgi?id=42679
>
> Signed-off-by: Andrew Cooks <[email protected]>
> ---
> drivers/iommu/intel-iommu.c | 69 +++++++++++++++++++++++++++++++++++++++++++
> drivers/pci/quirks.c | 67 +++++++++++++++++++++++++++++++++++++++++-
> include/linux/pci.h | 5 +++
> include/linux/pci_ids.h | 1 +
> 4 files changed, 141 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 0099667..f53f3e3 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -1674,6 +1674,69 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
> return 0;
> }
>
> +static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn);
> +
> +static void unmap_ghost_dma_fn(struct pci_dev *pdev, u8 fn_map)
> +{
> + u8 fn;
> + struct intel_iommu *iommu;
> +
> + iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
> + pdev->devfn);
> +
> + /* something must be seriously fubar if we can't lookup the iommu. */
> + BUG_ON(!iommu);
> +
> + for (fn = 0; fn <= 7 && fn_map << fn; fn++) {
> + if (fn == PCI_FUNC(pdev->devfn))
> + continue;
> + if (fn_map & (1<<fn)) {
> + iommu_detach_dev(iommu,
> + pdev->bus->number,
> + PCI_DEVFN(PCI_SLOT(pdev->devfn), fn));
> + dev_dbg(&pdev->dev, "quirk; ghost func %d unmapped",
> + fn);
> + }
> + }
> +}
> +
> +/* For quirky devices like Marvell 88SE91xx chips that use ghost functions. */
> +static int map_ghost_dma_fn(struct dmar_domain *domain,
> + struct pci_dev *pdev,
> + int translation)
> +{
> + u8 fn, fn_map;
> + u8 fn_mapped = 0;
> + int err = 0;
> +
> + fn_map = pci_get_dma_source_map(pdev);
> +
> + /* this is the common, non-quirky case. */
> + if (!fn_map)
> + return 0;
> +
> + for (fn = 0; fn <= 7 && fn_map << fn; fn++) {
> + if (fn == PCI_FUNC(pdev->devfn))
> + continue;
> + if (fn_map & (1<<fn)) {
> + err = domain_context_mapping_one(domain,
> + pci_domain_nr(pdev->bus),
> + pdev->bus->number,
> + PCI_DEVFN(PCI_SLOT(pdev->devfn), fn),
> + translation);
> + if (err) {
> + dev_err(&pdev->dev,
> + "mapping ghost func %d failed", fn);
> + unmap_ghost_dma_fn(pdev, fn_mapped);
> + return err;
> + }
> + dev_dbg(&pdev->dev, "quirk; ghost func %d mapped", fn);
> + fn_mapped |= (1<<fn);
> + }
> + }
> + return 0;
> +}
> +
> static int
> domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
> int translation)
> @@ -1687,6 +1750,11 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
> if (ret)
> return ret;
>
> + /* quirk for undeclared/ghost pci functions */
> + ret = map_ghost_dma_fn(domain, pdev, translation);
> + if (ret)
> + return ret;
> +
> /* dependent device mapping */
> tmp = pci_find_upstream_pcie_bridge(pdev);
> if (!tmp)
> @@ -3786,6 +3854,7 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
> iommu_disable_dev_iotlb(info);
> iommu_detach_dev(iommu, info->bus, info->devfn);
> iommu_detach_dependent_devices(iommu, pdev);
> + unmap_ghost_dma_fn(pdev, pci_get_dma_source_map(pdev));
> free_devinfo_mem(info);
>
> spin_lock_irqsave(&device_domain_lock, flags);
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index 0369fb6..cf00acb 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -3249,6 +3249,10 @@ static struct pci_dev *pci_func_0_dma_source(struct pci_dev *dev)
> return pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
> }
>
> +/* Table of source functions for real devices. The DMA requests for the
> + * device are tagged with a different real function as source. This is
> + * relevant to multifunction devices.
> + */
> static const struct pci_dev_dma_source {
> u16 vendor;
> u16 device;
> @@ -3275,7 +3279,8 @@ static const struct pci_dev_dma_source {
> * the device doing the DMA, but sometimes hardware is broken and will
> * tag the DMA as being sourced from a different device. This function
> * allows that translation. Note that the reference count of the
> - * returned device is incremented on all paths.
> + * returned device is incremented on all paths. Translation is done when
> + * the device is added to an IOMMU group.
> */
> struct pci_dev *pci_get_dma_source(struct pci_dev *dev)
> {
> @@ -3292,6 +3297,66 @@ struct pci_dev *pci_get_dma_source(struct pci_dev *dev)
> return pci_dev_get(dev);
> }
>
> +/* Table of multiple (ghost) source functions. This is similar to the
> + * translated sources above, but with the following differences:
> + * 1. the device may use multiple functions as DMA sources,
> + * 2. these functions cannot be assumed to be actual devices, they're simply
> + * incorrect DMA tags.
> + * 3. the specific ghost function for a request can not always be predicted.
> + * For example, the actual device could be xx:yy.1 and it could use
> + * both 0 and 1 for different requests, with no obvious way to tell when
> + * DMA will be tagged as comming from xx.yy.0 and and when it will be tagged
> + * as comming from xx.yy.1.
> + * The bitmap contains all of the functions used in DMA tags, including the
> + * actual device.
> + * See https://bugzilla.redhat.com/show_bug.cgi?id=757166,
> + * https://bugzilla.kernel.org/show_bug.cgi?id=42679
> + * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1089768
> + */
> +static const struct pci_dev_dma_multi_func_sources {
> + u16 vendor;
> + u16 device;
> + u8 func_map; /* bit map. lsb is fn 0. */
> +} pci_dev_dma_multi_func_sources[] = {
> + { PCI_VENDOR_ID_MARVELL_2, 0x9123, (1<<0)|(1<<1)},
> + { PCI_VENDOR_ID_MARVELL_2, 0x9125, (1<<0)|(1<<1)},
> + { PCI_VENDOR_ID_MARVELL_2, 0x9128, (1<<0)|(1<<1)},
> + { PCI_VENDOR_ID_MARVELL_2, 0x9130, (1<<0)|(1<<1)},
> + { PCI_VENDOR_ID_MARVELL_2, 0x9143, (1<<0)|(1<<1)},
> + { PCI_VENDOR_ID_MARVELL_2, 0x9172, (1<<0)|(1<<1)},
> + { 0 }
> +};
> +
> +/*
> + * The mapping of fake/ghost functions is used when the real device is
> + * attached to an IOMMU domain. IOMMU groups are not aware of these
> + * functions, because they're not real devices.
> + */
> +u8 pci_get_dma_source_map(struct pci_dev *dev)
> +{
> + const struct pci_dev_dma_multi_func_sources *i;
> +
> + for (i = pci_dev_dma_multi_func_sources; i->func_map; i++) {
> + if ((i->vendor == dev->vendor ||
> + i->vendor == (u16)PCI_ANY_ID) &&
> + (i->device == dev->device ||
> + i->device == (u16)PCI_ANY_ID)) {
> + return i->func_map;
> + }
> + }
> + return 0;
> +}
> +
> static const struct pci_dev_acs_enabled {
> u16 vendor;
> u16 device;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 2461033..5ad3822 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1578,6 +1578,7 @@ enum pci_fixup_pass {
> #ifdef CONFIG_PCI_QUIRKS
> void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
> struct pci_dev *pci_get_dma_source(struct pci_dev *dev);
> +u8 pci_get_dma_source_map(struct pci_dev *dev);
> int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
> #else
> static inline void pci_fixup_device(enum pci_fixup_pass pass,
> @@ -1586,6 +1587,10 @@ static inline struct pci_dev *pci_get_dma_source(struct pci_dev *dev)
> {
> return pci_dev_get(dev);
> }
> +u8 pci_get_dma_source_map(struct pci_dev *dev)
> +{
> + return 0;
> +}
> static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev,
> u16 acs_flags)
> {
> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> index f11c1c2..df57496 100644
> --- a/include/linux/pci_ids.h
> +++ b/include/linux/pci_ids.h
> @@ -1604,6 +1604,7 @@
> #define PCI_SUBDEVICE_ID_KEYSPAN_SX2 0x5334
>
> #define PCI_VENDOR_ID_MARVELL 0x11ab
> +#define PCI_VENDOR_ID_MARVELL_2 0x1b4b
> #define PCI_DEVICE_ID_MARVELL_GT64111 0x4146
> #define PCI_DEVICE_ID_MARVELL_GT64260 0x6430
> #define PCI_DEVICE_ID_MARVELL_MV64360 0x6460
> --
> 1.7.1
>

--
Ever tried. Ever failed. No matter.
Try again. Fail again. Fail better.
~ Samuel Beckett ~

2013-03-09 05:07:21

by Andrew Cooks

[permalink] [raw]
Subject: Re: [PATCH v4] Quirk for buggy dma source tags with Intel IOMMU.

On Fri, Mar 8, 2013 at 7:43 PM, Gaudenz Steinlin <[email protected]> wrote:
>
> Hi Andrew
>
> Andrew Cooks <[email protected]> writes:
>
>> This patch creates a quirk to allow the Intel IOMMU to be enabled for devices
>> that use incorrect tags during DMA. It is similar to the quirk for Ricoh
>> devices, but allows mapping multiple functions and mapping of 'ghost'
>> functions that do not correspond to real devices. Devices that need this
>> include a variety of Marvell 88SE91xx based SATA controllers. [1][2]
>
> I can confirm that this version of the patch also works for my mini-PCIe
> device (88NV9143). See the my mail about it for more information. I had
> to manually fix the patch because the patch utility did not understand
> it. There is a formatting error in the last hunk for quirks.c (missing
> space before context line) and the line count in the hunk header is
> wrong (66 lines changed should be 56 lines). I hope nothing was missing
> from the patch.
>
> Tested on 3.8.2.

Thanks for testing.

The formatting error is embarrassing. I was impatient and removed some
unused content from the patch, instead of cleaning the source. The
thing about posting to open lists with thousands or subscribers and
searchable archives is that it's impossible to hide incompetence.

a.

2013-04-02 15:48:23

by Pat Erley

[permalink] [raw]
Subject: Re: [PATCH v4] Quirk for buggy dma source tags with Intel IOMMU.

On 04/02/2013 10:50 AM, Andrew Cooks wrote:
> On 2 Apr 2013 15:37, "Pat Erley" <[email protected]
> <mailto:[email protected]>> wrote:
> >
> > On 03/07/2013 09:35 PM, Andrew Cooks wrote:
> >>
> >> --- a/drivers/pci/quirks.c
> >> +++ b/drivers/pci/quirks.c
> >>
> >> +/* Table of multiple (ghost) source functions. This is similar to the
> >> + * translated sources above, but with the following differences:
> >> + * 1. the device may use multiple functions as DMA sources,
> >> + * 2. these functions cannot be assumed to be actual devices,
> they're simply
> >> + * incorrect DMA tags.
> >> + * 3. the specific ghost function for a request can not always be
> predicted.
> >> + * For example, the actual device could be xx:yy.1 and it could use
> >> + * both 0 and 1 for different requests, with no obvious way to tell
> when
> >> + * DMA will be tagged as comming from xx.yy.0 and and when it will
> be tagged
> >> + * as comming from xx.yy.1.
> >> + * The bitmap contains all of the functions used in DMA tags,
> including the
> >> + * actual device.
> >> + * See https://bugzilla.redhat.com/show_bug.cgi?id=757166,
> >> + * https://bugzilla.kernel.org/show_bug.cgi?id=42679
> >> + * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1089768
> >> + */
> >> +static const struct pci_dev_dma_multi_func_sources {
> >> + u16 vendor;
> >> + u16 device;
> >> + u8 func_map; /* bit map. lsb is fn 0. */
> >> +} pci_dev_dma_multi_func_sources[] = {
> >> + { PCI_VENDOR_ID_MARVELL_2, 0x9123, (1<<0)|(1<<1)},
> >> + { PCI_VENDOR_ID_MARVELL_2, 0x9125, (1<<0)|(1<<1)},
> >> + { PCI_VENDOR_ID_MARVELL_2, 0x9128, (1<<0)|(1<<1)},
> >> + { PCI_VENDOR_ID_MARVELL_2, 0x9130, (1<<0)|(1<<1)},
> >> + { PCI_VENDOR_ID_MARVELL_2, 0x9143, (1<<0)|(1<<1)},
> >> + { PCI_VENDOR_ID_MARVELL_2, 0x9172, (1<<0)|(1<<1)},
> >> + { 0 }
> >> +};
> >
> >
> > Adding another buggy device. I have a Ricoh multifunction device:
> >
> > 17:00.0 SD Host controller: Ricoh Co Ltd MMC/SD Host Controller (rev 01)
> > 17:00.3 FireWire (IEEE 1394): Ricoh Co Ltd R5C832 PCIe IEEE 1394
> > Controller (rev 01)
> >
> > 17:00.0 0805: 1180:e822 (rev 01)
> > 17:00.3 0c00: 1180:e832 (rev 01)
> >
>
> The Ricoh device issue has been known for some time and a quirk has been
> available since commit 12ea6cad1c7d046 in June 2012. It's slightly
> different than the problem this patch tries to work around [1].

Hmm, I've had this problem with many recent (vanilla) kernels, up to and
including 3.9-rc5

> > that adding entries for also fixed booting. I don't have any SD
> cards or firewire devices handy to test that they work, but the system
> now boots, which was not the case without your patch and IOMMU/DMAR enabled.
>
> That is really strange. Could you tell us what kernel version you tested
> and provide dmesg output?

I'll capture a vanilla 3.8.5 boot without any patches and iommu=off,
then try to find another machine to catch what I can of a netconsole
boot with iommu=on. What's the preferred way to send these? pastebin
links?

I'd been running the 'dirty' fix that's in the redhat bugzilla entry. I
checked my .config and have CONFIG_PCI_QUIRKS=y, and verified my devices
are in the quirks table for the pci_func_0_dma_source fixup.

> > Here's a previous patch used for similar hardware that may also be
> fixed by this:
> >
> >
> http://lists.fedoraproject.org/pipermail/scm-commits/2010-October/510785.html
> >
> > and another thread/bug report this may solve:
> >
> > https://bugzilla.redhat.com/show_bug.cgi?id=605888
>
> I believe this is referenced in drivers/pci/quirks.c for versions newer
> than 3.5.
>
>
> > Feel free to include me in any future iterations of this patch you'd
> like tested.
> >
> > Tested-By: Pat Erley <[email protected] <mailto:[email protected]>>
> >
>
> Thanks for testing!
>
> [1] In the Ricoh case, multiple functions are used for real devices and
> the bug is that these devices all use function 0 during DMA. In this
> particular case, I'd expect the FireWire device 17:00.3 to issue DMA
> from the SD Host Controller address 17:00.0. The quirk is not too much
> of a terrible hack - it's a fairly simple translation.
>
> In the Marvell case, the real device uses DMA source tags that don't
> actually belong to any visible devices. The quirk to make this work is
> more invasive, not nearly as elegant and has not attracted much
> enthusiasm from subsystem maintainers, though I'm still hopeful that a
> quirk will be merged in some form or another.
>

Thanks for explaining the difference!

Pat

2013-04-02 17:26:11

by Pat Erley

[permalink] [raw]
Subject: Re: [PATCH v4] Quirk for buggy dma source tags with Intel IOMMU.

On 04/02/2013 11:47 AM, Pat Erley wrote:
> On 04/02/2013 10:50 AM, Andrew Cooks wrote:
>> On 2 Apr 2013 15:37, "Pat Erley" <[email protected]
>> <mailto:[email protected]>> wrote:
>> >
>> > On 03/07/2013 09:35 PM, Andrew Cooks wrote:
>> >>
>> >> --- a/drivers/pci/quirks.c
>> >> +++ b/drivers/pci/quirks.c
>> >>
>> >> +/* Table of multiple (ghost) source functions. This is similar to
>> the
>> >> + * translated sources above, but with the following differences:
>> >> + * 1. the device may use multiple functions as DMA sources,
>> >> + * 2. these functions cannot be assumed to be actual devices,
>> they're simply
>> >> + * incorrect DMA tags.
>> >> + * 3. the specific ghost function for a request can not always be
>> predicted.
>> >> + * For example, the actual device could be xx:yy.1 and it could use
>> >> + * both 0 and 1 for different requests, with no obvious way to tell
>> when
>> >> + * DMA will be tagged as comming from xx.yy.0 and and when it will
>> be tagged
>> >> + * as comming from xx.yy.1.
>> >> + * The bitmap contains all of the functions used in DMA tags,
>> including the
>> >> + * actual device.
>> >> + * See https://bugzilla.redhat.com/show_bug.cgi?id=757166,
>> >> + * https://bugzilla.kernel.org/show_bug.cgi?id=42679
>> >> + * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1089768
>> >> + */
>> >> +static const struct pci_dev_dma_multi_func_sources {
>> >> + u16 vendor;
>> >> + u16 device;
>> >> + u8 func_map; /* bit map. lsb is fn 0. */
>> >> +} pci_dev_dma_multi_func_sources[] = {
>> >> + { PCI_VENDOR_ID_MARVELL_2, 0x9123, (1<<0)|(1<<1)},
>> >> + { PCI_VENDOR_ID_MARVELL_2, 0x9125, (1<<0)|(1<<1)},
>> >> + { PCI_VENDOR_ID_MARVELL_2, 0x9128, (1<<0)|(1<<1)},
>> >> + { PCI_VENDOR_ID_MARVELL_2, 0x9130, (1<<0)|(1<<1)},
>> >> + { PCI_VENDOR_ID_MARVELL_2, 0x9143, (1<<0)|(1<<1)},
>> >> + { PCI_VENDOR_ID_MARVELL_2, 0x9172, (1<<0)|(1<<1)},
>> >> + { 0 }
>> >> +};
>> >
>> >
>> > Adding another buggy device. I have a Ricoh multifunction device:
>> >
>> > 17:00.0 SD Host controller: Ricoh Co Ltd MMC/SD Host Controller
>> (rev 01)
>> > 17:00.3 FireWire (IEEE 1394): Ricoh Co Ltd R5C832 PCIe IEEE 1394
>> > Controller (rev 01)
>> >
>> > 17:00.0 0805: 1180:e822 (rev 01)
>> > 17:00.3 0c00: 1180:e832 (rev 01)
>> >
>>
>> The Ricoh device issue has been known for some time and a quirk has been
>> available since commit 12ea6cad1c7d046 in June 2012. It's slightly
>> different than the problem this patch tries to work around [1].
>
> Hmm, I've had this problem with many recent (vanilla) kernels, up to and
> including 3.9-rc5
>
>> > that adding entries for also fixed booting. I don't have any SD
>> cards or firewire devices handy to test that they work, but the system
>> now boots, which was not the case without your patch and IOMMU/DMAR
>> enabled.
>>
>> That is really strange. Could you tell us what kernel version you tested
>> and provide dmesg output?
>
> I'll capture a vanilla 3.8.5 boot without any patches and iommu=off,
> then try to find another machine to catch what I can of a netconsole
> boot with iommu=on. What's the preferred way to send these? pastebin
> links?
>
> I'd been running the 'dirty' fix that's in the redhat bugzilla entry. I
> checked my .config and have CONFIG_PCI_QUIRKS=y, and verified my devices
> are in the quirks table for the pci_func_0_dma_source fixup.
>
>> > Here's a previous patch used for similar hardware that may also be
>> fixed by this:
>> >
>> >
>> http://lists.fedoraproject.org/pipermail/scm-commits/2010-October/510785.html
>>
>> >
>> > and another thread/bug report this may solve:
>> >
>> > https://bugzilla.redhat.com/show_bug.cgi?id=605888
>>
>> I believe this is referenced in drivers/pci/quirks.c for versions newer
>> than 3.5.
>>
>>
>> > Feel free to include me in any future iterations of this patch you'd
>> like tested.
>> >
>> > Tested-By: Pat Erley <[email protected] <mailto:[email protected]>>
>> >
>>
>> Thanks for testing!
>>
>> [1] In the Ricoh case, multiple functions are used for real devices and
>> the bug is that these devices all use function 0 during DMA. In this
>> particular case, I'd expect the FireWire device 17:00.3 to issue DMA
>> from the SD Host Controller address 17:00.0. The quirk is not too much
>> of a terrible hack - it's a fairly simple translation.
>>
>> In the Marvell case, the real device uses DMA source tags that don't
>> actually belong to any visible devices. The quirk to make this work is
>> more invasive, not nearly as elegant and has not attracted much
>> enthusiasm from subsystem maintainers, though I'm still hopeful that a
>> quirk will be merged in some form or another.
>>
>
> Thanks for explaining the difference!
>
> Pat
> --

Here are my relevant logs and configs from a vanilla 3.8.5 kernel:

http://www.erley.org/oops/

* the -nots files have had timestamps stripped for ease of diffing.

* no_iommu_no_fw.txt is a diff of the -nots logs.

* loading_fw.txt is an excerpt of log once I load the firewire-ohci
module (causing, for all practical purposes, a complete system lock.)

* the .gz of the same name is the 55mb of logs it generated in 36
seconds.

I was hesitant to send 100k of text to the ML, here is the only
'interesting' difference in the logs, from my inspection:

-PCI-DMA: Using software bounce buffering for IO (SWIOTLB)
-(64MB) mapped at [ffff8800b7a7c000-ffff8800bba7bfff]
+DMAR: No ATSR found
+IOMMU 0 0xfed90000: using Queued invalidation
+IOMMU: Setting RMRR:
+IOMMU: Setting identity map for device 0000:00:1a.0 [0xbbee9000 -
0xbbefffff]
+IOMMU: Setting identity map for device 0000:00:1d.0 [0xbbee9000 -
0xbbefffff]
+IOMMU: Prepare 0-16MiB unity mapping for LPC
+IOMMU: Setting identity map for device 0000:00:1f.0 [0x0 - 0xffffff]
+PCI-DMA: Intel(R) Virtualization Technology for Directed I/O

I was not able to find another machine with working network right now
(at families house for the week), so the only way I was able to compare was:

Case 1:
Boot iommu=off with firewire-ohci not blacklisted

Case 2:
Boot iommu=on with firewire-ohci blacklisted
Load firewire-ohci

With your patch(admittedly, only tested on 3.9-rc5), Case 2 works,
without it, I get my logs spammed with:

dmar: DRHD: handling fault status reg 2
dmar: DMAR:[DMA Read] Request device [17:00.0] fault addr fffff000
DMAR:[fault reason 02] Present bit in context entry is clear

When loading firewire.

2013-04-04 18:17:06

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v4] Quirk for buggy dma source tags with Intel IOMMU.

On Thu, Mar 7, 2013 at 7:35 PM, Andrew Cooks <[email protected]> wrote:
> This patch creates a quirk to allow the Intel IOMMU to be enabled for devices
> that use incorrect tags during DMA. It is similar to the quirk for Ricoh
> devices, but allows mapping multiple functions and mapping of 'ghost'
> functions that do not correspond to real devices. Devices that need this
> include a variety of Marvell 88SE91xx based SATA controllers. [1][2]
>
> Changelog:
> v4: Process feedback received from Alex Williamson.
> * don't assume function 0 is a real device.
> * exit early if no ghost functions are known, or all known functions have
> been mapped.
> * cleanup failure case so mapping succeeds or fails for all ghost functions
> per device.
> * improve comments.
>
> v3:
> * Adopt David Woodhouse's terminology by referring to the quirky functions as
> 'ghost' functions.
> * Unmap ghost functions when device is detached from IOMMU.
> * Stub function for when CONFIG_PCI_QUIRKS is not enabled.
>
>
> This patch was generated against 3.9-rc1, but will also apply to 3.7.10.
>
> Bug reports:
> 1. https://bugzilla.redhat.com/show_bug.cgi?id=757166
> 2. https://bugzilla.kernel.org/show_bug.cgi?id=42679
>
> Signed-off-by: Andrew Cooks <[email protected]>
> ---
> drivers/iommu/intel-iommu.c | 69 +++++++++++++++++++++++++++++++++++++++++++
> drivers/pci/quirks.c | 67 +++++++++++++++++++++++++++++++++++++++++-
> include/linux/pci.h | 5 +++
> include/linux/pci_ids.h | 1 +
> 4 files changed, 141 insertions(+), 1 deletions(-)

I'm OK with the pci/quirks.c part of this, but the bulk of the
interesting code is in intel-iommu.c, so I assume the IOMMU folks will
take care of this.

Bjorn

> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 0099667..f53f3e3 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -1674,6 +1674,69 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
> return 0;
> }
>
> +static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn);
> +
> +static void unmap_ghost_dma_fn(struct pci_dev *pdev, u8 fn_map)
> +{
> + u8 fn;
> + struct intel_iommu *iommu;
> +
> + iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
> + pdev->devfn);
> +
> + /* something must be seriously fubar if we can't lookup the iommu. */
> + BUG_ON(!iommu);
> +
> + for (fn = 0; fn <= 7 && fn_map << fn; fn++) {
> + if (fn == PCI_FUNC(pdev->devfn))
> + continue;
> + if (fn_map & (1<<fn)) {
> + iommu_detach_dev(iommu,
> + pdev->bus->number,
> + PCI_DEVFN(PCI_SLOT(pdev->devfn), fn));
> + dev_dbg(&pdev->dev, "quirk; ghost func %d unmapped",
> + fn);
> + }
> + }
> +}
> +
> +/* For quirky devices like Marvell 88SE91xx chips that use ghost functions. */
> +static int map_ghost_dma_fn(struct dmar_domain *domain,
> + struct pci_dev *pdev,
> + int translation)
> +{
> + u8 fn, fn_map;
> + u8 fn_mapped = 0;
> + int err = 0;
> +
> + fn_map = pci_get_dma_source_map(pdev);
> +
> + /* this is the common, non-quirky case. */
> + if (!fn_map)
> + return 0;
> +
> + for (fn = 0; fn <= 7 && fn_map << fn; fn++) {
> + if (fn == PCI_FUNC(pdev->devfn))
> + continue;
> + if (fn_map & (1<<fn)) {
> + err = domain_context_mapping_one(domain,
> + pci_domain_nr(pdev->bus),
> + pdev->bus->number,
> + PCI_DEVFN(PCI_SLOT(pdev->devfn), fn),
> + translation);
> + if (err) {
> + dev_err(&pdev->dev,
> + "mapping ghost func %d failed", fn);
> + unmap_ghost_dma_fn(pdev, fn_mapped);
> + return err;
> + }
> + dev_dbg(&pdev->dev, "quirk; ghost func %d mapped", fn);
> + fn_mapped |= (1<<fn);
> + }
> + }
> + return 0;
> +}
> +
> static int
> domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
> int translation)
> @@ -1687,6 +1750,11 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
> if (ret)
> return ret;
>
> + /* quirk for undeclared/ghost pci functions */
> + ret = map_ghost_dma_fn(domain, pdev, translation);
> + if (ret)
> + return ret;
> +
> /* dependent device mapping */
> tmp = pci_find_upstream_pcie_bridge(pdev);
> if (!tmp)
> @@ -3786,6 +3854,7 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
> iommu_disable_dev_iotlb(info);
> iommu_detach_dev(iommu, info->bus, info->devfn);
> iommu_detach_dependent_devices(iommu, pdev);
> + unmap_ghost_dma_fn(pdev, pci_get_dma_source_map(pdev));
> free_devinfo_mem(info);
>
> spin_lock_irqsave(&device_domain_lock, flags);
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index 0369fb6..cf00acb 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -3249,6 +3249,10 @@ static struct pci_dev *pci_func_0_dma_source(struct pci_dev *dev)
> return pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
> }
>
> +/* Table of source functions for real devices. The DMA requests for the
> + * device are tagged with a different real function as source. This is
> + * relevant to multifunction devices.
> + */
> static const struct pci_dev_dma_source {
> u16 vendor;
> u16 device;
> @@ -3275,7 +3279,8 @@ static const struct pci_dev_dma_source {
> * the device doing the DMA, but sometimes hardware is broken and will
> * tag the DMA as being sourced from a different device. This function
> * allows that translation. Note that the reference count of the
> - * returned device is incremented on all paths.
> + * returned device is incremented on all paths. Translation is done when
> + * the device is added to an IOMMU group.
> */
> struct pci_dev *pci_get_dma_source(struct pci_dev *dev)
> {
> @@ -3292,6 +3297,66 @@ struct pci_dev *pci_get_dma_source(struct pci_dev *dev)
> return pci_dev_get(dev);
> }
>
> +/* Table of multiple (ghost) source functions. This is similar to the
> + * translated sources above, but with the following differences:
> + * 1. the device may use multiple functions as DMA sources,
> + * 2. these functions cannot be assumed to be actual devices, they're simply
> + * incorrect DMA tags.
> + * 3. the specific ghost function for a request can not always be predicted.
> + * For example, the actual device could be xx:yy.1 and it could use
> + * both 0 and 1 for different requests, with no obvious way to tell when
> + * DMA will be tagged as comming from xx.yy.0 and and when it will be tagged
> + * as comming from xx.yy.1.
> + * The bitmap contains all of the functions used in DMA tags, including the
> + * actual device.
> + * See https://bugzilla.redhat.com/show_bug.cgi?id=757166,
> + * https://bugzilla.kernel.org/show_bug.cgi?id=42679
> + * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1089768
> + */
> +static const struct pci_dev_dma_multi_func_sources {
> + u16 vendor;
> + u16 device;
> + u8 func_map; /* bit map. lsb is fn 0. */
> +} pci_dev_dma_multi_func_sources[] = {
> + { PCI_VENDOR_ID_MARVELL_2, 0x9123, (1<<0)|(1<<1)},
> + { PCI_VENDOR_ID_MARVELL_2, 0x9125, (1<<0)|(1<<1)},
> + { PCI_VENDOR_ID_MARVELL_2, 0x9128, (1<<0)|(1<<1)},
> + { PCI_VENDOR_ID_MARVELL_2, 0x9130, (1<<0)|(1<<1)},
> + { PCI_VENDOR_ID_MARVELL_2, 0x9143, (1<<0)|(1<<1)},
> + { PCI_VENDOR_ID_MARVELL_2, 0x9172, (1<<0)|(1<<1)},
> + { 0 }
> +};
> +
> +/*
> + * The mapping of fake/ghost functions is used when the real device is
> + * attached to an IOMMU domain. IOMMU groups are not aware of these
> + * functions, because they're not real devices.
> + */
> +u8 pci_get_dma_source_map(struct pci_dev *dev)
> +{
> + const struct pci_dev_dma_multi_func_sources *i;
> +
> + for (i = pci_dev_dma_multi_func_sources; i->func_map; i++) {
> + if ((i->vendor == dev->vendor ||
> + i->vendor == (u16)PCI_ANY_ID) &&
> + (i->device == dev->device ||
> + i->device == (u16)PCI_ANY_ID)) {
> + return i->func_map;
> + }
> + }
> + return 0;
> +}
> +
> static const struct pci_dev_acs_enabled {
> u16 vendor;
> u16 device;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 2461033..5ad3822 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1578,6 +1578,7 @@ enum pci_fixup_pass {
> #ifdef CONFIG_PCI_QUIRKS
> void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
> struct pci_dev *pci_get_dma_source(struct pci_dev *dev);
> +u8 pci_get_dma_source_map(struct pci_dev *dev);
> int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
> #else
> static inline void pci_fixup_device(enum pci_fixup_pass pass,
> @@ -1586,6 +1587,10 @@ static inline struct pci_dev *pci_get_dma_source(struct pci_dev *dev)
> {
> return pci_dev_get(dev);
> }
> +u8 pci_get_dma_source_map(struct pci_dev *dev)
> +{
> + return 0;
> +}
> static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev,
> u16 acs_flags)
> {
> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> index f11c1c2..df57496 100644
> --- a/include/linux/pci_ids.h
> +++ b/include/linux/pci_ids.h
> @@ -1604,6 +1604,7 @@
> #define PCI_SUBDEVICE_ID_KEYSPAN_SX2 0x5334
>
> #define PCI_VENDOR_ID_MARVELL 0x11ab
> +#define PCI_VENDOR_ID_MARVELL_2 0x1b4b
> #define PCI_DEVICE_ID_MARVELL_GT64111 0x4146
> #define PCI_DEVICE_ID_MARVELL_GT64260 0x6430
> #define PCI_DEVICE_ID_MARVELL_MV64360 0x6460
> --
> 1.7.1
>