2012-06-04 21:29:21

by Donald Dutile

[permalink] [raw]
Subject: [PATCH 0/2]intel-iommu: reserve IOMMU register space

Reserve the memory space that Intel IOMMUs use so
they aren't dynamically allocated by hot-plug operations.
Seem on some systems when BIOS doesn't place IOMMU registers
in a safe, non-hotplug location. ... until BIOS quickly changed.

Since modifying dmar.c, and information prints in the code,
cleanup the info/warning prints in the first patch to use
more std pr_*() functions and formatting.

Second patch is core of the iomem space reservation.

Bjorn had posted a similar patch a couple weeks back,
and I informed him in a side note I had this patch set
in my queue, just needed couple minor edits to complete.
So, if you are having deja-vu, now you know why...

Signed-off-by: Donald Dutile <[email protected]>


2012-06-04 21:29:23

by Donald Dutile

[permalink] [raw]
Subject: [PATCH 1/2] iommu: dmar: replace printks with appropriate pr_*()

Just some cleanup so next patch can keep the info printing
the same way throughout the file.

Replace printk(KERN_* with pr_*() functions.

Signed-off-by: Donald Dutile <[email protected]>
---
drivers/iommu/dmar.c | 83 ++++++++++++++++++++++------------------------------
1 file changed, 35 insertions(+), 48 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 3a74e44..1e5a10d 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -83,15 +83,14 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
* ignore it
*/
if (!bus) {
- printk(KERN_WARNING
- PREFIX "Device scope bus [%d] not found\n",
- scope->bus);
+ pr_warn(PREFIX "Device scope bus [%d] not found\n",
+ scope->bus);
break;
}
pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
if (!pdev) {
- printk(KERN_WARNING PREFIX
- "Device scope device [%04x:%02x:%02x.%02x] not found\n",
+ pr_warn(PREFIX "Device scope device"
+ "[%04x:%02x:%02x.%02x] not found\n",
segment, bus->number, path->dev, path->fn);
break;
}
@@ -100,9 +99,9 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
bus = pdev->subordinate;
}
if (!pdev) {
- printk(KERN_WARNING PREFIX
- "Device scope device [%04x:%02x:%02x.%02x] not found\n",
- segment, scope->bus, path->dev, path->fn);
+ pr_warn(PREFIX
+ "Device scope device [%04x:%02x:%02x.%02x] not found\n",
+ segment, scope->bus, path->dev, path->fn);
*dev = NULL;
return 0;
}
@@ -110,8 +109,7 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
pdev->subordinate) || (scope->entry_type == \
ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
pci_dev_put(pdev);
- printk(KERN_WARNING PREFIX
- "Device scope type does not match for %s\n",
+ pr_warn(PREFIX "Device scope type does not match for %s\n",
pci_name(pdev));
return -EINVAL;
}
@@ -134,8 +132,7 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
(*cnt)++;
else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
- printk(KERN_WARNING PREFIX
- "Unsupported device scope\n");
+ pr_warn(PREFIX "Unsupported device scope\n");
}
start += scope->length;
}
@@ -261,25 +258,23 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
case ACPI_DMAR_TYPE_HARDWARE_UNIT:
drhd = container_of(header, struct acpi_dmar_hardware_unit,
header);
- printk (KERN_INFO PREFIX
- "DRHD base: %#016Lx flags: %#x\n",
+ pr_info(PREFIX "DRHD base: %#016Lx flags: %#x\n",
(unsigned long long)drhd->address, drhd->flags);
break;
case ACPI_DMAR_TYPE_RESERVED_MEMORY:
rmrr = container_of(header, struct acpi_dmar_reserved_memory,
header);
- printk (KERN_INFO PREFIX
- "RMRR base: %#016Lx end: %#016Lx\n",
+ pr_info(PREFIX "RMRR base: %#016Lx end: %#016Lx\n",
(unsigned long long)rmrr->base_address,
(unsigned long long)rmrr->end_address);
break;
case ACPI_DMAR_TYPE_ATSR:
atsr = container_of(header, struct acpi_dmar_atsr, header);
- printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
+ pr_info(PREFIX "ATSR flags: %#x\n", atsr->flags);
break;
case ACPI_DMAR_HARDWARE_AFFINITY:
rhsa = container_of(header, struct acpi_dmar_rhsa, header);
- printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
+ pr_info(PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
(unsigned long long)rhsa->base_address,
rhsa->proximity_domain);
break;
@@ -299,7 +294,7 @@ static int __init dmar_table_detect(void)
&dmar_tbl_size);

if (ACPI_SUCCESS(status) && !dmar_tbl) {
- printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
+ pr_warn(PREFIX "Unable to map DMAR\n");
status = AE_NOT_FOUND;
}

@@ -333,20 +328,18 @@ parse_dmar_table(void)
return -ENODEV;

if (dmar->width < PAGE_SHIFT - 1) {
- printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
+ pr_warn(PREFIX "Invalid DMAR haw\n");
return -EINVAL;
}

- printk (KERN_INFO PREFIX "Host address width %d\n",
- dmar->width + 1);
+ pr_info(PREFIX "Host address width %d\n", dmar->width + 1);

entry_header = (struct acpi_dmar_header *)(dmar + 1);
while (((unsigned long)entry_header) <
(((unsigned long)dmar) + dmar_tbl->length)) {
/* Avoid looping forever on bad ACPI tables */
if (entry_header->length == 0) {
- printk(KERN_WARNING PREFIX
- "Invalid 0-length structure\n");
+ pr_warn(PREFIX "Invalid 0-length structure\n");
ret = -EINVAL;
break;
}
@@ -369,8 +362,7 @@ parse_dmar_table(void)
#endif
break;
default:
- printk(KERN_WARNING PREFIX
- "Unknown DMAR structure type %d\n",
+ pr_warn(PREFIX "Unknown DMAR structure type %d\n",
entry_header->type);
ret = 0; /* for forward compatibility */
break;
@@ -469,12 +461,12 @@ int __init dmar_table_init(void)
ret = parse_dmar_table();
if (ret) {
if (ret != -ENODEV)
- printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
+ pr_info(PREFIX "parse DMAR table failure.\n");
return ret;
}

if (list_empty(&dmar_drhd_units)) {
- printk(KERN_INFO PREFIX "No DMAR devices found\n");
+ pr_info(PREFIX "No DMAR devices found\n");
return -ENODEV;
}

@@ -506,8 +498,7 @@ int __init check_zero_address(void)
(((unsigned long)dmar) + dmar_tbl->length)) {
/* Avoid looping forever on bad ACPI tables */
if (entry_header->length == 0) {
- printk(KERN_WARNING PREFIX
- "Invalid 0-length structure\n");
+ pr_warn(PREFIX "Invalid 0-length structure\n");
return 0;
}

@@ -558,8 +549,8 @@ int __init detect_intel_iommu(void)

if (ret && irq_remapping_enabled && cpu_has_x2apic &&
dmar->flags & 0x1)
- printk(KERN_INFO
- "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
+ pr_info("Queued invalidation will be enabled to "
+ "support x2apic and Intr-remapping.\n");

if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
iommu_detected = 1;
@@ -602,7 +593,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)

iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
if (!iommu->reg) {
- printk(KERN_ERR "IOMMU: can't map the region\n");
+ pr_err("IOMMU: can't map the region\n");
goto error;
}
iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
@@ -615,15 +606,13 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)

agaw = iommu_calculate_agaw(iommu);
if (agaw < 0) {
- printk(KERN_ERR
- "Cannot get a valid agaw for iommu (seq_id = %d)\n",
- iommu->seq_id);
+ pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
+ iommu->seq_id);
goto err_unmap;
}
msagaw = iommu_calculate_max_sagaw(iommu);
if (msagaw < 0) {
- printk(KERN_ERR
- "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
+ pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
iommu->seq_id);
goto err_unmap;
}
@@ -640,7 +629,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iounmap(iommu->reg);
iommu->reg = ioremap(drhd->reg_base_addr, map_size);
if (!iommu->reg) {
- printk(KERN_ERR "IOMMU: can't map the region\n");
+ pr_err("IOMMU: can't map the region\n");
goto error;
}
}
@@ -710,7 +699,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
if (fault & DMA_FSTS_IQE) {
head = readl(iommu->reg + DMAR_IQH_REG);
if ((head >> DMAR_IQ_SHIFT) == index) {
- printk(KERN_ERR "VT-d detected invalid descriptor: "
+ pr_err("VT-d detected invalid descriptor: "
"low=%llx, high=%llx\n",
(unsigned long long)qi->desc[index].low,
(unsigned long long)qi->desc[index].high);
@@ -1129,15 +1118,14 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
reason = dmar_get_fault_reason(fault_reason, &fault_type);

if (fault_type == INTR_REMAP)
- printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
+ pr_err("INTR-REMAP: Request device [[%02x:%02x.%d] "
"fault index %llx\n"
"INTR-REMAP:[fault reason %02d] %s\n",
(source_id >> 8), PCI_SLOT(source_id & 0xFF),
PCI_FUNC(source_id & 0xFF), addr >> 48,
fault_reason, reason);
else
- printk(KERN_ERR
- "DMAR:[%s] Request device [%02x:%02x.%d] "
+ pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "
"fault addr %llx \n"
"DMAR:[fault reason %02d] %s\n",
(type ? "DMA Read" : "DMA Write"),
@@ -1157,8 +1145,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
raw_spin_lock_irqsave(&iommu->register_lock, flag);
fault_status = readl(iommu->reg + DMAR_FSTS_REG);
if (fault_status)
- printk(KERN_ERR "DRHD: handling fault status reg %x\n",
- fault_status);
+ pr_err("DRHD: handling fault status reg %x\n", fault_status);

/* TBD: ignore advanced fault log currently */
if (!(fault_status & DMA_FSTS_PPF))
@@ -1224,7 +1211,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)

irq = create_irq();
if (!irq) {
- printk(KERN_ERR "IOMMU: no free vectors\n");
+ pr_err("IOMMU: no free vectors\n");
return -EINVAL;
}

@@ -1241,7 +1228,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)

ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
if (ret)
- printk(KERN_ERR "IOMMU: can't request irq\n");
+ pr_err("IOMMU: can't request irq\n");
return ret;
}

@@ -1258,7 +1245,7 @@ int __init enable_drhd_fault_handling(void)
ret = dmar_set_interrupt(iommu);

if (ret) {
- printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
+ pr_err("DRHD %Lx: failed to enable fault, "
" interrupt, ret %d\n",
(unsigned long long)drhd->reg_base_addr, ret);
return -1;
--
1.7.10.2.552.gaa3bb87

2012-06-04 21:29:35

by Donald Dutile

[permalink] [raw]
Subject: [PATCH 2/2] iommu: dmar -- reserve mmio space used by IOMMU

Intel-iommu initialization doesn't currently reserve the memory used
for the IOMMU registers. This can allow the pci resource allocator
to assign a device BAR to the same address as the IOMMU registers.
This can cause some not so nice side affects when the driver
ioremap's that region.

Introduced two helper functions to map & unmap the IOMMU registers
as well as simplify the init and exit paths.

Signed-off-by: Donald Dutile <[email protected]>
---
drivers/iommu/dmar.c | 111 +++++++++++++++++++++++++++++++++-----------
include/linux/intel-iommu.h | 2 +
2 files changed, 86 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 1e5a10d..9ab6ebf 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -570,14 +570,89 @@ int __init detect_intel_iommu(void)
}


+static void unmap_iommu(struct intel_iommu *iommu)
+{
+ iounmap(iommu->reg);
+ release_mem_region(iommu->reg_phys, iommu->reg_size);
+}
+
+/**
+ * map_iommu: map the iommu's registers
+ * @iommu: the iommu to map
+ * @phys_addr: the physical address of the base resgister
+ *
+ * Memory map the iommu's registers. Start w/ a single page, and
+ * possibly expand if that turns out to be insufficent.
+ */
+static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
+{
+ int map_size, err=0;
+
+ iommu->reg_phys = phys_addr;
+ iommu->reg_size = VTD_PAGE_SIZE;
+
+ if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
+ pr_err("IOMMU: can't reserve memory\n");
+ err = -EBUSY;
+ goto out;
+ }
+
+ iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
+ if (!iommu->reg) {
+ pr_err("IOMMU: can't map the region\n");
+ err = -ENOMEM;
+ goto release;
+ }
+
+ iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
+ iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+
+ if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
+ err = -EINVAL;
+ warn_invalid_dmar(phys_addr, " returns all ones");
+ goto unmap;
+ }
+
+ /* the registers might be more than one page */
+ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
+ cap_max_fault_reg_offset(iommu->cap));
+ map_size = VTD_PAGE_ALIGN(map_size);
+ if (map_size > iommu->reg_size) {
+ iounmap(iommu->reg);
+ release_mem_region(iommu->reg_phys, iommu->reg_size);
+ iommu->reg_size = map_size;
+ if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
+ iommu->name)) {
+ pr_err("IOMMU: can't reserve memory\n");
+ err = -EBUSY;
+ goto out;
+ }
+ iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
+ if (!iommu->reg) {
+ pr_err("IOMMU: can't map the region\n");
+ err = -ENOMEM;
+ goto release;
+ }
+ }
+ err = 0;
+ goto out;
+
+unmap:
+ iounmap(iommu->reg);
+release:
+ release_mem_region(iommu->reg_phys, iommu->reg_size);
+out:
+ return err;
+}
+
int alloc_iommu(struct dmar_drhd_unit *drhd)
{
struct intel_iommu *iommu;
- int map_size;
u32 ver;
static int iommu_allocated = 0;
int agaw = 0;
int msagaw = 0;
+ int err;

if (!drhd->reg_base_addr) {
warn_invalid_dmar(0, "");
@@ -591,19 +666,13 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->seq_id = iommu_allocated++;
sprintf (iommu->name, "dmar%d", iommu->seq_id);

- iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
- if (!iommu->reg) {
- pr_err("IOMMU: can't map the region\n");
+ err = map_iommu(iommu, drhd->reg_base_addr);
+ if (err) {
+ pr_err("IOMMU: failed to map %s\n", iommu->name);
goto error;
}
- iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
- iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
-
- if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
- warn_invalid_dmar(drhd->reg_base_addr, " returns all ones");
- goto err_unmap;
- }

+ err = -EINVAL;
agaw = iommu_calculate_agaw(iommu);
if (agaw < 0) {
pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
@@ -621,19 +690,6 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)

iommu->node = -1;

- /* the registers might be more than one page */
- map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
- cap_max_fault_reg_offset(iommu->cap));
- map_size = VTD_PAGE_ALIGN(map_size);
- if (map_size > VTD_PAGE_SIZE) {
- iounmap(iommu->reg);
- iommu->reg = ioremap(drhd->reg_base_addr, map_size);
- if (!iommu->reg) {
- pr_err("IOMMU: can't map the region\n");
- goto error;
- }
- }
-
ver = readl(iommu->reg + DMAR_VER_REG);
pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
iommu->seq_id,
@@ -648,10 +704,10 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
return 0;

err_unmap:
- iounmap(iommu->reg);
+ unmap_iommu(iommu);
error:
kfree(iommu);
- return -1;
+ return err;
}

void free_iommu(struct intel_iommu *iommu)
@@ -662,7 +718,8 @@ void free_iommu(struct intel_iommu *iommu)
free_dmar_iommu(iommu);

if (iommu->reg)
- iounmap(iommu->reg);
+ unmap_iommu(iommu);
+
kfree(iommu);
}

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index e6ca56d..78e2ada 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -308,6 +308,8 @@ enum {

struct intel_iommu {
void __iomem *reg; /* Pointer to hardware regs, virtual addr */
+ u64 reg_phys; /* physical address of hw register set */
+ u64 reg_size; /* size of hw register set */
u64 cap;
u64 ecap;
u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
--
1.7.10.2.552.gaa3bb87

2012-06-04 22:15:40

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu: dmar: replace printks with appropriate pr_*()

On Mon, 2012-06-04 at 17:29 -0400, Donald Dutile wrote:
> Replace printk(KERN_* with pr_*() functions.

Please add
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
before any include and remove the embedded PREFIX
from each printk

> diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c

[]

> break;
> }
> pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
> if (!pdev) {
> - printk(KERN_WARNING PREFIX
> - "Device scope device [%04x:%02x:%02x.%02x] not found\n",
> + pr_warn(PREFIX "Device scope device"
> + "[%04x:%02x:%02x.%02x] not found\n",
> segment, bus->number, path->dev, path->fn);

Please don't split any format string. You removed
a space between the scope device and an open bracket.
It's OK for format strings to exceed 80 chars.

2012-06-04 22:28:27

by Donald Dutile

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu: dmar: replace printks with appropriate pr_*()

On 06/04/2012 06:15 PM, Joe Perches wrote:
> On Mon, 2012-06-04 at 17:29 -0400, Donald Dutile wrote:
>> Replace printk(KERN_* with pr_*() functions.
>
> Please add
> #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> before any include and remove the embedded PREFIX
> from each printk
>
>> diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
>
> []
>
>> break;
>> }
>> pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
>> if (!pdev) {
>> - printk(KERN_WARNING PREFIX
>> - "Device scope device [%04x:%02x:%02x.%02x] not found\n",
>> + pr_warn(PREFIX "Device scope device"
>> + "[%04x:%02x:%02x.%02x] not found\n",
>> segment, bus->number, path->dev, path->fn);
>
> Please don't split any format string. You removed
> a space between the scope device and an open bracket.
> It's OK for format strings to exceed 80 chars.
>
>
Joe,
Thanks for the feeback. I'll incorporate the changes once
others have a chance to review & feedback as well.

- Don

2012-06-04 22:37:17

by David Woodhouse

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu: dmar -- reserve mmio space used by IOMMU

On Mon, 2012-06-04 at 17:29 -0400, Donald Dutile wrote:
> Intel-iommu initialization doesn't currently reserve the memory used
> for the IOMMU registers. This can allow the pci resource allocator
> to assign a device BAR to the same address as the IOMMU registers.
> This can cause some not so nice side affects when the driver
> ioremap's that region.

s/affect/effect/

And surely this can happen even when IOMMU support is compiled out of
the kernel. Shouldn't the BIOS be *telling* us that this region is
unavailable for PCI resource allocation (or anything else, for that
matter)?

If the BIOS *doesn't* do that, then I believe this should be
WARN_TAINT_ONCE(…TAINT_FIRMWARE_WORKAROUND…) like other BIOS problems
that we have discovered.

And we should probably do it based on the actual chipset registers, not
the DMAR tables (which the BIOS has also been known to lie about).

--
dwmw2


Attachments:
smime.p7s (6.03 kB)

2012-06-04 23:09:31

by Donald Dutile

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu: dmar -- reserve mmio space used by IOMMU

On 06/04/2012 06:37 PM, David Woodhouse wrote:
> On Mon, 2012-06-04 at 17:29 -0400, Donald Dutile wrote:
>> Intel-iommu initialization doesn't currently reserve the memory used
>> for the IOMMU registers. This can allow the pci resource allocator
>> to assign a device BAR to the same address as the IOMMU registers.
>> This can cause some not so nice side affects when the driver
>> ioremap's that region.
>
> s/affect/effect/
>
ok.

> And surely this can happen even when IOMMU support is compiled out of
> the kernel. Shouldn't the BIOS be *telling* us that this region is
> unavailable for PCI resource allocation (or anything else, for that
> matter)?
>
good point....

> If the BIOS *doesn't* do that, then I believe this should be
> WARN_TAINT_ONCE(…TAINT_FIRMWARE_WORKAROUND…) like other BIOS problems
> that we have discovered.
>
well, one could argue it may be easier to claim the space reserved in
the OS then making yet another hole in the available IO address space
in the ACPI tables. Although I think the workarounds more systems
implement is to stick the IOMMUs into an existing hole to avoid this problem.

> And we should probably do it based on the actual chipset registers, not
> the DMAR tables (which the BIOS has also been known to lie about).
>
but.... the DMAR tables are the source of all information.... wise and ... er, um... ;-)
yes, I've been on the receiving side of more bz's wrt bad DMAR tables then I can count,
but...

How does the kernel probe for chipsets, then registers with the chipsets
to find the programmed IOMMU BAR values?
-- I missed that class.... I only have Intel Virt Tech Directed I/O
Architecture spec., and the beginning of IOMMU is based on DMAR tables...
If you have more info/guidance, I'd appreciate it.

Seems like the patch would be easier to support, although it doesn't
solve the problem you mentioned above, unless the reservation code isn't
compiled out by INTEL-IOMMU (but something more general like !(x86 && PCI)).
the firmware taint message would be informative as to the quality of
the firmware, but my experience is nothing changes unless it's critical
to a system shipping.

IMO, if we can avoid a BIOS problem, we should.
The empirical data I've gathered so far in this space
(IOMMU, use by SRIOV VF devices), shows the BIOS has numerous
weaknesses, and this is yet another one. The BIOS's are getting better,
but I've seen turtles run faster... ;-) .

2012-06-04 23:23:24

by David Woodhouse

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu: dmar -- reserve mmio space used by IOMMU

On Mon, 2012-06-04 at 19:09 -0400, Don Dutile wrote:
> > If the BIOS *doesn't* do that, then I believe this should be
> > WARN_TAINT_ONCE(…TAINT_FIRMWARE_WORKAROUND…) like other BIOS problems
> > that we have discovered.
> >
> well, one could argue it may be easier to claim the space reserved in
> the OS then making yet another hole in the available IO address space
> in the ACPI tables.

But how? It's got to work with operating systems that predate the IOMMU.
The registers *have* to be in a marked hole. If *not*, then we should
give a clear "YOUR BIOS IS BROKEN" output like all the similar
breakages, and do our best to work around it.

Working around it is fine; I'm not suggesting that we should WARN()
*instead* of working around it.

> How does the kernel probe for chipsets, then registers with the chipsets
> to find the programmed IOMMU BAR values?
> -- I missed that class.... I only have Intel Virt Tech Directed I/O
> Architecture spec., and the beginning of IOMMU is based on DMAR tables...
> If you have more info/guidance, I'd appreciate it.

Hm, I thought we'd already started doing some of that in order to
sanity-check the DMAR tables. The VTBAR registers are in PCI config
space. The quirk_ioat_snb_local_iommu() check is already looking at
them...

I'm not quite sure which document they are documented in. Doing it based
on the DMAR table, as you have, is certainly a good start. But do it
with a bigger shouty WARN(TAINT_FIRMWARE_WORKAROUND), and do it when the
IOMMU code isn't compiled in.

> Seems like the patch would be easier to support, although it doesn't
> solve the problem you mentioned above, unless the reservation code isn't
> compiled out by INTEL-IOMMU (but something more general like !(x86 && PCI)).
> the firmware taint message would be informative as to the quality of
> the firmware, but my experience is nothing changes unless it's critical
> to a system shipping.

> The BIOS's are getting better, but I've seen turtles run faster... ;-) .

Thankfully, there are now some modern Intel systems on which you can run
Coreboot. This should be a huge benefit — you should be able to build an
up-to-date Tianocore and deploy it as your Coreboot payload, rather than
having to put up with the crap that's on the system when you receive it.


--
dwmw2


Attachments:
smime.p7s (6.03 kB)

2012-06-04 23:53:11

by Donald Dutile

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu: dmar -- reserve mmio space used by IOMMU

On 06/04/2012 07:23 PM, David Woodhouse wrote:
> On Mon, 2012-06-04 at 19:09 -0400, Don Dutile wrote:
>>> If the BIOS *doesn't* do that, then I believe this should be
>>> WARN_TAINT_ONCE(…TAINT_FIRMWARE_WORKAROUND…) like other BIOS problems
>>> that we have discovered.
>>>
>> well, one could argue it may be easier to claim the space reserved in
>> the OS then making yet another hole in the available IO address space
>> in the ACPI tables.
>
> But how? It's got to work with operating systems that predate the IOMMU.
> The registers *have* to be in a marked hole. If *not*, then we should
> give a clear "YOUR BIOS IS BROKEN" output like all the similar
> breakages, and do our best to work around it.
>
> Working around it is fine; I'm not suggesting that we should WARN()
> *instead* of working around it.
>
ok.

>> How does the kernel probe for chipsets, then registers with the chipsets
>> to find the programmed IOMMU BAR values?
>> -- I missed that class.... I only have Intel Virt Tech Directed I/O
>> Architecture spec., and the beginning of IOMMU is based on DMAR tables...
>> If you have more info/guidance, I'd appreciate it.
>
> Hm, I thought we'd already started doing some of that in order to
> sanity-check the DMAR tables. The VTBAR registers are in PCI config
> space. The quirk_ioat_snb_local_iommu() check is already looking at
> them...
>
except that quirk is conditionally compiled in intel-iommu.c;
to do the check indep of INTEL-IOMMU CONFIG tag, it'd have to move into
pci/quirks.c. ... and how does it get triggered? ... a dmar table check?
(typical quirks kicked based on vid/did...)

> I'm not quite sure which document they are documented in. Doing it based
> on the DMAR table, as you have, is certainly a good start. But do it
> with a bigger shouty WARN(TAINT_FIRMWARE_WORKAROUND), and do it when the
> IOMMU code isn't compiled in.
>
The 'do it for intel-iommu systems only' and not be CONFIG dependent
is a bit challenging given how the code is compiled, and the expected/normal
code flow starting from a dmar table.
of course, if the IOMMU just exposed itself as the first device on a PCI
bus, this would be trivial!
-- I really hate BIOS dependencies to get things right!

>> Seems like the patch would be easier to support, although it doesn't
>> solve the problem you mentioned above, unless the reservation code isn't
>> compiled out by INTEL-IOMMU (but something more general like !(x86&& PCI)).
>> the firmware taint message would be informative as to the quality of
>> the firmware, but my experience is nothing changes unless it's critical
>> to a system shipping.
>
>> The BIOS's are getting better, but I've seen turtles run faster... ;-) .
>
> Thankfully, there are now some modern Intel systems on which you can run
> Coreboot. This should be a huge benefit — you should be able to build an
> up-to-date Tianocore and deploy it as your Coreboot payload, rather than
> having to put up with the crap that's on the system when you receive it.
>
>
except, most system (hw, os, applic) certifications are based on vendor's
shipped BIOS, so Coreboot isn't a guarantee either. Additionally, telling
a customer to replace their paid-for-BIOS for a build-your-own-coreboot bios
is a tough way to close a bz. ;-)

2012-06-06 08:16:13

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu: dmar -- reserve mmio space used by IOMMU


* David Woodhouse <[email protected]> wrote:

> > well, one could argue it may be easier to claim the space
> > reserved in the OS then making yet another hole in the
> > available IO address space in the ACPI tables.
>
> But how? It's got to work with operating systems that predate
> the IOMMU. The registers *have* to be in a marked hole. If
> *not*, then we should give a clear "YOUR BIOS IS BROKEN"
> output like all the similar breakages, and do our best to work
> around it.
>
> Working around it is fine; I'm not suggesting that we should
> WARN() *instead* of working around it.

So basically the patch-set is fine as-is, we just want a
sufficiently nasty sounding warning message about the BIOS bug,
with actionable output, something like:

... the kernel is sad because a buggy BIOS has not
marked IOMMU area xxx-yyy as reserved, working
it around. You get to keep all the pieces and
be more careful with remaining eye.

(or a functional equivalent thereof.)

this patch could be added as a third patch in the series, right?

> > The BIOS's are getting better, but I've seen turtles run
> > faster... ;-) .
>
> Thankfully, there are now some modern Intel systems on which
> you can run Coreboot. This should be a huge benefit — you
> should be able to build an up-to-date Tianocore and deploy it
> as your Coreboot payload, rather than having to put up with
> the crap that's on the system when you receive it.

If we could integrate Coreboot into the kernel and could build a
bzImage that one could write into the BIOS flash image and thus
have an updated and functional BIOS, that would be awesome.

People could actually write systems with proper 'every pixel is
perfect' boot output and low latency behavior from the moment
power is turned on to the first desktop GUI frame and such.

(With some fail safe mechanism for bugs.)

And ponies.

Thanks,

Ingo

2012-06-06 08:26:36

by David Woodhouse

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu: dmar -- reserve mmio space used by IOMMU

On Wed, 2012-06-06 at 10:16 +0200, Ingo Molnar wrote:
> So basically the patch-set is fine as-is, we just want a
> sufficiently nasty sounding warning message about the BIOS bug,

No. The other change that's required is that the warning and the
workaround need to trigger even when IOMMU support isn't built into the
kernel. This BIOS bug can bite you even when you aren't *using* the
IOMMU.

> If we could integrate Coreboot into the kernel and could build a
> bzImage that one could write into the BIOS flash image and thus
> have an updated and functional BIOS, that would be awesome.

People *have* used the kernel as a Coreboot payload, but it's not really
the best solution. For anything but deeply embedded devices, you
probably do want a real firmware of some kind (like Tianocore).

The point in having hardware which is fully supported by Coreboot is
that it gives you the choice. You *can* do the above, or more sensibly
you can use it to deploy an up-to-date and working Tianocore without the
type of bugs that we're discussing here. Or at least without the ones
which offend *you*.

--
dwmw2


Attachments:
smime.p7s (6.03 kB)

2012-06-06 08:29:15

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu: dmar -- reserve mmio space used by IOMMU


* David Woodhouse <[email protected]> wrote:

> On Wed, 2012-06-06 at 10:16 +0200, Ingo Molnar wrote:
>
> > So basically the patch-set is fine as-is, we just want a
> > sufficiently nasty sounding warning message about the BIOS
> > bug,
>
> No. The other change that's required is that the warning and
> the workaround need to trigger even when IOMMU support isn't
> built into the kernel. This BIOS bug can bite you even when
> you aren't *using* the IOMMU.

Right, but that's the status quo and most of the time (distro
kernels) the driver is available, right?

So for all practical purposes we get 99% of the warning power
without going into chicken-and-egg problems like how do we
determine that there's an IOMMU area there if there's no IOMMU
aware code in the kernel, right?

Thanks,

Ingo

2012-06-06 08:45:37

by David Woodhouse

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu: dmar -- reserve mmio space used by IOMMU

On Wed, 2012-06-06 at 10:29 +0200, Ingo Molnar wrote:
> So for all practical purposes we get 99% of the warning power
> without going into chicken-and-egg problems like how do we
> determine that there's an IOMMU area there if there's no IOMMU
> aware code in the kernel, right?

That's not a chicken-and-egg problem. It's hardly difficult to see the
DMAR ACPI table and take a *basic* look at the DRHD entries. You don't
even need to fully parse it.

Doing it even without *ACPI* in your kernel would be a different
matter :)

--
dwmw2


Attachments:
smime.p7s (6.03 kB)

2012-06-06 23:56:40

by Chris Wright

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu: dmar -- reserve mmio space used by IOMMU

* David Woodhouse ([email protected]) wrote:
> On Wed, 2012-06-06 at 10:29 +0200, Ingo Molnar wrote:
> > So for all practical purposes we get 99% of the warning power
> > without going into chicken-and-egg problems like how do we
> > determine that there's an IOMMU area there if there's no IOMMU
> > aware code in the kernel, right?
>
> That's not a chicken-and-egg problem. It's hardly difficult to see the
> DMAR ACPI table and take a *basic* look at the DRHD entries. You don't
> even need to fully parse it.

I had the same impression as Ingo. Patch is basically good as is (fixing
most of the problem), and could be augmented later to close the final gap
(when no IOMMU code). While parsing the DMAR table is trivial, what are
you recommending to use to inform if the region is not properly reserved?

thanks,
-chris

2012-06-06 23:58:38

by Chris Wright

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu: dmar -- reserve mmio space used by IOMMU

* David Woodhouse ([email protected]) wrote:
> On Mon, 2012-06-04 at 19:09 -0400, Don Dutile wrote:
> > How does the kernel probe for chipsets, then registers with the chipsets
> > to find the programmed IOMMU BAR values?
> > -- I missed that class.... I only have Intel Virt Tech Directed I/O
> > Architecture spec., and the beginning of IOMMU is based on DMAR tables...
> > If you have more info/guidance, I'd appreciate it.
>
> Hm, I thought we'd already started doing some of that in order to
> sanity-check the DMAR tables. The VTBAR registers are in PCI config
> space. The quirk_ioat_snb_local_iommu() check is already looking at
> them...

I always had the impression that things like VTBAR registers are
entirely chipset specific (and may not be there for all chipsets).
Would be great if that's incorrect. Any idea?

thanks,
-chris

2012-06-08 14:55:42

by Donald Dutile

[permalink] [raw]
Subject: [tip:core/iommu] iommu/dmar: Replace printks with appropriate pr_* ()

Commit-ID: bf947fcb77ff858f223c49c76e2d130095fa2585
Gitweb: http://git.kernel.org/tip/bf947fcb77ff858f223c49c76e2d130095fa2585
Author: Donald Dutile <[email protected]>
AuthorDate: Mon, 4 Jun 2012 17:29:01 -0400
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 8 Jun 2012 12:14:57 +0200

iommu/dmar: Replace printks with appropriate pr_*()

Just some cleanup so next patch can keep the info printing
the same way throughout the file.

Replace printk(KERN_* with pr_*() functions.

Signed-off-by: Donald Dutile <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
drivers/iommu/dmar.c | 83 +++++++++++++++++++++-----------------------------
1 files changed, 35 insertions(+), 48 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 3a74e44..1e5a10d 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -83,15 +83,14 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
* ignore it
*/
if (!bus) {
- printk(KERN_WARNING
- PREFIX "Device scope bus [%d] not found\n",
- scope->bus);
+ pr_warn(PREFIX "Device scope bus [%d] not found\n",
+ scope->bus);
break;
}
pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
if (!pdev) {
- printk(KERN_WARNING PREFIX
- "Device scope device [%04x:%02x:%02x.%02x] not found\n",
+ pr_warn(PREFIX "Device scope device"
+ "[%04x:%02x:%02x.%02x] not found\n",
segment, bus->number, path->dev, path->fn);
break;
}
@@ -100,9 +99,9 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
bus = pdev->subordinate;
}
if (!pdev) {
- printk(KERN_WARNING PREFIX
- "Device scope device [%04x:%02x:%02x.%02x] not found\n",
- segment, scope->bus, path->dev, path->fn);
+ pr_warn(PREFIX
+ "Device scope device [%04x:%02x:%02x.%02x] not found\n",
+ segment, scope->bus, path->dev, path->fn);
*dev = NULL;
return 0;
}
@@ -110,8 +109,7 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
pdev->subordinate) || (scope->entry_type == \
ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
pci_dev_put(pdev);
- printk(KERN_WARNING PREFIX
- "Device scope type does not match for %s\n",
+ pr_warn(PREFIX "Device scope type does not match for %s\n",
pci_name(pdev));
return -EINVAL;
}
@@ -134,8 +132,7 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
(*cnt)++;
else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
- printk(KERN_WARNING PREFIX
- "Unsupported device scope\n");
+ pr_warn(PREFIX "Unsupported device scope\n");
}
start += scope->length;
}
@@ -261,25 +258,23 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
case ACPI_DMAR_TYPE_HARDWARE_UNIT:
drhd = container_of(header, struct acpi_dmar_hardware_unit,
header);
- printk (KERN_INFO PREFIX
- "DRHD base: %#016Lx flags: %#x\n",
+ pr_info(PREFIX "DRHD base: %#016Lx flags: %#x\n",
(unsigned long long)drhd->address, drhd->flags);
break;
case ACPI_DMAR_TYPE_RESERVED_MEMORY:
rmrr = container_of(header, struct acpi_dmar_reserved_memory,
header);
- printk (KERN_INFO PREFIX
- "RMRR base: %#016Lx end: %#016Lx\n",
+ pr_info(PREFIX "RMRR base: %#016Lx end: %#016Lx\n",
(unsigned long long)rmrr->base_address,
(unsigned long long)rmrr->end_address);
break;
case ACPI_DMAR_TYPE_ATSR:
atsr = container_of(header, struct acpi_dmar_atsr, header);
- printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
+ pr_info(PREFIX "ATSR flags: %#x\n", atsr->flags);
break;
case ACPI_DMAR_HARDWARE_AFFINITY:
rhsa = container_of(header, struct acpi_dmar_rhsa, header);
- printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
+ pr_info(PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
(unsigned long long)rhsa->base_address,
rhsa->proximity_domain);
break;
@@ -299,7 +294,7 @@ static int __init dmar_table_detect(void)
&dmar_tbl_size);

if (ACPI_SUCCESS(status) && !dmar_tbl) {
- printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
+ pr_warn(PREFIX "Unable to map DMAR\n");
status = AE_NOT_FOUND;
}

@@ -333,20 +328,18 @@ parse_dmar_table(void)
return -ENODEV;

if (dmar->width < PAGE_SHIFT - 1) {
- printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
+ pr_warn(PREFIX "Invalid DMAR haw\n");
return -EINVAL;
}

- printk (KERN_INFO PREFIX "Host address width %d\n",
- dmar->width + 1);
+ pr_info(PREFIX "Host address width %d\n", dmar->width + 1);

entry_header = (struct acpi_dmar_header *)(dmar + 1);
while (((unsigned long)entry_header) <
(((unsigned long)dmar) + dmar_tbl->length)) {
/* Avoid looping forever on bad ACPI tables */
if (entry_header->length == 0) {
- printk(KERN_WARNING PREFIX
- "Invalid 0-length structure\n");
+ pr_warn(PREFIX "Invalid 0-length structure\n");
ret = -EINVAL;
break;
}
@@ -369,8 +362,7 @@ parse_dmar_table(void)
#endif
break;
default:
- printk(KERN_WARNING PREFIX
- "Unknown DMAR structure type %d\n",
+ pr_warn(PREFIX "Unknown DMAR structure type %d\n",
entry_header->type);
ret = 0; /* for forward compatibility */
break;
@@ -469,12 +461,12 @@ int __init dmar_table_init(void)
ret = parse_dmar_table();
if (ret) {
if (ret != -ENODEV)
- printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
+ pr_info(PREFIX "parse DMAR table failure.\n");
return ret;
}

if (list_empty(&dmar_drhd_units)) {
- printk(KERN_INFO PREFIX "No DMAR devices found\n");
+ pr_info(PREFIX "No DMAR devices found\n");
return -ENODEV;
}

@@ -506,8 +498,7 @@ int __init check_zero_address(void)
(((unsigned long)dmar) + dmar_tbl->length)) {
/* Avoid looping forever on bad ACPI tables */
if (entry_header->length == 0) {
- printk(KERN_WARNING PREFIX
- "Invalid 0-length structure\n");
+ pr_warn(PREFIX "Invalid 0-length structure\n");
return 0;
}

@@ -558,8 +549,8 @@ int __init detect_intel_iommu(void)

if (ret && irq_remapping_enabled && cpu_has_x2apic &&
dmar->flags & 0x1)
- printk(KERN_INFO
- "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
+ pr_info("Queued invalidation will be enabled to "
+ "support x2apic and Intr-remapping.\n");

if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
iommu_detected = 1;
@@ -602,7 +593,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)

iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
if (!iommu->reg) {
- printk(KERN_ERR "IOMMU: can't map the region\n");
+ pr_err("IOMMU: can't map the region\n");
goto error;
}
iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
@@ -615,15 +606,13 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)

agaw = iommu_calculate_agaw(iommu);
if (agaw < 0) {
- printk(KERN_ERR
- "Cannot get a valid agaw for iommu (seq_id = %d)\n",
- iommu->seq_id);
+ pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
+ iommu->seq_id);
goto err_unmap;
}
msagaw = iommu_calculate_max_sagaw(iommu);
if (msagaw < 0) {
- printk(KERN_ERR
- "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
+ pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
iommu->seq_id);
goto err_unmap;
}
@@ -640,7 +629,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iounmap(iommu->reg);
iommu->reg = ioremap(drhd->reg_base_addr, map_size);
if (!iommu->reg) {
- printk(KERN_ERR "IOMMU: can't map the region\n");
+ pr_err("IOMMU: can't map the region\n");
goto error;
}
}
@@ -710,7 +699,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
if (fault & DMA_FSTS_IQE) {
head = readl(iommu->reg + DMAR_IQH_REG);
if ((head >> DMAR_IQ_SHIFT) == index) {
- printk(KERN_ERR "VT-d detected invalid descriptor: "
+ pr_err("VT-d detected invalid descriptor: "
"low=%llx, high=%llx\n",
(unsigned long long)qi->desc[index].low,
(unsigned long long)qi->desc[index].high);
@@ -1129,15 +1118,14 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
reason = dmar_get_fault_reason(fault_reason, &fault_type);

if (fault_type == INTR_REMAP)
- printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
+ pr_err("INTR-REMAP: Request device [[%02x:%02x.%d] "
"fault index %llx\n"
"INTR-REMAP:[fault reason %02d] %s\n",
(source_id >> 8), PCI_SLOT(source_id & 0xFF),
PCI_FUNC(source_id & 0xFF), addr >> 48,
fault_reason, reason);
else
- printk(KERN_ERR
- "DMAR:[%s] Request device [%02x:%02x.%d] "
+ pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "
"fault addr %llx \n"
"DMAR:[fault reason %02d] %s\n",
(type ? "DMA Read" : "DMA Write"),
@@ -1157,8 +1145,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
raw_spin_lock_irqsave(&iommu->register_lock, flag);
fault_status = readl(iommu->reg + DMAR_FSTS_REG);
if (fault_status)
- printk(KERN_ERR "DRHD: handling fault status reg %x\n",
- fault_status);
+ pr_err("DRHD: handling fault status reg %x\n", fault_status);

/* TBD: ignore advanced fault log currently */
if (!(fault_status & DMA_FSTS_PPF))
@@ -1224,7 +1211,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)

irq = create_irq();
if (!irq) {
- printk(KERN_ERR "IOMMU: no free vectors\n");
+ pr_err("IOMMU: no free vectors\n");
return -EINVAL;
}

@@ -1241,7 +1228,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)

ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
if (ret)
- printk(KERN_ERR "IOMMU: can't request irq\n");
+ pr_err("IOMMU: can't request irq\n");
return ret;
}

@@ -1258,7 +1245,7 @@ int __init enable_drhd_fault_handling(void)
ret = dmar_set_interrupt(iommu);

if (ret) {
- printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
+ pr_err("DRHD %Lx: failed to enable fault, "
" interrupt, ret %d\n",
(unsigned long long)drhd->reg_base_addr, ret);
return -1;

2012-06-08 14:56:30

by Donald Dutile

[permalink] [raw]
Subject: [tip:core/iommu] iommu/dmar: Reserve mmio space used by the IOMMU, if the BIOS forgets to

Commit-ID: 6f5cf52114dd87f9ed091678f7dfc8ff21bbe2b3
Gitweb: http://git.kernel.org/tip/6f5cf52114dd87f9ed091678f7dfc8ff21bbe2b3
Author: Donald Dutile <[email protected]>
AuthorDate: Mon, 4 Jun 2012 17:29:02 -0400
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 8 Jun 2012 12:15:43 +0200

iommu/dmar: Reserve mmio space used by the IOMMU, if the BIOS forgets to

Intel-iommu initialization doesn't currently reserve the memory
used for the IOMMU registers. This can allow the pci resource
allocator to assign a device BAR to the same address as the
IOMMU registers. This can cause some not so nice side affects
when the driver ioremap's that region.

Introduced two helper functions to map & unmap the IOMMU
registers as well as simplify the init and exit paths.

Signed-off-by: Donald Dutile <[email protected]>
Acked-by: Chris Wright <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
drivers/iommu/dmar.c | 111 ++++++++++++++++++++++++++++++++----------
include/linux/intel-iommu.h | 2 +
2 files changed, 86 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 1e5a10d..9ab6ebf 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -570,14 +570,89 @@ int __init detect_intel_iommu(void)
}


+static void unmap_iommu(struct intel_iommu *iommu)
+{
+ iounmap(iommu->reg);
+ release_mem_region(iommu->reg_phys, iommu->reg_size);
+}
+
+/**
+ * map_iommu: map the iommu's registers
+ * @iommu: the iommu to map
+ * @phys_addr: the physical address of the base resgister
+ *
+ * Memory map the iommu's registers. Start w/ a single page, and
+ * possibly expand if that turns out to be insufficent.
+ */
+static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
+{
+ int map_size, err=0;
+
+ iommu->reg_phys = phys_addr;
+ iommu->reg_size = VTD_PAGE_SIZE;
+
+ if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
+ pr_err("IOMMU: can't reserve memory\n");
+ err = -EBUSY;
+ goto out;
+ }
+
+ iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
+ if (!iommu->reg) {
+ pr_err("IOMMU: can't map the region\n");
+ err = -ENOMEM;
+ goto release;
+ }
+
+ iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
+ iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+
+ if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
+ err = -EINVAL;
+ warn_invalid_dmar(phys_addr, " returns all ones");
+ goto unmap;
+ }
+
+ /* the registers might be more than one page */
+ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
+ cap_max_fault_reg_offset(iommu->cap));
+ map_size = VTD_PAGE_ALIGN(map_size);
+ if (map_size > iommu->reg_size) {
+ iounmap(iommu->reg);
+ release_mem_region(iommu->reg_phys, iommu->reg_size);
+ iommu->reg_size = map_size;
+ if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
+ iommu->name)) {
+ pr_err("IOMMU: can't reserve memory\n");
+ err = -EBUSY;
+ goto out;
+ }
+ iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
+ if (!iommu->reg) {
+ pr_err("IOMMU: can't map the region\n");
+ err = -ENOMEM;
+ goto release;
+ }
+ }
+ err = 0;
+ goto out;
+
+unmap:
+ iounmap(iommu->reg);
+release:
+ release_mem_region(iommu->reg_phys, iommu->reg_size);
+out:
+ return err;
+}
+
int alloc_iommu(struct dmar_drhd_unit *drhd)
{
struct intel_iommu *iommu;
- int map_size;
u32 ver;
static int iommu_allocated = 0;
int agaw = 0;
int msagaw = 0;
+ int err;

if (!drhd->reg_base_addr) {
warn_invalid_dmar(0, "");
@@ -591,19 +666,13 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->seq_id = iommu_allocated++;
sprintf (iommu->name, "dmar%d", iommu->seq_id);

- iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
- if (!iommu->reg) {
- pr_err("IOMMU: can't map the region\n");
+ err = map_iommu(iommu, drhd->reg_base_addr);
+ if (err) {
+ pr_err("IOMMU: failed to map %s\n", iommu->name);
goto error;
}
- iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
- iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
-
- if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
- warn_invalid_dmar(drhd->reg_base_addr, " returns all ones");
- goto err_unmap;
- }

+ err = -EINVAL;
agaw = iommu_calculate_agaw(iommu);
if (agaw < 0) {
pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
@@ -621,19 +690,6 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)

iommu->node = -1;

- /* the registers might be more than one page */
- map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
- cap_max_fault_reg_offset(iommu->cap));
- map_size = VTD_PAGE_ALIGN(map_size);
- if (map_size > VTD_PAGE_SIZE) {
- iounmap(iommu->reg);
- iommu->reg = ioremap(drhd->reg_base_addr, map_size);
- if (!iommu->reg) {
- pr_err("IOMMU: can't map the region\n");
- goto error;
- }
- }
-
ver = readl(iommu->reg + DMAR_VER_REG);
pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
iommu->seq_id,
@@ -648,10 +704,10 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
return 0;

err_unmap:
- iounmap(iommu->reg);
+ unmap_iommu(iommu);
error:
kfree(iommu);
- return -1;
+ return err;
}

void free_iommu(struct intel_iommu *iommu)
@@ -662,7 +718,8 @@ void free_iommu(struct intel_iommu *iommu)
free_dmar_iommu(iommu);

if (iommu->reg)
- iounmap(iommu->reg);
+ unmap_iommu(iommu);
+
kfree(iommu);
}

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index e6ca56d..78e2ada 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -308,6 +308,8 @@ enum {

struct intel_iommu {
void __iomem *reg; /* Pointer to hardware regs, virtual addr */
+ u64 reg_phys; /* physical address of hw register set */
+ u64 reg_size; /* size of hw register set */
u64 cap;
u64 ecap;
u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */