2008-10-01 16:57:38

by Fenghua Yu

Subject: [PATCH 1/2]Add Variable Page Size and IA64 Support in Intel IOMMU: Generic Part


The current Intel IOMMU code assumes that both host page size and Intel IOMMU page size are 4K. The first patch supports variable page size. This provides support for IA64 which has multiple page sizes.

This patch also adds some other code hooks for the IA64 platform, such as the DMAR_OPERATION_TIMEOUT definition.
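
For illustration, a minimal sketch (not part of the patch itself) of what the fixed 4K VT-d page size means in practice, using the IOMMU_PAGE_* macros introduced below; the helper name is made up for this example:

/*
 * Sketch only: count how many 4K VT-d page-table entries are needed to
 * map "size" bytes starting at host physical address "hpa".  This
 * mirrors the start_pfn/end_pfn computation in domain_page_mapping().
 * On an IA64 kernel built with 16K pages, one host page still needs
 * four VT-d PTEs, so the IOMMU macros must not follow PAGE_SHIFT.
 */
static inline unsigned long vtd_pte_count(u64 hpa, size_t size)
{
	u64 start_pfn = hpa >> IOMMU_PAGE_SHIFT;
	u64 end_pfn = IOMMU_PAGE_ALIGN(hpa + size) >> IOMMU_PAGE_SHIFT;

	return end_pfn - start_pfn;
}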

Signed-off-by: Fenghua Yu <[email protected]>
Signed-off-by: Tony Luck <[email protected]>

---

drivers/pci/dmar.c | 23 ++++--
drivers/pci/intel-iommu.c | 139 ++++++++++++++++++++++++------------------
include/linux/dma_remapping.h | 28 ++++----
include/linux/intel-iommu.h | 49 ++++++++++----
4 files changed, 145 insertions(+), 94 deletions(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 0c92e2c..31290c2 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -35,6 +35,10 @@
#undef PREFIX
#define PREFIX "DMAR:"

+#ifdef CONFIG_IA64
+#define cpu_has_x2apic 0
+#endif
+
/* No locks are needed as DMA remapping hardware unit
* list is constructed at boot time and hotplug of
* these units are not supported by the architecture.
@@ -277,14 +280,15 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
drhd = (struct acpi_dmar_hardware_unit *)header;
printk (KERN_INFO PREFIX
"DRHD (flags: 0x%08x)base: 0x%016Lx\n",
- drhd->flags, drhd->address);
+ drhd->flags, (unsigned long long)drhd->address);
break;
case ACPI_DMAR_TYPE_RESERVED_MEMORY:
rmrr = (struct acpi_dmar_reserved_memory *)header;

printk (KERN_INFO PREFIX
"RMRR base: 0x%016Lx end: 0x%016Lx\n",
- rmrr->base_address, rmrr->end_address);
+ (unsigned long long)rmrr->base_address,
+ (unsigned long long)rmrr->end_address);
break;
}
}
@@ -328,7 +332,7 @@ parse_dmar_table(void)
if (!dmar)
return -ENODEV;

- if (dmar->width < PAGE_SHIFT_4K - 1) {
+ if (dmar->width < PAGE_SHIFT - 1) {
printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
return -EINVAL;
}
@@ -510,7 +514,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)

iommu->seq_id = iommu_allocated++;

- iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
+ iommu->reg = ioremap(drhd->reg_base_addr, IOMMU_PAGE_SIZE);
if (!iommu->reg) {
printk(KERN_ERR "IOMMU: can't map the region\n");
goto error;
@@ -521,8 +525,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
cap_max_fault_reg_offset(iommu->cap));
- map_size = PAGE_ALIGN_4K(map_size);
- if (map_size > PAGE_SIZE_4K) {
+ map_size = IOMMU_PAGE_ALIGN(map_size);
+ if (map_size > IOMMU_PAGE_SIZE) {
iounmap(iommu->reg);
iommu->reg = ioremap(drhd->reg_base_addr, map_size);
if (!iommu->reg) {
@@ -533,8 +537,10 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)

ver = readl(iommu->reg + DMAR_VER_REG);
pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
- drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
- iommu->cap, iommu->ecap);
+ (unsigned long long)drhd->reg_base_addr,
+ DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
+ (unsigned long long)iommu->cap,
+ (unsigned long long)iommu->ecap);

spin_lock_init(&iommu->register_lock);

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index fc5f2db..18e1ea2 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -18,6 +18,7 @@
* Author: Ashok Raj <[email protected]>
* Author: Shaohua Li <[email protected]>
* Author: Anil S Keshavamurthy <[email protected]>
+ * Author: Fenghua Yu <[email protected]>
*/

#include <linux/init.h>
@@ -35,11 +36,13 @@
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
-#include <asm/proto.h> /* force_iommu in this header in x86-64*/
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include "pci.h"

+#define ROOT_SIZE IOMMU_PAGE_SIZE
+#define CONTEXT_SIZE IOMMU_PAGE_SIZE
+
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

@@ -132,6 +135,17 @@ static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
return vaddr;
}

+#ifdef CONFIG_IA64
+static inline void *ia64_get_zeroed_page(gfp_t gfp_mask)
+{
+ struct page *page;
+
+ page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
+ if (page)
+ return (void *)page_address(page);
+ return 0;
+}
+#endif

static inline void *alloc_pgtable_page(void)
{
@@ -141,7 +155,11 @@ static inline void *alloc_pgtable_page(void)
/* trying to avoid low memory issues */
flags = current->flags & PF_MEMALLOC;
current->flags |= PF_MEMALLOC;
+#ifdef CONFIG_IA64
+ vaddr = ia64_get_zeroed_page(GFP_ATOMIC);
+#else
vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
+#endif
current->flags &= (~PF_MEMALLOC | flags);
return vaddr;
}
@@ -199,7 +217,7 @@ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
spin_unlock_irqrestore(&iommu->lock, flags);
return NULL;
}
- __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
+ __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
phy_addr = virt_to_phys((void *)context);
set_root_value(root, phy_addr);
set_root_present(root);
@@ -345,7 +363,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
return NULL;
}
__iommu_flush_cache(domain->iommu, tmp_page,
- PAGE_SIZE_4K);
+ PAGE_SIZE);
dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
/*
* high level table always sets r/w, last level page
@@ -408,13 +426,13 @@ static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
start &= (((u64)1) << addr_width) - 1;
end &= (((u64)1) << addr_width) - 1;
/* in case it's partial page */
- start = PAGE_ALIGN_4K(start);
- end &= PAGE_MASK_4K;
+ start = PAGE_ALIGN(start);
+ end &= PAGE_MASK;

/* we don't need lock here, nobody else touches the iova range */
while (start < end) {
dma_pte_clear_one(domain, start);
- start += PAGE_SIZE_4K;
+ start += IOMMU_PAGE_SIZE;
}
}

@@ -468,7 +486,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
if (!root)
return -ENOMEM;

- __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
+ __iommu_flush_cache(iommu, root, ROOT_SIZE);

spin_lock_irqsave(&iommu->lock, flags);
iommu->root_entry = root;
@@ -655,7 +673,8 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
- DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
+ (unsigned long long)DMA_TLB_IIRG(type),
+ (unsigned long long) DMA_TLB_IAIG(val));
/* flush context entry will implictly flush write buffer */
return 0;
}
@@ -679,7 +698,7 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
{
unsigned int mask;

- BUG_ON(addr & (~PAGE_MASK_4K));
+ BUG_ON(addr & (~IOMMU_PAGE_MASK));
BUG_ON(pages == 0);

/* Fallback to domain selective flush if no PSI support */
@@ -831,7 +850,7 @@ void dmar_msi_read(int irq, struct msi_msg *msg)
}

static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
- u8 fault_reason, u16 source_id, u64 addr)
+ u8 fault_reason, u16 source_id, unsigned long long addr)
{
const char *reason;

@@ -1084,9 +1103,9 @@ static void dmar_init_reserved_ranges(void)
if (!r->flags || !(r->flags & IORESOURCE_MEM))
continue;
addr = r->start;
- addr &= PAGE_MASK_4K;
+ addr &= PAGE_MASK;
size = r->end - addr;
- size = PAGE_ALIGN_4K(size);
+ size = PAGE_ALIGN(size);
iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
IOVA_PFN(size + addr) - 1);
if (!iova)
@@ -1148,7 +1167,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
domain->pgd = (struct dma_pte *)alloc_pgtable_page();
if (!domain->pgd)
return -ENOMEM;
- __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
+ __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
return 0;
}

@@ -1164,7 +1183,7 @@ static void domain_exit(struct dmar_domain *domain)
/* destroy iovas */
put_iova_domain(&domain->iovad);
end = DOMAIN_MAX_ADDR(domain->gaw);
- end = end & (~PAGE_MASK_4K);
+ end = end & (~PAGE_MASK);

/* clear ptes */
dma_pte_clear_range(domain, 0, end);
@@ -1283,22 +1302,25 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
u64 start_pfn, end_pfn;
struct dma_pte *pte;
int index;
+ int addr_width = agaw_to_width(domain->agaw);
+
+ hpa &= (((u64)1) << addr_width) - 1;

if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
- iova &= PAGE_MASK_4K;
- start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
- end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
+ iova &= PAGE_MASK;
+ start_pfn = ((u64)hpa) >> IOMMU_PAGE_SHIFT;
+ end_pfn = (IOMMU_PAGE_ALIGN(((u64)hpa) + size)) >> IOMMU_PAGE_SHIFT;
index = 0;
while (start_pfn < end_pfn) {
- pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
+ pte = addr_to_dma_pte(domain, iova + IOMMU_PAGE_SIZE * index);
if (!pte)
return -ENOMEM;
/* We don't need lock here, nobody else
* touches the iova range
*/
BUG_ON(dma_pte_addr(*pte));
- dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
+ dma_set_pte_addr(*pte, start_pfn << IOMMU_PAGE_SHIFT);
dma_set_pte_prot(*pte, prot);
__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
start_pfn++;
@@ -1474,11 +1496,12 @@ error:
return find_domain(pdev);
}

-static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
+static int iommu_prepare_identity_map(struct pci_dev *pdev,
+ unsigned long long start, unsigned long long end)
{
struct dmar_domain *domain;
unsigned long size;
- u64 base;
+ unsigned long long base;
int ret;

printk(KERN_INFO
@@ -1490,9 +1513,9 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
return -ENOMEM;

/* The address might not be aligned */
- base = start & PAGE_MASK_4K;
+ base = start & PAGE_MASK;
size = end - base;
- size = PAGE_ALIGN_4K(size);
+ size = PAGE_ALIGN(size);
if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
IOVA_PFN(base + size) - 1)) {
printk(KERN_ERR "IOMMU: reserve iova failed\n");
@@ -1738,8 +1761,8 @@ error:
static inline u64 aligned_size(u64 host_addr, size_t size)
{
u64 addr;
- addr = (host_addr & (~PAGE_MASK_4K)) + size;
- return PAGE_ALIGN_4K(addr);
+ addr = (host_addr & (~PAGE_MASK)) + size;
+ return PAGE_ALIGN(addr);
}

struct iova *
@@ -1753,7 +1776,7 @@ iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
return NULL;

piova = alloc_iova(&domain->iovad,
- size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
+ size >> PAGE_SHIFT, IOVA_PFN(end), 1);
return piova;
}

@@ -1813,8 +1836,9 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
return domain;
}

-static dma_addr_t
-intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
+dma_addr_t
+intel_map_single(struct device *hwdev, unsigned long paddr, size_t size,
+ int dir)
{
struct pci_dev *pdev = to_pci_dev(hwdev);
struct dmar_domain *domain;
@@ -1825,7 +1849,7 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)

BUG_ON(dir == DMA_NONE);
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
- return paddr;
+ return (dma_addr_t)paddr;

domain = get_valid_domain_for_dev(pdev);
if (!domain)
@@ -1837,7 +1861,7 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
if (!iova)
goto error;

- start_paddr = iova->pfn_lo << PAGE_SHIFT_4K;
+ start_paddr = iova->pfn_lo << PAGE_SHIFT;

/*
* Check if DMAR supports zero-length reads on write only
@@ -1855,27 +1879,23 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
* is not a big problem
*/
ret = domain_page_mapping(domain, start_paddr,
- ((u64)paddr) & PAGE_MASK_4K, size, prot);
+ ((u64)paddr) & PAGE_MASK, size, prot);
if (ret)
goto error;

- pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
- pci_name(pdev), size, (u64)paddr,
- size, (u64)start_paddr, dir);
-
/* it's a non-present to present mapping */
ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
- start_paddr, size >> PAGE_SHIFT_4K, 1);
+ start_paddr, size >> IOMMU_PAGE_SHIFT, 1);
if (ret)
iommu_flush_write_buffer(domain->iommu);

- return (start_paddr + ((u64)paddr & (~PAGE_MASK_4K)));
+ return start_paddr + ((u64)paddr & (~PAGE_MASK));

error:
if (iova)
__free_iova(&domain->iovad, iova);
printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
- pci_name(pdev), size, (u64)paddr, dir);
+ pci_name(pdev), size, (unsigned long long)paddr, dir);
return 0;
}

@@ -1936,7 +1956,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}

-static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
+void
+intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
size_t size, int dir)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -1953,11 +1974,11 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
if (!iova)
return;

- start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
+ start_addr = iova->pfn_lo << PAGE_SHIFT;
size = aligned_size((u64)dev_addr, size);

pr_debug("Device %s unmapping: %lx@%llx\n",
- pci_name(pdev), size, (u64)start_addr);
+ pci_name(pdev), size, (unsigned long long)start_addr);

/* clear the whole page */
dma_pte_clear_range(domain, start_addr, start_addr + size);
@@ -1965,7 +1986,7 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
if (intel_iommu_strict) {
if (iommu_flush_iotlb_psi(domain->iommu,
- domain->id, start_addr, size >> PAGE_SHIFT_4K, 0))
+ domain->id, start_addr, size >> IOMMU_PAGE_SHIFT, 0))
iommu_flush_write_buffer(domain->iommu);
/* free iova */
__free_iova(&domain->iovad, iova);
@@ -1978,13 +1999,14 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
}
}

-static void * intel_alloc_coherent(struct device *hwdev, size_t size,
+void *
+intel_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags)
{
void *vaddr;
int order;

- size = PAGE_ALIGN_4K(size);
+ size = PAGE_ALIGN(size);
order = get_order(size);
flags &= ~(GFP_DMA | GFP_DMA32);

@@ -2000,12 +2022,13 @@ static void * intel_alloc_coherent(struct device *hwdev, size_t size,
return NULL;
}

-static void intel_free_coherent(struct device *hwdev, size_t size,
+void
+intel_free_coherent(struct device *hwdev, size_t size,
void *vaddr, dma_addr_t dma_handle)
{
int order;

- size = PAGE_ALIGN_4K(size);
+ size = PAGE_ALIGN(size);
order = get_order(size);

intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
@@ -2013,7 +2036,8 @@ static void intel_free_coherent(struct device *hwdev, size_t size,
}

#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
-static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
+void
+intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
int nelems, int dir)
{
int i;
@@ -2038,7 +2062,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
size += aligned_size((u64)addr, sg->length);
}

- start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
+ start_addr = iova->pfn_lo << PAGE_SHIFT;

/* clear the whole page */
dma_pte_clear_range(domain, start_addr, start_addr + size);
@@ -2046,7 +2070,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
dma_pte_free_pagetable(domain, start_addr, start_addr + size);

if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
- size >> PAGE_SHIFT_4K, 0))
+ size >> IOMMU_PAGE_SHIFT, 0))
iommu_flush_write_buffer(domain->iommu);

/* free iova */
@@ -2067,7 +2091,8 @@ static int intel_nontranslate_map_sg(struct device *hddev,
return nelems;
}

-static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
+int
+intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
int nelems, int dir)
{
void *addr;
@@ -2112,14 +2137,14 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;

- start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
+ start_addr = iova->pfn_lo << PAGE_SHIFT;
offset = 0;
for_each_sg(sglist, sg, nelems, i) {
addr = SG_ENT_VIRT_ADDRESS(sg);
addr = (void *)virt_to_phys(addr);
size = aligned_size((u64)addr, sg->length);
ret = domain_page_mapping(domain, start_addr + offset,
- ((u64)addr) & PAGE_MASK_4K,
+ ((u64)addr) & PAGE_MASK,
size, prot);
if (ret) {
/* clear the page */
@@ -2133,14 +2158,14 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
return 0;
}
sg->dma_address = start_addr + offset +
- ((u64)addr & (~PAGE_MASK_4K));
+ ((u64)addr & (~PAGE_MASK));
sg->dma_length = sg->length;
offset += size;
}

/* it's a non-present to present mapping */
if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
- start_addr, offset >> PAGE_SHIFT_4K, 1))
+ start_addr, offset >> IOMMU_PAGE_SHIFT, 1))
iommu_flush_write_buffer(domain->iommu);
return nelems;
}
@@ -2180,7 +2205,6 @@ static inline int iommu_devinfo_cache_init(void)
sizeof(struct device_domain_info),
0,
SLAB_HWCACHE_ALIGN,
-
NULL);
if (!iommu_devinfo_cache) {
printk(KERN_ERR "Couldn't create devinfo cache\n");
@@ -2198,7 +2222,6 @@ static inline int iommu_iova_cache_init(void)
sizeof(struct iova),
0,
SLAB_HWCACHE_ALIGN,
-
NULL);
if (!iommu_iova_cache) {
printk(KERN_ERR "Couldn't create iova cache\n");
@@ -2327,7 +2350,7 @@ void intel_iommu_domain_exit(struct dmar_domain *domain)
return;

end = DOMAIN_MAX_ADDR(domain->gaw);
- end = end & (~PAGE_MASK_4K);
+ end = end & (~IOMMU_PAGE_MASK);

/* clear ptes */
dma_pte_clear_range(domain, 0, end);
@@ -2423,6 +2446,6 @@ u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
if (pte)
pfn = dma_pte_addr(*pte);

- return pfn >> PAGE_SHIFT_4K;
+ return pfn >> IOMMU_PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index bff5c65..218598d 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -2,15 +2,15 @@
#define _DMA_REMAPPING_H

/*
- * We need a fixed PAGE_SIZE of 4K irrespective of
- * arch PAGE_SIZE for IOMMU page tables.
+ * VT-d hardware uses 4K page size regardless host page size.
*/
-#define PAGE_SHIFT_4K (12)
-#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
-#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
-#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
+#define IOMMU_PAGE_SHIFT (12)
+#define IOMMU_PAGE_SIZE (1UL << IOMMU_PAGE_SHIFT)
+#define IOMMU_PAGE_MASK (((u64)-1) << IOMMU_PAGE_SHIFT)
+#define IOMMU_PAGE_ALIGN(addr) \
+ (((addr) + IOMMU_PAGE_SIZE - 1) & IOMMU_PAGE_MASK)

-#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
+#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)

@@ -25,7 +36,7 @@ struct root_entry {
u64 val;
u64 rsvd1;
};
-#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
+#define ROOT_ENTRY_NR (IOMMU_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
return (root->val & 1);
@@ -36,7 +47,7 @@ static inline void set_root_present(struct root_entry *root)
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
- root->val |= value & PAGE_MASK_4K;
+ root->val |= value & IOMMU_PAGE_MASK;
}

struct context_entry;
@@ -45,7 +56,7 @@ get_context_addr_from_root(struct root_entry *root)
{
return (struct context_entry *)
(root_present(root)?phys_to_virt(
- root->val & PAGE_MASK_4K):
+ root->val & IOMMU_PAGE_MASK) :
NULL);
}

@@ -67,7 +78,7 @@ struct context_entry {
#define context_present(c) ((c).lo & 1)
#define context_fault_disable(c) (((c).lo >> 1) & 1)
#define context_translation_type(c) (((c).lo >> 2) & 3)
-#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
+#define context_address_root(c) ((c).lo & IOMMU_PAGE_MASK)
#define context_address_width(c) ((c).hi & 7)
#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))

@@ -81,7 +92,7 @@ struct context_entry {
} while (0)
#define CONTEXT_TT_MULTI_LEVEL 0
#define context_set_address_root(c, val) \
- do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
+ do {(c).lo |= (val) & IOMMU_PAGE_MASK; } while (0)
#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
#define context_set_domain_id(c, val) \
do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
@@ -107,9 +118,9 @@ struct dma_pte {
#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
#define dma_set_pte_prot(p, prot) \
do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
-#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
+#define dma_pte_addr(p) ((p).val & IOMMU_PAGE_MASK)
#define dma_set_pte_addr(p, addr) do {\
- (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+ (p).val |= ((addr) & IOMMU_PAGE_MASK); } while (0)
#define dma_pte_present(p) (((p).val & 3) != 0)

struct intel_iommu;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index e7b196b..d84612a 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -67,6 +67,13 @@
hi = readl(dmar + reg + 4); \
(((u64) hi) << 32) + lo; })
*/
+#ifdef CONFIG_IA64
+#define dmar_readq readq
+static inline void dmar_writeq(void __iomem *addr, u64 val)
+{
+ writeq(val, addr);
+}
+#else
static inline u64 dmar_readq(void __iomem *addr)
{
u32 lo, hi;
@@ -80,6 +87,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
writel((u32)val, addr);
writel((u32)(val >> 32), addr + 4);
}
+#endif

#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4)
#define DMAR_VER_MINOR(v) ((v) & 0x0f)
@@ -200,22 +208,28 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
#define dma_frcd_type(d) ((d >> 30) & 1)
#define dma_frcd_fault_reason(c) (c & 0xff)
#define dma_frcd_source_id(c) (c & 0xffff)
-#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
-
-#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
+/* low 64 bit */
+#define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT))
+
+/* 10 seconds */
+#ifdef CONFIG_IA64
+#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
+#else
+#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
+#endif

-#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
-{\
- cycles_t start_time = get_cycles();\
- while (1) {\
- sts = op (iommu->reg + offset);\
- if (cond)\
- break;\
+#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
+do { \
+ cycles_t start_time = get_cycles(); \
+ while (1) { \
+ sts = op(iommu->reg + offset); \
+ if (cond) \
+ break; \
if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
- panic("DMAR hardware is malfunctioning\n");\
- cpu_relax();\
- }\
-}
+ panic("DMAR hardware is malfunctioning\n"); \
+ cpu_relax(); \
+ } \
+} while (0)

#define QI_LENGTH 256 /* queue length */

@@ -316,4 +330,11 @@ struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
int intel_iommu_found(void);
u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);

+extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t);
+extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t);
+extern dma_addr_t intel_map_single(struct device *, unsigned long, size_t, int);
+extern void intel_unmap_single(struct device *, dma_addr_t, size_t, int);
+extern int intel_map_sg(struct device *, struct scatterlist *, int, int);
+extern void intel_unmap_sg(struct device *, struct scatterlist *, int, int);
+
#endif


2008-10-02 08:30:18

by Ingo Molnar

Subject: Re: [PATCH 1/2]Add Variable Page Size and IA64 Support in Intel IOMMU: Generic Part


* Fenghua Yu <[email protected]> wrote:

> --- a/drivers/pci/dmar.c
> +++ b/drivers/pci/dmar.c
> @@ -35,6 +35,10 @@
> #undef PREFIX
> #define PREFIX "DMAR:"
>
> +#ifdef CONFIG_IA64
> +#define cpu_has_x2apic 0
> +#endif

hm, that's not too nice - why not add it to arch/ia64/include/?

Ingo

2008-10-02 15:32:13

by Bjorn Helgaas

Subject: Re: [PATCH 1/2]Add Variable Page Size and IA64 Support in Intel IOMMU: Generic Part

On Wednesday 01 October 2008 10:57:25 am Fenghua Yu wrote:
> The current Intel IOMMU code assumes that both host page size and Intel IOMMU page size are 4K. The first patch supports variable page size. This provides support for IA64 which has multiple page sizes.
>
> This patch also adds some other code hooks for the IA64 platform, such as the DMAR_OPERATION_TIMEOUT definition.

Can you split this patch up? It contains several logically separate
changes:
- casting things to unsigned long long
- adding stuff under #ifdef CONFIG_IA64
- page size changes
- whitespace changes

> @@ -510,7 +514,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
>
> iommu->seq_id = iommu_allocated++;
>
> - iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
> + iommu->reg = ioremap(drhd->reg_base_addr, IOMMU_PAGE_SIZE);
> if (!iommu->reg) {
> printk(KERN_ERR "IOMMU: can't map the region\n");

This printk should include a clue, like the IOMMU ID and/or address
we tried to map.

> +#ifdef CONFIG_IA64
> +static inline void *ia64_get_zeroed_page(gfp_t gfp_mask)
> +{
> + struct page *page;
> +
> + page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
> + if (page)
> + return (void *)page_address(page);
> + return 0;
> +}
> +#endif
>
> static inline void *alloc_pgtable_page(void)
> {
> @@ -141,7 +155,11 @@ static inline void *alloc_pgtable_page(void)
> /* trying to avoid low memory issues */
> flags = current->flags & PF_MEMALLOC;
> current->flags |= PF_MEMALLOC;
> +#ifdef CONFIG_IA64
> + vaddr = ia64_get_zeroed_page(GFP_ATOMIC);
> +#else
> vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
> +#endif

Why does ia64 need a special case here?

> @@ -655,7 +673,8 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
> printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
> if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
> pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
> - DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
> + (unsigned long long)DMA_TLB_IIRG(type),
> + (unsigned long long) DMA_TLB_IAIG(val));

These printks should include an IOMMU ID also (I assume a system can
contain multiple IOMMUs).

> @@ -1490,9 +1513,9 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
> return -ENOMEM;
>
> /* The address might not be aligned */
> - base = start & PAGE_MASK_4K;
> + base = start & PAGE_MASK;
> size = end - base;
> - size = PAGE_ALIGN_4K(size);
> + size = PAGE_ALIGN(size);
> if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
> IOVA_PFN(base + size) - 1)) {
> printk(KERN_ERR "IOMMU: reserve iova failed\n");

This should probably be a "dev_err(&pdev->dev," and include the
IOMMU ID.

> @@ -1855,27 +1879,23 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
> * is not a big problem
> */
> ret = domain_page_mapping(domain, start_paddr,
> - ((u64)paddr) & PAGE_MASK_4K, size, prot);
> + ((u64)paddr) & PAGE_MASK, size, prot);
> if (ret)
> goto error;
>
> - pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
> - pci_name(pdev), size, (u64)paddr,
> - size, (u64)start_paddr, dir);
> -
> /* it's a non-present to present mapping */
> ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
> - start_paddr, size >> PAGE_SHIFT_4K, 1);
> + start_paddr, size >> IOMMU_PAGE_SHIFT, 1);
> if (ret)
> iommu_flush_write_buffer(domain->iommu);
>
> - return (start_paddr + ((u64)paddr & (~PAGE_MASK_4K)));
> + return start_paddr + ((u64)paddr & (~PAGE_MASK));
>
> error:
> if (iova)
> __free_iova(&domain->iovad, iova);
> printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
> - pci_name(pdev), size, (u64)paddr, dir);
> + pci_name(pdev), size, (unsigned long long)paddr, dir);

Use dev_err() here and include IOMMU ID.

> @@ -1953,11 +1974,11 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
> if (!iova)
> return;
>
> - start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
> + start_addr = iova->pfn_lo << PAGE_SHIFT;
> size = aligned_size((u64)dev_addr, size);
>
> pr_debug("Device %s unmapping: %lx@%llx\n",
> - pci_name(pdev), size, (u64)start_addr);
> + pci_name(pdev), size, (unsigned long long)start_addr);

Use dev_dbg() here.

> diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
> index bff5c65..218598d 100644
> --- a/include/linux/dma_remapping.h
> +++ b/include/linux/dma_remapping.h
> @@ -2,15 +2,15 @@
> #define _DMA_REMAPPING_H
>
> /*
> - * We need a fixed PAGE_SIZE of 4K irrespective of
> - * arch PAGE_SIZE for IOMMU page tables.
> + * VT-d hardware uses 4K page size regardless host page size.
> */
> -#define PAGE_SHIFT_4K (12)
> -#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
> -#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
> -#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
> +#define IOMMU_PAGE_SHIFT (12)
> +#define IOMMU_PAGE_SIZE (1UL << IOMMU_PAGE_SHIFT)
> +#define IOMMU_PAGE_MASK (((u64)-1) << IOMMU_PAGE_SHIFT)
> +#define IOMMU_PAGE_ALIGN(addr) \
> + (((addr) + IOMMU_PAGE_SIZE - 1) & IOMMU_PAGE_MASK)
>
> -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
> +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)

These are pretty generic names (IOMMU_PAGE_SHIFT, IOVA_PFN, etc),
but the definitions seem to be specific to VT-d. I can't tell if
this file is supposed to be sort of generic, or if it's Intel-specific.

> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index e7b196b..d84612a 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -67,6 +67,13 @@
> hi = readl(dmar + reg + 4); \
> (((u64) hi) << 32) + lo; })
> */
> +#ifdef CONFIG_IA64
> +#define dmar_readq readq
> +static inline void dmar_writeq(void __iomem *addr, u64 val)
> +{
> + writeq(val, addr);
> +}
> +#else
> static inline u64 dmar_readq(void __iomem *addr)
> {
> u32 lo, hi;
> @@ -80,6 +87,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
> writel((u32)val, addr);
> writel((u32)(val >> 32), addr + 4);
> }
> +#endif

What's this all about? Why do we need #ifdef CONFIG_IA64 here?
Doesn't x86 provide its own readq/writeq implementation?

Bjorn

2008-10-02 21:46:21

by Fenghua Yu

Subject: RE: [PATCH 1/2]Add Variable Page Size and IA64 Support in Intel IOMMU: Generic Part

>> The current Intel IOMMU code assumes that both host page size and Intel IOMMU page size are 4K. The first patch supports variable page size. This provides support for IA64 which has multiple page sizes.
>>
>> This patch also adds some other code hooks for the IA64 platform, such as the DMAR_OPERATION_TIMEOUT definition.

>Can you split this patch up? It contains several logically separate
changes:
> - casting things to unsigned long long
> - adding stuff under #ifdef CONFIG_IA64
> - page size changes
> - whitespace changes

Depends on who is picking up the generic patch. If it's needed, I can split it into multiple patches.

>> @@ -510,7 +514,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
>>
>> iommu->seq_id = iommu_allocated++;
>>
>> - iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
>> + iommu->reg = ioremap(drhd->reg_base_addr, IOMMU_PAGE_SIZE);
>> if (!iommu->reg) {
>> printk(KERN_ERR "IOMMU: can't map the region\n");
>
>This printk should include a clue, like the IOMMU ID and/or address
>we tried to map.

This is a good comment. This patch set is mainly for porting IOMMU to IA64. I will add IOMMU ID in a follow-up clean-up patch.

>> +#ifdef CONFIG_IA64
>> +static inline void *ia64_get_zeroed_page(gfp_t gfp_mask)
>> +{
>> + struct page *page;
>> +
>> + page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
>> + if (page)
>> + return (void *)page_address(page);
>> + return 0;
>> +}
>> +#endif
>>
>> static inline void *alloc_pgtable_page(void)
>> {
>> @@ -141,7 +155,11 @@ static inline void *alloc_pgtable_page(void)
>> /* trying to avoid low memory issues */
>> flags = current->flags & PF_MEMALLOC;
>> current->flags |= PF_MEMALLOC;
>> +#ifdef CONFIG_IA64
>> + vaddr = ia64_get_zeroed_page(GFP_ATOMIC);
>> +#else
>> vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
>> +#endif

>Why does ia64 need a special case here?

I think the old kernel I originally worked on didn't have get_zeroed_page for ia64. Yes, I will remove this change in the updated patch.

>> @@ -655,7 +673,8 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
>> printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
>> if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
>> pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
>> - DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
>> + (unsigned long long)DMA_TLB_IIRG(type),
>> + (unsigned long long) DMA_TLB_IAIG(val));
>
>These printks should include an IOMMU ID also (I assume a system can
>contain multiple IOMMUs).

This patch set is mainly for porting IOMMU to IA64. I will add IOMMU ID in a follow-up clean-up patch.

>> @@ -1490,9 +1513,9 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
>> return -ENOMEM;
>>
>> /* The address might not be aligned */
>> - base = start & PAGE_MASK_4K;
>> + base = start & PAGE_MASK;
>> size = end - base;
>> - size = PAGE_ALIGN_4K(size);
>> + size = PAGE_ALIGN(size);
>> if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
>> IOVA_PFN(base + size) - 1)) {
>> printk(KERN_ERR "IOMMU: reserve iova failed\n");
>
>This should probably be a "dev_err(&pdev->dev," and include the
>IOMMU ID.

This patch set is mainly for porting IOMMU to IA64. I will add IOMMU ID and dev_err in a follow-up clean-up patch.


>> @@ -1855,27 +1879,23 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
>> * is not a big problem
>> */
>> ret = domain_page_mapping(domain, start_paddr,
>> - ((u64)paddr) & PAGE_MASK_4K, size, prot);
>> + ((u64)paddr) & PAGE_MASK, size, prot);
>> if (ret)
>> goto error;
>>
>> - pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
>> - pci_name(pdev), size, (u64)paddr,
>> - size, (u64)start_paddr, dir);
>> -
>> /* it's a non-present to present mapping */
>> ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
>> - start_paddr, size >> PAGE_SHIFT_4K, 1);
>> + start_paddr, size >> IOMMU_PAGE_SHIFT, 1);
>> if (ret)
>> iommu_flush_write_buffer(domain->iommu);
>>
>> - return (start_paddr + ((u64)paddr & (~PAGE_MASK_4K)));
>> + return start_paddr + ((u64)paddr & (~PAGE_MASK));
>>
>> error:
>> if (iova)
>> __free_iova(&domain->iovad, iova);
>> printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
>> - pci_name(pdev), size, (u64)paddr, dir);
>> + pci_name(pdev), size, (unsigned long long)paddr, dir);
>
>Use dev_err() here and include IOMMU ID.

This patch set is mainly for porting IOMMU to IA64. I will add IOMMU ID and use dev_err() in a follow-up clean-up patch.

>> @@ -1953,11 +1974,11 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
>> if (!iova)
>> return;
>>
>> - start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
>> + start_addr = iova->pfn_lo << PAGE_SHIFT;
>> size = aligned_size((u64)dev_addr, size);
>>
>> pr_debug("Device %s unmapping: %lx@%llx\n",
>> - pci_name(pdev), size, (u64)start_addr);
>> + pci_name(pdev), size, (unsigned long long)start_addr);
>
>Use dev_dbg() here.

This patch set is mainly for porting IOMMU to IA64. I will change to dev_dbg() in a follow-up patch.

>> diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
>> index bff5c65..218598d 100644
>> --- a/include/linux/dma_remapping.h
>> +++ b/include/linux/dma_remapping.h
>> @@ -2,15 +2,15 @@
>> #define _DMA_REMAPPING_H
>>
>> /*
>> - * We need a fixed PAGE_SIZE of 4K irrespective of
>> - * arch PAGE_SIZE for IOMMU page tables.
>> + * VT-d hardware uses 4K page size regardless host page size.
>> */
>> -#define PAGE_SHIFT_4K (12)
>> -#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
>> -#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
>> -#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
>> +#define IOMMU_PAGE_SHIFT (12)
>> +#define IOMMU_PAGE_SIZE (1UL << IOMMU_PAGE_SHIFT)
>> +#define IOMMU_PAGE_MASK (((u64)-1) << IOMMU_PAGE_SHIFT)
>> +#define IOMMU_PAGE_ALIGN(addr) \
>> + (((addr) + IOMMU_PAGE_SIZE - 1) & IOMMU_PAGE_MASK)
>>
>> -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
>> +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
>
>These are pretty generic names (IOMMU_PAGE_SHIFT, IOVA_PFN, etc),
>but the definitions seem to be specific to VT-d. I can't tell if
>this file is supposed to be sort of generic, or if it's Intel-specific.

I can change IOMMU_PAGE_SHIFT etc to VTD_PAGE_SHIFT etc and change IOVA_PFN to VTD_IOVA_PFN. What do you think?

>> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
>> index e7b196b..d84612a 100644
>> --- a/include/linux/intel-iommu.h
>> +++ b/include/linux/intel-iommu.h
>> @@ -67,6 +67,13 @@
>> hi = readl(dmar + reg + 4); \
>> (((u64) hi) << 32) + lo; })
>> */
>> +#ifdef CONFIG_IA64
>> +#define dmar_readq readq
>> +static inline void dmar_writeq(void __iomem *addr, u64 val)
>> +{
>> + writeq(val, addr);
>> +}
>> +#else
>> static inline u64 dmar_readq(void __iomem *addr)
>> {
>> u32 lo, hi;
>> @@ -80,6 +87,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
>> writel((u32)val, addr);
>> writel((u32)(val >> 32), addr + 4);
>> }
>> +#endif
>
>What's this all about? Why do we need #ifdef CONFIG_IA64 here?
>Doesn't x86 provide its own readq/writeq implementation?

Bjorn

2008-10-02 22:06:30

by Fenghua Yu

Subject: RE: [PATCH 1/2]Add Variable Page Size and IA64 Support in Intel IOMMU: Generic Part


>> --- a/drivers/pci/dmar.c
>> +++ b/drivers/pci/dmar.c
>> @@ -35,6 +35,10 @@
>> #undef PREFIX
>> #define PREFIX "DMAR:"
>>
>> +#ifdef CONFIG_IA64
>> +#define cpu_has_x2apic 0
>> +#endif
>
>hm, that's not too nice - why not add it to arch/ia64/include/?

OK. I'll move this to arch/ia64/include (along with other #ifdef CONFIG_IA64 places if needed).


>> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
>> index e7b196b..d84612a 100644
>> --- a/include/linux/intel-iommu.h
>> +++ b/include/linux/intel-iommu.h
>> @@ -67,6 +67,13 @@
>> hi = readl(dmar + reg + 4); \
>> (((u64) hi) << 32) + lo; })
>> */
>> +#ifdef CONFIG_IA64
>> +#define dmar_readq readq
>> +static inline void dmar_writeq(void __iomem *addr, u64 val)
>> +{
>> + writeq(val, addr);
>> +}
>> +#else
>> static inline u64 dmar_readq(void __iomem *addr)
>> {
>> u32 lo, hi;
>> @@ -80,6 +87,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
>> writel((u32)val, addr);
>> writel((u32)(val >> 32), addr + 4);
>> }
>> +#endif
>
>What's this all about? Why do we need #ifdef CONFIG_IA64 here?
>Doesn't x86 provide its own readq/writeq implementation?

This is a comment from Bjorn.

In my patch, a single readq/writeq works faster than two readl/two writel on IA64. X86 uses two readl/two writel so that the code works on both x86 and x86-64, although Intel IOMMU currently only has an x86-64 version. dmar_readq() and dmar_writeq() are in a moderately performance-critical path.

Do you think it is OK for my current implementation to have the #ifdef CONFIG_IA64 here? Or I can change x86 to use readq/writeq as well, or have IA64 use two readl/two writel for cleaner code?
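
For reference, a rough sketch of a middle ground, keeping the single 64-bit access where the architecture provides one without an explicit #ifdef CONFIG_IA64; keying off a readq macro is only an assumption about how the arch headers expose the helper, and it is not what the patch above does:

#ifdef readq	/* assumed: arch advertises a native 64-bit MMIO read */
static inline u64 dmar_readq(void __iomem *addr)
{
	return readq(addr);
}
#else		/* fall back to two 32-bit reads, as on x86 today */
static inline u64 dmar_readq(void __iomem *addr)
{
	u32 lo, hi;

	lo = readl(addr);
	hi = readl(addr + 4);
	return ((u64) hi << 32) + lo;
}
#endif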

Thanks.

-Fenghua

2008-10-03 08:55:50

by Ingo Molnar

Subject: Re: [PATCH 1/2]Add Variable Page Size and IA64 Support in Intel IOMMU: Generic Part


* Yu, Fenghua <[email protected]> wrote:

> >What's this all about? Why do we need #ifdef CONFIG_IA64 here?
> >Doesn't x86 provide its own readq/writeq implementation?
>
> This is a comment from Bjorn.
>
> In my patch, a single readq/writeq works faster than two readl/two
> writel on IA64. X86 uses two readl/two writel so that the code works
> on both x86 and x86-64, although Intel IOMMU currently only has an
> x86-64 version. dmar_readq() and dmar_writeq() are in a moderately
> performance-critical path.
>
> Do you think it is OK for my current implementation to have the #ifdef
> CONFIG_IA64 here? Or I can change x86 to use readq/writeq as well, or
> have IA64 use two readl/two writel for cleaner code?

yes, clean code is very much preferred for a small detail like this.

Ingo

2008-10-03 15:33:27

by Bjorn Helgaas

Subject: Re: [PATCH 1/2]Add Variable Page Size and IA64 Support in Intel IOMMU: Generic Part

On Thursday 02 October 2008 03:46:06 pm Yu, Fenghua wrote:
> >> The current Intel IOMMU code assumes that both host page size and Intel IOMMU page size are 4K. The first patch supports variable page size. This provides support for IA64 which has multiple page sizes.
> >>
> >> This patch also adds some other code hooks for the IA64 platform, such as the DMAR_OPERATION_TIMEOUT definition.
>
> >Can you split this patch up? It contains several logically separate
> changes:
> > - casting things to unsigned long long
> > - adding stuff under #ifdef CONFIG_IA64
> > - page size changes
> > - whitespace changes
>
> Depends on who is picking up the generic patch. If it's needed, I can split it into multiple patches.

Regardless of who's picking up the patch, splitting it makes it
easier to review and easier to spot bugs, and makes bisection yield
better information.

> >> @@ -510,7 +514,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
> >>
> >> iommu->seq_id = iommu_allocated++;
> >>
> >> - iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
> >> + iommu->reg = ioremap(drhd->reg_base_addr, IOMMU_PAGE_SIZE);
> >> if (!iommu->reg) {
> >> printk(KERN_ERR "IOMMU: can't map the region\n");
> >
> >This printk should include a clue, like the IOMMU ID and/or address
> >we tried to map.
>
> This is a good comment. This patch set is mainly for porting IOMMU to IA64. I will add IOMMU ID in a follow-up clean-up patch.

Since you're not actually adding the printk in this patch, it sounds
fair to clean it up in a follow-up patch.

> >> -#define PAGE_SHIFT_4K (12)
> >> -#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
> >> -#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
> >> -#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
> >> +#define IOMMU_PAGE_SHIFT (12)
> >> +#define IOMMU_PAGE_SIZE (1UL << IOMMU_PAGE_SHIFT)
> >> +#define IOMMU_PAGE_MASK (((u64)-1) << IOMMU_PAGE_SHIFT)
> >> +#define IOMMU_PAGE_ALIGN(addr) \
> >> + (((addr) + IOMMU_PAGE_SIZE - 1) & IOMMU_PAGE_MASK)
> >>
> >> -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
> >> +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
> >
> >These are pretty generic names (IOMMU_PAGE_SHIFT, IOVA_PFN, etc),
> >but the definitions seem to be specific to VT-d. I can't tell if
> >this file is supposed to be sort of generic, or if it's Intel-specific.
>
> I can change IOMMU_PAGE_SHIFT etc to VTD_PAGE_SHIFT etc and change IOVA_PFN to VTD_IOVA_PFN. What do you think?

Those sound good to me.

Bjorn

2008-10-04 00:21:49

by Fenghua Yu

Subject: RE: [PATCH 1/2]Add Variable Page Size and IA64 Support in Intel IOMMU: Generic Part

>> >> The current Intel IOMMU code assumes that both host page size and Intel IOMMU page size are 4K. The first patch supports variable page size. This provides support for IA64 which has multiple page sizes.
>> >>
> >> This patch also adds some other code hooks for the IA64 platform, such as the DMAR_OPERATION_TIMEOUT definition.
>>
>> >Can you split this patch up? It contains several logically separate
>> changes:
>> > - casting things to unsigned long long
>> > - adding stuff under #ifdef CONFIG_IA64
>> > - page size changes
>> > - whitespace changes
>>
>> Depends on who is picking up the generic patch. If it's needed, I can split it into multiple patches.

>Regardless of who's picking up the patch, splitting it makes it
>easier to review and easier to spot bugs, and makes bisection yield
>better information.

Since more than 95% of the code in the patch is directly related to the page size changes, splitting it would generate one big patch (95% of the current patch size) and a few small ones. I would still like to send a single updated patch based on the collected comments.

Thanks.

-Fenghua

2008-10-07 00:03:23

by Fenghua Yu

Subject: [PATCH V2 1/2] Add Variable Page Size and IA64 Support in Intel IOMMU: Generic Part

The current Intel IOMMU code assumes that both host page size and Intel IOMMU page size are 4K. This patch supports variable page size. It provides support for IA64 which has multiple page sizes.

This patch also reorganizes code to work on both the x86-64 and IA64 platforms, e.g. the DMAR_OPERATION_TIMEOUT definition.

Signed-off-by: Fenghua Yu <[email protected]>
Signed-off-by: Tony Luck <[email protected]>

---

arch/x86/kernel/pci-dma.c | 16 -----
drivers/pci/dmar.c | 19 +++---
drivers/pci/intel-iommu.c | 124 ++++++++++++++++++++++--------------------
drivers/pci/quirks.c | 14 ++++
include/asm-x86/iommu.h | 4 +
include/linux/dma_remapping.h | 27 ++++-----
include/linux/intel-iommu.h | 39 +++++++------
7 files changed, 131 insertions(+), 112 deletions(-)


diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 23882c4..6751f4c 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -9,8 +9,6 @@
#include <asm/calgary.h>
#include <asm/amd_iommu.h>

-static int forbid_dac __read_mostly;
-
struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

@@ -262,17 +260,3 @@ void pci_iommu_shutdown(void)
}
/* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init);
-
-#ifdef CONFIG_PCI
-/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
-
-static __devinit void via_no_dac(struct pci_dev *dev)
-{
- if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
- printk(KERN_INFO "PCI: VIA PCI bridge detected."
- "Disabling DAC.\n");
- forbid_dac = 1;
- }
-}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
-#endif
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 0c92e2c..59c974a 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -277,14 +277,15 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
drhd = (struct acpi_dmar_hardware_unit *)header;
printk (KERN_INFO PREFIX
"DRHD (flags: 0x%08x)base: 0x%016Lx\n",
- drhd->flags, drhd->address);
+ drhd->flags, (unsigned long long)drhd->address);
break;
case ACPI_DMAR_TYPE_RESERVED_MEMORY:
rmrr = (struct acpi_dmar_reserved_memory *)header;

printk (KERN_INFO PREFIX
"RMRR base: 0x%016Lx end: 0x%016Lx\n",
- rmrr->base_address, rmrr->end_address);
+ (unsigned long long)rmrr->base_address,
+ (unsigned long long)rmrr->end_address);
break;
}
}
@@ -328,7 +329,7 @@ parse_dmar_table(void)
if (!dmar)
return -ENODEV;

- if (dmar->width < PAGE_SHIFT_4K - 1) {
+ if (dmar->width < PAGE_SHIFT - 1) {
printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
return -EINVAL;
}
@@ -510,7 +511,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)

iommu->seq_id = iommu_allocated++;

- iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
+ iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
if (!iommu->reg) {
printk(KERN_ERR "IOMMU: can't map the region\n");
goto error;
@@ -521,8 +522,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
cap_max_fault_reg_offset(iommu->cap));
- map_size = PAGE_ALIGN_4K(map_size);
- if (map_size > PAGE_SIZE_4K) {
+ map_size = VTD_PAGE_ALIGN(map_size);
+ if (map_size > VTD_PAGE_SIZE) {
iounmap(iommu->reg);
iommu->reg = ioremap(drhd->reg_base_addr, map_size);
if (!iommu->reg) {
@@ -533,8 +534,10 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)

ver = readl(iommu->reg + DMAR_VER_REG);
pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
- drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
- iommu->cap, iommu->ecap);
+ (unsigned long long)drhd->reg_base_addr,
+ DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
+ (unsigned long long)iommu->cap,
+ (unsigned long long)iommu->ecap);

spin_lock_init(&iommu->register_lock);

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index fc5f2db..973c7ae 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -18,6 +18,7 @@
* Author: Ashok Raj <[email protected]>
* Author: Shaohua Li <[email protected]>
* Author: Anil S Keshavamurthy <[email protected]>
+ * Author: Fenghua Yu <[email protected]>
*/

#include <linux/init.h>
@@ -35,11 +36,13 @@
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
-#include <asm/proto.h> /* force_iommu in this header in x86-64*/
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include "pci.h"

+#define ROOT_SIZE VTD_PAGE_SIZE
+#define CONTEXT_SIZE VTD_PAGE_SIZE
+
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

@@ -199,7 +202,7 @@ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
spin_unlock_irqrestore(&iommu->lock, flags);
return NULL;
}
- __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
+ __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
phy_addr = virt_to_phys((void *)context);
set_root_value(root, phy_addr);
set_root_present(root);
@@ -345,7 +348,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
return NULL;
}
__iommu_flush_cache(domain->iommu, tmp_page,
- PAGE_SIZE_4K);
+ PAGE_SIZE);
dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
/*
* high level table always sets r/w, last level page
@@ -408,13 +411,13 @@ static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
start &= (((u64)1) << addr_width) - 1;
end &= (((u64)1) << addr_width) - 1;
/* in case it's partial page */
- start = PAGE_ALIGN_4K(start);
- end &= PAGE_MASK_4K;
+ start = PAGE_ALIGN(start);
+ end &= PAGE_MASK;

/* we don't need lock here, nobody else touches the iova range */
while (start < end) {
dma_pte_clear_one(domain, start);
- start += PAGE_SIZE_4K;
+ start += VTD_PAGE_SIZE;
}
}

@@ -468,7 +471,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
if (!root)
return -ENOMEM;

- __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
+ __iommu_flush_cache(iommu, root, ROOT_SIZE);

spin_lock_irqsave(&iommu->lock, flags);
iommu->root_entry = root;
@@ -655,7 +658,8 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
- DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
+ (unsigned long long)DMA_TLB_IIRG(type),
+ (unsigned long long) DMA_TLB_IAIG(val));
/* flush context entry will implictly flush write buffer */
return 0;
}
@@ -679,7 +683,7 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
{
unsigned int mask;

- BUG_ON(addr & (~PAGE_MASK_4K));
+ BUG_ON(addr & (~VTD_PAGE_MASK));
BUG_ON(pages == 0);

/* Fallback to domain selective flush if no PSI support */
@@ -831,7 +835,7 @@ void dmar_msi_read(int irq, struct msi_msg *msg)
}

static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
- u8 fault_reason, u16 source_id, u64 addr)
+ u8 fault_reason, u16 source_id, unsigned long long addr)
{
const char *reason;

@@ -1084,9 +1088,9 @@ static void dmar_init_reserved_ranges(void)
if (!r->flags || !(r->flags & IORESOURCE_MEM))
continue;
addr = r->start;
- addr &= PAGE_MASK_4K;
+ addr &= PAGE_MASK;
size = r->end - addr;
- size = PAGE_ALIGN_4K(size);
+ size = PAGE_ALIGN(size);
iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
IOVA_PFN(size + addr) - 1);
if (!iova)
@@ -1148,7 +1152,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
domain->pgd = (struct dma_pte *)alloc_pgtable_page();
if (!domain->pgd)
return -ENOMEM;
- __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
+ __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
return 0;
}

@@ -1164,7 +1168,7 @@ static void domain_exit(struct dmar_domain *domain)
/* destroy iovas */
put_iova_domain(&domain->iovad);
end = DOMAIN_MAX_ADDR(domain->gaw);
- end = end & (~PAGE_MASK_4K);
+ end = end & (~PAGE_MASK);

/* clear ptes */
dma_pte_clear_range(domain, 0, end);
@@ -1283,22 +1287,25 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
u64 start_pfn, end_pfn;
struct dma_pte *pte;
int index;
+ int addr_width = agaw_to_width(domain->agaw);
+
+ hpa &= (((u64)1) << addr_width) - 1;

if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
- iova &= PAGE_MASK_4K;
- start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
- end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
+ iova &= PAGE_MASK;
+ start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
+ end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
index = 0;
while (start_pfn < end_pfn) {
- pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
+ pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
if (!pte)
return -ENOMEM;
/* We don't need lock here, nobody else
* touches the iova range
*/
BUG_ON(dma_pte_addr(*pte));
- dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
+ dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
dma_set_pte_prot(*pte, prot);
__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
start_pfn++;
@@ -1474,11 +1481,12 @@ error:
return find_domain(pdev);
}

-static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
+static int iommu_prepare_identity_map(struct pci_dev *pdev,
+ unsigned long long start, unsigned long long end)
{
struct dmar_domain *domain;
unsigned long size;
- u64 base;
+ unsigned long long base;
int ret;

printk(KERN_INFO
@@ -1490,9 +1498,9 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
return -ENOMEM;

/* The address might not be aligned */
- base = start & PAGE_MASK_4K;
+ base = start & PAGE_MASK;
size = end - base;
- size = PAGE_ALIGN_4K(size);
+ size = PAGE_ALIGN(size);
if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
IOVA_PFN(base + size) - 1)) {
printk(KERN_ERR "IOMMU: reserve iova failed\n");
@@ -1738,8 +1746,8 @@ error:
static inline u64 aligned_size(u64 host_addr, size_t size)
{
u64 addr;
- addr = (host_addr & (~PAGE_MASK_4K)) + size;
- return PAGE_ALIGN_4K(addr);
+ addr = (host_addr & (~PAGE_MASK)) + size;
+ return PAGE_ALIGN(addr);
}

struct iova *
@@ -1753,7 +1761,7 @@ iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
return NULL;

piova = alloc_iova(&domain->iovad,
- size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
+ size >> PAGE_SHIFT, IOVA_PFN(end), 1);
return piova;
}

@@ -1813,8 +1821,9 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
return domain;
}

-static dma_addr_t
-intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
+dma_addr_t
+intel_map_single(struct device *hwdev, unsigned long paddr, size_t size,
+ int dir)
{
struct pci_dev *pdev = to_pci_dev(hwdev);
struct dmar_domain *domain;
@@ -1825,7 +1834,7 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)

BUG_ON(dir == DMA_NONE);
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
- return paddr;
+ return (dma_addr_t)paddr;

domain = get_valid_domain_for_dev(pdev);
if (!domain)
@@ -1837,7 +1846,7 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
if (!iova)
goto error;

- start_paddr = iova->pfn_lo << PAGE_SHIFT_4K;
+ start_paddr = iova->pfn_lo << PAGE_SHIFT;

/*
* Check if DMAR supports zero-length reads on write only
@@ -1855,27 +1864,23 @@ intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
* is not a big problem
*/
ret = domain_page_mapping(domain, start_paddr,
- ((u64)paddr) & PAGE_MASK_4K, size, prot);
+ ((u64)paddr) & PAGE_MASK, size, prot);
if (ret)
goto error;

- pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
- pci_name(pdev), size, (u64)paddr,
- size, (u64)start_paddr, dir);
-
/* it's a non-present to present mapping */
ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
- start_paddr, size >> PAGE_SHIFT_4K, 1);
+ start_paddr, size >> VTD_PAGE_SHIFT, 1);
if (ret)
iommu_flush_write_buffer(domain->iommu);

- return (start_paddr + ((u64)paddr & (~PAGE_MASK_4K)));
+ return start_paddr + ((u64)paddr & (~PAGE_MASK));

error:
if (iova)
__free_iova(&domain->iovad, iova);
printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
- pci_name(pdev), size, (u64)paddr, dir);
+ pci_name(pdev), size, (unsigned long long)paddr, dir);
return 0;
}

@@ -1936,7 +1941,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}

-static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
+void
+intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
size_t size, int dir)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -1953,11 +1959,11 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
if (!iova)
return;

- start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
+ start_addr = iova->pfn_lo << PAGE_SHIFT;
size = aligned_size((u64)dev_addr, size);

pr_debug("Device %s unmapping: %lx@%llx\n",
- pci_name(pdev), size, (u64)start_addr);
+ pci_name(pdev), size, (unsigned long long)start_addr);

/* clear the whole page */
dma_pte_clear_range(domain, start_addr, start_addr + size);
@@ -1965,7 +1971,7 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
if (intel_iommu_strict) {
if (iommu_flush_iotlb_psi(domain->iommu,
- domain->id, start_addr, size >> PAGE_SHIFT_4K, 0))
+ domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
iommu_flush_write_buffer(domain->iommu);
/* free iova */
__free_iova(&domain->iovad, iova);
@@ -1978,13 +1984,14 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
}
}

-static void * intel_alloc_coherent(struct device *hwdev, size_t size,
+void *
+intel_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags)
{
void *vaddr;
int order;

- size = PAGE_ALIGN_4K(size);
+ size = PAGE_ALIGN(size);
order = get_order(size);
flags &= ~(GFP_DMA | GFP_DMA32);

@@ -2000,12 +2007,13 @@ static void * intel_alloc_coherent(struct device *hwdev, size_t size,
return NULL;
}

-static void intel_free_coherent(struct device *hwdev, size_t size,
+void
+intel_free_coherent(struct device *hwdev, size_t size,
void *vaddr, dma_addr_t dma_handle)
{
int order;

- size = PAGE_ALIGN_4K(size);
+ size = PAGE_ALIGN(size);
order = get_order(size);

intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
@@ -2013,7 +2021,8 @@ static void intel_free_coherent(struct device *hwdev, size_t size,
}

#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
-static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
+void
+intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
int nelems, int dir)
{
int i;
@@ -2038,7 +2047,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
size += aligned_size((u64)addr, sg->length);
}

- start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
+ start_addr = iova->pfn_lo << PAGE_SHIFT;

/* clear the whole page */
dma_pte_clear_range(domain, start_addr, start_addr + size);
@@ -2046,7 +2055,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
dma_pte_free_pagetable(domain, start_addr, start_addr + size);

if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
- size >> PAGE_SHIFT_4K, 0))
+ size >> VTD_PAGE_SHIFT, 0))
iommu_flush_write_buffer(domain->iommu);

/* free iova */
@@ -2067,7 +2076,8 @@ static int intel_nontranslate_map_sg(struct device *hddev,
return nelems;
}

-static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
+int
+intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
int nelems, int dir)
{
void *addr;
@@ -2112,14 +2122,14 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;

- start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
+ start_addr = iova->pfn_lo << PAGE_SHIFT;
offset = 0;
for_each_sg(sglist, sg, nelems, i) {
addr = SG_ENT_VIRT_ADDRESS(sg);
addr = (void *)virt_to_phys(addr);
size = aligned_size((u64)addr, sg->length);
ret = domain_page_mapping(domain, start_addr + offset,
- ((u64)addr) & PAGE_MASK_4K,
+ ((u64)addr) & PAGE_MASK,
size, prot);
if (ret) {
/* clear the page */
@@ -2133,14 +2143,14 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
return 0;
}
sg->dma_address = start_addr + offset +
- ((u64)addr & (~PAGE_MASK_4K));
+ ((u64)addr & (~PAGE_MASK));
sg->dma_length = sg->length;
offset += size;
}

/* it's a non-present to present mapping */
if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
- start_addr, offset >> PAGE_SHIFT_4K, 1))
+ start_addr, offset >> VTD_PAGE_SHIFT, 1))
iommu_flush_write_buffer(domain->iommu);
return nelems;
}
@@ -2180,7 +2190,6 @@ static inline int iommu_devinfo_cache_init(void)
sizeof(struct device_domain_info),
0,
SLAB_HWCACHE_ALIGN,
-
NULL);
if (!iommu_devinfo_cache) {
printk(KERN_ERR "Couldn't create devinfo cache\n");
@@ -2198,7 +2207,6 @@ static inline int iommu_iova_cache_init(void)
sizeof(struct iova),
0,
SLAB_HWCACHE_ALIGN,
-
NULL);
if (!iommu_iova_cache) {
printk(KERN_ERR "Couldn't create iova cache\n");
@@ -2327,7 +2335,7 @@ void intel_iommu_domain_exit(struct dmar_domain *domain)
return;

end = DOMAIN_MAX_ADDR(domain->gaw);
- end = end & (~PAGE_MASK_4K);
+ end = end & (~VTD_PAGE_MASK);

/* clear ptes */
dma_pte_clear_range(domain, 0, end);
@@ -2423,6 +2431,6 @@ u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
if (pte)
pfn = dma_pte_addr(*pte);

- return pfn >> PAGE_SHIFT_4K;
+ return pfn >> VTD_PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index b7c2378..5c0ad61 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -30,8 +30,22 @@ int pci_pci_problems;
EXPORT_SYMBOL(pci_pci_problems);
int pcie_mch_quirk;
EXPORT_SYMBOL(pcie_mch_quirk);
+int forbid_dac __read_mostly;
+EXPORT_SYMBOL(forbid_dac);

#ifdef CONFIG_PCI_QUIRKS
+
+/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
+static __devinit void via_no_dac(struct pci_dev *dev)
+{
+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
+ dev_info(&dev->dev,
+ "VIA PCI bridge detected. Disabling DAC.\n");
+ forbid_dac = 1;
+ }
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
+
/* The Mellanox Tavor device gives false positive parity errors
* Mark this device with a broken_parity_status, to allow
* PCI scanning code to "skip" this now blacklisted device.
diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h
index 546ad31..fbccb97 100644
--- a/include/asm-x86/iommu.h
+++ b/include/asm-x86/iommu.h
@@ -7,9 +7,13 @@ extern struct dma_mapping_ops nommu_dma_ops;
extern int force_iommu, no_iommu;
extern int iommu_detected;
extern int dmar_disabled;
+extern int forbid_dac;

extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len);

+/* 10 seconds */
+#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
+
#ifdef CONFIG_GART_IOMMU
extern int gart_iommu_aperture;
extern int gart_iommu_aperture_allowed;
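
With DMAR_OPERATION_TIMEOUT moved into <asm/iommu.h>, each architecture can express the 10-second spin budget used by IOMMU_WAIT_OP() in its own cycle counter. As an illustration only (the real IA64 hook lives in patch 2/2 of this series, so treat the names here as assumptions), an IA64 counterpart could look roughly like the sketch below, using the ITC frequency that IA64 keeps in local_cpu_data:

	#include <asm/processor.h>	/* local_cpu_data */

	/* 10 seconds, counted in ITC cycles rather than TSC cycles */
	#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq) * 10)
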
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index bff5c65..1b49810 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -2,15 +2,14 @@
#define _DMA_REMAPPING_H

/*
- * We need a fixed PAGE_SIZE of 4K irrespective of
- * arch PAGE_SIZE for IOMMU page tables.
+ * VT-d hardware uses 4K page size regardless of host page size.
*/
-#define PAGE_SHIFT_4K (12)
-#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
-#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
-#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
+#define VTD_PAGE_SHIFT (12)
+#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT)
+#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
+#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)

-#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
+#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)

@@ -25,7 +24,7 @@ struct root_entry {
u64 val;
u64 rsvd1;
};
-#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
+#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
return (root->val & 1);
@@ -36,7 +35,7 @@ static inline void set_root_present(struct root_entry *root)
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
- root->val |= value & PAGE_MASK_4K;
+ root->val |= value & VTD_PAGE_MASK;
}

struct context_entry;
@@ -45,7 +44,7 @@ get_context_addr_from_root(struct root_entry *root)
{
return (struct context_entry *)
(root_present(root)?phys_to_virt(
- root->val & PAGE_MASK_4K):
+ root->val & VTD_PAGE_MASK) :
NULL);
}

@@ -67,7 +66,7 @@ struct context_entry {
#define context_present(c) ((c).lo & 1)
#define context_fault_disable(c) (((c).lo >> 1) & 1)
#define context_translation_type(c) (((c).lo >> 2) & 3)
-#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
+#define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
#define context_address_width(c) ((c).hi & 7)
#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))

@@ -81,7 +80,7 @@ struct context_entry {
} while (0)
#define CONTEXT_TT_MULTI_LEVEL 0
#define context_set_address_root(c, val) \
- do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
+ do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
#define context_set_domain_id(c, val) \
do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
@@ -107,9 +106,9 @@ struct dma_pte {
#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
#define dma_set_pte_prot(p, prot) \
do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
-#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
+#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK)
#define dma_set_pte_addr(p, addr) do {\
- (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+ (p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
#define dma_pte_present(p) (((p).val & 3) != 0)

struct intel_iommu;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index e7b196b..0683472 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -27,8 +27,9 @@
#include <linux/sysdev.h>
#include <linux/iova.h>
#include <linux/io.h>
-#include <asm/cacheflush.h>
#include <linux/dma_remapping.h>
+#include <asm/iommu.h>
+#include <asm/cacheflush.h>

/*
* Intel IOMMU register specification per version 1.0 public spec.
@@ -200,22 +201,21 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
#define dma_frcd_type(d) ((d >> 30) & 1)
#define dma_frcd_fault_reason(c) (c & 0xff)
#define dma_frcd_source_id(c) (c & 0xffff)
-#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
-
-#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
-
-#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
-{\
- cycles_t start_time = get_cycles();\
- while (1) {\
- sts = op (iommu->reg + offset);\
- if (cond)\
- break;\
+/* low 64 bit */
+#define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT))
+
+#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
+do { \
+ cycles_t start_time = get_cycles(); \
+ while (1) { \
+ sts = op(iommu->reg + offset); \
+ if (cond) \
+ break; \
if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
- panic("DMAR hardware is malfunctioning\n");\
- cpu_relax();\
- }\
-}
+ panic("DMAR hardware is malfunctioning\n"); \
+ cpu_relax(); \
+ } \
+} while (0)

#define QI_LENGTH 256 /* queue length */

@@ -316,4 +316,11 @@ struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
int intel_iommu_found(void);
u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);

+extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t);
+extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t);
+extern dma_addr_t intel_map_single(struct device *, unsigned long, size_t, int);
+extern void intel_unmap_single(struct device *, dma_addr_t, size_t, int);
+extern int intel_map_sg(struct device *, struct scatterlist *, int, int);
+extern void intel_unmap_sg(struct device *, struct scatterlist *, int, int);
+
#endif
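
The intel_* entry points above lose their "static" and gain prototypes so that non-x86 code (the IA64 part in patch 2/2) can reach them from its own DMA-mapping glue. As a rough usage sketch only, not something added by this patch, a caller would map and unmap a single buffer like this ("dev" and "buf" are placeholders):

	#include <linux/dma-mapping.h>
	#include <linux/io.h>		/* virt_to_phys */
	#include <linux/intel-iommu.h>

	static int example_dma_roundtrip(struct device *dev, void *buf, size_t len)
	{
		dma_addr_t dma;

		/* paddr is now passed as an unsigned long physical address */
		dma = intel_map_single(dev, (unsigned long)virt_to_phys(buf),
				       len, DMA_TO_DEVICE);
		if (!dma)
			return -ENOMEM;	/* intel_map_single() returns 0 on failure */

		/* ... program the device with "dma" and wait for it to finish ... */

		intel_unmap_single(dev, dma, len, DMA_TO_DEVICE);
		return 0;
	}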