2012-06-16 20:36:41

by Chris Metcalf

[permalink] [raw]
Subject: [PATCH 1/3] tilegx pci: support I/O to arbitrarily-cached pages

The tilegx PCI root complex support (currently only in linux-next)
is limited to pages that are homed or cached in the default manner,
i.e. "hash-for-home". This change supports delivery of I/O data to
pages that are cached in other ways (locally on a particular core,
uncached, user-managed incoherent, etc.).

A large part of the change is supporting flushing pages from cache
on particular homes so that we can transition the data that we are
delivering to or from the device appropriately. The new homecache_finv*
routines handle this.

Some changes to page_table_range_init() were also required to make
the fixmap code work correctly on tilegx; it hadn't been used there
before.

We also remove some stub mark_caches_evicted_*() routines that
were just no-ops anyway.

Signed-off-by: Chris Metcalf <[email protected]>
---
arch/tile/include/asm/cache.h | 12 ++-
arch/tile/include/asm/fixmap.h | 14 ++-
arch/tile/include/asm/homecache.h | 19 ++--
arch/tile/include/asm/page.h | 7 +-
arch/tile/kernel/pci-dma.c | 182 +++++++++++++++++++++++++++++--------
arch/tile/mm/homecache.c | 156 +++++++++++++++----------------
arch/tile/mm/init.c | 59 +++++-------
7 files changed, 278 insertions(+), 171 deletions(-)

diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
index 392e533..a9a5299 100644
--- a/arch/tile/include/asm/cache.h
+++ b/arch/tile/include/asm/cache.h
@@ -27,11 +27,17 @@
#define L2_CACHE_ALIGN(x) (((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES)

/*
- * TILE-Gx is fully coherent so we don't need to define ARCH_DMA_MINALIGN.
+ * TILEPro I/O is not always coherent (networking typically uses coherent
+ * I/O, but PCI traffic does not) and setting ARCH_DMA_MINALIGN to the
+ * L2 cacheline size helps ensure that kernel heap allocations are aligned.
+ * TILE-Gx I/O is always coherent when used on hash-for-home pages.
+ *
+ * However, it's possible at runtime to request not to use hash-for-home
+ * for the kernel heap, in which case the kernel will use flush-and-inval
+ * to manage coherence. As a result, we use L2_CACHE_BYTES for the
+ * DMA minimum alignment to avoid false sharing in the kernel heap.
*/
-#ifndef __tilegx__
#define ARCH_DMA_MINALIGN L2_CACHE_BYTES
-#endif

/* use the cache line size for the L2, which is where it counts */
#define SMP_CACHE_BYTES_SHIFT L2_CACHE_SHIFT
diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h
index c66f793..e16dbf9 100644
--- a/arch/tile/include/asm/fixmap.h
+++ b/arch/tile/include/asm/fixmap.h
@@ -45,15 +45,23 @@
*
* TLB entries of such buffers will not be flushed across
* task switches.
- *
- * We don't bother with a FIX_HOLE since above the fixmaps
- * is unmapped memory in any case.
*/
enum fixed_addresses {
+#ifdef __tilegx__
+ /*
+ * TILEPro has unmapped memory above so the hole isn't needed,
+ * and in any case the hole pushes us over a single 16MB pmd.
+ */
+ FIX_HOLE,
+#endif
#ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#endif
+#ifdef __tilegx__ /* see homecache.c */
+ FIX_HOMECACHE_BEGIN,
+ FIX_HOMECACHE_END = FIX_HOMECACHE_BEGIN+(NR_CPUS)-1,
+#endif
__end_of_permanent_fixed_addresses,

/*
diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h
index a824386..7b77713 100644
--- a/arch/tile/include/asm/homecache.h
+++ b/arch/tile/include/asm/homecache.h
@@ -79,10 +79,17 @@ extern void homecache_change_page_home(struct page *, int order, int home);
/*
* Flush a page out of whatever cache(s) it is in.
* This is more than just finv, since it properly handles waiting
- * for the data to reach memory on tilepro, but it can be quite
- * heavyweight, particularly on hash-for-home memory.
+ * for the data to reach memory, but it can be quite
+ * heavyweight, particularly on incoherent or immutable memory.
*/
-extern void homecache_flush_cache(struct page *, int order);
+extern void homecache_finv_page(struct page *);
+
+/*
+ * Flush a page out of the specified home cache.
+ * Note that the specified home need not be the actual home of the page,
+ * as for example might be the case when coordinating with I/O devices.
+ */
+extern void homecache_finv_map_page(struct page *, int home);

/*
* Allocate a page with the given GFP flags, home, and optionally
@@ -104,10 +111,10 @@ extern struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
* routines use homecache_change_page_home() to reset the home
* back to the default before returning the page to the allocator.
*/
+void __homecache_free_pages(struct page *, unsigned int order);
void homecache_free_pages(unsigned long addr, unsigned int order);
-#define homecache_free_page(page) \
- homecache_free_pages((page), 0)
-
+#define __homecache_free_page(page) __homecache_free_pages((page), 0)
+#define homecache_free_page(page) homecache_free_pages((page), 0)


/*
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index 9d9131e..dd033a4 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -174,7 +174,9 @@ static inline __attribute_const__ int get_order(unsigned long size)
#define MEM_LOW_END (HALF_VA_SPACE - 1) /* low half */
#define MEM_HIGH_START (-HALF_VA_SPACE) /* high half */
#define PAGE_OFFSET MEM_HIGH_START
-#define _VMALLOC_START _AC(0xfffffff500000000, UL) /* 4 GB */
+#define FIXADDR_BASE _AC(0xfffffff400000000, UL) /* 4 GB */
+#define FIXADDR_TOP _AC(0xfffffff500000000, UL) /* 4 GB */
+#define _VMALLOC_START FIXADDR_TOP
#define HUGE_VMAP_BASE _AC(0xfffffff600000000, UL) /* 4 GB */
#define MEM_SV_START _AC(0xfffffff700000000, UL) /* 256 MB */
#define MEM_SV_INTRPT MEM_SV_START
@@ -185,9 +187,6 @@ static inline __attribute_const__ int get_order(unsigned long size)
/* Highest DTLB address we will use */
#define KERNEL_HIGH_VADDR MEM_SV_START

-/* Since we don't currently provide any fixmaps, we use an impossible VA. */
-#define FIXADDR_TOP MEM_HV_START
-
#else /* !__tilegx__ */

/*
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
index b3ed19f..9814d70 100644
--- a/arch/tile/kernel/pci-dma.c
+++ b/arch/tile/kernel/pci-dma.c
@@ -22,9 +22,15 @@
/* Generic DMA mapping functions: */

/*
- * Allocate what Linux calls "coherent" memory, which for us just
- * means uncached.
+ * Allocate what Linux calls "coherent" memory. On TILEPro this is
+ * uncached memory; on TILE-Gx it is hash-for-home memory.
*/
+#ifdef __tilepro__
+#define PAGE_HOME_DMA PAGE_HOME_UNCACHED
+#else
+#define PAGE_HOME_DMA PAGE_HOME_HASH
+#endif
+
void *dma_alloc_coherent(struct device *dev,
size_t size,
dma_addr_t *dma_handle,
@@ -48,13 +54,13 @@ void *dma_alloc_coherent(struct device *dev,
if (dma_mask <= DMA_BIT_MASK(32))
node = 0;

- pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_UNCACHED);
+ pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
if (pg == NULL)
return NULL;

addr = page_to_phys(pg);
if (addr + size > dma_mask) {
- homecache_free_pages(addr, order);
+ __homecache_free_pages(pg, order);
return NULL;
}

@@ -87,22 +93,110 @@ EXPORT_SYMBOL(dma_free_coherent);
* can count on nothing having been touched.
*/

-/* Flush a PA range from cache page by page. */
-static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size)
+/* Set up a single page for DMA access. */
+static void __dma_prep_page(struct page *page, unsigned long offset,
+ size_t size, enum dma_data_direction direction)
{
- struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
- size_t bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1));
+ /*
+ * Flush the page from cache if necessary.
+ * On tilegx, data is delivered to hash-for-home L3; on tilepro,
+ * data is delivered direct to memory.
+ *
+ * NOTE: If we were just doing DMA_TO_DEVICE we could optimize
+ * this to be a "flush" not a "finv" and keep some of the
+ * state in cache across the DMA operation, but it doesn't seem
+ * worth creating the necessary flush_buffer_xxx() infrastructure.
+ */
+ int home = page_home(page);
+ switch (home) {
+ case PAGE_HOME_HASH:
+#ifdef __tilegx__
+ return;
+#endif
+ break;
+ case PAGE_HOME_UNCACHED:
+#ifdef __tilepro__
+ return;
+#endif
+ break;
+ case PAGE_HOME_IMMUTABLE:
+ /* Should be going to the device only. */
+ BUG_ON(direction == DMA_FROM_DEVICE ||
+ direction == DMA_BIDIRECTIONAL);
+ return;
+ case PAGE_HOME_INCOHERENT:
+ /* Incoherent anyway, so no need to work hard here. */
+ return;
+ default:
+ BUG_ON(home < 0 || home >= NR_CPUS);
+ break;
+ }
+ homecache_finv_page(page);
+
+#ifdef DEBUG_ALIGNMENT
+ /* Warn if the region isn't cacheline aligned. */
+ if (offset & (L2_CACHE_BYTES - 1) || (size & (L2_CACHE_BYTES - 1)))
+ pr_warn("Unaligned DMA to non-hfh memory: PA %#llx/%#lx\n",
+ PFN_PHYS(page_to_pfn(page)) + offset, size);
+#endif
+}

- while ((ssize_t)size > 0) {
- /* Flush the page. */
- homecache_flush_cache(page++, 0);
+/* Make the page ready to be read by the core. */
+static void __dma_complete_page(struct page *page, unsigned long offset,
+ size_t size, enum dma_data_direction direction)
+{
+#ifdef __tilegx__
+ switch (page_home(page)) {
+ case PAGE_HOME_HASH:
+ /* I/O device delivered data the way the cpu wanted it. */
+ break;
+ case PAGE_HOME_INCOHERENT:
+ /* Incoherent anyway, so no need to work hard here. */
+ break;
+ case PAGE_HOME_IMMUTABLE:
+ /* Extra read-only copies are not a problem. */
+ break;
+ default:
+ /* Flush the bogus hash-for-home I/O entries to memory. */
+ homecache_finv_map_page(page, PAGE_HOME_HASH);
+ break;
+ }
+#endif
+}

- /* Figure out if we need to continue on the next page. */
- size -= bytesleft;
- bytesleft = PAGE_SIZE;
+static void __dma_prep_pa_range(dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
+ unsigned long offset = dma_addr & (PAGE_SIZE - 1);
+ size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));
+
+ while (size != 0) {
+ __dma_prep_page(page, offset, bytes, direction);
+ size -= bytes;
+ ++page;
+ offset = 0;
+ bytes = min((size_t)PAGE_SIZE, size);
+ }
+}
+
+static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
+ unsigned long offset = dma_addr & (PAGE_SIZE - 1);
+ size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));
+
+ while (size != 0) {
+ __dma_complete_page(page, offset, bytes, direction);
+ size -= bytes;
+ ++page;
+ offset = 0;
+ bytes = min((size_t)PAGE_SIZE, size);
}
}

+
/*
* dma_map_single can be passed any memory address, and there appear
* to be no alignment constraints.
@@ -111,28 +205,29 @@ static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size)
* line with some other data that has been touched in the meantime.
*/
dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
- enum dma_data_direction direction)
+ enum dma_data_direction direction)
{
dma_addr_t dma_addr = __pa(ptr);

BUG_ON(!valid_dma_direction(direction));
WARN_ON(size == 0);

- __dma_map_pa_range(dma_addr, size);
+ __dma_prep_pa_range(dma_addr, size, direction);

return dma_addr;
}
EXPORT_SYMBOL(dma_map_single);

void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
- enum dma_data_direction direction)
+ enum dma_data_direction direction)
{
BUG_ON(!valid_dma_direction(direction));
+ __dma_complete_pa_range(dma_addr, size, direction);
}
EXPORT_SYMBOL(dma_unmap_single);

int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
- enum dma_data_direction direction)
+ enum dma_data_direction direction)
{
struct scatterlist *sg;
int i;
@@ -143,17 +238,25 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,

for_each_sg(sglist, sg, nents, i) {
sg->dma_address = sg_phys(sg);
- __dma_map_pa_range(sg->dma_address, sg->length);
+ __dma_prep_pa_range(sg->dma_address, sg->length, direction);
}

return nents;
}
EXPORT_SYMBOL(dma_map_sg);

-void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
- enum dma_data_direction direction)
+void dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
+ enum dma_data_direction direction)
{
+ struct scatterlist *sg;
+ int i;
+
BUG_ON(!valid_dma_direction(direction));
+ for_each_sg(sglist, sg, nents, i) {
+ sg->dma_address = sg_phys(sg);
+ __dma_complete_pa_range(sg->dma_address, sg->length,
+ direction);
+ }
}
EXPORT_SYMBOL(dma_unmap_sg);

@@ -164,16 +267,17 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page,
BUG_ON(!valid_dma_direction(direction));

BUG_ON(offset + size > PAGE_SIZE);
- homecache_flush_cache(page, 0);
-
+ __dma_prep_page(page, offset, size, direction);
return page_to_pa(page) + offset;
}
EXPORT_SYMBOL(dma_map_page);

void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
- enum dma_data_direction direction)
+ enum dma_data_direction direction)
{
BUG_ON(!valid_dma_direction(direction));
+ __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
+ dma_address & PAGE_OFFSET, size, direction);
}
EXPORT_SYMBOL(dma_unmap_page);

@@ -181,33 +285,33 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction direction)
{
BUG_ON(!valid_dma_direction(direction));
+ __dma_complete_pa_range(dma_handle, size, direction);
}
EXPORT_SYMBOL(dma_sync_single_for_cpu);

void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction direction)
{
- unsigned long start = PFN_DOWN(dma_handle);
- unsigned long end = PFN_DOWN(dma_handle + size - 1);
- unsigned long i;
-
- BUG_ON(!valid_dma_direction(direction));
- for (i = start; i <= end; ++i)
- homecache_flush_cache(pfn_to_page(i), 0);
+ __dma_prep_pa_range(dma_handle, size, direction);
}
EXPORT_SYMBOL(dma_sync_single_for_device);

-void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
- enum dma_data_direction direction)
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction)
{
+ struct scatterlist *sg;
+ int i;
+
BUG_ON(!valid_dma_direction(direction));
- WARN_ON(nelems == 0 || sg[0].length == 0);
+ WARN_ON(nelems == 0 || sglist->length == 0);
+
+ for_each_sg(sglist, sg, nelems, i) {
+ dma_sync_single_for_cpu(dev, sg->dma_address,
+ sg_dma_len(sg), direction);
+ }
}
EXPORT_SYMBOL(dma_sync_sg_for_cpu);

-/*
- * Flush and invalidate cache for scatterlist.
- */
void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction)
{
@@ -242,8 +346,8 @@ void dma_sync_single_range_for_device(struct device *dev,
EXPORT_SYMBOL(dma_sync_single_range_for_device);

/*
- * dma_alloc_noncoherent() returns non-cacheable memory, so there's no
- * need to do any flushing here.
+ * dma_alloc_noncoherent() is #defined to return coherent memory,
+ * so there's no need to do any flushing here.
*/
void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction direction)
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index dbcbdf7..5f7868d 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -64,10 +64,6 @@ early_param("noallocl2", set_noallocl2);

#endif

-/* Provide no-op versions of these routines to keep flush_remote() cleaner. */
-#define mark_caches_evicted_start() 0
-#define mark_caches_evicted_finish(mask, timestamp) do {} while (0)
-

/*
* Update the irq_stat for cpus that we are going to interrupt
@@ -107,7 +103,6 @@ static void hv_flush_update(const struct cpumask *cache_cpumask,
* there's never any good reason for hv_flush_remote() to fail.
* - Accepts a 32-bit PFN rather than a 64-bit PA, which generally
* is the type that Linux wants to pass around anyway.
- * - Centralizes the mark_caches_evicted() handling.
* - Canonicalizes that lengths of zero make cpumasks NULL.
* - Handles deferring TLB flushes for dataplane tiles.
* - Tracks remote interrupts in the per-cpu irq_cpustat_t.
@@ -126,7 +121,6 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
HV_Remote_ASID *asids, int asidcount)
{
int rc;
- int timestamp = 0; /* happy compiler */
struct cpumask cache_cpumask_copy, tlb_cpumask_copy;
struct cpumask *cache_cpumask, *tlb_cpumask;
HV_PhysAddr cache_pa;
@@ -157,15 +151,11 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
hv_flush_update(cache_cpumask, tlb_cpumask, tlb_va, tlb_length,
asids, asidcount);
cache_pa = (HV_PhysAddr)cache_pfn << PAGE_SHIFT;
- if (cache_control & HV_FLUSH_EVICT_L2)
- timestamp = mark_caches_evicted_start();
rc = hv_flush_remote(cache_pa, cache_control,
cpumask_bits(cache_cpumask),
tlb_va, tlb_length, tlb_pgsize,
cpumask_bits(tlb_cpumask),
asids, asidcount);
- if (cache_control & HV_FLUSH_EVICT_L2)
- mark_caches_evicted_finish(cache_cpumask, timestamp);
if (rc == 0)
return;
cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy);
@@ -180,85 +170,86 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
panic("Unsafe to continue.");
}

-void flush_remote_page(struct page *page, int order)
+static void homecache_finv_page_va(void* va, int home)
{
- int i, pages = (1 << order);
- for (i = 0; i < pages; ++i, ++page) {
- void *p = kmap_atomic(page);
- int hfh = 0;
- int home = page_home(page);
-#if CHIP_HAS_CBOX_HOME_MAP()
- if (home == PAGE_HOME_HASH)
- hfh = 1;
- else
-#endif
- BUG_ON(home < 0 || home >= NR_CPUS);
- finv_buffer_remote(p, PAGE_SIZE, hfh);
- kunmap_atomic(p);
+ if (home == smp_processor_id()) {
+ finv_buffer_local(va, PAGE_SIZE);
+ } else if (home == PAGE_HOME_HASH) {
+ finv_buffer_remote(va, PAGE_SIZE, 1);
+ } else {
+ BUG_ON(home < 0 || home >= NR_CPUS);
+ finv_buffer_remote(va, PAGE_SIZE, 0);
}
}

-void homecache_evict(const struct cpumask *mask)
+void homecache_finv_map_page(struct page *page, int home)
{
- flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0);
+ unsigned long flags;
+ unsigned long va;
+ pte_t *ptep;
+ pte_t pte;
+
+ if (home == PAGE_HOME_UNCACHED)
+ return;
+ local_irq_save(flags);
+#ifdef CONFIG_HIGHMEM
+ va = __fix_to_virt(FIX_KMAP_BEGIN + kmap_atomic_idx_push() +
+ (KM_TYPE_NR * smp_processor_id()));
+#else
+ va = __fix_to_virt(FIX_HOMECACHE_BEGIN + smp_processor_id());
+#endif
+ ptep = virt_to_pte(NULL, (unsigned long)va);
+ pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL);
+ __set_pte(ptep, pte_set_home(pte, home));
+ homecache_finv_page_va((void *)va, home);
+ __pte_clear(ptep);
+ hv_flush_page(va, PAGE_SIZE);
+#ifdef CONFIG_HIGHMEM
+ kmap_atomic_idx_pop();
+#endif
+ local_irq_restore(flags);
}

-/*
- * Return a mask of the cpus whose caches currently own these pages.
- * The return value is whether the pages are all coherently cached
- * (i.e. none are immutable, incoherent, or uncached).
- */
-static int homecache_mask(struct page *page, int pages,
- struct cpumask *home_mask)
+static void homecache_finv_page_home(struct page *page, int home)
{
- int i;
- int cached_coherently = 1;
- cpumask_clear(home_mask);
- for (i = 0; i < pages; ++i) {
- int home = page_home(&page[i]);
- if (home == PAGE_HOME_IMMUTABLE ||
- home == PAGE_HOME_INCOHERENT) {
- cpumask_copy(home_mask, cpu_possible_mask);
- return 0;
- }
-#if CHIP_HAS_CBOX_HOME_MAP()
- if (home == PAGE_HOME_HASH) {
- cpumask_or(home_mask, home_mask, &hash_for_home_map);
- continue;
- }
-#endif
- if (home == PAGE_HOME_UNCACHED) {
- cached_coherently = 0;
- continue;
- }
- BUG_ON(home < 0 || home >= NR_CPUS);
- cpumask_set_cpu(home, home_mask);
- }
- return cached_coherently;
+ if (!PageHighMem(page) && home == page_home(page))
+ homecache_finv_page_va(page_address(page), home);
+ else
+ homecache_finv_map_page(page, home);
}

-/*
- * Return the passed length, or zero if it's long enough that we
- * believe we should evict the whole L2 cache.
- */
-static unsigned long cache_flush_length(unsigned long length)
+static inline bool incoherent_home(int home)
{
- return (length >= CHIP_L2_CACHE_SIZE()) ? HV_FLUSH_EVICT_L2 : length;
+ return home == PAGE_HOME_IMMUTABLE || home == PAGE_HOME_INCOHERENT;
}

-/* Flush a page out of whatever cache(s) it is in. */
-void homecache_flush_cache(struct page *page, int order)
+static void homecache_finv_page_internal(struct page *page, int force_map)
{
- int pages = 1 << order;
- int length = cache_flush_length(pages * PAGE_SIZE);
- unsigned long pfn = page_to_pfn(page);
- struct cpumask home_mask;
-
- homecache_mask(page, pages, &home_mask);
- flush_remote(pfn, length, &home_mask, 0, 0, 0, NULL, NULL, 0);
- sim_validate_lines_evicted(PFN_PHYS(pfn), pages * PAGE_SIZE);
+ int home = page_home(page);
+ if (home == PAGE_HOME_UNCACHED)
+ return;
+ if (incoherent_home(home)) {
+ int cpu;
+ for_each_cpu(cpu, &cpu_cacheable_map)
+ homecache_finv_map_page(page, cpu);
+ } else if (force_map) {
+ /* Force if, e.g., the normal mapping is migrating. */
+ homecache_finv_map_page(page, home);
+ } else {
+ homecache_finv_page_home(page, home);
+ }
+ sim_validate_lines_evicted(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE);
}

+void homecache_finv_page(struct page *page)
+{
+ homecache_finv_page_internal(page, 0);
+}
+
+void homecache_evict(const struct cpumask *mask)
+{
+ flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0);
+}

/* Report the home corresponding to a given PTE. */
static int pte_to_home(pte_t pte)
@@ -441,15 +432,8 @@ struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
return page;
}

-void homecache_free_pages(unsigned long addr, unsigned int order)
+void __homecache_free_pages(struct page *page, unsigned int order)
{
- struct page *page;
-
- if (addr == 0)
- return;
-
- VM_BUG_ON(!virt_addr_valid((void *)addr));
- page = virt_to_page((void *)addr);
if (put_page_testzero(page)) {
homecache_change_page_home(page, order, initial_page_home());
if (order == 0) {
@@ -460,3 +444,13 @@ void homecache_free_pages(unsigned long addr, unsigned int order)
}
}
}
+EXPORT_SYMBOL(__homecache_free_pages);
+
+void homecache_free_pages(unsigned long addr, unsigned int order)
+{
+ if (addr != 0) {
+ VM_BUG_ON(!virt_addr_valid((void *)addr));
+ __homecache_free_pages(virt_to_page((void *)addr), order);
+ }
+}
+EXPORT_SYMBOL(homecache_free_pages);
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 630dd2c..a2417a0 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -150,7 +150,21 @@ void __init shatter_pmd(pmd_t *pmd)
assign_pte(pmd, pte);
}

-#ifdef CONFIG_HIGHMEM
+#ifdef __tilegx__
+static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
+{
+ pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va);
+ if (pud_none(*pud))
+ assign_pmd(pud, alloc_pmd());
+ return pmd_offset(pud, va);
+}
+#else
+static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
+{
+ return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va);
+}
+#endif
+
/*
* This function initializes a certain range of kernel virtual memory
* with new bootmem page tables, everywhere page tables are missing in
@@ -163,24 +177,17 @@ void __init shatter_pmd(pmd_t *pmd)
* checking the pgd every time.
*/
static void __init page_table_range_init(unsigned long start,
- unsigned long end, pgd_t *pgd_base)
+ unsigned long end, pgd_t *pgd)
{
- pgd_t *pgd;
- int pgd_idx;
unsigned long vaddr;
-
- vaddr = start;
- pgd_idx = pgd_index(vaddr);
- pgd = pgd_base + pgd_idx;
-
- for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
- pmd_t *pmd = pmd_offset(pud_offset(pgd, vaddr), vaddr);
+ start = round_down(start, PMD_SIZE);
+ end = round_up(end, PMD_SIZE);
+ for (vaddr = start; vaddr < end; vaddr += PMD_SIZE) {
+ pmd_t *pmd = get_pmd(pgd, vaddr);
if (pmd_none(*pmd))
assign_pte(pmd, alloc_pte());
- vaddr += PMD_SIZE;
}
}
-#endif /* CONFIG_HIGHMEM */


#if CHIP_HAS_CBOX_HOME_MAP()
@@ -404,21 +411,6 @@ static inline pgprot_t ktext_set_nocache(pgprot_t prot)
return prot;
}

-#ifndef __tilegx__
-static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
-{
- return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va);
-}
-#else
-static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
-{
- pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va);
- if (pud_none(*pud))
- assign_pmd(pud, alloc_pmd());
- return pmd_offset(pud, va);
-}
-#endif
-
/* Temporary page table we use for staging. */
static pgd_t pgtables[PTRS_PER_PGD]
__attribute__((aligned(HV_PAGE_TABLE_ALIGN)));
@@ -779,9 +771,6 @@ static void __init set_non_bootmem_pages_init(void)
*/
void __init paging_init(void)
{
-#ifdef CONFIG_HIGHMEM
- unsigned long vaddr, end;
-#endif
#ifdef __tilegx__
pud_t *pud;
#endif
@@ -789,14 +778,14 @@ void __init paging_init(void)

kernel_physical_mapping_init(pgd_base);

-#ifdef CONFIG_HIGHMEM
/*
* Fixed mappings, only the page table structure has to be
* created - mappings will be set by set_fixmap():
*/
- vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
- end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
- page_table_range_init(vaddr, end, pgd_base);
+ page_table_range_init(fix_to_virt(__end_of_fixed_addresses - 1),
+ FIXADDR_TOP, pgd_base);
+
+#ifdef CONFIG_HIGHMEM
permanent_kmaps_init(pgd_base);
#endif

--
1.7.10.3


2012-06-16 20:37:13

by Chris Metcalf

[permalink] [raw]
Subject: [PATCH 2/3] arch/tile: enable ZONE_DMA for tilegx

This is required for PCI root complex legacy support and USB OHCI root
complex support. With this change tilegx now supports allocating memory
whose PA fits in 32 bits.

Signed-off-by: Chris Metcalf <[email protected]>
---
arch/tile/Kconfig | 3 +++
arch/tile/kernel/pci-dma.c | 15 +++++++++------
arch/tile/kernel/setup.c | 12 +++++++++++-
arch/tile/mm/init.c | 11 +++++------
4 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index a5302d3..0ad771f 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -212,6 +212,9 @@ config HIGHMEM

If unsure, say "true".

+config ZONE_DMA
+ def_bool y
+
# We do not currently support disabling NUMA.
config NUMA
bool # "NUMA Memory Allocation and Scheduler Support"
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
index 9814d70..edd856a 100644
--- a/arch/tile/kernel/pci-dma.c
+++ b/arch/tile/kernel/pci-dma.c
@@ -45,14 +45,17 @@ void *dma_alloc_coherent(struct device *dev,
gfp |= __GFP_ZERO;

/*
- * By forcing NUMA node 0 for 32-bit masks we ensure that the
- * high 32 bits of the resulting PA will be zero. If the mask
- * size is, e.g., 24, we may still not be able to guarantee a
- * suitable memory address, in which case we will return NULL.
- * But such devices are uncommon.
+ * If the mask specifies that the memory be in the first 4 GB, then
+ * we force the allocation to come from the DMA zone. We also
+ * force the node to 0 since that's the only node where the DMA
+ * zone isn't empty. If the mask size is smaller than 32 bits, we
+ * may still not be able to guarantee a suitable memory address, in
+ * which case we will return NULL. But such devices are uncommon.
*/
- if (dma_mask <= DMA_BIT_MASK(32))
+ if (dma_mask <= DMA_BIT_MASK(32)) {
+ gfp |= GFP_DMA;
node = 0;
+ }

pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
if (pg == NULL)
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 6d179df..fdde3b6 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -658,6 +658,8 @@ static void __init zone_sizes_init(void)
unsigned long zones_size[MAX_NR_ZONES] = { 0 };
int size = percpu_size();
int num_cpus = smp_height * smp_width;
+ const unsigned long dma_end = (1UL << (32 - PAGE_SHIFT));
+
int i;

for (i = 0; i < num_cpus; ++i)
@@ -729,6 +731,14 @@ static void __init zone_sizes_init(void)
zones_size[ZONE_NORMAL] = end - start;
#endif

+ if (start < dma_end) {
+ zones_size[ZONE_DMA] = min(zones_size[ZONE_NORMAL],
+ dma_end - start);
+ zones_size[ZONE_NORMAL] -= zones_size[ZONE_DMA];
+ } else {
+ zones_size[ZONE_DMA] = 0;
+ }
+
/* Take zone metadata from controller 0 if we're isolnode. */
if (node_isset(i, isolnodes))
NODE_DATA(i)->bdata = &bootmem_node_data[0];
@@ -738,7 +748,7 @@ static void __init zone_sizes_init(void)
PFN_UP(node_percpu[i]));

/* Track the type of memory on each node */
- if (zones_size[ZONE_NORMAL])
+ if (zones_size[ZONE_NORMAL] || zones_size[ZONE_DMA])
node_set_state(i, N_NORMAL_MEMORY);
#ifdef CONFIG_HIGHMEM
if (end != start)
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index a2417a0..ef29d6c 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -733,16 +733,15 @@ static void __init set_non_bootmem_pages_init(void)
for_each_zone(z) {
unsigned long start, end;
int nid = z->zone_pgdat->node_id;
+#ifdef CONFIG_HIGHMEM
int idx = zone_idx(z);
+#endif

start = z->zone_start_pfn;
- if (start == 0)
- continue; /* bootmem */
end = start + z->spanned_pages;
- if (idx == ZONE_NORMAL) {
- BUG_ON(start != node_start_pfn[nid]);
- start = node_free_pfn[nid];
- }
+ start = max(start, node_free_pfn[nid]);
+ start = max(start, max_low_pfn);
+
#ifdef CONFIG_HIGHMEM
if (idx == ZONE_HIGHMEM)
totalhigh_pages += z->spanned_pages;
--
1.7.10.3

2012-06-16 20:37:22

by Chris Metcalf

[permalink] [raw]
Subject: [PATCH 3/3] tile pci: enable IOMMU to support DMA for legacy devices

This change uses the TRIO IOMMU to map the PCI DMA space and physical
memory at different addresses. We also now use the dma_mapping_ops
to provide support for non-PCI DMA, PCIe DMA (64-bit) and legacy PCI
DMA (32-bit). We use the kernel's software I/O TLB framework
(i.e. bounce buffers) for the legacy 32-bit PCI device support since
there are a limited number of TLB entries in the IOMMU and it is
non-trivial to handle indexing, searching, matching, etc. For 32-bit
devices the performance impact of bounce buffers should not be a concern.

Signed-off-by: Chris Metcalf <[email protected]>
---
arch/tile/Kconfig | 18 ++
arch/tile/include/asm/Kbuild | 1 -
arch/tile/include/asm/device.h | 33 ++++
arch/tile/include/asm/dma-mapping.h | 146 +++++++++-----
arch/tile/include/asm/pci.h | 76 +++++++-
arch/tile/kernel/pci-dma.c | 369 ++++++++++++++++++++++++++++-------
arch/tile/kernel/pci_gx.c | 113 +++++------
arch/tile/kernel/setup.c | 35 ++--
8 files changed, 588 insertions(+), 203 deletions(-)
create mode 100644 arch/tile/include/asm/device.h

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 0ad771f..557e3a3 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -3,6 +3,8 @@

config TILE
def_bool y
+ select HAVE_DMA_ATTRS
+ select HAVE_DMA_API_DEBUG
select HAVE_KVM if !TILEGX
select GENERIC_FIND_FIRST_BIT
select USE_GENERIC_SMP_HELPERS
@@ -79,6 +81,9 @@ config ARCH_DMA_ADDR_T_64BIT
config NEED_DMA_MAP_STATE
def_bool y

+config ARCH_HAS_DMA_SET_COHERENT_MASK
+ bool
+
config LOCKDEP_SUPPORT
def_bool y

@@ -215,6 +220,19 @@ config HIGHMEM
config ZONE_DMA
def_bool y

+config IOMMU_HELPER
+ bool
+
+config NEED_SG_DMA_LENGTH
+ bool
+
+config SWIOTLB
+ bool
+ default TILEGX
+ select IOMMU_HELPER
+ select NEED_SG_DMA_LENGTH
+ select ARCH_HAS_DMA_SET_COHERENT_MASK
+
# We do not currently support disabling NUMA.
config NUMA
bool # "NUMA Memory Allocation and Scheduler Support"
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 143473e..fb7c65a 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -9,7 +9,6 @@ header-y += hardwall.h
generic-y += bug.h
generic-y += bugs.h
generic-y += cputime.h
-generic-y += device.h
generic-y += div64.h
generic-y += emergency-restart.h
generic-y += errno.h
diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h
new file mode 100644
index 0000000..5182705
--- /dev/null
+++ b/arch/tile/include/asm/device.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ * Arch specific extensions to struct device
+ */
+
+#ifndef _ASM_TILE_DEVICE_H
+#define _ASM_TILE_DEVICE_H
+
+struct dev_archdata {
+ /* DMA operations on that device */
+ struct dma_map_ops *dma_ops;
+
+ /* Offset of the DMA address from the PA. */
+ dma_addr_t dma_offset;
+
+ /* Highest DMA address that can be generated by this device. */
+ dma_addr_t max_direct_dma_addr;
+};
+
+struct pdev_archdata {
+};
+
+#endif /* _ASM_TILE_DEVICE_H */
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
index eaa06d1..4b6247d 100644
--- a/arch/tile/include/asm/dma-mapping.h
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -20,69 +20,80 @@
#include <linux/cache.h>
#include <linux/io.h>

-/*
- * Note that on x86 and powerpc, there is a "struct dma_mapping_ops"
- * that is used for all the DMA operations. For now, we don't have an
- * equivalent on tile, because we only have a single way of doing DMA.
- * (Tilera bug 7994 to use dma_mapping_ops.)
- */
+extern struct dma_map_ops *tile_dma_map_ops;
+extern struct dma_map_ops *gx_pci_dma_map_ops;
+extern struct dma_map_ops *gx_legacy_pci_dma_map_ops;
+
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+ if (dev && dev->archdata.dma_ops)
+ return dev->archdata.dma_ops;
+ else
+ return tile_dma_map_ops;
+}
+
+static inline dma_addr_t get_dma_offset(struct device *dev)
+{
+ return dev->archdata.dma_offset;
+}
+
+static inline void set_dma_offset(struct device *dev, dma_addr_t off)
+{
+ dev->archdata.dma_offset = off;
+}

-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
- enum dma_data_direction);
-extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction);
-extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction);
-extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nhwentries, enum dma_data_direction);
-extern dma_addr_t dma_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction);
-extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
- size_t size, enum dma_data_direction);
-extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction);
-extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction);
-
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag);
-
-void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle);
-
-extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t,
- enum dma_data_direction);
-extern void dma_sync_single_for_device(struct device *, dma_addr_t,
- size_t, enum dma_data_direction);
-extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t,
- unsigned long offset, size_t,
- enum dma_data_direction);
-extern void dma_sync_single_range_for_device(struct device *, dma_addr_t,
- unsigned long offset, size_t,
- enum dma_data_direction);
-extern void dma_cache_sync(struct device *dev, void *vaddr, size_t,
- enum dma_data_direction);
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return paddr + get_dma_offset(dev);
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+ return daddr - get_dma_offset(dev);
+}
+
+static inline void dma_mark_clean(void *addr, size_t size) {}
+
+#include <asm-generic/dma-mapping-common.h>
+
+static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
+{
+ dev->archdata.dma_ops = ops;
+}
+
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+ if (!dev->dma_mask)
+ return 0;
+
+ return addr + size - 1 <= *dev->dma_mask;
+}

static inline int
dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
- return 0;
+ return get_dma_ops(dev)->mapping_error(dev, dma_addr);
}

static inline int
dma_supported(struct device *dev, u64 mask)
{
- return 1;
+ return get_dma_ops(dev)->dma_supported(dev, mask);
}

static inline int
dma_set_mask(struct device *dev, u64 mask)
{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ /* Handle legacy PCI devices with limited memory addressability. */
+ if ((dma_ops == gx_pci_dma_map_ops) && (mask <= DMA_BIT_MASK(32))) {
+ set_dma_ops(dev, gx_legacy_pci_dma_map_ops);
+ set_dma_offset(dev, 0);
+ if (mask > dev->archdata.max_direct_dma_addr)
+ mask = dev->archdata.max_direct_dma_addr;
+ }
+
if (!dev->dma_mask || !dma_supported(dev, mask))
return -EIO;

@@ -91,4 +102,43 @@ dma_set_mask(struct device *dev, u64 mask)
return 0;
}

+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+ void *cpu_addr;
+
+ cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs);
+
+ debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
+
+ return cpu_addr;
+}
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
+
+ dma_ops->free(dev, size, cpu_addr, dma_handle, attrs);
+}
+
+#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_free_coherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+
+/*
+ * dma_alloc_noncoherent() is #defined to return coherent memory,
+ * so there's no need to do any flushing here.
+ */
+static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+ enum dma_data_direction direction)
+{
+}
+
#endif /* _ASM_TILE_DMA_MAPPING_H */
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index 2c224c4..553b7ff 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -15,6 +15,7 @@
#ifndef _ASM_TILE_PCI_H
#define _ASM_TILE_PCI_H

+#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/numa.h>
#include <asm-generic/pci_iomap.h>
@@ -53,6 +54,16 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}

#define TILE_NUM_PCIE 2

+/*
+ * The hypervisor maps the entirety of CPA-space as bus addresses, so
+ * bus addresses are physical addresses. The networking and block
+ * device layers use this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS 1
+
+/* generic pci stuff */
+#include <asm-generic/pci.h>
+
#else

#include <asm/page.h>
@@ -85,7 +96,47 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
/*
* Each Mem-Map interrupt region occupies 4KB.
*/
-#define MEM_MAP_INTR_REGION_SIZE (1<< TRIO_MAP_MEM_LIM__ADDR_SHIFT)
+#define MEM_MAP_INTR_REGION_SIZE (1 << TRIO_MAP_MEM_LIM__ADDR_SHIFT)
+
+/*
+ * Allocate the PCI BAR window right below 4GB.
+ */
+#define TILE_PCI_BAR_WINDOW_TOP (1ULL << 32)
+
+/*
+ * Allocate 1GB for the PCI BAR window.
+ */
+#define TILE_PCI_BAR_WINDOW_SIZE (1 << 30)
+
+/*
+ * This is the highest bus address targeting the host memory that
+ * can be generated by legacy PCI devices with 32-bit or less
+ * DMA capability, dictated by the BAR window size and location.
+ */
+#define TILE_PCI_MAX_DIRECT_DMA_ADDRESS \
+ (TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE - 1)
+
+/*
+ * We shift the PCI bus range for all the physical memory up by the whole PA
+ * range. The corresponding CPA of an incoming PCI request will be the PCI
+ * address minus TILE_PCI_MEM_MAP_BASE_OFFSET. This also implies
+ * that the 64-bit capable devices will be given DMA addresses as
+ * the CPA plus TILE_PCI_MEM_MAP_BASE_OFFSET. To support 32-bit
+ * devices, we create a separate map region that handles the low
+ * 4GB.
+ */
+#define TILE_PCI_MEM_MAP_BASE_OFFSET (1ULL << CHIP_PA_WIDTH())
+
+/*
+ * End of the PCI memory resource.
+ */
+#define TILE_PCI_MEM_END \
+ ((1ULL << CHIP_PA_WIDTH()) + TILE_PCI_BAR_WINDOW_TOP)
+
+/*
+ * Start of the PCI memory resource.
+ */
+#define TILE_PCI_MEM_START (TILE_PCI_MEM_END - TILE_PCI_BAR_WINDOW_SIZE)

/*
* Structure of a PCI controller (host bridge) on Gx.
@@ -108,6 +159,8 @@ struct pci_controller {
int index; /* PCI domain number */
struct pci_bus *root_bus;

+ uint64_t mem_offset; /* cpu->bus memory mapping offset. */
+
int last_busno;

struct pci_ops *ops;
@@ -126,14 +179,22 @@ extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO];

extern void pci_iounmap(struct pci_dev *dev, void __iomem *);

-#endif /* __tilegx__ */
+extern void
+pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+ struct resource *res);
+
+extern void
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+ struct pci_bus_region *region);

/*
- * The hypervisor maps the entirety of CPA-space as bus addresses, so
- * bus addresses are physical addresses. The networking and block
- * device layers use this boolean for bounce buffer decisions.
+ * The PCI address space does not equal the physical memory address
+ * space (we have an IOMMU). The IDE and SCSI device layers use this
+ * boolean for bounce buffer decisions.
*/
-#define PCI_DMA_BUS_IS_PHYS 1
+#define PCI_DMA_BUS_IS_PHYS 0
+
+#endif /* __tilegx__ */

int __init tile_pci_init(void);
int __init pcibios_init(void);
@@ -169,7 +230,4 @@ static inline int pcibios_assign_all_busses(void)
/* implement the pci_ DMA API in terms of the generic device dma_ one */
#include <asm-generic/pci-dma-compat.h>

-/* generic pci stuff */
-#include <asm-generic/pci.h>
-
#endif /* _ASM_TILE_PCI_H */
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
index edd856a..b9fe80e 100644
--- a/arch/tile/kernel/pci-dma.c
+++ b/arch/tile/kernel/pci-dma.c
@@ -14,6 +14,7 @@

#include <linux/mm.h>
#include <linux/dma-mapping.h>
+#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <asm/tlbflush.h>
@@ -31,10 +32,9 @@
#define PAGE_HOME_DMA PAGE_HOME_HASH
#endif

-void *dma_alloc_coherent(struct device *dev,
- size_t size,
- dma_addr_t *dma_handle,
- gfp_t gfp)
+static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp,
+ struct dma_attrs *attrs)
{
u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32);
int node = dev_to_node(dev);
@@ -68,19 +68,19 @@ void *dma_alloc_coherent(struct device *dev,
}

*dma_handle = addr;
+
return page_address(pg);
}
-EXPORT_SYMBOL(dma_alloc_coherent);

/*
- * Free memory that was allocated with dma_alloc_coherent.
+ * Free memory that was allocated with tile_dma_alloc_coherent.
*/
-void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle)
+static void tile_dma_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
{
homecache_free_pages((unsigned long)vaddr, get_order(size));
}
-EXPORT_SYMBOL(dma_free_coherent);

/*
* The map routines "map" the specified address range for DMA
@@ -199,38 +199,182 @@ static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
}
}

+static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct scatterlist *sg;
+ int i;

-/*
- * dma_map_single can be passed any memory address, and there appear
- * to be no alignment constraints.
- *
- * There is a chance that the start of the buffer will share a cache
- * line with some other data that has been touched in the meantime.
- */
-dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
- enum dma_data_direction direction)
+ BUG_ON(!valid_dma_direction(direction));
+
+ WARN_ON(nents == 0 || sglist->length == 0);
+
+ for_each_sg(sglist, sg, nents, i) {
+ sg->dma_address = sg_phys(sg);
+ __dma_prep_pa_range(sg->dma_address, sg->length, direction);
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
+ }
+
+ return nents;
+}
+
+static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct scatterlist *sg;
+ int i;
+
+ BUG_ON(!valid_dma_direction(direction));
+ for_each_sg(sglist, sg, nents, i) {
+ sg->dma_address = sg_phys(sg);
+ __dma_complete_pa_range(sg->dma_address, sg->length,
+ direction);
+ }
+}
+
+static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
- dma_addr_t dma_addr = __pa(ptr);
+ BUG_ON(!valid_dma_direction(direction));
+
+ BUG_ON(offset + size > PAGE_SIZE);
+ __dma_prep_page(page, offset, size, direction);
+
+ return page_to_pa(page) + offset;
+}
+
+static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
+ size_t size, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ BUG_ON(!valid_dma_direction(direction));
+
+ __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
+ dma_address & PAGE_OFFSET, size, direction);
+}

+static void tile_dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size,
+ enum dma_data_direction direction)
+{
BUG_ON(!valid_dma_direction(direction));
- WARN_ON(size == 0);

- __dma_prep_pa_range(dma_addr, size, direction);
+ __dma_complete_pa_range(dma_handle, size, direction);
+}

- return dma_addr;
+static void tile_dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle, size_t size,
+ enum dma_data_direction direction)
+{
+ __dma_prep_pa_range(dma_handle, size, direction);
}
-EXPORT_SYMBOL(dma_map_single);

-void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
- enum dma_data_direction direction)
+static void tile_dma_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sglist, int nelems,
+ enum dma_data_direction direction)
{
+ struct scatterlist *sg;
+ int i;
+
BUG_ON(!valid_dma_direction(direction));
- __dma_complete_pa_range(dma_addr, size, direction);
+ WARN_ON(nelems == 0 || sglist->length == 0);
+
+ for_each_sg(sglist, sg, nelems, i) {
+ dma_sync_single_for_cpu(dev, sg->dma_address,
+ sg_dma_len(sg), direction);
+ }
}
-EXPORT_SYMBOL(dma_unmap_single);

-int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
- enum dma_data_direction direction)
+static void tile_dma_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sglist, int nelems,
+ enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ int i;
+
+ BUG_ON(!valid_dma_direction(direction));
+ WARN_ON(nelems == 0 || sglist->length == 0);
+
+ for_each_sg(sglist, sg, nelems, i) {
+ dma_sync_single_for_device(dev, sg->dma_address,
+ sg_dma_len(sg), direction);
+ }
+}
+
+static inline int
+tile_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ return 0;
+}
+
+static inline int
+tile_dma_supported(struct device *dev, u64 mask)
+{
+ return 1;
+}
+
+static struct dma_map_ops tile_default_dma_map_ops = {
+ .alloc = tile_dma_alloc_coherent,
+ .free = tile_dma_free_coherent,
+ .map_page = tile_dma_map_page,
+ .unmap_page = tile_dma_unmap_page,
+ .map_sg = tile_dma_map_sg,
+ .unmap_sg = tile_dma_unmap_sg,
+ .sync_single_for_cpu = tile_dma_sync_single_for_cpu,
+ .sync_single_for_device = tile_dma_sync_single_for_device,
+ .sync_sg_for_cpu = tile_dma_sync_sg_for_cpu,
+ .sync_sg_for_device = tile_dma_sync_sg_for_device,
+ .mapping_error = tile_dma_mapping_error,
+ .dma_supported = tile_dma_supported
+};
+
+struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
+EXPORT_SYMBOL(tile_dma_map_ops);
+
+/* Generic PCI DMA mapping functions */
+
+static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp,
+ struct dma_attrs *attrs)
+{
+ int node = dev_to_node(dev);
+ int order = get_order(size);
+ struct page *pg;
+ dma_addr_t addr;
+
+ gfp |= __GFP_ZERO;
+
+ pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
+ if (pg == NULL)
+ return NULL;
+
+ addr = page_to_phys(pg);
+
+ *dma_handle = phys_to_dma(dev, addr);
+
+ return page_address(pg);
+}
+
+/*
+ * Free memory that was allocated with tile_pci_dma_alloc_coherent.
+ */
+static void tile_pci_dma_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ homecache_free_pages((unsigned long)vaddr, get_order(size));
+}
+
+static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
struct scatterlist *sg;
int i;
@@ -242,14 +386,20 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
for_each_sg(sglist, sg, nents, i) {
sg->dma_address = sg_phys(sg);
__dma_prep_pa_range(sg->dma_address, sg->length, direction);
+
+ sg->dma_address = phys_to_dma(dev, sg->dma_address);
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
}

return nents;
}
-EXPORT_SYMBOL(dma_map_sg);

-void dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
- enum dma_data_direction direction)
+static void tile_pci_dma_unmap_sg(struct device *dev,
+ struct scatterlist *sglist, int nents,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
struct scatterlist *sg;
int i;
@@ -261,46 +411,60 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
direction);
}
}
-EXPORT_SYMBOL(dma_unmap_sg);

-dma_addr_t dma_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
+static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
BUG_ON(!valid_dma_direction(direction));

BUG_ON(offset + size > PAGE_SIZE);
__dma_prep_page(page, offset, size, direction);
- return page_to_pa(page) + offset;
+
+ return phys_to_dma(dev, page_to_pa(page) + offset);
}
-EXPORT_SYMBOL(dma_map_page);

-void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
- enum dma_data_direction direction)
+static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
+ size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
BUG_ON(!valid_dma_direction(direction));
+
+ dma_address = dma_to_phys(dev, dma_address);
+
__dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
dma_address & PAGE_OFFSET, size, direction);
}
-EXPORT_SYMBOL(dma_unmap_page);

-void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
- size_t size, enum dma_data_direction direction)
+static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size,
+ enum dma_data_direction direction)
{
BUG_ON(!valid_dma_direction(direction));
+
+ dma_handle = dma_to_phys(dev, dma_handle);
+
__dma_complete_pa_range(dma_handle, size, direction);
}
-EXPORT_SYMBOL(dma_sync_single_for_cpu);

-void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
- size_t size, enum dma_data_direction direction)
+static void tile_pci_dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size,
+ enum dma_data_direction
+ direction)
{
+ dma_handle = dma_to_phys(dev, dma_handle);
+
__dma_prep_pa_range(dma_handle, size, direction);
}
-EXPORT_SYMBOL(dma_sync_single_for_device);

-void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction)
+static void tile_pci_dma_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sglist,
+ int nelems,
+ enum dma_data_direction direction)
{
struct scatterlist *sg;
int i;
@@ -313,10 +477,11 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
sg_dma_len(sg), direction);
}
}
-EXPORT_SYMBOL(dma_sync_sg_for_cpu);

-void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction)
+static void tile_pci_dma_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sglist,
+ int nelems,
+ enum dma_data_direction direction)
{
struct scatterlist *sg;
int i;
@@ -329,31 +494,93 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
sg_dma_len(sg), direction);
}
}
-EXPORT_SYMBOL(dma_sync_sg_for_device);

-void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
+static inline int
+tile_pci_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
- dma_sync_single_for_cpu(dev, dma_handle + offset, size, direction);
+ return 0;
}
-EXPORT_SYMBOL(dma_sync_single_range_for_cpu);

-void dma_sync_single_range_for_device(struct device *dev,
- dma_addr_t dma_handle,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
+static inline int
+tile_pci_dma_supported(struct device *dev, u64 mask)
{
- dma_sync_single_for_device(dev, dma_handle + offset, size, direction);
+ return 1;
}
-EXPORT_SYMBOL(dma_sync_single_range_for_device);

-/*
- * dma_alloc_noncoherent() is #defined to return coherent memory,
- * so there's no need to do any flushing here.
- */
-void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
- enum dma_data_direction direction)
+static struct dma_map_ops tile_pci_default_dma_map_ops = {
+ .alloc = tile_pci_dma_alloc_coherent,
+ .free = tile_pci_dma_free_coherent,
+ .map_page = tile_pci_dma_map_page,
+ .unmap_page = tile_pci_dma_unmap_page,
+ .map_sg = tile_pci_dma_map_sg,
+ .unmap_sg = tile_pci_dma_unmap_sg,
+ .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
+ .sync_single_for_device = tile_pci_dma_sync_single_for_device,
+ .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
+ .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
+ .mapping_error = tile_pci_dma_mapping_error,
+ .dma_supported = tile_pci_dma_supported
+};
+
+struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
+EXPORT_SYMBOL(gx_pci_dma_map_ops);
+
+/* PCI DMA mapping functions for legacy PCI devices */
+
+#ifdef CONFIG_SWIOTLB
+static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp,
+ struct dma_attrs *attrs)
+{
+ gfp |= GFP_DMA;
+ return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
+}
+
+static void tile_swiotlb_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ struct dma_attrs *attrs)
{
+ swiotlb_free_coherent(dev, size, vaddr, dma_addr);
}
-EXPORT_SYMBOL(dma_cache_sync);
+
+static struct dma_map_ops pci_swiotlb_dma_ops = {
+ .alloc = tile_swiotlb_alloc_coherent,
+ .free = tile_swiotlb_free_coherent,
+ .map_page = swiotlb_map_page,
+ .unmap_page = swiotlb_unmap_page,
+ .map_sg = swiotlb_map_sg_attrs,
+ .unmap_sg = swiotlb_unmap_sg_attrs,
+ .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+ .sync_single_for_device = swiotlb_sync_single_for_device,
+ .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+ .sync_sg_for_device = swiotlb_sync_sg_for_device,
+ .dma_supported = swiotlb_dma_supported,
+ .mapping_error = swiotlb_dma_mapping_error,
+};
+
+struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
+#else
+struct dma_map_ops *gx_legacy_pci_dma_map_ops;
+#endif
+EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
+
+#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
+int dma_set_coherent_mask(struct device *dev, u64 mask)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ /* Handle legacy PCI devices with limited memory addressability. */
+ if (((dma_ops == gx_pci_dma_map_ops) ||
+ (dma_ops == gx_legacy_pci_dma_map_ops)) &&
+ (mask <= DMA_BIT_MASK(32))) {
+ if (mask > dev->archdata.max_direct_dma_addr)
+ mask = dev->archdata.max_direct_dma_addr;
+ }
+
+ if (!dma_supported(dev, mask))
+ return -EIO;
+ dev->coherent_dma_mask = mask;
+ return 0;
+}
+EXPORT_SYMBOL(dma_set_coherent_mask);
+#endif
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index 1b996bb..27f7ab0 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -40,22 +40,8 @@
#include <arch/sim.h>

/*
- * Initialization flow and process
- * -------------------------------
- *
- * This files containes the routines to search for PCI buses,
+ * This file contains the routines to search for PCI buses,
* enumerate the buses, and configure any attached devices.
- *
- * There are two entry points here:
- * 1) tile_pci_init
- * This sets up the pci_controller structs, and opens the
- * FDs to the hypervisor. This is called from setup_arch() early
- * in the boot process.
- * 2) pcibios_init
- * This probes the PCI bus(es) for any attached hardware. It's
- * called by subsys_initcall. All of the real work is done by the
- * generic Linux PCI layer.
- *
*/

#define DEBUG_PCI_CFG 0
@@ -110,6 +96,21 @@ static struct pci_ops tile_cfg_ops;
/* Mask of CPUs that should receive PCIe interrupts. */
static struct cpumask intr_cpus_map;

+/* PCI I/O space support is not implemented. */
+static struct resource pci_ioport_resource = {
+ .name = "PCI IO",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_IO,
+};
+
+static struct resource pci_iomem_resource = {
+ .name = "PCI mem",
+ .start = TILE_PCI_MEM_START,
+ .end = TILE_PCI_MEM_END,
+ .flags = IORESOURCE_MEM,
+};
+
/*
* We don't need to worry about the alignment of resources.
*/
@@ -334,8 +335,6 @@ free_irqs:
}

/*
- * First initialization entry point, called from setup_arch().
- *
* Find valid controllers and fill in pci_controller structs for each
* of them.
*
@@ -583,10 +582,7 @@ static int __devinit setup_pcie_rc_delay(char *str)
early_param("pcie_rc_delay", setup_pcie_rc_delay);

/*
- * Second PCI initialization entry point, called by subsys_initcall.
- *
- * The controllers have been set up by the time we get here, by a call to
- * tile_pci_init.
+ * PCI initialization entry point, called by subsys_initcall.
*/
int __init pcibios_init(void)
{
@@ -594,15 +590,13 @@ int __init pcibios_init(void)
LIST_HEAD(resources);
int i;

+ tile_pci_init();
+
if (num_rc_controllers == 0 && num_ep_controllers == 0)
return 0;

- pr_info("PCI: Probing PCI hardware\n");
-
/*
* We loop over all the TRIO shims and set up the MMIO mappings.
- * This step can't be done in tile_pci_init because the MM subsystem
- * hasn't been initialized then.
*/
for (i = 0; i < TILEGX_NUM_TRIO; i++) {
gxio_trio_context_t *context = &trio_contexts[i];
@@ -645,9 +639,7 @@ int __init pcibios_init(void)
unsigned int class_code_revision;
int trio_index;
int mac;
-#ifndef USE_SHARED_PCIE_CONFIG_REGION
int ret;
-#endif

if (trio_context->fd < 0)
continue;
@@ -802,8 +794,6 @@ int __init pcibios_init(void)
pr_err("PCI: PCI CFG PIO alloc failure for mac %d "
"on TRIO %d, give up\n", mac, trio_index);

- /* TBD: cleanup ... */
-
continue;
}

@@ -819,8 +809,6 @@ int __init pcibios_init(void)
pr_err("PCI: PCI CFG PIO init failure for mac %d "
"on TRIO %d, give up\n", mac, trio_index);

- /* TBD: cleanup ... */
-
continue;
}

@@ -837,8 +825,6 @@ int __init pcibios_init(void)
pr_err("PCI: PIO map failure for mac %d on TRIO %d\n",
mac, trio_index);

- /* TBD: cleanup ... */
-
continue;
}

@@ -852,7 +838,14 @@ int __init pcibios_init(void)
continue;
}

- pci_add_resource(&resources, &iomem_resource);
+ /*
+ * The PCI memory resource is located above the PA space.
+ * The memory range for the PCI root bus should not overlap
+ * with the physical RAM.
+ */
+ pci_add_resource_offset(&resources, &iomem_resource,
+ 1ULL << CHIP_PA_WIDTH());
+
bus = pci_scan_root_bus(NULL, 0, controller->ops,
controller, &resources);
controller->root_bus = bus;
@@ -923,11 +916,6 @@ int __init pcibios_init(void)
}

/*
- * We always assign 32-bit PCI bus BAR ranges.
- */
- BUG_ON(bus_address_hi != 0);
-
- /*
* Alloc a PIO region for PCI memory access for each RC port.
*/
ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
@@ -936,8 +924,6 @@ int __init pcibios_init(void)
"give up\n", controller->trio_index,
controller->mac);

- /* TBD: cleanup ... */
-
continue;
}

@@ -950,15 +936,13 @@ int __init pcibios_init(void)
ret = gxio_trio_init_pio_region_aux(trio_context,
controller->pio_mem_index,
controller->mac,
- bus_address_hi,
+ 0,
0);
if (ret < 0) {
pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, "
"give up\n", controller->trio_index,
controller->mac);

- /* TBD: cleanup ... */
-
continue;
}

@@ -980,8 +964,6 @@ int __init pcibios_init(void)
controller->trio_index,
controller->mac, j);

- /* TBD: cleanup ... */
-
goto alloc_mem_map_failed;
}

@@ -991,9 +973,13 @@ int __init pcibios_init(void)
* Initialize the Mem-Map and the I/O MMU so that all
* the physical memory can be accessed by the endpoint
* devices. The base bus address is set to the base CPA
- * of this memory controller, so is the base VA. The
+ * of this memory controller plus an offset (see pci.h).
+ * The region's base VA is set to the base CPA. The
* I/O MMU table essentially translates the CPA to
- * the real PA.
+ * the real PA. Implicitly, for node 0, we create
+ * a separate Mem-Map region that serves as the inbound
+ * window for legacy 32-bit devices. This is a direct
+ * map of the low 4GB CPA space.
*/
ret = gxio_trio_init_memory_map_mmu_aux(trio_context,
controller->mem_maps[j],
@@ -1001,7 +987,8 @@ int __init pcibios_init(void)
nr_pages << PAGE_SHIFT,
trio_context->asid,
controller->mac,
- start_pfn << PAGE_SHIFT,
+ (start_pfn << PAGE_SHIFT) +
+ TILE_PCI_MEM_MAP_BASE_OFFSET,
j,
GXIO_TRIO_ORDER_MODE_UNORDERED);
if (ret < 0) {
@@ -1010,11 +997,8 @@ int __init pcibios_init(void)
controller->trio_index,
controller->mac, j);

- /* TBD: cleanup ... */
-
goto alloc_mem_map_failed;
}
-
continue;

alloc_mem_map_failed:
@@ -1028,11 +1012,19 @@ alloc_mem_map_failed:
subsys_initcall(pcibios_init);

/*
- * No bus fixups needed.
+ * PCI scan code calls the arch specific pcibios_fixup_bus() each time it scans
+ * a new bridge. Called after each bus is probed, but before its children are
+ * examined.
*/
void __devinit pcibios_fixup_bus(struct pci_bus *bus)
{
- /* Nothing needs to be done. */
+ struct pci_dev *dev = bus->self;
+
+ if (!dev) {
+ /* This is the root bus. */
+ bus->resource[0] = &pci_ioport_resource;
+ bus->resource[1] = &pci_iomem_resource;
+ }
}

/*
@@ -1069,6 +1061,17 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
return pci_enable_resources(dev, mask);
}

+/* Called for each device after PCI setup is done. */
+static void __init
+pcibios_fixup_final(struct pci_dev *pdev)
+{
+ set_dma_ops(&pdev->dev, gx_pci_dma_map_ops);
+ set_dma_offset(&pdev->dev, TILE_PCI_MEM_MAP_BASE_OFFSET);
+ pdev->dev.archdata.max_direct_dma_addr =
+ TILE_PCI_MAX_DIRECT_DMA_ADDRESS;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final);
+
/* Map a PCI MMIO bus address into VA space. */
void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
{
@@ -1127,7 +1130,7 @@ got_it:
* We need to keep the PCI bus address's in-page offset in the VA.
*/
return iorpc_ioremap(trio_fd, offset, size) +
- (phys_addr & (PAGE_SIZE - 1));
+ (phys_addr & (PAGE_SIZE - 1));
}
EXPORT_SYMBOL(ioremap);

diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index fdde3b6..2b8b689 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -23,6 +23,7 @@
#include <linux/irq.h>
#include <linux/kexec.h>
#include <linux/pci.h>
+#include <linux/swiotlb.h>
#include <linux/initrd.h>
#include <linux/io.h>
#include <linux/highmem.h>
@@ -109,7 +110,7 @@ static unsigned int __initdata maxnodemem_pfn[MAX_NUMNODES] = {
};
static nodemask_t __initdata isolnodes;

-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
enum { DEFAULT_PCI_RESERVE_MB = 64 };
static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB;
unsigned long __initdata pci_reserve_start_pfn = -1U;
@@ -160,7 +161,7 @@ static int __init setup_isolnodes(char *str)
}
early_param("isolnodes", setup_isolnodes);

-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
static int __init setup_pci_reserve(char* str)
{
unsigned long mb;
@@ -171,7 +172,7 @@ static int __init setup_pci_reserve(char* str)

pci_reserve_mb = mb;
pr_info("Reserving %dMB for PCIE root complex mappings\n",
- pci_reserve_mb);
+ pci_reserve_mb);
return 0;
}
early_param("pci_reserve", setup_pci_reserve);
@@ -411,7 +412,7 @@ static void __init setup_memory(void)
continue;
}
#endif
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
/*
* Blocks that overlap the pci reserved region must
* have enough space to hold the maximum percpu data
@@ -604,11 +605,9 @@ static void __init setup_bootmem_allocator_node(int i)
/* Free all the space back into the allocator. */
free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start));

-#if defined(CONFIG_PCI)
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
/*
- * Throw away any memory aliased by the PCI region. FIXME: this
- * is a temporary hack to work around bug 10502, and needs to be
- * fixed properly.
+ * Throw away any memory aliased by the PCI region.
*/
if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start)
reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn),
@@ -1353,8 +1352,7 @@ void __init setup_arch(char **cmdline_p)
setup_cpu_maps();


-#ifdef CONFIG_PCI
-#if !defined (__tilegx__)
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
/*
* Initialize the PCI structures. This is done before memory
* setup so that we know whether or not a pci_reserve region
@@ -1362,7 +1360,6 @@ void __init setup_arch(char **cmdline_p)
*/
if (tile_pci_init() == 0)
pci_reserve_mb = 0;
-#endif

/* PCI systems reserve a region just below 4GB for mapping iomem. */
pci_reserve_end_pfn = (1 << (32 - PAGE_SHIFT));
@@ -1384,6 +1381,10 @@ void __init setup_arch(char **cmdline_p)
* any memory using the bootmem allocator.
*/

+#ifdef CONFIG_SWIOTLB
+ swiotlb_init(0);
+#endif
+
paging_init();
setup_numa_mapping();
zone_sizes_init();
@@ -1391,10 +1392,6 @@ void __init setup_arch(char **cmdline_p)
setup_cpu(1);
setup_clock();
load_hv_initrd();
-
-#if defined(CONFIG_PCI) && defined (__tilegx__)
- tile_pci_init();
-#endif
}


@@ -1538,11 +1535,11 @@ static struct resource code_resource = {
};

/*
- * We reserve all resources above 4GB so that PCI won't try to put
+ * On Pro, we reserve all resources above 4GB so that PCI won't try to put
* mappings above 4GB; the standard allows that for some devices but
* the probing code truncates values to 32 bits.
*/
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
static struct resource* __init
insert_non_bus_resource(void)
{
@@ -1588,7 +1585,7 @@ static int __init request_standard_resources(void)
enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET };

iomem_resource.end = -1LL;
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
insert_non_bus_resource();
#endif

@@ -1596,7 +1593,7 @@ static int __init request_standard_resources(void)
u64 start_pfn = node_start_pfn[i];
u64 end_pfn = node_end_pfn[i];

-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
if (start_pfn <= pci_reserve_start_pfn &&
end_pfn > pci_reserve_start_pfn) {
if (end_pfn > pci_reserve_end_pfn)
--
1.7.10.3

2012-06-22 11:24:28

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH 3/3] tile pci: enable IOMMU to support DMA for legacy devices

On Fri, Jun 15, 2012 at 1:23 PM, Chris Metcalf <[email protected]> wrote:
> This change uses the TRIO IOMMU to map the PCI DMA space and physical
> memory at different addresses. We also now use the dma_mapping_ops
> to provide support for non-PCI DMA, PCIe DMA (64-bit) and legacy PCI
> DMA (32-bit). We use the kernel's software I/O TLB framework
> (i.e. bounce buffers) for the legacy 32-bit PCI device support since
> there are a limited number of TLB entries in the IOMMU and it is
> non-trivial to handle indexing, searching, matching, etc. For 32-bit
> devices the performance impact of bounce buffers should not be a concern.
>
> Signed-off-by: Chris Metcalf <[email protected]>
> ---
>  arch/tile/Kconfig                   |   18 ++
>  arch/tile/include/asm/Kbuild        |    1 -
>  arch/tile/include/asm/device.h      |   33 ++
>  arch/tile/include/asm/dma-mapping.h |  146 +++++++++-----
>  arch/tile/include/asm/pci.h         |   76 +++++++-
>  arch/tile/kernel/pci-dma.c          |  369 ++++++++++++++++++++++++++++-------
>  arch/tile/kernel/pci_gx.c           |  113 +++++------
>  arch/tile/kernel/setup.c            |   35 ++--
>  8 files changed, 588 insertions(+), 203 deletions(-)
>  create mode 100644 arch/tile/include/asm/device.h
>
> diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
> index 0ad771f..557e3a3 100644
> --- a/arch/tile/Kconfig
> +++ b/arch/tile/Kconfig
> @@ -3,6 +3,8 @@
>
> ?config TILE
> ? ? ? ?def_bool y
> + ? ? ? select HAVE_DMA_ATTRS
> + ? ? ? select HAVE_DMA_API_DEBUG
> ? ? ? ?select HAVE_KVM if !TILEGX
> ? ? ? ?select GENERIC_FIND_FIRST_BIT
> ? ? ? ?select USE_GENERIC_SMP_HELPERS
> @@ -79,6 +81,9 @@ config ARCH_DMA_ADDR_T_64BIT
> ?config NEED_DMA_MAP_STATE
> ? ? ? ?def_bool y
>
> +config ARCH_HAS_DMA_SET_COHERENT_MASK
> + ? ? ? bool
> +
> ?config LOCKDEP_SUPPORT
> ? ? ? ?def_bool y
>
> @@ -215,6 +220,19 @@ config HIGHMEM
> ?config ZONE_DMA
> ? ? ? ?def_bool y
>
> +config IOMMU_HELPER
> + ? ? ? bool
> +
> +config NEED_SG_DMA_LENGTH
> + ? ? ? bool
> +
> +config SWIOTLB
> + ? ? ? bool
> + ? ? ? default TILEGX
> + ? ? ? select IOMMU_HELPER
> + ? ? ? select NEED_SG_DMA_LENGTH
> + ? ? ? select ARCH_HAS_DMA_SET_COHERENT_MASK
> +
> ?# We do not currently support disabling NUMA.
> ?config NUMA
> ? ? ? ?bool # "NUMA Memory Allocation and Scheduler Support"
> diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
> index 143473e..fb7c65a 100644
> --- a/arch/tile/include/asm/Kbuild
> +++ b/arch/tile/include/asm/Kbuild
> @@ -9,7 +9,6 @@ header-y += hardwall.h
> ?generic-y += bug.h
> ?generic-y += bugs.h
> ?generic-y += cputime.h
> -generic-y += device.h
> ?generic-y += div64.h
> ?generic-y += emergency-restart.h
> ?generic-y += errno.h
> diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h
> new file mode 100644
> index 0000000..5182705
> --- /dev/null
> +++ b/arch/tile/include/asm/device.h
> @@ -0,0 +1,33 @@
> +/*
> + * Copyright 2010 Tilera Corporation. All Rights Reserved.
> + *
> + * ? This program is free software; you can redistribute it and/or
> + * ? modify it under the terms of the GNU General Public License
> + * ? as published by the Free Software Foundation, version 2.
> + *
> + * ? This program is distributed in the hope that it will be useful, but
> + * ? WITHOUT ANY WARRANTY; without even the implied warranty of
> + * ? MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
> + * ? NON INFRINGEMENT. ?See the GNU General Public License for
> + * ? more details.
> + * Arch specific extensions to struct device
> + */
> +
> +#ifndef _ASM_TILE_DEVICE_H
> +#define _ASM_TILE_DEVICE_H
> +
> +struct dev_archdata {
> + ? ? ? /* DMA operations on that device */
> + ? ? ? ?struct dma_map_ops ? ? *dma_ops;
> +
> + ? ? ? /* Offset of the DMA address from the PA. */
> + ? ? ? dma_addr_t ? ? ? ? ? ? ?dma_offset;
> +
> + ? ? ? /* Highest DMA address that can be generated by this device. */
> + ? ? ? dma_addr_t ? ? ? ? ? ? ?max_direct_dma_addr;
> +};
> +
> +struct pdev_archdata {
> +};
> +
> +#endif /* _ASM_TILE_DEVICE_H */
> diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
> index eaa06d1..4b6247d 100644
> --- a/arch/tile/include/asm/dma-mapping.h
> +++ b/arch/tile/include/asm/dma-mapping.h
> @@ -20,69 +20,80 @@
> ?#include <linux/cache.h>
> ?#include <linux/io.h>
>
> -/*
> - * Note that on x86 and powerpc, there is a "struct dma_mapping_ops"
> - * that is used for all the DMA operations. ?For now, we don't have an
> - * equivalent on tile, because we only have a single way of doing DMA.
> - * (Tilera bug 7994 to use dma_mapping_ops.)
> - */
> +extern struct dma_map_ops *tile_dma_map_ops;
> +extern struct dma_map_ops *gx_pci_dma_map_ops;
> +extern struct dma_map_ops *gx_legacy_pci_dma_map_ops;
> +
> +static inline struct dma_map_ops *get_dma_ops(struct device *dev)
> +{
> + ? ? ? if (dev && dev->archdata.dma_ops)
> + ? ? ? ? ? ? ? return dev->archdata.dma_ops;
> + ? ? ? else
> + ? ? ? ? ? ? ? return tile_dma_map_ops;
> +}
> +
> +static inline dma_addr_t get_dma_offset(struct device *dev)
> +{
> + ? ? ? return dev->archdata.dma_offset;
> +}
> +
> +static inline void set_dma_offset(struct device *dev, dma_addr_t off)
> +{
> + ? ? ? dev->archdata.dma_offset = off;
> +}
>
> -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
> -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
> -
> -extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
> - ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction);
> -extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ?size_t size, enum dma_data_direction);
> -extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
> - ? ? ? ? ? ? ?enum dma_data_direction);
> -extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
> - ? ? ? ? ? ? ? ? ? ? ? ?int nhwentries, enum dma_data_direction);
> -extern dma_addr_t dma_map_page(struct device *dev, struct page *page,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?unsigned long offset, size_t size,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?enum dma_data_direction);
> -extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
> - ? ? ? ? ? ? ? ? ? ? ? ? ?size_t size, enum dma_data_direction);
> -extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int nelems, enum dma_data_direction);
> -extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?int nelems, enum dma_data_direction);
> -
> -
> -void *dma_alloc_coherent(struct device *dev, size_t size,
> - ? ? ? ? ? ? ? ? ? ? ? ? ?dma_addr_t *dma_handle, gfp_t flag);
> -
> -void dma_free_coherent(struct device *dev, size_t size,
> - ? ? ? ? ? ? ? ? ? ? ? ?void *vaddr, dma_addr_t dma_handle);
> -
> -extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction);
> -extern void dma_sync_single_for_device(struct device *, dma_addr_t,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?size_t, enum dma_data_direction);
> -extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long offset, size_t,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction);
> -extern void dma_sync_single_range_for_device(struct device *, dma_addr_t,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?unsigned long offset, size_t,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?enum dma_data_direction);
> -extern void dma_cache_sync(struct device *dev, void *vaddr, size_t,
> - ? ? ? ? ? ? ? ? ? ? ? ? ?enum dma_data_direction);
> +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
> +{
> + ? ? ? return paddr + get_dma_offset(dev);
> +}
> +
> +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
> +{
> + ? ? ? return daddr - get_dma_offset(dev);
> +}
> +
> +static inline void dma_mark_clean(void *addr, size_t size) {}
> +
> +#include <asm-generic/dma-mapping-common.h>
> +
> +static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
> +{
> + ? ? ? dev->archdata.dma_ops = ops;
> +}
> +
> +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
> +{
> + ? ? ? if (!dev->dma_mask)
> + ? ? ? ? ? ? ? return 0;
> +
> + ? ? ? return addr + size - 1 <= *dev->dma_mask;
> +}
>
> ?static inline int
> ?dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
> ?{
> - ? ? ? return 0;
> + ? ? ? return get_dma_ops(dev)->mapping_error(dev, dma_addr);
> ?}
>
> ?static inline int
> ?dma_supported(struct device *dev, u64 mask)
> ?{
> - ? ? ? return 1;
> + ? ? ? return get_dma_ops(dev)->dma_supported(dev, mask);
> ?}
>
> ?static inline int
> ?dma_set_mask(struct device *dev, u64 mask)
> ?{
> + ? ? ? struct dma_map_ops *dma_ops = get_dma_ops(dev);
> +
> + ? ? ? /* Handle legacy PCI devices with limited memory addressability. */
> + ? ? ? if ((dma_ops == gx_pci_dma_map_ops) && (mask <= DMA_BIT_MASK(32))) {
> + ? ? ? ? ? ? ? set_dma_ops(dev, gx_legacy_pci_dma_map_ops);
> + ? ? ? ? ? ? ? set_dma_offset(dev, 0);
> + ? ? ? ? ? ? ? if (mask > dev->archdata.max_direct_dma_addr)
> + ? ? ? ? ? ? ? ? ? ? ? mask = dev->archdata.max_direct_dma_addr;
> + ? ? ? }
> +
> ? ? ? ?if (!dev->dma_mask || !dma_supported(dev, mask))
> ? ? ? ? ? ? ? ?return -EIO;
>
> @@ -91,4 +102,43 @@ dma_set_mask(struct device *dev, u64 mask)
> ? ? ? ?return 0;
> ?}
>
> +static inline void *dma_alloc_attrs(struct device *dev, size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? dma_addr_t *dma_handle, gfp_t flag,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct dma_attrs *attrs)
> +{
> + ? ? ? struct dma_map_ops *dma_ops = get_dma_ops(dev);
> + ? ? ? void *cpu_addr;
> +
> + ? ? ? cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs);
> +
> + ? ? ? debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
> +
> + ? ? ? return cpu_addr;
> +}
> +
> +static inline void dma_free_attrs(struct device *dev, size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? void *cpu_addr, dma_addr_t dma_handle,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct dma_attrs *attrs)
> +{
> + ? ? ? struct dma_map_ops *dma_ops = get_dma_ops(dev);
> +
> + ? ? ? debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
> +
> + ? ? ? dma_ops->free(dev, size, cpu_addr, dma_handle, attrs);
> +}
> +
> +#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
> +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
> +#define dma_free_coherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
> +#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
> +
> +/*
> + * dma_alloc_noncoherent() is #defined to return coherent memory,
> + * so there's no need to do any flushing here.
> + */
> +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction direction)
> +{
> +}
> +
> ?#endif /* _ASM_TILE_DMA_MAPPING_H */
> diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
> index 2c224c4..553b7ff 100644
> --- a/arch/tile/include/asm/pci.h
> +++ b/arch/tile/include/asm/pci.h
> @@ -15,6 +15,7 @@
> ?#ifndef _ASM_TILE_PCI_H
> ?#define _ASM_TILE_PCI_H
>
> +#include <linux/dma-mapping.h>
> ?#include <linux/pci.h>
> ?#include <linux/numa.h>
> ?#include <asm-generic/pci_iomap.h>
> @@ -53,6 +54,16 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
>
> ?#define ? ? ? ?TILE_NUM_PCIE ? 2
>
> +/*
> + * The hypervisor maps the entirety of CPA-space as bus addresses, so
> + * bus addresses are physical addresses. ?The networking and block
> + * device layers use this boolean for bounce buffer decisions.
> + */
> +#define PCI_DMA_BUS_IS_PHYS ? ? 1
> +
> +/* generic pci stuff */
> +#include <asm-generic/pci.h>
> +
> ?#else
>
> ?#include <asm/page.h>
> @@ -85,7 +96,47 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
> ?/*
> ?* Each Mem-Map interrupt region occupies 4KB.
> ?*/
> -#define ? ? ? ?MEM_MAP_INTR_REGION_SIZE ? ? ? ?(1<< TRIO_MAP_MEM_LIM__ADDR_SHIFT)
> +#define ? ? ? ?MEM_MAP_INTR_REGION_SIZE ? ? ? ?(1 << TRIO_MAP_MEM_LIM__ADDR_SHIFT)
> +
> +/*
> + * Allocate the PCI BAR window right below 4GB.
> + */
> +#define ? ? ? ?TILE_PCI_BAR_WINDOW_TOP ? ? ? ? (1ULL << 32)
> +
> +/*
> + * Allocate 1GB for the PCI BAR window.
> + */
> +#define ? ? ? ?TILE_PCI_BAR_WINDOW_SIZE ? ? ? ?(1 << 30)
> +
> +/*
> + * This is the highest bus address targeting the host memory that
> + * can be generated by legacy PCI devices with 32-bit or less
> + * DMA capability, dictated by the BAR window size and location.
> + */
> +#define ? ? ? ?TILE_PCI_MAX_DIRECT_DMA_ADDRESS \
> + ? ? ? (TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE - 1)
> +
> +/*
> + * We shift the PCI bus range for all the physical memory up by the whole PA
> + * range. The corresponding CPA of an incoming PCI request will be the PCI
> + * address minus TILE_PCI_MEM_MAP_BASE_OFFSET. This also implies
> + * that the 64-bit capable devices will be given DMA addresses as
> + * the CPA plus TILE_PCI_MEM_MAP_BASE_OFFSET. To support 32-bit
> + * devices, we create a separate map region that handles the low
> + * 4GB.
> + */
> +#define ? ? ? ?TILE_PCI_MEM_MAP_BASE_OFFSET ? ?(1ULL << CHIP_PA_WIDTH())
> +
> +/*
> + * End of the PCI memory resource.
> + */
> +#define ? ? ? ?TILE_PCI_MEM_END ? ? ? ?\
> + ? ? ? ? ? ? ? ((1ULL << CHIP_PA_WIDTH()) + TILE_PCI_BAR_WINDOW_TOP)
> +
> +/*
> + * Start of the PCI memory resource.
> + */
> +#define ? ? ? ?TILE_PCI_MEM_START ? ? ?(TILE_PCI_MEM_END - TILE_PCI_BAR_WINDOW_SIZE)
>
> ?/*
> ?* Structure of a PCI controller (host bridge) on Gx.
> @@ -108,6 +159,8 @@ struct pci_controller {
> ? ? ? ?int index; ? ? ? ? ? ? ?/* PCI domain number */
> ? ? ? ?struct pci_bus *root_bus;
>
> + ? ? ? uint64_t mem_offset; ? ?/* cpu->bus memory mapping offset. */
> +
> ? ? ? ?int last_busno;
>
> ? ? ? ?struct pci_ops *ops;
> @@ -126,14 +179,22 @@ extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO];
>
> ?extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
>
> -#endif /* __tilegx__ */
> +extern void
> +pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
> + ? ? ? ? ? ? ? ? ? ? ? struct resource *res);
> +
> +extern void
> +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
> + ? ? ? ? ? ? ? ? ? ? ? struct pci_bus_region *region);

These extern declarations look like leftovers that shouldn't be needed.

> ?/*
> - * The hypervisor maps the entirety of CPA-space as bus addresses, so
> - * bus addresses are physical addresses. ?The networking and block
> - * device layers use this boolean for bounce buffer decisions.
> + * The PCI address space does not equal the physical memory address
> + * space (we have an IOMMU). The IDE and SCSI device layers use this
> + * boolean for bounce buffer decisions.
> ?*/
> -#define PCI_DMA_BUS_IS_PHYS ? ? 1
> +#define PCI_DMA_BUS_IS_PHYS ? ? 0
> +
> +#endif /* __tilegx__ */
>
> ?int __init tile_pci_init(void);
> ?int __init pcibios_init(void);
> @@ -169,7 +230,4 @@ static inline int pcibios_assign_all_busses(void)
> ?/* implement the pci_ DMA API in terms of the generic device dma_ one */
> ?#include <asm-generic/pci-dma-compat.h>
>
> -/* generic pci stuff */
> -#include <asm-generic/pci.h>
> -
> ?#endif /* _ASM_TILE_PCI_H */
> diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
> index edd856a..b9fe80e 100644
> --- a/arch/tile/kernel/pci-dma.c
> +++ b/arch/tile/kernel/pci-dma.c
> @@ -14,6 +14,7 @@
>
> ?#include <linux/mm.h>
> ?#include <linux/dma-mapping.h>
> +#include <linux/swiotlb.h>
> ?#include <linux/vmalloc.h>
> ?#include <linux/export.h>
> ?#include <asm/tlbflush.h>
> @@ -31,10 +32,9 @@
> ?#define PAGE_HOME_DMA PAGE_HOME_HASH
> ?#endif
>
> -void *dma_alloc_coherent(struct device *dev,
> - ? ? ? ? ? ? ? ? ? ? ? ?size_t size,
> - ? ? ? ? ? ? ? ? ? ? ? ?dma_addr_t *dma_handle,
> - ? ? ? ? ? ? ? ? ? ? ? ?gfp_t gfp)
> +static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?dma_addr_t *dma_handle, gfp_t gfp,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct dma_attrs *attrs)
> ?{
> ? ? ? ?u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32);
> ? ? ? ?int node = dev_to_node(dev);
> @@ -68,19 +68,19 @@ void *dma_alloc_coherent(struct device *dev,
> ? ? ? ?}
>
> ? ? ? ?*dma_handle = addr;
> +
> ? ? ? ?return page_address(pg);
> ?}
> -EXPORT_SYMBOL(dma_alloc_coherent);
>
> ?/*
> - * Free memory that was allocated with dma_alloc_coherent.
> + * Free memory that was allocated with tile_dma_alloc_coherent.
> ?*/
> -void dma_free_coherent(struct device *dev, size_t size,
> - ? ? ? ? ? ? ? ? void *vaddr, dma_addr_t dma_handle)
> +static void tile_dma_free_coherent(struct device *dev, size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?void *vaddr, dma_addr_t dma_handle,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct dma_attrs *attrs)
> ?{
> ? ? ? ?homecache_free_pages((unsigned long)vaddr, get_order(size));
> ?}
> -EXPORT_SYMBOL(dma_free_coherent);
>
> ?/*
> ?* The map routines "map" the specified address range for DMA
> @@ -199,38 +199,182 @@ static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
> ? ? ? ?}
> ?}
>
> +static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
> + ? ? ? ? ? ? ? ? ? ? ? ? ?int nents, enum dma_data_direction direction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ?struct dma_attrs *attrs)
> +{
> + ? ? ? struct scatterlist *sg;
> + ? ? ? int i;
>
> -/*
> - * dma_map_single can be passed any memory address, and there appear
> - * to be no alignment constraints.
> - *
> - * There is a chance that the start of the buffer will share a cache
> - * line with some other data that has been touched in the meantime.
> - */
> -dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
> - ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction direction)
> + ? ? ? BUG_ON(!valid_dma_direction(direction));
> +
> + ? ? ? WARN_ON(nents == 0 || sglist->length == 0);
> +
> + ? ? ? for_each_sg(sglist, sg, nents, i) {
> + ? ? ? ? ? ? ? sg->dma_address = sg_phys(sg);
> + ? ? ? ? ? ? ? __dma_prep_pa_range(sg->dma_address, sg->length, direction);
> +#ifdef CONFIG_NEED_SG_DMA_LENGTH
> + ? ? ? ? ? ? ? sg->dma_length = sg->length;
> +#endif
> + ? ? ? }
> +
> + ? ? ? return nents;
> +}
> +
> +static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? int nents, enum dma_data_direction direction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct dma_attrs *attrs)
> +{
> + ? ? ? struct scatterlist *sg;
> + ? ? ? int i;
> +
> + ? ? ? BUG_ON(!valid_dma_direction(direction));
> + ? ? ? for_each_sg(sglist, sg, nents, i) {
> + ? ? ? ? ? ? ? sg->dma_address = sg_phys(sg);
> + ? ? ? ? ? ? ? __dma_complete_pa_range(sg->dma_address, sg->length,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? direction);
> + ? ? ? }
> +}
> +
> +static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long offset, size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction direction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct dma_attrs *attrs)
> ?{
> - ? ? ? dma_addr_t dma_addr = __pa(ptr);
> + ? ? ? BUG_ON(!valid_dma_direction(direction));
> +
> + ? ? ? BUG_ON(offset + size > PAGE_SIZE);
> + ? ? ? __dma_prep_page(page, offset, size, direction);
> +
> + ? ? ? return page_to_pa(page) + offset;
> +}
> +
> +static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? size_t size, enum dma_data_direction direction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct dma_attrs *attrs)
> +{
> + ? ? ? BUG_ON(!valid_dma_direction(direction));
> +
> + ? ? ? __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? dma_address & PAGE_OFFSET, size, direction);
> +}
>
> +static void tile_dma_sync_single_for_cpu(struct device *dev,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?dma_addr_t dma_handle,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?enum dma_data_direction direction)
> +{
> ? ? ? ?BUG_ON(!valid_dma_direction(direction));
> - ? ? ? WARN_ON(size == 0);
>
> - ? ? ? __dma_prep_pa_range(dma_addr, size, direction);
> + ? ? ? __dma_complete_pa_range(dma_handle, size, direction);
> +}
>
> - ? ? ? return dma_addr;
> +static void tile_dma_sync_single_for_device(struct device *dev,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? dma_addr_t dma_handle, size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction direction)
> +{
> + ? ? ? __dma_prep_pa_range(dma_handle, size, direction);
> ?}
> -EXPORT_SYMBOL(dma_map_single);
>
> -void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
> - ? ? ? ? ? ? ? ? ? ? enum dma_data_direction direction)
> +static void tile_dma_sync_sg_for_cpu(struct device *dev,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct scatterlist *sglist, int nelems,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?enum dma_data_direction direction)
> ?{
> + ? ? ? struct scatterlist *sg;
> + ? ? ? int i;
> +
> ? ? ? ?BUG_ON(!valid_dma_direction(direction));
> - ? ? ? __dma_complete_pa_range(dma_addr, size, direction);
> + ? ? ? WARN_ON(nelems == 0 || sglist->length == 0);
> +
> + ? ? ? for_each_sg(sglist, sg, nelems, i) {
> + ? ? ? ? ? ? ? dma_sync_single_for_cpu(dev, sg->dma_address,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? sg_dma_len(sg), direction);
> + ? ? ? }
> ?}
> -EXPORT_SYMBOL(dma_unmap_single);
>
> -int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
> - ? ? ? ? ? ? ?enum dma_data_direction direction)
> +static void tile_dma_sync_sg_for_device(struct device *dev,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct scatterlist *sglist, int nelems,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction direction)
> +{
> + ? ? ? struct scatterlist *sg;
> + ? ? ? int i;
> +
> + ? ? ? BUG_ON(!valid_dma_direction(direction));
> + ? ? ? WARN_ON(nelems == 0 || sglist->length == 0);
> +
> + ? ? ? for_each_sg(sglist, sg, nelems, i) {
> + ? ? ? ? ? ? ? dma_sync_single_for_device(dev, sg->dma_address,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?sg_dma_len(sg), direction);
> + ? ? ? }
> +}
> +
> +static inline int
> +tile_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
> +{
> + ? ? ? return 0;
> +}
> +
> +static inline int
> +tile_dma_supported(struct device *dev, u64 mask)
> +{
> + ? ? ? return 1;
> +}
> +
> +static struct dma_map_ops tile_default_dma_map_ops = {
> + ? ? ? .alloc = tile_dma_alloc_coherent,
> + ? ? ? .free = tile_dma_free_coherent,
> + ? ? ? .map_page = tile_dma_map_page,
> + ? ? ? .unmap_page = tile_dma_unmap_page,
> + ? ? ? .map_sg = tile_dma_map_sg,
> + ? ? ? .unmap_sg = tile_dma_unmap_sg,
> + ? ? ? .sync_single_for_cpu = tile_dma_sync_single_for_cpu,
> + ? ? ? .sync_single_for_device = tile_dma_sync_single_for_device,
> + ? ? ? .sync_sg_for_cpu = tile_dma_sync_sg_for_cpu,
> + ? ? ? .sync_sg_for_device = tile_dma_sync_sg_for_device,
> + ? ? ? .mapping_error = tile_dma_mapping_error,
> + ? ? ? .dma_supported = tile_dma_supported
> +};
> +
> +struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
> +EXPORT_SYMBOL(tile_dma_map_ops);
> +
> +/* Generic PCI DMA mapping functions */
> +
> +static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?dma_addr_t *dma_handle, gfp_t gfp,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct dma_attrs *attrs)
> +{
> + ? ? ? int node = dev_to_node(dev);
> + ? ? ? int order = get_order(size);
> + ? ? ? struct page *pg;
> + ? ? ? dma_addr_t addr;
> +
> + ? ? ? gfp |= __GFP_ZERO;
> +
> + ? ? ? pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
> + ? ? ? if (pg == NULL)
> + ? ? ? ? ? ? ? return NULL;
> +
> + ? ? ? addr = page_to_phys(pg);
> +
> + ? ? ? *dma_handle = phys_to_dma(dev, addr);
> +
> + ? ? ? return page_address(pg);
> +}
> +
> +/*
> + * Free memory that was allocated with tile_pci_dma_alloc_coherent.
> + */
> +static void tile_pci_dma_free_coherent(struct device *dev, size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?void *vaddr, dma_addr_t dma_handle,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct dma_attrs *attrs)
> +{
> + ? ? ? homecache_free_pages((unsigned long)vaddr, get_order(size));
> +}
> +
> +static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?int nents, enum dma_data_direction direction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct dma_attrs *attrs)
> ?{
> ? ? ? ?struct scatterlist *sg;
> ? ? ? ?int i;
> @@ -242,14 +386,20 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
> ? ? ? ?for_each_sg(sglist, sg, nents, i) {
> ? ? ? ? ? ? ? ?sg->dma_address = sg_phys(sg);
> ? ? ? ? ? ? ? ?__dma_prep_pa_range(sg->dma_address, sg->length, direction);
> +
> + ? ? ? ? ? ? ? sg->dma_address = phys_to_dma(dev, sg->dma_address);
> +#ifdef CONFIG_NEED_SG_DMA_LENGTH
> + ? ? ? ? ? ? ? sg->dma_length = sg->length;
> +#endif
> ? ? ? ?}
>
> ? ? ? ?return nents;
> ?}
> -EXPORT_SYMBOL(dma_map_sg);
>
> -void dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
> - ? ? ? ? ? ? ? ? enum dma_data_direction direction)
> +static void tile_pci_dma_unmap_sg(struct device *dev,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct scatterlist *sglist, int nents,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction direction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct dma_attrs *attrs)
> ?{
> ? ? ? ?struct scatterlist *sg;
> ? ? ? ?int i;
> @@ -261,46 +411,60 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?direction);
> ? ? ? ?}
> ?}
> -EXPORT_SYMBOL(dma_unmap_sg);
>
> -dma_addr_t dma_map_page(struct device *dev, struct page *page,
> - ? ? ? ? ? ? ? ? ? ? ? unsigned long offset, size_t size,
> - ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction direction)
> +static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long offset, size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction direction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct dma_attrs *attrs)
> ?{
> ? ? ? ?BUG_ON(!valid_dma_direction(direction));
>
> ? ? ? ?BUG_ON(offset + size > PAGE_SIZE);
> ? ? ? ?__dma_prep_page(page, offset, size, direction);
> - ? ? ? return page_to_pa(page) + offset;
> +
> + ? ? ? return phys_to_dma(dev, page_to_pa(page) + offset);
> ?}
> -EXPORT_SYMBOL(dma_map_page);
>
> -void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
> - ? ? ? ? ? ? ? ? ? enum dma_data_direction direction)
> +static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction direction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct dma_attrs *attrs)
> ?{
> ? ? ? ?BUG_ON(!valid_dma_direction(direction));
> +
> + ? ? ? dma_address = dma_to_phys(dev, dma_address);
> +
> ? ? ? ?__dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
> ? ? ? ? ? ? ? ? ? ? ? ? ? ?dma_address & PAGE_OFFSET, size, direction);
> ?}
> -EXPORT_SYMBOL(dma_unmap_page);
>
> -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ?size_t size, enum dma_data_direction direction)
> +static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?dma_addr_t dma_handle,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?enum dma_data_direction direction)
> ?{
> ? ? ? ?BUG_ON(!valid_dma_direction(direction));
> +
> + ? ? ? dma_handle = dma_to_phys(dev, dma_handle);
> +
> ? ? ? ?__dma_complete_pa_range(dma_handle, size, direction);
> ?}
> -EXPORT_SYMBOL(dma_sync_single_for_cpu);
>
> -void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? size_t size, enum dma_data_direction direction)
> +static void tile_pci_dma_sync_single_for_device(struct device *dev,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? dma_addr_t dma_handle,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? direction)
> ?{
> + ? ? ? dma_handle = dma_to_phys(dev, dma_handle);
> +
> ? ? ? ?__dma_prep_pa_range(dma_handle, size, direction);
> ?}
> -EXPORT_SYMBOL(dma_sync_single_for_device);
>
> -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
> - ? ? ? ? ? ? ? ? ? ? ? ?int nelems, enum dma_data_direction direction)
> +static void tile_pci_dma_sync_sg_for_cpu(struct device *dev,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct scatterlist *sglist,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?int nelems,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?enum dma_data_direction direction)
> ?{
> ? ? ? ?struct scatterlist *sg;
> ? ? ? ?int i;
> @@ -313,10 +477,11 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?sg_dma_len(sg), direction);
> ? ? ? ?}
> ?}
> -EXPORT_SYMBOL(dma_sync_sg_for_cpu);
>
> -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? int nelems, enum dma_data_direction direction)
> +static void tile_pci_dma_sync_sg_for_device(struct device *dev,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct scatterlist *sglist,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int nelems,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction direction)
> ?{
> ? ? ? ?struct scatterlist *sg;
> ? ? ? ?int i;
> @@ -329,31 +494,93 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? sg_dma_len(sg), direction);
> ? ? ? ?}
> ?}
> -EXPORT_SYMBOL(dma_sync_sg_for_device);
>
> -void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?unsigned long offset, size_t size,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?enum dma_data_direction direction)
> +static inline int
> +tile_pci_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
> ?{
> - ? ? ? dma_sync_single_for_cpu(dev, dma_handle + offset, size, direction);
> + ? ? ? return 0;
> ?}
> -EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
>
> -void dma_sync_single_range_for_device(struct device *dev,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? dma_addr_t dma_handle,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? unsigned long offset, size_t size,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? enum dma_data_direction direction)
> +static inline int
> +tile_pci_dma_supported(struct device *dev, u64 mask)
> ?{
> - ? ? ? dma_sync_single_for_device(dev, dma_handle + offset, size, direction);
> + ? ? ? return 1;
> ?}
> -EXPORT_SYMBOL(dma_sync_single_range_for_device);
>
> -/*
> - * dma_alloc_noncoherent() is #defined to return coherent memory,
> - * so there's no need to do any flushing here.
> - */
> -void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
> - ? ? ? ? ? ? ? ? ? enum dma_data_direction direction)
> +static struct dma_map_ops tile_pci_default_dma_map_ops = {
> + ? ? ? .alloc = tile_pci_dma_alloc_coherent,
> + ? ? ? .free = tile_pci_dma_free_coherent,
> + ? ? ? .map_page = tile_pci_dma_map_page,
> + ? ? ? .unmap_page = tile_pci_dma_unmap_page,
> + ? ? ? .map_sg = tile_pci_dma_map_sg,
> + ? ? ? .unmap_sg = tile_pci_dma_unmap_sg,
> + ? ? ? .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
> + ? ? ? .sync_single_for_device = tile_pci_dma_sync_single_for_device,
> + ? ? ? .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
> + ? ? ? .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
> + ? ? ? .mapping_error = tile_pci_dma_mapping_error,
> + ? ? ? .dma_supported = tile_pci_dma_supported
> +};
> +
> +struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
> +EXPORT_SYMBOL(gx_pci_dma_map_ops);
> +
> +/* PCI DMA mapping functions for legacy PCI devices */
> +
> +#ifdef CONFIG_SWIOTLB
> +static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?dma_addr_t *dma_handle, gfp_t gfp,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct dma_attrs *attrs)
> +{
> + ? ? ? gfp |= GFP_DMA;
> + ? ? ? return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
> +}
> +
> +static void tile_swiotlb_free_coherent(struct device *dev, size_t size,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?void *vaddr, dma_addr_t dma_addr,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct dma_attrs *attrs)
> ?{
> + ? ? ? swiotlb_free_coherent(dev, size, vaddr, dma_addr);
> ?}
> -EXPORT_SYMBOL(dma_cache_sync);
> +
> +static struct dma_map_ops pci_swiotlb_dma_ops = {
> + ? ? ? .alloc = tile_swiotlb_alloc_coherent,
> + ? ? ? .free = tile_swiotlb_free_coherent,
> + ? ? ? .map_page = swiotlb_map_page,
> + ? ? ? .unmap_page = swiotlb_unmap_page,
> + ? ? ? .map_sg = swiotlb_map_sg_attrs,
> + ? ? ? .unmap_sg = swiotlb_unmap_sg_attrs,
> + ? ? ? .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
> + ? ? ? .sync_single_for_device = swiotlb_sync_single_for_device,
> + ? ? ? .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
> + ? ? ? .sync_sg_for_device = swiotlb_sync_sg_for_device,
> + ? ? ? .dma_supported = swiotlb_dma_supported,
> + ? ? ? .mapping_error = swiotlb_dma_mapping_error,
> +};
> +
> +struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
> +#else
> +struct dma_map_ops *gx_legacy_pci_dma_map_ops;
> +#endif
> +EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
> +
> +#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
> +int dma_set_coherent_mask(struct device *dev, u64 mask)
> +{
> + ? ? ? struct dma_map_ops *dma_ops = get_dma_ops(dev);
> +
> + ? ? ? /* Handle legacy PCI devices with limited memory addressability. */
> + ? ? ? if (((dma_ops == gx_pci_dma_map_ops) ||
> + ? ? ? ? ? (dma_ops == gx_legacy_pci_dma_map_ops)) &&
> + ? ? ? ? ? (mask <= DMA_BIT_MASK(32))) {
> + ? ? ? ? ? ? ? if (mask > dev->archdata.max_direct_dma_addr)
> + ? ? ? ? ? ? ? ? ? ? ? mask = dev->archdata.max_direct_dma_addr;
> + ? ? ? }
> +
> + ? ? ? if (!dma_supported(dev, mask))
> + ? ? ? ? ? ? ? return -EIO;
> + ? ? ? dev->coherent_dma_mask = mask;
> + ? ? ? return 0;
> +}
> +EXPORT_SYMBOL(dma_set_coherent_mask);
> +#endif
> diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
> index 1b996bb..27f7ab0 100644
> --- a/arch/tile/kernel/pci_gx.c
> +++ b/arch/tile/kernel/pci_gx.c
> @@ -40,22 +40,8 @@
> ?#include <arch/sim.h>
>
> ?/*
> - * Initialization flow and process
> - * -------------------------------
> - *
> - * This files containes the routines to search for PCI buses,
> + * This file containes the routines to search for PCI buses,
> ?* enumerate the buses, and configure any attached devices.
> - *
> - * There are two entry points here:
> - * 1) tile_pci_init
> - * ? ?This sets up the pci_controller structs, and opens the
> - * ? ?FDs to the hypervisor. ?This is called from setup_arch() early
> - * ? ?in the boot process.
> - * 2) pcibios_init
> - * ? ?This probes the PCI bus(es) for any attached hardware. ?It's
> - * ? ?called by subsys_initcall. ?All of the real work is done by the
> - * ? ?generic Linux PCI layer.
> - *
> ?*/
>
> ?#define DEBUG_PCI_CFG ?0
> @@ -110,6 +96,21 @@ static struct pci_ops tile_cfg_ops;
> ?/* Mask of CPUs that should receive PCIe interrupts. */
> ?static struct cpumask intr_cpus_map;
>
> +/* PCI I/O space support is not implemented. */
> +static struct resource pci_ioport_resource = {
> + ? ? ? .name ? = "PCI IO",
> + ? ? ? .start ?= 0,
> + ? ? ? .end ? ?= 0,
> + ? ? ? .flags ?= IORESOURCE_IO,
> +};

You don't need to define pci_ioport_resource at all if you don't
support I/O space.

> +
> +static struct resource pci_iomem_resource = {
> + ? ? ? .name ? = "PCI mem",
> + ? ? ? .start ?= TILE_PCI_MEM_START,
> + ? ? ? .end ? ?= TILE_PCI_MEM_END,
> + ? ? ? .flags ?= IORESOURCE_MEM,
> +};
> +
> ?/*
> ?* We don't need to worry about the alignment of resources.
> ?*/
> @@ -334,8 +335,6 @@ free_irqs:
> ?}
>
> ?/*
> - * First initialization entry point, called from setup_arch().
> - *
> ?* Find valid controllers and fill in pci_controller structs for each
> ?* of them.
> ?*
> @@ -583,10 +582,7 @@ static int __devinit setup_pcie_rc_delay(char *str)
> ?early_param("pcie_rc_delay", setup_pcie_rc_delay);
>
> ?/*
> - * Second PCI initialization entry point, called by subsys_initcall.
> - *
> - * The controllers have been set up by the time we get here, by a call to
> - * tile_pci_init.
> + * PCI initialization entry point, called by subsys_initcall.
> ?*/
> ?int __init pcibios_init(void)
> ?{
> @@ -594,15 +590,13 @@ int __init pcibios_init(void)
> ? ? ? ?LIST_HEAD(resources);
> ? ? ? ?int i;
>
> + ? ? ? tile_pci_init();
> +
> ? ? ? ?if (num_rc_controllers == 0 && num_ep_controllers == 0)
> ? ? ? ? ? ? ? ?return 0;
>
> - ? ? ? pr_info("PCI: Probing PCI hardware\n");
> -
> ? ? ? ?/*
> ? ? ? ? * We loop over all the TRIO shims and set up the MMIO mappings.
> - ? ? ? ?* This step can't be done in tile_pci_init because the MM subsystem
> - ? ? ? ?* hasn't been initialized then.
> ? ? ? ? */
> ? ? ? ?for (i = 0; i < TILEGX_NUM_TRIO; i++) {
> ? ? ? ? ? ? ? ?gxio_trio_context_t *context = &trio_contexts[i];
> @@ -645,9 +639,7 @@ int __init pcibios_init(void)
> ? ? ? ? ? ? ? ?unsigned int class_code_revision;
> ? ? ? ? ? ? ? ?int trio_index;
> ? ? ? ? ? ? ? ?int mac;
> -#ifndef USE_SHARED_PCIE_CONFIG_REGION
> ? ? ? ? ? ? ? ?int ret;
> -#endif
>
> ? ? ? ? ? ? ? ?if (trio_context->fd < 0)
> ? ? ? ? ? ? ? ? ? ? ? ?continue;
> @@ -802,8 +794,6 @@ int __init pcibios_init(void)
> ? ? ? ? ? ? ? ? ? ? ? ?pr_err("PCI: PCI CFG PIO alloc failure for mac %d "
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"on TRIO %d, give up\n", mac, trio_index);
>
> - ? ? ? ? ? ? ? ? ? ? ? /* TBD: cleanup ... */
> -
> ? ? ? ? ? ? ? ? ? ? ? ?continue;
> ? ? ? ? ? ? ? ?}
>
> @@ -819,8 +809,6 @@ int __init pcibios_init(void)
> ? ? ? ? ? ? ? ? ? ? ? ?pr_err("PCI: PCI CFG PIO init failure for mac %d "
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"on TRIO %d, give up\n", mac, trio_index);
>
> - ? ? ? ? ? ? ? ? ? ? ? /* TBD: cleanup ... */
> -
> ? ? ? ? ? ? ? ? ? ? ? ?continue;
> ? ? ? ? ? ? ? ?}
>
> @@ -837,8 +825,6 @@ int __init pcibios_init(void)
> ? ? ? ? ? ? ? ? ? ? ? ?pr_err("PCI: PIO map failure for mac %d on TRIO %d\n",
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?mac, trio_index);
>
> - ? ? ? ? ? ? ? ? ? ? ? /* TBD: cleanup ... */
> -
> ? ? ? ? ? ? ? ? ? ? ? ?continue;
> ? ? ? ? ? ? ? ?}
>
> @@ -852,7 +838,14 @@ int __init pcibios_init(void)
> ? ? ? ? ? ? ? ? ? ? ? ?continue;
> ? ? ? ? ? ? ? ?}
>
> - ? ? ? ? ? ? ? pci_add_resource(&resources, &iomem_resource);
> + ? ? ? ? ? ? ? /*
> + ? ? ? ? ? ? ? ?* The PCI memory resource is located above the PA space.
> + ? ? ? ? ? ? ? ?* The memory range for the PCI root bus should not overlap
> + ? ? ? ? ? ? ? ?* with the physical RAM
> + ? ? ? ? ? ? ? ?*/
> + ? ? ? ? ? ? ? pci_add_resource_offset(&resources, &iomem_resource,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1ULL << CHIP_PA_WIDTH());

This says that your entire physical address space (currently
0x0-0xffffffff_ffffffff) is routed to the PCI bus, which is not true.
I think what you want here is pci_iomem_resource, but I'm not sure
that's set up correctly. It should contain the CPU physical addresses
that are routed to the PCI bus. Since you mention an offset, the PCI
bus addresses will be "CPU physical address - offset".

I don't understand the CHIP_PA_WIDTH() usage -- that seems to be the
physical address width, but you define TILE_PCI_MEM_END as "((1ULL <<
CHIP_PA_WIDTH()) + TILE_PCI_BAR_WINDOW_TOP)", which would mean the CPU
could never generate that address.

I might understand this better if you could give a concrete example of
the CPU address range and the corresponding PCI bus address range.
For example, I have a box where CPU physical address range [mem
0xf0000000000-0xf007edfffff] is routed to PCI bus address range
[0x80000000-0xfedfffff]. In this case, the struct resource contains
0xf0000000000-0xf007edfffff, and the offset is 0xf0000000000 -
0x80000000 or 0xeff80000000.

The comments at TILE_PCI_MEM_MAP_BASE_OFFSET suggest that you have two
MMIO regions (one for bus addresses <4GB), so there should be two
resources on the list here.

The list should also include a bus number resource describing the bus
numbers claimed by the host bridge. Since you don't have that, we'll
default to [bus 00-ff], but that's wrong if you have more than one
host bridge.

In fact, since it appears that you *do* have multiple host bridges,
the "resources" list should be constructed so it contains the bus
number and MMIO apertures for each bridge, which should be
non-overlapping.

> +
> ? ? ? ? ? ? ? ?bus = pci_scan_root_bus(NULL, 0, controller->ops,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?controller, &resources);
> ? ? ? ? ? ? ? ?controller->root_bus = bus;
> @@ -923,11 +916,6 @@ int __init pcibios_init(void)
> ? ? ? ? ? ? ? ?}
>
> ? ? ? ? ? ? ? ?/*
> - ? ? ? ? ? ? ? ?* We always assign 32-bit PCI bus BAR ranges.
> - ? ? ? ? ? ? ? ?*/
> - ? ? ? ? ? ? ? BUG_ON(bus_address_hi != 0);
> -
> - ? ? ? ? ? ? ? /*
> ? ? ? ? ? ? ? ? * Alloc a PIO region for PCI memory access for each RC port.
> ? ? ? ? ? ? ? ? */
> ? ? ? ? ? ? ? ?ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
> @@ -936,8 +924,6 @@ int __init pcibios_init(void)
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"give up\n", controller->trio_index,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?controller->mac);
>
> - ? ? ? ? ? ? ? ? ? ? ? /* TBD: cleanup ... */
> -
> ? ? ? ? ? ? ? ? ? ? ? ?continue;
> ? ? ? ? ? ? ? ?}
>
> @@ -950,15 +936,13 @@ int __init pcibios_init(void)
> ? ? ? ? ? ? ? ?ret = gxio_trio_init_pio_region_aux(trio_context,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?controller->pio_mem_index,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?controller->mac,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? bus_address_hi,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?0);
> ? ? ? ? ? ? ? ?if (ret < 0) {
> ? ? ? ? ? ? ? ? ? ? ? ?pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, "
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"give up\n", controller->trio_index,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?controller->mac);
>
> - ? ? ? ? ? ? ? ? ? ? ? /* TBD: cleanup ... */
> -
> ? ? ? ? ? ? ? ? ? ? ? ?continue;
> ? ? ? ? ? ? ? ?}
>
> @@ -980,8 +964,6 @@ int __init pcibios_init(void)
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?controller->trio_index,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?controller->mac, j);
>
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? /* TBD: cleanup ... */
> -
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?goto alloc_mem_map_failed;
> ? ? ? ? ? ? ? ? ? ? ? ?}
>
> @@ -991,9 +973,13 @@ int __init pcibios_init(void)
> ? ? ? ? ? ? ? ? ? ? ? ? * Initialize the Mem-Map and the I/O MMU so that all
> ? ? ? ? ? ? ? ? ? ? ? ? * the physical memory can be accessed by the endpoint
> ? ? ? ? ? ? ? ? ? ? ? ? * devices. The base bus address is set to the base CPA
> - ? ? ? ? ? ? ? ? ? ? ? ?* of this memory controller, so is the base VA. The
> + ? ? ? ? ? ? ? ? ? ? ? ?* of this memory controller plus an offset (see pci.h).
> + ? ? ? ? ? ? ? ? ? ? ? ?* The region's base VA is set to the base CPA. The
> ? ? ? ? ? ? ? ? ? ? ? ? * I/O MMU table essentially translates the CPA to
> - ? ? ? ? ? ? ? ? ? ? ? ?* the real PA.
> + ? ? ? ? ? ? ? ? ? ? ? ?* the real PA. Implicitly, for node 0, we create
> + ? ? ? ? ? ? ? ? ? ? ? ?* a separate Mem-Map region that serves as the inbound
> + ? ? ? ? ? ? ? ? ? ? ? ?* window for legacy 32-bit devices. This is a direct
> + ? ? ? ? ? ? ? ? ? ? ? ?* map of the low 4GB CPA space.
> ? ? ? ? ? ? ? ? ? ? ? ? */
> ? ? ? ? ? ? ? ? ? ? ? ?ret = gxio_trio_init_memory_map_mmu_aux(trio_context,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?controller->mem_maps[j],
> @@ -1001,7 +987,8 @@ int __init pcibios_init(void)
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?nr_pages << PAGE_SHIFT,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?trio_context->asid,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?controller->mac,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? start_pfn << PAGE_SHIFT,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (start_pfn << PAGE_SHIFT) +
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? TILE_PCI_MEM_MAP_BASE_OFFSET,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?j,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?GXIO_TRIO_ORDER_MODE_UNORDERED);
> ? ? ? ? ? ? ? ? ? ? ? ?if (ret < 0) {
> @@ -1010,11 +997,8 @@ int __init pcibios_init(void)
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?controller->trio_index,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?controller->mac, j);
>
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? /* TBD: cleanup ... */
> -
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?goto alloc_mem_map_failed;
> ? ? ? ? ? ? ? ? ? ? ? ?}
> -
> ? ? ? ? ? ? ? ? ? ? ? ?continue;
>
> ?alloc_mem_map_failed:
> @@ -1028,11 +1012,19 @@ alloc_mem_map_failed:
> ?subsys_initcall(pcibios_init);
>
> ?/*
> - * No bus fixups needed.
> + * PCI scan code calls the arch specific pcibios_fixup_bus() each time it scans
> + * a new bridge. Called after each bus is probed, but before its children are
> + * examined.
> ?*/
> ?void __devinit pcibios_fixup_bus(struct pci_bus *bus)
> ?{
> - ? ? ? /* Nothing needs to be done. */
> + ? ? ? struct pci_dev *dev = bus->self;
> +
> + ? ? ? if (!dev) {
> + ? ? ? ? ? ? ? /* This is the root bus. */
> + ? ? ? ? ? ? ? bus->resource[0] = &pci_ioport_resource;
> + ? ? ? ? ? ? ? bus->resource[1] = &pci_iomem_resource;
> + ? ? ? }

Please don't add this. I'm in the process of removing
pcibios_fixup_bus() altogether. Instead, you should put
pci_iomem_resource on a resources list and use pci_scan_root_bus().

> ?}
>
> ?/*
> @@ -1069,6 +1061,17 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
> ? ? ? ?return pci_enable_resources(dev, mask);
> ?}
>
> +/* Called for each device after PCI setup is done. */
> +static void __init
> +pcibios_fixup_final(struct pci_dev *pdev)
> +{
> + ? ? ? set_dma_ops(&pdev->dev, gx_pci_dma_map_ops);
> + ? ? ? set_dma_offset(&pdev->dev, TILE_PCI_MEM_MAP_BASE_OFFSET);
> + ? ? ? pdev->dev.archdata.max_direct_dma_addr =
> + ? ? ? ? ? ? ? TILE_PCI_MAX_DIRECT_DMA_ADDRESS;
> +}
> +DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final);
> +
> ?/* Map a PCI MMIO bus address into VA space. */
> ?void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
> ?{
> @@ -1127,7 +1130,7 @@ got_it:
> ? ? ? ? * We need to keep the PCI bus address's in-page offset in the VA.
> ? ? ? ? */
> ? ? ? ?return iorpc_ioremap(trio_fd, offset, size) +
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (phys_addr & (PAGE_SIZE - 1));
> + ? ? ? ? ? ? ? (phys_addr & (PAGE_SIZE - 1));
> ?}
> ?EXPORT_SYMBOL(ioremap);
>
> diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
> index fdde3b6..2b8b689 100644
> --- a/arch/tile/kernel/setup.c
> +++ b/arch/tile/kernel/setup.c
> @@ -23,6 +23,7 @@
> ?#include <linux/irq.h>
> ?#include <linux/kexec.h>
> ?#include <linux/pci.h>
> +#include <linux/swiotlb.h>
> ?#include <linux/initrd.h>
> ?#include <linux/io.h>
> ?#include <linux/highmem.h>
> @@ -109,7 +110,7 @@ static unsigned int __initdata maxnodemem_pfn[MAX_NUMNODES] = {
> ?};
> ?static nodemask_t __initdata isolnodes;
>
> -#ifdef CONFIG_PCI
> +#if defined(CONFIG_PCI) && !defined(__tilegx__)
> ?enum { DEFAULT_PCI_RESERVE_MB = 64 };
> ?static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB;
> ?unsigned long __initdata pci_reserve_start_pfn = -1U;
> @@ -160,7 +161,7 @@ static int __init setup_isolnodes(char *str)
> ?}
> ?early_param("isolnodes", setup_isolnodes);
>
> -#ifdef CONFIG_PCI
> +#if defined(CONFIG_PCI) && !defined(__tilegx__)
> ?static int __init setup_pci_reserve(char* str)
> ?{
> ? ? ? ?unsigned long mb;
> @@ -171,7 +172,7 @@ static int __init setup_pci_reserve(char* str)
>
> ? ? ? ?pci_reserve_mb = mb;
> ? ? ? ?pr_info("Reserving %dMB for PCIE root complex mappings\n",
> - ? ? ? ? ? ? ?pci_reserve_mb);
> + ? ? ? ? ? ? ? pci_reserve_mb);
> ? ? ? ?return 0;
> ?}
> ?early_param("pci_reserve", setup_pci_reserve);
> @@ -411,7 +412,7 @@ static void __init setup_memory(void)
> ? ? ? ? ? ? ? ? ? ? ? ?continue;
> ? ? ? ? ? ? ? ?}
> ?#endif
> -#ifdef CONFIG_PCI
> +#if defined(CONFIG_PCI) && !defined(__tilegx__)
> ? ? ? ? ? ? ? ?/*
> ? ? ? ? ? ? ? ? * Blocks that overlap the pci reserved region must
> ? ? ? ? ? ? ? ? * have enough space to hold the maximum percpu data
> @@ -604,11 +605,9 @@ static void __init setup_bootmem_allocator_node(int i)
> ? ? ? ?/* Free all the space back into the allocator. */
> ? ? ? ?free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start));
>
> -#if defined(CONFIG_PCI)
> +#if defined(CONFIG_PCI) && !defined(__tilegx__)
> ? ? ? ?/*
> - ? ? ? ?* Throw away any memory aliased by the PCI region. ?FIXME: this
> - ? ? ? ?* is a temporary hack to work around bug 10502, and needs to be
> - ? ? ? ?* fixed properly.
> + ? ? ? ?* Throw away any memory aliased by the PCI region.
> ? ? ? ? */
> ? ? ? ?if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start)
> ? ? ? ? ? ? ? ?reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn),
> @@ -1353,8 +1352,7 @@ void __init setup_arch(char **cmdline_p)
> ? ? ? ?setup_cpu_maps();
>
>
> -#ifdef CONFIG_PCI
> -#if !defined (__tilegx__)
> +#if defined(CONFIG_PCI) && !defined(__tilegx__)
> ? ? ? ?/*
> ? ? ? ? * Initialize the PCI structures. ?This is done before memory
> ? ? ? ? * setup so that we know whether or not a pci_reserve region
> @@ -1362,7 +1360,6 @@ void __init setup_arch(char **cmdline_p)
> ? ? ? ? */
> ? ? ? ?if (tile_pci_init() == 0)
> ? ? ? ? ? ? ? ?pci_reserve_mb = 0;
> -#endif
>
> ? ? ? ?/* PCI systems reserve a region just below 4GB for mapping iomem. */
> ? ? ? ?pci_reserve_end_pfn ?= (1 << (32 - PAGE_SHIFT));
> @@ -1384,6 +1381,10 @@ void __init setup_arch(char **cmdline_p)
> ? ? ? ? * any memory using the bootmem allocator.
> ? ? ? ? */
>
> +#ifdef CONFIG_SWIOTLB
> + ? ? ? swiotlb_init(0);
> +#endif
> +
> ? ? ? ?paging_init();
> ? ? ? ?setup_numa_mapping();
> ? ? ? ?zone_sizes_init();
> @@ -1391,10 +1392,6 @@ void __init setup_arch(char **cmdline_p)
> ? ? ? ?setup_cpu(1);
> ? ? ? ?setup_clock();
> ? ? ? ?load_hv_initrd();
> -
> -#if defined(CONFIG_PCI) && defined (__tilegx__)
> - ? ? ? tile_pci_init();
> -#endif
> ?}
>
>
> @@ -1538,11 +1535,11 @@ static struct resource code_resource = {
> ?};
>
> ?/*
> - * We reserve all resources above 4GB so that PCI won't try to put
> + * On Pro, we reserve all resources above 4GB so that PCI won't try to put
> ?* mappings above 4GB; the standard allows that for some devices but
> ?* the probing code trunates values to 32 bits.

I think this comment about probing code truncating values is out of
date. Or if it's not, please point me to it so we can fix it :)

> ?*/
> -#ifdef CONFIG_PCI
> +#if defined(CONFIG_PCI) && !defined(__tilegx__)
> ?static struct resource* __init
> ?insert_non_bus_resource(void)
> ?{
> @@ -1588,7 +1585,7 @@ static int __init request_standard_resources(void)
> ? ? ? ?enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET };
>
> ? ? ? ?iomem_resource.end = -1LL;

This patch isn't touching iomem_resource, but iomem_resource.end
*should* be set to the highest physical address your CPU can generate,
which is probably smaller than this.

> -#ifdef CONFIG_PCI
> +#if defined(CONFIG_PCI) && !defined(__tilegx__)
> ? ? ? ?insert_non_bus_resource();
> ?#endif
>
> @@ -1596,7 +1593,7 @@ static int __init request_standard_resources(void)
> ? ? ? ? ? ? ? ?u64 start_pfn = node_start_pfn[i];
> ? ? ? ? ? ? ? ?u64 end_pfn = node_end_pfn[i];
>
> -#ifdef CONFIG_PCI
> +#if defined(CONFIG_PCI) && !defined(__tilegx__)
> ? ? ? ? ? ? ? ?if (start_pfn <= pci_reserve_start_pfn &&
> ? ? ? ? ? ? ? ? ? ?end_pfn > pci_reserve_start_pfn) {
> ? ? ? ? ? ? ? ? ? ? ? ?if (end_pfn > pci_reserve_end_pfn)
> --
> 1.7.10.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to [email protected]
> More majordomo info at ?http://vger.kernel.org/majordomo-info.html