2009-04-14 00:45:33

by FUJITA Tomonori

Subject: [PATCH 1/2] dma-debug: add dma_debug_resize_entries() to adjust the number of dma_debug_entries

We use a static value for the number of dma_debug_entries. It can be
overridden by a kernel command line option.

Some IOMMUs (e.g. GART) can't set an appropriate value via a kernel
command line option, because they don't know the right value until they
have finished initializing their hardware.

This patch adds dma_debug_resize_entries(), which enables IOMMUs to
adjust the number of dma_debug_entries at any time.
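
(Illustration, not part of this patch: the caller side looks like the
GART change in patch 2/2 — once the hardware is initialized and the
final number of mappable pages is known, the IOMMU code grows the pool
and warns if it could not get everything. iommu_pages and the message
below are taken from that patch.)

	/* after GART init, when iommu_pages is final */
	if (dma_debug_resize_entries(iommu_pages))
		printk(KERN_DEBUG
		       "PCI-DMA: Cannot trace all the entries\n");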

Signed-off-by: FUJITA Tomonori <[email protected]>
---
include/linux/dma-debug.h | 7 +++++
lib/dma-debug.c | 67 +++++++++++++++++++++++++++++++++++++++++----
2 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 28d53cb..171ad8a 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus);

extern void dma_debug_init(u32 num_entries);

+extern int dma_debug_resize_entries(u32 num_entries);
+
extern void debug_dma_map_page(struct device *dev, struct page *page,
size_t offset, size_t size,
int direction, dma_addr_t dma_addr,
@@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries)
{
}

+static inline int dma_debug_resize_entries(u32 num_entries)
+{
+ return 0;
+}
+
static inline void debug_dma_map_page(struct device *dev, struct page *page,
size_t offset, size_t size,
int direction, dma_addr_t dma_addr,
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d3da7ed..01439e6 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -85,6 +85,7 @@ static u32 show_num_errors = 1;

static u32 num_free_entries;
static u32 min_free_entries;
+static u32 nr_total_entries;

/* number of preallocated entries requested by kernel cmdline */
static u32 req_entries;
@@ -257,6 +258,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
put_hash_bucket(bucket, &flags);
}

+static struct dma_debug_entry *__dma_entry_alloc(void)
+{
+ struct dma_debug_entry *entry;
+
+ entry = list_entry(free_entries.next, struct dma_debug_entry, list);
+ list_del(&entry->list);
+ memset(entry, 0, sizeof(*entry));
+
+ num_free_entries -= 1;
+ if (num_free_entries < min_free_entries)
+ min_free_entries = num_free_entries;
+
+ return entry;
+}
+
/* struct dma_entry allocator
*
* The next two functions implement the allocator for
@@ -276,9 +292,7 @@ static struct dma_debug_entry *dma_entry_alloc(void)
goto out;
}

- entry = list_entry(free_entries.next, struct dma_debug_entry, list);
- list_del(&entry->list);
- memset(entry, 0, sizeof(*entry));
+ entry = __dma_entry_alloc();

#ifdef CONFIG_STACKTRACE
entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
@@ -286,9 +300,6 @@ static struct dma_debug_entry *dma_entry_alloc(void)
entry->stacktrace.skip = 2;
save_stack_trace(&entry->stacktrace);
#endif
- num_free_entries -= 1;
- if (num_free_entries < min_free_entries)
- min_free_entries = num_free_entries;

out:
spin_unlock_irqrestore(&free_entries_lock, flags);
@@ -310,6 +321,48 @@ static void dma_entry_free(struct dma_debug_entry *entry)
spin_unlock_irqrestore(&free_entries_lock, flags);
}

+int dma_debug_resize_entries(u32 num_entries)
+{
+ int i, delta, ret = 0;
+ unsigned long flags;
+ struct dma_debug_entry *entry;
+
+ spin_lock_irqsave(&free_entries_lock, flags);
+
+ if (nr_total_entries < num_entries) {
+ delta = num_entries - nr_total_entries;
+
+ for (i = 0; i < delta; i++) {
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ break;
+
+ list_add_tail(&entry->list, &free_entries);
+ }
+
+ nr_total_entries += i;
+ num_free_entries += i;
+
+ } else {
+ delta = nr_total_entries - num_entries;
+
+ for (i = 0; i < delta && !list_empty(&free_entries); i++) {
+ entry = __dma_entry_alloc();
+ kfree(entry);
+ }
+
+ nr_total_entries -= i;
+ }
+
+ if (nr_total_entries != num_entries)
+ ret = 1;
+
+ spin_unlock_irqrestore(&free_entries_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(dma_debug_resize_entries);
+
/*
* DMA-API debugging init code
*
@@ -490,6 +543,8 @@ void dma_debug_init(u32 num_entries)
return;
}

+ nr_total_entries = num_free_entries;
+
printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n");
}

--
1.6.0.6


2009-04-14 00:45:10

by FUJITA Tomonori

Subject: [PATCH 2/2] gart: reimplement IOMMU_LEAK feature by using DMA_API_DEBUG

IOMMU_LEAK, GART's own feature, dumps the used IOMMU entries when the
IOMMU runs out of entries, which can be useful for finding a bad driver
that eats IOMMU entries.

DMA_API_DEBUG provides a similar feature, debug_dma_dump_mappings, and
it is better than GART's IOMMU_LEAK feature: the latter doesn't say who
is using the IOMMU entries, so it is hard to find a bad driver.

This patch reimplements GART's IOMMU_LEAK feature by using
DMA_API_DEBUG.
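
(Illustration, not part of this patch: debug_dma_dump_mappings() takes a
struct device pointer, so a single suspect device can be dumped on its
own; dump_leak() below passes NULL to dump every mapping that is still
tracked. pdev here is a hypothetical struct pci_dev used only for the
example.)

	/* dump only the mappings still held by one suspect device */
	debug_dma_dump_mappings(&pdev->dev);

	/* or dump everything that is still mapped, as dump_leak() does */
	debug_dma_dump_mappings(NULL);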

Signed-off-by: FUJITA Tomonori <[email protected]>
---
arch/x86/Kconfig.debug | 3 +-
arch/x86/kernel/pci-gart_64.c | 45 +++++++---------------------------------
2 files changed, 9 insertions(+), 39 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d8359e7..5865712 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -161,8 +161,7 @@ config IOMMU_DEBUG

config IOMMU_LEAK
bool "IOMMU leak tracing"
- depends on DEBUG_KERNEL
- depends on IOMMU_DEBUG
+ depends on IOMMU_DEBUG && DMA_API_DEBUG
---help---
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index b284b58..1e8920d 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -144,48 +144,21 @@ static void flush_gart(void)
}

#ifdef CONFIG_IOMMU_LEAK
-
-#define SET_LEAK(x) \
- do { \
- if (iommu_leak_tab) \
- iommu_leak_tab[x] = __builtin_return_address(0);\
- } while (0)
-
-#define CLEAR_LEAK(x) \
- do { \
- if (iommu_leak_tab) \
- iommu_leak_tab[x] = NULL; \
- } while (0)
-
/* Debugging aid for drivers that don't free their IOMMU tables */
-static void **iommu_leak_tab;
static int leak_trace;
static int iommu_leak_pages = 20;

static void dump_leak(void)
{
- int i;
static int dump;

- if (dump || !iommu_leak_tab)
+ if (dump)
return;
dump = 1;
- show_stack(NULL, NULL);

- /* Very crude. dump some from the end of the table too */
- printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n",
- iommu_leak_pages);
- for (i = 0; i < iommu_leak_pages; i += 2) {
- printk(KERN_DEBUG "%lu: ", iommu_pages-i);
- printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
- 0);
- printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
- }
- printk(KERN_DEBUG "\n");
+ show_stack(NULL, NULL);
+ debug_dma_dump_mappings(NULL);
}
-#else
-# define SET_LEAK(x)
-# define CLEAR_LEAK(x)
#endif

static void iommu_full(struct device *dev, size_t size, int dir)
@@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,

for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
- SET_LEAK(iommu_page + i);
phys_mem += PAGE_SIZE;
}
return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
@@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
- CLEAR_LEAK(iommu_page + i);
}
free_iommu(iommu_page, npages);
}
@@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
while (pages--) {
iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
- SET_LEAK(iommu_page);
addr += PAGE_SIZE;
iommu_page++;
}
@@ -801,11 +771,12 @@ void __init gart_iommu_init(void)

#ifdef CONFIG_IOMMU_LEAK
if (leak_trace) {
- iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
- get_order(iommu_pages*sizeof(void *)));
- if (!iommu_leak_tab)
+ int ret;
+
+ ret = dma_debug_resize_entries(iommu_pages);
+ if (ret)
printk(KERN_DEBUG
- "PCI-DMA: Cannot allocate leak trace area\n");
+ "PCI-DMA: Cannot trace all the entries\n");
}
#endif

--
1.6.0.6

2009-04-14 10:46:37

by Joerg Roedel

Subject: Re: [PATCH 1/2] dma-debug: add dma_debug_resize_entries() to adjust the number of dma_debug_entries

On Tue, Apr 14, 2009 at 09:43:18AM +0900, FUJITA Tomonori wrote:
> We use a static value for the number of dma_debug_entries. It can be
> overridden by a kernel command line option.
>
> Some IOMMUs (e.g. GART) can't set an appropriate value via a kernel
> command line option, because they don't know the right value until they
> have finished initializing their hardware.
>
> This patch adds dma_debug_resize_entries(), which enables IOMMUs to
> adjust the number of dma_debug_entries at any time.
>
> Signed-off-by: FUJITA Tomonori <[email protected]>

Looks good.

Acked-by: Joerg Roedel <[email protected]>

> ---
> include/linux/dma-debug.h | 7 +++++
> lib/dma-debug.c | 67 +++++++++++++++++++++++++++++++++++++++++----
> 2 files changed, 68 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
> index 28d53cb..171ad8a 100644
> --- a/include/linux/dma-debug.h
> +++ b/include/linux/dma-debug.h
> @@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus);
>
> extern void dma_debug_init(u32 num_entries);
>
> +extern int dma_debug_resize_entries(u32 num_entries);
> +
> extern void debug_dma_map_page(struct device *dev, struct page *page,
> size_t offset, size_t size,
> int direction, dma_addr_t dma_addr,
> @@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries)
> {
> }
>
> +static inline int dma_debug_resize_entries(u32 num_entries)
> +{
> + return 0;
> +}
> +
> static inline void debug_dma_map_page(struct device *dev, struct page *page,
> size_t offset, size_t size,
> int direction, dma_addr_t dma_addr,
> diff --git a/lib/dma-debug.c b/lib/dma-debug.c
> index d3da7ed..01439e6 100644
> --- a/lib/dma-debug.c
> +++ b/lib/dma-debug.c
> @@ -85,6 +85,7 @@ static u32 show_num_errors = 1;
>
> static u32 num_free_entries;
> static u32 min_free_entries;
> +static u32 nr_total_entries;
>
> /* number of preallocated entries requested by kernel cmdline */
> static u32 req_entries;
> @@ -257,6 +258,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
> put_hash_bucket(bucket, &flags);
> }
>
> +static struct dma_debug_entry *__dma_entry_alloc(void)
> +{
> + struct dma_debug_entry *entry;
> +
> + entry = list_entry(free_entries.next, struct dma_debug_entry, list);
> + list_del(&entry->list);
> + memset(entry, 0, sizeof(*entry));
> +
> + num_free_entries -= 1;
> + if (num_free_entries < min_free_entries)
> + min_free_entries = num_free_entries;
> +
> + return entry;
> +}
> +
> /* struct dma_entry allocator
> *
> * The next two functions implement the allocator for
> @@ -276,9 +292,7 @@ static struct dma_debug_entry *dma_entry_alloc(void)
> goto out;
> }
>
> - entry = list_entry(free_entries.next, struct dma_debug_entry, list);
> - list_del(&entry->list);
> - memset(entry, 0, sizeof(*entry));
> + entry = __dma_entry_alloc();
>
> #ifdef CONFIG_STACKTRACE
> entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
> @@ -286,9 +300,6 @@ static struct dma_debug_entry *dma_entry_alloc(void)
> entry->stacktrace.skip = 2;
> save_stack_trace(&entry->stacktrace);
> #endif
> - num_free_entries -= 1;
> - if (num_free_entries < min_free_entries)
> - min_free_entries = num_free_entries;
>
> out:
> spin_unlock_irqrestore(&free_entries_lock, flags);
> @@ -310,6 +321,48 @@ static void dma_entry_free(struct dma_debug_entry *entry)
> spin_unlock_irqrestore(&free_entries_lock, flags);
> }
>
> +int dma_debug_resize_entries(u32 num_entries)
> +{
> + int i, delta, ret = 0;
> + unsigned long flags;
> + struct dma_debug_entry *entry;
> +
> + spin_lock_irqsave(&free_entries_lock, flags);
> +
> + if (nr_total_entries < num_entries) {
> + delta = num_entries - nr_total_entries;
> +
> + for (i = 0; i < delta; i++) {
> + entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
> + if (!entry)
> + break;
> +
> + list_add_tail(&entry->list, &free_entries);
> + }
> +
> + nr_total_entries += i;
> + num_free_entries += i;
> +
> + } else {
> + delta = nr_total_entries - num_entries;
> +
> + for (i = 0; i < delta && !list_empty(&free_entries); i++) {
> + entry = __dma_entry_alloc();
> + kfree(entry);
> + }
> +
> + nr_total_entries -= i;
> + }
> +
> + if (nr_total_entries != num_entries)
> + ret = 1;
> +
> + spin_unlock_irqrestore(&free_entries_lock, flags);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL(dma_debug_resize_entries);
> +
> /*
> * DMA-API debugging init code
> *
> @@ -490,6 +543,8 @@ void dma_debug_init(u32 num_entries)
> return;
> }
>
> + nr_total_entries = num_free_entries;
> +
> printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n");
> }
>
> --
> 1.6.0.6
>
>

--
| Advanced Micro Devices GmbH
Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München
System |
Research | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
| Registergericht München, HRB Nr. 43632

2009-04-14 10:47:30

by Joerg Roedel

Subject: Re: [PATCH 2/2] gart: reimplement IOMMU_LEAK feature by using DMA_API_DEBUG

On Tue, Apr 14, 2009 at 09:43:19AM +0900, FUJITA Tomonori wrote:
> IOMMU_LEAK, GART's own feature, dumps the used IOMMU entries when the
> IOMMU runs out of entries, which can be useful for finding a bad driver
> that eats IOMMU entries.
>
> DMA_API_DEBUG provides a similar feature, debug_dma_dump_mappings, and
> it is better than GART's IOMMU_LEAK feature: the latter doesn't say who
> is using the IOMMU entries, so it is hard to find a bad driver.
>
> This patch reimplements GART's IOMMU_LEAK feature by using
> DMA_API_DEBUG.
>
> Signed-off-by: FUJITA Tomonori <[email protected]>

Yeah, I also had that on my list :-) This feature is indeed unnecessary now.

Acked-by: Joerg Roedel <[email protected]>

> ---
> arch/x86/Kconfig.debug | 3 +-
> arch/x86/kernel/pci-gart_64.c | 45 +++++++---------------------------------
> 2 files changed, 9 insertions(+), 39 deletions(-)
>
> diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
> index d8359e7..5865712 100644
> --- a/arch/x86/Kconfig.debug
> +++ b/arch/x86/Kconfig.debug
> @@ -161,8 +161,7 @@ config IOMMU_DEBUG
>
> config IOMMU_LEAK
> bool "IOMMU leak tracing"
> - depends on DEBUG_KERNEL
> - depends on IOMMU_DEBUG
> + depends on IOMMU_DEBUG && DMA_API_DEBUG
> ---help---
> Add a simple leak tracer to the IOMMU code. This is useful when you
> are debugging a buggy device driver that leaks IOMMU mappings.
> diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
> index b284b58..1e8920d 100644
> --- a/arch/x86/kernel/pci-gart_64.c
> +++ b/arch/x86/kernel/pci-gart_64.c
> @@ -144,48 +144,21 @@ static void flush_gart(void)
> }
>
> #ifdef CONFIG_IOMMU_LEAK
> -
> -#define SET_LEAK(x) \
> - do { \
> - if (iommu_leak_tab) \
> - iommu_leak_tab[x] = __builtin_return_address(0);\
> - } while (0)
> -
> -#define CLEAR_LEAK(x) \
> - do { \
> - if (iommu_leak_tab) \
> - iommu_leak_tab[x] = NULL; \
> - } while (0)
> -
> /* Debugging aid for drivers that don't free their IOMMU tables */
> -static void **iommu_leak_tab;
> static int leak_trace;
> static int iommu_leak_pages = 20;
>
> static void dump_leak(void)
> {
> - int i;
> static int dump;
>
> - if (dump || !iommu_leak_tab)
> + if (dump)
> return;
> dump = 1;
> - show_stack(NULL, NULL);
>
> - /* Very crude. dump some from the end of the table too */
> - printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n",
> - iommu_leak_pages);
> - for (i = 0; i < iommu_leak_pages; i += 2) {
> - printk(KERN_DEBUG "%lu: ", iommu_pages-i);
> - printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
> - 0);
> - printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
> - }
> - printk(KERN_DEBUG "\n");
> + show_stack(NULL, NULL);
> + debug_dma_dump_mappings(NULL);
> }
> -#else
> -# define SET_LEAK(x)
> -# define CLEAR_LEAK(x)
> #endif
>
> static void iommu_full(struct device *dev, size_t size, int dir)
> @@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
>
> for (i = 0; i < npages; i++) {
> iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
> - SET_LEAK(iommu_page + i);
> phys_mem += PAGE_SIZE;
> }
> return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
> @@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
> npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
> for (i = 0; i < npages; i++) {
> iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
> - CLEAR_LEAK(iommu_page + i);
> }
> free_iommu(iommu_page, npages);
> }
> @@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
> pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
> while (pages--) {
> iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
> - SET_LEAK(iommu_page);
> addr += PAGE_SIZE;
> iommu_page++;
> }
> @@ -801,11 +771,12 @@ void __init gart_iommu_init(void)
>
> #ifdef CONFIG_IOMMU_LEAK
> if (leak_trace) {
> - iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
> - get_order(iommu_pages*sizeof(void *)));
> - if (!iommu_leak_tab)
> + int ret;
> +
> + ret = dma_debug_resize_entries(iommu_pages);
> + if (ret)
> printk(KERN_DEBUG
> - "PCI-DMA: Cannot allocate leak trace area\n");
> + "PCI-DMA: Cannot trace all the entries\n");
> }
> #endif
>
> --
> 1.6.0.6
>
>

--
| Advanced Micro Devices GmbH
Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München
System |
Research | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
| Registergericht München, HRB Nr. 43632

2009-04-14 10:58:55

by Ingo Molnar

Subject: Re: [PATCH 1/2] dma-debug: add dma_debug_resize_entries() to adjust the number of dma_debug_entries


* FUJITA Tomonori <[email protected]> wrote:

> +int dma_debug_resize_entries(u32 num_entries)
> +{
> + int i, delta, ret = 0;
> + unsigned long flags;
> + struct dma_debug_entry *entry;
> +
> + spin_lock_irqsave(&free_entries_lock, flags);
> +
> + if (nr_total_entries < num_entries) {
> + delta = num_entries - nr_total_entries;
> +
> + for (i = 0; i < delta; i++) {
> + entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
> + if (!entry)
> + break;

hm, using GFP_ATOMIC within a spinlock is not a very nice thing to
do in general. While this is boot-only and the GFP_ATOMIC will
likely succeed, this could become non-boot functionality and then
it's exposed to the momentary VM pressure situation that might make
GFP_ATOMIC fail.

Please fix this to be GFP_KERNEL.
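
(Sketch of the pattern being asked for — and essentially what the
updated patch later in the thread does: build the new entries on a
local list with GFP_KERNEL outside the lock, then re-take the lock and
splice them onto free_entries. This is a fragment from the context of
dma_debug_resize_entries(); all names are from the patch.)

	LIST_HEAD(tmp);

	spin_unlock_irqrestore(&free_entries_lock, flags);

	for (i = 0; i < delta; i++) {
		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
		if (!entry)
			break;

		list_add_tail(&entry->list, &tmp);
	}

	spin_lock_irqsave(&free_entries_lock, flags);
	list_splice(&tmp, &free_entries);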

a small detail:

> + delta = nr_total_entries - num_entries;
> +
> + for (i = 0; i < delta && !list_empty(&free_entries); i++) {
> + entry = __dma_entry_alloc();
> + kfree(entry);
> + }
> +
> + nr_total_entries -= i;
> + }

Can i != delta ever happen? It would suggest the list length being
out of sync with the nr_total_entries counter.

Ingo

2009-04-15 09:24:23

by FUJITA Tomonori

Subject: Re: [PATCH 1/2] dma-debug: add dma_debug_resize_entries() to adjust the number of dma_debug_entries

On Tue, 14 Apr 2009 12:58:23 +0200
Ingo Molnar <[email protected]> wrote:

>
> * FUJITA Tomonori <[email protected]> wrote:
>
> > +int dma_debug_resize_entries(u32 num_entries)
> > +{
> > + int i, delta, ret = 0;
> > + unsigned long flags;
> > + struct dma_debug_entry *entry;
> > +
> > + spin_lock_irqsave(&free_entries_lock, flags);
> > +
> > + if (nr_total_entries < num_entries) {
> > + delta = num_entries - nr_total_entries;
> > +
> > + for (i = 0; i < delta; i++) {
> > + entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
> > + if (!entry)
> > + break;
>
> hm, using GFP_ATOMIC within a spinlock is not a very nice thing to
> do in general. While this is boot-only and the GFP_ATOMIC will
> likely succeed, this could become non-boot functionality and then
> it's exposed to the momentary VM pressure situation that might make
> GFP_ATOMIC fail.
>
> Please fix this to be GFP_KERNEL.

Ok, fixed, though I'm not sure it matters. It's unlikely that this
could become non-boot functionality, so I chose a simple way.

I've attached a fixed patch. It's unlikely that this function is
called concurrently, so I didn't try hard.


> a small detail:
>
> > + delta = nr_total_entries - num_entries;
> > +
> > + for (i = 0; i < delta && !list_empty(&free_entries); i++) {
> > + entry = __dma_entry_alloc();
> > + kfree(entry);
> > + }
> > +
> > + nr_total_entries -= i;
> > + }
>
> Can i != delta ever happen? It would suggest the list length being
> out of sync with the nr_total_entries counter.

Yes, it can happen, because only free entries are linked to the
free_entries list. If some entries are already in use, we may not find
enough entries on the free_entries list here. For example, with 100
total entries of which 60 are in use, shrinking to 30 gives delta = 70
but only 40 free entries, so the loop stops at i = 40, nr_total_entries
ends up at 60 instead of 30, and the function returns 1.

=
From: FUJITA Tomonori <[email protected]>
Subject: [PATCH] dma-debug: add dma_debug_resize_entries() to adjust the number of dma_debug_entries

We use a static value for the number of dma_debug_entries. It can be
overridden by a kernel command line option.

Some IOMMUs (e.g. GART) can't set an appropriate value via a kernel
command line option, because they don't know the right value until they
have finished initializing their hardware.

This patch adds dma_debug_resize_entries(), which enables IOMMUs to
adjust the number of dma_debug_entries at any time.

Signed-off-by: FUJITA Tomonori <[email protected]>
---
include/linux/dma-debug.h | 7 ++++
lib/dma-debug.c | 72 +++++++++++++++++++++++++++++++++++++++++----
2 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 28d53cb..171ad8a 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus);

extern void dma_debug_init(u32 num_entries);

+extern int dma_debug_resize_entries(u32 num_entries);
+
extern void debug_dma_map_page(struct device *dev, struct page *page,
size_t offset, size_t size,
int direction, dma_addr_t dma_addr,
@@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries)
{
}

+static inline int dma_debug_resize_entries(u32 num_entries)
+{
+ return 0;
+}
+
static inline void debug_dma_map_page(struct device *dev, struct page *page,
size_t offset, size_t size,
int direction, dma_addr_t dma_addr,
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d3da7ed..5d61019 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -85,6 +85,7 @@ static u32 show_num_errors = 1;

static u32 num_free_entries;
static u32 min_free_entries;
+static u32 nr_total_entries;

/* number of preallocated entries requested by kernel cmdline */
static u32 req_entries;
@@ -257,6 +258,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
put_hash_bucket(bucket, &flags);
}

+static struct dma_debug_entry *__dma_entry_alloc(void)
+{
+ struct dma_debug_entry *entry;
+
+ entry = list_entry(free_entries.next, struct dma_debug_entry, list);
+ list_del(&entry->list);
+ memset(entry, 0, sizeof(*entry));
+
+ num_free_entries -= 1;
+ if (num_free_entries < min_free_entries)
+ min_free_entries = num_free_entries;
+
+ return entry;
+}
+
/* struct dma_entry allocator
*
* The next two functions implement the allocator for
@@ -276,9 +292,7 @@ static struct dma_debug_entry *dma_entry_alloc(void)
goto out;
}

- entry = list_entry(free_entries.next, struct dma_debug_entry, list);
- list_del(&entry->list);
- memset(entry, 0, sizeof(*entry));
+ entry = __dma_entry_alloc();

#ifdef CONFIG_STACKTRACE
entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
@@ -286,9 +300,6 @@ static struct dma_debug_entry *dma_entry_alloc(void)
entry->stacktrace.skip = 2;
save_stack_trace(&entry->stacktrace);
#endif
- num_free_entries -= 1;
- if (num_free_entries < min_free_entries)
- min_free_entries = num_free_entries;

out:
spin_unlock_irqrestore(&free_entries_lock, flags);
@@ -310,6 +321,53 @@ static void dma_entry_free(struct dma_debug_entry *entry)
spin_unlock_irqrestore(&free_entries_lock, flags);
}

+int dma_debug_resize_entries(u32 num_entries)
+{
+ int i, delta, ret = 0;
+ unsigned long flags;
+ struct dma_debug_entry *entry;
+ LIST_HEAD(tmp);
+
+ spin_lock_irqsave(&free_entries_lock, flags);
+
+ if (nr_total_entries < num_entries) {
+ delta = num_entries - nr_total_entries;
+
+ spin_unlock_irqrestore(&free_entries_lock, flags);
+
+ for (i = 0; i < delta; i++) {
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ break;
+
+ list_add_tail(&entry->list, &tmp);
+ }
+
+ spin_lock_irqsave(&free_entries_lock, flags);
+
+ list_splice(&tmp, &free_entries);
+ nr_total_entries += i;
+ num_free_entries += i;
+ } else {
+ delta = nr_total_entries - num_entries;
+
+ for (i = 0; i < delta && !list_empty(&free_entries); i++) {
+ entry = __dma_entry_alloc();
+ kfree(entry);
+ }
+
+ nr_total_entries -= i;
+ }
+
+ if (nr_total_entries != num_entries)
+ ret = 1;
+
+ spin_unlock_irqrestore(&free_entries_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(dma_debug_resize_entries);
+
/*
* DMA-API debugging init code
*
@@ -490,6 +548,8 @@ void dma_debug_init(u32 num_entries)
return;
}

+ nr_total_entries = num_free_entries;
+
printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n");
}

--
1.6.0.6

2009-04-15 10:28:18

by Ingo Molnar

Subject: Re: [PATCH 1/2] dma-debug: add dma_debug_resize_entries() to adjust the number of dma_debug_entries


* FUJITA Tomonori <[email protected]> wrote:

> On Tue, 14 Apr 2009 12:58:23 +0200
> Ingo Molnar <[email protected]> wrote:
>
> >
> > * FUJITA Tomonori <[email protected]> wrote:
> >
> > > +int dma_debug_resize_entries(u32 num_entries)
> > > +{
> > > + int i, delta, ret = 0;
> > > + unsigned long flags;
> > > + struct dma_debug_entry *entry;
> > > +
> > > + spin_lock_irqsave(&free_entries_lock, flags);
> > > +
> > > + if (nr_total_entries < num_entries) {
> > > + delta = num_entries - nr_total_entries;
> > > +
> > > + for (i = 0; i < delta; i++) {
> > > + entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
> > > + if (!entry)
> > > + break;
> >
> > hm, using GFP_ATOMIC within a spinlock is not a very nice thing to
> > do in general. While this is boot-only and the GFP_ATOMIC will
> > likely succeed, this could become non-boot functionality and then
> > it's exposed to the momentary VM pressure situation that might make
> > GFP_ATOMIC fail.
> >
> > Please fix this to be GFP_KERNEL.
>
> Ok, fixed, though I'm not sure it matters. It's unlikely that this
> could become non-boot functionality, so I chose a simple way.
>
> I've attached a fixed patch. It's unlikely that this function is
> called concurrently, so I didn't try hard.

thanks, it looks nice and clean now.

GFP_ATOMIC (mis-)use is prominently detected by Andrew's barf-o-meter,
so even though it's probably fine here, it's still better to just not
get into the habit. People would then see it in an otherwise
tidy-looking piece of code and think it's an example to follow.

Ingo

2009-04-15 11:26:37

by FUJITA Tomonori

Subject: [tip:core/iommu] dma-debug: add dma_debug_resize_entries() to adjust the number of dma_debug_entries

Commit-ID: e6a1a89d572c31b62d6dcf11a371c7323852d9b2
Gitweb: http://git.kernel.org/tip/e6a1a89d572c31b62d6dcf11a371c7323852d9b2
Author: FUJITA Tomonori <[email protected]>
AuthorDate: Wed, 15 Apr 2009 18:22:41 +0900
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 15 Apr 2009 12:22:37 +0200

dma-debug: add dma_debug_resize_entries() to adjust the number of dma_debug_entries

We use a static value for the number of dma_debug_entries. It can be
overridden by a kernel command line option.

Some IOMMUs (e.g. GART) can't set an appropriate value via a kernel
command line option, because they don't know the right value until they
have finished initializing their hardware.

This patch adds dma_debug_resize_entries(), which enables IOMMUs to
adjust the number of dma_debug_entries at any time.

Signed-off-by: FUJITA Tomonori <[email protected]>
Acked-by: Joerg Roedel <[email protected]>
Cc: [email protected]
Cc: [email protected]
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
include/linux/dma-debug.h | 7 ++++
lib/dma-debug.c | 72 +++++++++++++++++++++++++++++++++++++++++----
2 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 28d53cb..171ad8a 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus);

extern void dma_debug_init(u32 num_entries);

+extern int dma_debug_resize_entries(u32 num_entries);
+
extern void debug_dma_map_page(struct device *dev, struct page *page,
size_t offset, size_t size,
int direction, dma_addr_t dma_addr,
@@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries)
{
}

+static inline int dma_debug_resize_entries(u32 num_entries)
+{
+ return 0;
+}
+
static inline void debug_dma_map_page(struct device *dev, struct page *page,
size_t offset, size_t size,
int direction, dma_addr_t dma_addr,
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d3da7ed..5d61019 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -85,6 +85,7 @@ static u32 show_num_errors = 1;

static u32 num_free_entries;
static u32 min_free_entries;
+static u32 nr_total_entries;

/* number of preallocated entries requested by kernel cmdline */
static u32 req_entries;
@@ -257,6 +258,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
put_hash_bucket(bucket, &flags);
}

+static struct dma_debug_entry *__dma_entry_alloc(void)
+{
+ struct dma_debug_entry *entry;
+
+ entry = list_entry(free_entries.next, struct dma_debug_entry, list);
+ list_del(&entry->list);
+ memset(entry, 0, sizeof(*entry));
+
+ num_free_entries -= 1;
+ if (num_free_entries < min_free_entries)
+ min_free_entries = num_free_entries;
+
+ return entry;
+}
+
/* struct dma_entry allocator
*
* The next two functions implement the allocator for
@@ -276,9 +292,7 @@ static struct dma_debug_entry *dma_entry_alloc(void)
goto out;
}

- entry = list_entry(free_entries.next, struct dma_debug_entry, list);
- list_del(&entry->list);
- memset(entry, 0, sizeof(*entry));
+ entry = __dma_entry_alloc();

#ifdef CONFIG_STACKTRACE
entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
@@ -286,9 +300,6 @@ static struct dma_debug_entry *dma_entry_alloc(void)
entry->stacktrace.skip = 2;
save_stack_trace(&entry->stacktrace);
#endif
- num_free_entries -= 1;
- if (num_free_entries < min_free_entries)
- min_free_entries = num_free_entries;

out:
spin_unlock_irqrestore(&free_entries_lock, flags);
@@ -310,6 +321,53 @@ static void dma_entry_free(struct dma_debug_entry *entry)
spin_unlock_irqrestore(&free_entries_lock, flags);
}

+int dma_debug_resize_entries(u32 num_entries)
+{
+ int i, delta, ret = 0;
+ unsigned long flags;
+ struct dma_debug_entry *entry;
+ LIST_HEAD(tmp);
+
+ spin_lock_irqsave(&free_entries_lock, flags);
+
+ if (nr_total_entries < num_entries) {
+ delta = num_entries - nr_total_entries;
+
+ spin_unlock_irqrestore(&free_entries_lock, flags);
+
+ for (i = 0; i < delta; i++) {
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ break;
+
+ list_add_tail(&entry->list, &tmp);
+ }
+
+ spin_lock_irqsave(&free_entries_lock, flags);
+
+ list_splice(&tmp, &free_entries);
+ nr_total_entries += i;
+ num_free_entries += i;
+ } else {
+ delta = nr_total_entries - num_entries;
+
+ for (i = 0; i < delta && !list_empty(&free_entries); i++) {
+ entry = __dma_entry_alloc();
+ kfree(entry);
+ }
+
+ nr_total_entries -= i;
+ }
+
+ if (nr_total_entries != num_entries)
+ ret = 1;
+
+ spin_unlock_irqrestore(&free_entries_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(dma_debug_resize_entries);
+
/*
* DMA-API debugging init code
*
@@ -490,6 +548,8 @@ void dma_debug_init(u32 num_entries)
return;
}

+ nr_total_entries = num_free_entries;
+
printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n");
}

2009-04-15 11:27:19

by FUJITA Tomonori

Subject: [tip:core/iommu] x86 gart: reimplement IOMMU_LEAK feature by using DMA_API_DEBUG

Commit-ID: 19c1a6f5764d787113fa323ffb18be7991208f82
Gitweb: http://git.kernel.org/tip/19c1a6f5764d787113fa323ffb18be7991208f82
Author: FUJITA Tomonori <[email protected]>
AuthorDate: Tue, 14 Apr 2009 09:43:19 +0900
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 15 Apr 2009 12:22:47 +0200

x86 gart: reimplement IOMMU_LEAK feature by using DMA_API_DEBUG

IOMMU_LEAK, GART's own feature, dumps the used IOMMU entries when the
IOMMU runs out of entries, which can be useful for finding a bad driver
that eats IOMMU entries.

DMA_API_DEBUG provides a similar feature, debug_dma_dump_mappings, and
it is better than GART's IOMMU_LEAK feature: the latter doesn't say who
is using the IOMMU entries, so it is hard to find a bad driver.

This patch reimplements GART's IOMMU_LEAK feature by using
DMA_API_DEBUG.

Signed-off-by: FUJITA Tomonori <[email protected]>
Acked-by: Joerg Roedel <[email protected]>
Cc: Andrew Morton <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
arch/x86/Kconfig.debug | 3 +-
arch/x86/kernel/pci-gart_64.c | 45 +++++++---------------------------------
2 files changed, 9 insertions(+), 39 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d8359e7..5865712 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -161,8 +161,7 @@ config IOMMU_DEBUG

config IOMMU_LEAK
bool "IOMMU leak tracing"
- depends on DEBUG_KERNEL
- depends on IOMMU_DEBUG
+ depends on IOMMU_DEBUG && DMA_API_DEBUG
---help---
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index b284b58..1e8920d 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -144,48 +144,21 @@ static void flush_gart(void)
}

#ifdef CONFIG_IOMMU_LEAK
-
-#define SET_LEAK(x) \
- do { \
- if (iommu_leak_tab) \
- iommu_leak_tab[x] = __builtin_return_address(0);\
- } while (0)
-
-#define CLEAR_LEAK(x) \
- do { \
- if (iommu_leak_tab) \
- iommu_leak_tab[x] = NULL; \
- } while (0)
-
/* Debugging aid for drivers that don't free their IOMMU tables */
-static void **iommu_leak_tab;
static int leak_trace;
static int iommu_leak_pages = 20;

static void dump_leak(void)
{
- int i;
static int dump;

- if (dump || !iommu_leak_tab)
+ if (dump)
return;
dump = 1;
- show_stack(NULL, NULL);

- /* Very crude. dump some from the end of the table too */
- printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n",
- iommu_leak_pages);
- for (i = 0; i < iommu_leak_pages; i += 2) {
- printk(KERN_DEBUG "%lu: ", iommu_pages-i);
- printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
- 0);
- printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
- }
- printk(KERN_DEBUG "\n");
+ show_stack(NULL, NULL);
+ debug_dma_dump_mappings(NULL);
}
-#else
-# define SET_LEAK(x)
-# define CLEAR_LEAK(x)
#endif

static void iommu_full(struct device *dev, size_t size, int dir)
@@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,

for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
- SET_LEAK(iommu_page + i);
phys_mem += PAGE_SIZE;
}
return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
@@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
- CLEAR_LEAK(iommu_page + i);
}
free_iommu(iommu_page, npages);
}
@@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
while (pages--) {
iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
- SET_LEAK(iommu_page);
addr += PAGE_SIZE;
iommu_page++;
}
@@ -801,11 +771,12 @@ void __init gart_iommu_init(void)

#ifdef CONFIG_IOMMU_LEAK
if (leak_trace) {
- iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
- get_order(iommu_pages*sizeof(void *)));
- if (!iommu_leak_tab)
+ int ret;
+
+ ret = dma_debug_resize_entries(iommu_pages);
+ if (ret)
printk(KERN_DEBUG
- "PCI-DMA: Cannot allocate leak trace area\n");
+ "PCI-DMA: Cannot trace all the entries\n");
}
#endif