Hi,
this small series of patches adds three additional checks to the dma-api
debugging code. The first one checks if dma mappings are requested for
kernel text or rodata segments.
The second check added makes noise if a device is unbound from its
driver and there are still pending dma allocations we are aware of.
Joerg
diffstat:
arch/x86/kernel/pci-dma.c | 4 ++
include/linux/dma-debug.h | 7 ++++
lib/dma-debug.c | 81 ++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 91 insertions(+), 1 deletions(-)
Impact: get notified if a device dma maps illegal areas
This patch adds a check to print a warning message when a device driver
tries to map a memory area from the kernel text segment or rodata.
Signed-off-by: Joerg Roedel <[email protected]>
---
lib/dma-debug.c | 26 +++++++++++++++++++++++++-
1 files changed, 25 insertions(+), 1 deletions(-)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 128d9de..b6a303f 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -29,6 +29,8 @@
#include <linux/list.h>
#include <linux/slab.h>
+#include <asm/sections.h>
+
#define HASH_SIZE 1024ULL
#define HASH_FN_SHIFT 13
#define HASH_FN_MASK (HASH_SIZE - 1)
@@ -549,6 +551,24 @@ static void check_for_stack(struct device *dev, void *addr)
"stack [addr=%p]\n", addr);
}
+static inline bool overlap(void *addr, u64 size, void *start, void *end)
+{
+ void *addr2 = (char *)addr + size;
+
+ return ((addr >= start && addr < end) ||
+ (addr2 >= start && addr2 < end) ||
+ ((addr < start) && (addr2 >= end)));
+}
+
+static void check_for_illegal_area(struct device *dev, void *addr, u64 size)
+{
+ if (overlap(addr, size, _text, _etext) ||
+ overlap(addr, size, __start_rodata, __end_rodata))
+ err_printk(dev, NULL, "DMA-API: device driver maps "
+ "memory from kernel text or rodata "
+ "[addr=%p] [size=%llu]\n", addr, size);
+}
+
static void check_sync(struct device *dev, dma_addr_t addr,
u64 size, u64 offset, int direction, bool to_cpu)
{
@@ -645,8 +665,11 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
entry->direction = direction;
if (map_single) {
+ void *addr = ((char *)page_address(page)) + offset;
+
entry->type = dma_debug_single;
- check_for_stack(dev, page_address(page) + offset);
+ check_for_stack(dev, addr);
+ check_for_illegal_area(dev, addr, size);
}
add_dma_entry(entry);
@@ -699,6 +722,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
entry->sg_mapped_ents = mapped_ents;
check_for_stack(dev, sg_virt(s));
+ check_for_illegal_area(dev, sg_virt(s), s->length);
add_dma_entry(entry);
}
--
1.5.6.4
Impact: detect dma memory leaks for pci devices
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/pci-dma.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index ebf7d45..c7c4776 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -271,6 +271,10 @@ static int __init pci_iommu_init(void)
{
dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+#ifdef CONFIG_PCI
+ dma_debug_add_bus(&pci_bus_type);
+#endif
+
calgary_iommu_init();
intel_iommu_init();
--
1.5.6.4
Impact: allow architectures to monitor busses for dma mem leakage
This patch adds checking code to detect if a device has pending DMA
operations when it is about to be unbound from its device driver.
Signed-off-by: Joerg Roedel <[email protected]>
---
include/linux/dma-debug.h | 7 +++++
lib/dma-debug.c | 55 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 62 insertions(+), 0 deletions(-)
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 46a11c1..e851d23 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -24,9 +24,12 @@
struct device;
struct scatterlist;
+struct bus_type;
#ifdef CONFIG_DMA_API_DEBUG
+extern void dma_debug_add_bus(struct bus_type *bus);
+
extern void dma_debug_init(u32 num_entries);
extern void debug_dma_map_page(struct device *dev, struct page *page,
@@ -80,6 +83,10 @@ extern void debug_dma_dump_mappings(struct device *dev);
#else /* CONFIG_DMA_API_DEBUG */
+void dma_debug_add_bus(struct bus_type *bus)
+{
+}
+
static inline void dma_debug_init(u32 num_entries)
{
}
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index b6a303f..3b20ae9 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -400,6 +400,61 @@ out_err:
return -ENOMEM;
}
+static int device_dma_allocations(struct device *dev)
+{
+ struct dma_debug_entry *entry;
+ unsigned long flags;
+ int count = 0, i;
+
+ for (i = 0; i < HASH_SIZE; ++i) {
+ spin_lock_irqsave(&dma_entry_hash[i].lock, flags);
+ list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
+ if (entry->dev == dev)
+ count += 1;
+ }
+ spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags);
+ }
+
+ return count;
+}
+
+static int dma_debug_device_change(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+ int count;
+
+
+ switch (action) {
+ case BUS_NOTIFY_UNBIND_DRIVER:
+ count = device_dma_allocations(dev);
+ if (count == 0)
+ break;
+ err_printk(dev, NULL, "DMA-API: device driver has pending "
+ "DMA allocations while released from device "
+ "[count=%d]\n", count);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void dma_debug_add_bus(struct bus_type *bus)
+{
+ struct notifier_block *nb;
+
+ nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
+ if (nb == NULL) {
+ printk(KERN_ERR "dma_debug_add_bus: out of memory\n");
+ return;
+ }
+
+ nb->notifier_call = dma_debug_device_change;
+
+ bus_register_notifier(bus, nb);
+}
/*
* Let the architectures decide how many entries should be preallocated.
--
1.5.6.4
On Mon, Mar 16, 2009 at 06:05:27PM +0100, Joerg Roedel wrote:
> Hi,
>
> this small series of patches adds three additional checks to the dma-api
> debugging code. The first one checks if dma mappings are requested for
> kernel text or rodata segments.
> The second check added makes noise if a device is unbound from its
> driver and there are still pending dma allocations we are aware of.
>
> Joerg
>
> diffstat:
>
> arch/x86/kernel/pci-dma.c | 4 ++
> include/linux/dma-debug.h | 7 ++++
> lib/dma-debug.c | 81 ++++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 91 insertions(+), 1 deletions(-)
Ingo,
you can also pull these patches from my dma-api/debug branch at
git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu.git dma-api/debug
including the other dma-debug patches relative to your tip/core/iommu
branch.
The complete diffstat and shortlog against tip/core/iommu is here:
Documentation/DMA-API.txt | 106 ++++
Documentation/kernel-parameters.txt | 10 +
arch/Kconfig | 2 +
arch/x86/Kconfig | 1 +
arch/x86/include/asm/dma-mapping.h | 45 ++-
arch/x86/kernel/pci-dma.c | 10 +
include/linux/dma-debug.h | 174 +++++++
lib/Kconfig.debug | 11 +
lib/Makefile | 2 +
lib/dma-debug.c | 949 +++++++++++++++++++++++++++++++++++
10 files changed, 1304 insertions(+), 6 deletions(-)
David Woodhouse (2):
dma-debug: add function to dump dma mappings
dma-debug: print stacktrace of mapping path on unmap error
Joerg Roedel (19):
dma-debug: add Kconfig entry
dma-debug: add header file and core data structures
dma-debug: add hash functions for dma_debug_entries
dma-debug: add allocator code
dma-debug: add initialization code
dma-debug: add kernel command line parameters
dma-debug: add debugfs interface
dma-debug: add core checking functions
dma-debug: add checking for map/unmap_page/single
dma-debug: add add checking for map/unmap_sg
dma-debug: add checking for [alloc|free]_coherent
dma-debug: add checks for sync_single_*
dma-debug: add checks for sync_single_range_*
dma-debug: add checks for sync_single_sg_*
dma-debug: x86 architecture bindings
dma-debug: Documentation update
dma-debug: add checks for kernel text and rodata
dma-debug: add a check dma memory leaks
dma-debug/x86: register pci bus for dma-debug leak detection
Joerg
--
| Advanced Micro Devices GmbH
Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München
System |
Research | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
| Registergericht München, HRB Nr. 43632
* Joerg Roedel <[email protected]> wrote:
> On Mon, Mar 16, 2009 at 06:05:27PM +0100, Joerg Roedel wrote:
> > Hi,
> >
> > this small series of patches adds three additional checks to the dma-api
> > debugging code. The first one checks if dma mappings are requested for
> > kernel text or rodata segments.
> > The second check added makes noise if a device is unbound from its
> > driver and there are still pending dma allocations we are aware of.
> >
> > Joerg
> >
> > diffstat:
> >
> > arch/x86/kernel/pci-dma.c | 4 ++
> > include/linux/dma-debug.h | 7 ++++
> > lib/dma-debug.c | 81 ++++++++++++++++++++++++++++++++++++++++++++-
> > 3 files changed, 91 insertions(+), 1 deletions(-)
>
> Ingo,
>
> you can also pull these patches from my dma-api/debug branch at
>
> git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu.git dma-api/debug
>
> including the other dma-debug patches relative to your tip/core/iommu
> branch.
>
> The complete diffstat and shortlog against tip/core/iommu is here:
>
> Documentation/DMA-API.txt | 106 ++++
> Documentation/kernel-parameters.txt | 10 +
> arch/Kconfig | 2 +
> arch/x86/Kconfig | 1 +
> arch/x86/include/asm/dma-mapping.h | 45 ++-
> arch/x86/kernel/pci-dma.c | 10 +
> include/linux/dma-debug.h | 174 +++++++
> lib/Kconfig.debug | 11 +
> lib/Makefile | 2 +
> lib/dma-debug.c | 949 +++++++++++++++++++++++++++++++++++
> 10 files changed, 1304 insertions(+), 6 deletions(-)
Pulled into tip:core/iommu and started testing it, thanks a lot
Joerg!
btw., on latest Fedora rawhide a testbox of mine is getting this:
e1000e 0000:00:19.0: irq 30 for MSI/MSI-X
e1000e 0000:00:19.0: irq 30 for MSI/MSI-X
ADDRCONF(NETDEV_UP): eth0: link is not ready
e1000e: eth0 NIC Link is Up 100 Mbps Full Duplex, Flow Control: RX/TX
0000:00:19.0: eth0: 10/100 speed: disabling TSO
ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
------------[ cut here ]------------
WARNING: at lib/dma-debug.c:461 check_unmap+0xd4/0x3dd() (Not tainted)
Hardware name: 2241B48
e1000e 0000:00:19.0: DMA-API: device driver tries to free DMA memory
it has not allocated [device address=0x0000000052ff084a] [size=90 bytes]
[<ffffffff811a6fb0>] check_unmap+0xd4/0x3dd
[<ffffffff811a7406>] debug_dma_unmap_page+0x50/0x52
[<ffffffffa00f4523>] pci_unmap_page+0x4e/0x57 [e1000e]
[<ffffffffa00f455a>] e1000_put_txbuf+0x2e/0x4f [e1000e]
[<ffffffffa00f4681>] e1000_clean_tx_irq+0xc8/0x2c2 [e1000e]
[<ffffffffa00f809b>] ? e1000_clean+0x6b/0x246 [e1000e]
[<ffffffffa00f80a7>] e1000_clean+0x77/0x246 [e1000e]
[<ffffffff812f89dd>] net_rx_action+0xb6/0x1ee
[<ffffffff812f8acc>] ? net_rx_action+0x1a5/0x1ee
[<ffffffff81051353>] __do_softirq+0x94/0x179
[<ffffffff810127ac>] call_softirq+0x1c/0x30
[<ffffffff8101393e>] do_softirq+0x52/0xb9
[<ffffffff81050f76>] irq_exit+0x53/0x90
[<ffffffff81013c57>] do_IRQ+0x12c/0x151
[<ffffffff81011e93>] ret_from_intr+0x0/0x2e
<EOI> [<ffffffff811fe088>] ? acpi_idle_enter_bm+0x287/0x2de
[<ffffffff8106fbf9>] ? trace_hardirqs_on+0xd/0xf
[<ffffffff811fe090>] ? acpi_idle_enter_bm+0x28f/0x2de
[<ffffffff811fe088>] ? acpi_idle_enter_bm+0x287/0x2de
[<ffffffff81399916>] ? __atomic_notifier_call_chain+0x0/0x86
[<ffffffff812d62ad>] ? cpuidle_idle_call+0x8d/0xc4
[<ffffffff810102c7>] ? cpu_idle+0x68/0xb3
[<ffffffff81381817>] ? rest_init+0x6b/0x6d
---[ end trace b8ae2341b2e9bbc2 ]---
i have some vague memories of this being discussed somewhere - it
got fixed in the driver, right? If yes, do you have an URL to that
fix? (if it's not upstream yet i will pick it up into
tip:out-of-tree to not have already-fixed warnings)
Ingo
Commit-ID: 84be58d4601c86306cd939ebf58a9b90989883a4
Gitweb: http://git.kernel.org/tip/84be58d4601c86306cd939ebf58a9b90989883a4
Author: Ingo Molnar <[email protected]>
AuthorDate: Wed, 18 Mar 2009 11:50:29 +0100
Commit: Ingo Molnar <[email protected]>
CommitDate: Wed, 18 Mar 2009 11:53:48 +0100
dma-debug: fix dma_debug_add_bus() definition for !CONFIG_DMA_API_DEBUG
Impact: build fix
Fix:
arch/x86/kvm/x86.o: In function `dma_debug_add_bus':
(.text+0x0): multiple definition of `dma_debug_add_bus'
dma_debug_add_bus() should be a static inline function.
Cc: Joerg Roedel <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
include/linux/dma-debug.h | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index e851d23..28d53cb 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -83,7 +83,7 @@ extern void debug_dma_dump_mappings(struct device *dev);
#else /* CONFIG_DMA_API_DEBUG */
-void dma_debug_add_bus(struct bus_type *bus)
+static inline void dma_debug_add_bus(struct bus_type *bus)
{
}
-tip testing found this DMA-debug assert:
[ 19.648023] eth0: no link during initialization.
[ 21.952553] eth0: link up.
[ 22.684073] ------------[ cut here ]------------
[ 22.688023] WARNING: at lib/dma-debug.c:562 check_unmap+0x30e/0x4b4()
[ 22.688023] Hardware name: System Product Name
[ 22.688023]
forcedeth 0000:00:0a.0: DMA-API: device driver frees DMA memory with wrong function [device address=0x0000000035992232] [size=42 bytes] [mapped as single] [unmapped as page]
[ 22.688023] Modules linked in:
[ 22.688023] Pid: 1523, comm: arping Not tainted 2.6.29-rc8-tip #20999
[ 22.688023] Call Trace:
[ 22.688023] [<c0140eae>] warn_slowpath+0x76/0xad
[ 22.688023] [<c0293a0b>] ? add_dma_entry+0x4b/0x51
[ 22.688023] [<c015eec5>] ? mark_lock+0x1c/0x16f
[ 22.688023] [<c015f1a4>] ? __lock_acquire+0x18c/0x296
[ 22.688023] [<c03473cc>] ? nv_start_xmit_optimized+0x3bd/0x3eb
[ 22.688023] [<c015eec5>] ? mark_lock+0x1c/0x16f
[ 22.688023] [<c015f1a4>] ? __lock_acquire+0x18c/0x296
[ 22.688023] [<c0293572>] check_unmap+0x30e/0x4b4
[ 22.688023] [<c055f7c3>] ? _spin_lock+0x27/0x2f
[ 22.688023] [<c0145ba1>] ? _local_bh_enable_ip+0xa1/0xa7
[ 22.688023] [<c049e0f8>] ? dev_queue_xmit+0x325/0x359
[ 22.688023] [<c0188922>] ? trace_hardirqs_on+0x21/0x23
[ 22.688023] [<c0145ba1>] ? _local_bh_enable_ip+0xa1/0xa7
[ 22.688023] [<c0145bc6>] ? local_bh_enable+0x10/0x12
[ 22.688023] [<c050225c>] ? packet_sendmsg+0x1b7/0x20a
[ 22.688023] [<c02939b8>] debug_dma_unmap_page+0x59/0x61
[ 22.688023] [<c034421f>] pci_unmap_page+0x5b/0x66
[ 22.688023] [<c03446fd>] nv_tx_done_optimized+0x46/0x1c5
[ 22.688023] [<c0345922>] nv_nic_irq_optimized+0xa7/0x233
[ 22.688023] [<c017706e>] handle_IRQ_event+0x85/0xdf
[ 22.688023] [<c01785df>] handle_fasteoi_irq+0x79/0xb9
[ 22.688023] [<c0119973>] handle_irq+0x1f/0x24
[ 22.688023] [<c0119332>] do_IRQ+0x45/0x88
[ 22.688023] [<c0118455>] common_interrupt+0x35/0x40
[ 22.688023] [<c0144515>] ? do_setitimer+0x160/0x2d9
[ 22.688023] [<c055f69b>] ? _spin_unlock_irq+0x29/0x2b
[ 22.688023] [<c0144515>] do_setitimer+0x160/0x2d9
[ 22.688023] [<c04926fa>] ? sys_socketcall+0xed/0x188
[ 22.688023] [<c014473b>] alarm_setitimer+0x39/0x58
[ 22.688023] [<c0149776>] sys_alarm+0x10/0x12
[ 22.688023] [<c0117d47>] sysenter_do_call+0x12/0x32
[ 22.688023] ---[ end trace bf7fa25698012db6 ]---
[ 22.688023] Mapped at:
[ 22.688023] [<c0293cc1>] debug_dma_map_page+0x6c/0x128
[ 22.688023] [<c0345bd0>] pci_map_single+0xb5/0xc1
[ 22.688023] [<c0347147>] nv_start_xmit_optimized+0x138/0x3eb
[ 22.688023] [<c049dc7c>] dev_hard_start_xmit+0x123/0x18f
[ 22.688023] [<c04acbb9>] __qdisc_run+0xd0/0x1b3
[ 23.700081] arping used greatest stack depth: 5988 bytes left
[ 24.545512] Adding 3911816k swap on /dev/sda2. Priority:-1 extents:1 across:3911816k
Config and full bootlog attached.
Ingo
another -tip testbox started triggering:
BUG: MAX_LOCKDEP_ENTRIES too low!
it triggers due to CONFIG_DMA_API_DEBUG=y. Config attached.
Ingo
another -tip testbox triggered a DMA-debug warning on e1000e:
[ 33.284944] e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX
[ 34.129676] ------------[ cut here ]------------
[ 34.132025] WARNING: at lib/dma-debug.c:562 check_unmap+0x364/0x800()
[ 34.132025] Hardware name:
[ 34.132025]
e1000e 0000:04:00.0: DMA-API: device driver frees
DMA memory with wrong function [device address=0x000000003e5a2c02]
[size=42 bytes] [mapped as single] [unmapped as page]
[ 34.132025] Modules linked in:
[ 34.132025] Pid: 1937, comm: arping Not tainted 2.6.29-rc8-tip-02728-g7db4920-dirty #2993
[ 34.132025] Call Trace:
[ 34.132025] <IRQ> [<ffffffff80281f3d>] warn_slowpath+0xfd/0x140
[ 34.132025] [<ffffffff80c12976>] ? fn_hash_lookup+0x16/0x100
[ 34.132025] [<ffffffff80bc3e65>] ? ip_route_input_slow+0x635/0x820
[ 34.132025] [<ffffffff80231656>] ? ftrace_call+0x5/0x2b
[ 34.132025] [<ffffffff80231656>] ? ftrace_call+0x5/0x2b
[ 34.132025] [<ffffffff80231656>] ? ftrace_call+0x5/0x2b
[ 34.132025] [<ffffffff8067ef04>] check_unmap+0x364/0x800
[ 34.132025] [<ffffffff80231656>] ? ftrace_call+0x5/0x2b
[ 34.132025] [<ffffffff80231656>] ? ftrace_call+0x5/0x2b
[ 34.132025] [<ffffffff8067f6d8>] debug_dma_unmap_page+0xd8/0xe0
[ 34.132025] [<ffffffff80231656>] ? ftrace_call+0x5/0x2b
[ 34.132025] [<ffffffff807758b5>] e1000_put_txbuf+0xd5/0x170
[ 34.132025] [<ffffffff80775a53>] e1000_clean_tx_irq+0x103/0x350
[ 34.132025] [<ffffffff8077b668>] e1000_clean+0x148/0x1a0
[ 34.132025] [<ffffffff80231656>] ? ftrace_call+0x5/0x2b
[ 34.132025] [<ffffffff80b570ab>] net_rx_action+0x2bb/0x340
[ 34.132025] [<ffffffff8028a433>] __do_softirq+0xf3/0x2b0
[ 34.132025] [<ffffffff802329fa>] call_softirq+0x1a/0x50
[ 34.132025] [<ffffffff80234ca9>] do_softirq+0xd9/0x1c0
[ 34.132025] [<ffffffff8028a333>] irq_exit+0xc3/0xd0
[ 34.132025] [<ffffffff80cf6a65>] do_IRQ+0x75/0x110
[ 34.132025] [<ffffffff80232393>] ret_from_intr+0x0/0x15
[ 34.132025] <EOI> [<ffffffff80231622>] ? ftrace_caller+0x12/0x41
[ 34.132025] [<ffffffff80231656>] ? ftrace_call+0x5/0x2b
[ 34.132025] [<ffffffff80231656>] ? ftrace_call+0x5/0x2b
[ 34.132025] [<ffffffff80337d96>] ? free_page_and_swap_cache+0x16/0x80
[ 34.132025] [<ffffffff8032439f>] ? zap_pte_range+0x41f/0x580
[ 34.132025] [<ffffffff80231656>] ? ftrace_call+0x5/0x2b
[ 34.132025] [<ffffffff80231656>] ? ftrace_call+0x5/0x2b
[ 34.132025] [<ffffffff803248c8>] ? unmap_page_range+0x3c8/0x3d0
[ 34.132025] [<ffffffff80324b9f>] ? unmap_vmas+0x2cf/0x390
[ 34.132025] [<ffffffff8032b2e0>] ? exit_mmap+0x130/0x1f0
[ 34.132025] [<ffffffff8027f628>] ? mmput+0x48/0x110
[ 34.132025] [<ffffffff8028542f>] ? exit_mm+0x14f/0x160
[ 34.132025] [<ffffffff80287a6a>] ? do_exit+0x24a/0x4d0
[ 34.132025] [<ffffffff80287d64>] ? do_group_exit+0x74/0x110
[ 34.132025] [<ffffffff80287e17>] ? sys_exit_group+0x17/0x20
[ 34.132025] [<ffffffff8023196f>] ? system_call_fastpath+0x16/0x1b
[ 34.132025] ---[ end trace dd8d48ce9f465553 ]---
[ 34.132025] Mapped at:
[ 34.132025] [<ffffffff8067fa37>] debug_dma_map_page+0x107/0x2a0
[ 34.132025] [<ffffffff8077711b>] e1000_tx_map+0x29b/0x870
[ 34.132025] [<ffffffff8077aa65>] e1000_xmit_frame+0x355/0x500
[ 34.132025] [<ffffffff80b54018>] dev_hard_start_xmit+0xb8/0x270
[ 34.132025] [<ffffffff80b775b0>] __qdisc_run+0x380/0x3b0
[ 35.650726] ------------[ cut here ]------------
Ingo
On Wed, 2009-03-18 at 12:23 +0100, Ingo Molnar wrote:
> another -tip testbox started triggering:
>
> BUG: MAX_LOCKDEP_ENTRIES too low!
>
> it triggers due to CONFIG_DMA_API_DEBUG=y. Config attached.
I still have this laying about.. could be we're just at the limit due to
lock bloat in the kernel, could be dma_api_debug is doing something
all-together iffy
Signed-off-by: Peter Zijlstra <[email protected]>
---
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 71b567f..3e1cc47 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -793,6 +793,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return NULL;
}
class = lock_classes + nr_lock_classes++;
@@ -856,6 +857,7 @@ static struct lock_list *alloc_list_entry(void)
printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return NULL;
}
return list_entries + nr_list_entries++;
@@ -1682,6 +1684,7 @@ cache_hit:
printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return 0;
}
chain = lock_chains + nr_lock_chains++;
@@ -2524,6 +2527,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
debug_locks_off();
printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return 0;
}
@@ -2620,6 +2624,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
debug_locks_off();
printk("BUG: MAX_LOCK_DEPTH too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return 0;
}
Copy&paste bug :(
Thanks for the fix.
On Wed, Mar 18, 2009 at 10:00:00AM +0000, Ingo Molnar wrote:
> Commit-ID: 84be58d4601c86306cd939ebf58a9b90989883a4
> Gitweb: http://git.kernel.org/tip/84be58d4601c86306cd939ebf58a9b90989883a4
> Author: Ingo Molnar <[email protected]>
> AuthorDate: Wed, 18 Mar 2009 11:50:29 +0100
> Commit: Ingo Molnar <[email protected]>
> CommitDate: Wed, 18 Mar 2009 11:53:48 +0100
>
> dma-debug: fix dma_debug_add_bus() definition for !CONFIG_DMA_API_DEBUG
>
> Impact: build fix
>
> Fix:
>
> arch/x86/kvm/x86.o: In function `dma_debug_add_bus':
> (.text+0x0): multiple definition of `dma_debug_add_bus'
>
> dma_debug_add_bus() should be a static inline function.
>
> Cc: Joerg Roedel <[email protected]>
> LKML-Reference: <[email protected]>
> Signed-off-by: Ingo Molnar <[email protected]>
>
>
> ---
> include/linux/dma-debug.h | 2 +-
> 1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
> index e851d23..28d53cb 100644
> --- a/include/linux/dma-debug.h
> +++ b/include/linux/dma-debug.h
> @@ -83,7 +83,7 @@ extern void debug_dma_dump_mappings(struct device *dev);
>
> #else /* CONFIG_DMA_API_DEBUG */
>
> -void dma_debug_add_bus(struct bus_type *bus)
> +static inline void dma_debug_add_bus(struct bus_type *bus)
> {
> }
>
>
--
| Advanced Micro Devices GmbH
Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München
System |
Research | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
| Registergericht München, HRB Nr. 43632
On Wed, Mar 18, 2009 at 10:38:47AM +0100, Ingo Molnar wrote:
> e1000e 0000:00:19.0: irq 30 for MSI/MSI-X
> e1000e 0000:00:19.0: irq 30 for MSI/MSI-X
> ADDRCONF(NETDEV_UP): eth0: link is not ready
> e1000e: eth0 NIC Link is Up 100 Mbps Full Duplex, Flow Control: RX/TX
> 0000:00:19.0: eth0: 10/100 speed: disabling TSO
> ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
> ------------[ cut here ]------------
> WARNING: at lib/dma-debug.c:461 check_unmap+0xd4/0x3dd() (Not tainted)
> Hardware name: 2241B48
> e1000e 0000:00:19.0: DMA-API: device driver tries to free DMA memory
> it has not allocated [device address=0x0000000052ff084a] [size=90 bytes]
> [<ffffffff811a6fb0>] check_unmap+0xd4/0x3dd
> [<ffffffff811a7406>] debug_dma_unmap_page+0x50/0x52
> [<ffffffffa00f4523>] pci_unmap_page+0x4e/0x57 [e1000e]
> [<ffffffffa00f455a>] e1000_put_txbuf+0x2e/0x4f [e1000e]
> [<ffffffffa00f4681>] e1000_clean_tx_irq+0xc8/0x2c2 [e1000e]
> [<ffffffffa00f809b>] ? e1000_clean+0x6b/0x246 [e1000e]
> [<ffffffffa00f80a7>] e1000_clean+0x77/0x246 [e1000e]
> [<ffffffff812f89dd>] net_rx_action+0xb6/0x1ee
> [<ffffffff812f8acc>] ? net_rx_action+0x1a5/0x1ee
> [<ffffffff81051353>] __do_softirq+0x94/0x179
> [<ffffffff810127ac>] call_softirq+0x1c/0x30
> [<ffffffff8101393e>] do_softirq+0x52/0xb9
> [<ffffffff81050f76>] irq_exit+0x53/0x90
> [<ffffffff81013c57>] do_IRQ+0x12c/0x151
> [<ffffffff81011e93>] ret_from_intr+0x0/0x2e
> <EOI> [<ffffffff811fe088>] ? acpi_idle_enter_bm+0x287/0x2de
> [<ffffffff8106fbf9>] ? trace_hardirqs_on+0xd/0xf
> [<ffffffff811fe090>] ? acpi_idle_enter_bm+0x28f/0x2de
> [<ffffffff811fe088>] ? acpi_idle_enter_bm+0x287/0x2de
> [<ffffffff81399916>] ? __atomic_notifier_call_chain+0x0/0x86
> [<ffffffff812d62ad>] ? cpuidle_idle_call+0x8d/0xc4
> [<ffffffff810102c7>] ? cpu_idle+0x68/0xb3
> [<ffffffff81381817>] ? rest_init+0x6b/0x6d
> ---[ end trace b8ae2341b2e9bbc2 ]---
>
> i have some vague memories of this being discussed somewhere - it
> got fixed in the driver, right? If yes, do you have an URL to that
> fix? (if it's not upstream yet i will pick it up into
> tip:out-of-tree to not have already-fixed warnings)
We had this issue in the ixgbe driver and it was fixed there. As far as
I know for e1000e this is a new issue.
Joerg
--
| Advanced Micro Devices GmbH
Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München
System |
Research | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
| Registergericht München, HRB Nr. 43632
Commit-ID: 465499895e41ee15ebc75fe90dfe7b595e9b46d5
Gitweb: http://git.kernel.org/tip/465499895e41ee15ebc75fe90dfe7b595e9b46d5
Author: Peter Zijlstra <[email protected]>
AuthorDate: Wed, 18 Mar 2009 12:38:47 +0100
Commit: Ingo Molnar <[email protected]>
CommitDate: Wed, 18 Mar 2009 12:55:10 +0100
lockdep: add stack dumps to asserts
Have a better idea about exactly which loc causes a lockdep
limit overflow. Often it's a bug or inefficiency in that
subsystem.
Signed-off-by: Peter Zijlstra <[email protected]>
LKML-Reference: <1237376327.5069.253.camel@laptop>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/lockdep.c | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 9a1e2bc..77ac37f 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -792,6 +792,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return NULL;
}
class = lock_classes + nr_lock_classes++;
@@ -855,6 +856,7 @@ static struct lock_list *alloc_list_entry(void)
printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return NULL;
}
return list_entries + nr_list_entries++;
@@ -1681,6 +1683,7 @@ cache_hit:
printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return 0;
}
chain = lock_chains + nr_lock_chains++;
@@ -2523,6 +2526,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
debug_locks_off();
printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return 0;
}
@@ -2619,6 +2623,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
debug_locks_off();
printk("BUG: MAX_LOCK_DEPTH too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return 0;
}
( e1000e Cc:s added. A new debug feature, CONFIG_DMA_API_DEBUG=y,
has triggered the warning below in e1000_put_txbuf(). )
* Joerg Roedel <[email protected]> wrote:
> On Wed, Mar 18, 2009 at 10:38:47AM +0100, Ingo Molnar wrote:
> > e1000e 0000:00:19.0: irq 30 for MSI/MSI-X
> > e1000e 0000:00:19.0: irq 30 for MSI/MSI-X
> > ADDRCONF(NETDEV_UP): eth0: link is not ready
> > e1000e: eth0 NIC Link is Up 100 Mbps Full Duplex, Flow Control: RX/TX
> > 0000:00:19.0: eth0: 10/100 speed: disabling TSO
> > ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
> > ------------[ cut here ]------------
> > WARNING: at lib/dma-debug.c:461 check_unmap+0xd4/0x3dd() (Not tainted)
> > Hardware name: 2241B48
> > e1000e 0000:00:19.0: DMA-API: device driver tries to free DMA memory
> > it has not allocated [device address=0x0000000052ff084a] [size=90 bytes]
> > [<ffffffff811a6fb0>] check_unmap+0xd4/0x3dd
> > [<ffffffff811a7406>] debug_dma_unmap_page+0x50/0x52
> > [<ffffffffa00f4523>] pci_unmap_page+0x4e/0x57 [e1000e]
> > [<ffffffffa00f455a>] e1000_put_txbuf+0x2e/0x4f [e1000e]
> > [<ffffffffa00f4681>] e1000_clean_tx_irq+0xc8/0x2c2 [e1000e]
> > [<ffffffffa00f809b>] ? e1000_clean+0x6b/0x246 [e1000e]
> > [<ffffffffa00f80a7>] e1000_clean+0x77/0x246 [e1000e]
> > [<ffffffff812f89dd>] net_rx_action+0xb6/0x1ee
> > [<ffffffff812f8acc>] ? net_rx_action+0x1a5/0x1ee
> > [<ffffffff81051353>] __do_softirq+0x94/0x179
> > [<ffffffff810127ac>] call_softirq+0x1c/0x30
> > [<ffffffff8101393e>] do_softirq+0x52/0xb9
> > [<ffffffff81050f76>] irq_exit+0x53/0x90
> > [<ffffffff81013c57>] do_IRQ+0x12c/0x151
> > [<ffffffff81011e93>] ret_from_intr+0x0/0x2e
> > <EOI> [<ffffffff811fe088>] ? acpi_idle_enter_bm+0x287/0x2de
> > [<ffffffff8106fbf9>] ? trace_hardirqs_on+0xd/0xf
> > [<ffffffff811fe090>] ? acpi_idle_enter_bm+0x28f/0x2de
> > [<ffffffff811fe088>] ? acpi_idle_enter_bm+0x287/0x2de
> > [<ffffffff81399916>] ? __atomic_notifier_call_chain+0x0/0x86
> > [<ffffffff812d62ad>] ? cpuidle_idle_call+0x8d/0xc4
> > [<ffffffff810102c7>] ? cpu_idle+0x68/0xb3
> > [<ffffffff81381817>] ? rest_init+0x6b/0x6d
> > ---[ end trace b8ae2341b2e9bbc2 ]---
> >
> > i have some vague memories of this being discussed somewhere - it
> > got fixed in the driver, right? If yes, do you have an URL to that
> > fix? (if it's not upstream yet i will pick it up into
> > tip:out-of-tree to not have already-fixed warnings)
>
> We had this issue in the ixgbe driver and it was fixed there. As far as
> I know for e1000e this is a new issue.
>
> Joerg
>
> --
> | Advanced Micro Devices GmbH
> Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei M?nchen
> System |
> Research | Gesch?ftsf?hrer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
> Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis M?nchen
> | Registergericht M?nchen, HRB Nr. 43632
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
On Wed, Mar 18, 2009 at 12:38:47PM +0100, Peter Zijlstra wrote:
> On Wed, 2009-03-18 at 12:23 +0100, Ingo Molnar wrote:
> > another -tip testbox started triggering:
> >
> > BUG: MAX_LOCKDEP_ENTRIES too low!
> >
> > it triggers due to CONFIG_DMA_API_DEBUG=y. Config attached.
>
>
> I still have this laying about.. could be we're just at the limit due to
> lock bloat in the kernel, could be dma_api_debug is doing something
> all-together iffy
I had a look and the maximum locking depth in dma-debug code was two.
Attached patch reduces this to one.
>From d28fc4a308bf66ed98c68e1db18e4e1434206541 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <[email protected]>
Date: Wed, 18 Mar 2009 13:15:20 +0100
Subject: [PATCH] dma-debug: serialize locking in unmap path
Impact: reduce maximum lockdepth to one
This patch reduces the maximum spin lock depth from two to one in the
dma-debug code.
Signed-off-by: Joerg Roedel <[email protected]>
---
lib/dma-debug.c | 7 +++----
1 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 9a350b4..3bd8d1d 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -542,7 +542,8 @@ static void check_unmap(struct dma_debug_entry *ref)
"to free DMA memory it has not allocated "
"[device address=0x%016llx] [size=%llu bytes]\n",
ref->dev_addr, ref->size);
- goto out;
+ put_hash_bucket(bucket, &flags);
+ return;
}
if (ref->size != entry->size) {
@@ -593,10 +594,8 @@ static void check_unmap(struct dma_debug_entry *ref)
}
hash_bucket_del(entry);
- dma_entry_free(entry);
-
-out:
put_hash_bucket(bucket, &flags);
+ dma_entry_free(entry);
}
static void check_for_stack(struct device *dev, void *addr)
--
1.5.6.4
--
| Advanced Micro Devices GmbH
Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München
System |
Research | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
| Registergericht München, HRB Nr. 43632
On Wed, 2009-03-18 at 13:19 +0100, Joerg Roedel wrote:
> On Wed, Mar 18, 2009 at 12:38:47PM +0100, Peter Zijlstra wrote:
> > On Wed, 2009-03-18 at 12:23 +0100, Ingo Molnar wrote:
> > > another -tip testbox started triggering:
> > >
> > > BUG: MAX_LOCKDEP_ENTRIES too low!
> > >
> > > it triggers due to CONFIG_DMA_API_DEBUG=y. Config attached.
> >
> >
> > I still have this laying about.. could be we're just at the limit due to
> > lock bloat in the kernel, could be dma_api_debug is doing something
> > all-together iffy
>
> I had a look and the maximum locking depth in dma-debug code was two.
> Attached patch reduces this to one.
>
> From d28fc4a308bf66ed98c68e1db18e4e1434206541 Mon Sep 17 00:00:00 2001
> From: Joerg Roedel <[email protected]>
> Date: Wed, 18 Mar 2009 13:15:20 +0100
> Subject: [PATCH] dma-debug: serialize locking in unmap path
>
> Impact: reduce maximum lockdepth to one
>
> This patch reduces the maximum spin lock depth from two to one in the
> dma-debug code.
While appreciated, this failure is not about lock depth, but about lock
entries, that is items in the dependency chains.
Of course, these two are not unrelated, deeper lock hierarchies lead to
longer chains -> more entries.
Assuming dma api debug doesn't do anything spectaculary odd, I'd say
we've just lock bloated the kernel and might need to increase this
static array a bit.
* Peter Zijlstra <[email protected]> wrote:
> On Wed, 2009-03-18 at 13:19 +0100, Joerg Roedel wrote:
> > On Wed, Mar 18, 2009 at 12:38:47PM +0100, Peter Zijlstra wrote:
> > > On Wed, 2009-03-18 at 12:23 +0100, Ingo Molnar wrote:
> > > > another -tip testbox started triggering:
> > > >
> > > > BUG: MAX_LOCKDEP_ENTRIES too low!
> > > >
> > > > it triggers due to CONFIG_DMA_API_DEBUG=y. Config attached.
> > >
> > >
> > > I still have this laying about.. could be we're just at the limit due to
> > > lock bloat in the kernel, could be dma_api_debug is doing something
> > > all-together iffy
> >
> > I had a look and the maximum locking depth in dma-debug code was two.
> > Attached patch reduces this to one.
> >
> > From d28fc4a308bf66ed98c68e1db18e4e1434206541 Mon Sep 17 00:00:00 2001
> > From: Joerg Roedel <[email protected]>
> > Date: Wed, 18 Mar 2009 13:15:20 +0100
> > Subject: [PATCH] dma-debug: serialize locking in unmap path
> >
> > Impact: reduce maximum lockdepth to one
> >
> > This patch reduces the maximum spin lock depth from two to one in the
> > dma-debug code.
>
> While appreciated, this failure is not about lock depth, but about
> lock entries, that is items in the dependency chains.
>
> Of course, these two are not unrelated, deeper lock hierarchies
> lead to longer chains -> more entries.
>
> Assuming dma api debug doesn't do anything spectaculary odd, I'd
> say we've just lock bloated the kernel and might need to increase
> this static array a bit.
appears to be the case:
BUG: MAX_LOCKDEP_ENTRIES too low!
turning off the locking correctness validator.
Pid: 7508, comm: sshd Not tainted 2.6.29-rc8-tip-02759-g4bb5a10-dirty #21037
Call Trace:
[<ffffffff802679aa>] add_lock_to_list+0x53/0xba
[<ffffffff8065b0e9>] ? add_dma_entry+0x2f/0x5d
[<ffffffff80269398>] check_prev_add+0x14b/0x1c7
[<ffffffff8026985d>] validate_chain+0x449/0x4f7
[<ffffffff80269b96>] __lock_acquire+0x28b/0x302
[<ffffffff80269d07>] lock_acquire+0xfa/0x11e
[<ffffffff8065b0e9>] ? add_dma_entry+0x2f/0x5d
[<ffffffff80c9cf77>] _spin_lock_irqsave+0x4c/0x84
[<ffffffff8065b0e9>] ? add_dma_entry+0x2f/0x5d
[<ffffffff8065b0e9>] add_dma_entry+0x2f/0x5d
[<ffffffff8065bbd6>] debug_dma_map_page+0x110/0x11f
[<ffffffff807f2775>] pci_map_single+0xb5/0xc7
[<ffffffff807f36d7>] nv_start_xmit_optimized+0x174/0x49c
[<ffffffff80269fbd>] ? __lock_acquired+0x182/0x1a7
[<ffffffff80af7d20>] dev_hard_start_xmit+0xd4/0x147
[<ffffffff80b11c08>] __qdisc_run+0xf4/0x200
[<ffffffff80af80b0>] dev_queue_xmit+0x21f/0x32a
[<ffffffff80b3051f>] ip_finish_output2+0x205/0x24e
[<ffffffff80b305c9>] ip_finish_output+0x61/0x63
[<ffffffff80b3066d>] ip_output+0xa2/0xab
[<ffffffff80b2dfaf>] ip_local_out+0x65/0x67
[<ffffffff80b30122>] ip_queue_xmit+0x2f0/0x37b
[<ffffffff80267542>] ? register_lock_class+0x20/0x304
[<ffffffff80b40320>] tcp_transmit_skb+0x655/0x694
[<ffffffff80b42924>] tcp_write_xmit+0x2e2/0x3b6
[<ffffffff80b42a48>] __tcp_push_pending_frames+0x2f/0x61
[<ffffffff80b35320>] tcp_push+0x86/0x88
[<ffffffff80b377a5>] tcp_sendmsg+0x7a4/0x8aa
[<ffffffff80ae9206>] __sock_sendmsg+0x5e/0x67
[<ffffffff80ae92fc>] sock_aio_write+0xed/0xfd
[<ffffffff802d89e6>] do_sync_write+0xec/0x132
[<ffffffff8025d09b>] ? autoremove_wake_function+0x0/0x3d
[<ffffffff8026a688>] ? __lock_release+0xba/0xd3
[<ffffffff80241369>] ? get_parent_ip+0x16/0x46
[<ffffffff80242fef>] ? sub_preempt_count+0x67/0x7a
[<ffffffff80604287>] ? security_file_permission+0x16/0x18
[<ffffffff802d9275>] vfs_write+0xbf/0xe6
[<ffffffff802d936a>] sys_write+0x4c/0x74
[<ffffffff8020bd6b>] system_call_fastpath+0x16/0x1b
[ OK ]
so we need to bump up the limits some more.
Ingo
On Wed, March 18, 2009 8:04 am, Ingo Molnar wrote:
>
> ( e1000e Cc:s added. A new debug feature, CONFIG_DMA_API_DEBUG=y,
> has triggered the warning below in e1000_put_txbuf(). )
http://lkml.org/lkml/2009/3/2/318
--
Lubomir Rintel
* Lubomir Rintel <[email protected]> wrote:
> On Wed, March 18, 2009 8:04 am, Ingo Molnar wrote:
> >
> > ( e1000e Cc:s added. A new debug feature, CONFIG_DMA_API_DEBUG=y,
> > has triggered the warning below in e1000_put_txbuf(). )
>
> http://lkml.org/lkml/2009/3/2/318
Thanks.
Ingo
There's a similar-looking warning triggering for the forcedeth
driver as well on another test-system (see the details below).
Does anyone know whether there's a fix for this too, or is it a
new warning?
Ingo
----- Forwarded message from Ingo Molnar <[email protected]> -----
Date: Wed, 18 Mar 2009 12:20:26 +0100
From: Ingo Molnar <[email protected]>
To: Joerg Roedel <[email protected]>
Subject: forcedeth 0000:00:0a.0: DMA-API: device driver frees DMA memory
with wrong function [device address=0x0000000035992232] [size=42
bytes] [mapped as single] [unmapped as page]
Cc: Ingo Molnar <[email protected]>, [email protected],
[email protected]
-tip testing found this DMA-debug assert:
[ 19.648023] eth0: no link during initialization.
[ 21.952553] eth0: link up.
[ 22.684073] ------------[ cut here ]------------
[ 22.688023] WARNING: at lib/dma-debug.c:562 check_unmap+0x30e/0x4b4()
[ 22.688023] Hardware name: System Product Name
[ 22.688023]
forcedeth 0000:00:0a.0: DMA-API: device driver frees DMA memory with wrong function [device address=0x0000000035992232] [size=42 bytes] [mapped as single] [unmapped as page]
[ 22.688023] Modules linked in:
[ 22.688023] Pid: 1523, comm: arping Not tainted 2.6.29-rc8-tip #20999
[ 22.688023] Call Trace:
[ 22.688023] [<c0140eae>] warn_slowpath+0x76/0xad
[ 22.688023] [<c0293a0b>] ? add_dma_entry+0x4b/0x51
[ 22.688023] [<c015eec5>] ? mark_lock+0x1c/0x16f
[ 22.688023] [<c015f1a4>] ? __lock_acquire+0x18c/0x296
[ 22.688023] [<c03473cc>] ? nv_start_xmit_optimized+0x3bd/0x3eb
[ 22.688023] [<c015eec5>] ? mark_lock+0x1c/0x16f
[ 22.688023] [<c015f1a4>] ? __lock_acquire+0x18c/0x296
[ 22.688023] [<c0293572>] check_unmap+0x30e/0x4b4
[ 22.688023] [<c055f7c3>] ? _spin_lock+0x27/0x2f
[ 22.688023] [<c0145ba1>] ? _local_bh_enable_ip+0xa1/0xa7
[ 22.688023] [<c049e0f8>] ? dev_queue_xmit+0x325/0x359
[ 22.688023] [<c0188922>] ? trace_hardirqs_on+0x21/0x23
[ 22.688023] [<c0145ba1>] ? _local_bh_enable_ip+0xa1/0xa7
[ 22.688023] [<c0145bc6>] ? local_bh_enable+0x10/0x12
[ 22.688023] [<c050225c>] ? packet_sendmsg+0x1b7/0x20a
[ 22.688023] [<c02939b8>] debug_dma_unmap_page+0x59/0x61
[ 22.688023] [<c034421f>] pci_unmap_page+0x5b/0x66
[ 22.688023] [<c03446fd>] nv_tx_done_optimized+0x46/0x1c5
[ 22.688023] [<c0345922>] nv_nic_irq_optimized+0xa7/0x233
[ 22.688023] [<c017706e>] handle_IRQ_event+0x85/0xdf
[ 22.688023] [<c01785df>] handle_fasteoi_irq+0x79/0xb9
[ 22.688023] [<c0119973>] handle_irq+0x1f/0x24
[ 22.688023] [<c0119332>] do_IRQ+0x45/0x88
[ 22.688023] [<c0118455>] common_interrupt+0x35/0x40
[ 22.688023] [<c0144515>] ? do_setitimer+0x160/0x2d9
[ 22.688023] [<c055f69b>] ? _spin_unlock_irq+0x29/0x2b
[ 22.688023] [<c0144515>] do_setitimer+0x160/0x2d9
[ 22.688023] [<c04926fa>] ? sys_socketcall+0xed/0x188
[ 22.688023] [<c014473b>] alarm_setitimer+0x39/0x58
[ 22.688023] [<c0149776>] sys_alarm+0x10/0x12
[ 22.688023] [<c0117d47>] sysenter_do_call+0x12/0x32
[ 22.688023] ---[ end trace bf7fa25698012db6 ]---
[ 22.688023] Mapped at:
[ 22.688023] [<c0293cc1>] debug_dma_map_page+0x6c/0x128
[ 22.688023] [<c0345bd0>] pci_map_single+0xb5/0xc1
[ 22.688023] [<c0347147>] nv_start_xmit_optimized+0x138/0x3eb
[ 22.688023] [<c049dc7c>] dev_hard_start_xmit+0x123/0x18f
[ 22.688023] [<c04acbb9>] __qdisc_run+0xd0/0x1b3
[ 23.700081] arping used greatest stack depth: 5988 bytes left
[ 24.545512] Adding 3911816k swap on /dev/sda2. Priority:-1 extents:1 across:3911816k
Config and full bootlog attached.
Ingo
Ingo Molnar wrote:
> * Lubomir Rintel <[email protected]> wrote:
>
>> On Wed, March 18, 2009 8:04 am, Ingo Molnar wrote:
>>>
>>> ( e1000e Cc:s added. A new debug feature, CONFIG_DMA_API_DEBUG=y,
>>> has triggered the warning below in e1000_put_txbuf(). )
>>
>> http://lkml.org/lkml/2009/3/2/318
We are still looking for more testing from the general community of
this patch, in particular if any tx hangs are reported. Thanks in
advance for any reports.-
* Brandeburg, Jesse <[email protected]> wrote:
> Ingo Molnar wrote:
> > * Lubomir Rintel <[email protected]> wrote:
> >
> >> On Wed, March 18, 2009 8:04 am, Ingo Molnar wrote:
> >>>
> >>> ( e1000e Cc:s added. A new debug feature, CONFIG_DMA_API_DEBUG=y,
> >>> has triggered the warning below in e1000_put_txbuf(). )
> >>
> >> http://lkml.org/lkml/2009/3/2/318
>
> We are still looking for more testing from the general community of
> this patch, in particular if any tx hangs are reported. Thanks in
> advance for any reports.--
Yeah - already picked it up into tip:out-of-tree and it passed about
a hundred bootup tests on various e1000e using systems already -
with no problems whatsoever. Will let you know if anything breaks -
e1000e has been rock solid for me in the .29 cycle.
Ingo
On Wed, 18 Mar 2009, Ingo Molnar wrote:
> > >> http://lkml.org/lkml/2009/3/2/318
> >
> > We are still looking for more testing from the general community of
> > this patch, in particular if any tx hangs are reported. Thanks in
> > advance for any reports.--
>
> Yeah - already picked it up into tip:out-of-tree and it passed about
> a hundred bootup tests on various e1000e using systems already -
> with no problems whatsoever. Will let you know if anything breaks -
> e1000e has been rock solid for me in the .29 cycle.
thanks! did you pull in both e1000 and e1000e fixes? We also have a
similar fix in internal testing for igb.
* Brandeburg, Jesse <[email protected]> wrote:
> On Wed, 18 Mar 2009, Ingo Molnar wrote:
> > > >> http://lkml.org/lkml/2009/3/2/318
> > >
> > > We are still looking for more testing from the general community of
> > > this patch, in particular if any tx hangs are reported. Thanks in
> > > advance for any reports.--
> >
> > Yeah - already picked it up into tip:out-of-tree and it passed about
> > a hundred bootup tests on various e1000e using systems already -
> > with no problems whatsoever. Will let you know if anything breaks -
> > e1000e has been rock solid for me in the .29 cycle.
>
> thanks! did you pull in both e1000 and e1000e fixes? We also have a
> similar fix in internal testing for igb.
yeah, i picked up both - albeit i've switched over all boxes to
e1000e so e1000 will get little testing from me. Do you have an sha1
for the igb fix by any chance?
Ingo
FYI, triggered a new type of DMA debug warning today, this time on
an older box, in the boomerang/vortex 3c59x driver:
[ 211.098328] eth0: setting full-duplex.
[ 1745.974321] ------------[ cut here ]------------
[ 1745.980344] WARNING: at lib/dma-debug.c:627 check_for_illegal_area+0xa6/0xe4()
[ 1745.983613] 3c59x 0000:00:0b.0: DMA-API: device driver maps memory from kernel text or rodata [addr=c0fffe54] [size=428]
[ 1745.986521] Modules linked in:
[ 1745.986521] Pid: 19063, comm: distcc Not tainted 2.6.29-rc8-tip-02734-gfd4c260-dirty #729
[ 1745.993830] Call Trace:
[ 1745.996309] [<c102e98b>] warn_slowpath+0x5e/0x74
[ 1746.001069] [<c124de1e>] ? memcpy+0xe/0x31
[ 1746.005322] [<c14eb801>] ? skb_put+0x62/0x7b
[ 1746.009732] [<c15e0cf5>] ? _spin_unlock_irqrestore+0x19/0x25
[ 1746.011540] [<c1006274>] ? enable_8259A_irq+0x45/0x48
[ 1746.016729] [<c105ee38>] ? handle_level_irq+0xe3/0xef
[ 1746.021921] [<c1004efb>] ? handle_irq+0x36/0x43
[ 1746.022582] [<c1002c35>] ? restore_nocheck_notrace+0x0/0xe
[ 1746.028198] [<c124e0e0>] ? trace_hardirqs_on_thunk+0xc/0x10
[ 1746.033900] [<c1002c35>] ? restore_nocheck_notrace+0x0/0xe
[ 1746.035517] [<c125724f>] check_for_illegal_area+0xa6/0xe4
[ 1746.041046] [<c1258382>] debug_dma_map_page+0x12a/0x13b
[ 1746.043356] [<c1316366>] boomerang_start_xmit+0x40b/0x54c
[ 1746.048900] [<c109c226>] ? kmem_cache_free+0xf6/0xfe
[ 1746.054013] [<c14f34ca>] dev_hard_start_xmit+0x101/0x171
[ 1746.055471] [<c15081bc>] __qdisc_run+0xf4/0x21f
[ 1746.060143] [<c14f5e02>] dev_queue_xmit+0x1e2/0x2dc
[ 1746.065162] [<c1524fc8>] ip_finish_output+0x213/0x24d
[ 1746.066360] [<c109a489>] ? check_object+0xfa/0x156
[ 1746.071291] [<c15252f8>] ip_output+0x4e/0x51
[ 1746.075693] [<c1523a51>] ip_local_out+0x39/0x42
[ 1746.080366] [<c1525727>] ip_queue_xmit+0x2ce/0x314
[ 1746.085295] [<c109d146>] ? __kmalloc_node_track_caller+0x165/0x172
[ 1746.087612] [<c14ec135>] ? __alloc_skb+0x38/0x115
[ 1746.092458] [<c15391de>] ? tcp_v4_md5_do_lookup+0xa/0x36
[ 1746.097916] [<c15350ee>] ? __tcp_select_window+0xe/0x12f
[ 1746.099368] [<c1535b2d>] ? tcp_options_write+0x138/0x1b7
[ 1746.104820] [<c1536257>] tcp_transmit_skb+0x452/0x492
[ 1746.110009] [<c1537824>] tcp_write_xmit+0x21d/0x2c4
[ 1746.111029] [<c153791d>] __tcp_push_pending_frames+0x52/0x12e
[ 1746.116912] [<c152b3a4>] do_tcp_sendpages+0x4a9/0x4d6
[ 1746.122112] [<c152c036>] tcp_sendpage+0x54/0x69
[ 1746.122783] [<c14e51f7>] sock_sendpage+0x34/0x3b
[ 1746.127547] [<c10bc5bf>] pipe_to_sendpage+0x5b/0x66
[ 1746.132576] [<c10bd2a2>] __splice_from_pipe+0x65/0x1db
[ 1746.137853] [<c10bc564>] ? pipe_to_sendpage+0x0/0x66
[ 1746.138956] [<c10bd864>] splice_from_pipe+0x56/0x6d
[ 1746.143975] [<c10bd893>] generic_splice_sendpage+0x18/0x1a
[ 1746.149599] [<c10bc564>] ? pipe_to_sendpage+0x0/0x66
[ 1746.150703] [<c10bca34>] do_splice_from+0xc0/0xd1
[ 1746.155547] [<c10bcbb1>] direct_splice_actor+0x1c/0x21
[ 1746.160835] [<c10bcfd5>] splice_direct_to_actor+0x124/0x1f1
[ 1746.162841] [<c10bcb95>] ? direct_splice_actor+0x0/0x21
[ 1746.168203] [<c10bd0dc>] do_splice_direct+0x3a/0x55
[ 1746.173220] [<c10a3110>] do_sendfile+0x1b0/0x23e
[ 1746.177980] [<c10a38af>] sys_sendfile+0x5b/0xac
[ 1746.178649] [<c1002b74>] sysenter_do_call+0x12/0x2b
[ 1746.183668] ---[ end trace e9b0fe468818fdae ]---
Ingo
Commit-ID: eedeeabdeeadb016b8c783e3620d06b98d0cb4e1
Gitweb: http://git.kernel.org/tip/eedeeabdeeadb016b8c783e3620d06b98d0cb4e1
Author: Peter Zijlstra <[email protected]>
AuthorDate: Wed, 18 Mar 2009 12:38:47 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 31 Mar 2009 14:53:01 +0200
lockdep: add stack dumps to asserts
Have a better idea about exactly which loc causes a lockdep
limit overflow. Often it's a bug or inefficiency in that
subsystem.
Signed-off-by: Peter Zijlstra <[email protected]>
LKML-Reference: <1237376327.5069.253.camel@laptop>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/lockdep.c | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 981cd48..a288ae1 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -792,6 +792,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return NULL;
}
class = lock_classes + nr_lock_classes++;
@@ -855,6 +856,7 @@ static struct lock_list *alloc_list_entry(void)
printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return NULL;
}
return list_entries + nr_list_entries++;
@@ -1681,6 +1683,7 @@ cache_hit:
printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return 0;
}
chain = lock_chains + nr_lock_chains++;
@@ -2540,6 +2543,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
debug_locks_off();
printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return 0;
}
@@ -2636,6 +2640,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
debug_locks_off();
printk("BUG: MAX_LOCK_DEPTH too low!\n");
printk("turning off the locking correctness validator.\n");
+ dump_stack();
return 0;
}