2002-08-29 07:31:14

by Jordan Breeding

[permalink] [raw]
Subject: Problems with 2.5.32-mm1

ksymoops 2.4.5 on i686 2.5.32. Options used
-V (specified)
-k /proc/ksyms (default)
-l /proc/modules (default)
-o /lib/modules/2.5.32 (specified)
-m /boot/System.map-2.5.32-mm1 (specified)

Aug 29 01:54:04 ledzep kernel: f1e07df8 00000000 f1e07e28 c0141452 c18ce500 ebd92000 c18ce524 c04fa680
Aug 29 01:54:04 ledzep kernel: 00000002 f7f5c7bc c16cf678 eb95c000 f1e07e48 c0142997 c18ce500 f7f5c83c
Aug 29 01:54:04 ledzep kernel: 0000001e c16cf678 eb95c000 006cf660 f1e07e68 c01415cc c18ce500 eb95c000
Aug 29 01:54:04 ledzep kernel: Call Trace: [<c0141452>] [<c0142997>] [<c01415cc>] [<c035caf7>] [<c035b561>]
Aug 29 01:54:04 ledzep kernel: [<c035cc22>] [<c03b06ad>] [<c035893f>] [<c0358a70>] [<c0151fb9>] [<c012b5c0>]
Aug 29 01:54:04 ledzep kernel: [<c015213c>] [<c0109e6b>]
Aug 29 01:58:26 ledzep kernel: f4ca5da4 f4ca4000 f4ca5dd8 c0142669 c18ce940 f6c8965c c18ce964 00003246
Aug 29 01:58:26 ledzep kernel: 00000000 f4ca5dec f6d5d1b4 f6d5d1b4 00003246 f4ca5df4 c035c8d6 c18ce940
Aug 29 01:58:26 ledzep kernel: 000001d0 ffffffe0 00000000 f6cd2ab4 f4ca5e1c c035b8a5 00003fc0 000001d0
Aug 29 01:58:26 ledzep kernel: Call Trace: [<c0142669>] [<c035c8d6>] [<c035b8a5>] [<c035b9ce>] [<c03aff87>]
Aug 29 01:58:26 ledzep kernel: [<c03588b9>] [<c0358c11>] [<c0358cef>] [<c01523c8>] [<c012c052>] [<c012b5c0>]
Aug 29 01:58:26 ledzep kernel: [<c01525ff>] [<c0109e6b>]
Aug 29 01:58:26 ledzep kernel: f4ca5da4 f4ca4000 f4ca5dd8 c0142669 c18ce940 f6c8965c c18ce964 00003246
Aug 29 01:58:26 ledzep kernel: 00000000 f4ca5dec efbe4c64 efbe4c64 00003246 f4ca5df4 c035c8d6 c18ce940
Aug 29 01:58:26 ledzep kernel: 000001d0 ffffffe0 00000000 f6cd2ab4 f4ca5e1c c035b8a5 00003fc0 000001d0
Aug 29 01:58:26 ledzep kernel: Call Trace: [<c0142669>] [<c035c8d6>] [<c035b8a5>] [<c035b9ce>] [<c03aff87>]
Aug 29 01:58:26 ledzep kernel: [<c03588b9>] [<c0358c11>] [<c0358cef>] [<c01523c8>] [<c012c052>] [<c012b5c0>]
Aug 29 01:58:26 ledzep kernel: [<c01525ff>] [<c0109e6b>]
Aug 29 02:09:41 ledzep kernel: e24b7e84 00000000 e24b7eb4 c0141452 c18dfe50 e6396878 c18dfe74 f7b14400
Aug 29 02:09:41 ledzep kernel: e24b7eb4 f7f0a000 c15f8f88 e6396a48 e24b7ed4 c0142997 c18dfe50 f7f0a200
Aug 29 02:09:41 ledzep kernel: 0000007e 00000246 005f8f70 e6396a48 e24b7ef0 c0141526 c18dfe50 e6396a48
Aug 29 02:09:41 ledzep kernel: Call Trace: [<c0141452>] [<c0142997>] [<c0141526>] [<c016a743>] [<c011b403>]
Aug 29 02:09:41 ledzep kernel: [<c016aca4>] [<c0161fef>] [<c011b660>] [<c0162189>] [<c01623e9>] [<c0109e6b>]
Aug 29 02:09:41 ledzep kernel: e24b7e80 00000000 e24b7eb0 c0141452 c18cf080 f2f1a5fc c18cf0a4 c18dfe74
Aug 29 02:09:41 ledzep kernel: e24b7edc f7efd800 c17f5c28 f2f1a5d4 e24b7ed0 c0142997 c18cf080 f7efda00
Aug 29 02:09:41 ledzep kernel: 0000007e c17f5c28 f2f1a5d4 007f5c10 e24b7ef0 c01415cc c18cf080 f2f1a5d4
Aug 29 02:09:41 ledzep kernel: Call Trace: [<c0141452>] [<c0142997>] [<c01415cc>] [<c016a7f2>] [<c016aca4>]
Aug 29 02:09:41 ledzep kernel: [<c0161fef>] [<c015fa1f>] [<c0162189>] [<c01623e9>] [<c0109e6b>]
Aug 29 02:10:00 ledzep kernel: e081fe84 00000000 e081feb4 c0141452 c18dfe50 f048fe5c c18dfe74 0058fe58
Aug 29 02:10:00 ledzep kernel: e081ff84 f7f0b400 c16e0158 ec008220 e081fed4 c0142997 c18dfe50 f7f0b600
Aug 29 02:10:00 ledzep kernel: 0000007e 00000246 006e0140 ec008220 e081fef0 c0141526 c18dfe50 ec008220
Aug 29 02:10:00 ledzep kernel: Call Trace: [<c0141452>] [<c0142997>] [<c0141526>] [<c016a743>] [<c016aca4>]
Aug 29 02:10:00 ledzep kernel: [<c0161fef>] [<c015fa1f>] [<c0162189>] [<c01623e9>] [<c0109e6b>]
Aug 29 02:25:10 ledzep kernel: f1e07df8 00000000 f1e07e28 c0141452 c18ce500 ec21d000 c18ce524 00000000
Aug 29 02:25:10 ledzep kernel: 00000001 f7f5c8c4 c16873c8 e9c7e000 f1e07e48 c0142997 c18ce500 f7f5c944
Aug 29 02:25:10 ledzep kernel: 0000001e c16873c8 e9c7e000 006873b0 f1e07e68 c01415cc c18ce500 e9c7e000
Aug 29 02:25:10 ledzep kernel: Call Trace: [<c0141452>] [<c0142997>] [<c01415cc>] [<c035caf7>] [<c035b561>]
Aug 29 02:25:10 ledzep kernel: [<c035cc22>] [<c03b06ad>] [<c035893f>] [<c0358a70>] [<c0151fb9>] [<c015213c>]
Aug 29 02:25:10 ledzep kernel: [<c0109e6b>]
Warning (Oops_read): Code line not seen, dumping what data is available


Trace; c0141452 <free_block+b2/c0>
Trace; c0142997 <__kmem_cache_free+97/f2>
Trace; c01415cc <kfree+5c/a0>
Trace; c035caf7 <kfree_skbmem+17/80>
Trace; c035b561 <sock_wfree+41/50>
Trace; c035cc22 <__kfree_skb+c2/110>
Trace; c03b06ad <unix_stream_recvmsg+1bd/330>
Trace; c035893f <sock_recvmsg+4f/f0>
Trace; c0358a70 <sock_read+90/a0>
Trace; c0151fb9 <vfs_read+b9/100>
Trace; c012b5c0 <update_process_times+40/50>
Trace; c015213c <sys_read+3c/50>
Trace; c0109e6b <syscall_call+7/b>
Trace; c0142669 <__kmem_cache_alloc+139/1d0>
Trace; c035c8d6 <alloc_skb+b6/1c0>
Trace; c035b8a5 <sock_alloc_send_pskb+d5/1d0>
Trace; c035b9ce <sock_alloc_send_skb+2e/30>
Trace; c03aff87 <unix_stream_sendmsg+107/330>
Trace; c03588b9 <sock_sendmsg+79/b0>
Trace; c0358c11 <sock_readv_writev+71/a0>
Trace; c0358cef <sock_writev+4f/60>
Trace; c01523c8 <do_readv_writev+148/250>
Trace; c012c052 <update_times+112/117>
Trace; c012b5c0 <update_process_times+40/50>
Trace; c01525ff <sys_writev+8f/a0>
Trace; c0109e6b <syscall_call+7/b>
Trace; c0142669 <__kmem_cache_alloc+139/1d0>
Trace; c035c8d6 <alloc_skb+b6/1c0>
Trace; c035b8a5 <sock_alloc_send_pskb+d5/1d0>
Trace; c035b9ce <sock_alloc_send_skb+2e/30>
Trace; c03aff87 <unix_stream_sendmsg+107/330>
Trace; c03588b9 <sock_sendmsg+79/b0>
Trace; c0358c11 <sock_readv_writev+71/a0>
Trace; c0358cef <sock_writev+4f/60>
Trace; c01523c8 <do_readv_writev+148/250>
Trace; c012c052 <update_times+112/117>
Trace; c012b5c0 <update_process_times+40/50>
Trace; c01525ff <sys_writev+8f/a0>
Trace; c0109e6b <syscall_call+7/b>
Trace; c0141452 <free_block+b2/c0>
Trace; c0142997 <__kmem_cache_free+97/f2>
Trace; c0141526 <kmem_cache_free+66/b0>
Trace; c016a743 <prune_dcache+103/1f0>
Trace; c011b403 <schedule+1b3/3e0>
Trace; c016aca4 <shrink_dcache_parent+24/30>
Trace; c0161fef <d_unhash+af/160>
Trace; c011b660 <preempt_schedule+30/70>
Trace; c0162189 <vfs_rmdir+e9/270>
Trace; c01623e9 <sys_rmdir+d9/100>
Trace; c0109e6b <syscall_call+7/b>
Trace; c0141452 <free_block+b2/c0>
Trace; c0142997 <__kmem_cache_free+97/f2>
Trace; c01415cc <kfree+5c/a0>
Trace; c016a7f2 <prune_dcache+1b2/1f0>
Trace; c016aca4 <shrink_dcache_parent+24/30>
Trace; c0161fef <d_unhash+af/160>
Trace; c015fa1f <permission+4f/60>
Trace; c0162189 <vfs_rmdir+e9/270>
Trace; c01623e9 <sys_rmdir+d9/100>
Trace; c0109e6b <syscall_call+7/b>
Trace; c0141452 <free_block+b2/c0>
Trace; c0142997 <__kmem_cache_free+97/f2>
Trace; c0141526 <kmem_cache_free+66/b0>
Trace; c016a743 <prune_dcache+103/1f0>
Trace; c016aca4 <shrink_dcache_parent+24/30>
Trace; c0161fef <d_unhash+af/160>
Trace; c015fa1f <permission+4f/60>
Trace; c0162189 <vfs_rmdir+e9/270>
Trace; c01623e9 <sys_rmdir+d9/100>
Trace; c0109e6b <syscall_call+7/b>
Trace; c0141452 <free_block+b2/c0>
Trace; c0142997 <__kmem_cache_free+97/f2>
Trace; c01415cc <kfree+5c/a0>
Trace; c035caf7 <kfree_skbmem+17/80>
Trace; c035b561 <sock_wfree+41/50>
Trace; c035cc22 <__kfree_skb+c2/110>
Trace; c03b06ad <unix_stream_recvmsg+1bd/330>
Trace; c035893f <sock_recvmsg+4f/f0>
Trace; c0358a70 <sock_read+90/a0>
Trace; c0151fb9 <vfs_read+b9/100>
Trace; c015213c <sys_read+3c/50>
Trace; c0109e6b <syscall_call+7/b>


1 warning issued. Results may not be reliable.


Attachments:
error.processed (7.33 kB)

2002-08-29 09:42:25

by Andrew Morton

[permalink] [raw]
Subject: Re: Problems with 2.5.32-mm1

Jordan Breeding wrote:
>
> Hello,
>
> I am trying to run 2.5.32-mm1. The first problem that I have is that
> if SMP, Preempt and Highmem are all turned on I get lots of problems at
> boot including a BUG in highmem.c, I can get the line number later if
> someone wants it (later tomorrow night). I then disabled highmem and
> got the system to boot. I have a few weird problems, one is that every
> once in a while I see the message "bad: schedule() with irqs disabled!"
> and then there is a code trace. I am attaching the decoded output of
> some of the traces. Another problem I am having is that I get this
> message on bootup: "mtrr: SMP support incomplete for this vendor". It
> seems that this would be a problem however the box works fine as far as
> I can tell. Thanks for any light anyone can shed on any of this and
> please let me know whether anyone needs to know more about this box or
> about the highmem line number to figure any of these problems out.
>

Well there are a couple of dopey bugs in swap.c which this patch fixes.

--- 2.5.32/mm/swap.c~preempt-fix Thu Aug 29 02:00:44 2002
+++ 2.5.32-akpm/mm/swap.c Thu Aug 29 02:06:21 2002
@@ -54,14 +54,14 @@ static struct pagevec lru_add_pvecs[NR_C
void lru_cache_add(struct page *page)
{
unsigned long flags;
- struct pagevec *pvec;
+ struct pagevec *pvec = &lru_add_pvecs[get_cpu()];

local_irq_save(flags);
- pvec = &lru_add_pvecs[smp_processor_id()];
page_cache_get(page);
if (!pagevec_add(pvec, page))
__pagevec_lru_add(pvec);
local_irq_restore(flags);
+ put_cpu();
}

void lru_add_drain(void)
@@ -210,6 +210,7 @@ void pagevec_deactivate_inactive(struct
void __pagevec_lru_add(struct pagevec *pvec)
{
int i;
+ unsigned long flags = 0;
struct zone *zone = NULL;

for (i = 0; i < pagevec_count(pvec); i++) {
@@ -218,16 +219,16 @@ void __pagevec_lru_add(struct pagevec *p

if (pagezone != zone) {
if (zone)
- spin_unlock_irq(&zone->lru_lock);
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
zone = pagezone;
- spin_lock_irq(&zone->lru_lock);
+ spin_lock_irqsave(&zone->lru_lock, flags);
}
if (TestSetPageLRU(page))
BUG();
add_page_to_inactive_list(zone, page);
}
if (zone)
- spin_unlock_irq(&zone->lru_lock);
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
pagevec_release(pvec);
}


But the slab.c one is trickier. The code is, basically:

local_irq_save(flags);
spin_lock(lock);
...
spin_unlock(lock);

and sure, if another CPU sets need_resched against this task and sends
a reschedule IPI then we can hit the spin_unlock() with need_resched
set, and this task will reschedule with interrupts off.

So there's a slab.c patch here which tries to fix that up too, but it's
really rather nasty, and it doesn't work, and I've had enough for the day.

There are other code paths in there which I missed, such as the second
spin_unlock in __kmem_cache_alloc(). I think it would be cleaner
and saner to just use a local_irq_save_and_preempt_disable(flags)
throughout slab.

--- 2.5.32/mm/slab.c~preempt-fix Thu Aug 29 02:32:53 2002
+++ 2.5.32-akpm/mm/slab.c Thu Aug 29 02:51:03 2002
@@ -998,6 +998,7 @@ static void drain_cpu_caches(kmem_cache_
local_irq_disable();
free_block(cachep, cc_entry(ccold), ccold->avail);
local_irq_enable();
+ preempt_enable();
ccold->avail = 0;
}
smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
@@ -1610,11 +1611,14 @@ static inline void __free_block (kmem_ca
kmem_cache_free_one(cachep, *objpp);
}

+/*
+ * Returns with preemption disabled - caller must perform preempt_enable().
+ */
static void free_block (kmem_cache_t* cachep, void** objpp, int len)
{
spin_lock(&cachep->spinlock);
__free_block(cachep, objpp, len);
- spin_unlock(&cachep->spinlock);
+ _raw_spin_unlock(&cachep->spinlock);
}
#endif

@@ -1716,6 +1720,9 @@ void kmem_cache_free (kmem_cache_t *cach
local_irq_save(flags);
__kmem_cache_free(cachep, objp);
local_irq_restore(flags);
+#ifdef CONFIG_SMP
+ preempt_enable();
+#endif
}

/**
@@ -1737,6 +1744,9 @@ void kfree (const void *objp)
c = GET_PAGE_CACHE(virt_to_page(objp));
__kmem_cache_free(c, (void*)objp);
local_irq_restore(flags);
+#ifdef CONFIG_SMP
+ preempt_enable();
+#endif
}

unsigned int kmem_cache_size(kmem_cache_t *cachep)
@@ -1814,6 +1824,7 @@ static int kmem_tune_cpucache (kmem_cach
local_irq_disable();
free_block(cachep, cc_entry(ccold), ccold->avail);
local_irq_enable();
+ preempt_enable();
kfree(ccold);
}
return 0;

.

2002-08-29 11:27:07

by Dave Jones

[permalink] [raw]
Subject: Re: Problems with 2.5.32-mm1

On Thu, Aug 29, 2002 at 02:35:29AM -0500, Jordan Breeding wrote:
> Hello,
>
> Another problem I am having is that I get this
> message on bootup: "mtrr: SMP support incomplete for this vendor". It
> seems that this would be a problem however the box works fine as far as
> I can tell. Thanks for any light anyone can shed on any of this and
> please let me know whether anyone needs to know more about this box

Patrick Mochel (author of new mtrr driver) added to Cc:
What CPUs are in this system?

Dave

--
| Dave Jones. http://www.codemonkey.org.uk
| SuSE Labs

2002-08-29 13:12:25

by Jordan Breeding

[permalink] [raw]
Subject: Re: Problems with 2.5.32-mm1

Dave Jones wrote:
> On Thu, Aug 29, 2002 at 02:35:29AM -0500, Jordan Breeding wrote:
> > Hello,
> >
> > Another problem I am having is that I get this
> > message on bootup: "mtrr: SMP support incomplete for this vendor". It
> > seems that this would be a problem however the box works fine as far as
> > I can tell. Thanks for any light anyone can shed on any of this and
> > please let me know whether anyone needs to know more about this box
>
> Patrick Mochel (author of new mtrr driver) added to Cc:
> What CPUs are in this system?
>
> Dave
>

It is a Tyan Thunder K7 (S2462UNG) with dual AMD Athlon MP 1900+ cpus in it.

Jordan