LinuxLists.cc - 2.6.16rc1-git4 slab corruption.

2006-01-31 18:03:20

Subject: 2.6.16rc1-git4 slab corruption.

Redzone: 0x5a2cf071/0x5a2cf071.
Last user: [<ffffffff80181cc0>](free_buffer_head+0x2a/0x43)

Call Trace: <ffffffff8017b4d0>{check_poison_obj+127}
<ffffffff80181cea>{alloc_buffer_head+17} <ffffffff8017b638>{cache_alloc_debugcheck_after+48}
<ffffffff8017b828>{kmem_cache_alloc+231} <ffffffff80181cea>{alloc_buffer_head+17}
<ffffffff801824b1>{alloc_page_buffers+53} <ffffffff8018255c>{create_empty_buffers+20}
<ffffffff801831c8>{__block_prepare_write+148} <ffffffff8807e5f4>{:ext3:ext3_get_block+0}
<ffffffff8017b0cc>{poison_obj+38} <ffffffff8017b6f7>{cache_alloc_debugcheck_after+239}
<ffffffff80183536>{block_prepare_write+26} <ffffffff8807fcd1>{:ext3:ext3_prepare_write+148}
<ffffffff80340386>{_write_unlock_irq+9} <ffffffff8015e3b7>{generic_file_buffered_write+603}
<ffffffff80137896>{current_fs_time+59} <ffffffff80137896>{current_fs_time+59}
<ffffffff8015ea13>{__generic_file_aio_write_nolock+767}
<ffffffff8015ee22>{generic_file_aio_write+78} <ffffffff80149d23>{debug_mutex_add_waiter+159}
<ffffffff8033fb23>{__mutex_lock_slowpath+817} <ffffffff8015ee39>{generic_file_aio_write+101}
<ffffffff8807be5e>{:ext3:ext3_file_write+22} <ffffffff8018054e>{do_sync_write+199}
<ffffffff801464d6>{autoremove_wake_function+0} <ffffffff8015ae1f>{audit_syscall_entry+301}
<ffffffff80180e48>{vfs_write+206} <ffffffff801813fa>{sys_write+69}
<ffffffff8010aa78>{tracesys+209}
020: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 01 00 00 00
Prev obj: start=ffff81000057a2f0, len=88
Redzone: 0x170fc2a5/0x170fc2a5.
Last user: [<ffffffff80181cea>](alloc_buffer_head+0x11/0x36)
000: 23 40 00 00 00 00 00 00 f0 a2 57 00 00 81 ff ff
010: 80 6c 11 01 00 81 ff ff 01 00 00 00 00 10 00 00
Next obj: start=ffff81000057a3d0, len=88
Redzone: 0x5a2cf071/0x5a2cf071.
Last user: [<ffffffff80181cc0>](free_buffer_head+0x2a/0x43)
000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b

2006-01-31 19:08:48

by Chris Mason

[permalink] [raw]

Subject: Re: 2.6.16rc1-git4 slab corruption.

On Tuesday 31 January 2006 13:03, Dave Jones wrote:
> Redzone: 0x5a2cf071/0x5a2cf071.
> Last user: [<ffffffff80181cc0>](free_buffer_head+0x2a/0x43)

Haven't seen this one yet, but we have an assortment of strange bugs on 2.6.16-rc1-git. What were you doing to trigger it?

I've been trying to hammer on things with the slab exerciser below, but haven't had much luck in getting a nice reliable test case.

modprobe crasher threads=X

X defaults to 1.

-chris

diff -r abc01241b9e0 drivers/char/Kconfig
--- a/drivers/char/Kconfig Tue Jan 24 15:03:39 2006 -0500
+++ b/drivers/char/Kconfig Wed Jan 25 08:03:06 2006 -0500
@@ -1020,5 +1020,10 @@ config TELCLOCK
sysfs directory, /sys/devices/platform/telco_clock, with a number of
files for controlling the behavior of this hardware.

+config CRASHER
+ tristate "Crasher Module"
+ help
+ Slab cache memory tester. Only use this as a module
+
endmenu

diff -r abc01241b9e0 drivers/char/Makefile
--- a/drivers/char/Makefile Tue Jan 24 15:03:39 2006 -0500
+++ b/drivers/char/Makefile Wed Jan 25 08:03:06 2006 -0500
@@ -95,6 +95,7 @@ obj-$(CONFIG_IPMI_HANDLER) += ipmi/

obj-$(CONFIG_HANGCHECK_TIMER) += hangcheck-timer.o
obj-$(CONFIG_TCG_TPM) += tpm/
+obj-$(CONFIG_CRASHER) += crasher.o
# Files generated that shall be removed upon make clean
clean-files := consolemap_deftbl.c defkeymap.c qtronixmap.c

diff -r abc01241b9e0 drivers/char/crasher.c
--- /dev/null Thu Jan 1 00:00:00 1970 +0000
+++ b/drivers/char/crasher.c Wed Jan 25 08:03:06 2006 -0500
@@ -0,0 +1,148 @@
+/*
+ * crasher.c, it breaks things
+ */
+
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/moduleparam.h>
+
+static int module_exiting;
+static struct completion startup = COMPLETION_INITIALIZER(startup);
+static unsigned long rand_seed = 152L;
+static unsigned long seed = 152L;
+static int threads = 1;
+
+module_param(seed, ulong, 0);
+module_param(threads, int, 0);
+MODULE_PARM_DESC(seed, "random seed for memory tests");
+MODULE_PARM_DESC(threads, "number of threads to run");
+MODULE_LICENSE("GPL");
+
+#define NUM_ALLOC 24
+#define NUM_SIZES 8
+static int sizes[] = { 32, 64, 128, 192, 256, 1024, 2048, 4096 };
+
+struct mem_buf {
+ char *buf;
+ int size;
+};
+
+static unsigned long crasher_random(void)
+{
+ rand_seed = rand_seed*69069L+1;
+ return rand_seed^jiffies;
+}
+
+void crasher_srandom(unsigned long entropy)
+{
+ rand_seed ^= entropy;
+ crasher_random();
+}
+
+static char *mem_alloc(int size) {
+ char *p = kmalloc(size, GFP_KERNEL);
+ int i;
+ if (!p)
+ return p;
+ for (i = 0 ; i < size; i++)
+ p[i] = (i % 119) + 8;
+ return p;
+}
+
+static void mem_check(char *p, int size) {
+ int i;
+ if (!p)
+ return;
+ for (i = 0 ; i < size; i++) {
+ if (p[i] != ((i % 119) + 8)) {
+ printk(KERN_CRIT "verify error at %lX offset %d "
+ " wanted %d found %d size %d\n",
+ (unsigned long)(p + i), i, (i % 119) + 8,
+ p[i], size);
+ }
+ }
+ // try and trigger slab poisoning for people using this buffer
+ // wrong
+ memset(p, 0, size);
+}
+
+static void mem_verify(void) {
+ struct mem_buf bufs[NUM_ALLOC];
+ struct mem_buf *b;
+ int index;
+ int size;
+ unsigned long sleep;
+ memset(bufs, 0, sizeof(struct mem_buf) * NUM_ALLOC);
+ while(!module_exiting) {
+ index = crasher_random() % NUM_ALLOC;
+ b = bufs + index;
+ if (b->size) {
+ mem_check(b->buf, b->size);
+ kfree(b->buf);
+ b->buf = NULL;
+ b->size = 0;
+ } else {
+ size = crasher_random() % NUM_SIZES;
+ size = sizes[size];
+ b->buf = mem_alloc(size);
+ b->size = size;
+ }
+ sleep = crasher_random() % (HZ / 10);
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(sleep);
+ set_current_state(TASK_RUNNING);
+ }
+ for (index = 0 ; index < NUM_ALLOC ; index++) {
+ b = bufs + index;
+ if (b->size) {
+ mem_check(b->buf, b->size);
+ kfree(b->buf);
+ }
+ }
+}
+
+static int crasher_thread(void *unused)
+{
+ daemonize("crasher");
+ complete(&startup);
+ mem_verify();
+ complete(&startup);
+ return 0;
+}
+
+static int __init crasher_init(void)
+{
+ int i;
+ init_completion(&startup);
+ crasher_srandom(seed);
+
+ printk("crasher module (%d threads). Testing sizes: ", threads);
+ for (i = 0 ; i < NUM_SIZES ; i++)
+ printk("%d ", sizes[i]);
+ printk("\n");
+
+ for (i = 0 ; i < threads ; i++)
+ kernel_thread(crasher_thread, crasher_thread,
+ CLONE_FS | CLONE_FILES);
+ for (i = 0 ; i < threads ; i++)
+ wait_for_completion(&startup);
+ return 0;
+}
+
+static void __exit crasher_exit(void)
+{
+ int i;
+ module_exiting = 1;
+ for (i = 0 ; i < threads ; i++)
+ wait_for_completion(&startup);
+ printk("all crasher threads done\n");
+ return;
+}
+
+module_init(crasher_init);
+module_exit(crasher_exit);

2006-01-31 22:15:58

by Dave Jones

[permalink] [raw]

Subject: Re: 2.6.16rc1-git4 slab corruption.

On Tue, Jan 31, 2006 at 02:08:35PM -0500, Chris Mason wrote:
> On Tuesday 31 January 2006 13:03, Dave Jones wrote:
> > Redzone: 0x5a2cf071/0x5a2cf071.
> > Last user: [<ffffffff80181cc0>](free_buffer_head+0x2a/0x43)
>
> Haven't seen this one yet, but we have an assortment of strange bugs on
> 2.6.16-rc1-git. What were you doing to trigger it?

Running fetchmail/procmail/spamassassin to pick up ~1000 mails.

> I've been trying to hammer on things with the slab exerciser below, but haven't had much luck in getting a nice reliable test case.

Manfred had a nice 'check all slabs before they're freed' patch, which might
be worth resurrecting for some tests. It may be that we're corrupting rarely
free'd slabs, making them hard to hit.

Dave

2006-02-01 07:27:05

by Pekka Enberg

[permalink] [raw]

Subject: Re: 2.6.16rc1-git4 slab corruption.

Hi,

On 2/1/06, Dave Jones <[email protected]> wrote:
> Manfred had a nice 'check all slabs before they're freed' patch, which might
> be worth resurrecting for some tests. It may be that we're corrupting rarely
> free'd slabs, making them hard to hit.

Do you know where I can find that patch? I would like to try to sneak
that past Andrew. It seems silly not to have these useful slab
debugging patches within mainline.

Pekka

2006-02-01 16:09:33

by Dave Jones

[permalink] [raw]

Subject: Re: 2.6.16rc1-git4 slab corruption.

On Wed, Feb 01, 2006 at 09:27:02AM +0200, Pekka Enberg wrote:
> Hi,
>
> On 2/1/06, Dave Jones <[email protected]> wrote:
> > Manfred had a nice 'check all slabs before they're freed' patch, which might
> > be worth resurrecting for some tests. It may be that we're corrupting rarely
> > free'd slabs, making them hard to hit.
>
> Do you know where I can find that patch? I would like to try to sneak
> that past Andrew. It seems silly not to have these useful slab
> debugging patches within mainline.

Here's the last version that I had that was rediffed against
2.6.13 or .14 (I forget which, it's been a while since I used it).

Dave

diff -urNp --exclude-from=/home/davej/.exclude linux-1000/mm/slab.c linux-1010/mm/slab.c
--- linux-1000/mm/slab.c
+++ linux-1010/mm/slab.c
@@ -189,7 +189,7 @@
*/

#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
+#define BUFCTL_ALLOC (((kmem_bufctl_t)(~0U))-1)
#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2)

/* Max number of objs-per-slab for caches which use off-slab slabs.
@@ -355,6 +355,7 @@ struct kmem_cache_s {
#if DEBUG
int dbghead;
int reallen;
+ unsigned long redzonetest;
#endif
};

@@ -370,6 +371,7 @@ struct kmem_cache_s {
*/
#define REAPTIMEOUT_CPUC (2*HZ)
#define REAPTIMEOUT_LIST3 (4*HZ)
+#define REDZONETIMEOUT (300*HZ)

#if STATS
#define STATS_INC_ACTIVE(x) ((x)->num_active++)
@@ -1446,7 +1448,11 @@ next:
}

cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
- ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
+ ((unsigned long)cachep/L1_CACHE_BYTES)%REAPTIMEOUT_LIST3;
+#if DEBUG
+ cachep->redzonetest = jiffies + REDZONETIMEOUT +
+ ((unsigned long)cachep/L1_CACHE_BYTES)%REDZONETIMEOUT;
+#endif

/* Need the semaphore to access the chain. */
down(&cache_chain_sem);
@@ -2043,7 +2049,7 @@ retry:
slabp->inuse++;
next = slab_bufctl(slabp)[slabp->free];
#if DEBUG
- slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
+ slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
#endif
slabp->free = next;
}
@@ -2181,7 +2187,7 @@ static void free_block(kmem_cache_t *cac
objnr = (objp - slabp->s_mem) / cachep->objsize;
check_slabp(cachep, slabp);
#if DEBUG
- if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
+ if (slab_bufctl(slabp)[objnr] != BUFCTL_ALLOC) {
printk(KERN_ERR "slab: double free detected in cache '%s', objp %p.\n",
cachep->name, objp);
BUG();
@@ -2409,7 +2415,7 @@ got_slabp:
slabp->inuse++;
next = slab_bufctl(slabp)[slabp->free];
#if DEBUG
- slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
+ slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
#endif
slabp->free = next;
check_slabp(cachep, slabp);
@@ -2615,6 +2621,94 @@ unsigned int kmem_cache_size(kmem_cache_

EXPORT_SYMBOL(kmem_cache_size);

+#if DEBUG
+static void check_slabuse(kmem_cache_t *cachep, struct slab *slabp)
+{
+ int i;
+
+ if (!(cachep->flags & SLAB_RED_ZONE))
+ return; /* no redzone data to check */
+
+#if CONFIG_DEBUG_PAGEALLOC
+ /* Page alloc debugging on for this cache. Mapping & Unmapping happens
+ * without any locking, thus parallel checks are impossible.
+ */
+ if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep))
+ return;
+#endif
+
+ for (i=0;i<cachep->num;i++) {
+ void *objp = slabp->s_mem + cachep->objsize * i;
+ unsigned long red1, red2;
+
+ red1 = *dbg_redzone1(cachep, objp);
+ red2 = *dbg_redzone2(cachep, objp);
+
+ /* simplest case: marked as inactive */
+ if (red1 == RED_INACTIVE && red2 == RED_INACTIVE)
+ continue;
+
+ /* tricky case: if the bufctl value is BUFCTL_ALLOC, then
+ * the object is either allocated or somewhere in a cpu
+ * cache. The cpu caches are lockless and there might be
+ * a concurrent alloc/free call, thus we must accept random
+ * combinations of RED_ACTIVE and _INACTIVE
+ */
+ if (slab_bufctl(slabp)[i] == BUFCTL_ALLOC &&
+ (red1 == RED_INACTIVE || red1 == RED_ACTIVE) &&
+ (red2 == RED_INACTIVE || red2 == RED_ACTIVE))
+ continue;
+
+ printk(KERN_ERR "slab %s: redzone mismatch in slabp %p, objp %p, bufctl 0x%x\n",
+ cachep->name, slabp, objp, slab_bufctl(slabp)[i]);
+ print_objinfo(cachep, objp, 2);
+ }
+}
+
+/*
+ * Perform a self test on all slabs from a cache
+ */
+static void check_redzone(kmem_cache_t *cachep)
+{
+ struct list_head *q;
+ struct slab *slabp;
+
+ check_spinlock_acquired(cachep);
+
+ list_for_each(q,&cachep->lists.slabs_full) {
+ slabp = list_entry(q, struct slab, list);
+
+ if (slabp->inuse != cachep->num) {
+ printk(KERN_INFO "slab %s: wrong slabp found in full slab chain at %p (%d/%d).\n",
+ cachep->name, slabp, slabp->inuse, cachep->num);
+ }
+ check_slabp(cachep, slabp);
+ check_slabuse(cachep, slabp);
+ }
+ list_for_each(q,&cachep->lists.slabs_partial) {
+ slabp = list_entry(q, struct slab, list);
+
+ if (slabp->inuse == cachep->num || slabp->inuse == 0) {
+ printk(KERN_INFO "slab %s: wrong slab found in partial chain at %p (%d/%d).\n",
+ cachep->name, slabp, slabp->inuse, cachep->num);
+ }
+ check_slabp(cachep, slabp);
+ check_slabuse(cachep, slabp);
+ }
+ list_for_each(q,&cachep->lists.slabs_free) {
+ slabp = list_entry(q, struct slab, list);
+
+ if (slabp->inuse != 0) {
+ printk(KERN_INFO "slab %s: wrong slab found in free chain at %p (%d/%d).\n",
+ cachep->name, slabp, slabp->inuse, cachep->num);
+ }
+ check_slabp(cachep, slabp);
+ check_slabuse(cachep, slabp);
+ }
+}
+
+#endif
+
struct ccupdate_struct {
kmem_cache_t *cachep;
struct array_cache *new[NR_CPUS];
@@ -2798,6 +2892,12 @@ static void cache_reap(void *unused)

drain_array_locked(searchp, ac_data(searchp), 0);

+#if DEBUG
+ if(time_before(searchp->redzonetest, jiffies)) {
+ searchp->redzonetest = jiffies + REDZONETIMEOUT;
+ check_redzone(searchp);
+ }
+#endif
if(time_after(searchp->lists.next_reap, jiffies))
goto next_unlock;

2006-02-01 16:27:42

by Pekka Enberg

[permalink] [raw]

Subject: Re: 2.6.16rc1-git4 slab corruption.

On 2/1/06, Dave Jones <[email protected]> wrote:
> > > Manfred had a nice 'check all slabs before they're freed' patch, which might
> > > be worth resurrecting for some tests. It may be that we're corrupting rarely
> > > free'd slabs, making them hard to hit.

On Wed, Feb 01, 2006 at 09:27:02AM +0200, Pekka Enberg wrote:
> > Do you know where I can find that patch? I would like to try to sneak
> > that past Andrew. It seems silly not to have these useful slab
> > debugging patches within mainline.

On Wed, 2006-02-01 at 11:09 -0500, Dave Jones wrote:
> Here's the last version that I had that was rediffed against
> 2.6.13 or .14 (I forget which, it's been a while since I used it).

Thanks Dave. Manfred, is there are reason this wasn't merged with
mainline? Needs bit of cleanup but seems useful for detecting slab
corruption early.

Pekka

>
> diff -urNp --exclude-from=/home/davej/.exclude linux-1000/mm/slab.c linux-1010/mm/slab.c
> --- linux-1000/mm/slab.c
> +++ linux-1010/mm/slab.c
> @@ -189,7 +189,7 @@
> */
>
> #define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
> -#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
> +#define BUFCTL_ALLOC (((kmem_bufctl_t)(~0U))-1)
> #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2)
>
> /* Max number of objs-per-slab for caches which use off-slab slabs.
> @@ -355,6 +355,7 @@ struct kmem_cache_s {
> #if DEBUG
> int dbghead;
> int reallen;
> + unsigned long redzonetest;
> #endif
> };
>
> @@ -370,6 +371,7 @@ struct kmem_cache_s {
> */
> #define REAPTIMEOUT_CPUC (2*HZ)
> #define REAPTIMEOUT_LIST3 (4*HZ)
> +#define REDZONETIMEOUT (300*HZ)
>
> #if STATS
> #define STATS_INC_ACTIVE(x) ((x)->num_active++)
> @@ -1446,7 +1448,11 @@ next:
> }
>
> cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
> - ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
> + ((unsigned long)cachep/L1_CACHE_BYTES)%REAPTIMEOUT_LIST3;
> +#if DEBUG
> + cachep->redzonetest = jiffies + REDZONETIMEOUT +
> + ((unsigned long)cachep/L1_CACHE_BYTES)%REDZONETIMEOUT;
> +#endif
>
> /* Need the semaphore to access the chain. */
> down(&cache_chain_sem);
> @@ -2043,7 +2049,7 @@ retry:
> slabp->inuse++;
> next = slab_bufctl(slabp)[slabp->free];
> #if DEBUG
> - slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
> + slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
> #endif
> slabp->free = next;
> }
> @@ -2181,7 +2187,7 @@ static void free_block(kmem_cache_t *cac
> objnr = (objp - slabp->s_mem) / cachep->objsize;
> check_slabp(cachep, slabp);
> #if DEBUG
> - if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
> + if (slab_bufctl(slabp)[objnr] != BUFCTL_ALLOC) {
> printk(KERN_ERR "slab: double free detected in cache '%s', objp %p.\n",
> cachep->name, objp);
> BUG();
> @@ -2409,7 +2415,7 @@ got_slabp:
> slabp->inuse++;
> next = slab_bufctl(slabp)[slabp->free];
> #if DEBUG
> - slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
> + slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
> #endif
> slabp->free = next;
> check_slabp(cachep, slabp);
> @@ -2615,6 +2621,94 @@ unsigned int kmem_cache_size(kmem_cache_
>
> EXPORT_SYMBOL(kmem_cache_size);
>
> +#if DEBUG
> +static void check_slabuse(kmem_cache_t *cachep, struct slab *slabp)
> +{
> + int i;
> +
> + if (!(cachep->flags & SLAB_RED_ZONE))
> + return; /* no redzone data to check */
> +
> +#if CONFIG_DEBUG_PAGEALLOC
> + /* Page alloc debugging on for this cache. Mapping & Unmapping happens
> + * without any locking, thus parallel checks are impossible.
> + */
> + if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep))
> + return;
> +#endif
> +
> + for (i=0;i<cachep->num;i++) {
> + void *objp = slabp->s_mem + cachep->objsize * i;
> + unsigned long red1, red2;
> +
> + red1 = *dbg_redzone1(cachep, objp);
> + red2 = *dbg_redzone2(cachep, objp);
> +
> + /* simplest case: marked as inactive */
> + if (red1 == RED_INACTIVE && red2 == RED_INACTIVE)
> + continue;
> +
> + /* tricky case: if the bufctl value is BUFCTL_ALLOC, then
> + * the object is either allocated or somewhere in a cpu
> + * cache. The cpu caches are lockless and there might be
> + * a concurrent alloc/free call, thus we must accept random
> + * combinations of RED_ACTIVE and _INACTIVE
> + */
> + if (slab_bufctl(slabp)[i] == BUFCTL_ALLOC &&
> + (red1 == RED_INACTIVE || red1 == RED_ACTIVE) &&
> + (red2 == RED_INACTIVE || red2 == RED_ACTIVE))
> + continue;
> +
> + printk(KERN_ERR "slab %s: redzone mismatch in slabp %p, objp %p, bufctl 0x%x\n",
> + cachep->name, slabp, objp, slab_bufctl(slabp)[i]);
> + print_objinfo(cachep, objp, 2);
> + }
> +}
> +
> +/*
> + * Perform a self test on all slabs from a cache
> + */
> +static void check_redzone(kmem_cache_t *cachep)
> +{
> + struct list_head *q;
> + struct slab *slabp;
> +
> + check_spinlock_acquired(cachep);
> +
> + list_for_each(q,&cachep->lists.slabs_full) {
> + slabp = list_entry(q, struct slab, list);
> +
> + if (slabp->inuse != cachep->num) {
> + printk(KERN_INFO "slab %s: wrong slabp found in full slab chain at %p (%d/%d).\n",
> + cachep->name, slabp, slabp->inuse, cachep->num);
> + }
> + check_slabp(cachep, slabp);
> + check_slabuse(cachep, slabp);
> + }
> + list_for_each(q,&cachep->lists.slabs_partial) {
> + slabp = list_entry(q, struct slab, list);
> +
> + if (slabp->inuse == cachep->num || slabp->inuse == 0) {
> + printk(KERN_INFO "slab %s: wrong slab found in partial chain at %p (%d/%d).\n",
> + cachep->name, slabp, slabp->inuse, cachep->num);
> + }
> + check_slabp(cachep, slabp);
> + check_slabuse(cachep, slabp);
> + }
> + list_for_each(q,&cachep->lists.slabs_free) {
> + slabp = list_entry(q, struct slab, list);
> +
> + if (slabp->inuse != 0) {
> + printk(KERN_INFO "slab %s: wrong slab found in free chain at %p (%d/%d).\n",
> + cachep->name, slabp, slabp->inuse, cachep->num);
> + }
> + check_slabp(cachep, slabp);
> + check_slabuse(cachep, slabp);
> + }
> +}
> +
> +#endif
> +
> struct ccupdate_struct {
> kmem_cache_t *cachep;
> struct array_cache *new[NR_CPUS];
> @@ -2798,6 +2892,12 @@ static void cache_reap(void *unused)
>
> drain_array_locked(searchp, ac_data(searchp), 0);
>
> +#if DEBUG
> + if(time_before(searchp->redzonetest, jiffies)) {
> + searchp->redzonetest = jiffies + REDZONETIMEOUT;
> + check_redzone(searchp);
> + }
> +#endif
> if(time_after(searchp->lists.next_reap, jiffies))
> goto next_unlock;
>
>
>

2006-02-01 16:29:27

by Pekka Enberg

[permalink] [raw]

Subject: Re: 2.6.16rc1-git4 slab corruption.

Hi Dave,

On Wed, 2006-02-01 at 11:09 -0500, Dave Jones wrote:
> @@ -1446,7 +1448,11 @@ next:
> }
>
> cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
> - ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
> + ((unsigned long)cachep/L1_CACHE_BYTES)%REAPTIMEOUT_LIST3;

Hmm. This bit seems unrelated. Was it in the original patch?

Pekka

2006-02-01 16:38:39

by Dave Jones

[permalink] [raw]

Subject: Re: 2.6.16rc1-git4 slab corruption.

On Wed, Feb 01, 2006 at 06:29:24PM +0200, Pekka Enberg wrote:
> Hi Dave,
>
> On Wed, 2006-02-01 at 11:09 -0500, Dave Jones wrote:
> > @@ -1446,7 +1448,11 @@ next:
> > }
> >
> > cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
> > - ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
> > + ((unsigned long)cachep/L1_CACHE_BYTES)%REAPTIMEOUT_LIST3;
>
> Hmm. This bit seems unrelated. Was it in the original patch?

as far as I recall, yes.

Dave

2006-02-01 17:09:28

by Pekka Enberg

[permalink] [raw]

Subject: Re: 2.6.16rc1-git4 slab corruption.

Hi,

On Wed, 2006-02-01 at 11:09 -0500, Dave Jones wrote:
> Here's the last version that I had that was rediffed against
> 2.6.13 or .14 (I forget which, it's been a while since I used it).

Here's an untested rediff for 2.6.16-rc1-mm4. The patch should apply to
mainline when Linus merges the slab bits from Andrew. I am wondering if
this should be a separate config option, CONFIG_VERIFY_SLAB?

Pekka

mm/slab.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 124 insertions(+), 3 deletions(-)

Index: 2.6-mm/mm/slab.c
===================================================================
--- 2.6-mm.orig/mm/slab.c
+++ 2.6-mm/mm/slab.c
@@ -202,7 +202,7 @@

typedef unsigned long kmem_bufctl_t;
#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
+#define BUFCTL_ALLOC (((kmem_bufctl_t)(~0U))-1)
#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2)

/* Max number of objs-per-slab for caches which use off-slab slabs.
@@ -433,6 +433,7 @@ struct kmem_cache {
*/
int obj_offset;
int obj_size;
+ unsigned long redzonetest;
#endif
};

@@ -448,6 +449,7 @@ struct kmem_cache {
*/
#define REAPTIMEOUT_CPUC (2*HZ)
#define REAPTIMEOUT_LIST3 (4*HZ)
+#define REDZONETIMEOUT (300*HZ)

#if STATS
#define STATS_INC_ACTIVE(x) ((x)->num_active++)
@@ -1932,6 +1934,11 @@ kmem_cache_create (const char *name, siz
cachep->limit = BOOT_CPUCACHE_ENTRIES;
}

+#if DEBUG
+ cachep->redzonetest = jiffies + REDZONETIMEOUT +
+ ((unsigned long)cachep/L1_CACHE_BYTES)%REDZONETIMEOUT;
+#endif
+
/* cache setup completed, link it into the list */
list_add(&cachep->next, &cache_chain);
oops:
@@ -2261,7 +2268,7 @@ static void *slab_get_obj(struct kmem_ca
slabp->inuse++;
next = slab_bufctl(slabp)[slabp->free];
#if DEBUG
- slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
+ slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
WARN_ON(slabp->nodeid != nodeid);
#endif
slabp->free = next;
@@ -2278,7 +2285,7 @@ static void slab_put_obj(struct kmem_cac
/* Verify that the slab belongs to the intended node */
WARN_ON(slabp->nodeid != nodeid);

- if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
+ if (slab_bufctl(slabp)[objnr] != BUFCTL_ALLOC) {
printk(KERN_ERR "slab: double free detected in cache "
"'%s', objp %p\n", cachep->name, objp);
BUG();
@@ -3285,6 +3292,113 @@ static int alloc_kmemlist(struct kmem_ca
return err;
}

+#if DEBUG
+
+static void check_slabuse(kmem_cache_t *cachep, struct slab *slabp)
+{
+ int i;
+
+ if (!(cachep->flags & SLAB_RED_ZONE))
+ return; /* no redzone data to check */
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /* Page alloc debugging on for this cache. Mapping & Unmapping happens
+ * without any locking, thus parallel checks are impossible.
+ */
+ if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
+ return;
+#endif
+
+ for (i=0;i<cachep->num;i++) {
+ void *objp = slabp->s_mem + cachep->buffer_size * i;
+ unsigned long red1, red2;
+
+ red1 = *dbg_redzone1(cachep, objp);
+ red2 = *dbg_redzone2(cachep, objp);
+
+ /* simplest case: marked as inactive */
+ if (red1 == RED_INACTIVE && red2 == RED_INACTIVE)
+ continue;
+
+ /* tricky case: if the bufctl value is BUFCTL_ALLOC, then
+ * the object is either allocated or somewhere in a cpu
+ * cache. The cpu caches are lockless and there might be
+ * a concurrent alloc/free call, thus we must accept random
+ * combinations of RED_ACTIVE and _INACTIVE
+ */
+ if (slab_bufctl(slabp)[i] == BUFCTL_ALLOC &&
+ (red1 == RED_INACTIVE || red1 == RED_ACTIVE) &&
+ (red2 == RED_INACTIVE || red2 == RED_ACTIVE))
+ continue;
+
+ printk(KERN_ERR "slab %s: redzone mismatch in slabp %p, objp %p, bufctl 0x%lx\n",
+ cachep->name, slabp, objp, slab_bufctl(slabp)[i]);
+ print_objinfo(cachep, objp, 2);
+ }
+}
+
+static void print_invalid_slab(const char *list_name, struct kmem_cache *cache,
+ struct slab *slab)
+{
+ printk(KERN_ERR "slab %s: invalid slab found in %s list at %p (%d/%d).\n",
+ cache->name, list_name, slab, slab->inuse, cache->num);
+}
+
+static void verify_node_redzone(struct kmem_cache *cache,
+ struct kmem_list3 *lists)
+{
+ struct list_head *q;
+ struct slab *slab;
+
+ list_for_each(q, &lists->slabs_full) {
+ slab = list_entry(q, struct slab, list);
+
+ if (slab->inuse != cache->num)
+ print_invalid_slab("full", cache, slab);
+
+ check_slabp(cache, slab);
+ check_slabuse(cache, slab);
+ }
+ list_for_each(q, &lists->slabs_partial) {
+ slab = list_entry(q, struct slab, list);
+
+ if (slab->inuse == cache->num || slab->inuse == 0)
+ print_invalid_slab("partial", cache, slab);
+
+ check_slabp(cache, slab);
+ check_slabuse(cache, slab);
+ }
+ list_for_each(q, &lists->slabs_free) {
+ slab = list_entry(q, struct slab, list);
+
+ if (slab->inuse != 0)
+ print_invalid_slab("free", cache, slab);
+
+ check_slabp(cache, slab);
+ check_slabuse(cache, slab);
+ }
+}
+
+/*
+ * Perform a self test on all slabs from a cache
+ */
+static void verify_redzone(struct kmem_cache *cache)
+{
+ int node;
+
+ check_spinlock_acquired(cache);
+
+ for_each_online_node(node) {
+ struct kmem_list3 *lists = cache->nodelists[node];
+
+ if (!lists)
+ continue;
+ verify_node_redzone(cache, lists);
+ }
+}
+
+#endif
+
struct ccupdate_struct {
struct kmem_cache *cachep;
struct array_cache *new[NR_CPUS];
@@ -3465,6 +3579,13 @@ static void cache_reap(void *unused)
drain_array_locked(searchp, cpu_cache_get(searchp), 0,
numa_node_id());

+#if DEBUG
+ if (time_before(searchp->redzonetest, jiffies)) {
+ searchp->redzonetest = jiffies + REDZONETIMEOUT;
+ verify_redzone(searchp);
+ }
+#endif
+
if (time_after(l3->next_reap, jiffies))
goto next_unlock;

2006-02-02 05:07:23

by Dave Jones

[permalink] [raw]

Subject: Re: 2.6.16rc1-git4 slab corruption.

On Tue, Jan 31, 2006 at 01:03:19PM -0500, Dave Jones wrote:
> Slab corruption: start=ffff81000057a360, len=88
> Redzone: 0x5a2cf071/0x5a2cf071.
> Last user: [<ffffffff80181cc0>](free_buffer_head+0x2a/0x43)
>
> Call Trace: <ffffffff8017b4d0>{check_poison_obj+127}
> <ffffffff80181cea>{alloc_buffer_head+17} <ffffffff8017b638>{cache_alloc_debugcheck_after+48}
> <ffffffff8017b828>{kmem_cache_alloc+231} <ffffffff80181cea>{alloc_buffer_head+17}
> <ffffffff801824b1>{alloc_page_buffers+53} <ffffffff8018255c>{create_empty_buffers+20}
> <ffffffff801831c8>{__block_prepare_write+148} <ffffffff8807e5f4>{:ext3:ext3_get_block+0}
> <ffffffff8017b0cc>{poison_obj+38} <ffffffff8017b6f7>{cache_alloc_debugcheck_after+239}
> <ffffffff80183536>{block_prepare_write+26} <ffffffff8807fcd1>{:ext3:ext3_prepare_write+148}
> <ffffffff80340386>{_write_unlock_irq+9} <ffffffff8015e3b7>{generic_file_buffered_write+603}
> <ffffffff80137896>{current_fs_time+59} <ffffffff80137896>{current_fs_time+59}
> <ffffffff8015ea13>{__generic_file_aio_write_nolock+767}
> <ffffffff8015ee22>{generic_file_aio_write+78} <ffffffff80149d23>{debug_mutex_add_waiter+159}
> <ffffffff8033fb23>{__mutex_lock_slowpath+817} <ffffffff8015ee39>{generic_file_aio_write+101}
> <ffffffff8807be5e>{:ext3:ext3_file_write+22} <ffffffff8018054e>{do_sync_write+199}
> <ffffffff801464d6>{autoremove_wake_function+0} <ffffffff8015ae1f>{audit_syscall_entry+301}
> <ffffffff80180e48>{vfs_write+206} <ffffffff801813fa>{sys_write+69}
> <ffffffff8010aa78>{tracesys+209}
> 020: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 01 00 00 00

I just hit corruption again (I had rebooted since), but this time with
a completely different trace.

Slab corruption: start=ffff81000057a000, len=4096

Call Trace: <ffffffff8017b4f0>{check_poison_obj+127}
<ffffffff802dd12a>{__alloc_skb+92} <ffffffff8017b658>{cache_alloc_debugcheck_after+48}
<ffffffff8017c271>{__kmalloc+294} <ffffffff802dd12a>{__alloc_skb+92}
<ffffffff802d9ba3>{sock_alloc_send_skb+101} <ffffffff801dd163>{avc_has_perm+67}
<ffffffff8017b340>{cache_free_debugcheck+554} <ffffffff8033952e>{unix_stream_sendmsg+348}
<ffffffff801dd4c2>{socket_has_perm+93} <ffffffff802d70e2>{do_sock_write+193}
<ffffffff802d90ad>{sock_writev+183} <ffffffff801464de>{autoremove_wake_function+0}
<ffffffff801dd818>{inode_has_perm+86} <ffffffff802d7668>{sock_aio_read+81}
<ffffffff801dd8bb>{file_has_perm+150} <ffffffff80180bf7>{do_readv_writev+381}
<ffffffff801811a7>{sys_writev+69} <ffffffff8010a906>{system_call+126}
380: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 01 00 00 00

What I find interesting here is the corruption pattern is the same both times.
Strange, and very scary.

Dave

2006-02-02 07:11:31

by Pekka Enberg

[permalink] [raw]

Subject: Re: 2.6.16rc1-git4 slab corruption.

Hi,

On 2/2/06, Dave Jones <[email protected]> wrote:
> I just hit corruption again (I had rebooted since), but this time with
> a completely different trace.

[snip, snip]

> What I find interesting here is the corruption pattern is the same both times.
> Strange, and very scary.

Did you try out the slab verifier? The patch I sent now applies to
latest -linus.

Pekka