Hi,
Here is the fix to "bh: Ensure bh fits within a page" problem
caused by JBD.
BTW, I realized that this problem can happen only with 1k, 2k
filesystems - as 4k, 8k allocations disable slab debug
automatically. But for completeness, I created slabs for those
also.
What do you think ? I ran basic tests and things are fine.
Thanks,
Badari
JBD currently allocates commit and frozen buffers from slabs.
With CONFIG_SLAB_DEBUG, it's possible for an allocation to
cross the page boundary, causing I/O problems.
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=200127
So, instead of allocating these from regular slabs - manage
allocation from its own slabs and disable slab debug for these slabs.
Signed-off-by: Badari Pulavarty <[email protected]>
---
fs/jbd/commit.c | 6 +--
fs/jbd/journal.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++---
fs/jbd/transaction.c | 9 ++---
include/linux/jbd.h | 3 +
4 files changed, 95 insertions(+), 11 deletions(-)
Index: linux-2.6.18-rc4/fs/jbd/journal.c
===================================================================
--- linux-2.6.18-rc4.orig/fs/jbd/journal.c 2006-08-23 14:47:25.000000000 -0700
+++ linux-2.6.18-rc4/fs/jbd/journal.c 2006-08-23 15:59:06.000000000 -0700
@@ -328,10 +328,10 @@
char *tmp;
jbd_unlock_bh_state(bh_in);
- tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS);
+ tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
jbd_lock_bh_state(bh_in);
if (jh_in->b_frozen_data) {
- kfree(tmp);
+ jbd_slab_free(tmp, bh_in->b_size);
goto repeat;
}
@@ -1612,6 +1612,89 @@
}
/*
+ * jbd slab management: create 1k, 2k, 4k, 8k slabs and allocate
+ * frozen and commit buffers from these slabs.
+ *
+ * Reason for doing this is to avoid, SLAB_DEBUG - since it could
+ * cause bh to cross page boundary.
+ */
+
+static kmem_cache_t *jbd_slab[4];
+static const char *jbd_slab_names[4] = {
+ "jbd_1k",
+ "jbd_2k",
+ "jbd_4k",
+ "jbd_8k",
+};
+
+static void journal_destroy_jbd_slabs(void)
+{
+ int i;
+
+ for (i=0; i<4; i++) {
+ if (jbd_slab[i])
+ kmem_cache_destroy(jbd_slab[i]);
+ jbd_slab[i] = NULL;
+ }
+}
+
+static int journal_init_jbd_slabs(void)
+{
+ int i = 0;
+ int retval = 0;
+
+ for (i=0; i<4; i++) {
+ unsigned long slab_size = 1024 << i;
+ jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
+ slab_size, slab_size,
+ 0, NULL, NULL);
+ if (jbd_slab[i] == 0) {
+ journal_destroy_jbd_slabs();
+ retval = -ENOMEM;
+ printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
+ goto out;
+ }
+ }
+out:
+ return retval;
+}
+
+static int jbd_find_slab_index(size_t size)
+{
+ int idx = 0;
+
+ switch (size) {
+ case 1024: idx = 0;
+ break;
+ case 2048: idx = 1;
+ break;
+ case 4096: idx = 2;
+ break;
+ case 8192: idx = 3;
+ break;
+ default: printk("JBD unknown slab size %ld\n", size);
+ BUG();
+ }
+ return idx;
+}
+
+void * jbd_slab_alloc(size_t size, gfp_t flags)
+{
+ int idx;
+
+ idx = jbd_find_slab_index(size);
+ return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
+}
+
+void jbd_slab_free(void *ptr, size_t size)
+{
+ int idx;
+
+ idx = jbd_find_slab_index(size);
+ kmem_cache_free(jbd_slab[idx], ptr);
+}
+
+/*
* Journal_head storage management
*/
static kmem_cache_t *journal_head_cache;
@@ -1799,13 +1882,13 @@
printk(KERN_WARNING "%s: freeing "
"b_frozen_data\n",
__FUNCTION__);
- kfree(jh->b_frozen_data);
+ jbd_slab_free(jh->b_frozen_data, bh->b_size);
}
if (jh->b_committed_data) {
printk(KERN_WARNING "%s: freeing "
"b_committed_data\n",
__FUNCTION__);
- kfree(jh->b_committed_data);
+ jbd_slab_free(jh->b_committed_data, bh->b_size);
}
bh->b_private = NULL;
jh->b_bh = NULL; /* debug, really */
@@ -1953,6 +2036,8 @@
ret = journal_init_journal_head_cache();
if (ret == 0)
ret = journal_init_handle_cache();
+ if (ret == 0)
+ ret = journal_init_jbd_slabs();
return ret;
}
@@ -1961,6 +2046,7 @@
journal_destroy_revoke_caches();
journal_destroy_journal_head_cache();
journal_destroy_handle_cache();
+ journal_destroy_jbd_slabs();
}
static int __init journal_init(void)
Index: linux-2.6.18-rc4/fs/jbd/transaction.c
===================================================================
--- linux-2.6.18-rc4.orig/fs/jbd/transaction.c 2006-08-23 14:05:57.000000000 -0700
+++ linux-2.6.18-rc4/fs/jbd/transaction.c 2006-08-23 14:50:17.000000000 -0700
@@ -666,8 +666,9 @@
if (!frozen_buffer) {
JBUFFER_TRACE(jh, "allocate memory for buffer");
jbd_unlock_bh_state(bh);
- frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
- GFP_NOFS);
+ frozen_buffer =
+ jbd_slab_alloc(jh2bh(jh)->b_size,
+ GFP_NOFS);
if (!frozen_buffer) {
printk(KERN_EMERG
"%s: OOM for frozen_buffer\n",
@@ -879,7 +880,7 @@
repeat:
if (!jh->b_committed_data) {
- committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS);
+ committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
if (!committed_data) {
printk(KERN_EMERG "%s: No memory for committed data\n",
__FUNCTION__);
@@ -906,7 +907,7 @@
out:
journal_put_journal_head(jh);
if (unlikely(committed_data))
- kfree(committed_data);
+ jbd_slab_free(committed_data, bh->b_size);
return err;
}
Index: linux-2.6.18-rc4/fs/jbd/commit.c
===================================================================
--- linux-2.6.18-rc4.orig/fs/jbd/commit.c 2006-08-23 14:05:57.000000000 -0700
+++ linux-2.6.18-rc4/fs/jbd/commit.c 2006-08-23 14:50:17.000000000 -0700
@@ -261,7 +261,7 @@
struct buffer_head *bh = jh2bh(jh);
jbd_lock_bh_state(bh);
- kfree(jh->b_committed_data);
+ jbd_slab_free(jh->b_committed_data, bh->b_size);
jh->b_committed_data = NULL;
jbd_unlock_bh_state(bh);
}
@@ -745,14 +745,14 @@
* Otherwise, we can just throw away the frozen data now.
*/
if (jh->b_committed_data) {
- kfree(jh->b_committed_data);
+ jbd_slab_free(jh->b_committed_data, bh->b_size);
jh->b_committed_data = NULL;
if (jh->b_frozen_data) {
jh->b_committed_data = jh->b_frozen_data;
jh->b_frozen_data = NULL;
}
} else if (jh->b_frozen_data) {
- kfree(jh->b_frozen_data);
+ jbd_slab_free(jh->b_frozen_data, bh->b_size);
jh->b_frozen_data = NULL;
}
Index: linux-2.6.18-rc4/include/linux/jbd.h
===================================================================
--- linux-2.6.18-rc4.orig/include/linux/jbd.h 2006-08-23 14:05:57.000000000 -0700
+++ linux-2.6.18-rc4/include/linux/jbd.h 2006-08-23 14:50:17.000000000 -0700
@@ -72,6 +72,9 @@
#endif
extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
+extern void * jbd_slab_alloc(size_t size, gfp_t flags);
+extern void jbd_slab_free(void *ptr, size_t size);
+
#define jbd_kmalloc(size, flags) \
__jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
#define jbd_rep_kmalloc(size, flags) \
On Wed, 23 Aug 2006 16:08:15 -0700
Badari Pulavarty <[email protected]> wrote:
> Hi,
>
> Here is the fix to "bh: Ensure bh fits within a page" problem
> caused by JBD.
>
> BTW, I realized that this problem can happen only with 1k, 2k
> filesystems - as 4k, 8k allocations disable slab debug
> automatically. But for completeness, I created slabs for those
> also.
>
> What do you think ? I ran basic tests and things are fine.
>
Thanks for working on this.
> ...
>
> /*
> + * jbd slab management: create 1k, 2k, 4k, 8k slabs and allocate
> + * frozen and commit buffers from these slabs.
> + *
> + * Reason for doing this is to avoid, SLAB_DEBUG - since it could
> + * cause bh to cross page boundary.
> + */
> +
> +static kmem_cache_t *jbd_slab[4];
> +static const char *jbd_slab_names[4] = {
> + "jbd_1k",
> + "jbd_2k",
> + "jbd_4k",
> + "jbd_8k",
> +};
> +
> +static void journal_destroy_jbd_slabs(void)
> +{
> + int i;
> +
> + for (i=0; i<4; i++) {
> + if (jbd_slab[i])
> + kmem_cache_destroy(jbd_slab[i]);
> + jbd_slab[i] = NULL;
> + }
> +}
> +
> +static int journal_init_jbd_slabs(void)
> +{
> + int i = 0;
> + int retval = 0;
> +
> + for (i=0; i<4; i++) {
> + unsigned long slab_size = 1024 << i;
> + jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
> + slab_size, slab_size,
> + 0, NULL, NULL);
OK, passing align=slab_size fixes the bug.
> + if (jbd_slab[i] == 0) {
> + journal_destroy_jbd_slabs();
> + retval = -ENOMEM;
> + printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
> + goto out;
> + }
> + }
> +out:
> + return retval;
> +}
Do we have to create all four slabs up-front? Perhaps we can defer that
until mount-time, after we have determined the filesystem's block size.
That way, most people's machines will only ever create a single slab cache:
jbd_4k.
> +static int jbd_find_slab_index(size_t size)
> +{
> + int idx = 0;
> +
> + switch (size) {
> + case 1024: idx = 0;
> + break;
> + case 2048: idx = 1;
> + break;
> + case 4096: idx = 2;
> + break;
> + case 8192: idx = 3;
> + break;
> + default: printk("JBD unknown slab size %ld\n", size);
> + BUG();
> + }
> + return idx;
> +}
I'd suggest this function be changed to directly return a kmem_cache_t *.
> +void * jbd_slab_alloc(size_t size, gfp_t flags)
> +{
> + int idx;
> +
> + idx = jbd_find_slab_index(size);
> + return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
> +}
> +
> +void jbd_slab_free(void *ptr, size_t size)
> +{
> + int idx;
> +
> + idx = jbd_find_slab_index(size);
> + kmem_cache_free(jbd_slab[idx], ptr);
> +}
Then these become simpler.
On Wed, 2006-08-23 at 16:08 -0700, Badari Pulavarty wrote:
> Hi,
>
> Here is the fix to "bh: Ensure bh fits within a page" problem
> caused by JBD.
>
> BTW, I realized that this problem can happen only with 1k, 2k
> filesystems - as 4k, 8k allocations disable slab debug
> automatically. But for completeness, I created slabs for those
> also.
With a larger base page size, you could run into the same problems for
4K and 8K allocations, so it's a good thing to do them all.
> What do you think ? I ran basic tests and things are fine.
Looks sane to me.
Shaggy
--
David Kleikamp
IBM Linux Technology Center
On Wed, 2006-08-23 at 16:34 -0700, Andrew Morton wrote:
> On Wed, 23 Aug 2006 16:08:15 -0700
> Badari Pulavarty <[email protected]> wrote:
> > /*
> > + * jbd slab management: create 1k, 2k, 4k, 8k slabs and allocate
> > + * frozen and commit buffers from these slabs.
> > + *
> > + * Reason for doing this is to avoid, SLAB_DEBUG - since it could
> > + * cause bh to cross page boundary.
> > + */
> > +
> > +static kmem_cache_t *jbd_slab[4];
> > +static const char *jbd_slab_names[4] = {
> > + "jbd_1k",
> > + "jbd_2k",
> > + "jbd_4k",
> > + "jbd_8k",
> > +};
> > +
> > +static void journal_destroy_jbd_slabs(void)
> > +{
> > + int i;
> > +
> > + for (i=0; i<4; i++) {
> > + if (jbd_slab[i])
> > + kmem_cache_destroy(jbd_slab[i]);
> > + jbd_slab[i] = NULL;
> > + }
> > +}
> > +
> > +static int journal_init_jbd_slabs(void)
> > +{
> > + int i = 0;
> > + int retval = 0;
> > +
> > + for (i=0; i<4; i++) {
> > + unsigned long slab_size = 1024 << i;
> > + jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
> > + slab_size, slab_size,
> > + 0, NULL, NULL);
>
> OK, passing align=slab_size fixes the bug.
The comments above don't mention the alignment of the slabs, so a
comment here may help explain that the alignment is the key to avoiding
the page-straddling. Or you could elaborate above.
Shaggy
--
David Kleikamp
IBM Linux Technology Center
Dave Kleikamp wrote:
> On Wed, 2006-08-23 at 16:08 -0700, Badari Pulavarty wrote:
>
>> Hi,
>>
>> Here is the fix to "bh: Ensure bh fits within a page" problem
>> caused by JBD.
>>
>> BTW, I realized that this problem can happen only with 1k, 2k
>> filesystems - as 4k, 8k allocations disable slab debug
>> automatically. But for completeness, I created slabs for those
>> also.
>>
>
> With a larger base page size, you could run into the same problems for
> 4K and 8K allocations, so it's a good thing to do them all.
>
>
Yes, with a bigger base page size this can happen for 4k and 8k also.
I am also re-doing the patch to address Andrew's comments - I will
send out the latest version
in a few hours (currently testing).
Thanks,
Badari
On Wed, Aug 23, 2006 at 04:08:15PM -0700, Badari Pulavarty wrote:
> Hi,
>
> Here is the fix to "bh: Ensure bh fits within a page" problem
> caused by JBD.
>
> BTW, I realized that this problem can happen only with 1k, 2k
> filesystems - as 4k, 8k allocations disable slab debug
> automatically. But for completeness, I created slabs for those
> also.
>
> What do you think ? I ran basic tests and things are fine.
Why can't you just use alloc_page? I bet the whole slab overhead
eats more memory than what's wasted when using alloc_pages. Especially
as the typical usecase is a 4k blocks filesystem with 4k pagesize
where the overhead of alloc_page is non-existent.
Christoph Hellwig wrote:
> On Wed, Aug 23, 2006 at 04:08:15PM -0700, Badari Pulavarty wrote:
>
>> Hi,
>>
>> Here is the fix to "bh: Ensure bh fits within a page" problem
>> caused by JBD.
>>
>> BTW, I realized that this problem can happen only with 1k, 2k
>> filesystems - as 4k, 8k allocations disable slab debug
>> automatically. But for completeness, I created slabs for those
>> also.
>>
>> What do you think ? I ran basic tests and things are fine.
>>
>
> Why can't you just use alloc_page? I bet the whole slab overhead
> eats more memory than what's wasted when using alloc_pages. Especially
> as the typical usecase is a 4k blocks filesystem with 4k pagesize
> where the overhead of alloc_page is non-existant.
>
Yes. That was what was proposed earlier. But for 1k, 2k allocations we end
up wasting a whole page,
don't we? That's why I created right-sized slabs and disabled slab debug.
I guess, I can do this
only for 1k, 2k filesystems and directly use alloc_page() for 4k and 8k
- but that would make
code ugly and also it doesn't handle cases for bigger base pagesize
systems (64k power).
Thanks,
Badari
Andrew,
Here is the latest patch. Unfortunately, it's not surviving my stress
tests on a 1k filesystem. I keep running into
fs/buffer.c: 2791 assert in submit_bh()
BUG_ON(!buffer_mapped(bh));
I haven't touched "bh" itself. So, I am not sure what's happening
here. I am trying to reproduce it on mainline 2.6.18-rc4 (seen
it once so far - but not consistently).
Changes since the last patch:
- create appropriate slabs only when we mount the filesystem with
that blocksize.
- simplify the find slab idx process and get it by shifting size.
Thanks,
Badari
JBD currently allocates commit and frozen buffers from slabs.
With CONFIG_SLAB_DEBUG, it's possible for an allocation to
cross the page boundary, causing I/O problems.
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=200127
So, instead of allocating these from regular slabs - manage
allocation from its own slabs and disable slab debug for these slabs.
Signed-off-by: Badari Pulavarty <[email protected]>
---
fs/jbd/commit.c | 6 +--
fs/jbd/journal.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++---
fs/jbd/transaction.c | 9 ++---
include/linux/jbd.h | 3 +
4 files changed, 93 insertions(+), 11 deletions(-)
Index: linux-2.6.18-rc4/fs/jbd/journal.c
===================================================================
--- linux-2.6.18-rc4.orig/fs/jbd/journal.c 2006-08-24 13:23:28.000000000 -0700
+++ linux-2.6.18-rc4/fs/jbd/journal.c 2006-08-24 16:19:27.000000000 -0700
@@ -84,6 +84,7 @@
static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
static void __journal_abort_soft (journal_t *journal, int errno);
+static int journal_create_jbd_slab(size_t slab_size);
/*
* Helper function used to manage commit timeouts
@@ -328,10 +329,10 @@
char *tmp;
jbd_unlock_bh_state(bh_in);
- tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS);
+ tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
jbd_lock_bh_state(bh_in);
if (jh_in->b_frozen_data) {
- kfree(tmp);
+ jbd_slab_free(tmp, bh_in->b_size);
goto repeat;
}
@@ -1090,6 +1091,13 @@
}
}
+ /*
+ * Make sure to create a slab for this blocksize
+ */
+ err = journal_create_jbd_slab(cpu_to_be32(journal->j_superblock->s_blocksize));
+ if (err)
+ return err;
+
/* Let the recovery code check whether it needs to recover any
* data from the journal. */
if (journal_recover(journal))
@@ -1612,6 +1620,76 @@
}
/*
+ * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
+ * and allocate frozen and commit buffers from these slabs.
+ *
+ * Reason for doing this is to avoid, SLAB_DEBUG - since it could
+ * cause bh to cross page boundary.
+ */
+
+#define JBD_MAX_SLABS 5
+#define JBD_SLAB_INDEX(size) (size >> 11)
+
+static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+static const char *jbd_slab_names[JBD_MAX_SLABS] = {
+ "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
+};
+
+static void journal_destroy_jbd_slabs(void)
+{
+ int i;
+
+ for (i=0; i<JBD_MAX_SLABS; i++) {
+ if (jbd_slab[i])
+ kmem_cache_destroy(jbd_slab[i]);
+ jbd_slab[i] = NULL;
+ }
+}
+
+static int journal_create_jbd_slab(size_t slab_size)
+{
+ int i = JBD_SLAB_INDEX(slab_size);
+
+ BUG_ON(i >= JBD_MAX_SLABS);
+ /*
+ * Check if we already have a slab created for this size
+ */
+ if (jbd_slab[i])
+ return 0;
+
+ /*
+ * Create a slab and force alignment to be same as slabsize -
+ * this will make sure that allocations won't cross the page
+ * boundary.
+ */
+ jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
+ slab_size, slab_size, 0, NULL, NULL);
+ if (!jbd_slab[i]) {
+ printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void * jbd_slab_alloc(size_t size, gfp_t flags)
+{
+ int idx;
+
+ idx = JBD_SLAB_INDEX(size);
+ BUG_ON(jbd_slab[idx] == NULL);
+ return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
+}
+
+void jbd_slab_free(void *ptr, size_t size)
+{
+ int idx;
+
+ idx = JBD_SLAB_INDEX(size);
+ BUG_ON(jbd_slab[idx] == NULL);
+ kmem_cache_free(jbd_slab[idx], ptr);
+}
+
+/*
* Journal_head storage management
*/
static kmem_cache_t *journal_head_cache;
@@ -1799,13 +1877,13 @@
printk(KERN_WARNING "%s: freeing "
"b_frozen_data\n",
__FUNCTION__);
- kfree(jh->b_frozen_data);
+ jbd_slab_free(jh->b_frozen_data, bh->b_size);
}
if (jh->b_committed_data) {
printk(KERN_WARNING "%s: freeing "
"b_committed_data\n",
__FUNCTION__);
- kfree(jh->b_committed_data);
+ jbd_slab_free(jh->b_committed_data, bh->b_size);
}
bh->b_private = NULL;
jh->b_bh = NULL; /* debug, really */
@@ -1961,6 +2039,7 @@
journal_destroy_revoke_caches();
journal_destroy_journal_head_cache();
journal_destroy_handle_cache();
+ journal_destroy_jbd_slabs();
}
static int __init journal_init(void)
Index: linux-2.6.18-rc4/fs/jbd/transaction.c
===================================================================
--- linux-2.6.18-rc4.orig/fs/jbd/transaction.c 2006-08-24 13:23:28.000000000 -0700
+++ linux-2.6.18-rc4/fs/jbd/transaction.c 2006-08-24 13:23:55.000000000 -0700
@@ -666,8 +666,9 @@
if (!frozen_buffer) {
JBUFFER_TRACE(jh, "allocate memory for buffer");
jbd_unlock_bh_state(bh);
- frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
- GFP_NOFS);
+ frozen_buffer =
+ jbd_slab_alloc(jh2bh(jh)->b_size,
+ GFP_NOFS);
if (!frozen_buffer) {
printk(KERN_EMERG
"%s: OOM for frozen_buffer\n",
@@ -879,7 +880,7 @@
repeat:
if (!jh->b_committed_data) {
- committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS);
+ committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
if (!committed_data) {
printk(KERN_EMERG "%s: No memory for committed data\n",
__FUNCTION__);
@@ -906,7 +907,7 @@
out:
journal_put_journal_head(jh);
if (unlikely(committed_data))
- kfree(committed_data);
+ jbd_slab_free(committed_data, bh->b_size);
return err;
}
Index: linux-2.6.18-rc4/fs/jbd/commit.c
===================================================================
--- linux-2.6.18-rc4.orig/fs/jbd/commit.c 2006-08-24 13:23:28.000000000 -0700
+++ linux-2.6.18-rc4/fs/jbd/commit.c 2006-08-24 13:23:55.000000000 -0700
@@ -261,7 +261,7 @@
struct buffer_head *bh = jh2bh(jh);
jbd_lock_bh_state(bh);
- kfree(jh->b_committed_data);
+ jbd_slab_free(jh->b_committed_data, bh->b_size);
jh->b_committed_data = NULL;
jbd_unlock_bh_state(bh);
}
@@ -745,14 +745,14 @@
* Otherwise, we can just throw away the frozen data now.
*/
if (jh->b_committed_data) {
- kfree(jh->b_committed_data);
+ jbd_slab_free(jh->b_committed_data, bh->b_size);
jh->b_committed_data = NULL;
if (jh->b_frozen_data) {
jh->b_committed_data = jh->b_frozen_data;
jh->b_frozen_data = NULL;
}
} else if (jh->b_frozen_data) {
- kfree(jh->b_frozen_data);
+ jbd_slab_free(jh->b_frozen_data, bh->b_size);
jh->b_frozen_data = NULL;
}
Index: linux-2.6.18-rc4/include/linux/jbd.h
===================================================================
--- linux-2.6.18-rc4.orig/include/linux/jbd.h 2006-08-24 13:23:28.000000000 -0700
+++ linux-2.6.18-rc4/include/linux/jbd.h 2006-08-24 13:23:55.000000000 -0700
@@ -72,6 +72,9 @@
#endif
extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
+extern void * jbd_slab_alloc(size_t size, gfp_t flags);
+extern void jbd_slab_free(void *ptr, size_t size);
+
#define jbd_kmalloc(size, flags) \
__jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
#define jbd_rep_kmalloc(size, flags) \
On Thu, Aug 24, 2006 at 12:11:25PM -0700, Badari Pulavarty wrote:
> > Why can't you just use alloc_page? I bet the whole slab overhead
> > eats more memory than what's wasted when using alloc_pages. Especially
> > as the typical usecase is a 4k blocks filesystem with 4k pagesize
> > where the overhead of alloc_page is non-existant.
>
> Yes. That was what proposed earlier. But for 1k, 2k allocations we
> end up wasting whole page. Isn't it ? Thats why I created right
> sized slabs and disable slab-debug. I guess, I can do this only for
> 1k, 2k filesystems and directly use alloc_page() for 4k and 8k - but
> that would make code ugly and also it doesn't handle cases for
> bigger base pagesize systems (64k power).
Is there some way we can cleanly have a shortcut case where we use
alloc_page() if fs_blocksize == PAGE_SIZE? The efficiency gains for
what will be the common case on many architectures will probably make
this worthwhile....
- Ted