2008-10-14 06:33:41

by Aneesh Kumar K.V

Subject: Patches for patchqueue

Hi Ted,

The patches below are updates to patches in the patch queue.

0001-ext4-Use-an-rbtree-for-tracking-blocks-freed-during.patch
Updated with the fix from you. I also removed the definition of
EXT4_BB_MAX_BLOCKS.

0004-ext4-Use-tag-dirty-lookup-during-mpage_da_submit_io.patch
0005-vfs-Remove-the-range_cont-writeback-mode.patch
0006-vfs-Add-no_nrwrite_update-and-no_index_update-write.patch
0007-ext4-Fix-file-fragmentation-during-large-file-write.patch

These split the single patch fix_file_fragmentation_during_large_file_write
into 4 patches. The VFS changes are cc'ed to [email protected].

0010-ext4-Free-ext4_prealloc_space-using-kmem_cache_free.patch

-aneesh


2008-10-14 06:33:43

by Aneesh Kumar K.V

Subject: [PATCH -V5] ext4: Use an rbtree for tracking blocks freed during transaction.

With this patch we track the blocks freed during a transaction using an
rbtree. We also make sure that contiguous blocks freed are collected in
a single rbtree node.
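
As a sketch of the intended coalescing behaviour (illustrative only;
the tid, group and block numbers below are made up):

	/*
	 * Within one transaction (tid 42), freeing blocks in group 5:
	 *
	 *   free blocks 100..104  ->  tree: {start_blk=100, count=5}
	 *   free blocks 105..109  ->  tree: {start_blk=100, count=10} (merged)
	 *   free blocks 200..204  ->  tree: {start_blk=100, count=10},
	 *                                   {start_blk=200, count=5}
	 *
	 * A later transaction freeing block 110 gets its own node even
	 * though it is contiguous, because the tids differ (see
	 * can_merge() below).
	 */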

Signed-off-by: Aneesh Kumar K.V <[email protected]>
Signed-off-by: Theodore Ts'o <[email protected]>
---
fs/ext4/mballoc.c | 184 +++++++++++++++++++++++++++++++++-------------------
fs/ext4/mballoc.h | 26 +++++---
2 files changed, 133 insertions(+), 77 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b580714..7023228 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2300,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
}

INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+ meta_group_info[i]->bb_free_root.rb_node = NULL;

#ifdef DOUBLE_CHECK
{
@@ -2647,13 +2648,11 @@ int ext4_mb_release(struct super_block *sb)
static noinline_for_stack void
ext4_mb_free_committed_blocks(struct super_block *sb)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- int err;
- int i;
- int count = 0;
- int count2 = 0;
- struct ext4_free_metadata *md;
struct ext4_buddy e4b;
+ struct ext4_group_info *db;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int err, count = 0, count2 = 0;
+ struct ext4_free_data *entry;

if (list_empty(&sbi->s_committed_transaction))
return;
@@ -2661,44 +2660,46 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
/* there is committed blocks to be freed yet */
do {
/* get next array of blocks */
- md = NULL;
+ entry = NULL;
spin_lock(&sbi->s_md_lock);
if (!list_empty(&sbi->s_committed_transaction)) {
- md = list_entry(sbi->s_committed_transaction.next,
- struct ext4_free_metadata, list);
- list_del(&md->list);
+ entry = list_entry(sbi->s_committed_transaction.next,
+ struct ext4_free_data, list);
+ list_del(&entry->list);
}
spin_unlock(&sbi->s_md_lock);

- if (md == NULL)
+ if (entry == NULL)
break;

mb_debug("gonna free %u blocks in group %lu (0x%p):",
- md->num, md->group, md);
+ entry->count, entry->group, entry);

- err = ext4_mb_load_buddy(sb, md->group, &e4b);
+ err = ext4_mb_load_buddy(sb, entry->group, &e4b);
/* we expect to find existing buddy because it's pinned */
BUG_ON(err != 0);

+ db = e4b.bd_info;
/* there are blocks to put in buddy to make them really free */
- count += md->num;
+ count += entry->count;
count2++;
- ext4_lock_group(sb, md->group);
- for (i = 0; i < md->num; i++) {
- mb_debug(" %u", md->blocks[i]);
- mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
+ ext4_lock_group(sb, entry->group);
+ /* Take it out of the per-group rb tree */
+ rb_erase(&entry->node, &(db->bb_free_root));
+ mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+
+ if (!db->bb_free_root.rb_node) {
+ /* No more items in the per-group rb tree;
+ * balance the refcounts from ext4_mb_free_metadata()
+ */
+ page_cache_release(e4b.bd_buddy_page);
+ page_cache_release(e4b.bd_bitmap_page);
}
- mb_debug("\n");
- ext4_unlock_group(sb, md->group);
-
- /* balance refcounts from ext4_mb_free_metadata() */
- page_cache_release(e4b.bd_buddy_page);
- page_cache_release(e4b.bd_bitmap_page);
+ ext4_unlock_group(sb, entry->group);

- kfree(md);
+ kmem_cache_free(ext4_free_ext_cachep, entry);
ext4_mb_release_desc(&e4b);
-
- } while (md);
+ } while (1);

mb_debug("freed %u blocks in %u structures\n", count, count2);
}
@@ -2771,6 +2772,16 @@ int __init init_ext4_mballoc(void)
kmem_cache_destroy(ext4_pspace_cachep);
return -ENOMEM;
}
+
+ ext4_free_ext_cachep =
+ kmem_cache_create("ext4_free_block_extents",
+ sizeof(struct ext4_free_data),
+ 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ if (ext4_free_ext_cachep == NULL) {
+ kmem_cache_destroy(ext4_pspace_cachep);
+ kmem_cache_destroy(ext4_ac_cachep);
+ return -ENOMEM;
+ }
return 0;
}

@@ -2779,6 +2790,7 @@ void exit_ext4_mballoc(void)
/* XXX: synchronize_rcu(); */
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
+ kmem_cache_destroy(ext4_free_ext_cachep);
}


@@ -4415,6 +4427,21 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
ext4_mb_free_committed_blocks(sb);
}

+/*
+ * We can merge two free data extents only if the physical blocks
+ * are contiguous, AND the extents were freed by the same transaction,
+ * AND the blocks are associated with the same group.
+ */
+static int can_merge(struct ext4_free_data *entry1,
+ struct ext4_free_data *entry2)
+{
+ if (entry1->t_tid == entry2->t_tid &&
+ (entry1->group == entry2->group) &&
+ (entry1->start_blk + entry1->count) == entry2->start_blk)
+ return 1;
+ return 0;
+}
+
static noinline_for_stack int
ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
ext4_group_t group, ext4_grpblk_t block, int count)
@@ -4422,57 +4449,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
struct ext4_group_info *db = e4b->bd_info;
struct super_block *sb = e4b->bd_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_free_metadata *md;
- int i;
+ struct ext4_free_data *entry, *new_entry;
+ struct rb_node **n = &db->bb_free_root.rb_node, *node;
+ struct rb_node *parent = NULL, *new_node;
+

BUG_ON(e4b->bd_bitmap_page == NULL);
BUG_ON(e4b->bd_buddy_page == NULL);

+ new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+ new_entry->start_blk = block;
+ new_entry->group = group;
+ new_entry->count = count;
+ new_entry->t_tid = handle->h_transaction->t_tid;
+ new_node = &new_entry->node;
+
ext4_lock_group(sb, group);
- for (i = 0; i < count; i++) {
- md = db->bb_md_cur;
- if (md && db->bb_tid != handle->h_transaction->t_tid) {
- db->bb_md_cur = NULL;
- md = NULL;
+ if (!*n) {
+ /* first free block extent. We need to
+ * protect the buddy cache from being freed,
+ * otherwise we'll refresh it from
+ * on-disk bitmap and lose not-yet-available
+ * blocks */
+ page_cache_get(e4b->bd_buddy_page);
+ page_cache_get(e4b->bd_bitmap_page);
+ }
+ while (*n) {
+ parent = *n;
+ entry = rb_entry(parent, struct ext4_free_data, node);
+ if (block < entry->start_blk)
+ n = &(*n)->rb_left;
+ else if (block >= (entry->start_blk + entry->count))
+ n = &(*n)->rb_right;
+ else {
+ ext4_error(sb, __func__,
+ "Double free of blocks %d (%d %d)\n",
+ block, entry->start_blk, entry->count);
+ return 0;
}
+ }

- if (md == NULL) {
- ext4_unlock_group(sb, group);
- md = kmalloc(sizeof(*md), GFP_NOFS);
- if (md == NULL)
- return -ENOMEM;
- md->num = 0;
- md->group = group;
-
- ext4_lock_group(sb, group);
- if (db->bb_md_cur == NULL) {
- spin_lock(&sbi->s_md_lock);
- list_add(&md->list, &sbi->s_active_transaction);
- spin_unlock(&sbi->s_md_lock);
- /* protect buddy cache from being freed,
- * otherwise we'll refresh it from
- * on-disk bitmap and lose not-yet-available
- * blocks */
- page_cache_get(e4b->bd_buddy_page);
- page_cache_get(e4b->bd_bitmap_page);
- db->bb_md_cur = md;
- db->bb_tid = handle->h_transaction->t_tid;
- mb_debug("new md 0x%p for group %lu\n",
- md, md->group);
- } else {
- kfree(md);
- md = db->bb_md_cur;
- }
+ rb_link_node(new_node, parent, n);
+ rb_insert_color(new_node, &db->bb_free_root);
+
+ /* Now try to see if the extent can be merged to the left and right */
+ node = rb_prev(new_node);
+ if (node) {
+ entry = rb_entry(node, struct ext4_free_data, node);
+ if (can_merge(entry, new_entry)) {
+ new_entry->start_blk = entry->start_blk;
+ new_entry->count += entry->count;
+ rb_erase(node, &(db->bb_free_root));
+ spin_lock(&sbi->s_md_lock);
+ list_del(&entry->list);
+ spin_unlock(&sbi->s_md_lock);
+ kmem_cache_free(ext4_free_ext_cachep, entry);
}
+ }

- BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS);
- md->blocks[md->num] = block + i;
- md->num++;
- if (md->num == EXT4_BB_MAX_BLOCKS) {
- /* no more space, put full container on a sb's list */
- db->bb_md_cur = NULL;
+ node = rb_next(new_node);
+ if (node) {
+ entry = rb_entry(node, struct ext4_free_data, node);
+ if (can_merge(new_entry, entry)) {
+ new_entry->count += entry->count;
+ rb_erase(node, &(db->bb_free_root));
+ spin_lock(&sbi->s_md_lock);
+ list_del(&entry->list);
+ spin_unlock(&sbi->s_md_lock);
+ kmem_cache_free(ext4_free_ext_cachep, entry);
}
}
+ /* Add the extent to the active_transaction list */
+ spin_lock(&sbi->s_md_lock);
+ list_add(&new_entry->list, &sbi->s_active_transaction);
+ spin_unlock(&sbi->s_md_lock);
ext4_unlock_group(sb, group);
return 0;
}
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index b3b4828..9e815c4 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -98,23 +98,29 @@

static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
+static struct kmem_cache *ext4_free_ext_cachep;

-#ifdef EXT4_BB_MAX_BLOCKS
-#undef EXT4_BB_MAX_BLOCKS
-#endif
-#define EXT4_BB_MAX_BLOCKS 30
+struct ext4_free_data {
+ /* this links the free block information from group_info */
+ struct rb_node node;

-struct ext4_free_metadata {
- ext4_group_t group;
- unsigned short num;
- ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
+ /* this links the free block information from ext4_sb_info */
struct list_head list;
+
+ /* group the free block extent belongs to */
+ ext4_group_t group;
+
+ /* free block extent */
+ ext4_grpblk_t start_blk;
+ ext4_grpblk_t count;
+
+ /* transaction which freed this extent */
+ tid_t t_tid;
};

struct ext4_group_info {
unsigned long bb_state;
- unsigned long bb_tid;
- struct ext4_free_metadata *bb_md_cur;
+ struct rb_root bb_free_root;
unsigned short bb_first_free;
unsigned short bb_free;
unsigned short bb_fragments;
--
1.6.0.2.526.g5c283


2008-10-14 06:33:44

by Aneesh Kumar K.V

Subject: [PATCH] ext4: Use tag dirty lookup during mpage_da_submit_io

This enables us to drop the use of the range_cont writeback mode
from ext4_da_writepages.
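
For reference, the page-cache tag lifecycle this relies on, condensed
(see clear_page_dirty_for_io() and set_page_writeback() in mm/ for the
real code):

	clear_page_dirty_for_io(page);	/* clears PG_dirty, but leaves the
					 * radix-tree PAGECACHE_TAG_DIRTY
					 * tag in place */

	/* ->writepage() runs; inside it: */
	set_page_writeback(page);	/* clears PAGECACHE_TAG_DIRTY and
					 * sets PAGECACHE_TAG_WRITEBACK */

So a PAGECACHE_TAG_DIRTY lookup between the two calls still finds the
page even though PG_dirty is already clear.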

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/inode.c | 30 +++++++++++++-----------------
1 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7c2820e..cba7960 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1656,17 +1656,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)

while (index <= end) {
/* XXX: optimize tail */
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+ /*
+ * We can use PAGECACHE_TAG_DIRTY lookup here because
+ * even though we have cleared the dirty flag on the page,
+ * we still keep the page in the radix tree with the tag
+ * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
+ * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
+ * which is called via the writepage callback below.
+ */
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+ PAGECACHE_TAG_DIRTY,
+ min(end - index,
+ (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];

- index = page->index;
- if (index > end)
- break;
- index++;
-
err = mapping->a_ops->writepage(page, mpd->wbc);
if (!err)
mpd->pages_written++;
@@ -2361,7 +2367,6 @@ static int ext4_da_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
handle_t *handle = NULL;
- loff_t range_start = 0;
struct mpage_da_data mpd;
struct inode *inode = mapping->host;
int needed_blocks, ret = 0, nr_to_writebump = 0;
@@ -2386,14 +2391,7 @@ static int ext4_da_writepages(struct address_space *mapping,
wbc->nr_to_write = sbi->s_mb_stream_request;
}

- if (!wbc->range_cyclic)
- /*
- * If range_cyclic is not set force range_cont
- * and save the old writeback_index
- */
- wbc->range_cont = 1;

- range_start = wbc->range_start;
pages_skipped = wbc->pages_skipped;

mpd.wbc = wbc;
@@ -2452,9 +2450,8 @@ static int ext4_da_writepages(struct address_space *mapping,
wbc->nr_to_write = to_write;
}

- if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
+ if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) {
/* We skipped pages in this loop */
- wbc->range_start = range_start;
wbc->nr_to_write = to_write +
wbc->pages_skipped - pages_skipped;
wbc->pages_skipped = pages_skipped;
@@ -2463,7 +2460,6 @@ static int ext4_da_writepages(struct address_space *mapping,

out_writepages:
wbc->nr_to_write = to_write - nr_to_writebump;
- wbc->range_start = range_start;
return ret;
}

--
1.6.0.2.526.g5c283


2008-10-14 06:33:25

by Aneesh Kumar K.V

Subject: [PATCH] vfs: Remove the range_cont writeback mode.

Ext4 was the only user of the range_cont writeback mode, and ext4 has
switched to a different method. So remove the range_cont mode, which
is no longer used in the kernel.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
CC: [email protected]
---
include/linux/writeback.h | 1 -
mm/page-writeback.c | 2 --
2 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 12b15c5..bd91987 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -63,7 +63,6 @@ struct writeback_control {
unsigned for_writepages:1; /* This is a writepages() call */
unsigned range_cyclic:1; /* range_start is cyclic */
unsigned more_io:1; /* more io to be dispatched */
- unsigned range_cont:1;
};

/*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 24de8b6..718efa6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -961,8 +961,6 @@ int write_cache_pages(struct address_space *mapping,
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = index;

- if (wbc->range_cont)
- wbc->range_start = index << PAGE_CACHE_SHIFT;
return ret;
}
EXPORT_SYMBOL(write_cache_pages);
--
1.6.0.2.526.g5c283


2008-10-14 06:33:26

by Aneesh Kumar K.V

Subject: [PATCH] vfs: Add no_nrwrite_update and no_index_update writeback control flags

If no_nrwrite_update is set we don't update nr_to_write in
write_cache_pages. Similarly, if no_index_update is set we don't
update the address space's writeback_index. These changes enable a
file system to skip these updates in write_cache_pages and do
them in its writepages() callback. This patch will be followed
by an ext4 patch that makes use of these new flags.
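
A minimal usage sketch from a filesystem's writepages() (illustrative
only; the myfs_* names are hypothetical):

	static int myfs_writepages(struct address_space *mapping,
				   struct writeback_control *wbc)
	{
		long nr_to_write = wbc->nr_to_write;
		int ret;

		/* keep write_cache_pages() from updating nr_to_write
		 * and mapping->writeback_index behind our back */
		wbc->no_nrwrite_update = 1;
		wbc->no_index_update = 1;

		ret = write_cache_pages(mapping, wbc, myfs_writepage, NULL);

		/* the filesystem now does its own accounting */
		wbc->nr_to_write = nr_to_write - myfs_pages_written(mapping);
		mapping->writeback_index = myfs_next_index(mapping);
		return ret;
	}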

Signed-off-by: Aneesh Kumar K.V <[email protected]>
CC: [email protected]
---
include/linux/writeback.h | 4 ++++
mm/page-writeback.c | 9 +++++++--
2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index bd91987..b04287e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -63,6 +63,10 @@ struct writeback_control {
unsigned for_writepages:1; /* This is a writepages() call */
unsigned range_cyclic:1; /* range_start is cyclic */
unsigned more_io:1; /* more io to be dispatched */
+
+ /* write_cache_pages() control */
+ unsigned no_nrwrite_update:1; /* don't update nr_to_write */
+ unsigned no_index_update:1; /* don't update writeback_index */
};

/*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 718efa6..4f359f4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -876,6 +876,7 @@ int write_cache_pages(struct address_space *mapping,
pgoff_t end; /* Inclusive */
int scanned = 0;
int range_whole = 0;
+ long nr_to_write = wbc->nr_to_write;

if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
@@ -939,7 +940,7 @@ int write_cache_pages(struct address_space *mapping,
unlock_page(page);
ret = 0;
}
- if (ret || (--(wbc->nr_to_write) <= 0))
+ if (ret || (--nr_to_write <= 0))
done = 1;
if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
@@ -958,8 +959,12 @@ int write_cache_pages(struct address_space *mapping,
index = 0;
goto retry;
}
- if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+ if (!wbc->no_index_update &&
+ (wbc->range_cyclic || (range_whole && nr_to_write > 0))) {
mapping->writeback_index = index;
+ }
+ if (!wbc->no_nrwrite_update)
+ wbc->nr_to_write = nr_to_write;

return ret;
}
--
1.6.0.2.526.g5c283


2008-10-14 06:33:52

by Aneesh Kumar K.V

Subject: [PATCH] ext4: Fix file fragmentation during large file write.

The range_cyclic writeback mode uses the address_space writeback_index
as the start index for writeback. With delayed allocation we were
updating writeback_index incorrectly, resulting in highly fragmented
files. With the patch below, the number of extents for a 3GB file is
reduced from 4000 to 27.
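
For context, a condensed view of how write_cache_pages() consumes and
saves the index (from mm/page-writeback.c):

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* resume previous pass */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
	}
	/* ... pages are written, index advances ... */
	mapping->writeback_index = index; /* start of the next cycle */

If writeback_index is advanced past pages whose delayed-allocation
blocks have not been allocated yet, the next cyclic pass starts at the
wrong offset and the file's blocks end up allocated out of order.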

Signed-off-by: Aneesh Kumar K.V <[email protected]>
Signed-off-by: Theodore Ts'o <[email protected]>
---
fs/ext4/inode.c | 83 +++++++++++++++++++++++++++++++++----------------------
1 files changed, 50 insertions(+), 33 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cba7960..844c136 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1648,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
int ret = 0, err, nr_pages, i;
unsigned long index, end;
struct pagevec pvec;
+ long pages_skipped;

BUG_ON(mpd->next_page <= mpd->first_page);
pagevec_init(&pvec, 0);
@@ -1655,7 +1656,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
end = mpd->next_page - 1;

while (index <= end) {
- /* XXX: optimize tail */
/*
* We can use PAGECACHE_TAG_DIRTY lookup here because
* even though we have cleared the dirty flag on the page,
@@ -1673,8 +1673,13 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];

+ pages_skipped = mpd->wbc->pages_skipped;
err = mapping->a_ops->writepage(page, mpd->wbc);
- if (!err)
+ if (!err && (pages_skipped == mpd->wbc->pages_skipped))
+ /*
+ * we have successfully written the page
+ * without skipping it
+ */
mpd->pages_written++;
/*
* In error case, we have to continue because
@@ -2110,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping,
struct writeback_control *wbc,
struct mpage_da_data *mpd)
{
- long to_write;
int ret;

if (!mpd->get_block)
@@ -2125,10 +2129,7 @@ static int mpage_da_writepages(struct address_space *mapping,
mpd->pages_written = 0;
mpd->retval = 0;

- to_write = wbc->nr_to_write;
-
ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
-
/*
* Handle last extent of pages
*/
@@ -2137,7 +2138,7 @@ static int mpage_da_writepages(struct address_space *mapping,
mpage_da_submit_io(mpd);
}

- wbc->nr_to_write = to_write - mpd->pages_written;
+ wbc->nr_to_write -= mpd->pages_written;
return ret;
}

@@ -2366,11 +2367,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
static int ext4_da_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
+ pgoff_t index;
+ int range_whole = 0;
handle_t *handle = NULL;
+ long pages_written = 0;
struct mpage_da_data mpd;
struct inode *inode = mapping->host;
+ int no_nrwrite_update, no_index_update;
int needed_blocks, ret = 0, nr_to_writebump = 0;
- long to_write, pages_skipped = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);

/*
@@ -2390,16 +2394,27 @@ static int ext4_da_writepages(struct address_space *mapping,
nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
wbc->nr_to_write = sbi->s_mb_stream_request;
}
+ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+ range_whole = 1;

-
- pages_skipped = wbc->pages_skipped;
+ if (wbc->range_cyclic)
+ index = mapping->writeback_index;
+ else
+ index = wbc->range_start >> PAGE_CACHE_SHIFT;

mpd.wbc = wbc;
mpd.inode = mapping->host;

-restart_loop:
- to_write = wbc->nr_to_write;
- while (!ret && to_write > 0) {
+ /*
+ * we don't want write_cache_pages to update
+ * nr_to_write and writeback_index
+ */
+ no_nrwrite_update = wbc->no_nrwrite_update;
+ wbc->no_nrwrite_update = 1;
+ no_index_update = wbc->no_index_update;
+ wbc->no_index_update = 1;
+
+ while (!ret && wbc->nr_to_write > 0) {

/*
* we insert one extent at a time. So we need
@@ -2420,46 +2435,48 @@ static int ext4_da_writepages(struct address_space *mapping,
dump_stack();
goto out_writepages;
}
- to_write -= wbc->nr_to_write;
-
mpd.get_block = ext4_da_get_block_write;
ret = mpage_da_writepages(mapping, wbc, &mpd);

ext4_journal_stop(handle);

- if (mpd.retval == -ENOSPC)
+ if (mpd.retval == -ENOSPC) {
+ /* commit the transaction, which frees
+ * the blocks released in that transaction,
+ * then try again
+ */
jbd2_journal_force_commit_nested(sbi->s_journal);
-
- /* reset the retry count */
- if (ret == MPAGE_DA_EXTENT_TAIL) {
+ ret = 0;
+ } else if (ret == MPAGE_DA_EXTENT_TAIL) {
/*
* got one extent now try with
* rest of the pages
*/
- to_write += wbc->nr_to_write;
+ pages_written += mpd.pages_written;
ret = 0;
- } else if (wbc->nr_to_write) {
+ } else if (wbc->nr_to_write)
/*
* There is no more writeout needed
* or we requested for a noblocking writeout
* and we found the device congested
*/
- to_write += wbc->nr_to_write;
break;
- }
- wbc->nr_to_write = to_write;
- }
-
- if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) {
- /* We skipped pages in this loop */
- wbc->nr_to_write = to_write +
- wbc->pages_skipped - pages_skipped;
- wbc->pages_skipped = pages_skipped;
- goto restart_loop;
}
+ /* Update index */
+ index += pages_written;
+ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+ /*
+ * set the writeback_index so that range_cyclic
+ * mode will write it back later
+ */
+ mapping->writeback_index = index;

out_writepages:
- wbc->nr_to_write = to_write - nr_to_writebump;
+ if (!no_nrwrite_update)
+ wbc->no_nrwrite_update = 0;
+ if (!no_index_update)
+ wbc->no_index_update = 0;
+ wbc->nr_to_write -= nr_to_writebump;
return ret;
}

--
1.6.0.2.526.g5c283


2008-10-14 06:33:52

by Aneesh Kumar K.V

Subject: [PATCH] ext4: Free ext4_prealloc_space using kmem_cache_free

We should use kmem_cache_free to free memory allocated
via kmem_cache_alloc.
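
The general pairing rule (a sketch; the foo names are made up):

	static struct kmem_cache *foo_cachep;

	foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
				       0, SLAB_RECLAIM_ACCOUNT, NULL);

	struct foo *p = kmem_cache_alloc(foo_cachep, GFP_NOFS);
	/* ... use p ... */
	kmem_cache_free(foo_cachep, p);	/* not kfree(p) */

kfree() on a slab object happens to work with some slab
implementations, but it is not guaranteed to, and it defeats the
per-cache accounting.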

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/mballoc.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 7023228..c22b904 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2569,7 +2569,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
list_del(&pa->pa_group_list);
count++;
- kfree(pa);
+ kmem_cache_free(ext4_pspace_cachep, pa);
}
if (count)
mb_debug("mballoc: %u PAs left\n", count);
--
1.6.0.2.526.g5c283


2008-10-14 13:22:57

by Christoph Hellwig

Subject: Re: [PATCH] vfs: Add no_nrwrite_update and no_index_update writeback control flags

On Tue, Oct 14, 2008 at 12:03:26PM +0530, Aneesh Kumar K.V wrote:
> If no_nrwrite_update is set we don't update nr_to_write in
> write_cache_pages. Similarly, if no_index_update is set we don't
> update the address space's writeback_index. These changes enable a
> file system to skip these updates in write_cache_pages and do
> them in its writepages() callback. This patch will be followed
> by an ext4 patch that makes use of these new flags.

I looked over this and discussed it a little with Ted and it looks good
to me.

> + /* write_cache_pages() control */
> + unsigned no_nrwrite_update:1; /* don't update nr_to_write */
> + unsigned no_index_update:1; /* don't update writeback_index */

But thinking about it, I suspect we don't want two different flags for
this, but just one. This is done because the writepage callback may
write back more pages than the one requested, and because of that
neither index needs to be updated. Adding this rationale to the flag
descriptions might also be really helpful.

> + if (!wbc->no_index_update &&
> + (wbc->range_cyclic || (range_whole && nr_to_write > 0))) {

Might be a little too nitpicky, but can you follow normal indentation?
Shouldn't matter anyway if the two flags are merged into one and this is
split into two nested if conditions.