From: Alex Tomas
Subject: [RFC] ext4-locality-groups patch
Date: Fri, 01 Dec 2006 02:56:02 +0300
To: linux-ext4@vger.kernel.org

this patch implements the locality groups idea in a very simplified form.
the policy is silly and ->sync_inodes() is not very well tested on different
workloads. two rough user-space sketches of the grouping policy and of the
flush loop are appended after the patch.

thanks, Alex

Index: linux-2.6.19-rc6/include/linux/ext4_fs_i.h
===================================================================
--- linux-2.6.19-rc6.orig/include/linux/ext4_fs_i.h	2006-11-16 07:03:40.000000000 +0300
+++ linux-2.6.19-rc6/include/linux/ext4_fs_i.h	2006-12-01 02:51:13.000000000 +0300
@@ -150,6 +150,8 @@ struct ext4_inode_info {
 	 */
 	struct mutex truncate_mutex;
 	struct inode vfs_inode;
+	struct list_head i_lg_list;
+	struct ext4_locality_group *i_locality_group;
 
 	unsigned long i_ext_generation;
 	struct ext4_ext_cache i_cached_extent;
Index: linux-2.6.19-rc6/include/linux/ext4_fs.h
===================================================================
--- linux-2.6.19-rc6.orig/include/linux/ext4_fs.h	2006-11-16 07:03:40.000000000 +0300
+++ linux-2.6.19-rc6/include/linux/ext4_fs.h	2006-12-01 02:51:13.000000000 +0300
@@ -727,6 +727,34 @@ struct dx_hash_info
 
 /*
+ * Locality group:
+ * we try to group all related changes together
+ * so that writeback can flush/allocate them together as well
+ */
+struct ext4_locality_group {
+	int lg_parent;
+	int lg_pgid;
+	int lg_sid;
+	struct list_head lg_hash;
+	spinlock_t lg_lock;
+	int lg_deleted;
+	atomic_t lg_count;
+	atomic_t lg_inodes_nr;
+
+	/* */
+	unsigned long lg_flags;
+	struct list_head lg_list;
+
+	/* inode lists for the group */
+	struct list_head lg_inodes;	/* inodes in the group */
+	struct list_head lg_dirty;	/* dirty inodes from s_dirty */
+	struct list_head lg_io;		/* inodes scheduled for flush */
+
+	atomic_t lg_dirty_pages;	/* pages to write */
+	atomic_t lg_nonallocated;	/* non-allocated pages */
+};
+
+/*
  * Describe an inode's exact location on disk and in memory
  */
 struct ext4_iloc
@@ -784,6 +812,15 @@ void ext4_get_group_no_and_offset(struct
 # define ATTRIB_NORET	__attribute__((noreturn))
 # define NORET_AND	noreturn,
 
+/* lg.c */
+extern void ext4_lg_init(struct super_block *sb);
+extern void ext4_lg_release(struct super_block *sb);
+extern void ext4_lg_inode_leave_group(struct inode *inode);
+extern void ext4_lg_page_enter_inode(struct inode *inode, struct page *page, int allocated);
+extern void ext4_lg_page_leave_inode(struct inode *inode, struct page *page, int allocated);
+extern void ext4_lg_sync_inodes(struct super_block *, struct writeback_control *);
+
+
 /* balloc.c */
 extern unsigned int ext4_block_group(struct super_block *sb,
 			ext4_fsblk_t blocknr);
Index: linux-2.6.19-rc6/include/linux/ext4_fs_sb.h
===================================================================
--- linux-2.6.19-rc6.orig/include/linux/ext4_fs_sb.h	2006-11-16 07:03:40.000000000 +0300
+++ linux-2.6.19-rc6/include/linux/ext4_fs_sb.h	2006-12-01 02:51:13.000000000 +0300
@@ -80,6 +80,11 @@ struct ext4_sb_info {
 	int s_jquota_fmt;			/* Format of quota to use */
 #endif
 
+	struct list_head s_locality_dirty;
+	struct list_head s_locality_io;
+	struct list_head s_locality_groups;
+	spinlock_t s_locality_lock;
+
 #ifdef EXTENTS_STATS
 	/* ext4 extents stats */
 	unsigned long s_ext_min;
Index: linux-2.6.19-rc6/fs/ext4/lg.c
===================================================================
--- linux-2.6.19-rc6.orig/fs/ext4/lg.c	2006-11-30 15:32:10.563465031 +0300
+++ linux-2.6.19-rc6/fs/ext4/lg.c	2006-12-01 02:54:29.000000000 +0300
@@ -0,0 +1,533 @@
+/*
+ * Copyright (c) 2006, Cluster File Systems, Inc, info@clusterfs.com
+ * Written by Alex Tomas
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * locality groups
+ *
+ */
+
+/*
+ * TODO:
+ *  - too many tricks
+ *  - mmap'ed files support (we need to link them to some group)
+ *  - too silly grouping policy
+ *  - free unused groups after some timeout
+ *  - anonymous group for non-regular inodes
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifndef TestClearPageChecked
+#define TestClearPageChecked(page) test_and_clear_bit(PG_checked, &(page)->flags)
+#endif
+#ifndef TestSetPageChecked
+#define TestSetPageChecked(page) test_and_set_bit(PG_checked, &(page)->flags)
+#endif
+
+
+extern struct super_block *blockdev_superblock;
+static inline int sb_is_blkdev_sb(struct super_block *sb)
+{
+	return sb == blockdev_superblock;
+}
+
+extern int __writeback_single_inode(struct inode *, struct writeback_control *);
+
+struct ext4_locality_group *ext4_lg_find_group(struct super_block *sb)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_locality_group *lg;
+	struct list_head *cur;
+
+	rcu_read_lock();
+	list_for_each_rcu(cur, &sbi->s_locality_groups) {
+		lg = list_entry(cur, struct ext4_locality_group, lg_hash);
+		if (lg->lg_pgid == current->signal->pgrp) {
+			spin_lock(&lg->lg_lock);
+			if (lg->lg_deleted == 0) {
+				atomic_inc(&lg->lg_count);
+				spin_unlock(&lg->lg_lock);
+				break;
+			}
+			spin_unlock(&lg->lg_lock);
+		}
+		lg = NULL;
+	}
+	rcu_read_unlock();
+	return lg;
+}
+
+void ext4_lg_put_group(struct ext4_locality_group *lg)
+{
+	atomic_dec(&lg->lg_count);
+}
+
+struct ext4_locality_group *ext4_lg_find_or_allocate_group(struct inode *inode)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct ext4_locality_group *lg, *olg;
+
+	lg = ext4_lg_find_group(inode->i_sb);
+	if (lg == NULL) {
+		lg = kmalloc(sizeof(struct ext4_locality_group), GFP_NOFS);
+		if (lg == NULL)
+			return NULL;
+		lg->lg_pgid = current->signal->pgrp;
+		lg->lg_sid = current->signal->session;
+		spin_lock_init(&lg->lg_lock);
+		lg->lg_deleted = 0;
+		lg->lg_flags = 0;
+		atomic_set(&lg->lg_count, 1);
+		atomic_set(&lg->lg_inodes_nr, 0);
+		INIT_LIST_HEAD(&lg->lg_list);
+		INIT_LIST_HEAD(&lg->lg_inodes);
+		INIT_LIST_HEAD(&lg->lg_dirty);
+		INIT_LIST_HEAD(&lg->lg_io);
+		atomic_set(&lg->lg_dirty_pages, 0);
+		atomic_set(&lg->lg_nonallocated, 0);
+
+		spin_lock(&sbi->s_locality_lock);
+		olg = ext4_lg_find_group(inode->i_sb);
+		if (olg == NULL) {
+			list_add(&lg->lg_hash, &sbi->s_locality_groups);
+		} else {
+			kfree(lg);
+			lg = olg;
+		}
+		spin_unlock(&sbi->s_locality_lock);
+	}
+
+	/*
+	 * XXX locking here?
+	 */
+	spin_lock(&inode_lock);
+	if (EXT4_I(inode)->i_locality_group == NULL) {
+		EXT4_I(inode)->i_locality_group = lg;
+		list_add(&EXT4_I(inode)->i_lg_list, &lg->lg_inodes);
+		atomic_inc(&lg->lg_inodes_nr);
+	} else {
+		printk("somebody has already set lg %p (our %p) to inode %lu(%p)\n",
+			EXT4_I(inode)->i_locality_group, lg, inode->i_ino, inode);
+		ext4_lg_put_group(lg);
+		lg = EXT4_I(inode)->i_locality_group;
+	}
+	spin_unlock(&inode_lock);
+
+	return lg;
+}
+
+/*
+ * every dirty page should be counted
+ */
+void ext4_lg_page_enter_inode(struct inode *inode,
+				struct page *page, int allocated)
+{
+	struct ext4_locality_group *lg;
+
+	lg = EXT4_I(inode)->i_locality_group;
+	if (lg == NULL) {
+		lg = ext4_lg_find_or_allocate_group(inode);
+		if (lg == NULL)
+			return;
+	}
+
+	if (!TestSetPageChecked(page)) {
+		atomic_inc(&lg->lg_dirty_pages);
+		if (!allocated)
+			atomic_inc(&lg->lg_nonallocated);
+	}
+}
+
+
+/*
+ *
+ */
+void ext4_lg_page_leave_inode(struct inode *inode,
+				struct page *page, int allocated)
+{
+	struct ext4_locality_group *lg;
+
+	lg = EXT4_I(inode)->i_locality_group;
+	if (lg == NULL) {
+		if (S_ISREG(inode->i_mode))
+			printk("regular file %lu/%u with no locality group?!\n",
+				inode->i_ino, inode->i_generation);
+		return;
+	}
+
+	if (!TestClearPageChecked(page))
+		return;
+
+	atomic_dec(&lg->lg_dirty_pages);
+	if (!allocated)
+		atomic_dec(&lg->lg_nonallocated);
+}
+
+/*
+ * Inode leaves its group
+ */
+void ext4_lg_inode_leave_group(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_locality_group *lg;
+
+	if (inode->i_nlink != 0 && S_ISREG(inode->i_mode)) {
+		BUG_ON(mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY));
+	}
+
+	spin_lock(&inode_lock);
+	lg = ei->i_locality_group;
+	ei->i_locality_group = NULL;
+	spin_unlock(&inode_lock);
+
+	if (lg != NULL) {
+		spin_lock(&lg->lg_lock);
+		list_del(&ei->i_lg_list);
+		spin_unlock(&lg->lg_lock);
+		atomic_dec(&lg->lg_inodes_nr);
+		ext4_lg_put_group(lg);
+	}
+}
+
+#define EXT4_LG_DIRTY		0
+
+#define EXT4_CONTINUE_WRITEBACK	1
+#define EXT4_STOP_WRITEBACK	2
+
+static char *__sync_modes[] = { "NONE", "ALL", "HOLD" };
+
+/*
+ * The function syncs a single group like generic_sync_sb_inodes() does.
+ * returns:
+ *  EXT4_CONTINUE_WRITEBACK - continue syncing with the next group
+ *  EXT4_STOP_WRITEBACK - stop writeback
+ */
+int ext4_lg_sync_single_group(struct super_block *sb,
+				struct ext4_locality_group *lg,
+				struct writeback_control *wbc,
+				unsigned long start)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	int nr_to_write = wbc->nr_to_write;
+	int dirty_pages, nonallocated;
+	int rc, code = 0;
+
+	dirty_pages = atomic_read(&lg->lg_dirty_pages);
+	nonallocated = atomic_read(&lg->lg_nonallocated);
+
+	rc = EXT4_CONTINUE_WRITEBACK;
+
+	spin_lock(&inode_lock);
+
+	if (!wbc->for_kupdate || list_empty(&lg->lg_io))
+		list_splice_init(&lg->lg_dirty, &lg->lg_io);
+
+	while (!list_empty(&lg->lg_io)) {
+		struct inode *inode = list_entry(lg->lg_io.prev,
+						struct inode, i_list);
+		struct address_space *mapping = inode->i_mapping;
+		struct backing_dev_info *bdi = mapping->backing_dev_info;
+		long pages_skipped;
+
+		if (wbc->nonblocking && bdi_write_congested(bdi)) {
+			/* underlying device is congested
+			 * break all writeback immediately */
+			wbc->encountered_congestion = 1;
+
+			/* keep this inode on the head so that
+			 * we'll continue writeback with it
+			 * when we return to this locality group */
+
+			/* same for the locality group */
+			set_bit(EXT4_LG_DIRTY, &lg->lg_flags);
+			list_move(&lg->lg_list, &sbi->s_locality_io);
+
+			/* signal to the caller */
+			rc = EXT4_STOP_WRITEBACK;
+			code = 1;
+			break;
+		}
+
+		if (wbc->bdi && bdi != wbc->bdi) {
+			printk("wbc->bdi (%p) != bdi (%p)\n", wbc->bdi, bdi);
+			list_move(&inode->i_list, &inode_in_use);
+			rc = EXT4_CONTINUE_WRITEBACK;
+			code = 2;
+			break;
+		}
+
+		/* Was this inode dirtied after sync_sb_inodes was called? */
+		if (time_after(inode->dirtied_when, start)) {
+			/* keep this inode on the head so that
+			 * we'll continue writeback with it
+			 * when we return to this locality group */
+
+			/* continue with next locality group
+			 * move this one to the dirty tail */
+			set_bit(EXT4_LG_DIRTY, &lg->lg_flags);
+			list_move_tail(&lg->lg_list, &sbi->s_locality_dirty);
+
+			rc = EXT4_CONTINUE_WRITEBACK;
+			code = 3;
+			break;
+		}
+
+		/* Was this inode dirtied too recently? */
+		if (wbc->older_than_this && time_after(inode->dirtied_when,
+						*wbc->older_than_this)) {
+			/* keep this inode on the head so that
+			 * we'll continue writeback with it
+			 * when we return to this locality group */
+
+			/* continue with next locality group
+			 * move this one to the dirty tail */
+			set_bit(EXT4_LG_DIRTY, &lg->lg_flags);
+			list_move_tail(&lg->lg_list, &sbi->s_locality_dirty);
+
+			rc = EXT4_CONTINUE_WRITEBACK;
+			code = 4;
+			break;
+		}
+
+		/* Is another pdflush already flushing this queue? */
+		if (current_is_pdflush() && !writeback_acquire(bdi)) {
+			/* keep this inode on the head so that
+			 * we'll continue writeback with it
+			 * when we return to this locality group */
+
+			/* same for the locality group */
+			list_move(&lg->lg_list, &sbi->s_locality_io);
+
+			rc = EXT4_STOP_WRITEBACK;
+			code = 5;
+			break;
+		}
+
+		BUG_ON(inode->i_state & I_FREEING);
+		__iget(inode);
+		pages_skipped = wbc->pages_skipped;
+		__writeback_single_inode(inode, wbc);
+		if (wbc->sync_mode == WB_SYNC_HOLD) {
+			inode->dirtied_when = jiffies;
+			list_move(&inode->i_list, &lg->lg_dirty);
+			set_bit(EXT4_LG_DIRTY, &lg->lg_flags);
+			list_move(&lg->lg_list, &sbi->s_locality_dirty);
+		}
+		if (current_is_pdflush())
+			writeback_release(bdi);
+		if (wbc->pages_skipped != pages_skipped) {
+			/*
+			 * writeback is not making progress due to locked
+			 * buffers.  Skip this inode for now.
+			 */
+			list_move(&inode->i_list, &lg->lg_dirty);
+
+			set_bit(EXT4_LG_DIRTY, &lg->lg_flags);
+			list_move(&lg->lg_list, &sbi->s_locality_dirty);
+		}
+		spin_unlock(&inode_lock);
+		iput(inode);
+		cond_resched();
+		spin_lock(&inode_lock);
+		if (wbc->nr_to_write <= 0) {
+			rc = EXT4_STOP_WRITEBACK;
+			code = 6;
+			break;
+		}
+	}
+
+	spin_unlock(&inode_lock);
+
+	if (0 && nr_to_write - wbc->nr_to_write) {
+		printk("#%u: %s/%lu/%s%s%s%s%s%s M: %lu/%lu/%lu "
+			"LG:%p/%u/%u[%u/%u] wrote %lu/%d\n",
+			current->pid, __sync_modes[wbc->sync_mode],
+			wbc->nr_to_write,
+			wbc->nonblocking ? "N" : "",
+			wbc->encountered_congestion ? "C" : "",
+			wbc->for_kupdate ? "U" : "",
+			wbc->for_reclaim ? "R" : "",
+			wbc->for_writepages ? "W" : "",
+			wbc->range_cyclic ? "I" : "",
"I" : "", + global_page_state(NR_FILE_DIRTY), + global_page_state(NR_UNSTABLE_NFS), + global_page_state(NR_WRITEBACK), + lg, atomic_read(&lg->lg_count), lg->lg_pgid, + dirty_pages, nonallocated, + nr_to_write - wbc->nr_to_write, code); + } + + return rc; +} + +/* + * the core of inode syncer: + * - loop over locality groups + * - maintain them in order to avoid starvation + */ +void ext4_lg_sync_groups(struct super_block *sb, struct writeback_control *wbc) +{ + const unsigned long start = jiffies; /* livelock avoidance */ + struct ext4_locality_group *lg = NULL, *prev = NULL; + struct ext4_sb_info *sbi = EXT4_SB(sb); + int rc; + + spin_lock(&inode_lock); + + /*printk("#%u: mode %s, nr2wr %lu, %s%s%s%s%s%s M: %lu/%lu/%lu " + "LGs: %sdirty %sio\n", current->pid, + __sync_modes[wbc->sync_mode], wbc->nr_to_write, + wbc->nonblocking ? "nonblock " : "", + wbc->encountered_congestion ? "congested " : "", + wbc->for_kupdate ? "kupdate " : "", + wbc->for_reclaim ? "reclaim " : "", + wbc->for_writepages ? "writepages " : "", + wbc->range_cyclic ? "cyclic " : "", + global_page_state(NR_FILE_DIRTY), + global_page_state(NR_UNSTABLE_NFS), + global_page_state(NR_WRITEBACK), + list_empty(&sbi->s_locality_dirty) ? "-" : "+", + list_empty(&sbi->s_locality_io) ? "-" : "+");*/ + + if (!wbc->for_kupdate || list_empty(&sbi->s_locality_io)) + list_splice_init(&sbi->s_locality_dirty, &sbi->s_locality_io); + + while (!list_empty(&sbi->s_locality_io)) { + + /* we should handle same group twice in a row */ + WARN_ON(prev && prev == lg); + prev = lg; + + lg = list_entry(sbi->s_locality_io.prev, + struct ext4_locality_group, lg_list); + + /* protect locality group */ + atomic_inc(&lg->lg_count); + + /* to avoid two concurrent threads flushing same group */ + list_del_init(&lg->lg_list); + + spin_unlock(&inode_lock); + + clear_bit(EXT4_LG_DIRTY, &lg->lg_flags); + rc = ext4_lg_sync_single_group(sb, lg, wbc, start); + + spin_lock(&inode_lock); + ext4_lg_put_group(lg); + + if (rc == EXT4_STOP_WRITEBACK) + break; + } + spin_unlock(&inode_lock); +} + +/* + * entry function for inode syncing + * it's responsbility is to sort all inode out in their locality groups + */ +void ext4_lg_sync_inodes(struct super_block *sb, struct writeback_control *wbc) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_locality_group *lg; + + /* refill pending groups from s_dirty */ + spin_lock(&inode_lock); + while (!list_empty(&sb->s_dirty)) { + struct inode *inode = list_entry(sb->s_dirty.prev, + struct inode, i_list); + struct ext4_inode_info *ei = EXT4_I(inode); + + lg = ei->i_locality_group; + if (lg == NULL) { + if (S_ISDIR(inode->i_mode) || i_size_read(inode) == 0) { + if (atomic_read(&inode->i_count)) { + /* + * The inode is clean, inuse + */ + list_move(&inode->i_list, &inode_in_use); + } else { + /* + * The inode is clean, unused + */ + list_move(&inode->i_list, &inode_unused); + } + continue; + } + /* XXX: atime changed ? 
+			list_move(&inode->i_list, &inode_in_use);
+			continue;
+		}
+
+		/* move the inode to the proper locality group's dirty list */
+		spin_lock(&lg->lg_lock);
+		list_move_tail(&inode->i_list, &lg->lg_dirty);
+		spin_unlock(&lg->lg_lock);
+
+		if (!test_and_set_bit(EXT4_LG_DIRTY, &lg->lg_flags))
+			list_move(&lg->lg_list, &sbi->s_locality_dirty);
+	}
+	spin_unlock(&inode_lock);
+
+	ext4_lg_sync_groups(sb, wbc);
+}
+
+void ext4_lg_init(struct super_block *sb)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+	sb->s_flags |= 2048; /* XXX: i'll fix this, i promise */
+	spin_lock_init(&sbi->s_locality_lock);
+	INIT_LIST_HEAD(&sbi->s_locality_groups);
+	INIT_LIST_HEAD(&sbi->s_locality_dirty);
+	INIT_LIST_HEAD(&sbi->s_locality_io);
+}
+
+void ext4_lg_release(struct super_block *sb)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_locality_group *lg;
+	struct list_head *cur, *tmp;
+
+	list_for_each_safe_rcu(cur, tmp, &sbi->s_locality_groups) {
+		lg = list_entry(cur, struct ext4_locality_group, lg_hash);
+		if (atomic_read(&lg->lg_count))
+			printk("LG %p/%d (pgid %u), %u inodes, dirty %d, non-allocated %d\n",
+				lg, atomic_read(&lg->lg_count), lg->lg_pgid,
+				atomic_read(&lg->lg_inodes_nr),
+				atomic_read(&lg->lg_dirty_pages),
+				atomic_read(&lg->lg_nonallocated));
+		list_del(&lg->lg_hash);
+		kfree(lg);
+	}
+}
+
Index: linux-2.6.19-rc6/fs/ext4/super.c
===================================================================
--- linux-2.6.19-rc6.orig/fs/ext4/super.c	2006-11-16 07:03:40.000000000 +0300
+++ linux-2.6.19-rc6/fs/ext4/super.c	2006-12-01 02:51:13.000000000 +0300
@@ -449,6 +449,7 @@ static void ext4_put_super (struct super
 		mark_buffer_dirty(sbi->s_sbh);
 		ext4_commit_super(sb, es, 1);
 	}
+	ext4_lg_release(sb);
 
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(sbi->s_group_desc[i]);
@@ -498,6 +499,7 @@ static struct inode *ext4_alloc_inode(st
 	ei = kmem_cache_alloc(ext4_inode_cachep, SLAB_NOFS);
 	if (!ei)
 		return NULL;
+	ei->i_locality_group = NULL;
 #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
 	ei->i_acl = EXT4_ACL_NOT_CACHED;
 	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
@@ -564,6 +566,7 @@ static void ext4_clear_inode(struct inod
 	EXT4_I(inode)->i_block_alloc_info = NULL;
 	if (unlikely(rsv))
 		kfree(rsv);
+	ext4_lg_inode_leave_group(inode);
 }
 
 static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb)
@@ -706,6 +709,7 @@ static struct super_operations ext4_sops
 	.remount_fs	= ext4_remount,
 	.clear_inode	= ext4_clear_inode,
 	.show_options	= ext4_show_options,
+	.sync_inodes	= ext4_lg_sync_inodes,
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext4_quota_read,
 	.quota_write	= ext4_quota_write,
@@ -1860,6 +1864,7 @@ static int ext4_fill_super (struct super
 		test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
"ordered": "writeback"); + ext4_lg_init(sb); ext4_ext_init(sb); lock_kernel(); Index: linux-2.6.19-rc6/fs/ext4/Makefile =================================================================== --- linux-2.6.19-rc6.orig/fs/ext4/Makefile 2006-11-16 07:03:40.000000000 +0300 +++ linux-2.6.19-rc6/fs/ext4/Makefile 2006-12-01 02:51:13.000000000 +0300 @@ -5,7 +5,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o + ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o lg.o ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o Index: linux-2.6.19-rc6/fs/fs-writeback.c =================================================================== --- linux-2.6.19-rc6.orig/fs/fs-writeback.c 2006-11-29 02:32:06.000000000 +0300 +++ linux-2.6.19-rc6/fs/fs-writeback.c 2006-12-01 02:51:13.000000000 +0300 @@ -149,8 +149,7 @@ static int write_inode(struct inode *ino * * Called under inode_lock. */ -static int -__sync_single_inode(struct inode *inode, struct writeback_control *wbc) +int __sync_single_inode(struct inode *inode, struct writeback_control *wbc) { unsigned dirty; struct address_space *mapping = inode->i_mapping; @@ -240,8 +239,7 @@ __sync_single_inode(struct inode *inode, * caller has ref on the inode (either via __iget or via syscall against an fd) * or the inode has I_WILL_FREE set (via generic_forget_inode) */ -static int -__writeback_single_inode(struct inode *inode, struct writeback_control *wbc) +int __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { wait_queue_head_t *wqh; @@ -429,7 +427,7 @@ writeback_inodes(struct writeback_contro restart: sb = sb_entry(super_blocks.prev); for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { - if (!list_empty(&sb->s_dirty) || !list_empty(&sb->s_io)) { + if (!list_empty(&sb->s_dirty) || !list_empty(&sb->s_io) || (sb->s_flags & 2048)) { /* we're making our own get_super here */ sb->s_count++; spin_unlock(&sb_lock);