Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933263AbZIDI2U (ORCPT ); Fri, 4 Sep 2009 04:28:20 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S933067AbZIDI2S (ORCPT ); Fri, 4 Sep 2009 04:28:18 -0400 Received: from cantor.suse.de ([195.135.220.2]:59034 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S933031AbZIDI2P (ORCPT ); Fri, 4 Sep 2009 04:28:15 -0400 Date: Fri, 4 Sep 2009 10:28:13 +0200 From: Jan Kara To: Jens Axboe Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, chris.mason@oracle.com, david@fromorbit.com, hch@infradead.org, tytso@mit.edu, akpm@linux-foundation.org, jack@suse.cz Subject: Re: [PATCH 1/8] writeback: get rid of generic_sync_sb_inodes() export Message-ID: <20090904082813.GC19857@duck.suse.cz> References: <1252050406-22467-1-git-send-email-jens.axboe@oracle.com> <1252050406-22467-2-git-send-email-jens.axboe@oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1252050406-22467-2-git-send-email-jens.axboe@oracle.com> User-Agent: Mutt/1.5.17 (2007-11-01) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 10308 Lines: 287 On Fri 04-09-09 09:46:39, Jens Axboe wrote: > This adds two new exported functions: > > - writeback_inodes_sb(), which only attempts to writeback dirty inodes on > this super_block, for WB_SYNC_NONE writeout. > - sync_inodes_sbt(), which writes out all dirty inodes on this super_block > and also waits for the IO to complete. > > Signed-off-by: Jens Axboe The patch looks good. A nice cleanup. Acked-by: Jan Kara > --- > drivers/staging/pohmelfs/inode.c | 9 +---- > fs/fs-writeback.c | 70 ++++++++++++++++++++++--------------- > fs/sync.c | 18 +++++---- > fs/ubifs/budget.c | 16 +------- > fs/ubifs/super.c | 8 +---- > include/linux/fs.h | 2 - > include/linux/writeback.h | 3 +- > 7 files changed, 58 insertions(+), 68 deletions(-) > > diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c > index 7b60579..e63c9be 100644 > --- a/drivers/staging/pohmelfs/inode.c > +++ b/drivers/staging/pohmelfs/inode.c > @@ -1950,14 +1950,7 @@ static int pohmelfs_get_sb(struct file_system_type *fs_type, > */ > static void pohmelfs_kill_super(struct super_block *sb) > { > - struct writeback_control wbc = { > - .sync_mode = WB_SYNC_ALL, > - .range_start = 0, > - .range_end = LLONG_MAX, > - .nr_to_write = LONG_MAX, > - }; > - generic_sync_sb_inodes(sb, &wbc); > - > + sync_inodes_sb(sb); > kill_anon_super(sb); > } > > diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c > index c54226b..271e5f4 100644 > --- a/fs/fs-writeback.c > +++ b/fs/fs-writeback.c > @@ -458,8 +458,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) > * on the writer throttling path, and we get decent balancing between many > * throttled threads: we don't want them all piling up on inode_sync_wait. > */ > -void generic_sync_sb_inodes(struct super_block *sb, > - struct writeback_control *wbc) > +static void generic_sync_sb_inodes(struct super_block *sb, > + struct writeback_control *wbc) > { > const unsigned long start = jiffies; /* livelock avoidance */ > int sync = wbc->sync_mode == WB_SYNC_ALL; > @@ -593,13 +593,6 @@ void generic_sync_sb_inodes(struct super_block *sb, > > return; /* Leave any unwritten inodes on s_io */ > } > -EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); > - > -static void sync_sb_inodes(struct super_block *sb, > - struct writeback_control *wbc) > -{ > - generic_sync_sb_inodes(sb, wbc); > -} > > /* > * Start writeback of dirty pagecache data against all unlocked inodes. > @@ -640,7 +633,7 @@ restart: > */ > if (down_read_trylock(&sb->s_umount)) { > if (sb->s_root) > - sync_sb_inodes(sb, wbc); > + generic_sync_sb_inodes(sb, wbc); > up_read(&sb->s_umount); > } > spin_lock(&sb_lock); > @@ -653,35 +646,56 @@ restart: > spin_unlock(&sb_lock); > } > > -/* > - * writeback and wait upon the filesystem's dirty inodes. The caller will > - * do this in two passes - one to write, and one to wait. > - * > - * A finite limit is set on the number of pages which will be written. > - * To prevent infinite livelock of sys_sync(). > +/** > + * writeback_inodes_sb - writeback dirty inodes from given super_block > + * @sb: the superblock > * > - * We add in the number of potentially dirty inodes, because each inode write > - * can dirty pagecache in the underlying blockdev. > + * Start writeback on some inodes on this super_block. No guarantees are made > + * on how many (if any) will be written, and this function does not wait > + * for IO completion of submitted IO. The number of pages submitted is > + * returned. > */ > -void sync_inodes_sb(struct super_block *sb, int wait) > +long writeback_inodes_sb(struct super_block *sb) > { > struct writeback_control wbc = { > - .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, > + .sync_mode = WB_SYNC_NONE, > .range_start = 0, > .range_end = LLONG_MAX, > }; > + unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); > + unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); > + long nr_to_write; > > - if (!wait) { > - unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); > - unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); > - > - wbc.nr_to_write = nr_dirty + nr_unstable + > + nr_to_write = nr_dirty + nr_unstable + > (inodes_stat.nr_inodes - inodes_stat.nr_unused); > - } else > - wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */ > > - sync_sb_inodes(sb, &wbc); > + wbc.nr_to_write = nr_to_write; > + generic_sync_sb_inodes(sb, &wbc); > + return nr_to_write - wbc.nr_to_write; > +} > +EXPORT_SYMBOL(writeback_inodes_sb); > + > +/** > + * sync_inodes_sb - sync sb inode pages > + * @sb: the superblock > + * > + * This function writes and waits on any dirty inode belonging to this > + * super_block. The number of pages synced is returned. > + */ > +long sync_inodes_sb(struct super_block *sb) > +{ > + struct writeback_control wbc = { > + .sync_mode = WB_SYNC_ALL, > + .range_start = 0, > + .range_end = LLONG_MAX, > + }; > + long nr_to_write = LONG_MAX; /* doesn't actually matter */ > + > + wbc.nr_to_write = nr_to_write; > + generic_sync_sb_inodes(sb, &wbc); > + return nr_to_write - wbc.nr_to_write; > } > +EXPORT_SYMBOL(sync_inodes_sb); > > /** > * write_inode_now - write an inode to disk > diff --git a/fs/sync.c b/fs/sync.c > index 3422ba6..66f2104 100644 > --- a/fs/sync.c > +++ b/fs/sync.c > @@ -19,20 +19,22 @@ > SYNC_FILE_RANGE_WAIT_AFTER) > > /* > - * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) > - * just dirties buffers with inodes so we have to submit IO for these buffers > - * via __sync_blockdev(). This also speeds up the wait == 1 case since in that > - * case write_inode() functions do sync_dirty_buffer() and thus effectively > - * write one block at a time. > + * Do the filesystem syncing work. For simple filesystems > + * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to > + * submit IO for these buffers via __sync_blockdev(). This also speeds up the > + * wait == 1 case since in that case write_inode() functions do > + * sync_dirty_buffer() and thus effectively write one block at a time. > */ > static int __sync_filesystem(struct super_block *sb, int wait) > { > /* Avoid doing twice syncing and cache pruning for quota sync */ > - if (!wait) > + if (!wait) { > writeout_quota_sb(sb, -1); > - else > + writeback_inodes_sb(sb); > + } else { > sync_quota_sb(sb, -1); > - sync_inodes_sb(sb, wait); > + sync_inodes_sb(sb); > + } > if (sb->s_op->sync_fs) > sb->s_op->sync_fs(sb, wait); > return __sync_blockdev(sb->s_bdev, wait); > diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c > index eaf6d89..1c8991b 100644 > --- a/fs/ubifs/budget.c > +++ b/fs/ubifs/budget.c > @@ -65,26 +65,14 @@ > static int shrink_liability(struct ubifs_info *c, int nr_to_write) > { > int nr_written; > - struct writeback_control wbc = { > - .sync_mode = WB_SYNC_NONE, > - .range_end = LLONG_MAX, > - .nr_to_write = nr_to_write, > - }; > - > - generic_sync_sb_inodes(c->vfs_sb, &wbc); > - nr_written = nr_to_write - wbc.nr_to_write; > > + nr_written = writeback_inodes_sb(c->vfs_sb); > if (!nr_written) { > /* > * Re-try again but wait on pages/inodes which are being > * written-back concurrently (e.g., by pdflush). > */ > - memset(&wbc, 0, sizeof(struct writeback_control)); > - wbc.sync_mode = WB_SYNC_ALL; > - wbc.range_end = LLONG_MAX; > - wbc.nr_to_write = nr_to_write; > - generic_sync_sb_inodes(c->vfs_sb, &wbc); > - nr_written = nr_to_write - wbc.nr_to_write; > + nr_written = sync_inodes_sb(c->vfs_sb); > } > > dbg_budg("%d pages were written back", nr_written); > diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c > index 26d2e0d..8d6050a 100644 > --- a/fs/ubifs/super.c > +++ b/fs/ubifs/super.c > @@ -438,12 +438,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) > { > int i, err; > struct ubifs_info *c = sb->s_fs_info; > - struct writeback_control wbc = { > - .sync_mode = WB_SYNC_ALL, > - .range_start = 0, > - .range_end = LLONG_MAX, > - .nr_to_write = LONG_MAX, > - }; > > /* > * Zero @wait is just an advisory thing to help the file system shove > @@ -462,7 +456,7 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) > * the user be able to get more accurate results of 'statfs()' after > * they synchronize the file system. > */ > - generic_sync_sb_inodes(sb, &wbc); > + sync_inodes_sb(sb); > > /* > * Synchronize write buffers, because 'ubifs_run_commit()' does not > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 73e9b64..07b0f66 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -2070,8 +2070,6 @@ static inline void invalidate_remote_inode(struct inode *inode) > extern int invalidate_inode_pages2(struct address_space *mapping); > extern int invalidate_inode_pages2_range(struct address_space *mapping, > pgoff_t start, pgoff_t end); > -extern void generic_sync_sb_inodes(struct super_block *sb, > - struct writeback_control *wbc); > extern int write_inode_now(struct inode *, int); > extern int filemap_fdatawrite(struct address_space *); > extern int filemap_flush(struct address_space *); > diff --git a/include/linux/writeback.h b/include/linux/writeback.h > index 3224820..0703929 100644 > --- a/include/linux/writeback.h > +++ b/include/linux/writeback.h > @@ -78,7 +78,8 @@ struct writeback_control { > */ > void writeback_inodes(struct writeback_control *wbc); > int inode_wait(void *); > -void sync_inodes_sb(struct super_block *, int wait); > +long writeback_inodes_sb(struct super_block *); > +long sync_inodes_sb(struct super_block *); > > /* writeback.h requires fs.h; it, too, is not included from here. */ > static inline void wait_on_inode(struct inode *inode) > -- > 1.6.4.1.207.g68ea > -- Jan Kara SUSE Labs, CR -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/