From: Dmitry Monakhov Subject: Re: [PATCH-v7 1/3] vfs: add support for a lazytime mount option Date: Mon, 19 Jan 2015 18:21:31 +0400 Message-ID: <87k30ic0lw.fsf@openvz.org> References: <1418097870-8232-1-git-send-email-tytso@mit.edu> <1418097870-8232-2-git-send-email-tytso@mit.edu> Mime-Version: 1.0 Content-Type: text/plain Cc: Ext4 Developers List , Theodore Ts'o To: Theodore Ts'o , Linux Filesystem Development List Return-path: Received: from mail-qa0-f52.google.com ([209.85.216.52]:63029 "EHLO mail-qa0-f52.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751520AbbASOVo (ORCPT ); Mon, 19 Jan 2015 09:21:44 -0500 In-Reply-To: <1418097870-8232-2-git-send-email-tytso@mit.edu> Sender: linux-ext4-owner@vger.kernel.org List-ID: Theodore Ts'o writes: > Add a new mount option which enables a new "lazytime" mode. This mode > causes atime, mtime, and ctime updates to only be made to the > in-memory version of the inode. The on-disk times will only get > updated when (a) if the inode needs to be updated for some non-time > related change, (b) if userspace calls fsync(), syncfs() or sync(), or > (c) just before an undeleted inode is evicted from memory. > > This is OK according to POSIX because there are no guarantees after a > crash unless userspace explicitly requests via a fsync(2) call. > > For workloads which feature a large number of random write to a > preallocated file, the lazytime mount option significantly reduces > writes to the inode table. The repeated 4k writes to a single block > will result in undesirable stress on flash devices and SMR disk > drives. Even on conventional HDD's, the repeated writes to the inode > table block will trigger Adjacent Track Interference (ATI) remediation > latencies, which very negatively impact long tail latencies --- which > is a very big deal for web serving tiers (for example). 
> > Google-Bug-Id: 18297052 > > Signed-off-by: Theodore Ts'o > --- > fs/ext4/inode.c | 6 ++++ > fs/fs-writeback.c | 64 ++++++++++++++++++++++++++++++++-------- > fs/gfs2/file.c | 4 +-- > fs/inode.c | 56 +++++++++++++++++++++++++---------- > fs/jfs/file.c | 2 +- > fs/libfs.c | 2 +- > fs/proc_namespace.c | 1 + > fs/sync.c | 8 +++++ > include/linux/backing-dev.h | 1 + > include/linux/fs.h | 5 ++++ > include/trace/events/writeback.h | 60 ++++++++++++++++++++++++++++++++++++- > include/uapi/linux/fs.h | 4 ++- > mm/backing-dev.c | 10 +++++-- > 13 files changed, 187 insertions(+), 36 deletions(-) > > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 5653fa4..628df5b 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -4840,11 +4840,17 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) > * If the inode is marked synchronous, we don't honour that here - doing > * so would cause a commit on atime updates, which we don't bother doing. > * We handle synchronous inodes at the highest possible level. > + * > + * If only the I_DIRTY_TIME flag is set, we can skip everything. If > + * I_DIRTY_TIME and I_DIRTY_SYNC is set, the only inode fields we need > + * to copy into the on-disk inode structure are the timestamp files. > */ > void ext4_dirty_inode(struct inode *inode, int flags) > { > handle_t *handle; > > + if (flags == I_DIRTY_TIME) > + return; > handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); > if (IS_ERR(handle)) > goto out; > diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c > index ef9bef1..d5e02b8 100644 > --- a/fs/fs-writeback.c > +++ b/fs/fs-writeback.c > @@ -247,14 +247,19 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) > return ret; > } > > +#define EXPIRE_DIRTY_ATIME 0x0001 > + > /* > * Move expired (dirtied before work->older_than_this) dirty inodes from > * @delaying_queue to @dispatch_queue. 
> */ > static int move_expired_inodes(struct list_head *delaying_queue, > struct list_head *dispatch_queue, > + int flags, > struct wb_writeback_work *work) > { > + unsigned long *older_than_this = NULL; > + unsigned long expire_time; > LIST_HEAD(tmp); > struct list_head *pos, *node; > struct super_block *sb = NULL; > @@ -262,13 +267,21 @@ static int move_expired_inodes(struct list_head *delaying_queue, > int do_sb_sort = 0; > int moved = 0; > > + if ((flags & EXPIRE_DIRTY_ATIME) == 0) > + older_than_this = work->older_than_this; > + else if ((work->reason == WB_REASON_SYNC) == 0) { > + expire_time = jiffies - (HZ * 86400); > + older_than_this = &expire_time; > + } > while (!list_empty(delaying_queue)) { > inode = wb_inode(delaying_queue->prev); > - if (work->older_than_this && > - inode_dirtied_after(inode, *work->older_than_this)) > + if (older_than_this && > + inode_dirtied_after(inode, *older_than_this)) > break; > list_move(&inode->i_wb_list, &tmp); > moved++; > + if (flags & EXPIRE_DIRTY_ATIME) > + set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state); > if (sb_is_blkdev_sb(inode->i_sb)) > continue; > if (sb && sb != inode->i_sb) > @@ -309,9 +322,12 @@ out: > static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work) > { > int moved; > + > assert_spin_locked(&wb->list_lock); > list_splice_init(&wb->b_more_io, &wb->b_io); > - moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work); > + moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work); > + moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io, > + EXPIRE_DIRTY_ATIME, work); > trace_writeback_queue_io(wb, work, moved); > } > > @@ -435,6 +451,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, > * updates after data IO completion. > */ > redirty_tail(inode, wb); > + } else if (inode->i_state & I_DIRTY_TIME) { > + list_move(&inode->i_wb_list, &wb->b_dirty_time); > } else { > /* The inode is clean. Remove from writeback lists. 
*/ > list_del_init(&inode->i_wb_list); > @@ -482,11 +500,18 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) > /* Clear I_DIRTY_PAGES if we've written out all dirty pages */ > if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) > inode->i_state &= ~I_DIRTY_PAGES; > - dirty = inode->i_state & I_DIRTY; > - inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); > + dirty = inode->i_state & (I_DIRTY_SYNC | I_DIRTY_DATASYNC); > + if ((dirty && (inode->i_state & I_DIRTY_TIME)) || > + (inode->i_state & I_DIRTY_TIME_EXPIRED)) { > + dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; > + trace_writeback_lazytime(inode); > + } > + inode->i_state &= ~dirty; > spin_unlock(&inode->i_lock); > + if (dirty & I_DIRTY_TIME) > + mark_inode_dirty_sync(inode); > /* Don't write the inode if only I_DIRTY_PAGES was set */ > - if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { > + if (dirty) { > int err = write_inode(inode, wbc); > if (ret == 0) > ret = err; > @@ -534,7 +559,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, > * make sure inode is on some writeback list and leave it there unless > * we have completely cleaned the inode. > */ > - if (!(inode->i_state & I_DIRTY) && > + if (!(inode->i_state & I_DIRTY_ALL) && > (wbc->sync_mode != WB_SYNC_ALL || > !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) > goto out; > @@ -549,7 +574,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, > * If inode is clean, remove it from writeback lists. Otherwise don't > * touch it. See comment above for explanation. 
> */ > - if (!(inode->i_state & I_DIRTY)) > + if (!(inode->i_state & I_DIRTY_ALL)) > list_del_init(&inode->i_wb_list); > spin_unlock(&wb->list_lock); > inode_sync_complete(inode); > @@ -691,7 +716,7 @@ static long writeback_sb_inodes(struct super_block *sb, > wrote += write_chunk - wbc.nr_to_write; > spin_lock(&wb->list_lock); > spin_lock(&inode->i_lock); > - if (!(inode->i_state & I_DIRTY)) > + if (!(inode->i_state & I_DIRTY_ALL)) > wrote++; > requeue_inode(inode, wb, &wbc); > inode_sync_complete(inode); > @@ -1129,16 +1154,20 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) > * page->mapping->host, so the page-dirtying time is recorded in the internal > * blockdev inode. > */ > +#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) > void __mark_inode_dirty(struct inode *inode, int flags) > { > struct super_block *sb = inode->i_sb; > struct backing_dev_info *bdi = NULL; > + int dirtytime; > + > + trace_writeback_mark_inode_dirty(inode, flags); > > /* > * Don't do this for I_DIRTY_PAGES - that doesn't actually > * dirty the inode itself > */ > - if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { > + if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) { > trace_writeback_dirty_inode_start(inode, flags); > > if (sb->s_op->dirty_inode) > @@ -1146,6 +1175,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) > > trace_writeback_dirty_inode(inode, flags); > } > + if (flags & I_DIRTY_INODE) > + flags &= ~I_DIRTY_TIME; > + dirtytime = flags & I_DIRTY_TIME; TYPO? 'dirtytime' is always false because you have already cleared that bit. 
You probably want to do this instead: dirtytime = flags & I_DIRTY_TIME; if (flags & I_DIRTY_INODE) flags &= ~I_DIRTY_TIME; > > /* > * make sure that changes are seen by all cpus before we test i_state > @@ -1154,16 +1186,22 @@ void __mark_inode_dirty(struct inode *inode, int flags) > smp_mb(); > > /* avoid the locking if we can */ > - if ((inode->i_state & flags) == flags) > + if (((inode->i_state & flags) == flags) || > + (dirtytime && (inode->i_state & I_DIRTY_INODE))) > return; > > if (unlikely(block_dump)) > block_dump___mark_inode_dirty(inode); > > spin_lock(&inode->i_lock); > + if (dirtytime && (inode->i_state & I_DIRTY_INODE)) > + return; > if ((inode->i_state & flags) != flags) { > const int was_dirty = inode->i_state & I_DIRTY; > > + if (dirtytime && (inode->i_state & I_DIRTY_INODE)) > + inode->i_state &= ~I_DIRTY_TIME; > + > inode->i_state |= flags; > > /* > @@ -1210,8 +1248,10 @@ void __mark_inode_dirty(struct inode *inode, int flags) > } > > inode->dirtied_when = jiffies; > - list_move(&inode->i_wb_list, &bdi->wb.b_dirty); > + list_move(&inode->i_wb_list, dirtytime ? 
> + &bdi->wb.b_dirty_time : &bdi->wb.b_dirty); > spin_unlock(&bdi->wb.list_lock); > + trace_writeback_dirty_inode_enqueue(inode); > > if (wakeup_bdi) > bdi_wakeup_thread_delayed(bdi); > diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c > index 80dd44d..e584bf9 100644 > --- a/fs/gfs2/file.c > +++ b/fs/gfs2/file.c > @@ -654,7 +654,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, > { > struct address_space *mapping = file->f_mapping; > struct inode *inode = mapping->host; > - int sync_state = inode->i_state & I_DIRTY; > + int sync_state = inode->i_state & I_DIRTY_ALL; > struct gfs2_inode *ip = GFS2_I(inode); > int ret = 0, ret1 = 0; > > @@ -667,7 +667,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, > if (!gfs2_is_jdata(ip)) > sync_state &= ~I_DIRTY_PAGES; > if (datasync) > - sync_state &= ~I_DIRTY_SYNC; > + sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME); > > if (sync_state) { > ret = sync_inode_metadata(inode, 1); > diff --git a/fs/inode.c b/fs/inode.c > index 26753ba..dc48a23 100644 > --- a/fs/inode.c > +++ b/fs/inode.c > @@ -18,6 +18,7 @@ > #include /* for inode_has_buffers */ > #include > #include > +#include > #include "internal.h" > > /* > @@ -30,7 +31,7 @@ > * inode_sb_list_lock protects: > * sb->s_inodes, inode->i_sb_list > * bdi->wb.list_lock protects: > - * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list > + * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list > * inode_hash_lock protects: > * inode_hashtable, inode->i_hash > * > @@ -414,7 +415,8 @@ static void inode_lru_list_add(struct inode *inode) > */ > void inode_add_lru(struct inode *inode) > { > - if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) && > + if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC | > + I_FREEING | I_WILL_FREE)) && > !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE) > inode_lru_list_add(inode); > } > @@ -645,7 +647,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) > 
spin_unlock(&inode->i_lock); > continue; > } > - if (inode->i_state & I_DIRTY && !kill_dirty) { > + if (inode->i_state & I_DIRTY_ALL && !kill_dirty) { > spin_unlock(&inode->i_lock); > busy = 1; > continue; > @@ -1430,11 +1432,20 @@ static void iput_final(struct inode *inode) > */ > void iput(struct inode *inode) > { > - if (inode) { > - BUG_ON(inode->i_state & I_CLEAR); > - > - if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) > - iput_final(inode); > + if (!inode) > + return; > + BUG_ON(inode->i_state & I_CLEAR); > +retry: > + if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) { > + if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) { > + atomic_inc(&inode->i_count); > + inode->i_state &= ~I_DIRTY_TIME; > + spin_unlock(&inode->i_lock); > + trace_writeback_lazytime_iput(inode); > + mark_inode_dirty_sync(inode); > + goto retry; > + } > + iput_final(inode); > } > } > EXPORT_SYMBOL(iput); > @@ -1493,14 +1504,9 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, > return 0; > } > > -/* > - * This does the actual work of updating an inodes time or version. Must have > - * had called mnt_want_write() before calling this. 
> - */ > -static int update_time(struct inode *inode, struct timespec *time, int flags) > +int generic_update_time(struct inode *inode, struct timespec *time, int flags) > { > - if (inode->i_op->update_time) > - return inode->i_op->update_time(inode, time, flags); > + int iflags = I_DIRTY_TIME; > > if (flags & S_ATIME) > inode->i_atime = *time; > @@ -1510,9 +1516,27 @@ static int update_time(struct inode *inode, struct timespec *time, int flags) > inode->i_ctime = *time; > if (flags & S_MTIME) > inode->i_mtime = *time; > - mark_inode_dirty_sync(inode); > + > + if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION)) > + iflags |= I_DIRTY_SYNC; > + __mark_inode_dirty(inode, iflags); > return 0; > } > +EXPORT_SYMBOL(generic_update_time); > + > +/* > + * This does the actual work of updating an inodes time or version. Must have > + * had called mnt_want_write() before calling this. > + */ > +static int update_time(struct inode *inode, struct timespec *time, int flags) > +{ > + int (*update_time)(struct inode *, struct timespec *, int); > + > + update_time = inode->i_op->update_time ? 
inode->i_op->update_time : > + generic_update_time; > + > + return update_time(inode, time, flags); > +} > > /** > * touch_atime - update the access time > diff --git a/fs/jfs/file.c b/fs/jfs/file.c > index 33aa0cc..10815f8 100644 > --- a/fs/jfs/file.c > +++ b/fs/jfs/file.c > @@ -39,7 +39,7 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) > return rc; > > mutex_lock(&inode->i_mutex); > - if (!(inode->i_state & I_DIRTY) || > + if (!(inode->i_state & I_DIRTY_ALL) || > (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { > /* Make sure committed changes hit the disk */ > jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); > diff --git a/fs/libfs.c b/fs/libfs.c > index 171d284..7cb9cef 100644 > --- a/fs/libfs.c > +++ b/fs/libfs.c > @@ -948,7 +948,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end, > > mutex_lock(&inode->i_mutex); > ret = sync_mapping_buffers(inode->i_mapping); > - if (!(inode->i_state & I_DIRTY)) > + if (!(inode->i_state & I_DIRTY_ALL)) > goto out; > if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) > goto out; > diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c > index 73ca174..f98234a 100644 > --- a/fs/proc_namespace.c > +++ b/fs/proc_namespace.c > @@ -44,6 +44,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb) > { MS_SYNCHRONOUS, ",sync" }, > { MS_DIRSYNC, ",dirsync" }, > { MS_MANDLOCK, ",mand" }, > + { MS_LAZYTIME, ",lazytime" }, > { 0, NULL } > }; > const struct proc_fs_info *fs_infop; > diff --git a/fs/sync.c b/fs/sync.c > index bdc729d..6ac7bf0 100644 > --- a/fs/sync.c > +++ b/fs/sync.c > @@ -177,8 +177,16 @@ SYSCALL_DEFINE1(syncfs, int, fd) > */ > int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) > { > + struct inode *inode = file->f_mapping->host; > + > if (!file->f_op->fsync) > return -EINVAL; > + if (!datasync && (inode->i_state & I_DIRTY_TIME)) { > + spin_lock(&inode->i_lock); > + inode->i_state &= ~I_DIRTY_TIME; > + 
spin_unlock(&inode->i_lock); > + mark_inode_dirty_sync(inode); > + } > return file->f_op->fsync(file, start, end, datasync); > } > EXPORT_SYMBOL(vfs_fsync_range); > diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h > index 5da6012..4cdf733 100644 > --- a/include/linux/backing-dev.h > +++ b/include/linux/backing-dev.h > @@ -55,6 +55,7 @@ struct bdi_writeback { > struct list_head b_dirty; /* dirty inodes */ > struct list_head b_io; /* parked for writeback */ > struct list_head b_more_io; /* parked for more writeback */ > + struct list_head b_dirty_time; /* time stamps are dirty */ > spinlock_t list_lock; /* protects the b_* lists */ > }; > > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 9ab779e..bf00e98 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1720,8 +1720,12 @@ struct super_operations { > #define __I_DIO_WAKEUP 9 > #define I_DIO_WAKEUP (1 << I_DIO_WAKEUP) > #define I_LINKABLE (1 << 10) > +#define I_DIRTY_TIME (1 << 11) > +#define __I_DIRTY_TIME_EXPIRED 12 > +#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) > > #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) > +#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) > > extern void __mark_inode_dirty(struct inode *, int); > static inline void mark_inode_dirty(struct inode *inode) > @@ -1884,6 +1888,7 @@ extern int current_umask(void); > > extern void ihold(struct inode * inode); > extern void iput(struct inode *); > +extern int generic_update_time(struct inode *, struct timespec *, int); > > static inline struct inode *file_inode(const struct file *f) > { > diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h > index cee02d6..5ecb4c2 100644 > --- a/include/trace/events/writeback.h > +++ b/include/trace/events/writeback.h > @@ -18,6 +18,8 @@ > {I_FREEING, "I_FREEING"}, \ > {I_CLEAR, "I_CLEAR"}, \ > {I_SYNC, "I_SYNC"}, \ > + {I_DIRTY_TIME, "I_DIRTY_TIME"}, \ > + {I_DIRTY_TIME_EXPIRED, "I_DIRTY_TIME_EXPIRED"}, \ > 
{I_REFERENCED, "I_REFERENCED"} \ > ) > > @@ -68,6 +70,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, > TP_STRUCT__entry ( > __array(char, name, 32) > __field(unsigned long, ino) > + __field(unsigned long, state) > __field(unsigned long, flags) > ), > > @@ -78,16 +81,25 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, > strncpy(__entry->name, > bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); > __entry->ino = inode->i_ino; > + __entry->state = inode->i_state; > __entry->flags = flags; > ), > > - TP_printk("bdi %s: ino=%lu flags=%s", > + TP_printk("bdi %s: ino=%lu state=%s flags=%s", > __entry->name, > __entry->ino, > + show_inode_state(__entry->state), > show_inode_state(__entry->flags) > ) > ); > > +DEFINE_EVENT(writeback_dirty_inode_template, writeback_mark_inode_dirty, > + > + TP_PROTO(struct inode *inode, int flags), > + > + TP_ARGS(inode, flags) > +); > + > DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start, > > TP_PROTO(struct inode *inode, int flags), > @@ -598,6 +610,52 @@ DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, > TP_ARGS(inode, wbc, nr_to_write) > ); > > +DECLARE_EVENT_CLASS(writeback_lazytime_template, > + TP_PROTO(struct inode *inode), > + > + TP_ARGS(inode), > + > + TP_STRUCT__entry( > + __field( dev_t, dev ) > + __field(unsigned long, ino ) > + __field(unsigned long, state ) > + __field( __u16, mode ) > + __field(unsigned long, dirtied_when ) > + ), > + > + TP_fast_assign( > + __entry->dev = inode->i_sb->s_dev; > + __entry->ino = inode->i_ino; > + __entry->state = inode->i_state; > + __entry->mode = inode->i_mode; > + __entry->dirtied_when = inode->dirtied_when; > + ), > + > + TP_printk("dev %d,%d ino %lu dirtied %lu state %s mode 0%o", > + MAJOR(__entry->dev), MINOR(__entry->dev), > + __entry->ino, __entry->dirtied_when, > + show_inode_state(__entry->state), __entry->mode) > +); > + > +DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime, > + TP_PROTO(struct inode *inode), > + 
> + TP_ARGS(inode) > +); > + > +DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime_iput, > + TP_PROTO(struct inode *inode), > + > + TP_ARGS(inode) > +); > + > +DEFINE_EVENT(writeback_lazytime_template, writeback_dirty_inode_enqueue, > + > + TP_PROTO(struct inode *inode), > + > + TP_ARGS(inode) > +); > + > #endif /* _TRACE_WRITEBACK_H */ > > /* This part must be outside protection */ > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h > index 3735fa0..9b964a5 100644 > --- a/include/uapi/linux/fs.h > +++ b/include/uapi/linux/fs.h > @@ -90,6 +90,7 @@ struct inodes_stat_t { > #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ > #define MS_I_VERSION (1<<23) /* Update inode I_version field */ > #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ > +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ > > /* These sb flags are internal to the kernel */ > #define MS_NOSEC (1<<28) > @@ -100,7 +101,8 @@ struct inodes_stat_t { > /* > * Superblock flags that can be altered by MS_REMOUNT > */ > -#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION) > +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ > + MS_LAZYTIME) > > /* > * Old magic mount flag and mask > diff --git a/mm/backing-dev.c b/mm/backing-dev.c > index 0ae0df5..915feea 100644 > --- a/mm/backing-dev.c > +++ b/mm/backing-dev.c > @@ -69,10 +69,10 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) > unsigned long background_thresh; > unsigned long dirty_thresh; > unsigned long bdi_thresh; > - unsigned long nr_dirty, nr_io, nr_more_io; > + unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time; > struct inode *inode; > > - nr_dirty = nr_io = nr_more_io = 0; > + nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0; > spin_lock(&wb->list_lock); > list_for_each_entry(inode, &wb->b_dirty, i_wb_list) > nr_dirty++; > @@ -80,6 +80,9 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) > nr_io++; > 
list_for_each_entry(inode, &wb->b_more_io, i_wb_list) > nr_more_io++; > + list_for_each_entry(inode, &wb->b_dirty_time, i_wb_list) > + if (inode->i_state & I_DIRTY_TIME) > + nr_dirty_time++; > spin_unlock(&wb->list_lock); > > global_dirty_limits(&background_thresh, &dirty_thresh); > @@ -98,6 +101,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) > "b_dirty: %10lu\n" > "b_io: %10lu\n" > "b_more_io: %10lu\n" > + "b_dirty_time: %10lu\n" > "bdi_list: %10u\n" > "state: %10lx\n", > (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)), > @@ -111,6 +115,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) > nr_dirty, > nr_io, > nr_more_io, > + nr_dirty_time, > !list_empty(&bdi->bdi_list), bdi->state); > #undef K > > @@ -418,6 +423,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) > INIT_LIST_HEAD(&wb->b_dirty); > INIT_LIST_HEAD(&wb->b_io); > INIT_LIST_HEAD(&wb->b_more_io); > + INIT_LIST_HEAD(&wb->b_dirty_time); > spin_lock_init(&wb->list_lock); > INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn); > } > -- > 2.1.0 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html