2007-10-11 17:33:31

by Jan Kara

[permalink] [raw]
Subject: [PATCH 1/2] Make ext3 use bit operations to manipulate i_flags

Hello,

attached patch makes ext3 use bit operations to manipulate its
EXT3_I->i_flags. So far i_flags modifications were usually guarded
by i_mutex but my next patch needs to modify i_flags without i_mutex.
BTW: Is there a type, with accompanying functions, that allows both bit
operations and things like atomic_read() and atomic_set()? It would be nice
to use them here - currently I just assign to the variable, but I'm not sure
whether this can result in garbage on some strange architecture.

Honza

--
Jan Kara <[email protected]>
SUSE Labs, CR

Use set_bit(), clear_bit() and test_bit() to manipulate EXT3_I->i_flags.
This allows concurrent updates to EXT3_I->i_flags. At several places
we need to copy EXT3_I->i_flags as a whole - we do it non-atomically
and hope we don't get any garbage.

Signed-off-by: Jan Kara <[email protected]>

diff -rupX /home/jack/.kerndiffexclude linux-2.6.23/fs/ext3/dir.c linux-2.6.23-1-ext3_iflags_locking/fs/ext3/dir.c
--- linux-2.6.23/fs/ext3/dir.c 2007-10-11 12:01:23.000000000 +0200
+++ linux-2.6.23-1-ext3_iflags_locking/fs/ext3/dir.c 2007-10-11 18:03:19.000000000 +0200
@@ -110,7 +110,7 @@ static int ext3_readdir(struct file * fi
#ifdef CONFIG_EXT3_INDEX
if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
EXT3_FEATURE_COMPAT_DIR_INDEX) &&
- ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
+ (test_bit(EXT3_INDEX_FL, &EXT3_I(inode)->i_flags) ||
((inode->i_size >> sb->s_blocksize_bits) == 1))) {
err = ext3_dx_readdir(filp, dirent, filldir);
if (err != ERR_BAD_DX_DIR) {
@@ -121,7 +121,7 @@ static int ext3_readdir(struct file * fi
* We don't set the inode dirty flag since it's not
* critical that it get flushed back to the disk.
*/
- EXT3_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
+ clear_bit(EXT3_INDEX_FL, &EXT3_I(filp->f_path.dentry->d_inode)->i_flags);
}
#endif
stored = 0;
diff -rupX /home/jack/.kerndiffexclude linux-2.6.23/fs/ext3/ialloc.c linux-2.6.23-1-ext3_iflags_locking/fs/ext3/ialloc.c
--- linux-2.6.23/fs/ext3/ialloc.c 2006-11-29 22:57:37.000000000 +0100
+++ linux-2.6.23-1-ext3_iflags_locking/fs/ext3/ialloc.c 2007-10-11 18:05:48.000000000 +0200
@@ -278,7 +278,7 @@ static int find_group_orlov(struct super
ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);

if ((parent == sb->s_root->d_inode) ||
- (EXT3_I(parent)->i_flags & EXT3_TOPDIR_FL)) {
+ test_bit(EXT3_TOPDIR_FL, &EXT3_I(parent)->i_flags)) {
int best_ndir = inodes_per_group;
int best_group = -1;

diff -rupX /home/jack/.kerndiffexclude linux-2.6.23/fs/ext3/inode.c linux-2.6.23-1-ext3_iflags_locking/fs/ext3/inode.c
--- linux-2.6.23/fs/ext3/inode.c 2007-10-11 12:01:23.000000000 +0200
+++ linux-2.6.23-1-ext3_iflags_locking/fs/ext3/inode.c 2007-10-11 18:13:04.000000000 +0200
@@ -2557,18 +2557,16 @@ int ext3_get_inode_loc(struct inode *ino

void ext3_set_inode_flags(struct inode *inode)
{
- unsigned int flags = EXT3_I(inode)->i_flags;
-
inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
- if (flags & EXT3_SYNC_FL)
+ if (test_bit(EXT3_SYNC_FL, &EXT3_I(inode)->i_flags))
inode->i_flags |= S_SYNC;
- if (flags & EXT3_APPEND_FL)
+ if (test_bit(EXT3_APPEND_FL, &EXT3_I(inode)->i_flags))
inode->i_flags |= S_APPEND;
- if (flags & EXT3_IMMUTABLE_FL)
+ if (test_bit(EXT3_IMMUTABLE_FL, &EXT3_I(inode)->i_flags))
inode->i_flags |= S_IMMUTABLE;
- if (flags & EXT3_NOATIME_FL)
+ if (test_bit(EXT3_NOATIME_FL, &EXT3_I(inode)->i_flags))
inode->i_flags |= S_NOATIME;
- if (flags & EXT3_DIRSYNC_FL)
+ if (test_bit(EXT3_DIRSYNC_FL, &EXT3_I(inode)->i_flags))
inode->i_flags |= S_DIRSYNC;
}

@@ -2577,18 +2575,26 @@ void ext3_get_inode_flags(struct ext3_in
{
unsigned int flags = ei->vfs_inode.i_flags;

- ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL|
- EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL);
if (flags & S_SYNC)
- ei->i_flags |= EXT3_SYNC_FL;
+ set_bit(EXT3_SYNC_FL, &ei->i_flags);
+ else
+ clear_bit(EXT3_SYNC_FL, &ei->i_flags);
if (flags & S_APPEND)
- ei->i_flags |= EXT3_APPEND_FL;
+ set_bit(EXT3_APPEND_FL, &ei->i_flags);
+ else
+ clear_bit(EXT3_APPEND_FL, &ei->i_flags);
if (flags & S_IMMUTABLE)
- ei->i_flags |= EXT3_IMMUTABLE_FL;
+ set_bit(EXT3_IMMUTABLE_FL, &ei->i_flags);
+ else
+ clear_bit(EXT3_IMMUTABLE_FL, &ei->i_flags);
if (flags & S_NOATIME)
- ei->i_flags |= EXT3_NOATIME_FL;
+ set_bit(EXT3_NOATIME_FL, &ei->i_flags);
+ else
+ clear_bit(EXT3_NOATIME_FL, &ei->i_flags);
if (flags & S_DIRSYNC)
- ei->i_flags |= EXT3_DIRSYNC_FL;
+ set_bit(EXT3_DIRSYNC_FL, &ei->i_flags);
+ else
+ clear_bit(EXT3_DIRSYNC_FL, &ei->i_flags);
}

void ext3_read_inode(struct inode * inode)
@@ -3210,9 +3216,9 @@ int ext3_change_inode_journal_flag(struc
*/

if (val)
- EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
+ set_bit(EXT3_JOURNAL_DATA_FL, &EXT3_I(inode)->i_flags);
else
- EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL;
+ clear_bit(EXT3_JOURNAL_DATA_FL, &EXT3_I(inode)->i_flags);
ext3_set_aops(inode);

journal_unlock_updates(journal);
diff -rupX /home/jack/.kerndiffexclude linux-2.6.23/fs/ext3/namei.c linux-2.6.23-1-ext3_iflags_locking/fs/ext3/namei.c
--- linux-2.6.23/fs/ext3/namei.c 2007-10-11 12:01:23.000000000 +0200
+++ linux-2.6.23-1-ext3_iflags_locking/fs/ext3/namei.c 2007-10-11 18:09:11.000000000 +0200
@@ -629,7 +629,7 @@ int ext3_htree_fill_tree(struct file *di
dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
start_minor_hash));
dir = dir_file->f_path.dentry->d_inode;
- if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
+ if (!test_bit(EXT3_INDEX_FL, &EXT3_I(dir)->i_flags)) {
hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
@@ -775,7 +775,7 @@ static void ext3_update_dx_flag(struct i
{
if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
EXT3_FEATURE_COMPAT_DIR_INDEX))
- EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
+ clear_bit(&EXT3_I(inode)->i_flags, EXT3_INDEX_FL);
}

/*
@@ -1405,7 +1405,7 @@ static int make_indexed_dir(handle_t *ha
brelse(bh);
return retval;
}
- EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
+ set_bit(&EXT3_I(dir)->i_flags, EXT3_INDEX_FL);
data1 = bh2->b_data;

/* The 0th block becomes the root, move the dirents out */
@@ -1481,7 +1481,7 @@ static int ext3_add_entry (handle_t *han
retval = ext3_dx_add_entry(handle, dentry, inode);
if (!retval || (retval != ERR_BAD_DX_DIR))
return retval;
- EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
+ clear_bit(EXT3_INDEX_FL, &EXT3_I(dir)->i_flags);
dx_fallback++;
ext3_mark_inode_dirty(handle, dir);
}
diff -rupX /home/jack/.kerndiffexclude linux-2.6.23/include/linux/ext3_fs.h linux-2.6.23-1-ext3_iflags_locking/include/linux/ext3_fs.h
--- linux-2.6.23/include/linux/ext3_fs.h 2007-07-16 17:47:28.000000000 +0200
+++ linux-2.6.23-1-ext3_iflags_locking/include/linux/ext3_fs.h 2007-10-11 17:01:28.000000000 +0200
@@ -157,27 +157,27 @@ struct ext3_group_desc
/*
* Inode flags
*/
-#define EXT3_SECRM_FL 0x00000001 /* Secure deletion */
-#define EXT3_UNRM_FL 0x00000002 /* Undelete */
-#define EXT3_COMPR_FL 0x00000004 /* Compress file */
-#define EXT3_SYNC_FL 0x00000008 /* Synchronous updates */
-#define EXT3_IMMUTABLE_FL 0x00000010 /* Immutable file */
-#define EXT3_APPEND_FL 0x00000020 /* writes to file may only append */
-#define EXT3_NODUMP_FL 0x00000040 /* do not dump file */
-#define EXT3_NOATIME_FL 0x00000080 /* do not update atime */
+#define EXT3_SECRM_FL 0 /* Secure deletion */
+#define EXT3_UNRM_FL 1 /* Undelete */
+#define EXT3_COMPR_FL 2 /* Compress file */
+#define EXT3_SYNC_FL 3 /* Synchronous updates */
+#define EXT3_IMMUTABLE_FL 4 /* Immutable file */
+#define EXT3_APPEND_FL 5 /* writes to file may only append */
+#define EXT3_NODUMP_FL 6 /* do not dump file */
+#define EXT3_NOATIME_FL 7 /* do not update atime */
/* Reserved for compression usage... */
-#define EXT3_DIRTY_FL 0x00000100
-#define EXT3_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
-#define EXT3_NOCOMPR_FL 0x00000400 /* Don't compress */
-#define EXT3_ECOMPR_FL 0x00000800 /* Compression error */
+#define EXT3_DIRTY_FL 8
+#define EXT3_COMPRBLK_FL 9 /* One or more compressed clusters */
+#define EXT3_NOCOMPR_FL 10 /* Don't compress */
+#define EXT3_ECOMPR_FL 11 /* Compression error */
/* End compression flags --- maybe not all used */
-#define EXT3_INDEX_FL 0x00001000 /* hash-indexed directory */
-#define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */
-#define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
-#define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */
-#define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
-#define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
-#define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
+#define EXT3_INDEX_FL 12 /* hash-indexed directory */
+#define EXT3_IMAGIC_FL 13 /* AFS directory */
+#define EXT3_JOURNAL_DATA_FL 14 /* file data should be journaled */
+#define EXT3_NOTAIL_FL 15 /* file tail should not be merged */
+#define EXT3_DIRSYNC_FL 16 /* dirsync behaviour (directories only) */
+#define EXT3_TOPDIR_FL 17 /* Top of directory hierarchies*/
+#define EXT3_RESERVED_FL 31 /* reserved for ext3 lib */

#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */


2007-10-11 17:38:24

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 1/2] Make ext3 use bit operations to manipulate i_flags

Hello,

attached patch implements recursive mtime feature for ext3. Things like
rsync or updatedb can be much faster with it... More detailed description
is in the patch.
BTW: Hardlinks are currently unsolved - a change is propagated to just the
first parent. Currently it's up to userspace to handle this - if you have
some idea how to make hardlinked files easier for userspace, please speak
up :).
Any comments welcome.

Honza

--------
Implement recursive mtime (rtime) feature for ext3. The feature works as
follows: In each inode we keep a flag EXT3_RTIME_FL (modifiable by user)
indicating whether rtime should be updated. When the inode is modified and
the flag is set, the inode's rtime is updated, the flag is cleared, and we
move to the parent. If the flag is set there, we clear it, update rtime
and continue upwards up to the root of the filesystem. Because the flag
is always cleared after updating rtime, we have constant amortized
complexity of rtime updates.

The intended use case is that an application which wants to watch for any
modification in a subtree scans the subtree and sets the flag on all inodes
there. Next time, it just needs to recurse into directories having an rtime
newer than the start of the previous scan. There it can handle modifications
and set the flag again.

Signed-off-by: Jan Kara <[email protected]>

diff -rupX /home/jack/.kerndiffexclude linux-2.6.23-1-ext3_iflags_locking/fs/ext3/ialloc.c linux-2.6.23-2-ext3_recursive_mtime/fs/ext3/ialloc.c
--- linux-2.6.23-1-ext3_iflags_locking/fs/ext3/ialloc.c 2007-10-11 18:05:48.000000000 +0200
+++ linux-2.6.23-2-ext3_recursive_mtime/fs/ext3/ialloc.c 2007-10-11 18:14:03.000000000 +0200
@@ -580,6 +580,7 @@ got:
ei->i_file_acl = 0;
ei->i_dir_acl = 0;
ei->i_dtime = 0;
+ ei->i_rtime = inode->i_mtime.tv_sec;
ei->i_block_alloc_info = NULL;
ei->i_block_group = group;

diff -rupX /home/jack/.kerndiffexclude linux-2.6.23-1-ext3_iflags_locking/fs/ext3/inode.c linux-2.6.23-2-ext3_recursive_mtime/fs/ext3/inode.c
--- linux-2.6.23-1-ext3_iflags_locking/fs/ext3/inode.c 2007-10-11 18:13:04.000000000 +0200
+++ linux-2.6.23-2-ext3_recursive_mtime/fs/ext3/inode.c 2007-10-11 18:57:50.000000000 +0200
@@ -1232,6 +1232,8 @@ static int ext3_ordered_commit_write(str
ret2 = ext3_journal_stop(handle);
if (!ret)
ret = ret2;
+ if (!ret)
+ ext3_update_rtimes(inode);
return ret;
}

@@ -1255,6 +1257,8 @@ static int ext3_writeback_commit_write(s
ret2 = ext3_journal_stop(handle);
if (!ret)
ret = ret2;
+ if (!ret)
+ ext3_update_rtimes(inode);
return ret;
}

@@ -1288,6 +1292,8 @@ static int ext3_journalled_commit_write(
ret2 = ext3_journal_stop(handle);
if (!ret)
ret = ret2;
+ if (!ret)
+ ext3_update_rtimes(inode);
return ret;
}

@@ -2386,6 +2392,10 @@ out_stop:
ext3_orphan_del(handle, inode);

ext3_journal_stop(handle);
+ /* We update time only for linked inodes. Unlinked ones already
+ * notified parent during unlink... */
+ if (inode->i_nlink)
+ ext3_update_rtimes(inode);
}

static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
@@ -2628,6 +2638,8 @@ void ext3_read_inode(struct inode * inod
inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
+ if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, EXT3_FEATURE_COMPAT_RTIME))
+ ei->i_rtime = le32_to_cpu(raw_inode->i_rtime);

ei->i_state = 0;
ei->i_dir_start_lookup = 0;
@@ -2785,6 +2797,8 @@ static int ext3_do_update_inode(handle_t
raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+ if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, EXT3_FEATURE_COMPAT_RTIME))
+ raw_inode->i_rtime = cpu_to_le32(ei->i_rtime);
raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags);
@@ -3137,6 +3151,7 @@ void ext3_dirty_inode(struct inode *inod
handle_t *current_handle = ext3_journal_current_handle();
handle_t *handle;

+ /* Reserve 2 blocks for inode and superblock */
handle = ext3_journal_start(inode, 2);
if (IS_ERR(handle))
goto out;
diff -rupX /home/jack/.kerndiffexclude linux-2.6.23-1-ext3_iflags_locking/fs/ext3/ioctl.c linux-2.6.23-2-ext3_recursive_mtime/fs/ext3/ioctl.c
--- linux-2.6.23-1-ext3_iflags_locking/fs/ext3/ioctl.c 2007-10-11 16:54:03.000000000 +0200
+++ linux-2.6.23-2-ext3_recursive_mtime/fs/ext3/ioctl.c 2007-10-11 18:14:03.000000000 +0200
@@ -23,10 +23,18 @@ int ext3_ioctl (struct inode * inode, st
struct ext3_inode_info *ei = EXT3_I(inode);
unsigned int flags;
unsigned short rsv_window_size;
+ unsigned int rtime;

ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg);

switch (cmd) {
+ case EXT3_IOC_GETRTIME:
+ if (!test_opt(inode->i_sb, RTIME))
+ return -ENOTSUPP;
+ spin_lock(&inode->i_lock);
+ rtime = ei->i_rtime;
+ spin_unlock(&inode->i_lock);
+ return put_user(rtime, (unsigned int __user *) arg);
case EXT3_IOC_GETFLAGS:
ext3_get_inode_flags(ei);
flags = ei->i_flags & EXT3_FL_USER_VISIBLE;
diff -rupX /home/jack/.kerndiffexclude linux-2.6.23-1-ext3_iflags_locking/fs/ext3/namei.c linux-2.6.23-2-ext3_recursive_mtime/fs/ext3/namei.c
--- linux-2.6.23-1-ext3_iflags_locking/fs/ext3/namei.c 2007-10-11 18:09:11.000000000 +0200
+++ linux-2.6.23-2-ext3_recursive_mtime/fs/ext3/namei.c 2007-10-11 18:57:09.000000000 +0200
@@ -65,6 +65,55 @@ static struct buffer_head *ext3_append(h
return bh;
}

+/* We don't want to get new handle for every inode updated. Thus we batch
+ * updates of this many inodes into one transaction */
+#define RTIME_UPDATES_PER_TRANS 16
+
+/* Walk up the directory tree and modify rtimes.
+ * We journal i_rtime updates into a separate transaction - we don't guarantee
+ * consistency between other inode times and rtime. Only consistency between
+ * i_flags and i_rtime. */
+int __ext3_update_rtimes(struct inode *inode)
+{
+ struct dentry *dentry = list_entry(inode->i_dentry.next, struct dentry,
+ d_alias);
+ handle_t *handle;
+ int updates = 0;
+ int err = 0;
+
+ /* We should not have any transaction started - noone knows how many
+ * inode updates will be needed */
+ WARN_ON(ext3_journal_current_handle() != NULL);
+ while (test_bit(EXT3_RTIME_FL, &EXT3_I(inode)->i_flags)) {
+ if (!updates) {
+ /* For inode updates + superblock */
+ handle = ext3_journal_start(inode, RTIME_UPDATES_PER_TRANS + 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ updates = RTIME_UPDATES_PER_TRANS;
+ }
+
+ spin_lock(&inode->i_lock);
+ EXT3_I(inode)->i_rtime = get_seconds();
+ spin_unlock(&inode->i_lock);
+ clear_bit(EXT3_RTIME_FL, &EXT3_I(inode)->i_flags);
+ ext3_mark_inode_dirty(handle, inode);
+ if (!--updates) {
+ err = ext3_journal_stop(handle);
+ if (err)
+ return err;
+ }
+
+ if (dentry == inode->i_sb->s_root)
+ break;
+ dentry = dentry->d_parent;
+ inode = dentry->d_inode;
+ }
+ if (updates)
+ err = ext3_journal_stop(handle);
+ return err;
+}
+
#ifndef assert
#define assert(test) J_ASSERT(test)
#endif
@@ -775,7 +824,7 @@ static void ext3_update_dx_flag(struct i
{
if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
EXT3_FEATURE_COMPAT_DIR_INDEX))
- clear_bit(&EXT3_I(inode)->i_flags, EXT3_INDEX_FL);
+ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
}

/*
@@ -1405,7 +1454,7 @@ static int make_indexed_dir(handle_t *ha
brelse(bh);
return retval;
}
- set_bit(&EXT3_I(dir)->i_flags, EXT3_INDEX_FL);
+ EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
data1 = bh2->b_data;

/* The 0th block becomes the root, move the dirents out */
@@ -1738,6 +1787,8 @@ retry:
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
+ if (!err)
+ ext3_update_rtimes(dir);
return err;
}

@@ -1773,6 +1824,8 @@ retry:
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
+ if (!err)
+ ext3_update_rtimes(dir);
return err;
}

@@ -1847,6 +1900,8 @@ out_stop:
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
+ if (!err)
+ ext3_update_rtimes(dir);
return err;
}

@@ -2123,6 +2178,8 @@ static int ext3_rmdir (struct inode * di

end_rmdir:
ext3_journal_stop(handle);
+ if (!retval)
+ ext3_update_rtimes(dir);
brelse (bh);
return retval;
}
@@ -2177,6 +2234,8 @@ static int ext3_unlink(struct inode * di

end_unlink:
ext3_journal_stop(handle);
+ if (!retval)
+ ext3_update_rtimes(dir);
brelse (bh);
return retval;
}
@@ -2234,6 +2293,8 @@ out_stop:
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
+ if (!err)
+ ext3_update_rtimes(dir);
return err;
}

@@ -2270,6 +2331,8 @@ retry:
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
+ if (!err)
+ ext3_update_rtimes(dir);
return err;
}

@@ -2429,6 +2492,10 @@ end_rename:
brelse (old_bh);
brelse (new_bh);
ext3_journal_stop(handle);
+ if (!retval) {
+ ext3_update_rtimes(old_dir);
+ ext3_update_rtimes(new_dir);
+ }
return retval;
}

diff -rupX /home/jack/.kerndiffexclude linux-2.6.23-1-ext3_iflags_locking/fs/ext3/super.c linux-2.6.23-2-ext3_recursive_mtime/fs/ext3/super.c
--- linux-2.6.23-1-ext3_iflags_locking/fs/ext3/super.c 2007-10-11 12:01:23.000000000 +0200
+++ linux-2.6.23-2-ext3_recursive_mtime/fs/ext3/super.c 2007-10-11 18:14:03.000000000 +0200
@@ -684,7 +684,7 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
- Opt_grpquota
+ Opt_grpquota, Opt_rtime
};

static match_table_t tokens = {
@@ -734,6 +734,7 @@ static match_table_t tokens = {
{Opt_quota, "quota"},
{Opt_usrquota, "usrquota"},
{Opt_barrier, "barrier=%u"},
+ {Opt_rtime, "rtime"},
{Opt_err, NULL},
{Opt_resize, "resize"},
};
@@ -1066,6 +1067,14 @@ clear_qf_name:
case Opt_bh:
clear_opt(sbi->s_mount_opt, NOBH);
break;
+ case Opt_rtime:
+ if (!EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_RTIME)) {
+ printk("EXT3-fs: rtime option available only "
+ "if superblock has RTIME feature.\n");
+ return 0;
+ }
+ set_opt(sbi->s_mount_opt, RTIME);
+ break;
default:
printk (KERN_ERR
"EXT3-fs: Unrecognized mount option \"%s\" "
diff -rupX /home/jack/.kerndiffexclude linux-2.6.23-1-ext3_iflags_locking/include/linux/ext3_fs.h linux-2.6.23-2-ext3_recursive_mtime/include/linux/ext3_fs.h
--- linux-2.6.23-1-ext3_iflags_locking/include/linux/ext3_fs.h 2007-10-11 17:01:28.000000000 +0200
+++ linux-2.6.23-2-ext3_recursive_mtime/include/linux/ext3_fs.h 2007-10-11 18:57:06.000000000 +0200
@@ -177,10 +177,11 @@ struct ext3_group_desc
#define EXT3_NOTAIL_FL 15 /* file tail should not be merged */
#define EXT3_DIRSYNC_FL 16 /* dirsync behaviour (directories only) */
#define EXT3_TOPDIR_FL 17 /* Top of directory hierarchies*/
+#define EXT3_RTIME_FL 18 /* Update recursive mtime */
#define EXT3_RESERVED_FL 31 /* reserved for ext3 lib */

-#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
-#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
+#define EXT3_FL_USER_VISIBLE 0x0007DFFF /* User visible flags */
+#define EXT3_FL_USER_MODIFIABLE 0x000780FF /* User modifiable flags */

/*
* Inode dynamic state flags
@@ -229,6 +230,7 @@ struct ext3_new_group_data {
#endif
#define EXT3_IOC_GETRSVSZ _IOR('f', 5, long)
#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long)
+#define EXT3_IOC_GETRTIME _IOR('f', 9, unsigned int)

/*
* ioctl commands in 32 bit emulation
@@ -318,6 +320,7 @@ struct ext3_inode {
} osd2; /* OS dependent 2 */
__le16 i_extra_isize;
__le16 i_pad1;
+ __le32 i_rtime; /* Recursive Modification Time */
};

#define i_size_high i_dir_acl
@@ -384,6 +387,7 @@ struct ext3_inode {
#define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */
#define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
#define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
+#define EXT3_MOUNT_RTIME 0x400000 /* Update rtime */

/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
@@ -572,6 +576,7 @@ static inline int ext3_valid_inum(struct
#define EXT3_FEATURE_COMPAT_EXT_ATTR 0x0008
#define EXT3_FEATURE_COMPAT_RESIZE_INODE 0x0010
#define EXT3_FEATURE_COMPAT_DIR_INDEX 0x0020
+#define EXT3_FEATURE_COMPAT_RTIME 0x0040

#define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
@@ -837,6 +842,14 @@ extern int ext3_orphan_add(handle_t *, s
extern int ext3_orphan_del(handle_t *, struct inode *);
extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
__u32 start_minor_hash, __u32 *next_hash);
+extern int __ext3_update_rtimes(struct inode *inode);
+static inline int ext3_update_rtimes(struct inode *inode)
+{
+ if (test_opt(inode->i_sb, RTIME) &&
+ test_bit(EXT3_RTIME_FL, &EXT3_I(inode)->i_flags))
+ return __ext3_update_rtimes(inode);
+ return 0;
+}

/* resize.c */
extern int ext3_group_add(struct super_block *sb,
diff -rupX /home/jack/.kerndiffexclude linux-2.6.23-1-ext3_iflags_locking/include/linux/ext3_fs_i.h linux-2.6.23-2-ext3_recursive_mtime/include/linux/ext3_fs_i.h
--- linux-2.6.23-1-ext3_iflags_locking/include/linux/ext3_fs_i.h 2007-07-16 17:47:28.000000000 +0200
+++ linux-2.6.23-2-ext3_recursive_mtime/include/linux/ext3_fs_i.h 2007-10-11 18:14:03.000000000 +0200
@@ -78,6 +78,7 @@ struct ext3_inode_info {
ext3_fsblk_t i_file_acl;
__u32 i_dir_acl;
__u32 i_dtime;
+ __u32 i_rtime;

/*
* i_block_group is the number of the block group which contains

2007-10-11 20:08:34

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 1/2] Make ext3 use bit operations to manipulate i_flags

On Oct 11, 2007 20:01 +0200, Jan Kara wrote:
> #define EXT3_TOPDIR_FL 17 /* Top of directory hierarchies*/
> +#define EXT3_RTIME_FL 18 /* Update recursive mtime */

Could you please use "20" (0x100000) for this flag, as there were patches to
use 18 (0x40000) for EXT4_HUGE_FILE_FL, and 19 (0x80000) is definitely
reserved for EXT4_EXTENTS_FL.

I'd suggest emailing Ted to reserve a flag for this, and also adding the
other reserved flags to ext2 and ext3 so there is no risk of conflicts.

> @@ -572,6 +576,7 @@ static inline int ext3_valid_inum(struct
> #define EXT3_FEATURE_COMPAT_EXT_ATTR 0x0008
> #define EXT3_FEATURE_COMPAT_RESIZE_INODE 0x0010
> #define EXT3_FEATURE_COMPAT_DIR_INDEX 0x0020
> +#define EXT3_FEATURE_COMPAT_RTIME 0x0040

Similarly, there is already
#define EXT2_FEATURE_COMPAT_LAZY_BG 0x0040

in e2fsprogs so you need to use 0x0080 for your feature. Please update
all of the ext*_fs.h files to have the most recent flags from e2fsprogs
so that we can avoid such dangers in the future.

Cheers, Andreas
--
Andreas Dilger
Principal Software Engineer
Cluster File Systems, Inc.