2007-02-02 14:49:44

by Kalpak Shah

[permalink] [raw]
Subject: [RFC] [PATCH 1/1] Nanosecond timestamps

Hi,

This patch is a spinoff of the old nanosecond patches. It includes some
cleanups and the addition of a creation timestamp. The
EXT3_FEATURE_RO_COMPAT_EXTRA_ISIZE flag has also been added, along with
the s_{min,want}_extra_isize fields in struct ext3_super_block.

Any comments are welcome.

Index: linux-2.6.19/fs/ext3/ialloc.c
===================================================================
--- linux-2.6.19.orig/fs/ext3/ialloc.c
+++ linux-2.6.19/fs/ext3/ialloc.c
@@ -560,7 +560,8 @@ got:
inode->i_ino = ino;
/* This is the optimal IO size (for stat), not the fs block size
*/
inode->i_blocks = 0;
- inode->i_mtime = inode->i_atime = inode->i_ctime =
CURRENT_TIME_SEC;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime
=
+
ext3_current_time(inode);

memset(ei->i_data, 0, sizeof(ei->i_data));
ei->i_dir_start_lookup = 0;
@@ -592,9 +593,8 @@ got:
spin_unlock(&sbi->s_next_gen_lock);

ei->i_state = EXT3_STATE_NEW;
- ei->i_extra_isize =
- (EXT3_INODE_SIZE(inode->i_sb) >
EXT3_GOOD_OLD_INODE_SIZE) ?
- sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE :
0;
+
+ ei->i_extra_isize = EXT3_SB(sb)->s_want_extra_isize;

ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
Index: linux-2.6.19/fs/ext3/inode.c
===================================================================
--- linux-2.6.19.orig/fs/ext3/inode.c
+++ linux-2.6.19/fs/ext3/inode.c
@@ -729,7 +729,7 @@ static int ext3_splice_branch(handle_t *

/* We are done with atomic stuff, now do the rest of
housekeeping */

- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext3_current_time(inode);
ext3_mark_inode_dirty(handle, inode);

/* had we spliced it onto indirect block? */
@@ -2374,7 +2374,7 @@ do_indirects:
ext3_discard_reservation(inode);

mutex_unlock(&ei->truncate_mutex);
- inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_mtime = inode->i_ctime = ext3_current_time(inode);
ext3_mark_inode_dirty(handle, inode);

/*
@@ -2608,10 +2608,11 @@ void ext3_read_inode(struct inode * inod
}
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
inode->i_size = le32_to_cpu(raw_inode->i_size);
- inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
- inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime);
- inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime);
- inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec =
inode->i_mtime.tv_nsec = 0;
+
+ EXT3_INODE_GET_XTIME(i_ctime, i_ctime_extra, inode, raw_inode);
+ EXT3_INODE_GET_XTIME(i_mtime, i_mtime_extra, inode, raw_inode);
+ EXT3_INODE_GET_XTIME(i_atime, i_atime_extra, inode, raw_inode);
+ EXT3_INODE_GET_XTIME(i_crtime, i_crtime_extra, ei, raw_inode);

ei->i_state = 0;
ei->i_dir_start_lookup = 0;
@@ -2763,9 +2764,11 @@ static int ext3_do_update_inode(handle_t
}
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le32(ei->i_disksize);
- raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
- raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
- raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+ EXT3_INODE_SET_XTIME(i_ctime, i_ctime_extra, inode, raw_inode);
+ EXT3_INODE_SET_XTIME(i_mtime, i_mtime_extra, inode, raw_inode);
+ EXT3_INODE_SET_XTIME(i_atime, i_atime_extra, inode, raw_inode);
+ EXT3_INODE_SET_XTIME(i_crtime, i_crtime_extra, ei, raw_inode);
+
raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags);
Index: linux-2.6.19/fs/ext3/ioctl.c
===================================================================
--- linux-2.6.19.orig/fs/ext3/ioctl.c
+++ linux-2.6.19/fs/ext3/ioctl.c
@@ -96,7 +96,7 @@ int ext3_ioctl (struct inode * inode, st
ei->i_flags = flags;

ext3_set_inode_flags(inode);
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext3_current_time(inode);

err = ext3_mark_iloc_dirty(handle, inode, &iloc);
flags_err:
@@ -133,7 +133,7 @@ flags_err:
return PTR_ERR(handle);
err = ext3_reserve_inode_write(handle, inode, &iloc);
if (err == 0) {
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext3_current_time(inode);
inode->i_generation = generation;
err = ext3_mark_iloc_dirty(handle, inode,
&iloc);
}
Index: linux-2.6.19/fs/ext3/namei.c
===================================================================
--- linux-2.6.19.orig/fs/ext3/namei.c
+++ linux-2.6.19/fs/ext3/namei.c
@@ -1275,7 +1275,7 @@ static int add_dirent_to_buf(handle_t *h
* happen is that the times are slightly out of date
* and/or different from the directory change time.
*/
- dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+ dir->i_mtime = dir->i_ctime = ext3_current_time(dir);
ext3_update_dx_flag(dir);
dir->i_version++;
ext3_mark_inode_dirty(handle, dir);
@@ -2051,7 +2051,7 @@ static int ext3_rmdir (struct inode * di
* recovery. */
inode->i_size = 0;
ext3_orphan_add(handle, inode);
- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+ inode->i_ctime = dir->i_ctime = dir->i_mtime =
ext3_current_time(inode);
ext3_mark_inode_dirty(handle, inode);
drop_nlink(dir);
ext3_update_dx_flag(dir);
@@ -2101,7 +2101,7 @@ static int ext3_unlink(struct inode * di
retval = ext3_delete_entry(handle, dir, de, bh);
if (retval)
goto end_unlink;
- dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+ dir->i_ctime = dir->i_mtime = ext3_current_time(dir);
ext3_update_dx_flag(dir);
ext3_mark_inode_dirty(handle, dir);
drop_nlink(inode);
@@ -2192,7 +2192,7 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;

- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext3_current_time(inode);
ext3_inc_count(handle, inode);
atomic_inc(&inode->i_count);

@@ -2294,7 +2294,7 @@ static int ext3_rename (struct inode * o
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- old_inode->i_ctime = CURRENT_TIME_SEC;
+ old_inode->i_ctime = ext3_current_time(old_inode);
ext3_mark_inode_dirty(handle, old_inode);

/*
@@ -2327,9 +2327,9 @@ static int ext3_rename (struct inode * o

if (new_inode) {
drop_nlink(new_inode);
- new_inode->i_ctime = CURRENT_TIME_SEC;
+ new_inode->i_ctime = ext3_current_time(new_inode);
}
- old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
+ old_dir->i_ctime = old_dir->i_mtime =
ext3_current_time(old_dir);
ext3_update_dx_flag(old_dir);
if (dir_bh) {
BUFFER_TRACE(dir_bh, "get_write_access");
Index: linux-2.6.19/fs/ext3/super.c
===================================================================
--- linux-2.6.19.orig/fs/ext3/super.c
+++ linux-2.6.19/fs/ext3/super.c
@@ -1568,6 +1568,8 @@ static int ext3_fill_super (struct super
sbi->s_inode_size);
goto failed_mount;
}
+ if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE)
+ sb->s_time_gran = 1 << (EXT3_EPOCH_BITS - 2);
}
sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
le32_to_cpu(es->s_log_frag_size);
@@ -1770,6 +1772,32 @@ static int ext3_fill_super (struct super
}

ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+
+ /* determine the minimum size of new large inodes, if present */
+ if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE) {
+ EXT3_SB(sb)->s_want_extra_isize = sizeof(struct ext3_inode)
-
+
EXT3_GOOD_OLD_INODE_SIZE;
+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
+ EXT3_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
+ if (EXT3_SB(sb)->s_want_extra_isize <
+ le32_to_cpu(es->s_want_extra_isize))
+ EXT3_SB(sb)->s_want_extra_isize =
+ le32_to_cpu(es->s_want_extra_isize);
+ if (EXT3_SB(sb)->s_want_extra_isize <
+ le32_to_cpu(es->s_min_extra_isize))
+ EXT3_SB(sb)->s_want_extra_isize =
+ le32_to_cpu(es->s_min_extra_isize);
+ }
+ }
+ /* Check if enough inode space is available */
+ if (EXT3_GOOD_OLD_INODE_SIZE + EXT3_SB(sb)->s_want_extra_isize >
+ sbi->s_inode_size) {
+ EXT3_SB(sb)->s_want_extra_isize = sizeof(struct
ext3_inode) -
+
EXT3_GOOD_OLD_INODE_SIZE;
+ printk(KERN_INFO "EXT3-fs: required extra inode space
not"
+ "available.\n");
+ }
+
/*
* akpm: core read_super() calls in here with the superblock
locked.
* That deadlocks, because orphan cleanup needs to lock the
superblock
Index: linux-2.6.19/fs/ext3/xattr.c
===================================================================
--- linux-2.6.19.orig/fs/ext3/xattr.c
+++ linux-2.6.19/fs/ext3/xattr.c
@@ -1007,7 +1007,7 @@ ext3_xattr_set_handle(handle_t *handle,
}
if (!error) {
ext3_xattr_update_super_block(handle, inode->i_sb);
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext3_current_time(inode);
error = ext3_mark_iloc_dirty(handle, inode, &is.iloc);
/*
* The bh is consumed by ext3_mark_iloc_dirty, even with
Index: linux-2.6.19/include/linux/ext3_fs.h
===================================================================
--- linux-2.6.19.orig/include/linux/ext3_fs.h
+++ linux-2.6.19/include/linux/ext3_fs.h
@@ -269,7 +269,7 @@ struct ext3_inode {
__le16 i_uid; /* Low 16 bits of Owner Uid */
__le32 i_size; /* Size in bytes */
__le32 i_atime; /* Access time */
- __le32 i_ctime; /* Creation time */
+ __le32 i_ctime; /* Inode Change time */
__le32 i_mtime; /* Modification time */
__le32 i_dtime; /* Deletion Time */
__le16 i_gid; /* Low 16 bits of Group Id */
@@ -318,10 +318,53 @@ struct ext3_inode {
} osd2; /* OS dependent 2 */
__le16 i_extra_isize;
__le16 i_pad1;
+ __le32 i_ctime_extra; /* extra Change time (nsec << 2 |
epoch)
*/
+ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 |
epoch)
*/
+ __le32 i_atime_extra; /* extra Access time (nsec << 2 |
epoch)
*/
+ __le32 i_crtime; /* File Creation time */
+ __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 |
epoch) */
};

#define i_size_high i_dir_acl

+#define EXT3_EPOCH_BITS 2
+#define EXT3_EPOCH_MASK ((1 << EXT3_EPOCH_BITS) - 1)
+#define EXT3_NSEC_MASK (~0UL << EXT3_EPOCH_BITS)
+
+#define EXT3_INODE_SET_XTIME(xtime, extra_xtime, inode, raw_inode)
\
+do {
\
+ (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec);
\
+
\
+ if (offsetof(typeof(*raw_inode), extra_xtime) -
\
+ offsetof(typeof(*raw_inode), i_extra_isize) +
\
+ sizeof((raw_inode)->extra_xtime) <=
\
+ le16_to_cpu((raw_inode)->i_extra_isize))
\
+ (raw_inode)->extra_xtime =
\
+ cpu_to_le32((sizeof((inode)->xtime.tv_sec) > 4 ?
\
+ ((__u64)(inode)->xtime.tv_sec >> 32) :
0)| \
+ (((inode)->xtime.tv_nsec << 2) &
\
+ EXT3_NSEC_MASK));
\
+} while (0)
+
+#define EXT3_INODE_GET_XTIME(xtime, extra_xtime, inode, raw_inode)
\
+do {
\
+ (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime);
\
+
\
+ if (offsetof(typeof(*raw_inode), extra_xtime) -
\
+ offsetof(typeof(*raw_inode), i_extra_isize) +
\
+ sizeof((raw_inode)->extra_xtime) <=
\
+ le16_to_cpu((raw_inode)->i_extra_isize)) {
\
+ if (sizeof((inode)->xtime.tv_sec) > 4)
\
+ (inode)->xtime.tv_sec |=
\
+ (__u64)(le32_to_cpu((raw_inode)->extra_xtime) &
\
+ EXT3_EPOCH_MASK) << 32;
\
+ (inode)->xtime.tv_nsec =
\
+ (le32_to_cpu((raw_inode)->extra_xtime) &
\
+ EXT3_NSEC_MASK) >> 2;
\
+ }
\
+} while (0)
+
+
#if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1 osd1.linux1.l_i_reserved1
#define i_frag osd2.linux2.l_i_frag
@@ -491,7 +534,9 @@ struct ext3_super_block {
__u16 s_reserved_word_pad;
__le32 s_default_mount_opts;
__le32 s_first_meta_bg; /* First metablock block group
*/
- __u32 s_reserved[190]; /* Padding to the end of the
block */
+ __u16 s_min_extra_isize; /* All inodes have at least #
bytes */
+ __u16 s_want_extra_isize; /* New inodes should reserve #
bytes
*/
+ __u32 s_reserved[189]; /* Padding to the end of the
block */
};

#ifdef __KERNEL__
@@ -514,6 +559,13 @@ static inline int ext3_valid_inum(struct
(ino >= EXT3_FIRST_INO(sb) &&
ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count));
}
+
+static inline struct timespec ext3_current_time(struct inode *inode)
+{
+ return (inode->i_sb->s_time_gran < 1000000000) ?
+ current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
+}
+
#else
/* Assume that user mode programs are passing in an ext3fs superblock,
not
* a kernel struct super_block. This will allow us to call the
feature-test
@@ -576,6 +628,7 @@ static inline int ext3_valid_inum(struct
#define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
+#define EXT3_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040

#define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -589,6 +642,7 @@ static inline int ext3_valid_inum(struct
EXT3_FEATURE_INCOMPAT_META_BG)
#define EXT3_FEATURE_RO_COMPAT_SUPP
(EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \

EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
+
EXT3_FEATURE_RO_COMPAT_EXTRA_ISIZE| \

EXT3_FEATURE_RO_COMPAT_BTREE_DIR)

/*
Index: linux-2.6.19/include/linux/ext3_fs_i.h
===================================================================
--- linux-2.6.19.orig/include/linux/ext3_fs_i.h
+++ linux-2.6.19/include/linux/ext3_fs_i.h
@@ -142,6 +142,7 @@ struct ext3_inode_info {
*/
struct mutex truncate_mutex;
struct inode vfs_inode;
+ struct timespec i_crtime;
};

#endif /* _LINUX_EXT3_FS_I */
Index: linux-2.6.19/include/linux/ext3_fs_sb.h
===================================================================
--- linux-2.6.19.orig/include/linux/ext3_fs_sb.h
+++ linux-2.6.19/include/linux/ext3_fs_sb.h
@@ -78,6 +78,7 @@ struct ext3_sb_info {
char *s_qf_names[MAXQUOTAS]; /* Names of quota files
with journalled
quota */
int s_jquota_fmt; /* Format of quota to
use */
#endif
+ u16 s_want_extra_isize; /* New inodes should
reserve #
bytes */
};

#endif /* _LINUX_EXT3_FS_SB */


Thanks,
Kalpak <[email protected]>


2007-02-06 15:12:10

by Johann Lombardi

[permalink] [raw]
Subject: Re: [RFC] [PATCH 1/1] Nanosecond timestamps

On Fri, Feb 02, 2007 at 08:19:50PM +0530, Kalpak Shah wrote:
> Index: linux-2.6.19/fs/ext3/super.c
> ===================================================================
> --- linux-2.6.19.orig/fs/ext3/super.c
> +++ linux-2.6.19/fs/ext3/super.c
> @@ -1770,6 +1772,32 @@ static int ext3_fill_super (struct super
> }
>
> ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
> +
> + /* determine the minimum size of new large inodes, if present */
> + if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE) {
> + EXT3_SB(sb)->s_want_extra_isize = sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE;

Maybe EXT3_SB(sb)-> could be replaced by sbi-> here and in the lines below.

> + if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
> + EXT3_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
> + if (EXT3_SB(sb)->s_want_extra_isize <
> + le32_to_cpu(es->s_want_extra_isize))
^^
> + EXT3_SB(sb)->s_want_extra_isize =
> + le32_to_cpu(es->s_want_extra_isize);
^^
> + if (EXT3_SB(sb)->s_want_extra_isize <
> + le32_to_cpu(es->s_min_extra_isize))
^^
> + EXT3_SB(sb)->s_want_extra_isize =
> + le32_to_cpu(es->s_min_extra_isize);
^^
Since es->s_{min,want}_extra_isize are both __u16 (BTW, shouldn't it be __le16?),
I think you should use le16_to_cpu() instead of le32_to_cpu().

> + }
> + }
> + /* Check if enough inode space is available */
> + if (EXT3_GOOD_OLD_INODE_SIZE + EXT3_SB(sb)->s_want_extra_isize >
> + sbi->s_inode_size) {
> + EXT3_SB(sb)->s_want_extra_isize = sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE;
> + printk(KERN_INFO "EXT3-fs: required extra inode space not"
> + "available.\n");
> + }

If the inode size is EXT3_GOOD_OLD_INODE_SIZE, sbi->s_want_extra_isize won't be
initialized. However, it should not be an issue because the ext3_sb_info
is set to zero in ext3_fill_super().

Johann

2007-02-07 20:39:49

by Andreas Dilger

[permalink] [raw]
Subject: Re: [RFC] [PATCH 1/1] Nanosecond timestamps

On Feb 06, 2007 16:12 +0100, Johann Lombardi wrote:
> > + if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE) {
> > + EXT3_SB(sb)->s_want_extra_isize = sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE;
>
> Maybe EXT3_SB(sb)-> could be replaced by sbi-> here and in the lines below.

Yes, this should definitely be done. It also increases clarity between
sbi->s_want_extra_isize and es->s_want_extra_isize.

> > + if (EXT3_SB(sb)->s_want_extra_isize <
> > + le32_to_cpu(es->s_min_extra_isize))
> ^^
> > + EXT3_SB(sb)->s_want_extra_isize =
> > + le32_to_cpu(es->s_min_extra_isize);
> ^^
> Since es->s_{min,want}_extra_isize are both __u16 (BTW, shouldn't it be
> __le16?), I think you should use le16_to_cpu() instead of le32_to_cpu().

You are right - this works fine on little endian systems, but fails on
big endian systems where you will get the other half of the word.

This has been a bug in several places already, and I wonder if the
le*_to_cpu() and cpu_to_le*() macros shouldn't do some type checking
instead of just casting the variable to the specified type?

The only problem is that constants would then have to be cast explicitly,
which would be a bit of a pain, though we could have something like:

#define le16_to_cpu(var) (__builtin_constant_p(var) || !typecheck(__u16, var) ? \
__constant_le16_to_cpu(var) : __le16_to_cpu(var))

The only question is whether "typecheck" adds extra variables on the stack
or if the compiler will always optimize them away.

> > + /* Check if enough inode space is available */
> > + if (EXT3_GOOD_OLD_INODE_SIZE + EXT3_SB(sb)->s_want_extra_isize >
> > + sbi->s_inode_size) {
> > + EXT3_SB(sb)->s_want_extra_isize = sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE;
> > + printk(KERN_INFO "EXT3-fs: required extra inode space not"
> > + "available.\n");
> > + }
>
> If the inode size is EXT3_GOOD_OLD_INODE_SIZE, sbi->s_want_extra_isize won't
> be initialized. However, it should not be an issue because the ext3_sb_info
> is set to zero in ext3_fill_super().

So I'm not sure I understand if you have an objection or if this is just a
comment. sbi->s_want_extra_isize will be zero and it is not possible for
sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE so this case won't be hit.

Cheers, Andreas
--
Andreas Dilger
Principal Software Engineer
Cluster File Systems, Inc.

2007-02-07 20:49:53

by Johann Lombardi

[permalink] [raw]
Subject: Re: [RFC] [PATCH 1/1] Nanosecond timestamps

On Fri, Feb 02, 2007 at 08:19:50PM +0530, Kalpak Shah wrote:
> +#define EXT3_INODE_SET_XTIME(xtime, extra_xtime, inode, raw_inode) \
> +do { \
> + (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
> + \
> + if (offsetof(typeof(*raw_inode), extra_xtime) - \
> + offsetof(typeof(*raw_inode), i_extra_isize) + \
> + sizeof((raw_inode)->extra_xtime) <= \
> + le16_to_cpu((raw_inode)->i_extra_isize)) \
^^^^^^^^^^^^^^^^^^^^^^^^^
> + (raw_inode)->extra_xtime = \

With 128-byte inodes, raw_inode->i_extra_isize is beyond the inode limit
and the above will corrupt the filesystem.
IMO, i_extra_isize from ext3_inode_info should be used instead of
raw_inode->i_extra_isize.

> +#define EXT3_INODE_GET_XTIME(xtime, extra_xtime, inode, raw_inode) \
> +do { \
> + (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
> + \
> + if (offsetof(typeof(*raw_inode), extra_xtime) - \
> + offsetof(typeof(*raw_inode), i_extra_isize) + \
> + sizeof((raw_inode)->extra_xtime) <= \
> + le16_to_cpu((raw_inode)->i_extra_isize)) { \
^^^^^^^^^^^^^^^^^^^^^^^^^
ditto

Cheers,
Johann

2007-02-07 21:06:13

by Dave Kleikamp

[permalink] [raw]
Subject: Re: [RFC] [PATCH 1/1] Nanosecond timestamps

On Wed, 2007-02-07 at 13:39 -0700, Andreas Dilger wrote:
> On Feb 06, 2007 16:12 +0100, Johann Lombardi wrote:
> > > + if (EXT3_SB(sb)->s_want_extra_isize <
> > > + le32_to_cpu(es->s_min_extra_isize))
> > ^^
> > > + EXT3_SB(sb)->s_want_extra_isize =
> > > + le32_to_cpu(es->s_min_extra_isize);
> > ^^
> > Since es->s_{min,want}_extra_isize are both __u16 (BTW, shouldn't it be
> > __le16?), I think you should use le16_to_cpu() instead of le32_to_cpu().
>
> You are right - this works fine on little endian systems, but fails on
> big endian systems where you will get the other half of the word.
>
> This has been a bug in several places already, and I wonder if the
> le*_to_cpu() and cpu_to_le*() macros shouldn't do some type checking
> instead of just casting the variable to the specified type?

I think that sparse will catch this. To get the endian checks you need
to do something like this:

make C=2 CF="-D__CHECK_ENDIAN__"

--
David Kleikamp
IBM Linux Technology Center

2007-02-08 10:29:50

by Johann Lombardi

[permalink] [raw]
Subject: Re: [RFC] [PATCH 1/1] Nanosecond timestamps

On Wed, Feb 07, 2007 at 01:39:46PM -0700, Andreas Dilger wrote:
> This has been a bug in several places already, and I wonder if the
> le*_to_cpu() and cpu_to_le*() macros shouldn't do some type checking
> instead of just casting the variable to the specified type?

That would be great.

> The only problem is if casting constants it would be a bit of a pain
> to have to cast them explicitly, though we could have something like:
>
> #define le16_to_cpu(var) (__builtin_constant_p(var) || !typecheck(__u16, var) ? \
> __constant_le16_to_cpu(var) : __le16_to_cpu(var))

Very good idea!

> The only question is whether "typecheck" adds extra variables on the stack
> or if the compiler will always optimize them away.

I tend to think it will always be optimized by the compiler.

> > If the inode size is EXT3_GOOD_OLD_INODE_SIZE, sbi->s_want_extra_isize won't
> > be initialized. However, it should not be an issue because the ext3_sb_info
> > is set to zero in ext3_fill_super().
>
> So I'm not sure I understand if you have an objection or if this is just a
> comment.

Just a useless comment :)

> sbi->s_want_extra_isize will be zero and it is not possible for
> sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE so this case won't be hit.

I agree.

Cheers,
Johann

2007-02-08 10:32:50

by Johann Lombardi

[permalink] [raw]
Subject: Re: [RFC] [PATCH 1/1] Nanosecond timestamps

On Wed, Feb 07, 2007 at 03:05:39PM -0600, Dave Kleikamp wrote:
> On Wed, 2007-02-07 at 13:39 -0700, Andreas Dilger wrote:
> > You are right - this works fine on little endian systems, but fails on
> > big endian systems where you will get the other half of the word.
> >
> > This has been a bug in several places already, and I wonder if the
> > le*_to_cpu() and cpu_to_le*() macros shouldn't do some type checking
> > instead of just casting the variable to the specified type?
>
> I think that sparse will catch this. To get the endian checks you need
> to do something like this:
>
> make C=2 CF="-D__CHECK_ENDIAN__"

Indeed:

CHECK fs/ext3/super.c
fs/ext3/super.c:1787:8: warning: cast to restricted type
fs/ext3/super.c:1789:6: warning: cast to restricted type
fs/ext3/super.c:1791:8: warning: cast to restricted type
fs/ext3/super.c:1793:6: warning: cast to restricted type

Thanks,
Johann

2007-02-13 13:16:22

by Kalpak Shah

[permalink] [raw]
Subject: [PATCH Take2 1/1] Nanosecond timestamps

Hi All,

Thanks for all your comments. I have made the changes as suggested and ensured that no fields beyond EXT4_GOOD_OLD_INODE_SIZE are accessed without proper checks, to avoid corruption. I have also rebased the code onto ext4 in linux-2.6.20 for inclusion upstream.

Index: linux-2.6.20/fs/ext4/ialloc.c
===================================================================
--- linux-2.6.20.orig/fs/ext4/ialloc.c
+++ linux-2.6.20/fs/ext4/ialloc.c
@@ -563,7 +563,8 @@ got:
inode->i_ino = ino;
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
+ ext4_current_time(inode);

memset(ei->i_data, 0, sizeof(ei->i_data));
ei->i_dir_start_lookup = 0;
@@ -595,9 +596,8 @@ got:
spin_unlock(&sbi->s_next_gen_lock);

ei->i_state = EXT4_STATE_NEW;
- ei->i_extra_isize =
- (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ?
- sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0;
+
+ ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;

ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
Index: linux-2.6.20/fs/ext4/inode.c
===================================================================
--- linux-2.6.20.orig/fs/ext4/inode.c
+++ linux-2.6.20/fs/ext4/inode.c
@@ -727,7 +727,7 @@ static int ext4_splice_branch(handle_t *

/* We are done with atomic stuff, now do the rest of housekeeping */

- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);

/* had we spliced it onto indirect block? */
@@ -2441,7 +2441,7 @@ do_indirects:
ext4_discard_reservation(inode);

mutex_unlock(&ei->truncate_mutex);
- inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);

/*
@@ -2676,10 +2676,11 @@ void ext4_read_inode(struct inode * inod
}
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
inode->i_size = le32_to_cpu(raw_inode->i_size);
- inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
- inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime);
- inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime);
- inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
+
+ EXT4_INODE_GET_XTIME(i_ctime, i_ctime_extra, ei, inode, raw_inode);
+ EXT4_INODE_GET_XTIME(i_mtime, i_mtime_extra, ei, inode, raw_inode);
+ EXT4_INODE_GET_XTIME(i_atime, i_atime_extra, ei, inode, raw_inode);
+ EXT4_INODE_GET_XTIME(i_crtime, i_crtime_extra, ei, ei, raw_inode);

ei->i_state = 0;
ei->i_dir_start_lookup = 0;
@@ -2835,9 +2836,12 @@ static int ext4_do_update_inode(handle_t
}
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le32(ei->i_disksize);
- raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
- raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
- raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+
+ EXT4_INODE_SET_XTIME(i_ctime, i_ctime_extra, ei, inode, raw_inode);
+ EXT4_INODE_SET_XTIME(i_mtime, i_mtime_extra, ei, inode, raw_inode);
+ EXT4_INODE_SET_XTIME(i_atime, i_atime_extra, ei, inode, raw_inode);
+ EXT4_INODE_SET_XTIME(i_crtime, i_crtime_extra, ei, ei, raw_inode);
+
raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags);
Index: linux-2.6.20/fs/ext4/ioctl.c
===================================================================
--- linux-2.6.20.orig/fs/ext4/ioctl.c
+++ linux-2.6.20/fs/ext4/ioctl.c
@@ -96,7 +96,7 @@ int ext4_ioctl (struct inode * inode, st
ei->i_flags = flags;

ext4_set_inode_flags(inode);
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext4_current_time(inode);

err = ext4_mark_iloc_dirty(handle, inode, &iloc);
flags_err:
@@ -133,7 +133,7 @@ flags_err:
return PTR_ERR(handle);
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err == 0) {
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext4_current_time(inode);
inode->i_generation = generation;
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
}
Index: linux-2.6.20/fs/ext4/namei.c
===================================================================
--- linux-2.6.20.orig/fs/ext4/namei.c
+++ linux-2.6.20/fs/ext4/namei.c
@@ -1282,7 +1282,7 @@ static int add_dirent_to_buf(handle_t *h
* happen is that the times are slightly out of date
* and/or different from the directory change time.
*/
- dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+ dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
ext4_update_dx_flag(dir);
dir->i_version++;
ext4_mark_inode_dirty(handle, dir);
@@ -2058,7 +2058,7 @@ static int ext4_rmdir (struct inode * di
* recovery. */
inode->i_size = 0;
ext4_orphan_add(handle, inode);
- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
drop_nlink(dir);
ext4_update_dx_flag(dir);
@@ -2108,13 +2108,13 @@ static int ext4_unlink(struct inode * di
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
goto end_unlink;
- dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+ dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
drop_nlink(inode);
if (!inode->i_nlink)
ext4_orphan_add(handle, inode);
- inode->i_ctime = dir->i_ctime;
+ inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
retval = 0;

@@ -2199,7 +2199,7 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;

- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext4_current_time(inode);
ext4_inc_count(handle, inode);
atomic_inc(&inode->i_count);

@@ -2301,7 +2301,7 @@ static int ext4_rename (struct inode * o
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- old_inode->i_ctime = CURRENT_TIME_SEC;
+ old_inode->i_ctime = ext4_current_time(old_inode);
ext4_mark_inode_dirty(handle, old_inode);

/*
@@ -2334,9 +2334,9 @@ static int ext4_rename (struct inode * o

if (new_inode) {
drop_nlink(new_inode);
- new_inode->i_ctime = CURRENT_TIME_SEC;
+ new_inode->i_ctime = ext4_current_time(new_inode);
}
- old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
+ old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
ext4_update_dx_flag(old_dir);
if (dir_bh) {
BUFFER_TRACE(dir_bh, "get_write_access");
Index: linux-2.6.20/fs/ext4/super.c
===================================================================
--- linux-2.6.20.orig/fs/ext4/super.c
+++ linux-2.6.20/fs/ext4/super.c
@@ -1631,6 +1631,8 @@ static int ext4_fill_super (struct super
sbi->s_inode_size);
goto failed_mount;
}
+ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
+ sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
}
sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
le32_to_cpu(es->s_log_frag_size);
@@ -1847,6 +1849,32 @@ static int ext4_fill_super (struct super
}

ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+
+ /* determine the minimum size of new large inodes, if present */
+ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
+ sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
+ if (sbi->s_want_extra_isize <
+ le16_to_cpu(es->s_want_extra_isize))
+ sbi->s_want_extra_isize =
+ le16_to_cpu(es->s_want_extra_isize);
+ if (sbi->s_want_extra_isize <
+ le16_to_cpu(es->s_min_extra_isize))
+ sbi->s_want_extra_isize =
+ le16_to_cpu(es->s_min_extra_isize);
+ }
+ }
+ /* Check if enough inode space is available */
+ if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
+ sbi->s_inode_size) {
+ sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
+ printk(KERN_INFO "EXT4-fs: required extra inode space not"
+ "available.\n");
+ }
+
/*
* akpm: core read_super() calls in here with the superblock locked.
* That deadlocks, because orphan cleanup needs to lock the superblock
Index: linux-2.6.20/fs/ext4/xattr.c
===================================================================
--- linux-2.6.20.orig/fs/ext4/xattr.c
+++ linux-2.6.20/fs/ext4/xattr.c
@@ -1004,7 +1004,7 @@ ext4_xattr_set_handle(handle_t *handle,
}
if (!error) {
ext4_xattr_update_super_block(handle, inode->i_sb);
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext4_current_time(inode);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
/*
* The bh is consumed by ext4_mark_iloc_dirty, even with
Index: linux-2.6.20/include/linux/ext4_fs.h
===================================================================
--- linux-2.6.20.orig/include/linux/ext4_fs.h
+++ linux-2.6.20/include/linux/ext4_fs.h
@@ -282,7 +282,7 @@ struct ext4_inode {
__le16 i_uid; /* Low 16 bits of Owner Uid */
__le32 i_size; /* Size in bytes */
__le32 i_atime; /* Access time */
- __le32 i_ctime; /* Creation time */
+ __le32 i_ctime; /* Inode Change time */
__le32 i_mtime; /* Modification time */
__le32 i_dtime; /* Deletion Time */
__le16 i_gid; /* Low 16 bits of Group Id */
@@ -331,10 +331,54 @@ struct ext4_inode {
} osd2; /* OS dependent 2 */
__le16 i_extra_isize;
__le16 i_pad1;
+ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
+ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
+ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
+ __le32 i_crtime; /* File Creation time */
+ __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */
};

#define i_size_high i_dir_acl

+#define EXT4_EPOCH_BITS 2 /* low bits of each *_extra field that hold the epoch */
+#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) /* selects the epoch bits */
+#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) /* selects the bits above the epoch (nsec) */
+
+#define EXT4_INODE_SET_XTIME(xtime, extra_xtime, ei, inode, raw_inode) \
+do { /* store a field only if it fits in GOOD_OLD size + i_extra_isize */ \
+	if (offsetof(typeof(*(raw_inode)), xtime) + \
+	    sizeof((raw_inode)->xtime) <= \
+	    EXT4_GOOD_OLD_INODE_SIZE + (ei)->i_extra_isize) \
+		(raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
+	if (offsetof(typeof(*(raw_inode)), extra_xtime) + \
+	    sizeof((raw_inode)->extra_xtime) <= \
+	    EXT4_GOOD_OLD_INODE_SIZE + (ei)->i_extra_isize) \
+		/* mask the epoch: unmasked high tv_sec bits would clobber nsec */ \
+		(raw_inode)->extra_xtime = cpu_to_le32( \
+			(sizeof((inode)->xtime.tv_sec) > 4 ? \
+			 (((__u64)(inode)->xtime.tv_sec >> 32) & EXT4_EPOCH_MASK) : 0) | \
+			(((inode)->xtime.tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK)); \
+} while (0)
+
+#define EXT4_INODE_GET_XTIME(xtime, extra_xtime, ei, inode, raw_inode) \
+do { /* load a field only if it fits in GOOD_OLD size + i_extra_isize */ \
+	if (offsetof(typeof(*(raw_inode)), xtime) + \
+	    sizeof((raw_inode)->xtime) <= \
+	    EXT4_GOOD_OLD_INODE_SIZE + (ei)->i_extra_isize) \
+		(inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
+	if (offsetof(typeof(*(raw_inode)), extra_xtime) + \
+	    sizeof((raw_inode)->extra_xtime) <= \
+	    EXT4_GOOD_OLD_INODE_SIZE + (ei)->i_extra_isize) { \
+		__u32 extra_ = le32_to_cpu((raw_inode)->extra_xtime); \
+		if (sizeof((inode)->xtime.tv_sec) > 4) /* epoch = sec bits 32+ */ \
+			(inode)->xtime.tv_sec |= \
+				(__u64)(extra_ & EXT4_EPOCH_MASK) << 32; \
+		(inode)->xtime.tv_nsec = \
+			(extra_ & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; \
+	} else /* no extra field stored: do not leave stale nanoseconds */ \
+		(inode)->xtime.tv_nsec = 0; \
+} while (0)
+
#if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1 osd1.linux1.l_i_reserved1
#define i_frag osd2.linux2.l_i_frag
@@ -513,7 +557,9 @@ struct ext4_super_block {
/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
__le32 s_r_blocks_count_hi; /* Reserved blocks count */
__le32 s_free_blocks_count_hi; /* Free blocks count */
- __u32 s_reserved[169]; /* Padding to the end of the block */
+ __le16 s_min_extra_isize; /* All inodes have at least # bytes */
+ __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
+ __u32 s_reserved[168]; /* Padding to the end of the block */
};

#ifdef __KERNEL__
@@ -526,6 +572,13 @@ static inline struct ext4_inode_info *EX
return container_of(inode, struct ext4_inode_info, vfs_inode);
}

+/* Whole seconds when s_time_gran >= 1000000000 (presumably ns), else fs time. */
+static inline struct timespec ext4_current_time(struct inode *inode)
+{
+	return (inode->i_sb->s_time_gran >= 1000000000) ?
+		CURRENT_TIME_SEC : current_fs_time(inode->i_sb);
+}
+
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT4_ROOT_INO ||
@@ -596,6 +649,7 @@ static inline int ext4_valid_inum(struct
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
+#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040

#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -613,6 +667,7 @@ static inline int ext4_valid_inum(struct
EXT4_FEATURE_INCOMPAT_64BIT)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE| \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR)

/*
Index: linux-2.6.20/include/linux/ext4_fs_i.h
===================================================================
--- linux-2.6.20.orig/include/linux/ext4_fs_i.h
+++ linux-2.6.20/include/linux/ext4_fs_i.h
@@ -153,6 +153,7 @@ struct ext4_inode_info {

unsigned long i_ext_generation;
struct ext4_ext_cache i_cached_extent;
+ struct timespec i_crtime;
};

#endif /* _LINUX_EXT4_FS_I */
Index: linux-2.6.20/include/linux/ext4_fs_sb.h
===================================================================
--- linux-2.6.20.orig/include/linux/ext4_fs_sb.h
+++ linux-2.6.20/include/linux/ext4_fs_sb.h
@@ -89,6 +89,7 @@ struct ext4_sb_info {
unsigned long s_ext_blocks;
unsigned long s_ext_extents;
#endif
+ unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
};

#endif /* _LINUX_EXT4_FS_SB */


Thanks,
Kalpak. <[email protected]>


On Tue, 2007-02-06 at 16:12 +0100, Johann Lombardi wrote:
> On Fri, Feb 02, 2007 at 08:19:50PM +0530, Kalpak Shah wrote:
> > Index: linux-2.6.19/fs/ext3/super.c
> > ===================================================================
> > --- linux-2.6.19.orig/fs/ext3/super.c
> > +++ linux-2.6.19/fs/ext3/super.c
> > @@ -1770,6 +1772,32 @@ static int ext3_fill_super (struct super
> > }
> >
> > ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
> > +
> > + /* determine the minimum size of new large inodes, if present */
> > + if (sbi->s_inode_size > EXT3_GOOD_OLD_INODE_SIZE) {
> > + EXT3_SB(sb)->s_want_extra_isize = sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE;
>
> Maybe EXT3_SB(sb)-> could be replaced by sbi-> here and in the lines below.
>
> > + if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
> > + EXT3_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
> > + if (EXT3_SB(sb)->s_want_extra_isize <
> > + le32_to_cpu(es->s_want_extra_isize))
> ^^
> > + EXT3_SB(sb)->s_want_extra_isize =
> > + le32_to_cpu(es->s_want_extra_isize);
> ^^
> > + if (EXT3_SB(sb)->s_want_extra_isize <
> > + le32_to_cpu(es->s_min_extra_isize))
> ^^
> > + EXT3_SB(sb)->s_want_extra_isize =
> > + le32_to_cpu(es->s_min_extra_isize);
> ^^
> Since es->s_{min,want}_extra_isize are both __u16 (BTW, shouldn't it be __le16?),
> I think you should use le16_to_cpu() instead of le32_to_cpu().
>
> > + }
> > + }
> > + /* Check if enough inode space is available */
> > + if (EXT3_GOOD_OLD_INODE_SIZE + EXT3_SB(sb)->s_want_extra_isize >
> > + sbi->s_inode_size) {
> > + EXT3_SB(sb)->s_want_extra_isize = sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE;
> > + printk(KERN_INFO "EXT3-fs: required extra inode space not"
> > + "available.\n");
> > + }
>
> If the inode size is EXT3_GOOD_OLD_INODE_SIZE, sbi->s_want_extra_isize won't be
> initialized. However, it should not be an issue because the ext3_sb_info
> is set to zero in ext3_fill_super().
>
> Johann

2007-02-19 09:55:28

by Johann Lombardi

[permalink] [raw]
Subject: Re: [PATCH Take2 1/1] Nanosecond timestamps

On Tue, Feb 13, 2007 at 06:46:44PM +0530, Kalpak Shah wrote:
> Index: linux-2.6.20/fs/ext4/inode.c
> ===================================================================
> --- linux-2.6.20.orig/fs/ext4/inode.c
> +++ linux-2.6.20/fs/ext4/inode.c
> @@ -2676,10 +2676,11 @@ void ext4_read_inode(struct inode * inod
> }
> inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
> inode->i_size = le32_to_cpu(raw_inode->i_size);
> - inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
> - inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime);
> - inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime);
> - inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
> +
> + EXT4_INODE_GET_XTIME(i_ctime, i_ctime_extra, ei, inode, raw_inode);
> + EXT4_INODE_GET_XTIME(i_mtime, i_mtime_extra, ei, inode, raw_inode);
> + EXT4_INODE_GET_XTIME(i_atime, i_atime_extra, ei, inode, raw_inode);
> + EXT4_INODE_GET_XTIME(i_crtime, i_crtime_extra, ei, ei, raw_inode);

ei->i_extra_isize is not yet initialized at this point (it is set a couple of
lines lower).
I guess the consequence of this is that nanosecond timestamps are always
discarded since ei->i_extra_isize is equal to 0.

Cheers,
Johann