2007-07-01 07:36:56

by Mingming Cao

[permalink] [raw]
Subject: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

This patch is a spinoff of the old nanosecond patches.

It includes some cleanups and addition of a creation timestamp. The
EXT3_FEATURE_RO_COMPAT_EXTRA_ISIZE flag has also been added along with
s_{min, want}_extra_isize fields in struct ext3_super_block.

Signed-off-by: Andreas Dilger <[email protected]>
Signed-off-by: Kalpak Shah <[email protected]>
Signed-off-by: Eric Sandeen <[email protected]>
Signed-off-by: Dave Kleikamp <[email protected]>
Signed-off-by: Mingming Cao <[email protected]>

Index: linux-2.6.22-rc4/fs/ext4/ialloc.c
===================================================================
--- linux-2.6.22-rc4.orig/fs/ext4/ialloc.c 2007-06-11 17:22:15.000000000 -0700
+++ linux-2.6.22-rc4/fs/ext4/ialloc.c 2007-06-11 17:39:05.000000000 -0700
@@ -563,7 +563,8 @@
inode->i_ino = ino;
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
+ ext4_current_time(inode);

memset(ei->i_data, 0, sizeof(ei->i_data));
ei->i_dir_start_lookup = 0;
@@ -595,9 +596,8 @@
spin_unlock(&sbi->s_next_gen_lock);

ei->i_state = EXT4_STATE_NEW;
- ei->i_extra_isize =
- (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ?
- sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0;
+
+ ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;

ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
Index: linux-2.6.22-rc4/fs/ext4/inode.c
===================================================================
--- linux-2.6.22-rc4.orig/fs/ext4/inode.c 2007-06-11 17:24:28.000000000 -0700
+++ linux-2.6.22-rc4/fs/ext4/inode.c 2007-06-11 17:39:05.000000000 -0700
@@ -726,7 +726,7 @@

/* We are done with atomic stuff, now do the rest of housekeeping */

- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);

/* had we spliced it onto indirect block? */
@@ -2375,7 +2375,7 @@
ext4_discard_reservation(inode);

mutex_unlock(&ei->truncate_mutex);
- inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);

/*
@@ -2629,10 +2629,6 @@
}
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
inode->i_size = le32_to_cpu(raw_inode->i_size);
- inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
- inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
- inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
- inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;

ei->i_state = 0;
ei->i_dir_start_lookup = 0;
@@ -2708,6 +2704,11 @@
} else
ei->i_extra_isize = 0;

+ EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
+ EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
+ EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
+ EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
+
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
@@ -2789,9 +2790,12 @@
}
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le32(ei->i_disksize);
- raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
- raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
- raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+
+ EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+ EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
+ EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
+ EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
+
raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags);
Index: linux-2.6.22-rc4/fs/ext4/ioctl.c
===================================================================
--- linux-2.6.22-rc4.orig/fs/ext4/ioctl.c 2007-06-11 17:25:11.000000000 -0700
+++ linux-2.6.22-rc4/fs/ext4/ioctl.c 2007-06-11 17:39:05.000000000 -0700
@@ -97,7 +97,7 @@
ei->i_flags = flags;

ext4_set_inode_flags(inode);
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext4_current_time(inode);

err = ext4_mark_iloc_dirty(handle, inode, &iloc);
flags_err:
@@ -134,7 +134,7 @@
return PTR_ERR(handle);
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err == 0) {
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext4_current_time(inode);
inode->i_generation = generation;
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
}
Index: linux-2.6.22-rc4/fs/ext4/namei.c
===================================================================
--- linux-2.6.22-rc4.orig/fs/ext4/namei.c 2007-06-11 17:22:15.000000000 -0700
+++ linux-2.6.22-rc4/fs/ext4/namei.c 2007-06-11 17:39:05.000000000 -0700
@@ -1285,7 +1285,7 @@
* happen is that the times are slightly out of date
* and/or different from the directory change time.
*/
- dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+ dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
ext4_update_dx_flag(dir);
dir->i_version++;
ext4_mark_inode_dirty(handle, dir);
@@ -2046,7 +2046,7 @@
* recovery. */
inode->i_size = 0;
ext4_orphan_add(handle, inode);
- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
drop_nlink(dir);
ext4_update_dx_flag(dir);
@@ -2096,13 +2096,13 @@
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
goto end_unlink;
- dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+ dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
drop_nlink(inode);
if (!inode->i_nlink)
ext4_orphan_add(handle, inode);
- inode->i_ctime = dir->i_ctime;
+ inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
retval = 0;

@@ -2193,7 +2193,7 @@
if (IS_DIRSYNC(dir))
handle->h_sync = 1;

- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext4_current_time(inode);
inc_nlink(inode);
atomic_inc(&inode->i_count);

@@ -2295,7 +2295,7 @@
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- old_inode->i_ctime = CURRENT_TIME_SEC;
+ old_inode->i_ctime = ext4_current_time(old_inode);
ext4_mark_inode_dirty(handle, old_inode);

/*
@@ -2328,9 +2328,9 @@

if (new_inode) {
drop_nlink(new_inode);
- new_inode->i_ctime = CURRENT_TIME_SEC;
+ new_inode->i_ctime = ext4_current_time(new_inode);
}
- old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
+ old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
ext4_update_dx_flag(old_dir);
if (dir_bh) {
BUFFER_TRACE(dir_bh, "get_write_access");
Index: linux-2.6.22-rc4/fs/ext4/super.c
===================================================================
--- linux-2.6.22-rc4.orig/fs/ext4/super.c 2007-06-11 17:28:09.000000000 -0700
+++ linux-2.6.22-rc4/fs/ext4/super.c 2007-06-11 17:39:05.000000000 -0700
@@ -1642,6 +1642,8 @@
sbi->s_inode_size);
goto failed_mount;
}
+ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
+ sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
}
sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
le32_to_cpu(es->s_log_frag_size);
@@ -1865,6 +1867,32 @@
}

ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+
+ /* determine the minimum size of new large inodes, if present */
+ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
+ sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
+ if (sbi->s_want_extra_isize <
+ le16_to_cpu(es->s_want_extra_isize))
+ sbi->s_want_extra_isize =
+ le16_to_cpu(es->s_want_extra_isize);
+ if (sbi->s_want_extra_isize <
+ le16_to_cpu(es->s_min_extra_isize))
+ sbi->s_want_extra_isize =
+ le16_to_cpu(es->s_min_extra_isize);
+ }
+ }
+ /* Check if enough inode space is available */
+ if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
+ sbi->s_inode_size) {
+ sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
+ printk(KERN_INFO "EXT4-fs: required extra inode space not"
+ "available.\n");
+ }
+
/*
* akpm: core read_super() calls in here with the superblock locked.
* That deadlocks, because orphan cleanup needs to lock the superblock
Index: linux-2.6.22-rc4/fs/ext4/xattr.c
===================================================================
--- linux-2.6.22-rc4.orig/fs/ext4/xattr.c 2007-06-11 17:22:15.000000000 -0700
+++ linux-2.6.22-rc4/fs/ext4/xattr.c 2007-06-11 17:39:05.000000000 -0700
@@ -1013,7 +1013,7 @@
}
if (!error) {
ext4_xattr_update_super_block(handle, inode->i_sb);
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = ext4_current_time(inode);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
/*
* The bh is consumed by ext4_mark_iloc_dirty, even with
Index: linux-2.6.22-rc4/include/linux/ext4_fs.h
===================================================================
--- linux-2.6.22-rc4.orig/include/linux/ext4_fs.h 2007-06-11 17:36:13.000000000 -0700
+++ linux-2.6.22-rc4/include/linux/ext4_fs.h 2007-06-11 17:41:55.000000000 -0700
@@ -288,7 +288,7 @@
__le16 i_uid; /* Low 16 bits of Owner Uid */
__le32 i_size; /* Size in bytes */
__le32 i_atime; /* Access time */
- __le32 i_ctime; /* Creation time */
+ __le32 i_ctime; /* Inode Change time */
__le32 i_mtime; /* Modification time */
__le32 i_dtime; /* Deletion Time */
__le16 i_gid; /* Low 16 bits of Group Id */
@@ -337,10 +337,74 @@
} osd2; /* OS dependent 2 */
__le16 i_extra_isize;
__le16 i_pad1;
+ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
+ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
+ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
+ __le32 i_crtime; /* File Creation time */
+ __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
};

#define i_size_high i_dir_acl

+#define EXT4_EPOCH_BITS 2
+#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
+#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
+
+#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
+ ((offsetof(typeof(*ext4_inode), field) + \
+ sizeof((ext4_inode)->field)) \
+ <= (EXT4_GOOD_OLD_INODE_SIZE + \
+ (einode)->i_extra_isize)) \
+
+static inline __le32 ext4_encode_extra_time(struct timespec *time)
+{
+ return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
+ time->tv_sec >> 32 : 0) |
+ ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
+}
+
+static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
+{
+ if (sizeof(time->tv_sec) > 4)
+ time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
+ << 32;
+ time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
+}
+
+#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
+do { \
+ (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
+ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
+ (raw_inode)->xtime ## _extra = \
+ ext4_encode_extra_time(&(inode)->xtime); \
+} while (0)
+
+#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
+do { \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
+ (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
+ (raw_inode)->xtime ## _extra = \
+ ext4_encode_extra_time(&(einode)->xtime); \
+} while (0)
+
+#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
+do { \
+ (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
+ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
+ ext4_decode_extra_time(&(inode)->xtime, \
+ raw_inode->xtime ## _extra); \
+} while (0)
+
+#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
+do { \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
+ (einode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
+ ext4_decode_extra_time(&(einode)->xtime, \
+ raw_inode->xtime ## _extra); \
+} while (0)
+
#if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1 osd1.linux1.l_i_reserved1
#define i_frag osd2.linux2.l_i_frag
@@ -539,6 +603,13 @@
return container_of(inode, struct ext4_inode_info, vfs_inode);
}

+static inline struct timespec ext4_current_time(struct inode *inode)
+{
+ return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
+ current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
+}
+
+
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT4_ROOT_INO ||
@@ -609,6 +680,7 @@
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
+#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040

#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -626,6 +698,7 @@
EXT4_FEATURE_INCOMPAT_64BIT)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR)

/*
Index: linux-2.6.22-rc4/include/linux/ext4_fs_i.h
===================================================================
--- linux-2.6.22-rc4.orig/include/linux/ext4_fs_i.h 2007-06-11 17:22:15.000000000 -0700
+++ linux-2.6.22-rc4/include/linux/ext4_fs_i.h 2007-06-11 17:39:05.000000000 -0700
@@ -153,6 +153,7 @@

unsigned long i_ext_generation;
struct ext4_ext_cache i_cached_extent;
+ struct timespec i_crtime;
};

#endif /* _LINUX_EXT4_FS_I */
Index: linux-2.6.22-rc4/include/linux/ext4_fs_sb.h
===================================================================
--- linux-2.6.22-rc4.orig/include/linux/ext4_fs_sb.h 2007-06-11 17:28:15.000000000 -0700
+++ linux-2.6.22-rc4/include/linux/ext4_fs_sb.h 2007-06-11 17:39:05.000000000 -0700
@@ -79,6 +79,7 @@
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
+ unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */

#ifdef EXTENTS_STATS
/* ext4 extents stats */




2007-07-03 06:38:50

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp



Mingming Cao wrote:
> This patch is a spinoff of the old nanosecond patches.
>
> It includes some cleanups and addition of a creation timestamp. The
> EXT3_FEATURE_RO_COMPAT_EXTRA_ISIZE flag has also been added along with
> s

Should be EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE

-aneesh

2007-07-03 10:41:15

by Kalpak Shah

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Sun, 2007-07-01 at 03:36 -0400, Mingming Cao wrote:
> +
> +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
> +do { \
> + (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
> + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
> + ext4_decode_extra_time(&(inode)->xtime, \
> + raw_inode->xtime ## _extra); \
> +} while (0)
> +
> +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
> +do { \
> + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
> + (einode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
> + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
> + ext4_decode_extra_time(&(einode)->xtime, \
> + raw_inode->xtime ## _extra); \
> +} while (0)
> +

This nanosecond patch seems to be missing the fix below which is required for http://bugzilla.kernel.org/show_bug.cgi?id=5079

If the timestamp is set to before epoch i.e. a negative timestamp then the file may have its date set into the future on 64-bit systems. So when the timestamp is read it must be cast as signed.

Index: linux-2.6.21/include/linux/ext4_fs.h
===================================================================
--- linux-2.6.21.orig/include/linux/ext4_fs.h
+++ linux-2.6.21/include/linux/ext4_fs.h
@@ -390,7 +390,7 @@ do { \

#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
do { \
- (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
+ (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
ext4_decode_extra_time(&(inode)->xtime, \
raw_inode->xtime ## _extra); \
@@ -399,7 +399,8 @@ do { \
#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
do { \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
- (einode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
+ (einode)->xtime.tv_sec = \
+ (signed)le32_to_cpu((raw_inode)->xtime); \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
ext4_decode_extra_time(&(einode)->xtime, \
raw_inode->xtime ## _extra); \


Thanks,
Kalpak.



2007-07-03 10:28:26

by Kalpak Shah

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Sun, 2007-07-01 at 03:36 -0400, Mingming Cao wrote:
> +
> +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
> +do { \
> + (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
> + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
> + ext4_decode_extra_time(&(inode)->xtime, \
> + raw_inode->xtime ## _extra); \
> +} while (0)
> +
> +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
> +do { \
> + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
> + (einode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
> + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
> + ext4_decode_extra_time(&(einode)->xtime, \
> + raw_inode->xtime ## _extra); \
> +} while (0)
> +

This nanosecond patch seems to be missing the fix below which is required for http://bugzilla.kernel.org/show_bug.cgi?id=5079

If the timestamp is set to before epoch i.e. a negative timestamp then the file may have its date set into the future on 64-bit systems. So when the timestamp is read it must be cast as signed.

Index: linux-2.6.21/include/linux/ext4_fs.h
===================================================================
--- linux-2.6.21.orig/include/linux/ext4_fs.h
+++ linux-2.6.21/include/linux/ext4_fs.h
@@ -390,7 +390,7 @@ do { \

#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
do { \
- (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
+ (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
ext4_decode_extra_time(&(inode)->xtime, \
raw_inode->xtime ## _extra); \
@@ -399,7 +399,8 @@ do { \
#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
do { \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
- (einode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
+ (einode)->xtime.tv_sec = \
+ (signed)le32_to_cpu((raw_inode)->xtime); \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
ext4_decode_extra_time(&(einode)->xtime, \
raw_inode->xtime ## _extra); \


Thanks,
Kalpak.


2007-07-04 06:32:54

by Mingming Cao

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Tue, 2007-07-03 at 15:58 +0530, Kalpak Shah wrote:
> On Sun, 2007-07-01 at 03:36 -0400, Mingming Cao wrote:
> > +
> > +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
> > +do { \
> > + (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
> > + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
> > + ext4_decode_extra_time(&(inode)->xtime, \
> > + raw_inode->xtime ## _extra); \
> > +} while (0)
> > +
> > +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
> > +do { \
> > + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
> > + (einode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
> > + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
> > + ext4_decode_extra_time(&(einode)->xtime, \
> > + raw_inode->xtime ## _extra); \
> > +} while (0)
> > +
>
> This nanosecond patch seems to be missing the fix below which is required for http://bugzilla.kernel.org/show_bug.cgi?id=5079
>
> If the timestamp is set to before epoch i.e. a negative timestamp then the file may have its date set into the future on 64-bit systems. So when the timestamp is read it must be cast as signed.

Missed this one.
Thanks. Will update ext4 patch queue tonight with this fix.

Mingming

2007-07-04 06:36:10

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp



Mingming Cao wrote:
> On Tue, 2007-07-03 at 15:58 +0530, Kalpak Shah wrote:
>> On Sun, 2007-07-01 at 03:36 -0400, Mingming Cao wrote:
>>> +
>>> +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
>>> +do { \
>>> + (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
>>> + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
>>> + ext4_decode_extra_time(&(inode)->xtime, \
>>> + raw_inode->xtime ## _extra); \
>>> +} while (0)
>>> +
>>> +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
>>> +do { \
>>> + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
>>> + (einode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
>>> + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
>>> + ext4_decode_extra_time(&(einode)->xtime, \
>>> + raw_inode->xtime ## _extra); \
>>> +} while (0)
>>> +
>> This nanosecond patch seems to be missing the fix below which is required for http://bugzilla.kernel.org/show_bug.cgi?id=5079
>>
>> If the timestamp is set to before epoch i.e. a negative timestamp then the file may have its date set into the future on 64-bit systems. So when the timestamp is read it must be cast as signed.
>
> Missed this one.
> Thanks. Will update ext4 patch queue tonight with this fix.
>
>


IIRC in the conference call it was decided to not to apply this patch. Andreas may be able to update better.

-aneesh

2007-07-04 07:24:18

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp



Aneesh Kumar K.V wrote:
>
>
> Mingming Cao wrote:
>> On Tue, 2007-07-03 at 15:58 +0530, Kalpak Shah wrote:
>>> On Sun, 2007-07-01 at 03:36 -0400, Mingming Cao wrote:
>>>> +
>>>> +#define EXT4_INODE_GET_XTIME(xtime, inode,
>>>> raw_inode) \
>>>> +do { \
>>>> + (inode)->xtime.tv_sec =
>>>> le32_to_cpu((raw_inode)->xtime); \
>>>> + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ##
>>>> _extra)) \
>>>> + ext4_decode_extra_time(&(inode)->xtime, \
>>>> + raw_inode->xtime ## _extra); \
>>>> +} while (0)
>>>> +
>>>> +#define EXT4_EINODE_GET_XTIME(xtime, einode,
>>>> raw_inode) \
>>>> +do { \
>>>> + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
>>>> + (einode)->xtime.tv_sec =
>>>> le32_to_cpu((raw_inode)->xtime); \
>>>> + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ##
>>>> _extra)) \
>>>> + ext4_decode_extra_time(&(einode)->xtime, \
>>>> + raw_inode->xtime ## _extra); \
>>>> +} while (0)
>>>> +
>>> This nanosecond patch seems to be missing the fix below which is
>>> required for http://bugzilla.kernel.org/show_bug.cgi?id=5079
>>>
>>> If the timestamp is set to before epoch i.e. a negative timestamp
>>> then the file may have its date set into the future on 64-bit
>>> systems. So when the timestamp is read it must be cast as signed.
>>
>> Missed this one.
>> Thanks. Will update ext4 patch queue tonight with this fix.
>>
>>
>
>
> IIRC in the conference call it was decided to not to apply this patch.
> Andreas may be able to update better.
>


Looking at the git log i understand the core patch got applied to ext4 tree with the comment
from Andreas. So may be we can apply this patch also.


commit 4d7bf11d649c72621ca31b8ea12b9c94af380e63

Andreas says:

This patch is now treating timestamps with the high bit set as negative
times (before Jan 1, 1970). This means we lose 1/2 of the possible range
of timestamps (lopping off 68 years before unix timestamp overflow -
now only 30 years away :-) to handle the extremely rare case of setting
timestamps into the distant past.

If we are only interested in fixing the underflow case, we could just
limit the values to 0 instead of storing negative values. At worst this
will skew the timestamp by a few hours for timezones in the far east
(files would still show Jan 1, 1970 in "ls -l" output).

That said, it seems 32-bit systems (mine at least) allow files to be set
into the past (01/01/1907 works fine) so it seems this patch is bringing
the x86_64 behaviour into sync with other kernels.

On the plus side, we have a patch that is ready to add nanosecond timestamps
to ext3 and as an added bonus adds 2 high bits to the on-disk timestamp so
this extends the maximum date to 2242.

NOTE: The conference call i mentioned above is http://ext4.wiki.kernel.org/index.php/Ext4_Developer%27s_Conference_Call


-aneesh

2007-07-04 16:44:32

by Andreas Dilger

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Jul 04, 2007 12:06 +0530, Aneesh Kumar K.V wrote:
> Mingming Cao wrote:
> >On Tue, 2007-07-03 at 15:58 +0530, Kalpak Shah wrote:
> >>On Sun, 2007-07-01 at 03:36 -0400, Mingming Cao wrote:
> >>>+
> >>>+#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
> >>>+do { \
> >>>+ (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime);
> >>>\
> >>>+ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra))
> >>>\
> >>>+ ext4_decode_extra_time(&(inode)->xtime,
> >>>\
> >>>+ raw_inode->xtime ## _extra);
> >>>\
> >>>+} while (0)
> >>>+
> >>>+#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
> >>>+do { \
> >>>+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))
> >>>\
> >>>+ (einode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime);
> >>>\
> >>>+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))
> >>>\
> >>>+ ext4_decode_extra_time(&(einode)->xtime,
> >>>\
> >>>+ raw_inode->xtime ## _extra);
> >>>\
> >>>+} while (0)
> >>>+
> >>This nanosecond patch seems to be missing the fix below which is
> >>required for http://bugzilla.kernel.org/show_bug.cgi?id=5079
> >>
> >>If the timestamp is set to before epoch i.e. a negative timestamp then
> >>the file may have its date set into the future on 64-bit systems. So
> >>when the timestamp is read it must be cast as signed.
> >
> >Missed this one.
> >Thanks. Will update ext4 patch queue tonight with this fix.
>
> IIRC in the conference call it was decided to not to apply this patch.
> Andreas may be able to update better.

I wasn't on the most recent concall, and I've forgotten the details of
any discussion on a previous concall.

Care really needs to be taken here that negative timestamps are handled
properly. We can take the sign bit from the inode i_*time, but then we
need to change the load/save of the extra time to use a shift of 31
instead of 32. If we overflow the epoch we have to ensure that the high
bits of the seconds is handled correctly.

Cheers, Andreas
--
Andreas Dilger
Principal Software Engineer
Cluster File Systems, Inc.


2007-07-10 23:30:42

by Andrew Morton

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Sun, 01 Jul 2007 03:36:56 -0400
Mingming Cao <[email protected]> wrote:

> This patch is a spinoff of the old nanosecond patches.

I don't know what the "old nanosecond patches" are. A link to a suitable
changlog for those patches would do in a pinch. Preferable would be to
write a proper changelog for this patch.

> It includes some cleanups and addition of a creation timestamp. The
> EXT3_FEATURE_RO_COMPAT_EXTRA_ISIZE flag has also been added along with
> s_{min, want}_extra_isize fields in struct ext3_super_block.
>
> Signed-off-by: Andreas Dilger <[email protected]>
> Signed-off-by: Kalpak Shah <[email protected]>
> Signed-off-by: Eric Sandeen <[email protected]>
> Signed-off-by: Dave Kleikamp <[email protected]>
> Signed-off-by: Mingming Cao <[email protected]>
>
> Index: linux-2.6.22-rc4/fs/ext4/ialloc.c

Please include diffstat output when preparing patches.

> +
> +#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
> + ((offsetof(typeof(*ext4_inode), field) + \
> + sizeof((ext4_inode)->field)) \
> + <= (EXT4_GOOD_OLD_INODE_SIZE + \
> + (einode)->i_extra_isize)) \

Please add explanatory commentary to EXT4_FITS_IN_INODE(): tell readers
under what circumstances something will not fit in an inode and what the
consequences of this are.

> +static inline __le32 ext4_encode_extra_time(struct timespec *time)
> +{
> + return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
> + time->tv_sec >> 32 : 0) |
> + ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
> +}
> +
> +static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
> +{
> + if (sizeof(time->tv_sec) > 4)
> + time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
> + << 32;
> + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
> +}

Consider uninlining these functions.

> +#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
> +do { \
> + (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
> + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
> + (raw_inode)->xtime ## _extra = \
> + ext4_encode_extra_time(&(inode)->xtime); \
> +} while (0)
> +
> +#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
> +do { \
> + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
> + (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
> + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
> + (raw_inode)->xtime ## _extra = \
> + ext4_encode_extra_time(&(einode)->xtime); \
> +} while (0)
> +
> +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
> +do { \
> + (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
> + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
> + ext4_decode_extra_time(&(inode)->xtime, \
> + raw_inode->xtime ## _extra); \
> +} while (0)
> +
> +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
> +do { \
> + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
> + (einode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
> + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
> + ext4_decode_extra_time(&(einode)->xtime, \
> + raw_inode->xtime ## _extra); \
> +} while (0)

Ugly. I expect these could be implemented as plain old C functions.
Caller could pass in the address of the ext4_inode field which the function
is to operate upon.

> #if defined(__KERNEL__) || defined(__linux__)

(What's the __linux__ for?)

> #define i_reserved1 osd1.linux1.l_i_reserved1
> #define i_frag osd2.linux2.l_i_frag
> @@ -539,6 +603,13 @@
> return container_of(inode, struct ext4_inode_info, vfs_inode);
> }
>
> +static inline struct timespec ext4_current_time(struct inode *inode)
> +{
> + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
> + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
> +}

Now, I've forgotten how this works. Remind me, please. Can ext4
filesystems ever have one-second timestamp granularity? If so, how does
one cause that to come about?

> --- linux-2.6.22-rc4.orig/include/linux/ext4_fs_i.h 2007-06-11 17:22:15.000000000 -0700
> +++ linux-2.6.22-rc4/include/linux/ext4_fs_i.h 2007-06-11 17:39:05.000000000 -0700
> @@ -153,6 +153,7 @@
>
> unsigned long i_ext_generation;
> struct ext4_ext_cache i_cached_extent;
> + struct timespec i_crtime;
> };

It is unobvious what this field does. Please prefer to add commentary to
_all_ struct fields - it really helps.

I thought checkpatch was going to have a little whine about that but the
version I have here doesn't.

>
> #endif /* _LINUX_EXT4_FS_I */
> Index: linux-2.6.22-rc4/include/linux/ext4_fs_sb.h
> ===================================================================
> --- linux-2.6.22-rc4.orig/include/linux/ext4_fs_sb.h 2007-06-11 17:28:15.000000000 -0700
> +++ linux-2.6.22-rc4/include/linux/ext4_fs_sb.h 2007-06-11 17:39:05.000000000 -0700
> @@ -79,6 +79,7 @@
> char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
> int s_jquota_fmt; /* Format of quota to use */
> #endif
> + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
>
> #ifdef EXTENTS_STATS

OK, I can kind-of see how this is working, but some overall description of
how the inode sizing design operates would be helpful. It would certainly
make reviewing of this proposed change more fruitful. Perhaps that new
comment over EXT4_FITS_IN_INODE() would be a suitable place.

2007-07-11 02:00:41

by Mingming Cao

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Tue, 2007-07-10 at 16:30 -0700, Andrew Morton wrote:
> On Sun, 01 Jul 2007 03:36:56 -0400
> Mingming Cao <[email protected]> wrote:
>
> > This patch is a spinoff of the old nanosecond patches.
>
> I don't know what the "old nanosecond patches" are. A link to a suitable
> changlog for those patches would do in a pinch. Preferable would be to
> write a proper changelog for this patch.
>
I found the original patch
http://marc.info/?l=linux-ext4&m=115091699809181&w=2

Andreas or Kalpak, is changelog from the original patch is accurate to
apply here?

Mingming

2007-07-11 11:14:49

by Andreas Dilger

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Jul 10, 2007 22:00 -0400, Mingming Cao wrote:
> On Tue, 2007-07-10 at 16:30 -0700, Andrew Morton wrote:
> > On Sun, 01 Jul 2007 03:36:56 -0400
> > Mingming Cao <[email protected]> wrote:
> >
> > > This patch is a spinoff of the old nanosecond patches.
> >
> > I don't know what the "old nanosecond patches" are. A link to a suitable
> > changlog for those patches would do in a pinch. Preferable would be to
> > write a proper changelog for this patch.
> >
> I found the original patch
> http://marc.info/?l=linux-ext4&m=115091699809181&w=2
>
> Andreas or Kalpak, is changelog from the original patch is accurate to
> apply here?

Mostly, yes, but the name of the feature flag has changed.

Cheers, Andreas
--
Andreas Dilger
Principal Software Engineer
Cluster File Systems, Inc.

2007-07-11 11:28:54

by Andreas Dilger

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Jul 10, 2007 16:30 -0700, Andrew Morton wrote:
> > +#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
> > + ((offsetof(typeof(*ext4_inode), field) + \
> > + sizeof((ext4_inode)->field)) \
> > + <= (EXT4_GOOD_OLD_INODE_SIZE + \
> > + (einode)->i_extra_isize)) \
>
> Please add explanatory commentary to EXT4_FITS_IN_INODE(): tell readers
> under what circumstances something will not fit in an inode and what the
> consequences of this are.

/* Extended fields will fit into an inode if the filesystem was formatted
* with large inodes (-I 256 or larger) and there are not currently EAs
* consuming all of the available space. For new inodes we always reserve
* enough space for the kernel's known extended fields, but for inodes
* created with an old kernel this might not have been the case. None of
* the extended inode fields is critical for correct filesystem operation.
*/

> > +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
> > +do { \
> > + (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
> > + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
> > + ext4_decode_extra_time(&(inode)->xtime, \
> > + raw_inode->xtime ## _extra); \
> > +} while (0)
>
> Ugly. I expect these could be implemented as plain old C functions.
> Caller could pass in the address of the ext4_inode field which the function
> is to operate upon.

We thought about that also, but then the caller needs to do all of the
pointer gymnastics themselves like:

ext4_inode_get_xtime(&inode->i_ctime, &inode->i_ctime_extra,
&raw_inode->i_ctime, &raw_inode->i_ctime_extra)

instead of the current:

EXT4_INODE_GET_XTIME(ctime, inode, raw_inode);

IMHO it is preferrable to make the multiple callsites more readable than
the macros.

> > #if defined(__KERNEL__) || defined(__linux__)
>
> (What's the __linux__ for?)
>
> > #define i_reserved1 osd1.linux1.l_i_reserved1
> > #define i_frag osd2.linux2.l_i_frag

This is actually unrelated to the current patch, just part of the context.
AFAIK, this is historical, so that the kernel and e2fsprogs can use the
same ext2_fs.h header. I don't think it is really needed, but such cleanup
shouldn't be a part of this patch either.

> > +static inline struct timespec ext4_current_time(struct inode *inode)
> > +{
> > + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
> > + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
> > +}
>
> Now, I've forgotten how this works. Remind me, please. Can ext4
> filesystems ever have one-second timestamp granularity? If so, how does
> one cause that to come about?

Yes, this is possible if an ext2/3/4 filesystem is formatted with 128-byte
inodes (which is the default for all but ext4) and this fs is mounted as
ext4dev. The inodes can never hold the extra time information (FITS_IN_INODE
check above) so the superblock limits the timestamp resolution to 1s in that
case.

> > @@ -153,6 +153,7 @@
> >
> > unsigned long i_ext_generation;
> > struct ext4_ext_cache i_cached_extent;
> > + struct timespec i_crtime;
> > };
>
> It is unobvious what this field does. Please prefer to add commentary to
> _all_ struct fields - it really helps.

It is the inode creation time. This is useful for debug/forensic purposes,
and at some point there will be an API so that Samba can use it also.

> > #endif /* _LINUX_EXT4_FS_I */
> > Index: linux-2.6.22-rc4/include/linux/ext4_fs_sb.h
> > ===================================================================
> > --- linux-2.6.22-rc4.orig/include/linux/ext4_fs_sb.h 2007-06-11 17:28:15.000000000 -0700
> > +++ linux-2.6.22-rc4/include/linux/ext4_fs_sb.h 2007-06-11 17:39:05.000000000 -0700
> > @@ -79,6 +79,7 @@
> > char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
> > int s_jquota_fmt; /* Format of quota to use */
> > #endif
> > + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
>
> OK, I can kind-of see how this is working, but some overall description of
> how the inode sizing design operates would be helpful. It would certainly
> make reviewing of this proposed change more fruitful. Perhaps that new
> comment over EXT4_FITS_IN_INODE() would be a suitable place.

Hmm, I'm sure there were emails on the topic, but they aren't attached to
the patch. s_want_extra_isize is just an override for sizeof(ext4_inode)
in case the sysadmin wants to reserve more fields in new inodes. There is
also s_min_extra_isize which is what the kernel and e2fsck guarantee that
will be available in all in-use inodes, if RO_COMPAT_EXTRA_ISIZE is set
(ro-compat so that older kernels can't create inodes with a smaller
extra_isize). That feature is only enabled if requested by the sysadmin.

Cheers, Andreas
--
Andreas Dilger
Principal Software Engineer
Cluster File Systems, Inc.

2007-07-12 12:57:51

by Kalpak Shah

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Tue, 2007-07-10 at 16:30 -0700, Andrew Morton wrote:
> On Sun, 01 Jul 2007 03:36:56 -0400
> Mingming Cao <[email protected]> wrote:
>
> > This patch is a spinoff of the old nanosecond patches.
>
> I don't know what the "old nanosecond patches" are. A link to a suitable
> changlog for those patches would do in a pinch. Preferable would be to
> write a proper changelog for this patch.

The incremental patch contains a proper changelog describing the patch.

> > +static inline __le32 ext4_encode_extra_time(struct timespec *time)
> > +{
> > + return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
> > + time->tv_sec >> 32 : 0) |
> > + ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
> > +}
> > +
> > +static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
> > +{
> > + if (sizeof(time->tv_sec) > 4)
> > + time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
> > + << 32;
> > + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
> > +}
>
> Consider uninlining these functions.

Done.

This patch adds comments, few small corrections and an appropriate
changelog entry.

Thanks,
Kalpak.


Attachments:
ext4_nanosecond_correction.patch (3.34 kB)

2007-07-13 04:30:06

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp



Kalpak Shah wrote:
> On Tue, 2007-07-10 at 16:30 -0700, Andrew Morton wrote:
>> On Sun, 01 Jul 2007 03:36:56 -0400
>> Mingming Cao <[email protected]> wrote:
>>
>>> This patch is a spinoff of the old nanosecond patches.
>> I don't know what the "old nanosecond patches" are. A link to a suitable
>> changlog for those patches would do in a pinch. Preferable would be to
>> write a proper changelog for this patch.
>
> The incremental patch contains a proper changelog describing the patch.
>


Instead of putting incremental patches it would be nice if we can have replacement patches.
for the already existing patches with the comments addressed. For example if we have a
review comment on the patch message ( commit log ) then adding an incremental patch doesn't help.


-aneesh

2007-07-13 07:04:27

by Kalpak Shah

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Fri, 2007-07-13 at 09:59 +0530, Aneesh Kumar K.V wrote:
>
> Kalpak Shah wrote:
> > On Tue, 2007-07-10 at 16:30 -0700, Andrew Morton wrote:
> >> On Sun, 01 Jul 2007 03:36:56 -0400
> >> Mingming Cao <[email protected]> wrote:
> >>
> >>> This patch is a spinoff of the old nanosecond patches.
> >> I don't know what the "old nanosecond patches" are. A link to a suitable
> >> changlog for those patches would do in a pinch. Preferable would be to
> >> write a proper changelog for this patch.
> >
> > The incremental patch contains a proper changelog describing the patch.
> >
>
>
> Instead of putting incremental patches it would be nice if we can have replacement patches.
> for the already existing patches with the comments addressed. For example if we have a
> review comment on the patch message ( commit log ) then adding an incremental patch doesn't help.

I think that it would be easier to review just the changes that have
been made to the patches instead of having people go through the entire
patch again. I was hoping that someone with write access to ext4-git
would update the commit logs.

If replacement patches are preferred, then I will send them again.

Thanks,
Kalpak.

>
>
> -aneesh
> -
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2007-07-13 21:46:58

by Mingming Cao

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Fri, 2007-07-13 at 12:35 +0530, Kalpak Shah wrote:
> On Fri, 2007-07-13 at 09:59 +0530, Aneesh Kumar K.V wrote:
> >
> > Kalpak Shah wrote:
> > > On Tue, 2007-07-10 at 16:30 -0700, Andrew Morton wrote:
> > >> On Sun, 01 Jul 2007 03:36:56 -0400
> > >> Mingming Cao <[email protected]> wrote:
> > >>
> > >>> This patch is a spinoff of the old nanosecond patches.
> > >> I don't know what the "old nanosecond patches" are. A link to a suitable
> > >> changlog for those patches would do in a pinch. Preferable would be to
> > >> write a proper changelog for this patch.
> > >
> > > The incremental patch contains a proper changelog describing the patch.
> > >
> >
> >
> > Instead of putting incremental patches it would be nice if we can have replacement patches.
> > for the already existing patches with the comments addressed. For example if we have a
> > review comment on the patch message ( commit log ) then adding an incremental patch doesn't help.
>
> I think that it would be easier to review just the changes that have
> been made to the patches instead of having people go through the entire
> patch again. I was hoping that someone with write access to ext4-git
> would update the commit logs.
>
> If replacement patches are preferred, then I will send them again.
>

No need, I already fold your fix patch to the parent patches, so in the
updated ext4-patch-queue it saved the updated nanosecond patch.

> Thanks,
> Kalpak.
>
> >
> >
> > -aneesh
> > -
> > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> > the body of a message to [email protected]
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
>


2007-07-17 00:49:55

by Mingming Cao

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Tue, 2007-07-10 at 16:30 -0700, Andrew Morton wrote:
> On Sun, 01 Jul 2007 03:36:56 -0400
> Mingming Cao <[email protected]> wrote:
>
> > This patch is a spinoff of the old nanosecond patches.
>
> I don't know what the "old nanosecond patches" are. A link to a suitable
> changlog for those patches would do in a pinch. Preferable would be to
> write a proper changelog for this patch.
>
> > It includes some cleanups and addition of a creation timestamp. The
> > EXT3_FEATURE_RO_COMPAT_EXTRA_ISIZE flag has also been added along with
> > s_{min, want}_extra_isize fields in struct ext3_super_block.
> >
> > Signed-off-by: Andreas Dilger <[email protected]>
> > Signed-off-by: Kalpak Shah <[email protected]>
> > Signed-off-by: Eric Sandeen <[email protected]>
> > Signed-off-by: Dave Kleikamp <[email protected]>
> > Signed-off-by: Mingming Cao <[email protected]>
> >
> > Index: linux-2.6.22-rc4/fs/ext4/ialloc.c
>
> Please include diffstat output when preparing patches.
>
> > +
> > +#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
> > + ((offsetof(typeof(*ext4_inode), field) + \
> > + sizeof((ext4_inode)->field)) \
> > + <= (EXT4_GOOD_OLD_INODE_SIZE + \
> > + (einode)->i_extra_isize)) \
>
> Please add explanatory commentary to EXT4_FITS_IN_INODE(): tell readers
> under what circumstances something will not fit in an inode and what the
> consequences of this are.
>
> > +static inline __le32 ext4_encode_extra_time(struct timespec *time)
> > +{
> > + return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
> > + time->tv_sec >> 32 : 0) |
> > + ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
> > +}
> > +
> > +static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
> > +{
> > + if (sizeof(time->tv_sec) > 4)
> > + time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
> > + << 32;
> > + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
> > +}
>
> Consider uninlining these functions.
>
I got compile warining after apply Kalpal's update nanosecond patch,
which makes these two function inline. It complains these functions are
defined but not used. It's being used only in the following
micros(EXT4_INODE_SET_XTIME etc). So if the .c file included the
ext4_fs.h but not using the micros, the compile will think these two
functions are not used.

Mingming

> > +#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
> > +do { \
> > + (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
> > + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
> > + (raw_inode)->xtime ## _extra = \
> > + ext4_encode_extra_time(&(inode)->xtime); \
> > +} while (0)
> > +
> > +#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
> > +do { \
> > + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
> > + (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
> > + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
> > + (raw_inode)->xtime ## _extra = \
> > + ext4_encode_extra_time(&(einode)->xtime); \
> > +} while (0)
> > +
> > +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
> > +do { \
> > + (inode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
> > + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
> > + ext4_decode_extra_time(&(inode)->xtime, \
> > + raw_inode->xtime ## _extra); \
> > +} while (0)
> > +
> > +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
> > +do { \
> > + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
> > + (einode)->xtime.tv_sec = le32_to_cpu((raw_inode)->xtime); \
> > + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
> > + ext4_decode_extra_time(&(einode)->xtime, \
> > + raw_inode->xtime ## _extra); \
> > +} while (0)
>
> Ugly. I expect these could be implemented as plain old C functions.
> Caller could pass in the address of the ext4_inode field which the function
> is to operate upon.
>
> > #if defined(__KERNEL__) || defined(__linux__)
>
> (What's the __linux__ for?)
>
> > #define i_reserved1 osd1.linux1.l_i_reserved1
> > #define i_frag osd2.linux2.l_i_frag
> > @@ -539,6 +603,13 @@
> > return container_of(inode, struct ext4_inode_info, vfs_inode);
> > }
> >
> > +static inline struct timespec ext4_current_time(struct inode *inode)
> > +{
> > + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
> > + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
> > +}
>
> Now, I've forgotten how this works. Remind me, please. Can ext4
> filesystems ever have one-second timestamp granularity? If so, how does
> one cause that to come about?
>
> > --- linux-2.6.22-rc4.orig/include/linux/ext4_fs_i.h 2007-06-11 17:22:15.000000000 -0700
> > +++ linux-2.6.22-rc4/include/linux/ext4_fs_i.h 2007-06-11 17:39:05.000000000 -0700
> > @@ -153,6 +153,7 @@
> >
> > unsigned long i_ext_generation;
> > struct ext4_ext_cache i_cached_extent;
> > + struct timespec i_crtime;
> > };
>
> It is unobvious what this field does. Please prefer to add commentary to
> _all_ struct fields - it really helps.
>
> I thought checkpatch was going to have a little whine about that but the
> version I have here doesn't.
>
> >
> > #endif /* _LINUX_EXT4_FS_I */
> > Index: linux-2.6.22-rc4/include/linux/ext4_fs_sb.h
> > ===================================================================
> > --- linux-2.6.22-rc4.orig/include/linux/ext4_fs_sb.h 2007-06-11 17:28:15.000000000 -0700
> > +++ linux-2.6.22-rc4/include/linux/ext4_fs_sb.h 2007-06-11 17:39:05.000000000 -0700
> > @@ -79,6 +79,7 @@
> > char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
> > int s_jquota_fmt; /* Format of quota to use */
> > #endif
> > + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
> >
> > #ifdef EXTENTS_STATS
>
> OK, I can kind-of see how this is working, but some overall description of
> how the inode sizing design operates would be helpful. It would certainly
> make reviewing of this proposed change more fruitful. Perhaps that new
> comment over EXT4_FITS_IN_INODE() would be a suitable place.


2007-07-17 10:04:09

by Kalpak Shah

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Mon, 2007-07-16 at 17:49 -0700, Mingming Cao wrote:
> On Tue, 2007-07-10 at 16:30 -0700, Andrew Morton wrote:
> > On Sun, 01 Jul 2007 03:36:56 -0400
> > Mingming Cao <[email protected]> wrote:
> > > +static inline __le32 ext4_encode_extra_time(struct timespec *time)
> > > +{
> > > + return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
> > > + time->tv_sec >> 32 : 0) |
> > > + ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
> > > +}
> > > +
> > > +static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
> > > +{
> > > + if (sizeof(time->tv_sec) > 4)
> > > + time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
> > > + << 32;
> > > + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
> > > +}
> >
> > Consider uninlining these functions.
> >
> I got compile warining after apply Kalpal's update nanosecond patch,
> which makes these two function inline. It complains these functions are
> defined but not used. It's being used only in the following
> micros(EXT4_INODE_SET_XTIME etc). So if the .c file included the
> ext4_fs.h but not using the micros, the compile will think these two
> functions are not used.

The compile warnings were introduced because the functions were
uninlined. So we can either keep these functions inlined or consider
adding a "__used" attribute to these two functions.

Thanks,
Kalpak.

2007-07-17 19:08:43

by Mingming Cao

[permalink] [raw]
Subject: Re: [EXT4 set 3][PATCH 1/1] ext4 nanosecond timestamp

On Tue, 2007-07-17 at 15:29 +0530, Kalpak Shah wrote:
> On Mon, 2007-07-16 at 17:49 -0700, Mingming Cao wrote:
> > On Tue, 2007-07-10 at 16:30 -0700, Andrew Morton wrote:
> > > On Sun, 01 Jul 2007 03:36:56 -0400
> > > Mingming Cao <[email protected]> wrote:
> > > > +static inline __le32 ext4_encode_extra_time(struct timespec *time)
> > > > +{
> > > > + return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
> > > > + time->tv_sec >> 32 : 0) |
> > > > + ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
> > > > +}
> > > > +
> > > > +static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
> > > > +{
> > > > + if (sizeof(time->tv_sec) > 4)
> > > > + time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
> > > > + << 32;
> > > > + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
> > > > +}
> > >
> > > Consider uninlining these functions.
> > >
> > I got compile warining after apply Kalpal's update nanosecond patch,
> > which makes these two function inline. It complains these functions are
> > defined but not used. It's being used only in the following
> > micros(EXT4_INODE_SET_XTIME etc). So if the .c file included the
> > ext4_fs.h but not using the micros, the compile will think these two
> > functions are not used.
>
> The compile warnings were introduced because the functions were
> uninlined. So we can either keep these functions inlined or consider
> adding a "__used" attribute to these two functions.
>
okay for now I keep these functions inlined.

> Thanks,
> Kalpak.
>


2007-07-17 20:44:39

by Mingming Cao

[permalink] [raw]
Subject: ext4 patch queue updated

Hi Ted,

The ext4 patch queue has been updated with updates to address latest
review comments. http://repo.or.cz/w/ext4-patch-queue.git

The series is being reordered: the patches still has outstanding
comments are moved to the end of the series. Per our discussion on
Monday, tThose patches are ready for submission are

# Rebased the patches to 2.6.22

# fallocate() syscall patches and ext4 fallocate() implementation
# Missing manpages
#ext4-fallocate-1-man-page
ext4-fallocate-2-syscall_i386_amd64_ppc
ext4-fallocate-3-ext4_support
ext4-fallocate-4-uninit_write_support
ext4-fallocate-5-new-ondisk-format

# Add mount option to turn off extents
ext4_noextent_mount_opt.patch

# Mounted ext4dev fs with extents by default for testing purpose,
# for Ext4 product release, extents mount option
# will be turn on only if the fs has EXTENTS feature on
ext4_extents_on_by_default.patch

# Propagate inode flags
ext4-propagate_flags.patch

# Add extent sanity checks
ext4-extent-sanity-checks.patch

# Bug fix:set 64bit JBD2 feature on >32bit ext4 fs
ext4_set_jbd2_64bit_feature.patch

# Fix: Rename CONFIG_JBD_DEBUG to CONFIG_JBD2_DEBUG
jbd2_config_jbd2_debug_fix.patch

# Export jbd2-debug via debugfs
jbd2_move_jbd2_debug_to_debugfs.patch

# Nanosecond timestamp support
ext4-nanosecond-patch
ext4_negative_timestamp_handle_fix.patch

# New patch to expand inode i_extra_isize to support features
# in high part of inode (>128 bytes)
ext4_expand_inode_extra_isize.patch

# Remove 32000 subdirs limit.
ext4_remove_subdirs_limit.patch

# Various Cleanups
ext4-zero_user_page.patch
is_power_of_2-ext4-superc.patch
ext4-remove-extra-is_rdonly-check.patch
ext4_extent_compilation_fixes.patch
ext4_extent_macros_cleanup.patch

#JBD->JBD2 naming cleanups