This is the hopefully the final set of on-disk format changes. The main
change from the last one is I split up the inode checksum field so that
half of the checksum is in the first 128 bytes of the inode (with 16
bits left over as a spare for emergencies) and the high 16 bits of the
crc32 is using what was previously the i_pad field in a 256 byte (and
larger) inode.
Please scream if you see any problems, because otherwise this is what's
going in!
The second patch adds support so that debugfs can easily handle setting
data objects which are split between a foo_hi and foo_lo fields on disk.
Along the way I also fixed some bugs where we weren't byte-swapping the
newer fields in the 256-byte inode (i.e., ctime_extra, mtime_extra,
etc.).
- Ted
Theodore Ts'o (2):
libext2fs: add metadata checksum and snapshot feature flags
debugfs: add 64-bit support to the set_field commands
debugfs/debugfs.h | 2 +
debugfs/set_fields.c | 452 ++++++++++++++++++++++++++++---------------
debugfs/util.c | 16 ++
e2fsck/pass1.c | 4 +-
lib/e2p/feature.c | 4 +
lib/e2p/ls.c | 4 +
lib/ext2fs/ext2_fs.h | 31 ++-
lib/ext2fs/swapfs.c | 16 ++-
lib/ext2fs/tst_inode_size.c | 3 +-
lib/ext2fs/tst_super_size.c | 1 +
10 files changed, 359 insertions(+), 174 deletions(-)
--
1.7.4.1.22.gec8e1.dirty
Reserve EXT4_FEATURE_RO_COMPAT_METADATA_CSUM and
EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP. Also reserve fields in the
superblock and the inode for the checksums. In the block group
descriptor, reserve the exclude bitmap field for the snapshot feature,
and checksums for the inode and block allocation bitmaps.
With this commit, the metadata checksum and exclude bitmap features
should have reserved all of the fields they need in ext4's on-disk
format.
This commit also fixes an a missing byte swap for s_overhead_blocks.
Signed-off-by: "Theodore Ts'o" <[email protected]>
Cc: Darrick J. Wong <[email protected]>
Cc: Amir Goldstein <[email protected]>
---
debugfs/set_fields.c | 3 ++-
e2fsck/pass1.c | 4 ++--
lib/e2p/feature.c | 4 ++++
lib/e2p/ls.c | 4 ++++
lib/ext2fs/ext2_fs.h | 31 ++++++++++++++++++++++---------
lib/ext2fs/swapfs.c | 16 ++++++++++++++--
lib/ext2fs/tst_inode_size.c | 3 ++-
lib/ext2fs/tst_super_size.c | 1 +
8 files changed, 51 insertions(+), 15 deletions(-)
diff --git a/debugfs/set_fields.c b/debugfs/set_fields.c
index ac6bc25..1a5ec7f 100644
--- a/debugfs/set_fields.c
+++ b/debugfs/set_fields.c
@@ -144,6 +144,7 @@ static struct field_set_info super_fields[] = {
{ "usr_quota_inum", &set_sb.s_usr_quota_inum, 4, parse_uint },
{ "grp_quota_inum", &set_sb.s_grp_quota_inum, 4, parse_uint },
{ "overhead_blocks", &set_sb.s_overhead_blocks, 4, parse_uint },
+ { "checksum", &set_sb.s_checksum, 4, parse_uint },
{ 0, 0, 0, 0 }
};
@@ -179,6 +180,7 @@ static struct field_set_info inode_fields[] = {
{ "fsize", &set_inode.osd2.hurd2.h_i_fsize, 1, parse_uint },
{ "uid_high", &set_inode.osd2.linux2.l_i_uid_high, 2, parse_uint },
{ "gid_high", &set_inode.osd2.linux2.l_i_gid_high, 2, parse_uint },
+ { "checksum", &set_inode.osd2.linux2.l_i_checksum_lo, 2, parse_uint },
{ "author", &set_inode.osd2.hurd2.h_i_author, 4, parse_uint },
{ "bmap", NULL, 4, parse_bmap, FLAG_ARRAY },
{ 0, 0, 0, 0 }
@@ -192,7 +194,6 @@ static struct field_set_info ext2_bg_fields[] = {
{ "free_inodes_count", &set_gd.bg_free_inodes_count, 2, parse_uint },
{ "used_dirs_count", &set_gd.bg_used_dirs_count, 2, parse_uint },
{ "flags", &set_gd.bg_flags, 2, parse_uint },
- { "reserved", &set_gd.bg_reserved, 2, parse_uint, FLAG_ARRAY, 2 },
{ "itable_unused", &set_gd.bg_itable_unused, 2, parse_uint },
{ "checksum", &set_gd.bg_checksum, 2, parse_gd_csum },
{ 0, 0, 0, 0 }
diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index dd18ade..16ddcda 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -364,8 +364,8 @@ static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx)
printf("inode #%u, i_extra_size %d\n", pctx->ino,
inode->i_extra_isize);
#endif
- /* i_extra_isize must cover i_extra_isize + i_pad1 at least */
- min = sizeof(inode->i_extra_isize) + sizeof(inode->i_pad1);
+ /* i_extra_isize must cover i_extra_isize + i_checksum_hi at least */
+ min = sizeof(inode->i_extra_isize) + sizeof(inode->i_checksum_hi);
max = EXT2_INODE_SIZE(sb) - EXT2_GOOD_OLD_INODE_SIZE;
/*
* For now we will allow i_extra_isize to be 0, but really
diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
index 16fba53..965fc16 100644
--- a/lib/e2p/feature.c
+++ b/lib/e2p/feature.c
@@ -40,6 +40,8 @@ static struct feature feature_list[] = {
"resize_inode" },
{ E2P_FEATURE_COMPAT, EXT2_FEATURE_COMPAT_LAZY_BG,
"lazy_bg" },
+ { E2P_FEATURE_COMPAT, EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP,
+ "snapshot" },
{ E2P_FEATURE_RO_INCOMPAT, EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER,
"sparse_super" },
@@ -59,6 +61,8 @@ static struct feature feature_list[] = {
"quota" },
{ E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_BIGALLOC,
"bigalloc"},
+ { E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM,
+ "metadata_csum"},
{ E2P_FEATURE_INCOMPAT, EXT2_FEATURE_INCOMPAT_COMPRESSION,
"compression" },
diff --git a/lib/e2p/ls.c b/lib/e2p/ls.c
index 0f36f40..aaacdaa 100644
--- a/lib/e2p/ls.c
+++ b/lib/e2p/ls.c
@@ -413,6 +413,10 @@ void list_super2(struct ext2_super_block * sb, FILE *f)
if (sb->s_grp_quota_inum)
fprintf(f, "Group quota inode: %u\n",
sb->s_grp_quota_inum);
+
+ if (sb->s_feature_ro_compat & EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)
+ fprintf(f, "Checksum: 0x%08x\n",
+ sb->s_checksum);
}
void list_super (struct ext2_super_block * s)
diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index 4fec5db..1c86cb2 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -142,7 +142,9 @@ struct ext2_group_desc
__u16 bg_free_inodes_count; /* Free inodes count */
__u16 bg_used_dirs_count; /* Directories count */
__u16 bg_flags;
- __u32 bg_reserved[2];
+ __u32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */
+ __u16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
+ __u16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
__u16 bg_itable_unused; /* Unused inodes count */
__u16 bg_checksum; /* crc16(s_uuid+grouo_num+group_desc)*/
};
@@ -159,7 +161,9 @@ struct ext4_group_desc
__u16 bg_free_inodes_count; /* Free inodes count */
__u16 bg_used_dirs_count; /* Directories count */
__u16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
- __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
+ __u32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */
+ __u16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
+ __u16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
__u16 bg_itable_unused; /* Unused inodes count */
__u16 bg_checksum; /* crc16(sb_uuid+group+desc) */
__u32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
@@ -169,7 +173,10 @@ struct ext4_group_desc
__u16 bg_free_inodes_count_hi;/* Free inodes count MSB */
__u16 bg_used_dirs_count_hi; /* Directories count MSB */
__u16 bg_itable_unused_hi; /* Unused inodes count MSB */
- __u32 bg_reserved2[3];
+ __u32 bg_exclude_bitmap_hi; /* Exclude bitmap block MSB */
+ __u16 bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
+ __u16 bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
+ __u32 bg_reserved;
};
#define EXT2_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not initialized */
@@ -363,7 +370,8 @@ struct ext2_inode {
__u16 l_i_file_acl_high;
__u16 l_i_uid_high; /* these 2 fields */
__u16 l_i_gid_high; /* were reserved2[0] */
- __u32 l_i_reserved2;
+ __u16 l_i_checksum_lo; /* crc32c(uuid+inum+inode) */
+ __u16 l_i_reserved; /* crc32c(uuid+inum+inode) */
} linux2;
struct {
__u8 h_i_frag; /* Fragment number */
@@ -410,7 +418,8 @@ struct ext2_inode_large {
__u16 l_i_file_acl_high;
__u16 l_i_uid_high; /* these 2 fields */
__u16 l_i_gid_high; /* were reserved2[0] */
- __u32 l_i_reserved2;
+ __u16 l_i_checksum_lo; /* crc32c(uuid+inum+inode) */
+ __u16 l_i_reserved;
} linux2;
struct {
__u8 h_i_frag; /* Fragment number */
@@ -422,7 +431,7 @@ struct ext2_inode_large {
} hurd2;
} osd2; /* OS dependent 2 */
__u16 i_extra_isize;
- __u16 i_pad1;
+ __u16 i_checksum_hi; /* crc32c(uuid+inum+inode) */
__u32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
__u32 i_mtime_extra; /* extra Modification time (nsec << 2 | epoch) */
__u32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
@@ -441,7 +450,7 @@ struct ext2_inode_large {
#define i_gid_low i_gid
#define i_uid_high osd2.linux2.l_i_uid_high
#define i_gid_high osd2.linux2.l_i_gid_high
-#define i_reserved2 osd2.linux2.l_i_reserved2
+#define i_checksum osd2.linux2.l_i_checksum
#else
#if defined(__GNU__)
@@ -623,7 +632,8 @@ struct ext2_super_block {
__u32 s_usr_quota_inum; /* inode number of user quota file */
__u32 s_grp_quota_inum; /* inode number of group quota file */
__u32 s_overhead_blocks; /* overhead blocks/clusters in fs */
- __u32 s_reserved[109]; /* Padding to the end of the block */
+ __u32 s_checksum; /* crc32c(superblock) */
+ __u32 s_reserved[108]; /* Padding to the end of the block */
};
#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
@@ -671,7 +681,9 @@ struct ext2_super_block {
#define EXT2_FEATURE_COMPAT_RESIZE_INODE 0x0010
#define EXT2_FEATURE_COMPAT_DIR_INDEX 0x0020
#define EXT2_FEATURE_COMPAT_LAZY_BG 0x0040
-#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE 0x0080
+/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE 0x0080 not used, legacy */
+#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP 0x0100
+
#define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT2_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
@@ -683,6 +695,7 @@ struct ext2_super_block {
#define EXT4_FEATURE_RO_COMPAT_HAS_SNAPSHOT 0x0080
#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
+#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
#define EXT2_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT2_FEATURE_INCOMPAT_FILETYPE 0x0002
diff --git a/lib/ext2fs/swapfs.c b/lib/ext2fs/swapfs.c
index 87b1a2e..d1c4a56 100644
--- a/lib/ext2fs/swapfs.c
+++ b/lib/ext2fs/swapfs.c
@@ -78,6 +78,8 @@ void ext2fs_swap_super(struct ext2_super_block * sb)
sb->s_snapshot_list = ext2fs_swab32(sb->s_snapshot_list);
sb->s_usr_quota_inum = ext2fs_swab32(sb->s_usr_quota_inum);
sb->s_grp_quota_inum = ext2fs_swab32(sb->s_grp_quota_inum);
+ sb->s_overhead_blocks = ext2fs_swab32(sb->s_overhead_blocks);
+ sb->s_checksum = ext2fs_swab32(sb->s_checksum);
for (i=0; i < 4; i++)
sb->s_hash_seed[i] = ext2fs_swab32(sb->s_hash_seed[i]);
@@ -106,6 +108,11 @@ void ext2fs_swap_group_desc2(ext2_filsys fs, struct ext2_group_desc *gdp)
gdp->bg_free_inodes_count = ext2fs_swab16(gdp->bg_free_inodes_count);
gdp->bg_used_dirs_count = ext2fs_swab16(gdp->bg_used_dirs_count);
gdp->bg_flags = ext2fs_swab16(gdp->bg_flags);
+ gdp->bg_exclude_bitmap_lo = ext2fs_swab32(gdp->bg_exclude_bitmap_lo);
+ gdp->bg_block_bitmap_csum_lo =
+ ext2fs_swab16(gdp->bg_block_bitmap_csum_lo);
+ gdp->bg_inode_bitmap_csum_lo =
+ ext2fs_swab16(gdp->bg_inode_bitmap_csum_lo);
gdp->bg_itable_unused = ext2fs_swab16(gdp->bg_itable_unused);
gdp->bg_checksum = ext2fs_swab16(gdp->bg_checksum);
/* If we're 32-bit, we're done */
@@ -125,6 +132,11 @@ void ext2fs_swap_group_desc2(ext2_filsys fs, struct ext2_group_desc *gdp)
gdp4->bg_used_dirs_count_hi =
ext2fs_swab16(gdp4->bg_used_dirs_count_hi);
gdp4->bg_itable_unused_hi = ext2fs_swab16(gdp4->bg_itable_unused_hi);
+ gdp->bg_exclude_bitmap_hi = ext2fs_swab16(gdp->bg_exclude_bitmap_hi);
+ gdp->bg_block_bitmap_csum_hi =
+ ext2fs_swab16(gdp->bg_block_bitmap_csum_hi);
+ gdp->bg_inode_bitmap_csum_hi =
+ ext2fs_swab16(gdp->bg_inode_bitmap_csum_hi);
}
void ext2fs_swap_group_desc(struct ext2_group_desc *gdp)
@@ -244,8 +256,8 @@ void ext2fs_swap_inode_full(ext2_filsys fs, struct ext2_inode_large *t,
ext2fs_swab16 (f->osd2.linux2.l_i_uid_high);
t->osd2.linux2.l_i_gid_high =
ext2fs_swab16 (f->osd2.linux2.l_i_gid_high);
- t->osd2.linux2.l_i_reserved2 =
- ext2fs_swab32(f->osd2.linux2.l_i_reserved2);
+ t->osd2.linux2.l_i_checksum =
+ ext2fs_swab32(f->osd2.linux2.checksum);
break;
case EXT2_OS_HURD:
t->osd1.hurd1.h_i_translator =
diff --git a/lib/ext2fs/tst_inode_size.c b/lib/ext2fs/tst_inode_size.c
index 962f1cd..a4f6247 100644
--- a/lib/ext2fs/tst_inode_size.c
+++ b/lib/ext2fs/tst_inode_size.c
@@ -61,7 +61,8 @@ void check_structure_fields()
check_field(osd2.linux2.l_i_file_acl_high);
check_field(osd2.linux2.l_i_uid_high);
check_field(osd2.linux2.l_i_gid_high);
- check_field(osd2.linux2.l_i_reserved2);
+ check_field(osd2.linux2.l_i_checksum_lo);
+ check_field(osd2.linux2.l_i_reserved);
printf("Ending offset is %d\n\n", cur_offset);
#endif
}
diff --git a/lib/ext2fs/tst_super_size.c b/lib/ext2fs/tst_super_size.c
index 1e5a524..75659ae 100644
--- a/lib/ext2fs/tst_super_size.c
+++ b/lib/ext2fs/tst_super_size.c
@@ -126,6 +126,7 @@ void check_superblock_fields()
check_field(s_usr_quota_inum);
check_field(s_grp_quota_inum);
check_field(s_overhead_blocks);
+ check_field(s_checksum);
check_field(s_reserved);
printf("Ending offset is %d\n\n", cur_offset);
#endif
--
1.7.4.1.22.gec8e1.dirty
The set_fields commands (set_super_value, set_inode_field,
set_block_group) now handle fields which store in split fields on
ext4's on-disk format. For example, the superblock fields
s_blocks_count and s_blocks_count_hi.
The user can either set the low or high part of the field via
"blocks_count_lo" or "blocks_count_hi", or both parts can be set via
"blocks_count".
Signed-off-by: "Theodore Ts'o" <[email protected]>
---
debugfs/debugfs.h | 2 +
debugfs/set_fields.c | 453 ++++++++++++++++++++++++++++++++------------------
debugfs/util.c | 16 ++
3 files changed, 310 insertions(+), 161 deletions(-)
diff --git a/debugfs/debugfs.h b/debugfs/debugfs.h
index 0ea2474..24c4dce 100644
--- a/debugfs/debugfs.h
+++ b/debugfs/debugfs.h
@@ -50,6 +50,8 @@ extern int debugfs_read_inode_full(ext2_ino_t ino, struct ext2_inode * inode,
const char *cmd, int bufsize);
extern int debugfs_write_inode(ext2_ino_t ino, struct ext2_inode * inode,
const char *cmd);
+extern int debugfs_write_inode_full(ext2_ino_t ino, struct ext2_inode * inode,
+ const char *cmd, int bufsize);
extern int debugfs_write_new_inode(ext2_ino_t ino, struct ext2_inode * inode,
const char *cmd);
diff --git a/debugfs/set_fields.c b/debugfs/set_fields.c
index 1a5ec7f..06d0ad8 100644
--- a/debugfs/set_fields.c
+++ b/debugfs/set_fields.c
@@ -40,8 +40,9 @@
#include "e2p/e2p.h"
static struct ext2_super_block set_sb;
-static struct ext2_inode set_inode;
+static struct ext2_inode_large set_inode;
static struct ext2_group_desc set_gd;
+static struct ext4_group_desc set_gd4;
static dgrp_t set_bg;
static ext2_ino_t set_ino;
static int array_idx;
@@ -51,154 +52,208 @@ static int array_idx;
struct field_set_info {
const char *name;
void *ptr;
+ void *ptr2;
unsigned int size;
- errcode_t (*func)(struct field_set_info *info, char *arg);
+ errcode_t (*func)(struct field_set_info *info, char *field, char *arg);
int flags;
int max_idx;
};
-static errcode_t parse_uint(struct field_set_info *info, char *arg);
-static errcode_t parse_int(struct field_set_info *info, char *arg);
-static errcode_t parse_string(struct field_set_info *info, char *arg);
-static errcode_t parse_uuid(struct field_set_info *info, char *arg);
-static errcode_t parse_hashalg(struct field_set_info *info, char *arg);
-static errcode_t parse_time(struct field_set_info *info, char *arg);
-static errcode_t parse_bmap(struct field_set_info *info, char *arg);
-static errcode_t parse_gd_csum(struct field_set_info *info, char *arg);
+static errcode_t parse_uint(struct field_set_info *info, char *field, char *arg);
+static errcode_t parse_int(struct field_set_info *info, char *field, char *arg);
+static errcode_t parse_string(struct field_set_info *info, char *field, char *arg);
+static errcode_t parse_uuid(struct field_set_info *info, char *field, char *arg);
+static errcode_t parse_hashalg(struct field_set_info *info, char *field, char *arg);
+static errcode_t parse_time(struct field_set_info *info, char *field, char *arg);
+static errcode_t parse_bmap(struct field_set_info *info, char *field, char *arg);
+static errcode_t parse_gd_csum(struct field_set_info *info, char *field, char *arg);
static struct field_set_info super_fields[] = {
- { "inodes_count", &set_sb.s_inodes_count, 4, parse_uint },
- { "blocks_count", &set_sb.s_blocks_count, 4, parse_uint },
- { "r_blocks_count", &set_sb.s_r_blocks_count, 4, parse_uint },
- { "free_blocks_count", &set_sb.s_free_blocks_count, 4, parse_uint },
- { "free_inodes_count", &set_sb.s_free_inodes_count, 4, parse_uint },
- { "first_data_block", &set_sb.s_first_data_block, 4, parse_uint },
- { "log_block_size", &set_sb.s_log_block_size, 4, parse_uint },
- { "log_cluster_size", &set_sb.s_log_cluster_size, 4, parse_int },
- { "blocks_per_group", &set_sb.s_blocks_per_group, 4, parse_uint },
- { "clusters_per_group", &set_sb.s_clusters_per_group, 4, parse_uint },
- { "inodes_per_group", &set_sb.s_inodes_per_group, 4, parse_uint },
- { "mtime", &set_sb.s_mtime, 4, parse_time },
- { "wtime", &set_sb.s_wtime, 4, parse_time },
- { "mnt_count", &set_sb.s_mnt_count, 2, parse_uint },
- { "max_mnt_count", &set_sb.s_max_mnt_count, 2, parse_int },
+ { "inodes_count", &set_sb.s_inodes_count, NULL, 4, parse_uint },
+ { "blocks_count", &set_sb.s_blocks_count, &set_sb.s_blocks_count_hi,
+ 4, parse_uint },
+ { "r_blocks_count", &set_sb.s_r_blocks_count,
+ &set_sb.s_r_blocks_count_hi, 4, parse_uint },
+ { "free_blocks_count", &set_sb.s_free_blocks_count,
+ &set_sb.s_free_blocks_hi, 4, parse_uint },
+ { "free_inodes_count", &set_sb.s_free_inodes_count, NULL, 4, parse_uint },
+ { "first_data_block", &set_sb.s_first_data_block, NULL, 4, parse_uint },
+ { "log_block_size", &set_sb.s_log_block_size, NULL, 4, parse_uint },
+ { "log_cluster_size", &set_sb.s_log_cluster_size, NULL, 4, parse_int },
+ { "blocks_per_group", &set_sb.s_blocks_per_group, NULL, 4, parse_uint },
+ { "clusters_per_group", &set_sb.s_clusters_per_group, NULL, 4, parse_uint },
+ { "inodes_per_group", &set_sb.s_inodes_per_group, NULL, 4, parse_uint },
+ { "mtime", &set_sb.s_mtime, NULL, 4, parse_time },
+ { "wtime", &set_sb.s_wtime, NULL, 4, parse_time },
+ { "mnt_count", &set_sb.s_mnt_count, NULL, 2, parse_uint },
+ { "max_mnt_count", &set_sb.s_max_mnt_count, NULL, 2, parse_int },
/* s_magic */
- { "state", &set_sb.s_state, 2, parse_uint },
- { "errors", &set_sb.s_errors, 2, parse_uint },
- { "minor_rev_level", &set_sb.s_minor_rev_level, 2, parse_uint },
- { "lastcheck", &set_sb.s_lastcheck, 4, parse_time },
- { "checkinterval", &set_sb.s_checkinterval, 4, parse_uint },
- { "creator_os", &set_sb.s_creator_os, 4, parse_uint },
- { "rev_level", &set_sb.s_rev_level, 4, parse_uint },
- { "def_resuid", &set_sb.s_def_resuid, 2, parse_uint },
- { "def_resgid", &set_sb.s_def_resgid, 2, parse_uint },
- { "first_ino", &set_sb.s_first_ino, 4, parse_uint },
- { "inode_size", &set_sb.s_inode_size, 2, parse_uint },
- { "block_group_nr", &set_sb.s_block_group_nr, 2, parse_uint },
- { "feature_compat", &set_sb.s_feature_compat, 4, parse_uint },
- { "feature_incompat", &set_sb.s_feature_incompat, 4, parse_uint },
- { "feature_ro_compat", &set_sb.s_feature_ro_compat, 4, parse_uint },
- { "uuid", &set_sb.s_uuid, 16, parse_uuid },
- { "volume_name", &set_sb.s_volume_name, 16, parse_string },
- { "last_mounted", &set_sb.s_last_mounted, 64, parse_string },
- { "lastcheck", &set_sb.s_lastcheck, 4, parse_uint },
- { "algorithm_usage_bitmap", &set_sb.s_algorithm_usage_bitmap,
+ { "state", &set_sb.s_state, NULL, 2, parse_uint },
+ { "errors", &set_sb.s_errors, NULL, 2, parse_uint },
+ { "minor_rev_level", &set_sb.s_minor_rev_level, NULL, 2, parse_uint },
+ { "lastcheck", &set_sb.s_lastcheck, NULL, 4, parse_time },
+ { "checkinterval", &set_sb.s_checkinterval, NULL, 4, parse_uint },
+ { "creator_os", &set_sb.s_creator_os, NULL, 4, parse_uint },
+ { "rev_level", &set_sb.s_rev_level, NULL, 4, parse_uint },
+ { "def_resuid", &set_sb.s_def_resuid, NULL, 2, parse_uint },
+ { "def_resgid", &set_sb.s_def_resgid, NULL, 2, parse_uint },
+ { "first_ino", &set_sb.s_first_ino, NULL, 4, parse_uint },
+ { "inode_size", &set_sb.s_inode_size, NULL, 2, parse_uint },
+ { "block_group_nr", &set_sb.s_block_group_nr, NULL, 2, parse_uint },
+ { "feature_compat", &set_sb.s_feature_compat, NULL, 4, parse_uint },
+ { "feature_incompat", &set_sb.s_feature_incompat, NULL, 4, parse_uint },
+ { "feature_ro_compat", &set_sb.s_feature_ro_compat, NULL, 4, parse_uint },
+ { "uuid", &set_sb.s_uuid, NULL, 16, parse_uuid },
+ { "volume_name", &set_sb.s_volume_name, NULL, 16, parse_string },
+ { "last_mounted", &set_sb.s_last_mounted, NULL, 64, parse_string },
+ { "lastcheck", &set_sb.s_lastcheck, NULL, 4, parse_uint },
+ { "algorithm_usage_bitmap", &set_sb.s_algorithm_usage_bitmap, NULL,
4, parse_uint },
- { "prealloc_blocks", &set_sb.s_prealloc_blocks, 1, parse_uint },
- { "prealloc_dir_blocks", &set_sb.s_prealloc_dir_blocks, 1,
+ { "prealloc_blocks", &set_sb.s_prealloc_blocks, NULL, 1, parse_uint },
+ { "prealloc_dir_blocks", &set_sb.s_prealloc_dir_blocks, NULL, 1,
parse_uint },
- { "reserved_gdt_blocks", &set_sb.s_reserved_gdt_blocks, 2,
+ { "reserved_gdt_blocks", &set_sb.s_reserved_gdt_blocks, NULL, 2,
parse_uint },
- { "journal_uuid", &set_sb.s_journal_uuid, 16, parse_uuid },
- { "journal_inum", &set_sb.s_journal_inum, 4, parse_uint },
- { "journal_dev", &set_sb.s_journal_dev, 4, parse_uint },
- { "last_orphan", &set_sb.s_last_orphan, 4, parse_uint },
- { "hash_seed", &set_sb.s_hash_seed, 16, parse_uuid },
- { "def_hash_version", &set_sb.s_def_hash_version, 1, parse_hashalg },
- { "jnl_backup_type", &set_sb.s_jnl_backup_type, 1, parse_uint },
- { "desc_size", &set_sb.s_desc_size, 2, parse_uint },
- { "default_mount_opts", &set_sb.s_default_mount_opts, 4, parse_uint },
- { "first_meta_bg", &set_sb.s_first_meta_bg, 4, parse_uint },
- { "mkfs_time", &set_sb.s_mkfs_time, 4, parse_time },
- { "jnl_blocks", &set_sb.s_jnl_blocks[0], 4, parse_uint, FLAG_ARRAY,
+ { "journal_uuid", &set_sb.s_journal_uuid, NULL, 16, parse_uuid },
+ { "journal_inum", &set_sb.s_journal_inum, NULL, 4, parse_uint },
+ { "journal_dev", &set_sb.s_journal_dev, NULL, 4, parse_uint },
+ { "last_orphan", &set_sb.s_last_orphan, NULL, 4, parse_uint },
+ { "hash_seed", &set_sb.s_hash_seed, NULL, 16, parse_uuid },
+ { "def_hash_version", &set_sb.s_def_hash_version, NULL, 1, parse_hashalg },
+ { "jnl_backup_type", &set_sb.s_jnl_backup_type, NULL, 1, parse_uint },
+ { "desc_size", &set_sb.s_desc_size, NULL, 2, parse_uint },
+ { "default_mount_opts", &set_sb.s_default_mount_opts, NULL, 4, parse_uint },
+ { "first_meta_bg", &set_sb.s_first_meta_bg, NULL, 4, parse_uint },
+ { "mkfs_time", &set_sb.s_mkfs_time, NULL, 4, parse_time },
+ { "jnl_blocks", &set_sb.s_jnl_blocks[0], NULL, 4, parse_uint, FLAG_ARRAY,
17 },
- { "blocks_count_hi", &set_sb.s_blocks_count_hi, 4, parse_uint },
- { "r_blocks_count_hi", &set_sb.s_r_blocks_count_hi, 4, parse_uint },
- { "free_blocks_hi", &set_sb.s_free_blocks_hi, 4, parse_uint },
- { "min_extra_isize", &set_sb.s_min_extra_isize, 2, parse_uint },
- { "want_extra_isize", &set_sb.s_want_extra_isize, 2, parse_uint },
- { "flags", &set_sb.s_flags, 4, parse_uint },
- { "raid_stride", &set_sb.s_raid_stride, 2, parse_uint },
- { "min_extra_isize", &set_sb.s_min_extra_isize, 4, parse_uint },
- { "mmp_interval", &set_sb.s_mmp_interval, 2, parse_uint },
- { "mmp_block", &set_sb.s_mmp_block, 8, parse_uint },
- { "raid_stripe_width", &set_sb.s_raid_stripe_width, 4, parse_uint },
- { "log_groups_per_flex", &set_sb.s_log_groups_per_flex, 1, parse_uint },
- { "kbytes_written", &set_sb.s_kbytes_written, 8, parse_uint },
- { "snapshot_inum", &set_sb.s_snapshot_inum, 4, parse_uint },
- { "snapshot_id", &set_sb.s_snapshot_id, 4, parse_uint },
- { "snapshot_r_blocks_count", &set_sb.s_snapshot_r_blocks_count, 8,
- parse_uint },
- { "snapshot_list", &set_sb.s_snapshot_list, 4, parse_uint },
- { "mount_opts", &set_sb.s_mount_opts, 64, parse_string },
- { "usr_quota_inum", &set_sb.s_usr_quota_inum, 4, parse_uint },
- { "grp_quota_inum", &set_sb.s_grp_quota_inum, 4, parse_uint },
- { "overhead_blocks", &set_sb.s_overhead_blocks, 4, parse_uint },
- { "checksum", &set_sb.s_checksum, 4, parse_uint },
+ { "min_extra_isize", &set_sb.s_min_extra_isize, NULL, 2, parse_uint },
+ { "want_extra_isize", &set_sb.s_want_extra_isize, NULL, 2, parse_uint },
+ { "flags", &set_sb.s_flags, NULL, 4, parse_uint },
+ { "raid_stride", &set_sb.s_raid_stride, NULL, 2, parse_uint },
+ { "min_extra_isize", &set_sb.s_min_extra_isize, NULL, 4, parse_uint },
+ { "mmp_interval", &set_sb.s_mmp_interval, NULL, 2, parse_uint },
+ { "mmp_block", &set_sb.s_mmp_block, NULL, 8, parse_uint },
+ { "raid_stripe_width", &set_sb.s_raid_stripe_width, NULL, 4, parse_uint },
+ { "log_groups_per_flex", &set_sb.s_log_groups_per_flex, NULL, 1, parse_uint },
+ { "kbytes_written", &set_sb.s_kbytes_written, NULL, 8, parse_uint },
+ { "snapshot_inum", &set_sb.s_snapshot_inum, NULL, 4, parse_uint },
+ { "snapshot_id", &set_sb.s_snapshot_id, NULL, 4, parse_uint },
+ { "snapshot_r_blocks_count", &set_sb.s_snapshot_r_blocks_count,
+ NULL, 8, parse_uint },
+ { "snapshot_list", &set_sb.s_snapshot_list, NULL, 4, parse_uint },
+ { "mount_opts", &set_sb.s_mount_opts, NULL, 64, parse_string },
+ { "usr_quota_inum", &set_sb.s_usr_quota_inum, NULL, 4, parse_uint },
+ { "grp_quota_inum", &set_sb.s_grp_quota_inum, NULL, 4, parse_uint },
+ { "overhead_blocks", &set_sb.s_overhead_blocks, NULL, 4, parse_uint },
+ { "checksum", &set_sb.s_checksum, NULL, 4, parse_uint },
{ 0, 0, 0, 0 }
};
static struct field_set_info inode_fields[] = {
- { "inodes_count", &set_sb.s_inodes_count, 4, parse_uint },
- { "mode", &set_inode.i_mode, 2, parse_uint },
- { "uid", &set_inode.i_uid, 2, parse_uint },
- { "size", &set_inode.i_size, 4, parse_uint },
- { "atime", &set_inode.i_atime, 4, parse_time },
- { "ctime", &set_inode.i_ctime, 4, parse_time },
- { "mtime", &set_inode.i_mtime, 4, parse_time },
- { "dtime", &set_inode.i_dtime, 4, parse_time },
- { "gid", &set_inode.i_gid, 2, parse_uint },
- { "links_count", &set_inode.i_links_count, 2, parse_uint },
- { "blocks", &set_inode.i_blocks, 4, parse_uint },
- { "flags", &set_inode.i_flags, 4, parse_uint },
- { "version", &set_inode.osd1.linux1.l_i_version, 4, parse_uint },
- { "translator", &set_inode.osd1.hurd1.h_i_translator, 4, parse_uint },
- { "block", &set_inode.i_block[0], 4, parse_uint, FLAG_ARRAY,
+ { "inodes_count", &set_sb.s_inodes_count, NULL, 4, parse_uint },
+ { "mode", &set_inode.i_mode, NULL, 2, parse_uint },
+ { "uid", &set_inode.i_uid, &set_inode.osd2.linux2.l_i_uid_high,
+ 2, parse_uint },
+ { "size", &set_inode.i_size, &set_inode.i_size_high, 4, parse_uint },
+ { "atime", &set_inode.i_atime, NULL, 4, parse_time },
+ { "ctime", &set_inode.i_ctime, NULL, 4, parse_time },
+ { "mtime", &set_inode.i_mtime, NULL, 4, parse_time },
+ { "dtime", &set_inode.i_dtime, NULL, 4, parse_time },
+ { "gid", &set_inode.i_gid, &set_inode.osd2.linux2.l_i_gid_high,
+ 2, parse_uint },
+ { "links_count", &set_inode.i_links_count, NULL, 2, parse_uint },
+ /* Special case: i_blocks is 4 bytes, i_blocks_high is 2 bytes */
+ { "blocks", &set_inode.i_blocks, &set_inode.osd2.linux2.l_i_blocks_hi,
+ 6, parse_uint },
+ { "flags", &set_inode.i_flags, NULL, 4, parse_uint },
+ { "version", &set_inode.osd1.linux1.l_i_version,
+ &set_inode.i_version_hi, 4, parse_uint },
+ { "translator", &set_inode.osd1.hurd1.h_i_translator, NULL, 4, parse_uint },
+ { "block", &set_inode.i_block[0], NULL, 4, parse_uint, FLAG_ARRAY,
EXT2_NDIR_BLOCKS },
- { "block[IND]", &set_inode.i_block[EXT2_IND_BLOCK], 4, parse_uint },
- { "block[DIND]", &set_inode.i_block[EXT2_DIND_BLOCK], 4, parse_uint },
- { "block[TIND]", &set_inode.i_block[EXT2_TIND_BLOCK], 4, parse_uint },
- { "generation", &set_inode.i_generation, 4, parse_uint },
- { "file_acl", &set_inode.i_file_acl, 4, parse_uint },
- { "file_acl_high", &set_inode.osd2.linux2.l_i_file_acl_high, 2,
- parse_uint },
- { "dir_acl", &set_inode.i_dir_acl, 4, parse_uint },
- { "size_high", &set_inode.i_size_high, 4, parse_uint },
- { "faddr", &set_inode.i_faddr, 4, parse_uint },
- { "blocks_hi", &set_inode.osd2.linux2.l_i_blocks_hi, 2, parse_uint },
- { "frag", &set_inode.osd2.hurd2.h_i_frag, 1, parse_uint },
- { "fsize", &set_inode.osd2.hurd2.h_i_fsize, 1, parse_uint },
- { "uid_high", &set_inode.osd2.linux2.l_i_uid_high, 2, parse_uint },
- { "gid_high", &set_inode.osd2.linux2.l_i_gid_high, 2, parse_uint },
- { "checksum", &set_inode.osd2.linux2.l_i_checksum_lo, 2, parse_uint },
- { "author", &set_inode.osd2.hurd2.h_i_author, 4, parse_uint },
- { "bmap", NULL, 4, parse_bmap, FLAG_ARRAY },
+ { "block[IND]", &set_inode.i_block[EXT2_IND_BLOCK], NULL, 4, parse_uint },
+ { "block[DIND]", &set_inode.i_block[EXT2_DIND_BLOCK], NULL, 4, parse_uint },
+ { "block[TIND]", &set_inode.i_block[EXT2_TIND_BLOCK], NULL, 4, parse_uint },
+ { "generation", &set_inode.i_generation, NULL, 4, parse_uint },
+ /* Special case: i_file_acl_high is 2 bytes */
+ { "file_acl", &set_inode.i_file_acl,
+ &set_inode.osd2.linux2.l_i_file_acl_high, 6, parse_uint },
+ { "dir_acl", &set_inode.i_dir_acl, NULL, 4, parse_uint },
+ { "faddr", &set_inode.i_faddr, NULL, 4, parse_uint },
+ { "frag", &set_inode.osd2.hurd2.h_i_frag, NULL, 1, parse_uint },
+ { "fsize", &set_inode.osd2.hurd2.h_i_fsize, NULL, 1, parse_uint },
+ { "checksum", &set_inode.osd2.linux2.l_i_checksum_lo,
+ &set_inode.i_checksum_hi, 2, parse_uint },
+ { "author", &set_inode.osd2.hurd2.h_i_author, NULL,
+ 4, parse_uint },
+ { "extra_isize", &set_inode.i_extra_isize, NULL,
+ 2, parse_uint },
+ { "ctime_extra", &set_inode.i_ctime_extra, NULL,
+ 4, parse_uint },
+ { "mtime_extra", &set_inode.i_mtime_extra, NULL,
+ 4, parse_uint },
+ { "atime_extra", &set_inode.i_atime_extra, NULL,
+ 4, parse_uint },
+ { "crtime", &set_inode.i_crtime, NULL, 4, parse_uint },
+ { "crtime_extra", &set_inode.i_crtime_extra, NULL,
+ 4, parse_uint },
+ { "bmap", NULL, NULL, 4, parse_bmap, FLAG_ARRAY },
{ 0, 0, 0, 0 }
};
static struct field_set_info ext2_bg_fields[] = {
- { "block_bitmap", &set_gd.bg_block_bitmap, 4, parse_uint },
- { "inode_bitmap", &set_gd.bg_inode_bitmap, 4, parse_uint },
- { "inode_table", &set_gd.bg_inode_table, 4, parse_uint },
- { "free_blocks_count", &set_gd.bg_free_blocks_count, 2, parse_uint },
- { "free_inodes_count", &set_gd.bg_free_inodes_count, 2, parse_uint },
- { "used_dirs_count", &set_gd.bg_used_dirs_count, 2, parse_uint },
- { "flags", &set_gd.bg_flags, 2, parse_uint },
- { "itable_unused", &set_gd.bg_itable_unused, 2, parse_uint },
- { "checksum", &set_gd.bg_checksum, 2, parse_gd_csum },
+ { "block_bitmap", &set_gd.bg_block_bitmap, NULL, 4, parse_uint },
+ { "inode_bitmap", &set_gd.bg_inode_bitmap, NULL, 4, parse_uint },
+ { "inode_table", &set_gd.bg_inode_table, NULL, 4, parse_uint },
+ { "free_blocks_count", &set_gd.bg_free_blocks_count, NULL, 2, parse_uint },
+ { "free_inodes_count", &set_gd.bg_free_inodes_count, NULL, 2, parse_uint },
+ { "used_dirs_count", &set_gd.bg_used_dirs_count, NULL, 2, parse_uint },
+ { "flags", &set_gd.bg_flags, NULL, 2, parse_uint },
+ { "itable_unused", &set_gd.bg_itable_unused, NULL, 2, parse_uint },
+ { "checksum", &set_gd.bg_checksum, NULL, 2, parse_gd_csum },
{ 0, 0, 0, 0 }
};
+static struct field_set_info ext4_bg_fields[] = {
+ { "block_bitmap", &set_gd4.bg_block_bitmap,
+ &set_gd4.bg_block_bitmap_hi, 4, parse_uint },
+ { "inode_bitmap", &set_gd4.bg_inode_bitmap,
+ &set_gd4.bg_inode_bitmap_hi, 4, parse_uint },
+ { "inode_table", &set_gd4.bg_inode_table,
+ &set_gd4.bg_inode_table_hi, 4, parse_uint },
+ { "free_blocks_count", &set_gd4.bg_free_blocks_count,
+ &set_gd4.bg_free_blocks_count_hi, 2, parse_uint },
+ { "free_inodes_count", &set_gd4.bg_free_inodes_count,
+ &set_gd4.bg_free_inodes_count_hi, 2, parse_uint },
+ { "used_dirs_count", &set_gd4.bg_used_dirs_count,
+ &set_gd4.bg_used_dirs_count_hi, 2, parse_uint },
+ { "flags", &set_gd4.bg_flags, NULL, 2, parse_uint },
+ { "exclude_bitmap", &set_gd4.bg_exclude_bitmap_lo,
+ &set_gd4.bg_exclude_bitmap_hi, 4, parse_uint },
+ { "block_bitmap_csum", &set_gd4.bg_block_bitmap_csum_lo,
+ &set_gd4.bg_block_bitmap_csum_hi, 2, parse_uint },
+ { "inode_bitmap_csum", &set_gd4.bg_inode_bitmap_csum_lo,
+ &set_gd4.bg_inode_bitmap_csum_hi, 2, parse_uint },
+ { "itable_unused", &set_gd4.bg_itable_unused,
+ &set_gd4.bg_itable_unused_hi, 2, parse_uint },
+ { "checksum", &set_gd4.bg_checksum, NULL, 2, parse_gd_csum },
+ { 0, 0, 0, 0 }
+};
+
+static int check_suffix(const char *field)
+{
+ int len = strlen(field);
+
+ if (len <= 3)
+ return 0;
+ field += len-3;
+ if (!strcmp(field, "_lo"))
+ return 1;
+ if (!strcmp(field, "_hi"))
+ return 2;
+ return 0;
+}
static struct field_set_info *find_field(struct field_set_info *fields,
char *field)
@@ -206,7 +261,7 @@ static struct field_set_info *find_field(struct field_set_info *fields,
struct field_set_info *ss;
const char *prefix;
char *arg, *delim, *idx, *tmp;
- int prefix_len;
+ int suffix, prefix_len;
if (fields == super_fields)
prefix = "s_";
@@ -241,14 +296,23 @@ static struct field_set_info *find_field(struct field_set_info *fields,
idx = 0;
}
+ /*
+ * If there is a valid _hi or a _lo suffix, strip it off
+ */
+ suffix = check_suffix(arg);
+ if (suffix > 0)
+ arg[strlen(arg)-3] = 0;
+
for (ss = fields ; ss->name ; ss++) {
+ if (suffix && ss->ptr2 == 0)
+ continue;
if (ss->flags & FLAG_ARRAY) {
if (!idx || (strcmp(ss->name, arg) != 0))
continue;
if (ss->max_idx > 0 && array_idx >= ss->max_idx)
continue;
} else {
- if (strcmp(ss->name, field) != 0)
+ if (strcmp(ss->name, arg) != 0)
continue;
}
free(arg);
@@ -258,10 +322,19 @@ static struct field_set_info *find_field(struct field_set_info *fields,
return NULL;
}
-static errcode_t parse_uint(struct field_set_info *info, char *arg)
+/*
+ * Note: info->size == 6 is special; this means a base size 4 bytes,
+ * and secondiory (high) size of 2 bytes. This is needed for the
+ * special case of i_blocks_high and i_file_acl_high.
+ */
+static errcode_t parse_uint(struct field_set_info *info, char *field,
+ char *arg)
{
- unsigned long long num, limit;
+ unsigned long long n, num, mask, limit;
+ int suffix = check_suffix(field);
char *tmp;
+ void *field1 = info->ptr, *field2 = info->ptr2;
+ int size = (info->size == 6) ? 4 : info->size;
union {
__u64 *ptr64;
__u32 *ptr32;
@@ -269,7 +342,14 @@ static errcode_t parse_uint(struct field_set_info *info, char *arg)
__u8 *ptr8;
} u;
- u.ptr8 = (__u8 *) info->ptr;
+ if (suffix == 1)
+ field2 = 0;
+ if (suffix == 2) {
+ field1 = field2;
+ field2 = 0;
+ }
+
+ u.ptr8 = (__u8 *) field1;
if (info->flags & FLAG_ARRAY)
u.ptr8 += array_idx * info->size;
@@ -280,30 +360,55 @@ static errcode_t parse_uint(struct field_set_info *info, char *arg)
arg, info->name);
return EINVAL;
}
+ mask = ~0ULL >> ((8 - size) * 8);
limit = ~0ULL >> ((8 - info->size) * 8);
+ if (field2 && info->size != 6)
+ limit = ~0ULL >> ((8 - info->size*2) * 8);
+
if (num > limit) {
fprintf(stderr, "Value '%s' exceeds field %s maximum %llu.\n",
arg, info->name, limit);
return EINVAL;
}
- switch (info->size) {
+ n = num & mask;
+ switch (size) {
case 8:
- *u.ptr64 = num;
+ *u.ptr64 = n;
break;
case 4:
- *u.ptr32 = num;
+ *u.ptr32 = n;
break;
case 2:
- *u.ptr16 = num;
+ *u.ptr16 = n;
break;
case 1:
- *u.ptr8 = num;
+ *u.ptr8 = n;
+ break;
+ }
+ if (!field2)
+ return 0;
+ n = num >> (size*8);
+ u.ptr8 = (__u8 *) field2;
+ if (info->size == 6)
+ size = 2;
+ switch (size) {
+ case 8:
+ *u.ptr64 = n;
+ break;
+ case 4:
+ *u.ptr32 = n;
+ break;
+ case 2:
+ *u.ptr16 = n;
+ break;
+ case 1:
+ *u.ptr8 = n;
break;
}
return 0;
}
-static errcode_t parse_int(struct field_set_info *info, char *arg)
+static errcode_t parse_int(struct field_set_info *info, char *field, char *arg)
{
long num;
char *tmp;
@@ -334,7 +439,8 @@ static errcode_t parse_int(struct field_set_info *info, char *arg)
return 0;
}
-static errcode_t parse_string(struct field_set_info *info, char *arg)
+static errcode_t parse_string(struct field_set_info *info, char *field,
+ char *arg)
{
char *cp = (char *) info->ptr;
@@ -347,7 +453,7 @@ static errcode_t parse_string(struct field_set_info *info, char *arg)
return 0;
}
-static errcode_t parse_time(struct field_set_info *info, char *arg)
+static errcode_t parse_time(struct field_set_info *info, char *field, char *arg)
{
time_t t;
__u32 *ptr32;
@@ -365,7 +471,7 @@ static errcode_t parse_time(struct field_set_info *info, char *arg)
return 0;
}
-static errcode_t parse_uuid(struct field_set_info *info, char *arg)
+static errcode_t parse_uuid(struct field_set_info *info, char *field, char *arg)
{
unsigned char * p = (unsigned char *) info->ptr;
@@ -383,7 +489,8 @@ static errcode_t parse_uuid(struct field_set_info *info, char *arg)
return 0;
}
-static errcode_t parse_hashalg(struct field_set_info *info, char *arg)
+static errcode_t parse_hashalg(struct field_set_info *info, char *field,
+ char *arg)
{
int hashv;
unsigned char *p = (unsigned char *) info->ptr;
@@ -397,7 +504,8 @@ static errcode_t parse_hashalg(struct field_set_info *info, char *arg)
return 0;
}
-static errcode_t parse_bmap(struct field_set_info *info, char *arg)
+static errcode_t parse_bmap(struct field_set_info *info, char *field,
+ char *arg)
{
unsigned long num;
blk_t blk;
@@ -412,15 +520,17 @@ static errcode_t parse_bmap(struct field_set_info *info, char *arg)
}
blk = num;
- retval = ext2fs_bmap(current_fs, set_ino, &set_inode, 0, BMAP_SET,
- array_idx, &blk);
+ retval = ext2fs_bmap(current_fs, set_ino,
+ (struct ext2_inode *) &set_inode,
+ 0, BMAP_SET, array_idx, &blk);
if (retval) {
com_err("set_inode", retval, "while setting block map");
}
return retval;
}
-static errcode_t parse_gd_csum(struct field_set_info *info, char *arg)
+static errcode_t parse_gd_csum(struct field_set_info *info, char *field,
+ char *arg)
{
if (strcmp(arg, "calc") == 0) {
@@ -434,7 +544,7 @@ static errcode_t parse_gd_csum(struct field_set_info *info, char *arg)
return 0;
}
- return parse_uint(info, arg);
+ return parse_uint(info, field, arg);
}
static void print_possible_fields(struct field_set_info *fields)
@@ -474,6 +584,8 @@ static void print_possible_fields(struct field_set_info *fields)
type = "date/time";
else if (ss->func == parse_bmap)
type = "set physical->logical block map";
+ else if (ss->func == parse_gd_csum)
+ type = "unsigned integer OR \"calc\"";
strcpy(name, ss->name);
if (ss->flags & FLAG_ARRAY) {
if (ss->max_idx > 0)
@@ -482,7 +594,9 @@ static void print_possible_fields(struct field_set_info *fields)
strcpy(idx, "[]");
strcat(name, idx);
}
- fprintf(f, "\t%-20s\t%s\n", name, type);
+ if (ss->ptr2)
+ strcat(name, "[_hi|_lo]");
+ fprintf(f, "\t%-25s\t%s\n", name, type);
}
close_pager(f);
}
@@ -509,7 +623,7 @@ void do_set_super(int argc, char *argv[])
return;
}
set_sb = *current_fs->super;
- if (ss->func(ss, argv[2]) == 0) {
+ if (ss->func(ss, argv[1], argv[2]) == 0) {
*current_fs->super = set_sb;
ext2fs_mark_super_dirty(current_fs);
}
@@ -540,11 +654,15 @@ void do_set_inode(int argc, char *argv[])
if (!set_ino)
return;
- if (debugfs_read_inode(set_ino, &set_inode, argv[1]))
+ if (debugfs_read_inode_full(set_ino,
+ (struct ext2_inode *) &set_inode, argv[1],
+ sizeof(set_inode)))
return;
- if (ss->func(ss, argv[3]) == 0) {
- if (debugfs_write_inode(set_ino, &set_inode, argv[1]))
+ if (ss->func(ss, argv[2], argv[3]) == 0) {
+ if (debugfs_write_inode_full(set_ino,
+ (struct ext2_inode *) &set_inode,
+ argv[1], sizeof(set_inode)))
return;
}
}
@@ -554,11 +672,29 @@ void do_set_block_group_descriptor(int argc, char *argv[])
const char *usage = "<bg number> <field> <value>\n"
"\t\"set_block_group_descriptor -l\" will list the names of "
"the fields in a block group descriptor\n\twhich can be set.";
+ struct field_set_info *table;
struct field_set_info *ss;
char *end;
+ void *edit, *target;
+ int size;
+
+ /*
+ *Determine whether we are editing an ext2 or ext4 block
+ * group descriptor
+ */
+ if (current_fs && current_fs->super->s_feature_incompat &
+ EXT4_FEATURE_INCOMPAT_64BIT) {
+ table = ext4_bg_fields;
+ edit = &set_gd4;
+ size = sizeof(set_gd4);
+ } else {
+ table = ext2_bg_fields;
+ edit = &set_gd;
+ size = sizeof(set_gd);
+ }
if ((argc == 2) && !strcmp(argv[1], "-l")) {
- print_possible_fields(ext2_bg_fields);
+ print_possible_fields(table);
return;
}
@@ -577,20 +713,15 @@ void do_set_block_group_descriptor(int argc, char *argv[])
return;
}
-
- if ((ss = find_field(ext2_bg_fields, argv[2])) == 0) {
+ if ((ss = find_field(table, argv[2])) == 0) {
com_err(argv[0], 0, "invalid field specifier: %s", argv[2]);
return;
}
- memcpy(&set_gd, ext2fs_group_desc(current_fs,
- current_fs->group_desc, set_bg),
- sizeof(set_gd));
-
- if (ss->func(ss, argv[3]) == 0) {
- memcpy(ext2fs_group_desc(current_fs,
- current_fs->group_desc, set_bg),
- &set_gd, sizeof(set_gd));
+ target = ext2fs_group_desc(current_fs, current_fs->group_desc, set_bg);
+ memcpy(edit, target, size);
+ if (ss->func(ss, argv[2], argv[3]) == 0) {
+ memcpy(target, edit, size);
ext2fs_mark_super_dirty(current_fs);
}
}
diff --git a/debugfs/util.c b/debugfs/util.c
index c3ac6f8..34fc179 100644
--- a/debugfs/util.c
+++ b/debugfs/util.c
@@ -401,6 +401,22 @@ int debugfs_read_inode(ext2_ino_t ino, struct ext2_inode * inode,
return 0;
}
+int debugfs_write_inode_full(ext2_ino_t ino,
+ struct ext2_inode *inode,
+ const char *cmd,
+ int bufsize)
+{
+ int retval;
+
+ retval = ext2fs_write_inode_full(current_fs, ino,
+ inode, bufsize);
+ if (retval) {
+ com_err(cmd, retval, "while writing inode %u", ino);
+ return 1;
+ }
+ return 0;
+}
+
int debugfs_write_inode(ext2_ino_t ino, struct ext2_inode * inode,
const char *cmd)
{
--
1.7.4.1.22.gec8e1.dirty
On 2011-09-15, at 4:50 PM, Theodore Ts'o wrote:
> Reserve EXT4_FEATURE_RO_COMPAT_METADATA_CSUM and
> EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP. Also reserve fields in the
> superblock and the inode for the checksums. In the block group
> descriptor, reserve the exclude bitmap field for the snapshot feature,
> and checksums for the inode and block allocation bitmaps.
>
> With this commit, the metadata checksum and exclude bitmap features
> should have reserved all of the fields they need in ext4's on-disk
> format.
>
> #define EXT2_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not initialized */
> @@ -363,7 +370,8 @@ struct ext2_inode {
> __u16 l_i_file_acl_high;
> __u16 l_i_uid_high; /* these 2 fields */
> __u16 l_i_gid_high; /* were reserved2[0] */
> - __u32 l_i_reserved2;
> + __u16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) */
> + __u16 l_i_reserved; /* crc32c(uuid+inum+inode) */
> } linux2;
The comment for l_i_reserved is incorrect, and the comment should include "LSB"?
Also, if the order of these two fields was swapped it would avoid an extra
call to the CRC function to checksum the last 2 bytes:
__u16 l_i_uid_high; /* these 2 fields */
__u16 l_i_gid_high; /* were reserved2[0] */
- __u32 l_i_reserved2;
+ __u16 l_i_reserved;
+ __u16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LSB*/
} linux2;
> @@ -422,7 +431,7 @@ struct ext2_inode_large {
> } hurd2;
> } osd2; /* OS dependent 2 */
> __u16 i_extra_isize;
> - __u16 i_pad1;
> + __u16 i_checksum_hi; /* crc32c(uuid+inum+inode) */
This comment should include "MSB".
> @@ -623,7 +632,8 @@ struct ext2_super_block {
> __u32 s_usr_quota_inum; /* inode number of user quota file */
> __u32 s_grp_quota_inum; /* inode number of group quota file */
> __u32 s_overhead_blocks; /* overhead blocks/clusters in fs */
> - __u32 s_reserved[109]; /* Padding to the end of the block */
> + __u32 s_checksum; /* crc32c(superblock) */
> + __u32 s_reserved[108]; /* Padding to the end of the block */
> };
I thought it would be better to move s_checksum to be the last field in the
superblock to avoid multiple calls to the CRC function? So:
__u32 s_grp_quota_inum; /* inode number of group quota file */
__u32 s_overhead_blocks; /* overhead blocks/clusters in fs */
- __u32 s_reserved[109]; /* Padding to the end of the block */
+ __u32 s_reserved[108]; /* Padding to the end of the block */
+ __u32 s_checksum; /* crc32c(superblock) */
};
> diff --git a/lib/ext2fs/tst_inode_size.c b/lib/ext2fs/tst_inode_size.c
> index 962f1cd..a4f6247 100644
> --- a/lib/ext2fs/tst_inode_size.c
> +++ b/lib/ext2fs/tst_inode_size.c
> @@ -61,7 +61,8 @@ void check_structure_fields()
> check_field(osd2.linux2.l_i_file_acl_high);
> check_field(osd2.linux2.l_i_uid_high);
> check_field(osd2.linux2.l_i_gid_high);
> - check_field(osd2.linux2.l_i_reserved2);
> + check_field(osd2.linux2.l_i_checksum_lo);
> + check_field(osd2.linux2.l_i_reserved);
> printf("Ending offset is %d\n\n", cur_offset);
> #endif
> }
> diff --git a/lib/ext2fs/tst_super_size.c b/lib/ext2fs/tst_super_size.c
> index 1e5a524..75659ae 100644
> --- a/lib/ext2fs/tst_super_size.c
> +++ b/lib/ext2fs/tst_super_size.c
> @@ -126,6 +126,7 @@ void check_superblock_fields()
> check_field(s_usr_quota_inum);
> check_field(s_grp_quota_inum);
> check_field(s_overhead_blocks);
> + check_field(s_checksum);
> check_field(s_reserved);
> printf("Ending offset is %d\n\n", cur_offset);
> #endif
These two checks would need to be reversed to match the above changes.
Cheers, Andreas
On Thu, Sep 15, 2011 at 05:09:13PM -0600, Andreas Dilger wrote:
>
> I thought it would be better to move s_checksum to be the last field in the
> superblock to avoid multiple calls to the CRC function?
Did you see my comment about just zero'ing the checksum field before
running the CRC? We're going to have to do that for other data
structures, such as the inode structure, and it's what we do with the
block group descriptor checksum. Might as well do that everywhere for
consistency's sake.
- Ted
On 2011-09-15, at 5:11 PM, Ted Ts'o wrote:
> On Thu, Sep 15, 2011 at 05:09:13PM -0600, Andreas Dilger wrote:
>>
>> I thought it would be better to move s_checksum to be the last field in the
>> superblock to avoid multiple calls to the CRC function?
>
> Did you see my comment about just zero'ing the checksum field before
> running the CRC? We're going to have to do that for other data
> structures, such as the inode structure, and it's what we do with the
> block group descriptor checksum.
That isn't correct. The group descriptor checksum is computed in chunks:
__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
struct ext4_group_desc *gdp)
{
int offset = offsetof(struct ext4_group_desc, bg_checksum);
__le32 le_group = cpu_to_le32(block_group);
crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
crc = crc16(crc, (__u8 *)gdp, offset);
offset += sizeof(gdp->bg_checksum); /* skip checksum */
****HERE****
/* for checksum of struct ext4_group_desc do the rest...*/
if ((sbi->s_es->s_feature_incompat &
cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
offset < le16_to_cpu(sbi->s_es->s_desc_size))
crc = crc16(crc, (__u8 *)gdp + offset,
le16_to_cpu(sbi->s_es->s_desc_size) -
offset);
}
Darrick and I discussed zeroing the checksum fields, but then there is a
race with other threads accessing the same structure.
If we went to a crc32c LSB for filesystems with RO_COMPAT_CSUM it would
be possible to change how it is computed. Since we have freedom to move
the checksum field now, why have the added complexity to do zeroing of
the field or two chunks?
Since we naturally have to break the checksum calculation for 128-byte
inodes and 32-byte descriptors, due to old versions of those structs,
there is little overhead in just skipping the field, and no races.
Cheers, Andreas
On Thu, Sep 15, 2011 at 05:34:41PM -0600, Andreas Dilger wrote:
>
> Darrick and I discussed zeroing the checksum fields, but then there is a
> race with other threads accessing the same structure.
What race are you worried about? The moment you modify some part of
the data structure, the checksum is going to be wrong. This is true
whether you zero out the checksum field before you do the calculations
or not.
> If we went to a crc32c LSB for filesystems with RO_COMPAT_CSUM it would
> be possible to change how it is computed. Since we have freedom to move
> the checksum field now, why have the added complexity to do zeroing of
> the field or two chunks?
Why is zero'ing out the field complex? It's a single line of code....
It's certainly easier than doing it in two chunks, and there will be
some data structures (the block group descriptors at the very least)
where zero'ing the checksum is definitely going to be the easier way
to go.
- Ted
On 2011-09-15, at 5:41 PM, Ted Ts'o wrote:
> On Thu, Sep 15, 2011 at 05:34:41PM -0600, Andreas Dilger wrote:
>>
>> Darrick and I discussed zeroing the checksum fields, but then there is a
>> race with other threads accessing the same structure.
>
> What race are you worried about? The moment you modify some part of
> the data structure, the checksum is going to be wrong. This is true
> whether you zero out the checksum field before you do the calculations
> or not.
If you are reading the structure (not modifying it) and trying to verify
the checksum, it would be necessary to read-lock the structure, zero
the field, compute the checksum, reset the field, unlock, and then
compare checksums. Alternately, one would have to make a copy of the
struct to zero out the field and compute the checksum on the copy. Both
are more complex than just doing the checksum on two separate chunks.
>> If we went to a crc32c LSB for filesystems with RO_COMPAT_CSUM it would
>> be possible to change how it is computed. Since we have freedom to move
>> the checksum field now, why have the added complexity to do zeroing of
>> the field or two chunks?
>
> Why is zero'ing out the field complex? It's a single line of code....
> It's certainly easier than doing it in two chunks, and there will be
> some data structures (the block group descriptors at the very least)
> where zero'ing the checksum is definitely going to be the easier way
> to go.
No, because for group descriptors, the size is conditional on whether
RO_COMPAT_GDT_CSUM is set, so it is just as easy to always compute the
first 32-byte crc32 (excluding the bg_checksum field) and then only
conditionally compute the second 32-byte checksum. The same is true
of the inode checksum and s_inode_size, if the checksum is at the last
field of struct ext2_inode.
Cheers, Andreas
On Thu, Sep 15, 2011 at 06:09:04PM -0600, Andreas Dilger wrote:
> On 2011-09-15, at 5:41 PM, Ted Ts'o wrote:
> > On Thu, Sep 15, 2011 at 05:34:41PM -0600, Andreas Dilger wrote:
> >>
> >> Darrick and I discussed zeroing the checksum fields, but then there is a
> >> race with other threads accessing the same structure.
> >
> > What race are you worried about? The moment you modify some part of
> > the data structure, the checksum is going to be wrong. This is true
> > whether you zero out the checksum field before you do the calculations
> > or not.
>
> If you are reading the structure (not modifying it) and trying to verify
> the checksum, it would be necessary to read-lock the structure, zero
> the field, compute the checksum, reset the field, unlock, and then
> compare checksums. Alternately, one would have to make a copy of the
> struct to zero out the field and compute the checksum on the copy. Both
> are more complex than just doing the checksum on two separate chunks.
I think a lot of the metadata update code already has locking to prevent
multiple writers. I'm not as sure about the locking around the read calls,
though I suspect that we take some of the same locks before starting the read.
However, I've yet to audit all those code paths. Point is, we might already be
doing most of the necessary locking. Too bad I'm not sure.
>
> >> If we went to a crc32c LSB for filesystems with RO_COMPAT_CSUM it would
> >> be possible to change how it is computed. Since we have freedom to move
> >> the checksum field now, why have the added complexity to do zeroing of
> >> the field or two chunks?
> >
> > Why is zero'ing out the field complex? It's a single line of code....
> > It's certainly easier than doing it in two chunks, and there will be
> > some data structures (the block group descriptors at the very least)
> > where zero'ing the checksum is definitely going to be the easier way
> > to go.
>
> No, because for group descriptors, the size is conditional on whether
> RO_COMPAT_GDT_CSUM is set, so it is just as easy to always compute the
> first 32-byte crc32 (excluding the bg_checksum field) and then only
> conditionally compute the second 32-byte checksum. The same is true
> of the inode checksum and s_inode_size, if the checksum is at the last
> field of struct ext2_inode.
Um.... it looks to me like the size is conditional on
EXT4_FEATURE_INCOMPAT_64BIT being set and then sb.s_desc_size. Are you
suggesting that we make the group descriptor checksum a full 32 bits and just
stuff the upper 16 bits somewhere near the end of the structure? I suppose
that would leave us with very little space in the group descriptor. On the
other hand the 32-bit format is totally full already and any size increase
depends on 64bit, which means we can make it even bigger with little pain if we
need to.
Incidentally, I have cached versions of the ext4 disk layout and metadata
checksumming wiki pages mirrored here:
http://djwong.org/docs/ext4_metadata_checksums.html
http://djwong.org/docs/ext4_disk_layout.pdf
--D
On Thu, Sep 15, 2011 at 06:09:04PM -0600, Andreas Dilger wrote:
>
> If you are reading the structure (not modifying it) and trying to verify
> the checksum, it would be necessary to read-lock the structure, zero
> the field, compute the checksum, reset the field, unlock, and then
> compare checksums. Alternately, one would have to make a copy of the
> struct to zero out the field and compute the checksum on the copy. Both
> are more complex than just doing the checksum on two separate chunks.
You have to read-lock the structure before calculating the checksum in
any case, since otherwise when someone else is modifying the
structure, before they have a chance to update the checksum, you'll
calculate the checksum and discover that it is incorrect.
In practice we would probably be calculating the checksum when the
inode if first read into memory, before it it is visible to the rest
of the system, so this shouldn't be an issue. But if it is visible to
the rest of the system, even you put the checksum at the end, if
someone else can modify the data structure while you are calculating
it, the checksum will be wrong.
> No, because for group descriptors, the size is conditional on whether
> RO_COMPAT_GDT_CSUM is set, so it is just as easy to always compute the
> first 32-byte crc32 (excluding the bg_checksum field) and then only
> conditionally compute the second 32-byte checksum. The same is true
> of the inode checksum and s_inode_size, if the checksum is at the last
> field of struct ext2_inode.
But wouldn't it be faster to zero out the two fields, and then
caluclate a single 64-byte checksum? That way we avoid the setup
costs of the crc32, especially in the case of the crc32c-sby8-[lb]e
implementation.
- Ted
On 2011-09-15, at 7:06 PM, Ted Ts'o wrote:
> On Thu, Sep 15, 2011 at 06:09:04PM -0600, Andreas Dilger wrote:
>>
>> If you are reading the structure (not modifying it) and trying to verify
>> the checksum, it would be necessary to read-lock the structure, zero
>> the field, compute the checksum, reset the field, unlock, and then
>> compare checksums. Alternately, one would have to make a copy of the
>> struct to zero out the field and compute the checksum on the copy. Both
>> are more complex than just doing the checksum on two separate chunks.
>
> You have to read-lock the structure before calculating the checksum in
> any case, since otherwise when someone else is modifying the
> structure, before they have a chance to update the checksum, you'll
> calculate the checksum and discover that it is incorrect.
Sure, but if the structure is read-locked it shouldn't be modified...
> In practice we would probably be calculating the checksum when the
> inode if first read into memory, before it it is visible to the rest
> of the system, so this shouldn't be an issue. But if it is visible to
> the rest of the system, even you put the checksum at the end, if
> someone else can modify the data structure while you are calculating
> it, the checksum will be wrong.
True, but at the same time is there a reason _not_ to put the checksum
at the end? For the superblock in particular it seems easy to do and
simplifies the code either way.
>> No, because for group descriptors, the size is conditional on whether
>> RO_COMPAT_GDT_CSUM is set, so it is just as easy to always compute the
>> first 32-byte crc32 (excluding the bg_checksum field) and then only
>> conditionally compute the second 32-byte checksum. The same is true
>> of the inode checksum and s_inode_size, if the checksum is at the last
>> field of struct ext2_inode.
>
> But wouldn't it be faster to zero out the two fields, and then
> caluclate a single 64-byte checksum? That way we avoid the setup
> costs of the crc32, especially in the case of the crc32c-sby8-[lb]e
> implementation.
Looking at Darrick's performance tests for the checksums, at 32 bytes
the performance is 75%/95% (crc32-sby8-le/crc32-intel) of peak, and
91%/97% of peak for 128 bytes, so the overhead of computing the checksum
in two parts is probably not significant.
Cheers, Andreas
On 2011-09-15, at 6:56 PM, Darrick J. Wong wrote:
> On Thu, Sep 15, 2011 at 06:09:04PM -0600, Andreas Dilger wrote:
>> If you are reading the structure (not modifying it) and trying to verify
>> the checksum, it would be necessary to read-lock the structure, zero
>> the field, compute the checksum, reset the field, unlock, and then
>> compare checksums. Alternately, one would have to make a copy of the
>> struct to zero out the field and compute the checksum on the copy. Both
>> are more complex than just doing the checksum on two separate chunks.
>
> I think a lot of the metadata update code already has locking to prevent
> multiple writers. I'm not as sure about the locking around the read calls,
> though I suspect that we take some of the same locks before starting the read.
> However, I've yet to audit all those code paths. Point is, we might already be
> doing most of the necessary locking. Too bad I'm not sure.
>
>>>> If we went to a crc32c LSB for filesystems with RO_COMPAT_METADATA_CSUM
>>>> it would be possible to change how it is computed. Since we have freedom
>>>> to move the checksum field now, why have the added complexity to do
>>>> zeroing of the field or two chunks?
>>>
>>> Why is zero'ing out the field complex? It's a single line of code....
>>> It's certainly easier than doing it in two chunks, and there will be
>>> some data structures (the block group descriptors at the very least)
>>> where zero'ing the checksum is definitely going to be the easier way
>>> to go.
>>
>> No, because for group descriptors, the size is conditional on whether
>> RO_COMPAT_GDT_CSUM is set, so it is just as easy to always compute the
>> first 32-byte crc32 (excluding the bg_checksum field) and then only
>> conditionally compute the second 32-byte checksum. The same is true
>> of the inode checksum and s_inode_size, if the checksum is at the last
>> field of struct ext2_inode.
>
> Um.... it looks to me like the size is conditional on
> EXT4_FEATURE_INCOMPAT_64BIT being set and then sb.s_desc_size.
My bad, I was looking at ext4_group_desc_csum() for the flag name, and my
eyes caught the wrong one...
> Are you
> suggesting that we make the group descriptor checksum a full 32 bits and just
> stuff the upper 16 bits somewhere near the end of the structure?
No, I don't think we need a 32-bit checksum for the group descriptor. Rather
that the crc32c is so much faster than crc16 that it might make sense to use
the low 16 bits of crc32c for the group descriptor if RO_COMPAT_METADATA_CSUM
is set.
> Incidentally, I have cached versions of the ext4 disk layout and metadata
> checksumming wiki pages mirrored here:
> http://djwong.org/docs/ext4_metadata_checksums.html
> http://djwong.org/docs/ext4_disk_layout.pdf
Cheers, Andreas
On Thu, Sep 15, 2011 at 09:57:33PM -0600, Andreas Dilger wrote:
> True, but at the same time is there a reason _not_ to put the checksum
> at the end? For the superblock in particular it seems easy to do and
> simplifies the code either way.
For the superblock, OK. I'll buy that and I'll make the change so
that tail end of the superblock looks like this:
__u32 s_overhead_blocks; /* overhead blocks/clusters in fs */
__u32 s_reserved[108]; /* Padding to the end of the block */
__u32 s_checksum; /* crc32c(superblock) */
};
- Ted
On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
<snip>
> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> index 4fec5db..1c86cb2 100644
> --- a/lib/ext2fs/ext2_fs.h
> +++ b/lib/ext2fs/ext2_fs.h
> @@ -363,7 +370,8 @@ struct ext2_inode {
> __u16 l_i_file_acl_high;
> __u16 l_i_uid_high; /* these 2 fields */
> __u16 l_i_gid_high; /* were reserved2[0] */
> - __u32 l_i_reserved2;
> + __u16 l_i_checksum_lo; /* crc32c(uuid+inum+inode) */
> + __u16 l_i_reserved; /* crc32c(uuid+inum+inode) */
> } linux2;
> struct {
> __u8 h_i_frag; /* Fragment number */
> @@ -410,7 +418,8 @@ struct ext2_inode_large {
> __u16 l_i_file_acl_high;
> __u16 l_i_uid_high; /* these 2 fields */
> __u16 l_i_gid_high; /* were reserved2[0] */
> - __u32 l_i_reserved2;
> + __u16 l_i_checksum_lo; /* crc32c(uuid+inum+inode) */
> + __u16 l_i_reserved;
> } linux2;
> struct {
> __u8 h_i_frag; /* Fragment number */
> @@ -422,7 +431,7 @@ struct ext2_inode_large {
> } hurd2;
> } osd2; /* OS dependent 2 */
> __u16 i_extra_isize;
> - __u16 i_pad1;
> + __u16 i_checksum_hi; /* crc32c(uuid+inum+inode) */
> __u32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
> __u32 i_mtime_extra; /* extra Modification time (nsec << 2 | epoch) */
> __u32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
> @@ -441,7 +450,7 @@ struct ext2_inode_large {
> #define i_gid_low i_gid
> #define i_uid_high osd2.linux2.l_i_uid_high
> #define i_gid_high osd2.linux2.l_i_gid_high
> -#define i_reserved2 osd2.linux2.l_i_reserved2
> +#define i_checksum osd2.linux2.l_i_checksum
s/i_checksum/i_checksum_lo/, I think.
--D
On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
> Reserve EXT4_FEATURE_RO_COMPAT_METADATA_CSUM and
> EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP. Also reserve fields in the
> superblock and the inode for the checksums. In the block group
> descriptor, reserve the exclude bitmap field for the snapshot feature,
> and checksums for the inode and block allocation bitmaps.
>
> With this commit, the metadata checksum and exclude bitmap features
> should have reserved all of the fields they need in ext4's on-disk
> format.
>
> This commit also fixes an a missing byte swap for s_overhead_blocks.
>
> Signed-off-by: "Theodore Ts'o" <[email protected]>
> Cc: Darrick J. Wong <[email protected]>
> Cc: Amir Goldstein <[email protected]>
> ---
> debugfs/set_fields.c | 3 ++-
> e2fsck/pass1.c | 4 ++--
> lib/e2p/feature.c | 4 ++++
> lib/e2p/ls.c | 4 ++++
> lib/ext2fs/ext2_fs.h | 31 ++++++++++++++++++++++---------
> lib/ext2fs/swapfs.c | 16 ++++++++++++++--
> lib/ext2fs/tst_inode_size.c | 3 ++-
> lib/ext2fs/tst_super_size.c | 1 +
> 8 files changed, 51 insertions(+), 15 deletions(-)
>
> diff --git a/debugfs/set_fields.c b/debugfs/set_fields.c
> index ac6bc25..1a5ec7f 100644
> --- a/debugfs/set_fields.c
> +++ b/debugfs/set_fields.c
> @@ -144,6 +144,7 @@ static struct field_set_info super_fields[] = {
> { "usr_quota_inum", &set_sb.s_usr_quota_inum, 4, parse_uint },
> { "grp_quota_inum", &set_sb.s_grp_quota_inum, 4, parse_uint },
> { "overhead_blocks", &set_sb.s_overhead_blocks, 4, parse_uint },
> + { "checksum", &set_sb.s_checksum, 4, parse_uint },
> { 0, 0, 0, 0 }
> };
>
> @@ -179,6 +180,7 @@ static struct field_set_info inode_fields[] = {
> { "fsize", &set_inode.osd2.hurd2.h_i_fsize, 1, parse_uint },
> { "uid_high", &set_inode.osd2.linux2.l_i_uid_high, 2, parse_uint },
> { "gid_high", &set_inode.osd2.linux2.l_i_gid_high, 2, parse_uint },
> + { "checksum", &set_inode.osd2.linux2.l_i_checksum_lo, 2, parse_uint },
> { "author", &set_inode.osd2.hurd2.h_i_author, 4, parse_uint },
> { "bmap", NULL, 4, parse_bmap, FLAG_ARRAY },
> { 0, 0, 0, 0 }
> @@ -192,7 +194,6 @@ static struct field_set_info ext2_bg_fields[] = {
> { "free_inodes_count", &set_gd.bg_free_inodes_count, 2, parse_uint },
> { "used_dirs_count", &set_gd.bg_used_dirs_count, 2, parse_uint },
> { "flags", &set_gd.bg_flags, 2, parse_uint },
> - { "reserved", &set_gd.bg_reserved, 2, parse_uint, FLAG_ARRAY, 2 },
> { "itable_unused", &set_gd.bg_itable_unused, 2, parse_uint },
> { "checksum", &set_gd.bg_checksum, 2, parse_gd_csum },
> { 0, 0, 0, 0 }
> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
> index dd18ade..16ddcda 100644
> --- a/e2fsck/pass1.c
> +++ b/e2fsck/pass1.c
> @@ -364,8 +364,8 @@ static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx)
> printf("inode #%u, i_extra_size %d\n", pctx->ino,
> inode->i_extra_isize);
> #endif
> - /* i_extra_isize must cover i_extra_isize + i_pad1 at least */
> - min = sizeof(inode->i_extra_isize) + sizeof(inode->i_pad1);
> + /* i_extra_isize must cover i_extra_isize + i_checksum_hi at least */
> + min = sizeof(inode->i_extra_isize) + sizeof(inode->i_checksum_hi);
> max = EXT2_INODE_SIZE(sb) - EXT2_GOOD_OLD_INODE_SIZE;
> /*
> * For now we will allow i_extra_isize to be 0, but really
> diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
> index 16fba53..965fc16 100644
> --- a/lib/e2p/feature.c
> +++ b/lib/e2p/feature.c
> @@ -40,6 +40,8 @@ static struct feature feature_list[] = {
> "resize_inode" },
> { E2P_FEATURE_COMPAT, EXT2_FEATURE_COMPAT_LAZY_BG,
> "lazy_bg" },
> + { E2P_FEATURE_COMPAT, EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP,
> + "snapshot" },
>
> { E2P_FEATURE_RO_INCOMPAT, EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER,
> "sparse_super" },
> @@ -59,6 +61,8 @@ static struct feature feature_list[] = {
> "quota" },
> { E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_BIGALLOC,
> "bigalloc"},
> + { E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM,
> + "metadata_csum"},
>
> { E2P_FEATURE_INCOMPAT, EXT2_FEATURE_INCOMPAT_COMPRESSION,
> "compression" },
> diff --git a/lib/e2p/ls.c b/lib/e2p/ls.c
> index 0f36f40..aaacdaa 100644
> --- a/lib/e2p/ls.c
> +++ b/lib/e2p/ls.c
> @@ -413,6 +413,10 @@ void list_super2(struct ext2_super_block * sb, FILE *f)
> if (sb->s_grp_quota_inum)
> fprintf(f, "Group quota inode: %u\n",
> sb->s_grp_quota_inum);
> +
> + if (sb->s_feature_ro_compat & EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)
> + fprintf(f, "Checksum: 0x%08x\n",
> + sb->s_checksum);
> }
>
> void list_super (struct ext2_super_block * s)
> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> index 4fec5db..1c86cb2 100644
> --- a/lib/ext2fs/ext2_fs.h
> +++ b/lib/ext2fs/ext2_fs.h
> @@ -142,7 +142,9 @@ struct ext2_group_desc
> __u16 bg_free_inodes_count; /* Free inodes count */
> __u16 bg_used_dirs_count; /* Directories count */
> __u16 bg_flags;
> - __u32 bg_reserved[2];
> + __u32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */
> + __u16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> + __u16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> __u16 bg_itable_unused; /* Unused inodes count */
> __u16 bg_checksum; /* crc16(s_uuid+grouo_num+group_desc)*/
> };
> @@ -159,7 +161,9 @@ struct ext4_group_desc
> __u16 bg_free_inodes_count; /* Free inodes count */
> __u16 bg_used_dirs_count; /* Directories count */
> __u16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
> - __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
> + __u32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */
> + __u16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> + __u16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> __u16 bg_itable_unused; /* Unused inodes count */
> __u16 bg_checksum; /* crc16(sb_uuid+group+desc) */
> __u32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
> @@ -169,7 +173,10 @@ struct ext4_group_desc
> __u16 bg_free_inodes_count_hi;/* Free inodes count MSB */
> __u16 bg_used_dirs_count_hi; /* Directories count MSB */
> __u16 bg_itable_unused_hi; /* Unused inodes count MSB */
> - __u32 bg_reserved2[3];
> + __u32 bg_exclude_bitmap_hi; /* Exclude bitmap block MSB */
> + __u16 bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
> + __u16 bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
> + __u32 bg_reserved;
Should we attach a checksum to the exclude bitmap? That would, unfortunately,
put the 64-byte block group pretty close to full, unless we're ok with making
the bg even bigger...
--D
> };
>
> #define EXT2_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not initialized */
> @@ -363,7 +370,8 @@ struct ext2_inode {
> __u16 l_i_file_acl_high;
> __u16 l_i_uid_high; /* these 2 fields */
> __u16 l_i_gid_high; /* were reserved2[0] */
> - __u32 l_i_reserved2;
> + __u16 l_i_checksum_lo; /* crc32c(uuid+inum+inode) */
> + __u16 l_i_reserved; /* crc32c(uuid+inum+inode) */
> } linux2;
> struct {
> __u8 h_i_frag; /* Fragment number */
> @@ -410,7 +418,8 @@ struct ext2_inode_large {
> __u16 l_i_file_acl_high;
> __u16 l_i_uid_high; /* these 2 fields */
> __u16 l_i_gid_high; /* were reserved2[0] */
> - __u32 l_i_reserved2;
> + __u16 l_i_checksum_lo; /* crc32c(uuid+inum+inode) */
> + __u16 l_i_reserved;
> } linux2;
> struct {
> __u8 h_i_frag; /* Fragment number */
> @@ -422,7 +431,7 @@ struct ext2_inode_large {
> } hurd2;
> } osd2; /* OS dependent 2 */
> __u16 i_extra_isize;
> - __u16 i_pad1;
> + __u16 i_checksum_hi; /* crc32c(uuid+inum+inode) */
> __u32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
> __u32 i_mtime_extra; /* extra Modification time (nsec << 2 | epoch) */
> __u32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
> @@ -441,7 +450,7 @@ struct ext2_inode_large {
> #define i_gid_low i_gid
> #define i_uid_high osd2.linux2.l_i_uid_high
> #define i_gid_high osd2.linux2.l_i_gid_high
> -#define i_reserved2 osd2.linux2.l_i_reserved2
> +#define i_checksum osd2.linux2.l_i_checksum
> #else
> #if defined(__GNU__)
>
> @@ -623,7 +632,8 @@ struct ext2_super_block {
> __u32 s_usr_quota_inum; /* inode number of user quota file */
> __u32 s_grp_quota_inum; /* inode number of group quota file */
> __u32 s_overhead_blocks; /* overhead blocks/clusters in fs */
> - __u32 s_reserved[109]; /* Padding to the end of the block */
> + __u32 s_checksum; /* crc32c(superblock) */
> + __u32 s_reserved[108]; /* Padding to the end of the block */
> };
>
> #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
> @@ -671,7 +681,9 @@ struct ext2_super_block {
> #define EXT2_FEATURE_COMPAT_RESIZE_INODE 0x0010
> #define EXT2_FEATURE_COMPAT_DIR_INDEX 0x0020
> #define EXT2_FEATURE_COMPAT_LAZY_BG 0x0040
> -#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE 0x0080
> +/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE 0x0080 not used, legacy */
> +#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP 0x0100
> +
>
> #define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
> #define EXT2_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
> @@ -683,6 +695,7 @@ struct ext2_super_block {
> #define EXT4_FEATURE_RO_COMPAT_HAS_SNAPSHOT 0x0080
> #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
> #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
> +#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
>
> #define EXT2_FEATURE_INCOMPAT_COMPRESSION 0x0001
> #define EXT2_FEATURE_INCOMPAT_FILETYPE 0x0002
> diff --git a/lib/ext2fs/swapfs.c b/lib/ext2fs/swapfs.c
> index 87b1a2e..d1c4a56 100644
> --- a/lib/ext2fs/swapfs.c
> +++ b/lib/ext2fs/swapfs.c
> @@ -78,6 +78,8 @@ void ext2fs_swap_super(struct ext2_super_block * sb)
> sb->s_snapshot_list = ext2fs_swab32(sb->s_snapshot_list);
> sb->s_usr_quota_inum = ext2fs_swab32(sb->s_usr_quota_inum);
> sb->s_grp_quota_inum = ext2fs_swab32(sb->s_grp_quota_inum);
> + sb->s_overhead_blocks = ext2fs_swab32(sb->s_overhead_blocks);
> + sb->s_checksum = ext2fs_swab32(sb->s_checksum);
>
> for (i=0; i < 4; i++)
> sb->s_hash_seed[i] = ext2fs_swab32(sb->s_hash_seed[i]);
> @@ -106,6 +108,11 @@ void ext2fs_swap_group_desc2(ext2_filsys fs, struct ext2_group_desc *gdp)
> gdp->bg_free_inodes_count = ext2fs_swab16(gdp->bg_free_inodes_count);
> gdp->bg_used_dirs_count = ext2fs_swab16(gdp->bg_used_dirs_count);
> gdp->bg_flags = ext2fs_swab16(gdp->bg_flags);
> + gdp->bg_exclude_bitmap_lo = ext2fs_swab32(gdp->bg_exclude_bitmap_lo);
> + gdp->bg_block_bitmap_csum_lo =
> + ext2fs_swab16(gdp->bg_block_bitmap_csum_lo);
> + gdp->bg_inode_bitmap_csum_lo =
> + ext2fs_swab16(gdp->bg_inode_bitmap_csum_lo);
> gdp->bg_itable_unused = ext2fs_swab16(gdp->bg_itable_unused);
> gdp->bg_checksum = ext2fs_swab16(gdp->bg_checksum);
> /* If we're 32-bit, we're done */
> @@ -125,6 +132,11 @@ void ext2fs_swap_group_desc2(ext2_filsys fs, struct ext2_group_desc *gdp)
> gdp4->bg_used_dirs_count_hi =
> ext2fs_swab16(gdp4->bg_used_dirs_count_hi);
> gdp4->bg_itable_unused_hi = ext2fs_swab16(gdp4->bg_itable_unused_hi);
> + gdp->bg_exclude_bitmap_hi = ext2fs_swab16(gdp->bg_exclude_bitmap_hi);
> + gdp->bg_block_bitmap_csum_hi =
> + ext2fs_swab16(gdp->bg_block_bitmap_csum_hi);
> + gdp->bg_inode_bitmap_csum_hi =
> + ext2fs_swab16(gdp->bg_inode_bitmap_csum_hi);
> }
>
> void ext2fs_swap_group_desc(struct ext2_group_desc *gdp)
> @@ -244,8 +256,8 @@ void ext2fs_swap_inode_full(ext2_filsys fs, struct ext2_inode_large *t,
> ext2fs_swab16 (f->osd2.linux2.l_i_uid_high);
> t->osd2.linux2.l_i_gid_high =
> ext2fs_swab16 (f->osd2.linux2.l_i_gid_high);
> - t->osd2.linux2.l_i_reserved2 =
> - ext2fs_swab32(f->osd2.linux2.l_i_reserved2);
> + t->osd2.linux2.l_i_checksum =
> + ext2fs_swab32(f->osd2.linux2.checksum);
> break;
> case EXT2_OS_HURD:
> t->osd1.hurd1.h_i_translator =
> diff --git a/lib/ext2fs/tst_inode_size.c b/lib/ext2fs/tst_inode_size.c
> index 962f1cd..a4f6247 100644
> --- a/lib/ext2fs/tst_inode_size.c
> +++ b/lib/ext2fs/tst_inode_size.c
> @@ -61,7 +61,8 @@ void check_structure_fields()
> check_field(osd2.linux2.l_i_file_acl_high);
> check_field(osd2.linux2.l_i_uid_high);
> check_field(osd2.linux2.l_i_gid_high);
> - check_field(osd2.linux2.l_i_reserved2);
> + check_field(osd2.linux2.l_i_checksum_lo);
> + check_field(osd2.linux2.l_i_reserved);
> printf("Ending offset is %d\n\n", cur_offset);
> #endif
> }
> diff --git a/lib/ext2fs/tst_super_size.c b/lib/ext2fs/tst_super_size.c
> index 1e5a524..75659ae 100644
> --- a/lib/ext2fs/tst_super_size.c
> +++ b/lib/ext2fs/tst_super_size.c
> @@ -126,6 +126,7 @@ void check_superblock_fields()
> check_field(s_usr_quota_inum);
> check_field(s_grp_quota_inum);
> check_field(s_overhead_blocks);
> + check_field(s_checksum);
> check_field(s_reserved);
> printf("Ending offset is %d\n\n", cur_offset);
> #endif
> --
> 1.7.4.1.22.gec8e1.dirty
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
On Tue, Sep 20, 2011 at 9:56 PM, Darrick J. Wong <[email protected]> wrote:
> On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
>> Reserve EXT4_FEATURE_RO_COMPAT_METADATA_CSUM and
>> EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP. ?Also reserve fields in the
>> superblock and the inode for the checksums. ?In the block group
>> descriptor, reserve the exclude bitmap field for the snapshot feature,
>> and checksums for the inode and block allocation bitmaps.
>>
>> With this commit, the metadata checksum and exclude bitmap features
>> should have reserved all of the fields they need in ext4's on-disk
>> format.
>>
>> This commit also fixes an a missing byte swap for s_overhead_blocks.
>>
>> Signed-off-by: "Theodore Ts'o" <[email protected]>
>> Cc: Darrick J. Wong <[email protected]>
>> Cc: Amir Goldstein <[email protected]>
>> ---
>> ?debugfs/set_fields.c ? ? ? ?| ? ?3 ++-
>> ?e2fsck/pass1.c ? ? ? ? ? ? ?| ? ?4 ++--
>> ?lib/e2p/feature.c ? ? ? ? ? | ? ?4 ++++
>> ?lib/e2p/ls.c ? ? ? ? ? ? ? ?| ? ?4 ++++
>> ?lib/ext2fs/ext2_fs.h ? ? ? ?| ? 31 ++++++++++++++++++++++---------
>> ?lib/ext2fs/swapfs.c ? ? ? ? | ? 16 ++++++++++++++--
>> ?lib/ext2fs/tst_inode_size.c | ? ?3 ++-
>> ?lib/ext2fs/tst_super_size.c | ? ?1 +
>> ?8 files changed, 51 insertions(+), 15 deletions(-)
>>
>> diff --git a/debugfs/set_fields.c b/debugfs/set_fields.c
>> index ac6bc25..1a5ec7f 100644
>> --- a/debugfs/set_fields.c
>> +++ b/debugfs/set_fields.c
>> @@ -144,6 +144,7 @@ static struct field_set_info super_fields[] = {
>> ? ? ? { "usr_quota_inum", &set_sb.s_usr_quota_inum, 4, parse_uint },
>> ? ? ? { "grp_quota_inum", &set_sb.s_grp_quota_inum, 4, parse_uint },
>> ? ? ? { "overhead_blocks", &set_sb.s_overhead_blocks, 4, parse_uint },
>> + ? ? { "checksum", &set_sb.s_checksum, 4, parse_uint },
>> ? ? ? { 0, 0, 0, 0 }
>> ?};
>>
>> @@ -179,6 +180,7 @@ static struct field_set_info inode_fields[] = {
>> ? ? ? { "fsize", &set_inode.osd2.hurd2.h_i_fsize, 1, parse_uint },
>> ? ? ? { "uid_high", &set_inode.osd2.linux2.l_i_uid_high, 2, parse_uint },
>> ? ? ? { "gid_high", &set_inode.osd2.linux2.l_i_gid_high, 2, parse_uint },
>> + ? ? { "checksum", &set_inode.osd2.linux2.l_i_checksum_lo, 2, parse_uint },
>> ? ? ? { "author", &set_inode.osd2.hurd2.h_i_author, 4, parse_uint },
>> ? ? ? { "bmap", NULL, 4, parse_bmap, FLAG_ARRAY },
>> ? ? ? { 0, 0, 0, 0 }
>> @@ -192,7 +194,6 @@ static struct field_set_info ext2_bg_fields[] = {
>> ? ? ? { "free_inodes_count", &set_gd.bg_free_inodes_count, 2, parse_uint },
>> ? ? ? { "used_dirs_count", &set_gd.bg_used_dirs_count, 2, parse_uint },
>> ? ? ? { "flags", &set_gd.bg_flags, 2, parse_uint },
>> - ? ? { "reserved", &set_gd.bg_reserved, 2, parse_uint, FLAG_ARRAY, 2 },
>> ? ? ? { "itable_unused", &set_gd.bg_itable_unused, 2, parse_uint },
>> ? ? ? { "checksum", &set_gd.bg_checksum, 2, parse_gd_csum },
>> ? ? ? { 0, 0, 0, 0 }
>> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
>> index dd18ade..16ddcda 100644
>> --- a/e2fsck/pass1.c
>> +++ b/e2fsck/pass1.c
>> @@ -364,8 +364,8 @@ static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx)
>> ? ? ? printf("inode #%u, i_extra_size %d\n", pctx->ino,
>> ? ? ? ? ? ? ? ? ? ? ? inode->i_extra_isize);
>> ?#endif
>> - ? ? /* i_extra_isize must cover i_extra_isize + i_pad1 at least */
>> - ? ? min = sizeof(inode->i_extra_isize) + sizeof(inode->i_pad1);
>> + ? ? /* i_extra_isize must cover i_extra_isize + i_checksum_hi at least */
>> + ? ? min = sizeof(inode->i_extra_isize) + sizeof(inode->i_checksum_hi);
>> ? ? ? max = EXT2_INODE_SIZE(sb) - EXT2_GOOD_OLD_INODE_SIZE;
>> ? ? ? /*
>> ? ? ? ?* For now we will allow i_extra_isize to be 0, but really
>> diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
>> index 16fba53..965fc16 100644
>> --- a/lib/e2p/feature.c
>> +++ b/lib/e2p/feature.c
>> @@ -40,6 +40,8 @@ static struct feature feature_list[] = {
>> ? ? ? ? ? ? ? ? ? ? ? "resize_inode" },
>> ? ? ? { ? ? ? E2P_FEATURE_COMPAT, EXT2_FEATURE_COMPAT_LAZY_BG,
>> ? ? ? ? ? ? ? ? ? ? ? "lazy_bg" },
>> + ? ? { ? ? ? E2P_FEATURE_COMPAT, EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP,
>> + ? ? ? ? ? ? ? ? ? ? "snapshot" },
>>
>> ? ? ? { ? ? ? E2P_FEATURE_RO_INCOMPAT, EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER,
>> ? ? ? ? ? ? ? ? ? ? ? "sparse_super" },
>> @@ -59,6 +61,8 @@ static struct feature feature_list[] = {
>> ? ? ? ? ? ? ? ? ? ? ? "quota" },
>> ? ? ? { ? ? ? E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_BIGALLOC,
>> ? ? ? ? ? ? ? ? ? ? ? "bigalloc"},
>> + ? ? { ? ? ? E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM,
>> + ? ? ? ? ? ? ? ? ? ? "metadata_csum"},
>>
>> ? ? ? { ? ? ? E2P_FEATURE_INCOMPAT, EXT2_FEATURE_INCOMPAT_COMPRESSION,
>> ? ? ? ? ? ? ? ? ? ? ? "compression" },
>> diff --git a/lib/e2p/ls.c b/lib/e2p/ls.c
>> index 0f36f40..aaacdaa 100644
>> --- a/lib/e2p/ls.c
>> +++ b/lib/e2p/ls.c
>> @@ -413,6 +413,10 @@ void list_super2(struct ext2_super_block * sb, FILE *f)
>> ? ? ? if (sb->s_grp_quota_inum)
>> ? ? ? ? ? ? ? fprintf(f, "Group quota inode: ? ? ? ?%u\n",
>> ? ? ? ? ? ? ? ? ? ? ? sb->s_grp_quota_inum);
>> +
>> + ? ? if (sb->s_feature_ro_compat & EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)
>> + ? ? ? ? ? ? fprintf(f, "Checksum: ? ? ? ? ? ? ? ? 0x%08x\n",
>> + ? ? ? ? ? ? ? ? ? ? sb->s_checksum);
>> ?}
>>
>> ?void list_super (struct ext2_super_block * s)
>> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
>> index 4fec5db..1c86cb2 100644
>> --- a/lib/ext2fs/ext2_fs.h
>> +++ b/lib/ext2fs/ext2_fs.h
>> @@ -142,7 +142,9 @@ struct ext2_group_desc
>> ? ? ? __u16 ? bg_free_inodes_count; ? /* Free inodes count */
>> ? ? ? __u16 ? bg_used_dirs_count; ? ? /* Directories count */
>> ? ? ? __u16 ? bg_flags;
>> - ? ? __u32 ? bg_reserved[2];
>> + ? ? __u32 ? bg_exclude_bitmap_lo; ? /* Exclude bitmap for snapshots */
>> + ? ? __u16 ? bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> + ? ? __u16 ? bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> ? ? ? __u16 ? bg_itable_unused; ? ? ? /* Unused inodes count */
>> ? ? ? __u16 ? bg_checksum; ? ? ? ? ? ?/* crc16(s_uuid+grouo_num+group_desc)*/
>> ?};
>> @@ -159,7 +161,9 @@ struct ext4_group_desc
>> ? ? ? __u16 ? bg_free_inodes_count; ? /* Free inodes count */
>> ? ? ? __u16 ? bg_used_dirs_count; ? ? /* Directories count */
>> ? ? ? __u16 ? bg_flags; ? ? ? ? ? ? ? /* EXT4_BG_flags (INODE_UNINIT, etc) */
>> - ? ? __u32 ? bg_reserved[2]; ? ? ? ? /* Likely block/inode bitmap checksum */
>> + ? ? __u32 ? bg_exclude_bitmap_lo; ? /* Exclude bitmap for snapshots */
>> + ? ? __u16 ? bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> + ? ? __u16 ? bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> ? ? ? __u16 ? bg_itable_unused; ? ? ? /* Unused inodes count */
>> ? ? ? __u16 ? bg_checksum; ? ? ? ? ? ?/* crc16(sb_uuid+group+desc) */
>> ? ? ? __u32 ? bg_block_bitmap_hi; ? ? /* Blocks bitmap block MSB */
>> @@ -169,7 +173,10 @@ struct ext4_group_desc
>> ? ? ? __u16 ? bg_free_inodes_count_hi;/* Free inodes count MSB */
>> ? ? ? __u16 ? bg_used_dirs_count_hi; ?/* Directories count MSB */
>> ? ? ? __u16 ? bg_itable_unused_hi; ? ?/* Unused inodes count MSB */
>> - ? ? __u32 ? bg_reserved2[3];
>> + ? ? __u32 ? bg_exclude_bitmap_hi; ? /* Exclude bitmap block MSB */
>> + ? ? __u16 ? bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
>> + ? ? __u16 ? bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
>> + ? ? __u32 ? bg_reserved;
>
> Should we attach a checksum to the exclude bitmap? ?That would, unfortunately,
> put the 64-byte block group pretty close to full, unless we're ok with making
> the bg even bigger...
Good point.
My initial approach (which I probably expressed somewhere) was that when
exclude_bitmap feature is set, the block bitmap checksum should cover both
block bitmap and exclude bitmap.
The reason being that exclude bitmap adds little entropy over block bitmap:
- it should always be a sub set of block bitmap bits
- clearing exclude bit is done together with clearing the block used bit
- the only case of modifying exclude bitmap only is when a used block becomes
excluded (a.k.a moved to snapshot)
so how do you feel about compressing 2 blocks into 16bits of checksum :-/?
>
> --D
>> ?};
>>
>> ?#define EXT2_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not initialized */
>> @@ -363,7 +370,8 @@ struct ext2_inode {
>> ? ? ? ? ? ? ? ? ? ? ? __u16 ? l_i_file_acl_high;
>> ? ? ? ? ? ? ? ? ? ? ? __u16 ? l_i_uid_high; ? /* these 2 fields ? ?*/
>> ? ? ? ? ? ? ? ? ? ? ? __u16 ? l_i_gid_high; ? /* were reserved2[0] */
>> - ? ? ? ? ? ? ? ? ? ? __u32 ? l_i_reserved2;
>> + ? ? ? ? ? ? ? ? ? ? __u16 ? l_i_checksum_lo; ? ? ? ?/* crc32c(uuid+inum+inode) */
>> + ? ? ? ? ? ? ? ? ? ? __u16 ? l_i_reserved; ? /* crc32c(uuid+inum+inode) */
>> ? ? ? ? ? ? ? } linux2;
>> ? ? ? ? ? ? ? struct {
>> ? ? ? ? ? ? ? ? ? ? ? __u8 ? ?h_i_frag; ? ? ? /* Fragment number */
>> @@ -410,7 +418,8 @@ struct ext2_inode_large {
>> ? ? ? ? ? ? ? ? ? ? ? __u16 ? l_i_file_acl_high;
>> ? ? ? ? ? ? ? ? ? ? ? __u16 ? l_i_uid_high; ? /* these 2 fields ? ?*/
>> ? ? ? ? ? ? ? ? ? ? ? __u16 ? l_i_gid_high; ? /* were reserved2[0] */
>> - ? ? ? ? ? ? ? ? ? ? __u32 ? l_i_reserved2;
>> + ? ? ? ? ? ? ? ? ? ? __u16 ? l_i_checksum_lo; /* crc32c(uuid+inum+inode) */
>> + ? ? ? ? ? ? ? ? ? ? __u16 ? l_i_reserved;
>> ? ? ? ? ? ? ? } linux2;
>> ? ? ? ? ? ? ? struct {
>> ? ? ? ? ? ? ? ? ? ? ? __u8 ? ?h_i_frag; ? ? ? /* Fragment number */
>> @@ -422,7 +431,7 @@ struct ext2_inode_large {
>> ? ? ? ? ? ? ? } hurd2;
>> ? ? ? } osd2; ? ? ? ? ? ? ? ? ? ? ? ? /* OS dependent 2 */
>> ? ? ? __u16 ? i_extra_isize;
>> - ? ? __u16 ? i_pad1;
>> + ? ? __u16 ? i_checksum_hi; ?/* crc32c(uuid+inum+inode) */
>> ? ? ? __u32 ? i_ctime_extra; ?/* extra Change time (nsec << 2 | epoch) */
>> ? ? ? __u32 ? i_mtime_extra; ?/* extra Modification time (nsec << 2 | epoch) */
>> ? ? ? __u32 ? i_atime_extra; ?/* extra Access time (nsec << 2 | epoch) */
>> @@ -441,7 +450,7 @@ struct ext2_inode_large {
>> ?#define i_gid_low ? ?i_gid
>> ?#define i_uid_high ? osd2.linux2.l_i_uid_high
>> ?#define i_gid_high ? osd2.linux2.l_i_gid_high
>> -#define i_reserved2 ?osd2.linux2.l_i_reserved2
>> +#define i_checksum ? osd2.linux2.l_i_checksum
>> ?#else
>> ?#if defined(__GNU__)
>>
>> @@ -623,7 +632,8 @@ struct ext2_super_block {
>> ? ? ? __u32 ? s_usr_quota_inum; ? ? ? /* inode number of user quota file */
>> ? ? ? __u32 ? s_grp_quota_inum; ? ? ? /* inode number of group quota file */
>> ? ? ? __u32 ? s_overhead_blocks; ? ? ?/* overhead blocks/clusters in fs */
>> - ? ? __u32 ? s_reserved[109]; ? ? ? ?/* Padding to the end of the block */
>> + ? ? __u32 ? s_checksum; ? ? ? ? ? ? /* crc32c(superblock) */
>> + ? ? __u32 ? s_reserved[108]; ? ? ? ?/* Padding to the end of the block */
>> ?};
>>
>> ?#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
>> @@ -671,7 +681,9 @@ struct ext2_super_block {
>> ?#define EXT2_FEATURE_COMPAT_RESIZE_INODE ? ? 0x0010
>> ?#define EXT2_FEATURE_COMPAT_DIR_INDEX ? ? ? ? ? ? ? ?0x0020
>> ?#define EXT2_FEATURE_COMPAT_LAZY_BG ? ? ? ? ?0x0040
>> -#define EXT2_FEATURE_COMPAT_EXCLUDE_INODE ? ?0x0080
>> +/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE 0x0080 not used, legacy */
>> +#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP ? 0x0100
>> +
>>
>> ?#define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER ?0x0001
>> ?#define EXT2_FEATURE_RO_COMPAT_LARGE_FILE ? ?0x0002
>> @@ -683,6 +695,7 @@ struct ext2_super_block {
>> ?#define EXT4_FEATURE_RO_COMPAT_HAS_SNAPSHOT ?0x0080
>> ?#define EXT4_FEATURE_RO_COMPAT_QUOTA ? ? ? ? 0x0100
>> ?#define EXT4_FEATURE_RO_COMPAT_BIGALLOC ? ? ? ? ? ? ?0x0200
>> +#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
>>
>> ?#define EXT2_FEATURE_INCOMPAT_COMPRESSION ? ?0x0001
>> ?#define EXT2_FEATURE_INCOMPAT_FILETYPE ? ? ? ? ? ? ? 0x0002
>> diff --git a/lib/ext2fs/swapfs.c b/lib/ext2fs/swapfs.c
>> index 87b1a2e..d1c4a56 100644
>> --- a/lib/ext2fs/swapfs.c
>> +++ b/lib/ext2fs/swapfs.c
>> @@ -78,6 +78,8 @@ void ext2fs_swap_super(struct ext2_super_block * sb)
>> ? ? ? sb->s_snapshot_list = ext2fs_swab32(sb->s_snapshot_list);
>> ? ? ? sb->s_usr_quota_inum = ext2fs_swab32(sb->s_usr_quota_inum);
>> ? ? ? sb->s_grp_quota_inum = ext2fs_swab32(sb->s_grp_quota_inum);
>> + ? ? sb->s_overhead_blocks = ext2fs_swab32(sb->s_overhead_blocks);
>> + ? ? sb->s_checksum = ext2fs_swab32(sb->s_checksum);
>>
>> ? ? ? for (i=0; i < 4; i++)
>> ? ? ? ? ? ? ? sb->s_hash_seed[i] = ext2fs_swab32(sb->s_hash_seed[i]);
>> @@ -106,6 +108,11 @@ void ext2fs_swap_group_desc2(ext2_filsys fs, struct ext2_group_desc *gdp)
>> ? ? ? gdp->bg_free_inodes_count = ext2fs_swab16(gdp->bg_free_inodes_count);
>> ? ? ? gdp->bg_used_dirs_count = ext2fs_swab16(gdp->bg_used_dirs_count);
>> ? ? ? gdp->bg_flags = ext2fs_swab16(gdp->bg_flags);
>> + ? ? gdp->bg_exclude_bitmap_lo = ext2fs_swab32(gdp->bg_exclude_bitmap_lo);
>> + ? ? gdp->bg_block_bitmap_csum_lo =
>> + ? ? ? ? ? ? ext2fs_swab16(gdp->bg_block_bitmap_csum_lo);
>> + ? ? gdp->bg_inode_bitmap_csum_lo =
>> + ? ? ? ? ? ? ext2fs_swab16(gdp->bg_inode_bitmap_csum_lo);
>> ? ? ? gdp->bg_itable_unused = ext2fs_swab16(gdp->bg_itable_unused);
>> ? ? ? gdp->bg_checksum = ext2fs_swab16(gdp->bg_checksum);
>> ? ? ? /* If we're 32-bit, we're done */
>> @@ -125,6 +132,11 @@ void ext2fs_swap_group_desc2(ext2_filsys fs, struct ext2_group_desc *gdp)
>> ? ? ? gdp4->bg_used_dirs_count_hi =
>> ? ? ? ? ? ? ? ext2fs_swab16(gdp4->bg_used_dirs_count_hi);
>> ? ? ? gdp4->bg_itable_unused_hi = ext2fs_swab16(gdp4->bg_itable_unused_hi);
>> + ? ? gdp->bg_exclude_bitmap_hi = ext2fs_swab16(gdp->bg_exclude_bitmap_hi);
>> + ? ? gdp->bg_block_bitmap_csum_hi =
>> + ? ? ? ? ? ? ext2fs_swab16(gdp->bg_block_bitmap_csum_hi);
>> + ? ? gdp->bg_inode_bitmap_csum_hi =
>> + ? ? ? ? ? ? ext2fs_swab16(gdp->bg_inode_bitmap_csum_hi);
>> ?}
>>
>> ?void ext2fs_swap_group_desc(struct ext2_group_desc *gdp)
>> @@ -244,8 +256,8 @@ void ext2fs_swap_inode_full(ext2_filsys fs, struct ext2_inode_large *t,
>> ? ? ? ? ? ? ? ? ext2fs_swab16 (f->osd2.linux2.l_i_uid_high);
>> ? ? ? ? ? ? ? t->osd2.linux2.l_i_gid_high =
>> ? ? ? ? ? ? ? ? ext2fs_swab16 (f->osd2.linux2.l_i_gid_high);
>> - ? ? ? ? ? ? t->osd2.linux2.l_i_reserved2 =
>> - ? ? ? ? ? ? ? ? ? ? ext2fs_swab32(f->osd2.linux2.l_i_reserved2);
>> + ? ? ? ? ? ? t->osd2.linux2.l_i_checksum =
>> + ? ? ? ? ? ? ? ? ? ? ext2fs_swab32(f->osd2.linux2.checksum);
>> ? ? ? ? ? ? ? break;
>> ? ? ? case EXT2_OS_HURD:
>> ? ? ? ? ? ? ? t->osd1.hurd1.h_i_translator =
>> diff --git a/lib/ext2fs/tst_inode_size.c b/lib/ext2fs/tst_inode_size.c
>> index 962f1cd..a4f6247 100644
>> --- a/lib/ext2fs/tst_inode_size.c
>> +++ b/lib/ext2fs/tst_inode_size.c
>> @@ -61,7 +61,8 @@ void check_structure_fields()
>> ? ? ? check_field(osd2.linux2.l_i_file_acl_high);
>> ? ? ? check_field(osd2.linux2.l_i_uid_high);
>> ? ? ? check_field(osd2.linux2.l_i_gid_high);
>> - ? ? check_field(osd2.linux2.l_i_reserved2);
>> + ? ? check_field(osd2.linux2.l_i_checksum_lo);
>> + ? ? check_field(osd2.linux2.l_i_reserved);
>> ? ? ? printf("Ending offset is %d\n\n", cur_offset);
>> ?#endif
>> ?}
>> diff --git a/lib/ext2fs/tst_super_size.c b/lib/ext2fs/tst_super_size.c
>> index 1e5a524..75659ae 100644
>> --- a/lib/ext2fs/tst_super_size.c
>> +++ b/lib/ext2fs/tst_super_size.c
>> @@ -126,6 +126,7 @@ void check_superblock_fields()
>> ? ? ? check_field(s_usr_quota_inum);
>> ? ? ? check_field(s_grp_quota_inum);
>> ? ? ? check_field(s_overhead_blocks);
>> + ? ? check_field(s_checksum);
>> ? ? ? check_field(s_reserved);
>> ? ? ? printf("Ending offset is %d\n\n", cur_offset);
>> ?#endif
>> --
>> 1.7.4.1.22.gec8e1.dirty
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>> the body of a message to [email protected]
>> More majordomo info at ?http://vger.kernel.org/majordomo-info.html
>>
>
>
On Wed, Sep 21, 2011 at 11:03:57AM +0300, Amir Goldstein wrote:
> On Tue, Sep 20, 2011 at 9:56 PM, Darrick J. Wong <[email protected]> wrote:
> > On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
<snip>
> >> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> >> index 4fec5db..1c86cb2 100644
> >> --- a/lib/ext2fs/ext2_fs.h
> >> +++ b/lib/ext2fs/ext2_fs.h
> >> @@ -142,7 +142,9 @@ struct ext2_group_desc
> >> ? ? ? __u16 ? bg_free_inodes_count; ? /* Free inodes count */
> >> ? ? ? __u16 ? bg_used_dirs_count; ? ? /* Directories count */
> >> ? ? ? __u16 ? bg_flags;
> >> - ? ? __u32 ? bg_reserved[2];
> >> + ? ? __u32 ? bg_exclude_bitmap_lo; ? /* Exclude bitmap for snapshots */
> >> + ? ? __u16 ? bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> + ? ? __u16 ? bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> ? ? ? __u16 ? bg_itable_unused; ? ? ? /* Unused inodes count */
> >> ? ? ? __u16 ? bg_checksum; ? ? ? ? ? ?/* crc16(s_uuid+grouo_num+group_desc)*/
> >> ?};
> >> @@ -159,7 +161,9 @@ struct ext4_group_desc
> >> ? ? ? __u16 ? bg_free_inodes_count; ? /* Free inodes count */
> >> ? ? ? __u16 ? bg_used_dirs_count; ? ? /* Directories count */
> >> ? ? ? __u16 ? bg_flags; ? ? ? ? ? ? ? /* EXT4_BG_flags (INODE_UNINIT, etc) */
> >> - ? ? __u32 ? bg_reserved[2]; ? ? ? ? /* Likely block/inode bitmap checksum */
> >> + ? ? __u32 ? bg_exclude_bitmap_lo; ? /* Exclude bitmap for snapshots */
> >> + ? ? __u16 ? bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> + ? ? __u16 ? bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> ? ? ? __u16 ? bg_itable_unused; ? ? ? /* Unused inodes count */
> >> ? ? ? __u16 ? bg_checksum; ? ? ? ? ? ?/* crc16(sb_uuid+group+desc) */
> >> ? ? ? __u32 ? bg_block_bitmap_hi; ? ? /* Blocks bitmap block MSB */
> >> @@ -169,7 +173,10 @@ struct ext4_group_desc
> >> ? ? ? __u16 ? bg_free_inodes_count_hi;/* Free inodes count MSB */
> >> ? ? ? __u16 ? bg_used_dirs_count_hi; ?/* Directories count MSB */
> >> ? ? ? __u16 ? bg_itable_unused_hi; ? ?/* Unused inodes count MSB */
> >> - ? ? __u32 ? bg_reserved2[3];
> >> + ? ? __u32 ? bg_exclude_bitmap_hi; ? /* Exclude bitmap block MSB */
> >> + ? ? __u16 ? bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
> >> + ? ? __u16 ? bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
> >> + ? ? __u32 ? bg_reserved;
> >
> > Should we attach a checksum to the exclude bitmap? ?That would, unfortunately,
> > put the 64-byte block group pretty close to full, unless we're ok with making
> > the bg even bigger...
>
> Good point.
> My initial approach (which I probably expressed somewhere) was that when
> exclude_bitmap feature is set, the block bitmap checksum should cover both
> block bitmap and exclude bitmap.
> The reason being that exclude bitmap adds little entropy over block bitmap:
> - it should always be a sub set of block bitmap bits
> - clearing exclude bit is done together with clearing the block used bit
> - the only case of modifying exclude bitmap only is when a used block becomes
> excluded (a.k.a moved to snapshot)
>
> so how do you feel about compressing 2 blocks into 16bits of checksum :-/?
Hrm.... is it generally the case that both block and exclude bitmaps are loaded
at the same time? I don't think it'd be difficult to checksum both blocks
unless it's common that one of the two are not in memory.
--D
On Wed, Sep 21, 2011 at 11:40 PM, Darrick J. Wong <[email protected]> wrote:
> On Wed, Sep 21, 2011 at 11:03:57AM +0300, Amir Goldstein wrote:
>> On Tue, Sep 20, 2011 at 9:56 PM, Darrick J. Wong <[email protected]> wrote:
>> > On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
> <snip>
>> >> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
>> >> index 4fec5db..1c86cb2 100644
>> >> --- a/lib/ext2fs/ext2_fs.h
>> >> +++ b/lib/ext2fs/ext2_fs.h
>> >> @@ -142,7 +142,9 @@ struct ext2_group_desc
>> >> ? ? ? __u16 ? bg_free_inodes_count; ? /* Free inodes count */
>> >> ? ? ? __u16 ? bg_used_dirs_count; ? ? /* Directories count */
>> >> ? ? ? __u16 ? bg_flags;
>> >> - ? ? __u32 ? bg_reserved[2];
>> >> + ? ? __u32 ? bg_exclude_bitmap_lo; ? /* Exclude bitmap for snapshots */
>> >> + ? ? __u16 ? bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> + ? ? __u16 ? bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> ? ? ? __u16 ? bg_itable_unused; ? ? ? /* Unused inodes count */
>> >> ? ? ? __u16 ? bg_checksum; ? ? ? ? ? ?/* crc16(s_uuid+grouo_num+group_desc)*/
>> >> ?};
>> >> @@ -159,7 +161,9 @@ struct ext4_group_desc
>> >> ? ? ? __u16 ? bg_free_inodes_count; ? /* Free inodes count */
>> >> ? ? ? __u16 ? bg_used_dirs_count; ? ? /* Directories count */
>> >> ? ? ? __u16 ? bg_flags; ? ? ? ? ? ? ? /* EXT4_BG_flags (INODE_UNINIT, etc) */
>> >> - ? ? __u32 ? bg_reserved[2]; ? ? ? ? /* Likely block/inode bitmap checksum */
>> >> + ? ? __u32 ? bg_exclude_bitmap_lo; ? /* Exclude bitmap for snapshots */
>> >> + ? ? __u16 ? bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> + ? ? __u16 ? bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> ? ? ? __u16 ? bg_itable_unused; ? ? ? /* Unused inodes count */
>> >> ? ? ? __u16 ? bg_checksum; ? ? ? ? ? ?/* crc16(sb_uuid+group+desc) */
>> >> ? ? ? __u32 ? bg_block_bitmap_hi; ? ? /* Blocks bitmap block MSB */
>> >> @@ -169,7 +173,10 @@ struct ext4_group_desc
>> >> ? ? ? __u16 ? bg_free_inodes_count_hi;/* Free inodes count MSB */
>> >> ? ? ? __u16 ? bg_used_dirs_count_hi; ?/* Directories count MSB */
>> >> ? ? ? __u16 ? bg_itable_unused_hi; ? ?/* Unused inodes count MSB */
>> >> - ? ? __u32 ? bg_reserved2[3];
>> >> + ? ? __u32 ? bg_exclude_bitmap_hi; ? /* Exclude bitmap block MSB */
>> >> + ? ? __u16 ? bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
>> >> + ? ? __u16 ? bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
>> >> + ? ? __u32 ? bg_reserved;
>> >
>> > Should we attach a checksum to the exclude bitmap? ?That would, unfortunately,
>> > put the 64-byte block group pretty close to full, unless we're ok with making
>> > the bg even bigger...
>>
>> Good point.
>> My initial approach (which I probably expressed somewhere) was that when
>> exclude_bitmap feature is set, the block bitmap checksum should cover both
>> block bitmap and exclude bitmap.
>> The reason being that exclude bitmap adds little entropy over block bitmap:
>> - it should always be a sub set of block bitmap bits
>> - clearing exclude bit is done together with clearing the block used bit
>> - the only case of modifying exclude bitmap only is when a used block becomes
>> ? excluded (a.k.a moved to snapshot)
>>
>> so how do you feel about compressing 2 blocks into 16bits of checksum :-/?
>
> Hrm.... is it generally the case that both block and exclude bitmaps are loaded
> at the same time? ?I don't think it'd be difficult to checksum both blocks
> unless it's common that one of the two are not in memory.
>
Hrm indeed. no. exclude bitmap is currently not loaded always together with
block bitmap, so unless this is changed, we'll have to think of something else.
However that brings me to wonder:
block/inode/exclude bitmap seldom change more than a handful of bits/words
at a time.
Does it make sense to diff update the CRC of the bitmaps instead of
recomputing it?
Amir.
On Thu, Sep 22, 2011 at 05:12:32AM +0300, Amir Goldstein wrote:
> On Wed, Sep 21, 2011 at 11:40 PM, Darrick J. Wong <[email protected]> wrote:
> > On Wed, Sep 21, 2011 at 11:03:57AM +0300, Amir Goldstein wrote:
> >> On Tue, Sep 20, 2011 at 9:56 PM, Darrick J. Wong <[email protected]> wrote:
> >> > On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
> > <snip>
> >> >> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> >> >> index 4fec5db..1c86cb2 100644
> >> >> --- a/lib/ext2fs/ext2_fs.h
> >> >> +++ b/lib/ext2fs/ext2_fs.h
> >> >> @@ -142,7 +142,9 @@ struct ext2_group_desc
> >> >> ? ? ? __u16 ? bg_free_inodes_count; ? /* Free inodes count */
> >> >> ? ? ? __u16 ? bg_used_dirs_count; ? ? /* Directories count */
> >> >> ? ? ? __u16 ? bg_flags;
> >> >> - ? ? __u32 ? bg_reserved[2];
> >> >> + ? ? __u32 ? bg_exclude_bitmap_lo; ? /* Exclude bitmap for snapshots */
> >> >> + ? ? __u16 ? bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> >> + ? ? __u16 ? bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> >> ? ? ? __u16 ? bg_itable_unused; ? ? ? /* Unused inodes count */
> >> >> ? ? ? __u16 ? bg_checksum; ? ? ? ? ? ?/* crc16(s_uuid+grouo_num+group_desc)*/
> >> >> ?};
> >> >> @@ -159,7 +161,9 @@ struct ext4_group_desc
> >> >> ? ? ? __u16 ? bg_free_inodes_count; ? /* Free inodes count */
> >> >> ? ? ? __u16 ? bg_used_dirs_count; ? ? /* Directories count */
> >> >> ? ? ? __u16 ? bg_flags; ? ? ? ? ? ? ? /* EXT4_BG_flags (INODE_UNINIT, etc) */
> >> >> - ? ? __u32 ? bg_reserved[2]; ? ? ? ? /* Likely block/inode bitmap checksum */
> >> >> + ? ? __u32 ? bg_exclude_bitmap_lo; ? /* Exclude bitmap for snapshots */
> >> >> + ? ? __u16 ? bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> >> + ? ? __u16 ? bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
> >> >> ? ? ? __u16 ? bg_itable_unused; ? ? ? /* Unused inodes count */
> >> >> ? ? ? __u16 ? bg_checksum; ? ? ? ? ? ?/* crc16(sb_uuid+group+desc) */
> >> >> ? ? ? __u32 ? bg_block_bitmap_hi; ? ? /* Blocks bitmap block MSB */
> >> >> @@ -169,7 +173,10 @@ struct ext4_group_desc
> >> >> ? ? ? __u16 ? bg_free_inodes_count_hi;/* Free inodes count MSB */
> >> >> ? ? ? __u16 ? bg_used_dirs_count_hi; ?/* Directories count MSB */
> >> >> ? ? ? __u16 ? bg_itable_unused_hi; ? ?/* Unused inodes count MSB */
> >> >> - ? ? __u32 ? bg_reserved2[3];
> >> >> + ? ? __u32 ? bg_exclude_bitmap_hi; ? /* Exclude bitmap block MSB */
> >> >> + ? ? __u16 ? bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
> >> >> + ? ? __u16 ? bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
> >> >> + ? ? __u32 ? bg_reserved;
> >> >
> >> > Should we attach a checksum to the exclude bitmap? ?That would, unfortunately,
> >> > put the 64-byte block group pretty close to full, unless we're ok with making
> >> > the bg even bigger...
> >>
> >> Good point.
> >> My initial approach (which I probably expressed somewhere) was that when
> >> exclude_bitmap feature is set, the block bitmap checksum should cover both
> >> block bitmap and exclude bitmap.
> >> The reason being that exclude bitmap adds little entropy over block bitmap:
> >> - it should always be a sub set of block bitmap bits
> >> - clearing exclude bit is done together with clearing the block used bit
> >> - the only case of modifying exclude bitmap only is when a used block becomes
> >> ? excluded (a.k.a moved to snapshot)
> >>
> >> so how do you feel about compressing 2 blocks into 16bits of checksum :-/?
> >
> > Hrm.... is it generally the case that both block and exclude bitmaps are loaded
> > at the same time? ?I don't think it'd be difficult to checksum both blocks
> > unless it's common that one of the two are not in memory.
> >
>
> Hrm indeed. no. exclude bitmap is currently not loaded always together with
> block bitmap, so unless this is changed, we'll have to think of something else.
Or... does it make sense to preload the exclude bitmap at the same time as the
block bitmap? If I'm reading you correctly, (block_bitmap | exclude_bitmap)
yields a bitmap of all blocks that are not available, correct? So in the case
that exclude bitmaps are enabled, you'd need both to be in memory anyway.
> However that brings me to wonder:
> block/inode/exclude bitmap seldom change more than a handful of bits/words
> at a time.
> Does it make sense to diff update the CRC of the bitmaps instead of
> recomputing it?
Yes, that would be a good (later) optimization at least to consider.
--D
On Thu, Sep 22, 2011 at 10:18 PM, Darrick J. Wong <[email protected]> wrote:
> On Thu, Sep 22, 2011 at 05:12:32AM +0300, Amir Goldstein wrote:
>> On Wed, Sep 21, 2011 at 11:40 PM, Darrick J. Wong <[email protected]> wrote:
>> > On Wed, Sep 21, 2011 at 11:03:57AM +0300, Amir Goldstein wrote:
>> >> On Tue, Sep 20, 2011 at 9:56 PM, Darrick J. Wong <[email protected]> wrote:
>> >> > On Thu, Sep 15, 2011 at 06:50:51PM -0400, Theodore Ts'o wrote:
>> > <snip>
>> >> >> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
>> >> >> index 4fec5db..1c86cb2 100644
>> >> >> --- a/lib/ext2fs/ext2_fs.h
>> >> >> +++ b/lib/ext2fs/ext2_fs.h
>> >> >> @@ -142,7 +142,9 @@ struct ext2_group_desc
>> >> >> ? ? ? __u16 ? bg_free_inodes_count; ? /* Free inodes count */
>> >> >> ? ? ? __u16 ? bg_used_dirs_count; ? ? /* Directories count */
>> >> >> ? ? ? __u16 ? bg_flags;
>> >> >> - ? ? __u32 ? bg_reserved[2];
>> >> >> + ? ? __u32 ? bg_exclude_bitmap_lo; ? /* Exclude bitmap for snapshots */
>> >> >> + ? ? __u16 ? bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> >> + ? ? __u16 ? bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> >> ? ? ? __u16 ? bg_itable_unused; ? ? ? /* Unused inodes count */
>> >> >> ? ? ? __u16 ? bg_checksum; ? ? ? ? ? ?/* crc16(s_uuid+grouo_num+group_desc)*/
>> >> >> ?};
>> >> >> @@ -159,7 +161,9 @@ struct ext4_group_desc
>> >> >> ? ? ? __u16 ? bg_free_inodes_count; ? /* Free inodes count */
>> >> >> ? ? ? __u16 ? bg_used_dirs_count; ? ? /* Directories count */
>> >> >> ? ? ? __u16 ? bg_flags; ? ? ? ? ? ? ? /* EXT4_BG_flags (INODE_UNINIT, etc) */
>> >> >> - ? ? __u32 ? bg_reserved[2]; ? ? ? ? /* Likely block/inode bitmap checksum */
>> >> >> + ? ? __u32 ? bg_exclude_bitmap_lo; ? /* Exclude bitmap for snapshots */
>> >> >> + ? ? __u16 ? bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> >> + ? ? __u16 ? bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bitmap) LSB */
>> >> >> ? ? ? __u16 ? bg_itable_unused; ? ? ? /* Unused inodes count */
>> >> >> ? ? ? __u16 ? bg_checksum; ? ? ? ? ? ?/* crc16(sb_uuid+group+desc) */
>> >> >> ? ? ? __u32 ? bg_block_bitmap_hi; ? ? /* Blocks bitmap block MSB */
>> >> >> @@ -169,7 +173,10 @@ struct ext4_group_desc
>> >> >> ? ? ? __u16 ? bg_free_inodes_count_hi;/* Free inodes count MSB */
>> >> >> ? ? ? __u16 ? bg_used_dirs_count_hi; ?/* Directories count MSB */
>> >> >> ? ? ? __u16 ? bg_itable_unused_hi; ? ?/* Unused inodes count MSB */
>> >> >> - ? ? __u32 ? bg_reserved2[3];
>> >> >> + ? ? __u32 ? bg_exclude_bitmap_hi; ? /* Exclude bitmap block MSB */
>> >> >> + ? ? __u16 ? bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
>> >> >> + ? ? __u16 ? bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bitmap) MSB */
>> >> >> + ? ? __u32 ? bg_reserved;
>> >> >
>> >> > Should we attach a checksum to the exclude bitmap? ?That would, unfortunately,
>> >> > put the 64-byte block group pretty close to full, unless we're ok with making
>> >> > the bg even bigger...
>> >>
>> >> Good point.
>> >> My initial approach (which I probably expressed somewhere) was that when
>> >> exclude_bitmap feature is set, the block bitmap checksum should cover both
>> >> block bitmap and exclude bitmap.
>> >> The reason being that exclude bitmap adds little entropy over block bitmap:
>> >> - it should always be a sub set of block bitmap bits
>> >> - clearing exclude bit is done together with clearing the block used bit
>> >> - the only case of modifying exclude bitmap only is when a used block becomes
>> >> ? excluded (a.k.a moved to snapshot)
>> >>
>> >> so how do you feel about compressing 2 blocks into 16bits of checksum :-/?
>> >
>> > Hrm.... is it generally the case that both block and exclude bitmaps are loaded
>> > at the same time? ?I don't think it'd be difficult to checksum both blocks
>> > unless it's common that one of the two are not in memory.
>> >
>>
>> Hrm indeed. no. exclude bitmap is currently not loaded always together with
>> block bitmap, so unless this is changed, we'll have to think of something else.
>
> Or... does it make sense to preload the exclude bitmap at the same time as the
> block bitmap?
it wouldn't be terrible to do that, since exclude bitmap will normally
be located
right next to block bitmap, but if memory pressure is high, that would
lead to half
the number of block bitmaps in memory at any given time.
>?If I'm reading you correctly, (block_bitmap | exclude_bitmap)
> yields a bitmap of all blocks that are not available, correct?
not exactly. if all is sane, then
(block_bitmap | exclude_bitmap) == block_bitmap
(block_bitmap & exclude_bitmap) == exclude_bitmap
> So in the case
> that exclude bitmaps are enabled, you'd need both to be in memory anyway.
>
exclude_bitmap is loaded into memory in the following cases:
1. allocating snapshot inode blocks (during COW)
2. deleting snapshot inode blocks (snapshot delete)
3. moving "deleted" blocks to snapshot (MOW)
specifically, exclude_bitmap is NOT loaded into memory in the following cases:
1. allocating regular inode blocks
2. deleting regular inode blocks, which were allocated after last snapshot
For example, in kernel compilation benchmark, all temp files
are created and deleted without modifying exclude bitmap (or snapshots).
>> However that brings me to wonder:
>> block/inode/exclude bitmap seldom change more than a handful of bits/words
>> at a time.
>> Does it make sense to diff update the CRC of the bitmaps instead of
>> recomputing it?
>
> Yes, that would be a good (later) optimization at least to consider.
>
> --D
>