From: "Darrick J. Wong" Subject: [PATCH 02/23] ext4: Change on-disk layout to support extended metadata checksumming Date: Tue, 06 Mar 2012 12:48:07 -0800 Message-ID: <20120306204807.1663.62824.stgit@elm3b70.beaverton.ibm.com> References: <20120306204750.1663.96751.stgit@elm3b70.beaverton.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Cc: Sunil Mushran , Martin K Petersen , Greg Freemyer , Amir Goldstein , linux-kernel , Andi Kleen , Mingming Cao , Joel Becker , linux-fsdevel , linux-ext4@vger.kernel.org, Coly Li To: Andreas Dilger , Theodore Tso , "Darrick J. Wong" Return-path: In-Reply-To: <20120306204750.1663.96751.stgit@elm3b70.beaverton.ibm.com> Sender: linux-kernel-owner@vger.kernel.org List-Id: linux-ext4.vger.kernel.org Define flags and change structure definitions to allow checksumming of ext4 metadata. Signed-off-by: Darrick J. Wong --- fs/ext4/ext4.h | 47 +++++++++++++++++++++++++++++++++++++++-------- fs/ext4/ext4_extents.h | 13 +++++++++++++ fs/ext4/namei.c | 8 ++++++++ fs/ext4/xattr.h | 4 +++- 4 files changed, 63 insertions(+), 9 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 513004f..0f0b0d5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -289,7 +289,9 @@ struct ext4_group_desc __le16 bg_free_inodes_count_lo;/* Free inodes count */ __le16 bg_used_dirs_count_lo; /* Directories count */ __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ - __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ + __le32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */ + __le16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */ + __le16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */ __le16 bg_itable_unused_lo; /* Unused inodes count */ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ @@ -299,7 +301,10 @@ struct ext4_group_desc __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ __le16 bg_used_dirs_count_hi; /* Directories count MSB */ __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ - __u32 bg_reserved2[3]; + __le32 bg_exclude_bitmap_hi; /* Exclude bitmap block MSB */ + __le16 bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */ + __le16 bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */ + __u32 bg_reserved; }; /* @@ -641,7 +646,8 @@ struct ext4_inode { __le16 l_i_file_acl_high; __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ - __u32 l_i_reserved2; + __le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */ + __le16 l_i_reserved; } linux2; struct { __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ @@ -657,7 +663,7 @@ struct ext4_inode { } masix2; } osd2; /* OS dependent 2 */ __le16 i_extra_isize; - __le16 i_pad1; + __le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ @@ -759,7 +765,7 @@ do { \ #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high -#define i_reserved2 osd2.linux2.l_i_reserved2 +#define i_checksum_lo osd2.linux2.l_i_checksum_lo #elif defined(__GNU__) @@ -992,6 +998,9 @@ extern void ext4_set_bits(void *bm, int cur, int len); #define EXT4_ERRORS_PANIC 3 /* Panic */ #define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE +/* Metadata checksum algorithm codes */ +#define EXT4_CRC32C_CHKSUM 1 + /* * Structure of the super block */ @@ -1078,7 +1087,7 @@ struct ext4_super_block { __le64 s_mmp_block; /* Block for multi-mount protection */ __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ __u8 s_log_groups_per_flex; /* FLEX_BG group size */ - __u8 s_reserved_char_pad; + __u8 s_checksum_type; /* metadata checksum algorithm used */ __le16 s_reserved_pad; __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ __le32 s_snapshot_inum; /* Inode number of active snapshot */ @@ -1104,7 +1113,8 @@ struct ext4_super_block { __le32 s_usr_quota_inum; /* inode for tracking user quota */ __le32 s_grp_quota_inum; /* inode for tracking group quota */ __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ - __le32 s_reserved[109]; /* Padding to the end of the block */ + __le32 s_reserved[108]; /* Padding to the end of the block */ + __le32 s_checksum; /* crc32c(superblock) */ }; #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) @@ -1407,6 +1417,12 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 +/* + * METADATA_CSUM also enables group descriptor checksums (GDT_CSUM). When + * METADATA_CSUM is set, group descriptor checksums use the same algorithm as + * all other data structures' checksums. However, the METADATA_CSUM and + * GDT_CSUM bits are mutually exclusive. + */ #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 @@ -1519,6 +1535,18 @@ struct ext4_dir_entry_2 { }; /* + * This is a bogus directory entry at the end of each leaf block that + * records checksums. + */ +struct ext4_dir_entry_tail { + __le32 det_reserved_zero1; /* Pretend to be unused */ + __le16 det_rec_len; /* 12 */ + __u8 det_reserved_zero2; /* Zero name length */ + __u8 det_reserved_ft; /* 0xDE, fake file type */ + __le32 det_checksum; /* crc32c(uuid+inum+dirblock) */ +}; + +/* * Ext4 directory file types. Only the low 3 bits are used. The * other bits are reserved for now. */ @@ -1533,6 +1561,8 @@ struct ext4_dir_entry_2 { #define EXT4_FT_MAX 8 +#define EXT4_FT_DIR_CSUM 0xDE + /* * EXT4_DIR_PAD defines the directory entries boundaries * @@ -1729,7 +1759,8 @@ struct mmp_struct { __le16 mmp_check_interval; __le16 mmp_pad1; - __le32 mmp_pad2[227]; + __le32 mmp_pad2[226]; + __le32 mmp_checksum; /* crc32c(uuid+mmp_block) */ }; /* arguments passed to the mmp thread */ diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index a52db3a..5c7fbade 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -63,9 +63,22 @@ * ext4_inode has i_block array (60 bytes total). * The first 12 bytes store ext4_extent_header; * the remainder stores an array of ext4_extent. + * For non-inode extent blocks, ext4_extent_tail + * follows the array. */ /* + * This is the extent tail on-disk structure. + * All other extent structures are 12 bytes long. It turns out that + * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which + * covers all valid ext4 block sizes. Therefore, this tail structure can be + * crammed into the end of the block without having to rebalance the tree. + */ +struct ext4_extent_tail { + __le32 et_checksum; /* crc32c(uuid+inum+extent_block) */ +}; + +/* * This is the extent on-disk structure. * It's used at the bottom of the tree. */ diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2043f48..8b56fdf 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -145,6 +145,14 @@ struct dx_map_entry u16 size; }; +/* + * This goes at the end of each htree block. + */ +struct dx_tail { + u32 dt_reserved; + __le32 dt_checksum; /* crc32c(uuid+inum+dirblock) */ +}; + static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); static inline unsigned dx_get_hash(struct dx_entry *entry); diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 25b7387..91f31ca 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -27,7 +27,9 @@ struct ext4_xattr_header { __le32 h_refcount; /* reference count */ __le32 h_blocks; /* number of disk blocks used */ __le32 h_hash; /* hash value of all attributes */ - __u32 h_reserved[4]; /* zero right now */ + __le32 h_checksum; /* crc32c(uuid+id+xattrblock) */ + /* id = inum if refcount=1, blknum otherwise */ + __u32 h_reserved[3]; /* zero right now */ }; struct ext4_xattr_ibody_header {