2007-08-23 05:40:35

by Avantika Mathur

[permalink] [raw]
Subject: [PATCH] uninitialized block groups

From: Andreas Dilger <[email protected]>
Signed-off-by: Avantika Mathur <[email protected]>
---

Index: linux-2.6.23-rc3/fs/ext4/balloc.c
===================================================================
--- linux-2.6.23-rc3.orig/fs/ext4/balloc.c 2007-08-21 17:31:50.000000000 -0700
+++ linux-2.6.23-rc3/fs/ext4/balloc.c 2007-08-21 17:31:51.000000000 -0700
@@ -20,6 +20,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>

+#include "group.h"
/*
* balloc.c contains the blocks allocation and deallocation routines
*/
@@ -42,6 +43,75 @@ void ext4_get_group_no_and_offset(struct

}

+/* Initializes an uninitialized block bitmap if given, and returns the
+ * number of blocks free in the group.
+ *
+ * @bh may be NULL, in which case no bitmap is written and only the free
+ * block count is computed; a non-NULL @bh must be locked by the caller.
+ * With a non-NULL @bh and a bad group descriptor checksum, the free counts
+ * in @gdp are zeroed, the whole bitmap block is set to 0xff and 0 is
+ * returned.  Otherwise the returned count is the blocks per group minus
+ * the superblock/GDT backup blocks found in this group, the inode table
+ * blocks and the two bitmap blocks. */
+unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+ int block_group, struct ext4_group_desc *gdp)
+{
+ unsigned long start;
+ int bit, bit_max;
+ unsigned free_blocks;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (bh) {
+ J_ASSERT_BH(bh, buffer_locked(bh));
+
+ /* If checksum is bad mark all blocks used to prevent allocation,
+ * essentially implementing a per-group read-only flag. */
+ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+ ext4_error(sb, __FUNCTION__,
+ "Checksum bad for group %u\n", block_group);
+ gdp->bg_free_blocks_count = 0;
+ gdp->bg_free_inodes_count = 0;
+ gdp->bg_itable_unused = 0;
+ memset(bh->b_data, 0xff, sb->s_blocksize);
+ return 0;
+ }
+ memset(bh->b_data, 0, sb->s_blocksize);
+ }
+
+ /* Check for superblock and gdt backups in this group */
+ bit_max = ext4_bg_has_super(sb, block_group);
+
+ if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
+ block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
+ sbi->s_desc_per_block) {
+ if (bit_max) {
+ bit_max += ext4_bg_num_gdb(sb, block_group);
+ bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+ }
+ } else { /* For META_BG_BLOCK_GROUPS */
+ int group_rel = (block_group -
+ le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
+ EXT4_DESC_PER_BLOCK(sb);
+ if (group_rel == 0 || group_rel == 1 ||
+ (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1))
+ bit_max += 1;
+ }
+
+ /* Last and first groups are always initialized */
+ free_blocks = EXT4_BLOCKS_PER_GROUP(sb) - bit_max;
+
+ if (bh) {
+ for (bit = 0; bit < bit_max; bit++)
+ ext4_set_bit(bit, bh->b_data);
+
+ start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
+ le32_to_cpu(sbi->s_es->s_first_data_block);
+
+ /* Set bits for block and inode bitmaps, and inode table.
+ * Bit numbers are the block numbers relative to the first block
+ * of this group (start).
+ * NOTE(review): only the 32-bit bg_* location fields are read
+ * here, not the bg_*_hi halves - confirm this is safe with the
+ * 64BIT feature. */
+ ext4_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - start,
+ bh->b_data);
+ ext4_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - start,
+ bh->b_data);
+ for (bit = le32_to_cpu(gdp->bg_inode_table) - start,
+ bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
+ ext4_set_bit(bit, bh->b_data);
+ }
+
+ return free_blocks - sbi->s_itb_per_group - 2;
+}
+
/*
* The free blocks are managed by bitmaps. A file system contains several
* blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
@@ -116,10 +186,22 @@ read_block_bitmap(struct super_block *sb
struct ext4_group_desc * desc;
struct buffer_head * bh = NULL;

- desc = ext4_get_group_desc (sb, block_group, NULL);
+ desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
goto error_out;
- bh = sb_bread(sb, ext4_block_bitmap(sb, desc));
+ if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap));
+ if (!buffer_uptodate(bh)) {
+ lock_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ ext4_init_block_bitmap(sb, bh,block_group,desc);
+ set_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
+ }
+ } else {
+ bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
+ }
if (!bh)
ext4_error (sb, "read_block_bitmap",
"Cannot read block bitmap - "
@@ -588,6 +670,7 @@ do_more:
desc->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
group_freed);
+ desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_mod(&sbi->s_freeblocks_counter, count);

@@ -1656,8 +1739,11 @@ allocated:
ret_block, goal_hits, goal_attempts);

spin_lock(sb_bgl_lock(sbi, group_no));
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
spin_unlock(sb_bgl_lock(sbi, group_no));
percpu_counter_mod(&sbi->s_freeblocks_counter, -num);

Index: linux-2.6.23-rc3/fs/ext4/group.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.23-rc3/fs/ext4/group.h 2007-08-22 21:30:55.000000000 -0700
@@ -0,0 +1,29 @@
+/*
+ * linux/fs/ext4/group.h
+ *
+ * Copyright (C) 2007 Cluster File Systems, Inc
+ *
+ * Author: Andreas Dilger <[email protected]>
+ *
+ * Declarations for the group descriptor checksum helpers and for the
+ * routines that build the bitmaps of uninitialized block groups.  In this
+ * series the header is included by balloc.c, ialloc.c, resize.c, super.c
+ * and mballoc.c.
+ *
+ * ext4_free_blocks_after_init() calls ext4_init_block_bitmap() with a NULL
+ * buffer head, so no bitmap is written and only the free block count of
+ * the group is returned.
+ */
+
+#ifndef _LINUX_EXT4_GROUP_H
+#define _LINUX_EXT4_GROUP_H
+#if defined(CONFIG_CRC16)
+#include <linux/crc16.h>
+#endif
+
+extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
+ struct ext4_group_desc *gdp);
+extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
+ struct ext4_group_desc *gdp);
+struct buffer_head *read_block_bitmap(struct super_block *sb,
+ unsigned int block_group);
+extern unsigned ext4_init_block_bitmap(struct super_block *sb,
+ struct buffer_head *bh, int group,
+ struct ext4_group_desc *desc);
+#define ext4_free_blocks_after_init(sb, group, desc) \
+ ext4_init_block_bitmap(sb, NULL, group, desc)
+extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
+ struct buffer_head *bh, int group,
+ struct ext4_group_desc *desc);
+#endif /* _LINUX_EXT4_GROUP_H */
Index: linux-2.6.23-rc3/fs/ext4/ialloc.c
===================================================================
--- linux-2.6.23-rc3.orig/fs/ext4/ialloc.c 2007-08-12 21:25:24.000000000 -0700
+++ linux-2.6.23-rc3/fs/ext4/ialloc.c 2007-08-21 17:31:51.000000000 -0700
@@ -28,6 +28,7 @@

#include "xattr.h"
#include "acl.h"
+#include "group.h"

/*
* ialloc.c contains the inodes allocation and deallocation routines
@@ -43,6 +44,52 @@
* the free blocks count in the block.
*/

+/*
+ * Mark the tail of a bitmap, from start_bit towards end_bit, as in use.
+ *
+ * ext4_set_bit() is only called for the bits between start_bit and the
+ * next byte boundary, which keeps the bit order inside that byte right
+ * without issuing hundreds or thousands of atomic bit operations.  The
+ * bytes after that are filled with memset(), which is fine because nobody
+ * else is using the bitmap yet.
+ *
+ * Only whole bytes are filled by the memset(): if end_bit is not a
+ * multiple of 8 the last partial byte is left untouched.
+ */
+static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+{
+	unsigned long byte_boundary;
+	int bit = start_bit;
+
+	if (end_bit <= start_bit)
+		return;
+
+	ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
+
+	/* Bit by bit up to the next multiple of 8 ... */
+	byte_boundary = (start_bit + 7) & ~7UL;
+	while (bit < byte_boundary) {
+		ext4_set_bit(bit, bitmap);
+		bit++;
+	}
+
+	/* ... then the remaining whole bytes in one go. */
+	if (bit < end_bit)
+		memset(bitmap + (bit >> 3), 0xff, (end_bit - bit) >> 3);
+}
+
+/*
+ * Initializes an uninitialized inode bitmap.
+ *
+ * @bh is the buffer of the group's inode bitmap block and must be locked
+ * by the caller.  If the group descriptor checksum is bad, the free
+ * block/inode counts in @gdp are zeroed, every bit of the bitmap block is
+ * set and 0 is returned, so nothing is allocated from the group.
+ * Otherwise the bits of the group's inodes are cleared, all remaining bits
+ * up to the end of the bitmap block are set, and EXT4_INODES_PER_GROUP(sb)
+ * is returned.
+ */
+unsigned ext4_init_inode_bitmap(struct super_block *sb,
+				struct buffer_head *bh, int block_group,
+				struct ext4_group_desc *gdp)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+	J_ASSERT_BH(bh, buffer_locked(bh));
+
+	/* If checksum is bad mark all blocks and inodes used to prevent
+	 * allocation, essentially implementing a per-group read-only flag. */
+	if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+		ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
+			   block_group);
+		gdp->bg_free_blocks_count = 0;
+		gdp->bg_free_inodes_count = 0;
+		gdp->bg_itable_unused = 0;
+		memset(bh->b_data, 0xff, sb->s_blocksize);
+		return 0;
+	}
+
+	memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
+	/* Pad to the end of the bitmap block (s_blocksize * 8 bits), not to
+	 * EXT4_BLOCKS_PER_GROUP(): the buffer was not read from disk, so any
+	 * bit that is not written here would keep stale contents when the
+	 * group has fewer blocks than the bitmap block has bits. */
+	mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
+			bh->b_data);
+
+	return EXT4_INODES_PER_GROUP(sb);
+}

/*
* Read the inode allocation bitmap for a given block_group, reading
@@ -59,8 +106,19 @@ read_inode_bitmap(struct super_block * s
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
goto error_out;
-
- bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+ if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap));
+ if (!buffer_uptodate(bh)) {
+ lock_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ ext4_init_inode_bitmap(sb, bh,block_group,desc);
+ set_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
+ }
+ } else {
+ bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
+ }
if (!bh)
ext4_error(sb, "read_inode_bitmap",
"Cannot read inode bitmap - "
@@ -169,6 +227,8 @@ void ext4_free_inode (handle_t *handle,
if (is_directory)
gdp->bg_used_dirs_count = cpu_to_le16(
le16_to_cpu(gdp->bg_used_dirs_count) - 1);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi,block_group,
+ gdp);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_inc(&sbi->s_freeinodes_counter);
if (is_directory)
@@ -438,7 +498,7 @@ struct inode *ext4_new_inode(handle_t *h
struct ext4_sb_info *sbi;
int err = 0;
struct inode *ret;
- int i;
+ int i, free = 0;

/* Cannot create files in a deleted directory */
if (!dir || !dir->i_nlink)
@@ -520,11 +580,13 @@ repeat_in_this_group:
goto out;

got:
- ino += group * EXT4_INODES_PER_GROUP(sb) + 1;
- if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
- ext4_error (sb, "ext4_new_inode",
- "reserved inode or inode > inodes count - "
- "block_group = %d, inode=%lu", group, ino);
+ ino++;
+ if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
+ ino > EXT4_INODES_PER_GROUP(sb)) {
+ ext4_error(sb, __FUNCTION__,
+ "reserved inode or inode > inodes count - "
+ "block_group = %d, inode=%lu", group,
+ ino + group * EXT4_INODES_PER_GROUP(sb));
err = -EIO;
goto fail;
}
@@ -532,13 +594,65 @@ got:
BUFFER_TRACE(bh2, "get_write_access");
err = ext4_journal_get_write_access(handle, bh2);
if (err) goto fail;
+
+ /* We may have to initialize the block bitmap if it isn't already */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
+ gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ struct buffer_head *block_bh = read_block_bitmap(sb, group);
+
+ BUFFER_TRACE(block_bh, "get block bitmap access");
+ err = ext4_journal_get_write_access(handle, block_bh);
+ if (err) {
+ brelse(block_bh);
+ goto fail;
+ }
+
+ free = 0;
+ spin_lock(sb_bgl_lock(sbi, group));
+ /* recheck and clear flag under lock if we still need to */
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+ free = ext4_free_blocks_after_init(sb, group, gdp);
+ gdp->bg_free_blocks_count = cpu_to_le16(free);
+ }
+ spin_unlock(sb_bgl_lock(sbi, group));
+
+ /* Don't need to dirty bitmap block if we didn't change it */
+ if (free) {
+ BUFFER_TRACE(block_bh, "dirty block bitmap");
+ err = ext4_journal_dirty_metadata(handle, block_bh);
+ }
+
+ brelse(block_bh);
+ if (err)
+ goto fail;
+ }
+
spin_lock(sb_bgl_lock(sbi, group));
+ /* If we didn't allocate from within the initialized part of the inode
+ * table then we need to initialize up to this inode. */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+ free = EXT4_INODES_PER_GROUP(sb);
+ } else {
+ free = EXT4_INODES_PER_GROUP(sb) -
+ le16_to_cpu(gdp->bg_itable_unused);
+ }
+
+ if (ino > free) {
+ gdp->bg_itable_unused =
+ cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
+ }
+ }
+
gdp->bg_free_inodes_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
if (S_ISDIR(mode)) {
gdp->bg_used_dirs_count =
cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
}
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
spin_unlock(sb_bgl_lock(sbi, group));
BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
err = ext4_journal_dirty_metadata(handle, bh2);
@@ -560,7 +674,7 @@ got:
inode->i_gid = current->fsgid;
inode->i_mode = mode;

- inode->i_ino = ino;
+ inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
Index: linux-2.6.23-rc3/fs/ext4/resize.c
===================================================================
--- linux-2.6.23-rc3.orig/fs/ext4/resize.c 2007-08-12 21:25:24.000000000 -0700
+++ linux-2.6.23-rc3/fs/ext4/resize.c 2007-08-21 17:31:51.000000000 -0700
@@ -16,6 +16,7 @@
#include <linux/errno.h>
#include <linux/slab.h>

+#include "group.h"

#define outside(b, first, last) ((b) < (first) || (b) >= (last))
#define inside(b, first, last) ((b) >= (first) && (b) < (last))
@@ -842,6 +843,7 @@ int ext4_group_add(struct super_block *s
ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);

/*
* Make the new blocks and inodes valid next. We do this before
Index: linux-2.6.23-rc3/fs/ext4/super.c
===================================================================
--- linux-2.6.23-rc3.orig/fs/ext4/super.c 2007-08-21 17:31:50.000000000 -0700
+++ linux-2.6.23-rc3/fs/ext4/super.c 2007-08-22 21:32:27.000000000 -0700
@@ -43,6 +43,7 @@
#include "xattr.h"
#include "acl.h"
#include "namei.h"
+#include "group.h"

static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
@@ -1271,6 +1272,94 @@ static int ext4_setup_super(struct super
return res;
}

+#if !defined(CONFIG_CRC16)
+/** CRC table for the CRC-16. The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */
+__u16 const crc16_table[256] = {
+ 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
+ 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
+ 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
+ 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841,
+ 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40,
+ 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41,
+ 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641,
+ 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040,
+ 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
+ 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441,
+ 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41,
+ 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840,
+ 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41,
+ 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40,
+ 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640,
+ 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041,
+ 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240,
+ 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,
+ 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41,
+ 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840,
+ 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41,
+ 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40,
+ 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640,
+ 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041,
+ 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241,
+ 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440,
+ 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,
+ 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841,
+ 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40,
+ 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41,
+ 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641,
+ 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040
+};
+
+/* Fold one byte into a running CRC-16 with a crc16_table lookup. */
+static inline __u16 crc16_byte(__u16 crc, const __u8 data)
+{
+	__u8 idx = (crc ^ data) & 0xff;
+
+	return (crc >> 8) ^ crc16_table[idx];
+}
+
+/* Fold @len bytes starting at @buffer into @crc and return the result. */
+__u16 crc16(__u16 crc, __u8 const *buffer, size_t len)
+{
+	__u8 const *end = buffer + len;
+
+	for (; buffer != end; buffer++)
+		crc = crc16_byte(crc, *buffer);
+
+	return crc;
+}
+#endif
+
+/*
+ * Compute the checksum of group descriptor @gdp of group @block_group.
+ *
+ * Without the GDT_CSUM feature the result is 0.  Otherwise a crc16 seeded
+ * with ~0 is run over the filesystem UUID, the little-endian group number
+ * and the descriptor up to (not including) bg_checksum; with the 64BIT
+ * feature the part of the descriptor after bg_checksum, up to s_desc_size,
+ * is included as well.  The value is returned in little-endian form.
+ */
+__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
+			    struct ext4_group_desc *gdp)
+{
+	__le32 le_group = cpu_to_le32(block_group);
+	int csum_start = offsetof(struct ext4_group_desc, bg_checksum);
+	int csum_end = csum_start + sizeof(gdp->bg_checksum);
+	__u16 crc = 0;
+
+	if (!(sbi->s_es->s_feature_ro_compat &
+	      cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
+		return cpu_to_le16(crc);
+
+	crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
+	crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
+	crc = crc16(crc, (__u8 *)gdp, csum_start);
+
+	/* Only 64BIT descriptors have anything after the checksum field. */
+	if (!(sbi->s_es->s_feature_incompat &
+	      cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)))
+		return cpu_to_le16(crc);
+
+	if (csum_end < le16_to_cpu(sbi->s_es->s_desc_size))
+		crc = crc16(crc, (__u8 *)gdp + csum_end,
+			    le16_to_cpu(sbi->s_es->s_desc_size) - csum_end);
+
+	return cpu_to_le16(crc);
+}
+
+/*
+ * Check the stored checksum of group descriptor @gdp.
+ *
+ * Returns 1 if the GDT_CSUM feature is off or bg_checksum matches the
+ * value computed by ext4_group_desc_csum(), 0 on a mismatch.
+ */
+int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
+				struct ext4_group_desc *gdp)
+{
+	/* Nothing to verify when checksums are not enabled. */
+	if (!(sbi->s_es->s_feature_ro_compat &
+	      cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
+		return 1;
+
+	if (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))
+		return 0;
+
+	return 1;
+}
+
/* Called at mount-time, super-block is locked */
static int ext4_check_descriptors (struct super_block * sb)
{
@@ -1325,6 +1414,13 @@ static int ext4_check_descriptors (struc
i, inode_table);
return 0;
}
+ if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
+ ext4_error(sb, __FUNCTION__,
+ "Checksum for group %d failed (%u!=%u)\n", i,
+ le16_to_cpu(ext4_group_desc_csum(sbi,i,gdp)),
+ le16_to_cpu(gdp->bg_checksum));
+ return 0;
+ }
first_block += EXT4_BLOCKS_PER_GROUP(sb);
gdp = (struct ext4_group_desc *)
((__u8 *)gdp + EXT4_DESC_SIZE(sb));
Index: linux-2.6.23-rc3/include/linux/ext4_fs.h
===================================================================
--- linux-2.6.23-rc3.orig/include/linux/ext4_fs.h 2007-08-21 17:31:50.000000000 -0700
+++ linux-2.6.23-rc3/include/linux/ext4_fs.h 2007-08-21 17:31:51.000000000 -0700
@@ -150,19 +150,25 @@ struct ext4_allocation_request {
*/
struct ext4_group_desc
{
- __le32 bg_block_bitmap; /* Blocks bitmap block */
- __le32 bg_inode_bitmap; /* Inodes bitmap block */
+ __le32 bg_block_bitmap; /* Blocks bitmap block */
+ __le32 bg_inode_bitmap; /* Inodes bitmap block */
__le32 bg_inode_table; /* Inodes table block */
__le16 bg_free_blocks_count; /* Free blocks count */
__le16 bg_free_inodes_count; /* Free inodes count */
__le16 bg_used_dirs_count; /* Directories count */
__u16 bg_flags;
- __u32 bg_reserved[3];
+ __u32 bg_reserved[2];
+ __le16 bg_itable_unused; /* Unused inodes count */
+ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
__le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
__le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
__le32 bg_inode_table_hi; /* Inodes table block MSB */
};

+#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
+#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
+#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
+
#ifdef __KERNEL__
#include <linux/ext4_fs_i.h>
#include <linux/ext4_fs_sb.h>
@@ -734,6 +740,7 @@ static inline int ext4_valid_inum(struct
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
+#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040

@@ -755,6 +762,7 @@ static inline int ext4_valid_inum(struct
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR)

/*


Attachments:
uninit (20.01 kB)

2007-08-31 00:45:38

by Avantika Mathur

[permalink] [raw]
Subject: Re: [PATCH] uninitialized block groups

Andreas Dilger wrote:
> On Aug 22, 2007 22:41 -0700, Avantika Mathur wrote:
>
>> Below is an updated version of the uninitialized block groups patch,
>> ported to the end of the patch queue. There are small changes to the
>> previous version:
>> - consistent format of the group_desc structure with the e2fsprogs patches
>> - do not verify group_desc checksums if the feature is not enabled
>>
>
> Can you please keep the original description intact with the patch, so
> it is useful for Andrew when it is submitted upstream:
Thank you for the comments, Andreas. Here is the patch updated with your
comments, and ported to the latest ext4 patch queue after the
journal_checksum patches.

Avantika

---




Attachments:
ext4_uninit_blockgroup.patch (20.97 kB)

2007-09-07 23:05:40

by Avantika Mathur

[permalink] [raw]
Subject: Re: [PATCH] uninitialized block groups

Signed-off-by: Andreas Dilger <[email protected]>

Index: linux-2.6.23-rc4/fs/ext4/mballoc.c
===================================================================
--- linux-2.6.23-rc4.orig/fs/ext4/mballoc.c 2007-09-05 13:57:53.000000000 -0700
+++ linux-2.6.23-rc4/fs/ext4/mballoc.c 2007-09-05 13:59:51.000000000 -0700
@@ -36,6 +36,8 @@
#include <linux/seq_file.h>
#include <linux/version.h>

+#include "group.h"
+
/*
* MUSTDO:
* - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -321,6 +323,7 @@
unsigned long bb_state;
unsigned long bb_tid;
struct ext4_free_metadata *bb_md_cur;
+ struct ext4_group_desc *bb_gdp;
unsigned short bb_first_free;
unsigned short bb_free;
unsigned short bb_fragments;
@@ -921,10 +924,7 @@
if (first_group + i >= EXT4_SB(sb)->s_groups_count)
break;

- err = -EIO;
- desc = ext4_get_group_desc(sb, first_group + i, NULL);
- if (desc == NULL)
- goto out;
+ desc = EXT4_GROUP_INFO(sb, first_group + i)->bb_gdp;

err = -ENOMEM;
bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap));
@@ -939,7 +939,12 @@
unlock_buffer(bh[i]);
continue;
}
-
+ if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ ext4_init_block_bitmap(sb, bh[i], first_group + i,desc);
+ set_buffer_uptodate(bh[i]);
+ unlock_buffer(bh[i]);
+ continue;
+ }
get_bh(bh[i]);
bh[i]->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh[i]);
@@ -1722,6 +1727,10 @@
switch (cr) {
case 0:
BUG_ON(ac->ac_2order == 0);
+ /* If this group is uninitialized, skip it initially */
+ if (grp->bb_gdp->bg_flags &
+ cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
+ return 0;
bits = ac->ac_sb->s_blocksize_bits + 1;
for (i = ac->ac_2order; i <= bits; i++)
if (grp->bb_counters[i] > 0)
@@ -1825,7 +1834,9 @@
}

ac->ac_groups_scanned++;
- if (cr == 0)
+ if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags &
+ cpu_to_le16(EXT4_BG_BLOCK_UNINIT) &&
+ ac->ac_2order != 0))
ext4_mb_simple_scan_group(ac, &e3b);
else if (cr == 1 && ac->ac_g_ex.fe_len == sbi->s_stripe)
ext4_mb_scan_aligned(ac, &e3b);
@@ -2304,12 +2315,13 @@
i--;
goto err_freebuddy;
}
+ memset(meta_group_info[j], 0, len);
desc = ext4_get_group_desc(sb, i, NULL);
+ meta_group_info[j]->bb_gdp = desc;
if (desc == NULL) {
printk(KERN_ERR"EXT4-fs: can't read descriptor %u\n",i);
goto err_freebuddy;
}
- memset(meta_group_info[j], 0, len);
set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
&meta_group_info[j]->bb_state);

@@ -2874,9 +2886,17 @@
mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);

spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+ gdp->bg_free_blocks_count =
+ cpu_to_le16(ext4_free_blocks_after_init(sb,
+ ac->ac_b_ex.fe_group,
+ gdp));
+ }
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
- ac->ac_b_ex.fe_len);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len);

@@ -4256,6 +4276,7 @@
spin_lock(sb_bgl_lock(sbi, block_group));
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_mod(&sbi->s_freeblocks_counter, count);



Attachments:
mballoc-uninit-fix (3.50 kB)