The following series of patches adds support for creating and checking
filesystems with the FLEX_BG feature. This feature currently groups
meta-data from a series of groups at the beginning of a flex group in
order to improve performance during heavy meta-data operations.
Changes from last time:
- When making a filesystem with FLEX_BG also enable META_BG feature.
- Allocate meta data within the META_BG group range.
- Descriptor checking ensures bitmaps and inode tables are in the META
group.
Problems and TODOs:
- Fsck has some failures using FLEX_BG and resize_inode features at the
same time. Still investigating.
- Need to define what unallocated inode tables will look like.
- Need to create a test case.
-JRS
From: Jose R. Santos <[email protected]>
Reserve the INCOMPAT feature number for FLEX_BG.
Signed-off-by: Jose R. Santos <[email protected]>
--
lib/ext2fs/ext2_fs.h | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index a316665..2394857 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -640,6 +640,7 @@ struct ext2_super_block {
#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040
#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
#define EXT4_FEATURE_INCOMPAT_MMP 0x0100
+#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
#define EXT2_FEATURE_COMPAT_SUPP 0
From: Jose R. Santos <[email protected]>
Allow FLEX_BG to be used as a feature option at mke2fs time.
Signed-off-by: Jose R. Santos <[email protected]>
--
lib/e2p/feature.c | 2 ++
lib/ext2fs/ext2fs.h | 6 ++++--
misc/mke2fs.c | 7 ++++++-
3 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
index fe7e65a..4bf5630 100644
--- a/lib/e2p/feature.c
+++ b/lib/e2p/feature.c
@@ -67,6 +67,8 @@ static struct feature feature_list[] = {
"extent" },
{ E2P_FEATURE_INCOMPAT, EXT4_FEATURE_INCOMPAT_64BIT,
"64bit" },
+ { E2P_FEATURE_INCOMPAT, EXT4_FEATURE_INCOMPAT_FLEX_BG,
+ "flex_bg"},
{ 0, 0, 0 },
};
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 83a9091..5c461c9 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -432,12 +432,14 @@ typedef struct ext2_icount *ext2_icount_t;
EXT2_FEATURE_INCOMPAT_COMPRESSION|\
EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|\
EXT2_FEATURE_INCOMPAT_META_BG|\
- EXT3_FEATURE_INCOMPAT_RECOVER)
+ EXT3_FEATURE_INCOMPAT_RECOVER|\
+ EXT4_FEATURE_INCOMPAT_FLEX_BG)
#else
#define EXT2_LIB_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE|\
EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|\
EXT2_FEATURE_INCOMPAT_META_BG|\
- EXT3_FEATURE_INCOMPAT_RECOVER)
+ EXT3_FEATURE_INCOMPAT_RECOVER|\
+ EXT4_FEATURE_INCOMPAT_FLEX_BG)
#endif
#define EXT2_LIB_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|\
EXT2_FEATURE_RO_COMPAT_LARGE_FILE)
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 4a6cace..6dd8d30 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -873,7 +873,8 @@ static __u32 ok_features[3] = {
EXT2_FEATURE_COMPAT_LAZY_BG, /* Compat */
EXT2_FEATURE_INCOMPAT_FILETYPE| /* Incompat */
EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|
- EXT2_FEATURE_INCOMPAT_META_BG,
+ EXT2_FEATURE_INCOMPAT_META_BG|
+ EXT4_FEATURE_INCOMPAT_FLEX_BG,
EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER /* R/O compat */
};
@@ -1363,6 +1364,10 @@ static void PRS(int argc, char *argv[])
fs_param.s_feature_ro_compat = 0;
}
+ if (fs_param.s_feature_incompat &
+ EXT4_FEATURE_INCOMPAT_FLEX_BG)
+ fs_param.s_feature_incompat |= EXT2_FEATURE_INCOMPAT_META_BG;
+
/* Set first meta blockgroup via an environment variable */
/* (this is mostly for debugging purposes) */
if ((fs_param.s_feature_incompat & EXT2_FEATURE_INCOMPAT_META_BG) &&
From: Jose R. Santos <[email protected]>
Relax group descriptor checking.
In order for tools such as dumpe2fs, e2fsck and debugfs to open an ext4
filesystem with the FLEX_BG feature enabled, some descriptor checking
needs to be relaxed. This patch changes the group descriptor checking so
that bitmaps and inode tables can be located anywhere in the
partition's block range.
Signed-off-by: Jose R. Santos <[email protected]>
--
e2fsck/super.c | 14 ++++++++++++--
lib/ext2fs/check_desc.c | 15 +++++++++++++--
2 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/e2fsck/super.c b/e2fsck/super.c
index 00a131c..ed28732 100644
--- a/e2fsck/super.c
+++ b/e2fsck/super.c
@@ -463,6 +463,8 @@ void check_super_block(e2fsck_t ctx)
int inodes_per_block;
int ipg_max;
int inode_size;
+ dgrp_t start_group;
+ int meta_bg_size;
dgrp_t i;
blk_t should_be;
struct problem_context pctx;
@@ -578,8 +580,16 @@ void check_super_block(e2fsck_t ctx)
for (i = 0, gd=fs->group_desc; i < fs->group_desc_count; i++, gd++) {
pctx.group = i;
- first_block = ext2fs_group_first_block(fs, i);
- last_block = ext2fs_group_last_block(fs, i);
+ if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
+ EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+ meta_bg_size = (fs->blocksize / sizeof (struct ext2_group_desc));
+ start_group = (i / meta_bg_size) * meta_bg_size;
+ first_block = ext2fs_group_first_block(fs, start_group);
+ last_block = ext2fs_group_first_block(fs, start_group + meta_bg_size);
+ } else {
+ first_block = ext2fs_group_first_block(fs, i);
+ last_block = ext2fs_group_last_block(fs, i);
+ }
if ((gd->bg_block_bitmap < first_block) ||
(gd->bg_block_bitmap > last_block)) {
diff --git a/lib/ext2fs/check_desc.c b/lib/ext2fs/check_desc.c
index 146f9e5..dbbcfb3 100644
--- a/lib/ext2fs/check_desc.c
+++ b/lib/ext2fs/check_desc.c
@@ -34,12 +34,23 @@ errcode_t ext2fs_check_desc(ext2_filsys fs)
dgrp_t i;
blk_t first_block = fs->super->s_first_data_block;
blk_t last_block;
+ dgrp_t start_group;
+ int meta_bg_size;
EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS);
for (i = 0; i < fs->group_desc_count; i++) {
- first_block = ext2fs_group_first_block(fs, i);
- last_block = ext2fs_group_last_block(fs, i);
+ if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
+ EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+ meta_bg_size = (fs->blocksize / sizeof (struct ext2_group_desc));
+ start_group = (i / meta_bg_size) * meta_bg_size;
+ first_block = ext2fs_group_first_block(fs, start_group);
+ last_block = ext2fs_group_first_block(fs, start_group + meta_bg_size);
+ }
+ else {
+ first_block = ext2fs_group_first_block(fs, i);
+ last_block = ext2fs_group_last_block(fs, i);
+ }
/*
* Check to make sure block bitmap for group is
From: Jose R. Santos <[email protected]>
New bitmap and inode table allocation for FLEX_BG
Change the way we allocate bitmaps and inode tables if the FLEX_BG
feature is used at mke2fs time. The block and inode bitmaps are
allocated as one contiguous set for each flex block group. Due to
the size of the inode tables, the inode table for each block group is
allocated individually but packed close together at the beginning of a
flex group. For now, this allows the inode tables to be packed
close to the inode bitmaps in cases where we try to allocate a large
group of inode tables right after the bitmaps and fail.
Signed-off-by: Jose R. Santos <[email protected]>
--
lib/ext2fs/alloc_tables.c | 132 ++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 128 insertions(+), 4 deletions(-)
diff --git a/lib/ext2fs/alloc_tables.c b/lib/ext2fs/alloc_tables.c
index 4ad2ba9..9740a2f 100644
--- a/lib/ext2fs/alloc_tables.c
+++ b/lib/ext2fs/alloc_tables.c
@@ -27,6 +27,124 @@
#include "ext2_fs.h"
#include "ext2fs.h"
+#define ALLOC_BLOCK_BITMAPS 1
+#define ALLOC_INODE_BITMAPS 2
+#define ALLOC_INODE_TABLES 3
+
+errcode_t ext2fs_allocate_contiguous(ext2_filsys fs, dgrp_t group,
+ int type, blk_t start_blk, blk_t last_blk,
+ int count, ext2fs_block_bitmap bmap)
+{
+ errcode_t retval;
+ blk_t new_blk, blk;
+ int i, j;
+
+ if (!bmap)
+ bmap = fs->block_map;
+
+ switch (type) {
+ case ALLOC_BLOCK_BITMAPS:
+ retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
+ 1 * count, bmap, &new_blk);
+ if (retval)
+ return retval;
+ for (i=0, blk=new_blk; i < count; i++, blk++) {
+ ext2fs_mark_block_bitmap(bmap, blk);
+ fs->group_desc[group+i].bg_block_bitmap = blk;
+ }
+ break;
+
+ case ALLOC_INODE_BITMAPS:
+ retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
+ 1 * count, bmap, &new_blk);
+ if (retval)
+ return retval;
+ for (i=0, blk=new_blk; i < count; i++, blk++) {
+ ext2fs_mark_block_bitmap(bmap, blk);
+ fs->group_desc[group+i].bg_inode_bitmap = blk;
+ }
+ break;
+
+ case ALLOC_INODE_TABLES:
+ for (i=0; i < count; i++) {
+ retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
+ fs->inode_blocks_per_group,
+ bmap, &new_blk);
+ if (retval)
+ return retval;
+ blk = new_blk;
+ for (j=0; j < fs->inode_blocks_per_group; j++, blk++)
+ ext2fs_mark_block_bitmap(bmap, blk);
+ fs->group_desc[group+i].bg_inode_table = new_blk;
+ }
+ break;
+
+ }
+ return 0;
+}
+
+
+
+errcode_t ext2fs_allocate_flex_groups(ext2_filsys fs)
+{
+ errcode_t retval;
+ blk_t start, last, j, blocks;
+ dgrp_t i, k;
+ int meta_bg_size;
+
+ meta_bg_size = (fs->blocksize / sizeof (struct ext2_group_desc));
+ blocks = 0;
+
+ for (i = 0; i < fs->group_desc_count; i=i+meta_bg_size) {
+
+ start = ext2fs_group_first_block(fs, i);
+
+ if (i+meta_bg_size >= fs->group_desc_count) {
+ last = ext2fs_group_last_block(fs, fs->group_desc_count);
+ meta_bg_size = fs->group_desc_count - i;
+ }
+ else
+ last = ext2fs_group_last_block(fs, i+meta_bg_size-1);
+
+ retval = ext2fs_allocate_contiguous(fs, i, ALLOC_BLOCK_BITMAPS,
+ start, last, meta_bg_size,
+ fs->block_map);
+ if (retval)
+ return retval;
+ retval = ext2fs_allocate_contiguous(fs, i, ALLOC_INODE_BITMAPS,
+ start, last, meta_bg_size,
+ fs->block_map);
+ if (retval)
+ return retval;
+ retval = ext2fs_allocate_contiguous(fs, i, ALLOC_INODE_TABLES,
+ start, last, meta_bg_size,
+ fs->block_map);
+ if (retval)
+ return retval;
+
+ /*
+ * The content of bg_free_blocks_count is previously
+ * assigned with out knowledge of the new allocation
+ * scheme. Need to update the number of free blocks
+ * per group descriptor or fsck will complain.
+ */
+
+ for (k=i; k<i+meta_bg_size; k++){
+ if (k > fs->group_desc_count)
+ break;
+ start = ext2fs_group_first_block(fs, k);
+ last = ext2fs_group_last_block(fs, k);
+ for (j=start; j<=last; j++) {
+ if( !ext2fs_fast_test_block_bitmap(fs->block_map, j))
+ blocks++;
+ }
+ fs->group_desc[k].bg_free_blocks_count = blocks;
+ blocks = 0;
+ }
+ }
+ return 0;
+}
+
errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
ext2fs_block_bitmap bmap)
{
@@ -107,10 +225,16 @@ errcode_t ext2fs_allocate_tables(ext2_filsys fs)
errcode_t retval;
dgrp_t i;
- for (i = 0; i < fs->group_desc_count; i++) {
- retval = ext2fs_allocate_group_table(fs, i, fs->block_map);
- if (retval)
- return retval;
+ if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
+ EXT4_FEATURE_INCOMPAT_FLEX_BG))
+ ext2fs_allocate_flex_groups(fs);
+
+ else {
+ for (i = 0; i < fs->group_desc_count; i++) {
+ retval = ext2fs_allocate_group_table(fs, i, fs->block_map);
+ if (retval)
+ return retval;
+ }
}
return 0;
}
On Mon, Aug 13, 2007 at 11:32:57PM -0500, Jose R. Santos wrote:
> From: Jose R. Santos <[email protected]>
>
> Allow FLEX_BG to be use as a feature option at mke2fs time.
>
> @@ -1363,6 +1364,10 @@ static void PRS(int argc, char *argv[])
> fs_param.s_feature_ro_compat = 0;
> }
>
> + if (fs_param.s_feature_incompat &
> + EXT4_FEATURE_INCOMPAT_FLEX_BG)
> + fs_param.s_feature_incompat |= EXT2_FEATURE_INCOMPAT_META_BG;
> +
> /* Set first meta blockgroup via an environment variable */
> /* (this is mostly for debugging purposes) */
> if ((fs_param.s_feature_incompat & EXT2_FEATURE_INCOMPAT_META_BG) &&
Why is this hunk here? Just for convenience, right? Technically
there's no reason why we can't relax the location of the inode tables
and bitmap blocks without going to meta_bg. My inclination is to keep
the options as orthogonal as possible, and use mke2fs.conf to allow
the user to conveniently set up filesystems with the appropriate
configuration parameters.
- Ted
On Mon, Aug 13, 2007 at 11:33:03PM -0500, Jose R. Santos wrote:
> From: Jose R. Santos <[email protected]>
>
> Relax group descriptor checking.
This patch should really be before patch #2 in the series (add the
ability to handle the new feature before adding the ability to add in
mke2fs). (Actually, I would have split up #2 into one patch which
added the libe2p handling for the feature, then added the change to
e2fsck, and then added the mke2fs changes, but that's just me
quibbling.)
> @@ -578,8 +580,16 @@ void check_super_block(e2fsck_t ctx)
> for (i = 0, gd=fs->group_desc; i < fs->group_desc_count; i++, gd++) {
> pctx.group = i;
>
> - first_block = ext2fs_group_first_block(fs, i);
> - last_block = ext2fs_group_last_block(fs, i);
> + if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
> + EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
> + meta_bg_size = (fs->blocksize / sizeof (struct ext2_group_desc));
> + start_group = (i / meta_bg_size) * meta_bg_size;
> + first_block = ext2fs_group_first_block(fs, start_group);
> + last_block = ext2fs_group_first_block(fs, start_group + meta_bg_size);
This patch requires that the metadata be located in the metablock
group descriptor, instead of anywhere in the filesystem, which is what
we ultimately ended up checking into the kernel. Being more flexible
is good (even if that's not the layout we use by default). I'll fix
this up using git rebase --interactive and republish the patch in the
next branch.
- Ted
On Mon, Aug 13, 2007 at 11:33:14PM -0500, Jose R. Santos wrote:
> + if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
> + EXT4_FEATURE_INCOMPAT_FLEX_BG))
> + ext2fs_allocate_flex_groups(fs);
> +
> + else {
> + for (i = 0; i < fs->group_desc_count; i++) {
> + retval = ext2fs_allocate_group_table(fs, i, fs->block_map);
> + if (retval)
> + return retval;
> + }
The code to handle flex groups needs to be moved into
ext2fs_allocate_group_table(), and not put in
ext2fs_allocate_tables(), since resize2fs calls
ext2fs_allocate_group_table(), and we want resize2fs to do the right
thing for filesystems that have both the FLEX_BG and META_BG flags
set. Speaking of which, we need to fix the kernel on-line resizing
code to allocate new blocks for filesystem metadata blocks using the
new placement algorithm for FLEX_BG && META_BG filesystems.
I'll fix this up for the 'pu' branch of e2fsprogs, as well as making
ext2fs_allocate_flex_groups static, but I'm not going to graduate this
to the 'next' branch just yet, since it needs a bit more testing,
specifically with resize2fs.
- Ted
On Sat, 3 Nov 2007 19:36:09 -0400
Theodore Tso <[email protected]> wrote:
> On Mon, Aug 13, 2007 at 11:33:03PM -0500, Jose R. Santos wrote:
> > From: Jose R. Santos <[email protected]>
> >
> > Relax group descriptor checking.
>
> This patch should really be before patch #2 in the series (add the
> ability to handle the new feature before adding the ability to add in
> mke2fs). (Actually, I would have split up #2 into one patch which
> added the libe2p handling for the feature, then added the change to
> e2fsck, and then added the mke2fs changes, but that's just me
> quibbling.)
Sounds reasonable. I'll take care of patch ordering better next time.
> > @@ -578,8 +580,16 @@ void check_super_block(e2fsck_t ctx)
> > for (i = 0, gd=fs->group_desc; i < fs->group_desc_count; i++, gd++) {
> > pctx.group = i;
> >
> > - first_block = ext2fs_group_first_block(fs, i);
> > - last_block = ext2fs_group_last_block(fs, i);
> > + if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
> > + EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
> > + meta_bg_size = (fs->blocksize / sizeof (struct ext2_group_desc));
> > + start_group = (i / meta_bg_size) * meta_bg_size;
> > + first_block = ext2fs_group_first_block(fs, start_group);
> > + last_block = ext2fs_group_first_block(fs, start_group + meta_bg_size);
>
> This patch requires that the metadata be in located in the metablock
> group descriptor, instead of anywhere in the filesystem, which is what
> we ultimately ended up checking into the kernel. Being more flexible
> is good (even if that's not the layout we use by default). I'll fix
> this up using git rebase --interactive and republish the patch in the
> next branch.
Yes, the patch is slightly off and I did send an email pointing to
exactly this, but you had already committed the patch by the time the
email was sent. This patch was submitted to the mailing list before
the final kernel changes made it into the queue.
> - Ted
-JRS
On Sat, 3 Nov 2007 20:52:08 -0400
Theodore Tso <[email protected]> wrote:
> On Mon, Aug 13, 2007 at 11:33:14PM -0500, Jose R. Santos wrote:
> > + if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
> > + EXT4_FEATURE_INCOMPAT_FLEX_BG))
> > + ext2fs_allocate_flex_groups(fs);
> > +
> > + else {
> > + for (i = 0; i < fs->group_desc_count; i++) {
> > + retval = ext2fs_allocate_group_table(fs, i, fs->block_map);
> > + if (retval)
> > + return retval;
> > + }
>
> The code to handle flex groups needs to be moved into
> ext2fs_allocate_group_table(), and not put in
> ext2fs_allocate_tables(), since resize2fs calls
> ext2fs_allocate_group_table(), and we want resize2fs to do the right
> thing for filesystems that have both the FLEX_BG and META_BG flags
> set. Speaking of which, we need to fix the kernel on-line resizing
> code to allocate new blocks for filesystem metadata blocks using the
> new placement algorithm for FLEX_BG && META_BG filesystems.
>
> I'll fix this up for the 'pu' branch of e2fsprogs, as well as making
> ext2fs_allocate_flex_groups static, but I'm not going to graduate this
> to the 'next' branch just yet, since it needs a bit more testing,
> specifically with resize2fs.
Yes, this code is not ready for the 'next' branch. This was mostly
intended as an RFC and for testing. Resizing is not handled at all in
this code and fsck needs more testing.
> - Ted
-JRS
On Mon, Nov 05, 2007 at 08:53:52AM -0600, Jose R. Santos wrote:
> Yes, the patch is slightly off and I did send an email pointing to
> exactly this, but you had already committed the patch by the time the
> email was sent. This patch was submitted to the mailing list before
> the final kernel changes made it into the queue.
Feel free to resend patch series that are only in the pu series.
(Don't bother if it's just to fix merge changes, since I can do that
fairly easily; if I need help I'll let people know.) But if you want to
fix substantive issues, send me a new set of patches and I'll replace
them; the whole point of the "pu" branch is that it will rewind as
patches get refined and "cooked".
- Ted