Enable the use of 64-bit bitmaps, so e2freefrag will work on file
systems with the 64-bit feature enabled. In addition, enable the
rbtree-based bitmaps, which significantly reduces the amount of memory
required (from 97 megs to 1.7 megs for an empty 3T file system) at the
cost of additional CPU overhead (some of which we will claw back in
the next commit).
Addresses-Google-Bug: 7269948
Signed-off-by: "Theodore Ts'o" <[email protected]>
---
misc/e2freefrag.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/misc/e2freefrag.c b/misc/e2freefrag.c
index 30af43e..58f1ff5 100644
--- a/misc/e2freefrag.c
+++ b/misc/e2freefrag.c
@@ -249,13 +249,14 @@ static void collect_info(ext2_filsys fs, struct chunk_info *chunk_info, FILE *f)
static void open_device(char *device_name, ext2_filsys *fs)
{
int retval;
- int flag = EXT2_FLAG_FORCE;
+ int flag = EXT2_FLAG_FORCE | EXT2_FLAG_64BITS;
retval = ext2fs_open(device_name, flag, 0, 0, unix_io_manager, fs);
if (retval) {
com_err(device_name, retval, "while opening filesystem");
exit(1);
}
+ (*fs)->default_bitmap_type = EXT2FS_BMAP64_RBTREE;
}
#endif
--
1.7.12.rc0.22.gcdd159b
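For reference, the same open pattern as a standalone sketch (the
open_fs() wrapper name is mine, not from the patch; the error handling
mirrors the diff above, and the usual e2fsprogs headers are assumed).
As a rough sanity check on the numbers in the commit message: an empty
3T file system with 4k blocks has a bit over 800 million block bits,
so a flat block bitmap needs close to 100 megs, while the rbtree
representation only has to track a handful of extents.

#include <stdlib.h>
#include <et/com_err.h>
#include <ext2fs/ext2fs.h>

/* Hypothetical helper mirroring the patched open_device() above:
 * EXT2_FLAG_64BITS lets ext2fs_open() handle file systems with the
 * 64-bit feature, and EXT2FS_BMAP64_RBTREE makes bitmaps allocated
 * later use the rbtree-backed implementation. */
static ext2_filsys open_fs(const char *device_name)
{
        ext2_filsys fs;
        errcode_t retval;
        int flags = EXT2_FLAG_FORCE | EXT2_FLAG_64BITS;

        retval = ext2fs_open(device_name, flags, 0, 0, unix_io_manager, &fs);
        if (retval) {
                com_err(device_name, retval, "while opening filesystem");
                exit(1);
        }
        fs->default_bitmap_type = EXT2FS_BMAP64_RBTREE;
        return fs;
}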
Optimize testing for a bit in an rbtree-based bitmap for the case
where the calling application is scanning through the bitmap
sequentially. Previously, we optimized the case where the bits being
tested fell inside an allocated extent, but not the case where a large
run of unused bits followed an allocated extent.
1111111111111110000000000000000000
      ^ optimized        ^ not optimized
In my tests of a roughly half-filled file system, the run time of
e2freefrag was halved, and the CPU time spent in userspace during
e2fsck's pass 5 was reduced by about 30%.
Signed-off-by: "Theodore Ts'o" <[email protected]>
---
lib/ext2fs/blkmap64_rb.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/lib/ext2fs/blkmap64_rb.c b/lib/ext2fs/blkmap64_rb.c
index a83f8ac..c9006f8 100644
--- a/lib/ext2fs/blkmap64_rb.c
+++ b/lib/ext2fs/blkmap64_rb.c
@@ -314,8 +314,8 @@ static errcode_t rb_resize_bmap(ext2fs_generic_bitmap bmap,
inline static int
rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
{
- struct bmap_rb_extent *rcursor;
- struct rb_node *parent = NULL;
+ struct bmap_rb_extent *rcursor, *next_ext;
+ struct rb_node *parent = NULL, *next;
struct rb_node **n = &bp->root.rb_node;
struct bmap_rb_extent *ext;
@@ -330,6 +330,18 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
return 1;
}
+ next = ext2fs_rb_next(&rcursor->node);
+ if (next) {
+ next_ext = ext2fs_rb_entry(next, struct bmap_rb_extent, node);
+ if ((bit >= rcursor->start + rcursor->count) &&
+ (bit < next_ext->start)) {
+#ifdef BMAP_STATS_OPS
+ bp->test_hit++;
+#endif
+ return 0;
+ }
+ }
+
rcursor = *bp->wcursor;
if (!rcursor)
goto search_tree;
--
1.7.12.rc0.22.gcdd159b
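To make the access pattern behind this optimization concrete, here is a
rough sketch of the kind of sequential caller it targets (this is not
code from e2freefrag itself; it assumes an ext2_filsys fs whose block
bitmap has already been read with ext2fs_read_block_bitmap(), and uses
the standard ext2fs_test_block_bitmap2() helper):

        blk64_t blk;
        blk64_t last = ext2fs_blocks_count(fs->super);
        __u64 free_blocks = 0;

        /* Walk every block bit in order; with the fast path added above,
         * bits falling in the free gap right after the cached extent are
         * answered without a full tree search. */
        for (blk = fs->super->s_first_data_block; blk < last; blk++) {
                if (!ext2fs_test_block_bitmap2(fs->block_map, blk))
                        free_blocks++;
        }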
The code was previously allocating a single 4 or 8 byte pointer for
each of the rcursor and wcursor fields in the ext2fs_rb_private
structure;
this added two extra memory allocations (which could fail), and extra
indirections, for no good reason. Removing the extra indirection also
makes the code more readable, so it's all upside and no downside.
Signed-off-by: "Theodore Ts'o" <[email protected]>
---
lib/ext2fs/blkmap64_rb.c | 46 +++++++++++++++++++---------------------------
1 file changed, 19 insertions(+), 27 deletions(-)
diff --git a/lib/ext2fs/blkmap64_rb.c b/lib/ext2fs/blkmap64_rb.c
index c9006f8..900c0d3 100644
--- a/lib/ext2fs/blkmap64_rb.c
+++ b/lib/ext2fs/blkmap64_rb.c
@@ -38,8 +38,8 @@ struct bmap_rb_extent {
struct ext2fs_rb_private {
struct rb_root root;
- struct bmap_rb_extent **wcursor;
- struct bmap_rb_extent **rcursor;
+ struct bmap_rb_extent *wcursor;
+ struct bmap_rb_extent *rcursor;
#ifdef BMAP_STATS_OPS
__u64 mark_hit;
__u64 test_hit;
@@ -148,10 +148,10 @@ inline
static void rb_free_extent(struct ext2fs_rb_private *bp,
struct bmap_rb_extent *ext)
{
- if (*bp->wcursor == ext)
- *bp->wcursor = NULL;
- if (*bp->rcursor == ext)
- *bp->rcursor = NULL;
+ if (bp->wcursor == ext)
+ bp->wcursor = NULL;
+ if (bp->rcursor == ext)
+ bp->rcursor = NULL;
ext2fs_free_mem(&ext);
}
@@ -165,14 +165,8 @@ static errcode_t rb_alloc_private_data (ext2fs_generic_bitmap bitmap)
return retval;
bp->root = RB_ROOT;
- retval = ext2fs_get_mem(sizeof(struct bmap_rb_extent *), &bp->rcursor);
- if (retval)
- return retval;
- retval = ext2fs_get_mem(sizeof(struct bmap_rb_extent *), &bp->wcursor);
- if (retval)
- return retval;
- *bp->rcursor = NULL;
- *bp->wcursor = NULL;
+ bp->rcursor = NULL;
+ bp->wcursor = NULL;
#ifdef BMAP_STATS_OPS
bp->test_hit = 0;
@@ -215,8 +209,6 @@ static void rb_free_bmap(ext2fs_generic_bitmap bitmap)
bp = (struct ext2fs_rb_private *) bitmap->private;
rb_free_tree(&bp->root);
- ext2fs_free_mem(&bp->rcursor);
- ext2fs_free_mem(&bp->wcursor);
ext2fs_free_mem(&bp);
bp = 0;
}
@@ -235,8 +227,8 @@ static errcode_t rb_copy_bmap(ext2fs_generic_bitmap src,
src_bp = (struct ext2fs_rb_private *) src->private;
dest_bp = (struct ext2fs_rb_private *) dest->private;
- *src_bp->rcursor = NULL;
- *dest_bp->rcursor = NULL;
+ src_bp->rcursor = NULL;
+ dest_bp->rcursor = NULL;
src_node = ext2fs_rb_first(&src_bp->root);
while (src_node) {
@@ -299,8 +291,8 @@ static errcode_t rb_resize_bmap(ext2fs_generic_bitmap bmap,
}
bp = (struct ext2fs_rb_private *) bmap->private;
- *bp->rcursor = NULL;
- *bp->wcursor = NULL;
+ bp->rcursor = NULL;
+ bp->wcursor = NULL;
/* truncate tree to new_real_end size */
rb_truncate(new_real_end, &bp->root);
@@ -319,7 +311,7 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
struct rb_node **n = &bp->root.rb_node;
struct bmap_rb_extent *ext;
- rcursor = *bp->rcursor;
+ rcursor = bp->rcursor;
if (!rcursor)
goto search_tree;
@@ -342,7 +334,7 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
}
}
- rcursor = *bp->wcursor;
+ rcursor = bp->wcursor;
if (!rcursor)
goto search_tree;
@@ -359,7 +351,7 @@ search_tree:
else if (bit >= (ext->start + ext->count))
n = &(*n)->rb_right;
else {
- *bp->rcursor = ext;
+ bp->rcursor = ext;
return 1;
}
}
@@ -376,7 +368,7 @@ static int rb_insert_extent(__u64 start, __u64 count,
struct bmap_rb_extent *ext;
int retval = 0;
- ext = *bp->wcursor;
+ ext = bp->wcursor;
if (ext) {
if (start >= ext->start &&
start <= (ext->start + ext->count)) {
@@ -419,7 +411,7 @@ got_extent:
new_node = &new_ext->node;
ext2fs_rb_link_node(new_node, parent, n);
ext2fs_rb_insert_color(new_node, root);
- *bp->wcursor = new_ext;
+ bp->wcursor = new_ext;
node = ext2fs_rb_prev(new_node);
if (node) {
@@ -745,8 +737,8 @@ static void rb_clear_bmap(ext2fs_generic_bitmap bitmap)
bp = (struct ext2fs_rb_private *) bitmap->private;
rb_free_tree(&bp->root);
- *bp->rcursor = NULL;
- *bp->wcursor = NULL;
+ bp->rcursor = NULL;
+ bp->wcursor = NULL;
}
#ifdef BMAP_STATS
--
1.7.12.rc0.22.gcdd159b
Profiling shows that rb_test_bit() is now calling ext2fs_rb_next() a
lot, and that function has become the hot spot when running e2freefrag.
If we cache the result of ext2fs_rb_next(), we can eliminate those
extra calls, which further speeds up both e2freefrag and e2fsck by
reducing the amount of CPU time spent in userspace.
Signed-off-by: "Theodore Ts'o" <[email protected]>
---
lib/ext2fs/blkmap64_rb.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/lib/ext2fs/blkmap64_rb.c b/lib/ext2fs/blkmap64_rb.c
index 900c0d3..40d982f 100644
--- a/lib/ext2fs/blkmap64_rb.c
+++ b/lib/ext2fs/blkmap64_rb.c
@@ -40,6 +40,7 @@ struct ext2fs_rb_private {
struct rb_root root;
struct bmap_rb_extent *wcursor;
struct bmap_rb_extent *rcursor;
+ struct bmap_rb_extent *rcursor_next;
#ifdef BMAP_STATS_OPS
__u64 mark_hit;
__u64 test_hit;
@@ -152,6 +153,8 @@ static void rb_free_extent(struct ext2fs_rb_private *bp,
bp->wcursor = NULL;
if (bp->rcursor == ext)
bp->rcursor = NULL;
+ if (bp->rcursor_next == ext)
+ bp->rcursor_next = NULL;
ext2fs_free_mem(&ext);
}
@@ -166,6 +169,7 @@ static errcode_t rb_alloc_private_data (ext2fs_generic_bitmap bitmap)
bp->root = RB_ROOT;
bp->rcursor = NULL;
+ bp->rcursor_next = NULL;
bp->wcursor = NULL;
#ifdef BMAP_STATS_OPS
@@ -306,7 +310,7 @@ static errcode_t rb_resize_bmap(ext2fs_generic_bitmap bmap,
inline static int
rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
{
- struct bmap_rb_extent *rcursor, *next_ext;
+ struct bmap_rb_extent *rcursor, *next_ext = NULL;
struct rb_node *parent = NULL, *next;
struct rb_node **n = &bp->root.rb_node;
struct bmap_rb_extent *ext;
@@ -322,9 +326,15 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
return 1;
}
- next = ext2fs_rb_next(&rcursor->node);
- if (next) {
- next_ext = ext2fs_rb_entry(next, struct bmap_rb_extent, node);
+ next_ext = bp->rcursor_next;
+ if (!next_ext) {
+ next = ext2fs_rb_next(&rcursor->node);
+ if (next)
+ next_ext = ext2fs_rb_entry(next, struct bmap_rb_extent,
+ node);
+ bp->rcursor_next = next_ext;
+ }
+ if (next_ext) {
if ((bit >= rcursor->start + rcursor->count) &&
(bit < next_ext->start)) {
#ifdef BMAP_STATS_OPS
@@ -333,6 +343,8 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
return 0;
}
}
+ bp->rcursor = NULL;
+ bp->rcursor_next = NULL;
rcursor = bp->wcursor;
if (!rcursor)
@@ -352,6 +364,7 @@ search_tree:
n = &(*n)->rb_right;
else {
bp->rcursor = ext;
+ bp->rcursor_next = 0;
return 1;
}
}
@@ -368,6 +381,7 @@ static int rb_insert_extent(__u64 start, __u64 count,
struct bmap_rb_extent *ext;
int retval = 0;
+ bp->rcursor_next = NULL;
ext = bp->wcursor;
if (ext) {
if (start >= ext->start &&
@@ -738,6 +752,7 @@ static void rb_clear_bmap(ext2fs_generic_bitmap bitmap)
rb_free_tree(&bp->root);
bp->rcursor = NULL;
+ bp->rcursor_next = NULL;
bp->wcursor = NULL;
}
--
1.7.12.rc0.22.gcdd159b
Add support for building a profiled version of e2freefrag. Also fix a
stray continuation backslash which caused the e2fsck/Makefile to fail
when profiling is enabled.
Signed-off-by: "Theodore Ts'o" <[email protected]>
---
e2fsck/Makefile.in | 2 +-
misc/Makefile.in | 8 +++++++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/e2fsck/Makefile.in b/e2fsck/Makefile.in
index a52bbe1..87b71bd 100644
--- a/e2fsck/Makefile.in
+++ b/e2fsck/Makefile.in
@@ -28,7 +28,7 @@ STATIC_DEPLIBS= $(DEPSTATIC_LIBQUOTA) $(STATIC_LIBEXT2FS) \
PROFILED_LIBS= $(PROFILED_LIBQUOTA) $(PROFILED_LIBEXT2FS) \
$(PROFILED_LIBCOM_ERR) $(PROFILED_LIBBLKID) $(PROFILED_LIBUUID) \
- $(PROFILED_LIBE2P) $(LIBINTL) \
+ $(PROFILED_LIBE2P) $(LIBINTL)
PROFILED_DEPLIBS= $(DEPPROFILED_LIBQUOTA) $(PROFILED_LIBEXT2FS) \
$(DEPPROFILED_LIBCOM_ERR) $(DEPPROFILED_LIBBLKID) \
$(DEPPROFILED_LIBUUID) $(DEPPROFILED_LIBE2P)
diff --git a/misc/Makefile.in b/misc/Makefile.in
index 0692126..575a6d5 100644
--- a/misc/Makefile.in
+++ b/misc/Makefile.in
@@ -72,6 +72,7 @@ PROFILED_FSCK_OBJS= profiled/fsck.o profiled/base_device.o \
profiled/ismounted.o
PROFILED_BLKID_OBJS= profiled/blkid.o
PROFILED_FILEFRAG_OBJS= profiled/filefrag.o
+PROFILED_E2FREEFRAG_OBJS= profiled/e2freefrag.o
PROFILED_E2UNDO_OBJS= profiled/e2undo.o
PROFILED_E4DEFRAG_OBJS= profiled/e4defrag.o
@@ -107,7 +108,7 @@ all:: profiled $(SPROGS) $(UPROGS) $(USPROGS) $(SMANPAGES) $(UMANPAGES) \
@PROFILE_CMT@all:: tune2fs.profiled blkid.profiled e2image.profiled \
e2undo.profiled mke2fs.profiled dumpe2fs.profiled fsck.profiled \
logsave.profiled filefrag.profiled uuidgen.profiled uuidd.profiled \
- e2image.profiled e4defrag.profiled
+ e2image.profiled e4defrag.profiled e2freefrag.profiled
profiled:
@PROFILE_CMT@ $(E) " MKDIR $@"
@@ -319,6 +320,11 @@ e2freefrag: $(E2FREEFRAG_OBJS)
$(E) " LD $@"
$(Q) $(CC) $(ALL_LDFLAGS) -o e2freefrag $(E2FREEFRAG_OBJS) $(LIBS)
+e2freefrag.profiled: $(PROFILED_E2FREEFRAG_OBJS) $(PROFILED_DEPLIBS)
+ $(E) " LD $@"
+ $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o e2freefrag.profiled \
+ $(PROFILED_E2FREEFRAG_OBJS) $(PROFILED_LIBS)
+
filefrag: $(FILEFRAG_OBJS)
$(E) " LD $@"
$(Q) $(CC) $(ALL_LDFLAGS) -o filefrag $(FILEFRAG_OBJS)
--
1.7.12.rc0.22.gcdd159b
On 10/5/12 9:04 PM, Theodore Ts'o wrote:
> The code was previously allocating a single 4 or 8 byte pointer for
> each of the rcursor and wcursor fields in the ext2fs_rb_private
> structure;
> this added two extra memory allocations (which could fail), and extra
> indirections, for no good reason. Removing the extra indirection also
> makes the code more readable, so it's all upside and no downside.
>
> Signed-off-by: "Theodore Ts'o" <[email protected]>
Makes sense to me!
Reviewed-by: Eric Sandeen <[email protected]>
> ---
> lib/ext2fs/blkmap64_rb.c | 46 +++++++++++++++++++---------------------------
> 1 file changed, 19 insertions(+), 27 deletions(-)
>
> diff --git a/lib/ext2fs/blkmap64_rb.c b/lib/ext2fs/blkmap64_rb.c
> index c9006f8..900c0d3 100644
> --- a/lib/ext2fs/blkmap64_rb.c
> +++ b/lib/ext2fs/blkmap64_rb.c
> @@ -38,8 +38,8 @@ struct bmap_rb_extent {
>
> struct ext2fs_rb_private {
> struct rb_root root;
> - struct bmap_rb_extent **wcursor;
> - struct bmap_rb_extent **rcursor;
> + struct bmap_rb_extent *wcursor;
> + struct bmap_rb_extent *rcursor;
> #ifdef BMAP_STATS_OPS
> __u64 mark_hit;
> __u64 test_hit;
> @@ -148,10 +148,10 @@ inline
> static void rb_free_extent(struct ext2fs_rb_private *bp,
> struct bmap_rb_extent *ext)
> {
> - if (*bp->wcursor == ext)
> - *bp->wcursor = NULL;
> - if (*bp->rcursor == ext)
> - *bp->rcursor = NULL;
> + if (bp->wcursor == ext)
> + bp->wcursor = NULL;
> + if (bp->rcursor == ext)
> + bp->rcursor = NULL;
> ext2fs_free_mem(&ext);
> }
>
> @@ -165,14 +165,8 @@ static errcode_t rb_alloc_private_data (ext2fs_generic_bitmap bitmap)
> return retval;
>
> bp->root = RB_ROOT;
> - retval = ext2fs_get_mem(sizeof(struct bmap_rb_extent *), &bp->rcursor);
> - if (retval)
> - return retval;
> - retval = ext2fs_get_mem(sizeof(struct bmap_rb_extent *), &bp->wcursor);
> - if (retval)
> - return retval;
> - *bp->rcursor = NULL;
> - *bp->wcursor = NULL;
> + bp->rcursor = NULL;
> + bp->wcursor = NULL;
>
> #ifdef BMAP_STATS_OPS
> bp->test_hit = 0;
> @@ -215,8 +209,6 @@ static void rb_free_bmap(ext2fs_generic_bitmap bitmap)
> bp = (struct ext2fs_rb_private *) bitmap->private;
>
> rb_free_tree(&bp->root);
> - ext2fs_free_mem(&bp->rcursor);
> - ext2fs_free_mem(&bp->wcursor);
> ext2fs_free_mem(&bp);
> bp = 0;
> }
> @@ -235,8 +227,8 @@ static errcode_t rb_copy_bmap(ext2fs_generic_bitmap src,
>
> src_bp = (struct ext2fs_rb_private *) src->private;
> dest_bp = (struct ext2fs_rb_private *) dest->private;
> - *src_bp->rcursor = NULL;
> - *dest_bp->rcursor = NULL;
> + src_bp->rcursor = NULL;
> + dest_bp->rcursor = NULL;
>
> src_node = ext2fs_rb_first(&src_bp->root);
> while (src_node) {
> @@ -299,8 +291,8 @@ static errcode_t rb_resize_bmap(ext2fs_generic_bitmap bmap,
> }
>
> bp = (struct ext2fs_rb_private *) bmap->private;
> - *bp->rcursor = NULL;
> - *bp->wcursor = NULL;
> + bp->rcursor = NULL;
> + bp->wcursor = NULL;
>
> /* truncate tree to new_real_end size */
> rb_truncate(new_real_end, &bp->root);
> @@ -319,7 +311,7 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
> struct rb_node **n = &bp->root.rb_node;
> struct bmap_rb_extent *ext;
>
> - rcursor = *bp->rcursor;
> + rcursor = bp->rcursor;
> if (!rcursor)
> goto search_tree;
>
> @@ -342,7 +334,7 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
> }
> }
>
> - rcursor = *bp->wcursor;
> + rcursor = bp->wcursor;
> if (!rcursor)
> goto search_tree;
>
> @@ -359,7 +351,7 @@ search_tree:
> else if (bit >= (ext->start + ext->count))
> n = &(*n)->rb_right;
> else {
> - *bp->rcursor = ext;
> + bp->rcursor = ext;
> return 1;
> }
> }
> @@ -376,7 +368,7 @@ static int rb_insert_extent(__u64 start, __u64 count,
> struct bmap_rb_extent *ext;
> int retval = 0;
>
> - ext = *bp->wcursor;
> + ext = bp->wcursor;
> if (ext) {
> if (start >= ext->start &&
> start <= (ext->start + ext->count)) {
> @@ -419,7 +411,7 @@ got_extent:
> new_node = &new_ext->node;
> ext2fs_rb_link_node(new_node, parent, n);
> ext2fs_rb_insert_color(new_node, root);
> - *bp->wcursor = new_ext;
> + bp->wcursor = new_ext;
>
> node = ext2fs_rb_prev(new_node);
> if (node) {
> @@ -745,8 +737,8 @@ static void rb_clear_bmap(ext2fs_generic_bitmap bitmap)
> bp = (struct ext2fs_rb_private *) bitmap->private;
>
> rb_free_tree(&bp->root);
> - *bp->rcursor = NULL;
> - *bp->wcursor = NULL;
> + bp->rcursor = NULL;
> + bp->wcursor = NULL;
> }
>
> #ifdef BMAP_STATS
>
On 10/5/12 9:04 PM, Theodore Ts'o wrote:
> Add support for building a profiled version of e2freefrag. Also fix a
> stray continuation backslash which caused the e2fsck/Makefile to fail
> when profiling is enabled.
>
> Signed-off-by: "Theodore Ts'o" <[email protected]>
Reviewed-by: Eric Sandeen <[email protected]>
> ---
> e2fsck/Makefile.in | 2 +-
> misc/Makefile.in | 8 +++++++-
> 2 files changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/e2fsck/Makefile.in b/e2fsck/Makefile.in
> index a52bbe1..87b71bd 100644
> --- a/e2fsck/Makefile.in
> +++ b/e2fsck/Makefile.in
> @@ -28,7 +28,7 @@ STATIC_DEPLIBS= $(DEPSTATIC_LIBQUOTA) $(STATIC_LIBEXT2FS) \
>
> PROFILED_LIBS= $(PROFILED_LIBQUOTA) $(PROFILED_LIBEXT2FS) \
> $(PROFILED_LIBCOM_ERR) $(PROFILED_LIBBLKID) $(PROFILED_LIBUUID) \
> - $(PROFILED_LIBE2P) $(LIBINTL) \
> + $(PROFILED_LIBE2P) $(LIBINTL)
> PROFILED_DEPLIBS= $(DEPPROFILED_LIBQUOTA) $(PROFILED_LIBEXT2FS) \
> $(DEPPROFILED_LIBCOM_ERR) $(DEPPROFILED_LIBBLKID) \
> $(DEPPROFILED_LIBUUID) $(DEPPROFILED_LIBE2P)
> diff --git a/misc/Makefile.in b/misc/Makefile.in
> index 0692126..575a6d5 100644
> --- a/misc/Makefile.in
> +++ b/misc/Makefile.in
> @@ -72,6 +72,7 @@ PROFILED_FSCK_OBJS= profiled/fsck.o profiled/base_device.o \
> profiled/ismounted.o
> PROFILED_BLKID_OBJS= profiled/blkid.o
> PROFILED_FILEFRAG_OBJS= profiled/filefrag.o
> +PROFILED_E2FREEFRAG_OBJS= profiled/e2freefrag.o
> PROFILED_E2UNDO_OBJS= profiled/e2undo.o
> PROFILED_E4DEFRAG_OBJS= profiled/e4defrag.o
>
> @@ -107,7 +108,7 @@ all:: profiled $(SPROGS) $(UPROGS) $(USPROGS) $(SMANPAGES) $(UMANPAGES) \
> @PROFILE_CMT@all:: tune2fs.profiled blkid.profiled e2image.profiled \
> e2undo.profiled mke2fs.profiled dumpe2fs.profiled fsck.profiled \
> logsave.profiled filefrag.profiled uuidgen.profiled uuidd.profiled \
> - e2image.profiled e4defrag.profiled
> + e2image.profiled e4defrag.profiled e2freefrag.profiled
>
> profiled:
> @PROFILE_CMT@ $(E) " MKDIR $@"
> @@ -319,6 +320,11 @@ e2freefrag: $(E2FREEFRAG_OBJS)
> $(E) " LD $@"
> $(Q) $(CC) $(ALL_LDFLAGS) -o e2freefrag $(E2FREEFRAG_OBJS) $(LIBS)
>
> +e2freefrag.profiled: $(PROFILED_E2FREEFRAG_OBJS) $(PROFILED_DEPLIBS)
> + $(E) " LD $@"
> + $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o e2freefrag.profiled \
> + $(PROFILED_E2FREEFRAG_OBJS) $(PROFILED_LIBS)
> +
> filefrag: $(FILEFRAG_OBJS)
> $(E) " LD $@"
> $(Q) $(CC) $(ALL_LDFLAGS) -o filefrag $(FILEFRAG_OBJS)
>
On Thu, 4 Oct 2012, Theodore Ts'o wrote:
> Date: Thu, 4 Oct 2012 23:44:55 -0400
> From: Theodore Ts'o <[email protected]>
> To: Ext4 Developers List <[email protected]>
> Cc: Theodore Ts'o <[email protected]>
> Subject: [PATCH 2/2] libext2fs: optimize rb_test_bit
>
> Optimize testing for a bit in an rbtree-based bitmap for the case
> where the calling application is scanning through the bitmap
> sequentially. Previously, we optimized the case where the bits being
> tested fell inside an allocated extent, but not the case where a large
> run of unused bits followed an allocated extent.
>
> 1111111111111110000000000000000000
>       ^ optimized        ^ not optimized
>
> In my tests of a roughly half-filled file system, the run time of
> e2freefrag was halved, and the CPU time spent in userspace during
> e2fsck's pass 5 was reduced by about 30%.
Hi Ted,
the patch and the idea behind it look fine, especially when we're
walking the bitmap sequentially not modifying it simultaneously, but
I have one question/suggestion below.
>
> Signed-off-by: "Theodore Ts'o" <[email protected]>
> ---
> lib/ext2fs/blkmap64_rb.c | 16 ++++++++++++++--
> 1 file changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/lib/ext2fs/blkmap64_rb.c b/lib/ext2fs/blkmap64_rb.c
> index a83f8ac..c9006f8 100644
> --- a/lib/ext2fs/blkmap64_rb.c
> +++ b/lib/ext2fs/blkmap64_rb.c
> @@ -314,8 +314,8 @@ static errcode_t rb_resize_bmap(ext2fs_generic_bitmap bmap,
> inline static int
> rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
> {
> - struct bmap_rb_extent *rcursor;
> - struct rb_node *parent = NULL;
> + struct bmap_rb_extent *rcursor, *next_ext;
> + struct rb_node *parent = NULL, *next;
> struct rb_node **n = &bp->root.rb_node;
> struct bmap_rb_extent *ext;
>
> @@ -330,6 +330,18 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
> return 1;
> }
>
> + next = ext2fs_rb_next(&rcursor->node);
> + if (next) {
> + next_ext = ext2fs_rb_entry(next, struct bmap_rb_extent, node);
> + if ((bit >= rcursor->start + rcursor->count) &&
> + (bit < next_ext->start)) {
What about using next_ext to check the bit once we're already holding
it? On a sequential walk this should make sense, since we should
actually hit it if the bit is neither in rcursor nor between rcursor
and next_ext.
So maybe something like this? (untested)

        if (next && (bit >= rcursor->start + rcursor->count)) {
                next_ext = ext2fs_rb_entry(next, struct bmap_rb_extent, node);
                if (bit < next_ext->start) {
#ifdef BMAP_STATS_OPS
                        bp->test_hit++;
#endif
                        return 0;
                } else if (bit < next_ext->start + next_ext->count) {
#ifdef BMAP_STATS_OPS
                        bp->test_hit++;
#endif
                        *bp->rcursor = next_ext;
                        return 1;
                }
        }

What do you think? Maybe it is worth testing whether the advantage
outweighs the additional condition?
Thanks!
-Lukas
> + }
> +
> rcursor = *bp->wcursor;
> if (!rcursor)
> goto search_tree;
>
On Mon, 8 Oct 2012, Lukáš Czerner wrote:
> Date: Mon, 8 Oct 2012 10:08:54 +0200 (CEST)
> From: Lukáš Czerner <[email protected]>
> To: Theodore Ts'o <[email protected]>
> Cc: Ext4 Developers List <[email protected]>
> Subject: Re: [PATCH 2/2] libext2fs: optimize rb_test_bit
>
> On Thu, 4 Oct 2012, Theodore Ts'o wrote:
>
> > Date: Thu, 4 Oct 2012 23:44:55 -0400
> > From: Theodore Ts'o <[email protected]>
> > To: Ext4 Developers List <[email protected]>
> > Cc: Theodore Ts'o <[email protected]>
> > Subject: [PATCH 2/2] libext2fs: optimize rb_test_bit
> >
> > Optimize testing for a bit in an rbtree-based bitmap for the case
> > where the calling application is scanning through the bitmap
> > sequentially. Previously, we optimized the case where the bits being
> > tested fell inside an allocated extent, but not the case where a large
> > run of unused bits followed an allocated extent.
> >
> > 1111111111111110000000000000000000
> >       ^ optimized        ^ not optimized
> >
> > In my tests of a roughly half-filled file system, the run time of
> > e2freefrag was halved, and the CPU time spent in userspace during
> > e2fsck's pass 5 was reduced by about 30%.
>
> Hi Ted,
>
> the patch and the idea behind it look fine, especially when we're
> walking the bitmap sequentially not modifying it simultaneously, but
> I have one question/suggestion below.
Also for this kind of usage it might actually make sense to have
something like:
get_next_zero_bit
get_next_set_bit
which would be much more effective than testing single bits, but it
would require actually using this in e2fsprogs tools.
Thanks!
-Lukas
>
> >
> > Signed-off-by: "Theodore Ts'o" <[email protected]>
> > ---
> > lib/ext2fs/blkmap64_rb.c | 16 ++++++++++++++--
> > 1 file changed, 14 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/ext2fs/blkmap64_rb.c b/lib/ext2fs/blkmap64_rb.c
> > index a83f8ac..c9006f8 100644
> > --- a/lib/ext2fs/blkmap64_rb.c
> > +++ b/lib/ext2fs/blkmap64_rb.c
> > @@ -314,8 +314,8 @@ static errcode_t rb_resize_bmap(ext2fs_generic_bitmap bmap,
> > inline static int
> > rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
> > {
> > - struct bmap_rb_extent *rcursor;
> > - struct rb_node *parent = NULL;
> > + struct bmap_rb_extent *rcursor, *next_ext;
> > + struct rb_node *parent = NULL, *next;
> > struct rb_node **n = &bp->root.rb_node;
> > struct bmap_rb_extent *ext;
> >
> > @@ -330,6 +330,18 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
> > return 1;
> > }
> >
> > + next = ext2fs_rb_next(&rcursor->node);
> > + if (next) {
> > + next_ext = ext2fs_rb_entry(next, struct bmap_rb_extent, node);
> > + if ((bit >= rcursor->start + rcursor->count) &&
> > + (bit < next_ext->start)) {
>
> What about using next_ext to check the bit once we're already holding
> it? On a sequential walk this should make sense, since we should
> actually hit it if the bit is neither in rcursor nor between rcursor
> and next_ext.
>
> So maybe something like this? (untested)
>
>         if (next && (bit >= rcursor->start + rcursor->count)) {
>                 next_ext = ext2fs_rb_entry(next, struct bmap_rb_extent, node);
>                 if (bit < next_ext->start) {
> #ifdef BMAP_STATS_OPS
>                         bp->test_hit++;
> #endif
>                         return 0;
>                 } else if (bit < next_ext->start + next_ext->count) {
> #ifdef BMAP_STATS_OPS
>                         bp->test_hit++;
> #endif
>                         *bp->rcursor = next_ext;
>                         return 1;
>                 }
>         }
>
> What do you think? Maybe it is worth testing whether the advantage
> outweighs the additional condition?
>
> Thanks!
> -Lukas
>
>
> > + }
> > +
> > rcursor = *bp->wcursor;
> > if (!rcursor)
> > goto search_tree;
> >
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
On Mon, Oct 08, 2012 at 10:25:19AM +0200, Lukáš Czerner wrote:
> > the patch and the idea behind it look fine, especially when we're
> > walking the bitmap sequentially not modifying it simultaneously, but
> > I have one question/suggestion below.
>
> Also for this kind of usage it might actually make sense to have
> something like:
>
> get_next_zero_bit
> get_next_set_bit
>
> which would be much more effective than testing single bits, but it
> would require actually using this in e2fsprogs tools.
Yes, I thought about that, and in fact we already have find_first_zero
(which takes a starting offset, so works for both find_first and
find_next). When we introduced this, though, we accidentally
introduced a bug at first.
At some point I agree it would be good to implement find_first_set(),
write new unit tests, and then modify e2freefrag, e2fsck, and
dumpe2fs to use it. But using it in the applications is actually
pretty tricky, and I didn't have the time I figured would be necessary
to really do the changes right and validate/test them properly.
So yes, I agree this would be much more effective, and ultimately
would result in further speedups in e2fsck and e2freefrag in
particular. It would also allow us to take out the test_bit
optimizations which do have a slight cost for random access reads ---
and this is measurable when looking at the results of the CPU time for
e2fsck pass 4 in particular. It's just that the performance hit for
the random access test_bit case is very tiny compared with the huge
wins in the sequential scan case.
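(For illustration only, a rough sketch of how a sequential scan could
use the existing find-first-zero entry point instead of per-bit tests.
The helper name and signature below, and the ENOENT return when no
zero bit remains in the range, are my reading of the current libext2fs
API rather than anything quoted in this thread; the fragment assumes
<stdio.h>, <errno.h> and an already-opened ext2_filsys fs with its
block bitmap loaded.)

        blk64_t start = fs->super->s_first_data_block;
        blk64_t end = ext2fs_blocks_count(fs->super) - 1;
        blk64_t free_start;
        errcode_t err;

        /* Find the first unused block at or after `start` in one call,
         * instead of calling ext2fs_test_block_bitmap2() bit by bit;
         * passing a later start makes the same call act as "find next". */
        err = ext2fs_find_first_zero_block_bitmap2(fs->block_map, start, end,
                                                   &free_start);
        if (err == ENOENT)
                printf("no free blocks in range\n");
        else if (!err)
                printf("first free block: %llu\n",
                       (unsigned long long) free_start);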
> > What about using next_ext to check the bit once we're already holding
> > it? On a sequential walk this should make sense, since we should
> > actually hit it if the bit is neither in rcursor nor between rcursor
> > and next_ext.
Yes, I implemented that in the 2/3 commit in the follow-on patch
series.
Cheers!
- Ted
On Mon, 8 Oct 2012, Theodore Ts'o wrote:
> Date: Mon, 8 Oct 2012 14:17:53 -0400
> From: Theodore Ts'o <[email protected]>
> To: Lukáš Czerner <[email protected]>
> Cc: Ext4 Developers List <[email protected]>
> Subject: Re: [PATCH 2/2] libext2fs: optimize rb_test_bit
>
> On Mon, Oct 08, 2012 at 10:25:19AM +0200, Lukáš Czerner wrote:
> > > the patch and the idea behind it look fine, especially when we're
> > > walking the bitmap sequentially not modifying it simultaneously, but
> > > I have one question/suggestion below.
> >
> > Also for this kind of usage it might actually make sense to have
> > something like:
> >
> > get_next_zero_bit
> > get_next_set_bit
> >
> > which would be much more effective than testing single bits, but it
> > would require actually using this in e2fsprogs tools.
>
> Yes, I thought about that, and in fact we already have find_first_zero
> (which takes a starting offset, so works for both find_first and
> find_next). When we introduced this, though, we accidentally
> introduced a bug at first.
>
> At some point I agree it would be good to implement find_first_set(),
> write new unit tests, and then modify e2freefrag, e2fsck, and
> dumpe2fs to use it. But using it in the applications is actually
> pretty tricky, and I didn't have the time I figured would be necessary
> to really do the changes right and validate/test them properly.
>
> So yes, I agree this would be much more effective, and ultimately
> would result in further speedups in e2fsck and e2freefrag in
> particular. It would also allow us to take out the test_bit
> optimizations which do have a slight cost for random access reads ---
> and this is measurable when looking at the results of the CPU time for
> e2fsck pass 4 in particular. It's just that the performance hit for
> the random access test_bit case is very tiny compared with the huge
> wins in the sequential scan case.
I agree, at some point it would be nice to have that implemented.
>
> > > What about using next_ext to check the bit once we're already holding
> > > it? On a sequential walk this should make sense, since we should
> > > actually hit it if the bit is neither in rcursor nor between rcursor
> > > and next_ext.
>
> Yes, I implemented that in the 2/3 commit in the follow-on patch
> series.
Right, but I think that the 2/3 commit is not necessary and the
whole solution is more complicated than it could be. Also we do not
really need the rcursor_next pointer in the ext2fs_rb_private
structure.
The solution I was suggesting is simpler, and it is also one
condition shorter on a sequential walk because we store the "next"
extent into the rcursor once we actually hit it. So we optimize a
little bit less when we're testing bits in between the extents (due
to ext2fs_rb_next()), but we avoid searching the tree entirely
(except the first time) on a sequential walk, because we move
bp->rcursor to the next node when we have a match there.
So what about this? It has to be tested to see if it really is as
effective. Let me know what you think.
Thanks!
-Lukas
--
diff --git a/lib/ext2fs/blkmap64_rb.c b/lib/ext2fs/blkmap64_rb.c
index 55d78e2..f76f790 100644
--- a/lib/ext2fs/blkmap64_rb.c
+++ b/lib/ext2fs/blkmap64_rb.c
@@ -304,8 +304,8 @@ static errcode_t rb_resize_bmap(ext2fs_generic_bitmap bmap,
inline static int
rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
{
- struct bmap_rb_extent *rcursor;
- struct rb_node *parent = NULL;
+ struct bmap_rb_extent *rcursor, *next_ext;
+ struct rb_node *parent = NULL, *next;
struct rb_node **n = &bp->root.rb_node;
struct bmap_rb_extent *ext;
@@ -320,6 +320,23 @@ rb_test_bit(struct ext2fs_rb_private *bp, __u64 bit)
return 1;
}
+ next = ext2fs_rb_next(&rcursor->node);
+ if (next && (bit >= rcursor->start + rcursor->count)) {
+ next_ext = ext2fs_rb_entry(next, struct bmap_rb_extent, node);
+ if (bit < next_ext->start) {
+#ifdef BMAP_STATS_OPS
+ bp->test_hit++;
+#endif
+ return 0;
+ } else if (bit < next_ext->start + next_ext->count) {
+#ifdef BMAP_STATS_OPS
+ bp->test_hit++;
+#endif
+ bp->rcursor = next_ext;
+ return 1;
+ }
+ }
+
rcursor = bp->wcursor;
if (!rcursor)
goto search_tree;
On Tue, Oct 09, 2012 at 09:18:45AM +0200, Lukáš Czerner wrote:
>
> So what about this? It has to be tested to see if it really is as
> effective. Let me know what you think.
Here are the results of my patches, running e2fsck on an empty 3T file system:
Pass 1: Memory used: 672k/32232k (423k/250k), time: 1.15/ 1.14/ 0.00
Pass 2: Memory used: 672k/63692k (423k/250k), time: 0.01/ 0.00/ 0.01
Peak memory: Memory used: 672k/63692k (424k/249k), time: 1.94/ 1.92/ 0.01
Pass 3A: Memory used: 672k/63692k (423k/250k), time: 0.00/ 0.00/ 0.00
Pass 3: Memory used: 672k/63692k (423k/250k), time: 0.00/ 0.00/ 0.00
Pass 4: Memory used: 672k/772k (422k/251k), time: 3.67/ 3.66/ 0.00
Pass 5: Memory used: 824k/772k (422k/403k), time: 3.83/ 3.82/ 0.00
/mnt/foo.img: 11/201326592 files (0.0% non-contiguous), 12687388/805306368 blocks
Memory used: 824k/772k (422k/403k), time: 9.49/ 9.41/ 0.01
9.41user 0.01system 0:09.59elapsed 98%CPU (0avgtext+0avgdata 65636maxresident)k
0inputs+1552outputs (0major+2198minor)pagefaults 0swaps
And here are the results of running e2fsck on the same 3T file system with your patch:
Pass 1: Memory used: 672k/32232k (423k/250k), time: 1.20/ 1.19/ 0.01
Pass 2: Memory used: 672k/63692k (423k/250k), time: 0.00/ 0.00/ 0.01
Peak memory: Memory used: 672k/63692k (423k/250k), time: 2.02/ 1.98/ 0.03
Pass 3A: Memory used: 672k/63692k (423k/250k), time: 0.00/ 0.00/ 0.00
Pass 3: Memory used: 672k/63692k (423k/250k), time: 0.00/ 0.00/ 0.00
Pass 4: Memory used: 672k/772k (422k/251k), time: 5.66/ 5.64/ 0.00
Pass 5: Memory used: 896k/772k (422k/475k), time: 4.04/ 4.02/ 0.01
/mnt/foo.img: 11/201326592 files (0.0% non-contiguous), 12687388/805306368 blocks
11.65user 0.04system 0:11.89elapsed 98%CPU (0avgtext+0avgdata 65640maxresident)k
0inputs+1552outputs (0major+1706minor)pagefaults 0swaps
I had tried this approach before and saw similar performance
characteristics, so this didn't come as a huge surprise.
----------------
Here are the results of my patches, running e2fsck on a roughly
half-empty file system that holds lots of kernel build trees:
Pass 1: Memory used: 2076k/2780k (1957k/120k), time: 2.78/ 0.95/ 0.15
Pass 2: Memory used: 3528k/2828k (2488k/1041k), time: 2.05/ 0.19/ 0.21
Peak memory: Memory used: 4276k/2828k (3129k/1148k), time: 4.92/ 1.16/ 0.36
Pass 3A: Memory used: 4276k/2828k (3128k/1149k), time: 0.00/ 0.00/ 0.00
Pass 3: Memory used: 4276k/2596k (2488k/1789k), time: 0.00/ 0.00/ 0.00
Pass 4: Memory used: 4276k/644k (645k/3632k), time: 0.12/ 0.12/ 0.00
Pass 5: Memory used: 4276k/0k (645k/3632k), time: 1.42/ 0.99/ 0.02
kbuild: 204690/5242880 files (1.9% non-contiguous), 9843555/20971520 blocks
Memory used: 4276k/0k (645k/3632k), time: 6.51/ 2.29/ 0.38
2.29user 0.45system 0:06.60elapsed 41%CPU (0avgtext+0avgdata 8664maxresident)k
329872inputs+56outputs (0major+2511minor)pagefaults 0swaps
Here are the results with your patch:
Pass 1: Memory used: 2080k/2780k (1957k/124k), time: 2.81/ 0.99/ 0.14
Pass 2: Memory used: 3532k/2828k (2487k/1045k), time: 2.10/ 0.21/ 0.22
Peak memory: Memory used: 4276k/2828k (3128k/1149k), time: 5.00/ 1.22/ 0.37
Pass 3: Memory used: 4276k/2596k (2487k/1790k), time: 0.00/ 0.00/ 0.00
Pass 4: Memory used: 4276k/644k (645k/3632k), time: 0.14/ 0.15/ 0.00
Pass 5: Memory used: 4276k/0k (645k/3632k), time: 1.64/ 1.18/ 0.04
Memory used: 4276k/0k (645k/3632k), time: 6.83/ 2.56/ 0.41
2.57user 0.46system 0:06.91elapsed 43%CPU (0avgtext+0avgdata 8668maxresident)k
329872inputs+56outputs (0major+2512minor)pagefaults 0swaps
----------------
Here are the timing results of e2freefrag on the empty 3T file system
image using my patch:
8.25user 0.01system 0:08.28elapsed 99%CPU (0avgtext+0avgdata 1708maxresident)k
0inputs+0outputs (0major+475minor)pagefaults 0swaps
and here are the results using your patch:
10.27user 0.00system 0:10.30elapsed 99%CPU (0avgtext+0avgdata 1708maxresident)k
0inputs+0outputs (0major+474minor)pagefaults 0swaps
So anyway, that's why I chose the approach I did.
- Ted