2023-08-01 05:31:50

by Li Dongyang

[permalink] [raw]
Subject: [PATCH] e2fsck: use rb-tree to track EA reference counts

Using the sorted array to track the EA blocks and
their refs is not scalable.
When the file system has a huge number of EA blocks
reporting wrong ref, pass1 scanning could not be
finished within a reasonable time,
as 95%+ of CPU time is spent in memmove() when
trying to enlarge the the sorted array.

On a file system with 20 million problematic EA blocks
on an NVMe device, pass1 time taken:
without patch:
time: 2014.78/1838.70/19.91
with patch:
time: 45.17/20.17/20.19

Signed-off-by: Li Dongyang <[email protected]>
---
e2fsck/e2fsck.h | 3 +-
e2fsck/ea_refcount.c | 286 +++++++++++++++----------------------------
e2fsck/pass1.c | 14 +--
3 files changed, 102 insertions(+), 201 deletions(-)

diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index 3f2dc3084..bc2490c9d 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -533,7 +533,7 @@ extern struct dx_dir_info *e2fsck_dx_dir_info_iter(e2fsck_t ctx,
typedef __u64 ea_key_t;
typedef __u64 ea_value_t;

-extern errcode_t ea_refcount_create(size_t size, ext2_refcount_t *ret);
+extern errcode_t ea_refcount_create(ext2_refcount_t *ret);
extern void ea_refcount_free(ext2_refcount_t refcount);
extern errcode_t ea_refcount_fetch(ext2_refcount_t refcount, ea_key_t ea_key,
ea_value_t *ret);
@@ -543,7 +543,6 @@ extern errcode_t ea_refcount_decrement(ext2_refcount_t refcount,
ea_key_t ea_key, ea_value_t *ret);
extern errcode_t ea_refcount_store(ext2_refcount_t refcount, ea_key_t ea_key,
ea_value_t count);
-extern size_t ext2fs_get_refcount_size(ext2_refcount_t refcount);
extern void ea_refcount_intr_begin(ext2_refcount_t refcount);
extern ea_key_t ea_refcount_intr_next(ext2_refcount_t refcount,
ea_value_t *ret);
diff --git a/e2fsck/ea_refcount.c b/e2fsck/ea_refcount.c
index 7154b47c3..fb8b009f1 100644
--- a/e2fsck/ea_refcount.c
+++ b/e2fsck/ea_refcount.c
@@ -16,193 +16,100 @@
#undef ENABLE_NLS
#endif
#include "e2fsck.h"
+#include "ext2fs/rbtree.h"

/*
* The strategy we use for keeping track of EA refcounts is as
- * follows. We keep a sorted array of first EA blocks and its
- * reference counts. Once the refcount has dropped to zero, it is
- * removed from the array to save memory space. Once the EA block is
+ * follows. We keep the first EA blocks and its reference counts
+ * in the rb-tree. Once the refcount has dropped to zero, it is
+ * removed from the rb-tree to save memory space. Once the EA block is
* checked, its bit is set in the block_ea_map bitmap.
*/
struct ea_refcount_el {
+ struct rb_node node;
/* ea_key could either be an inode number or block number. */
ea_key_t ea_key;
ea_value_t ea_value;
};

struct ea_refcount {
- size_t count;
- size_t size;
- size_t cursor;
- struct ea_refcount_el *list;
+ struct rb_root root;
+ struct rb_node *cursor;
};

void ea_refcount_free(ext2_refcount_t refcount)
{
+ struct ea_refcount_el *el;
+ struct rb_node *node, *next;
+
if (!refcount)
return;

- if (refcount->list)
- ext2fs_free_mem(&refcount->list);
+ for (node = ext2fs_rb_first(&refcount->root); node; node = next) {
+ next = ext2fs_rb_next(node);
+ el = ext2fs_rb_entry(node, struct ea_refcount_el, node);
+ ext2fs_rb_erase(node, &refcount->root);
+ ext2fs_free_mem(&el);
+ }
ext2fs_free_mem(&refcount);
}

-errcode_t ea_refcount_create(size_t size, ext2_refcount_t *ret)
+errcode_t ea_refcount_create(ext2_refcount_t *ret)
{
ext2_refcount_t refcount;
errcode_t retval;
- size_t bytes;

retval = ext2fs_get_memzero(sizeof(struct ea_refcount), &refcount);
if (retval)
return retval;
-
- if (!size)
- size = 500;
- refcount->size = size;
- bytes = size * sizeof(struct ea_refcount_el);
-#ifdef DEBUG
- printf("Refcount allocated %zu entries, %zu bytes.\n",
- refcount->size, bytes);
-#endif
- retval = ext2fs_get_memzero(bytes, &refcount->list);
- if (retval)
- goto errout;
-
- refcount->count = 0;
- refcount->cursor = 0;
+ refcount->root = RB_ROOT;

*ret = refcount;
return 0;
-
-errout:
- ea_refcount_free(refcount);
- return(retval);
-}
-
-/*
- * collapse_refcount() --- go through the refcount array, and get rid
- * of any count == zero entries
- */
-static void refcount_collapse(ext2_refcount_t refcount)
-{
- unsigned int i, j;
- struct ea_refcount_el *list;
-
- list = refcount->list;
- for (i = 0, j = 0; i < refcount->count; i++) {
- if (list[i].ea_value) {
- if (i != j)
- list[j] = list[i];
- j++;
- }
- }
-#if defined(DEBUG) || defined(TEST_PROGRAM)
- printf("Refcount_collapse: size was %zu, now %d\n",
- refcount->count, j);
-#endif
- refcount->count = j;
-}
-
-
-/*
- * insert_refcount_el() --- Insert a new entry into the sorted list at a
- * specified position.
- */
-static struct ea_refcount_el *insert_refcount_el(ext2_refcount_t refcount,
- ea_key_t ea_key, int pos)
-{
- struct ea_refcount_el *el;
- errcode_t retval;
- size_t new_size = 0;
- int num;
-
- if (refcount->count >= refcount->size) {
- new_size = refcount->size + 100;
-#ifdef DEBUG
- printf("Reallocating refcount %d entries...\n", new_size);
-#endif
- retval = ext2fs_resize_mem((size_t) refcount->size *
- sizeof(struct ea_refcount_el),
- (size_t) new_size *
- sizeof(struct ea_refcount_el),
- &refcount->list);
- if (retval)
- return 0;
- refcount->size = new_size;
- }
- num = (int) refcount->count - pos;
- if (num < 0)
- return 0; /* should never happen */
- if (num) {
- memmove(&refcount->list[pos+1], &refcount->list[pos],
- sizeof(struct ea_refcount_el) * num);
- }
- refcount->count++;
- el = &refcount->list[pos];
- el->ea_key = ea_key;
- el->ea_value = 0;
- return el;
}

-
/*
* get_refcount_el() --- given an block number, try to find refcount
- * information in the sorted list. If the create flag is set,
- * and we can't find an entry, create one in the sorted list.
+ * information in the rb-tree. If the create flag is set,
+ * and we can't find an entry, create and add it to rb-tree.
*/
static struct ea_refcount_el *get_refcount_el(ext2_refcount_t refcount,
ea_key_t ea_key, int create)
{
- int low, high, mid;
+ struct rb_node **node;
+ struct rb_node *parent = NULL;
+ struct ea_refcount_el *el;
+ errcode_t retval;

- if (!refcount || !refcount->list)
- return 0;
-retry:
- low = 0;
- high = (int) refcount->count-1;
- if (create && ((refcount->count == 0) ||
- (ea_key > refcount->list[high].ea_key))) {
- if (refcount->count >= refcount->size)
- refcount_collapse(refcount);
-
- return insert_refcount_el(refcount, ea_key,
- (unsigned) refcount->count);
- }
- if (refcount->count == 0)
+ if (!refcount)
return 0;

- if (refcount->cursor >= refcount->count)
- refcount->cursor = 0;
- if (ea_key == refcount->list[refcount->cursor].ea_key)
- return &refcount->list[refcount->cursor++];
-#ifdef DEBUG
- printf("Non-cursor get_refcount_el: %u\n", ea_key);
-#endif
- while (low <= high) {
- mid = (low+high)/2;
- if (ea_key == refcount->list[mid].ea_key) {
- refcount->cursor = mid+1;
- return &refcount->list[mid];
- }
- if (ea_key < refcount->list[mid].ea_key)
- high = mid-1;
+ node = &refcount->root.rb_node;
+ while (*node) {
+ el = ext2fs_rb_entry(*node, struct ea_refcount_el, node);
+
+ parent = *node;
+ if (ea_key < el->ea_key)
+ node = &(*node)->rb_left;
+ else if (ea_key > el->ea_key)
+ node = &(*node)->rb_right;
else
- low = mid+1;
- }
- /*
- * If we need to create a new entry, it should be right at
- * low (where high will be left at low-1).
- */
- if (create) {
- if (refcount->count >= refcount->size) {
- refcount_collapse(refcount);
- if (refcount->count < refcount->size)
- goto retry;
- }
- return insert_refcount_el(refcount, ea_key, low);
+ return el;
}
- return 0;
+
+ if (!create)
+ return 0;
+
+ retval = ext2fs_get_memzero(sizeof(struct ea_refcount_el), &el);
+ if (retval)
+ return 0;
+
+ el->ea_key = ea_key;
+ el->ea_value = 0;
+ ext2fs_rb_link_node(&el->node, parent, node);
+ ext2fs_rb_insert_color(&el->node, &refcount->root);
+
+ return el;
}

errcode_t ea_refcount_fetch(ext2_refcount_t refcount, ea_key_t ea_key,
@@ -240,13 +147,18 @@ errcode_t ea_refcount_decrement(ext2_refcount_t refcount, ea_key_t ea_key,
struct ea_refcount_el *el;

el = get_refcount_el(refcount, ea_key, 0);
- if (!el || el->ea_value == 0)
+ if (!el)
return EXT2_ET_INVALID_ARGUMENT;

el->ea_value--;

if (ret)
*ret = el->ea_value;
+
+ if (el->ea_value == 0) {
+ ext2fs_rb_erase(&el->node, &refcount->root);
+ ext2fs_free_mem(&el);
+ }
return 0;
}

@@ -262,17 +174,13 @@ errcode_t ea_refcount_store(ext2_refcount_t refcount, ea_key_t ea_key,
if (!el)
return ea_value ? EXT2_ET_NO_MEMORY : 0;
el->ea_value = ea_value;
+ if (el->ea_value == 0) {
+ ext2fs_rb_erase(&el->node, &refcount->root);
+ ext2fs_free_mem(&el);
+ }
return 0;
}

-size_t ext2fs_get_refcount_size(ext2_refcount_t refcount)
-{
- if (!refcount)
- return 0;
-
- return refcount->size;
-}
-
void ea_refcount_intr_begin(ext2_refcount_t refcount)
{
refcount->cursor = 0;
@@ -281,19 +189,23 @@ void ea_refcount_intr_begin(ext2_refcount_t refcount)
ea_key_t ea_refcount_intr_next(ext2_refcount_t refcount,
ea_value_t *ret)
{
- struct ea_refcount_el *list;
-
- while (1) {
- if (refcount->cursor >= refcount->count)
- return 0;
- list = refcount->list;
- if (list[refcount->cursor].ea_value) {
- if (ret)
- *ret = list[refcount->cursor].ea_value;
- return list[refcount->cursor++].ea_key;
- }
- refcount->cursor++;
+ struct ea_refcount_el *el;
+ struct rb_node *node = refcount->cursor;
+
+ if (node == NULL)
+ node = ext2fs_rb_first(&refcount->root);
+ else
+ node = ext2fs_rb_next(node);
+
+ if (node) {
+ refcount->cursor = node;
+ el = ext2fs_rb_entry(node, struct ea_refcount_el, node);
+ if (ret)
+ *ret = el->ea_value;
+ return el->ea_key;
}
+
+ return 0;
}


@@ -301,26 +213,28 @@ ea_key_t ea_refcount_intr_next(ext2_refcount_t refcount,

errcode_t ea_refcount_validate(ext2_refcount_t refcount, FILE *out)
{
- errcode_t ret = 0;
- int i;
+ struct ea_refcount_el *el;
+ struct rb_node *node;
+ ea_key_t prev;
+ int prev_valid = 0;
const char *bad = "bad refcount";

- if (refcount->count > refcount->size) {
- fprintf(out, "%s: count > size\n", bad);
- return EXT2_ET_INVALID_ARGUMENT;
- }
- for (i=1; i < refcount->count; i++) {
- if (refcount->list[i-1].ea_key >= refcount->list[i].ea_key) {
+ for (node = ext2fs_rb_first(&refcount->root); node != NULL;
+ node = ext2fs_rb_next(node)) {
+ el = ext2fs_rb_entry(node, struct ea_refcount_el, node);
+ if (prev_valid && prev >= el->ea_key) {
fprintf(out,
- "%s: list[%d].ea_key=%llu, list[%d].ea_key=%llu\n",
- bad, i-1,
- (unsigned long long) refcount->list[i-1].ea_key,
- i,
- (unsigned long long) refcount->list[i].ea_key);
- ret = EXT2_ET_INVALID_ARGUMENT;
+ "%s: prev.ea_key=%llu, curr.ea_key=%llu\n",
+ bad,
+ (unsigned long long) prev,
+ (unsigned long long) el->ea_key);
+ return EXT2_ET_INVALID_ARGUMENT;
}
+ prev = el->ea_key;
+ prev_valid = 1;
}
- return ret;
+
+ return 0;
}

#define BCODE_END 0
@@ -332,10 +246,9 @@ errcode_t ea_refcount_validate(ext2_refcount_t refcount, FILE *out)
#define BCODE_FETCH 6
#define BCODE_VALIDATE 7
#define BCODE_LIST 8
-#define BCODE_COLLAPSE 9

int bcode_program[] = {
- BCODE_CREATE, 5,
+ BCODE_CREATE,
BCODE_STORE, 3, 3,
BCODE_STORE, 4, 4,
BCODE_STORE, 1, 1,
@@ -362,7 +275,6 @@ int bcode_program[] = {
BCODE_FETCH, 30,
BCODE_DECR, 2,
BCODE_DECR, 2,
- BCODE_COLLAPSE,
BCODE_LIST,
BCODE_VALIDATE,
BCODE_FREE,
@@ -373,7 +285,6 @@ int main(int argc, char **argv)
{
int i = 0;
ext2_refcount_t refcount;
- size_t size;
ea_key_t ea_key;
ea_value_t arg;
errcode_t retval;
@@ -383,15 +294,13 @@ int main(int argc, char **argv)
case BCODE_END:
exit(0);
case BCODE_CREATE:
- size = bcode_program[i++];
- retval = ea_refcount_create(size, &refcount);
+ retval = ea_refcount_create(&refcount);
if (retval) {
com_err("ea_refcount_create", retval,
- "while creating size %zu", size);
+ "while creating refcount");
exit(1);
} else
- printf("Creating refcount with size %zu\n",
- size);
+ printf("Creating refcount\n");
break;
case BCODE_FREE:
ea_refcount_free(refcount);
@@ -465,9 +374,6 @@ int main(int argc, char **argv)
(unsigned long long) arg);
}
break;
- case BCODE_COLLAPSE:
- refcount_collapse(refcount);
- break;
}

}
diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index a341c72ac..27364cd73 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -398,8 +398,7 @@ static void inc_ea_inode_refs(e2fsck_t ctx, struct problem_context *pctx,
if (!entry->e_value_inum)
goto next;
if (!ctx->ea_inode_refs) {
- pctx->errcode = ea_refcount_create(0,
- &ctx->ea_inode_refs);
+ pctx->errcode = ea_refcount_create(&ctx->ea_inode_refs);
if (pctx->errcode) {
pctx->num = 4;
fix_problem(ctx, PR_1_ALLOCATE_REFCOUNT, pctx);
@@ -2475,7 +2474,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,

/* Create the EA refcount structure if necessary */
if (!ctx->refcount) {
- pctx->errcode = ea_refcount_create(0, &ctx->refcount);
+ pctx->errcode = ea_refcount_create(&ctx->refcount);
if (pctx->errcode) {
pctx->num = 1;
fix_problem(ctx, PR_1_ALLOCATE_REFCOUNT, pctx);
@@ -2509,8 +2508,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
return 1;
/* Ooops, this EA was referenced more than it stated */
if (!ctx->refcount_extra) {
- pctx->errcode = ea_refcount_create(0,
- &ctx->refcount_extra);
+ pctx->errcode = ea_refcount_create(&ctx->refcount_extra);
if (pctx->errcode) {
pctx->num = 2;
fix_problem(ctx, PR_1_ALLOCATE_REFCOUNT, pctx);
@@ -2651,8 +2649,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,

if (quota_blocks != EXT2FS_C2B(fs, 1U)) {
if (!ctx->ea_block_quota_blocks) {
- pctx->errcode = ea_refcount_create(0,
- &ctx->ea_block_quota_blocks);
+ pctx->errcode = ea_refcount_create(&ctx->ea_block_quota_blocks);
if (pctx->errcode) {
pctx->num = 3;
goto refcount_fail;
@@ -2664,8 +2661,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,

if (quota_inodes) {
if (!ctx->ea_block_quota_inodes) {
- pctx->errcode = ea_refcount_create(0,
- &ctx->ea_block_quota_inodes);
+ pctx->errcode = ea_refcount_create(&ctx->ea_block_quota_inodes);
if (pctx->errcode) {
pctx->num = 4;
refcount_fail:
--
2.41.0



2023-08-02 07:38:35

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH] e2fsck: use rb-tree to track EA reference counts

On Jul 31, 2023, at 10:53 PM, Li Dongyang <[email protected]> wrote:
>
> Using the sorted array to track the EA blocks and their refs is
> not scalable. When the file system has a huge number of EA blocks
> reporting wrong ref, pass1 scanning could not be finished within
> a reasonable time, as 95%+ of CPU time is spent in memmove() when
> trying to enlarge the the sorted array.
>
> On a file system with 20 million EA blocks on an NVMe device, pass1
> time taken:
> without patch:
> time: 2014.78/1838.70/19.91
> with patch:
> time: 45.17/20.17/20.19
>
> Signed-off-by: Li Dongyang <[email protected]>

Reviewed-by: Andreas Dilger <[email protected]>

> ---
> e2fsck/e2fsck.h | 3 +-
> e2fsck/ea_refcount.c | 286 +++++++++++++++----------------------------
> e2fsck/pass1.c | 14 +--
> 3 files changed, 102 insertions(+), 201 deletions(-)
>
> diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
> index 3f2dc3084..bc2490c9d 100644
> --- a/e2fsck/e2fsck.h
> +++ b/e2fsck/e2fsck.h
> @@ -533,7 +533,7 @@ extern struct dx_dir_info *e2fsck_dx_dir_info_iter(e2fsck_t ctx,
> typedef __u64 ea_key_t;
> typedef __u64 ea_value_t;
>
> -extern errcode_t ea_refcount_create(size_t size, ext2_refcount_t *ret);
> +extern errcode_t ea_refcount_create(ext2_refcount_t *ret);
> extern void ea_refcount_free(ext2_refcount_t refcount);
> extern errcode_t ea_refcount_fetch(ext2_refcount_t refcount, ea_key_t ea_key,
> ea_value_t *ret);
> @@ -543,7 +543,6 @@ extern errcode_t ea_refcount_decrement(ext2_refcount_t refcount,
> ea_key_t ea_key, ea_value_t *ret);
> extern errcode_t ea_refcount_store(ext2_refcount_t refcount, ea_key_t ea_key,
> ea_value_t count);
> -extern size_t ext2fs_get_refcount_size(ext2_refcount_t refcount);
> extern void ea_refcount_intr_begin(ext2_refcount_t refcount);
> extern ea_key_t ea_refcount_intr_next(ext2_refcount_t refcount,
> ea_value_t *ret);
> diff --git a/e2fsck/ea_refcount.c b/e2fsck/ea_refcount.c
> index 7154b47c3..fb8b009f1 100644
> --- a/e2fsck/ea_refcount.c
> +++ b/e2fsck/ea_refcount.c
> @@ -16,193 +16,100 @@
> #undef ENABLE_NLS
> #endif
> #include "e2fsck.h"
> +#include "ext2fs/rbtree.h"
>
> /*
> * The strategy we use for keeping track of EA refcounts is as
> - * follows. We keep a sorted array of first EA blocks and its
> - * reference counts. Once the refcount has dropped to zero, it is
> - * removed from the array to save memory space. Once the EA block is
> + * follows. We keep the first EA blocks and its reference counts
> + * in the rb-tree. Once the refcount has dropped to zero, it is
> + * removed from the rb-tree to save memory space. Once the EA block is
> * checked, its bit is set in the block_ea_map bitmap.
> */
> struct ea_refcount_el {
> + struct rb_node node;
> /* ea_key could either be an inode number or block number. */
> ea_key_t ea_key;
> ea_value_t ea_value;
> };
>
> struct ea_refcount {
> - size_t count;
> - size_t size;
> - size_t cursor;
> - struct ea_refcount_el *list;
> + struct rb_root root;
> + struct rb_node *cursor;
> };
>
> void ea_refcount_free(ext2_refcount_t refcount)
> {
> + struct ea_refcount_el *el;
> + struct rb_node *node, *next;
> +
> if (!refcount)
> return;
>
> - if (refcount->list)
> - ext2fs_free_mem(&refcount->list);
> + for (node = ext2fs_rb_first(&refcount->root); node; node = next) {
> + next = ext2fs_rb_next(node);
> + el = ext2fs_rb_entry(node, struct ea_refcount_el, node);
> + ext2fs_rb_erase(node, &refcount->root);
> + ext2fs_free_mem(&el);
> + }
> ext2fs_free_mem(&refcount);
> }
>
> -errcode_t ea_refcount_create(size_t size, ext2_refcount_t *ret)
> +errcode_t ea_refcount_create(ext2_refcount_t *ret)
> {
> ext2_refcount_t refcount;
> errcode_t retval;
> - size_t bytes;
>
> retval = ext2fs_get_memzero(sizeof(struct ea_refcount), &refcount);
> if (retval)
> return retval;
> -
> - if (!size)
> - size = 500;
> - refcount->size = size;
> - bytes = size * sizeof(struct ea_refcount_el);
> -#ifdef DEBUG
> - printf("Refcount allocated %zu entries, %zu bytes.\n",
> - refcount->size, bytes);
> -#endif
> - retval = ext2fs_get_memzero(bytes, &refcount->list);
> - if (retval)
> - goto errout;
> -
> - refcount->count = 0;
> - refcount->cursor = 0;
> + refcount->root = RB_ROOT;
>
> *ret = refcount;
> return 0;
> -
> -errout:
> - ea_refcount_free(refcount);
> - return(retval);
> -}
> -
> -/*
> - * collapse_refcount() --- go through the refcount array, and get rid
> - * of any count == zero entries
> - */
> -static void refcount_collapse(ext2_refcount_t refcount)
> -{
> - unsigned int i, j;
> - struct ea_refcount_el *list;
> -
> - list = refcount->list;
> - for (i = 0, j = 0; i < refcount->count; i++) {
> - if (list[i].ea_value) {
> - if (i != j)
> - list[j] = list[i];
> - j++;
> - }
> - }
> -#if defined(DEBUG) || defined(TEST_PROGRAM)
> - printf("Refcount_collapse: size was %zu, now %d\n",
> - refcount->count, j);
> -#endif
> - refcount->count = j;
> -}
> -
> -
> -/*
> - * insert_refcount_el() --- Insert a new entry into the sorted list at a
> - * specified position.
> - */
> -static struct ea_refcount_el *insert_refcount_el(ext2_refcount_t refcount,
> - ea_key_t ea_key, int pos)
> -{
> - struct ea_refcount_el *el;
> - errcode_t retval;
> - size_t new_size = 0;
> - int num;
> -
> - if (refcount->count >= refcount->size) {
> - new_size = refcount->size + 100;
> -#ifdef DEBUG
> - printf("Reallocating refcount %d entries...\n", new_size);
> -#endif
> - retval = ext2fs_resize_mem((size_t) refcount->size *
> - sizeof(struct ea_refcount_el),
> - (size_t) new_size *
> - sizeof(struct ea_refcount_el),
> - &refcount->list);
> - if (retval)
> - return 0;
> - refcount->size = new_size;
> - }
> - num = (int) refcount->count - pos;
> - if (num < 0)
> - return 0; /* should never happen */
> - if (num) {
> - memmove(&refcount->list[pos+1], &refcount->list[pos],
> - sizeof(struct ea_refcount_el) * num);
> - }
> - refcount->count++;
> - el = &refcount->list[pos];
> - el->ea_key = ea_key;
> - el->ea_value = 0;
> - return el;
> }
>
> -
> /*
> * get_refcount_el() --- given an block number, try to find refcount
> - * information in the sorted list. If the create flag is set,
> - * and we can't find an entry, create one in the sorted list.
> + * information in the rb-tree. If the create flag is set,
> + * and we can't find an entry, create and add it to rb-tree.
> */
> static struct ea_refcount_el *get_refcount_el(ext2_refcount_t refcount,
> ea_key_t ea_key, int create)
> {
> - int low, high, mid;
> + struct rb_node **node;
> + struct rb_node *parent = NULL;
> + struct ea_refcount_el *el;
> + errcode_t retval;
>
> - if (!refcount || !refcount->list)
> - return 0;
> -retry:
> - low = 0;
> - high = (int) refcount->count-1;
> - if (create && ((refcount->count == 0) ||
> - (ea_key > refcount->list[high].ea_key))) {
> - if (refcount->count >= refcount->size)
> - refcount_collapse(refcount);
> -
> - return insert_refcount_el(refcount, ea_key,
> - (unsigned) refcount->count);
> - }
> - if (refcount->count == 0)
> + if (!refcount)
> return 0;
>
> - if (refcount->cursor >= refcount->count)
> - refcount->cursor = 0;
> - if (ea_key == refcount->list[refcount->cursor].ea_key)
> - return &refcount->list[refcount->cursor++];
> -#ifdef DEBUG
> - printf("Non-cursor get_refcount_el: %u\n", ea_key);
> -#endif
> - while (low <= high) {
> - mid = (low+high)/2;
> - if (ea_key == refcount->list[mid].ea_key) {
> - refcount->cursor = mid+1;
> - return &refcount->list[mid];
> - }
> - if (ea_key < refcount->list[mid].ea_key)
> - high = mid-1;
> + node = &refcount->root.rb_node;
> + while (*node) {
> + el = ext2fs_rb_entry(*node, struct ea_refcount_el, node);
> +
> + parent = *node;
> + if (ea_key < el->ea_key)
> + node = &(*node)->rb_left;
> + else if (ea_key > el->ea_key)
> + node = &(*node)->rb_right;
> else
> - low = mid+1;
> - }
> - /*
> - * If we need to create a new entry, it should be right at
> - * low (where high will be left at low-1).
> - */
> - if (create) {
> - if (refcount->count >= refcount->size) {
> - refcount_collapse(refcount);
> - if (refcount->count < refcount->size)
> - goto retry;
> - }
> - return insert_refcount_el(refcount, ea_key, low);
> + return el;
> }
> - return 0;
> +
> + if (!create)
> + return 0;
> +
> + retval = ext2fs_get_memzero(sizeof(struct ea_refcount_el), &el);
> + if (retval)
> + return 0;
> +
> + el->ea_key = ea_key;
> + el->ea_value = 0;
> + ext2fs_rb_link_node(&el->node, parent, node);
> + ext2fs_rb_insert_color(&el->node, &refcount->root);
> +
> + return el;
> }
>
> errcode_t ea_refcount_fetch(ext2_refcount_t refcount, ea_key_t ea_key,
> @@ -240,13 +147,18 @@ errcode_t ea_refcount_decrement(ext2_refcount_t refcount, ea_key_t ea_key,
> struct ea_refcount_el *el;
>
> el = get_refcount_el(refcount, ea_key, 0);
> - if (!el || el->ea_value == 0)
> + if (!el)
> return EXT2_ET_INVALID_ARGUMENT;
>
> el->ea_value--;
>
> if (ret)
> *ret = el->ea_value;
> +
> + if (el->ea_value == 0) {
> + ext2fs_rb_erase(&el->node, &refcount->root);
> + ext2fs_free_mem(&el);
> + }
> return 0;
> }
>
> @@ -262,17 +174,13 @@ errcode_t ea_refcount_store(ext2_refcount_t refcount, ea_key_t ea_key,
> if (!el)
> return ea_value ? EXT2_ET_NO_MEMORY : 0;
> el->ea_value = ea_value;
> + if (el->ea_value == 0) {
> + ext2fs_rb_erase(&el->node, &refcount->root);
> + ext2fs_free_mem(&el);
> + }
> return 0;
> }
>
> -size_t ext2fs_get_refcount_size(ext2_refcount_t refcount)
> -{
> - if (!refcount)
> - return 0;
> -
> - return refcount->size;
> -}
> -
> void ea_refcount_intr_begin(ext2_refcount_t refcount)
> {
> refcount->cursor = 0;
> @@ -281,19 +189,23 @@ void ea_refcount_intr_begin(ext2_refcount_t refcount)
> ea_key_t ea_refcount_intr_next(ext2_refcount_t refcount,
> ea_value_t *ret)
> {
> - struct ea_refcount_el *list;
> -
> - while (1) {
> - if (refcount->cursor >= refcount->count)
> - return 0;
> - list = refcount->list;
> - if (list[refcount->cursor].ea_value) {
> - if (ret)
> - *ret = list[refcount->cursor].ea_value;
> - return list[refcount->cursor++].ea_key;
> - }
> - refcount->cursor++;
> + struct ea_refcount_el *el;
> + struct rb_node *node = refcount->cursor;
> +
> + if (node == NULL)
> + node = ext2fs_rb_first(&refcount->root);
> + else
> + node = ext2fs_rb_next(node);
> +
> + if (node) {
> + refcount->cursor = node;
> + el = ext2fs_rb_entry(node, struct ea_refcount_el, node);
> + if (ret)
> + *ret = el->ea_value;
> + return el->ea_key;
> }
> +
> + return 0;
> }
>
>
> @@ -301,26 +213,28 @@ ea_key_t ea_refcount_intr_next(ext2_refcount_t refcount,
>
> errcode_t ea_refcount_validate(ext2_refcount_t refcount, FILE *out)
> {
> - errcode_t ret = 0;
> - int i;
> + struct ea_refcount_el *el;
> + struct rb_node *node;
> + ea_key_t prev;
> + int prev_valid = 0;
> const char *bad = "bad refcount";
>
> - if (refcount->count > refcount->size) {
> - fprintf(out, "%s: count > size\n", bad);
> - return EXT2_ET_INVALID_ARGUMENT;
> - }
> - for (i=1; i < refcount->count; i++) {
> - if (refcount->list[i-1].ea_key >= refcount->list[i].ea_key) {
> + for (node = ext2fs_rb_first(&refcount->root); node != NULL;
> + node = ext2fs_rb_next(node)) {
> + el = ext2fs_rb_entry(node, struct ea_refcount_el, node);
> + if (prev_valid && prev >= el->ea_key) {
> fprintf(out,
> - "%s: list[%d].ea_key=%llu, list[%d].ea_key=%llu\n",
> - bad, i-1,
> - (unsigned long long) refcount->list[i-1].ea_key,
> - i,
> - (unsigned long long) refcount->list[i].ea_key);
> - ret = EXT2_ET_INVALID_ARGUMENT;
> + "%s: prev.ea_key=%llu, curr.ea_key=%llu\n",
> + bad,
> + (unsigned long long) prev,
> + (unsigned long long) el->ea_key);
> + return EXT2_ET_INVALID_ARGUMENT;
> }
> + prev = el->ea_key;
> + prev_valid = 1;
> }
> - return ret;
> +
> + return 0;
> }
>
> #define BCODE_END 0
> @@ -332,10 +246,9 @@ errcode_t ea_refcount_validate(ext2_refcount_t refcount, FILE *out)
> #define BCODE_FETCH 6
> #define BCODE_VALIDATE 7
> #define BCODE_LIST 8
> -#define BCODE_COLLAPSE 9
>
> int bcode_program[] = {
> - BCODE_CREATE, 5,
> + BCODE_CREATE,
> BCODE_STORE, 3, 3,
> BCODE_STORE, 4, 4,
> BCODE_STORE, 1, 1,
> @@ -362,7 +275,6 @@ int bcode_program[] = {
> BCODE_FETCH, 30,
> BCODE_DECR, 2,
> BCODE_DECR, 2,
> - BCODE_COLLAPSE,
> BCODE_LIST,
> BCODE_VALIDATE,
> BCODE_FREE,
> @@ -373,7 +285,6 @@ int main(int argc, char **argv)
> {
> int i = 0;
> ext2_refcount_t refcount;
> - size_t size;
> ea_key_t ea_key;
> ea_value_t arg;
> errcode_t retval;
> @@ -383,15 +294,13 @@ int main(int argc, char **argv)
> case BCODE_END:
> exit(0);
> case BCODE_CREATE:
> - size = bcode_program[i++];
> - retval = ea_refcount_create(size, &refcount);
> + retval = ea_refcount_create(&refcount);
> if (retval) {
> com_err("ea_refcount_create", retval,
> - "while creating size %zu", size);
> + "while creating refcount");
> exit(1);
> } else
> - printf("Creating refcount with size %zu\n",
> - size);
> + printf("Creating refcount\n");
> break;
> case BCODE_FREE:
> ea_refcount_free(refcount);
> @@ -465,9 +374,6 @@ int main(int argc, char **argv)
> (unsigned long long) arg);
> }
> break;
> - case BCODE_COLLAPSE:
> - refcount_collapse(refcount);
> - break;
> }
>
> }
> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
> index a341c72ac..27364cd73 100644
> --- a/e2fsck/pass1.c
> +++ b/e2fsck/pass1.c
> @@ -398,8 +398,7 @@ static void inc_ea_inode_refs(e2fsck_t ctx, struct problem_context *pctx,
> if (!entry->e_value_inum)
> goto next;
> if (!ctx->ea_inode_refs) {
> - pctx->errcode = ea_refcount_create(0,
> - &ctx->ea_inode_refs);
> + pctx->errcode = ea_refcount_create(&ctx->ea_inode_refs);
> if (pctx->errcode) {
> pctx->num = 4;
> fix_problem(ctx, PR_1_ALLOCATE_REFCOUNT, pctx);
> @@ -2475,7 +2474,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
>
> /* Create the EA refcount structure if necessary */
> if (!ctx->refcount) {
> - pctx->errcode = ea_refcount_create(0, &ctx->refcount);
> + pctx->errcode = ea_refcount_create(&ctx->refcount);
> if (pctx->errcode) {
> pctx->num = 1;
> fix_problem(ctx, PR_1_ALLOCATE_REFCOUNT, pctx);
> @@ -2509,8 +2508,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
> return 1;
> /* Ooops, this EA was referenced more than it stated */
> if (!ctx->refcount_extra) {
> - pctx->errcode = ea_refcount_create(0,
> - &ctx->refcount_extra);
> + pctx->errcode = ea_refcount_create(&ctx->refcount_extra);
> if (pctx->errcode) {
> pctx->num = 2;
> fix_problem(ctx, PR_1_ALLOCATE_REFCOUNT, pctx);
> @@ -2651,8 +2649,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
>
> if (quota_blocks != EXT2FS_C2B(fs, 1U)) {
> if (!ctx->ea_block_quota_blocks) {
> - pctx->errcode = ea_refcount_create(0,
> - &ctx->ea_block_quota_blocks);
> + pctx->errcode = ea_refcount_create(&ctx->ea_block_quota_blocks);
> if (pctx->errcode) {
> pctx->num = 3;
> goto refcount_fail;
> @@ -2664,8 +2661,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
>
> if (quota_inodes) {
> if (!ctx->ea_block_quota_inodes) {
> - pctx->errcode = ea_refcount_create(0,
> - &ctx->ea_block_quota_inodes);
> + pctx->errcode = ea_refcount_create(&ctx->ea_block_quota_inodes);
> if (pctx->errcode) {
> pctx->num = 4;
> refcount_fail:
> --
> 2.41.0
>


Cheers, Andreas






Attachments:
signature.asc (890.00 B)
Message signed with OpenPGP