2023-01-05 17:04:17

by Liam R. Howlett

Subject: [PATCH] maple_tree: Remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_alloc_bulk()

Preallocations are common in the VMA code to avoid allocating under
certain locking conditions. The preallocations must also cover the
worst-case scenario. Removing the GFP_ZERO flag from the
kmem_cache_alloc() (and bulk variant) calls will reduce the amount of
time spent zeroing memory that may not be used. Only zero out the
necessary area to keep track of the allocations in the maple state.
Zero the entire node prior to using it in the tree.
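
In short, the shape of the change is the following (a condensed sketch;
the diff below is authoritative):

	static inline struct maple_node *mt_alloc_one(gfp_t gfp)
	{
		return kmem_cache_alloc(maple_node_cache, gfp);	/* no __GFP_ZERO */
	}

	/* ... and in mas_pop_node(), the node is zeroed only when handed out: */
	memset(ret, 0, sizeof(*ret));
	return (struct maple_node *)ret;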

This required internal changes to node counting on allocation, so the
test code is also updated.

This restores some micro-benchmark performance:
up to +9% in mmtests mmap1 by my testing
+10% to +20% in mmap, mmapaddr, mmapmany tests reported by Red Hat

Link: https://bugzilla.redhat.com/show_bug.cgi?id=2149636
Reported-by: Jirka Hladky <[email protected]>
Suggested-by: Matthew Wilcox (Oracle) <[email protected]>
Signed-off-by: Liam Howlett <[email protected]>
---
lib/maple_tree.c | 80 +++++++++++++++++---------------
tools/testing/radix-tree/maple.c | 18 +++----
2 files changed, 52 insertions(+), 46 deletions(-)

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 26e2045d3cda..82a8121fe49b 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -149,13 +149,12 @@ struct maple_subtree_state {
/* Functions */
static inline struct maple_node *mt_alloc_one(gfp_t gfp)
{
- return kmem_cache_alloc(maple_node_cache, gfp | __GFP_ZERO);
+ return kmem_cache_alloc(maple_node_cache, gfp);
}

static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
{
- return kmem_cache_alloc_bulk(maple_node_cache, gfp | __GFP_ZERO, size,
- nodes);
+ return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
}

static inline void mt_free_bulk(size_t size, void __rcu **nodes)
@@ -1127,9 +1126,10 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas)
{
struct maple_alloc *ret, *node = mas->alloc;
unsigned long total = mas_allocated(mas);
+ unsigned int req = mas_alloc_req(mas);

/* nothing or a request pending. */
- if (unlikely(!total))
+ if (WARN_ON(!total))
return NULL;

if (total == 1) {
@@ -1139,27 +1139,25 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas)
goto single_node;
}

- if (!node->node_count) {
+ if (node->node_count == 1) {
/* Single allocation in this node. */
mas->alloc = node->slot[0];
- node->slot[0] = NULL;
mas->alloc->total = node->total - 1;
ret = node;
goto new_head;
}
-
node->total--;
- ret = node->slot[node->node_count];
- node->slot[node->node_count--] = NULL;
+ ret = node->slot[--node->node_count];
+ node->slot[node->node_count] = NULL;

single_node:
new_head:
- ret->total = 0;
- ret->node_count = 0;
- if (ret->request_count) {
- mas_set_alloc_req(mas, ret->request_count + 1);
- ret->request_count = 0;
+ if (req) {
+ req++;
+ mas_set_alloc_req(mas, req);
}
+
+ memset(ret, 0, sizeof(*ret));
return (struct maple_node *)ret;
}

@@ -1178,21 +1176,20 @@ static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
unsigned long count;
unsigned int requested = mas_alloc_req(mas);

- memset(reuse, 0, sizeof(*reuse));
count = mas_allocated(mas);

- if (count && (head->node_count < MAPLE_ALLOC_SLOTS - 1)) {
- if (head->slot[0])
- head->node_count++;
- head->slot[head->node_count] = reuse;
+ reuse->request_count = 0;
+ reuse->node_count = 0;
+ if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) {
+ head->slot[head->node_count++] = reuse;
head->total++;
goto done;
}

reuse->total = 1;
if ((head) && !((unsigned long)head & 0x1)) {
- head->request_count = 0;
reuse->slot[0] = head;
+ reuse->node_count = 1;
reuse->total += head->total;
}

@@ -1211,7 +1208,6 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
{
struct maple_alloc *node;
unsigned long allocated = mas_allocated(mas);
- unsigned long success = allocated;
unsigned int requested = mas_alloc_req(mas);
unsigned int count;
void **slots = NULL;
@@ -1227,24 +1223,29 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
WARN_ON(!allocated);
}

- if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS - 1) {
+ if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
node = (struct maple_alloc *)mt_alloc_one(gfp);
if (!node)
goto nomem_one;

- if (allocated)
+ if (allocated) {
node->slot[0] = mas->alloc;
+ node->node_count = 1;
+ } else {
+ node->node_count = 0;
+ }

- success++;
mas->alloc = node;
+ node->total = ++allocated;
requested--;
}

node = mas->alloc;
+ node->request_count = 0;
while (requested) {
max_req = MAPLE_ALLOC_SLOTS;
- if (node->slot[0]) {
- unsigned int offset = node->node_count + 1;
+ if (node->node_count) {
+ unsigned int offset = node->node_count;

slots = (void **)&node->slot[offset];
max_req -= offset;
@@ -1258,15 +1259,13 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
goto nomem_bulk;

node->node_count += count;
- /* zero indexed. */
- if (slots == (void **)&node->slot)
- node->node_count--;
-
- success += count;
+ allocated += count;
node = node->slot[0];
+ node->node_count = 0;
+ node->request_count = 0;
requested -= count;
}
- mas->alloc->total = success;
+ mas->alloc->total = allocated;
return;

nomem_bulk:
@@ -1275,7 +1274,7 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
nomem_one:
mas_set_alloc_req(mas, requested);
if (mas->alloc && !(((unsigned long)mas->alloc & 0x1)))
- mas->alloc->total = success;
+ mas->alloc->total = allocated;
mas_set_err(mas, -ENOMEM);
return;

@@ -5745,6 +5744,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
void mas_destroy(struct ma_state *mas)
{
struct maple_alloc *node;
+ unsigned long total;

/*
* When using mas_for_each() to insert an expected number of elements,
@@ -5767,14 +5767,20 @@ void mas_destroy(struct ma_state *mas)
}
mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);

- while (mas->alloc && !((unsigned long)mas->alloc & 0x1)) {
+ total = mas_allocated(mas);
+ while (total) {
node = mas->alloc;
mas->alloc = node->slot[0];
- if (node->node_count > 0)
- mt_free_bulk(node->node_count,
- (void __rcu **)&node->slot[1]);
+ if (node->node_count > 1) {
+ size_t count = node->node_count - 1;
+
+ mt_free_bulk(count, (void __rcu **)&node->slot[1]);
+ total -= count;
+ }
kmem_cache_free(maple_node_cache, node);
+ total--;
}
+
mas->alloc = NULL;
}
EXPORT_SYMBOL_GPL(mas_destroy);
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index 81fa7ec2e66a..1f36bc1c5d36 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -173,11 +173,11 @@ static noinline void check_new_node(struct maple_tree *mt)

if (!MAPLE_32BIT) {
if (i >= 35)
- e = i - 35;
+ e = i - 34;
else if (i >= 5)
- e = i - 5;
+ e = i - 4;
else if (i >= 2)
- e = i - 2;
+ e = i - 1;
} else {
if (i >= 4)
e = i - 4;
@@ -305,17 +305,17 @@ static noinline void check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
+ MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);

mn = mas_pop_node(&mas); /* get the next node. */
MT_BUG_ON(mt, mn == NULL);
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS);
- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 2);
+ MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);

mas_push_node(&mas, mn);
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
+ MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);

/* Check the limit of pop/push/pop */
mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); /* Request */
@@ -323,14 +323,14 @@ static noinline void check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
MT_BUG_ON(mt, mas_alloc_req(&mas));
- MT_BUG_ON(mt, mas.alloc->node_count);
+ MT_BUG_ON(mt, mas.alloc->node_count != 1);
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
- MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
+ MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
mas_push_node(&mas, mn);
- MT_BUG_ON(mt, mas.alloc->node_count);
+ MT_BUG_ON(mt, mas.alloc->node_count != 1);
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
--
2.35.1


2023-01-06 07:31:16

by Mike Rapoport

Subject: Re: [PATCH] maple_tree: Remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_alloc_bulk()

On Thu, Jan 05, 2023 at 04:05:34PM +0000, Liam Howlett wrote:
> Preallocations are common in the VMA code to avoid allocating under
> certain locking conditions. The preallocations must also cover the
> worst-case scenario. Removing the GFP_ZERO flag from the
> kmem_cache_alloc() (and bulk variant) calls will reduce the amount of
> time spent zeroing memory that may not be used. Only zero out the
> necessary area to keep track of the allocations in the maple state.
> Zero the entire node prior to using it in the tree.
>
> This required internal changes to node counting on allocation, so the
> test code is also updated.
>
> This restores some micro-benchmark performance:
> up to +9% in mmtests mmap1 by my testing
> +10% to +20% in mmap, mmapaddr, mmapmany tests reported by Red Hat
>
> Link: https://bugzilla.redhat.com/show_bug.cgi?id=2149636
> Reported-by: Jirka Hladky <[email protected]>
> Suggested-by: Matthew Wilcox (Oracle) <[email protected]>
> Signed-off-by: Liam Howlett <[email protected]>
> ---
> lib/maple_tree.c | 80 +++++++++++++++++---------------
> tools/testing/radix-tree/maple.c | 18 +++----
> 2 files changed, 52 insertions(+), 46 deletions(-)
>
> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> index 26e2045d3cda..82a8121fe49b 100644
> --- a/lib/maple_tree.c
> +++ b/lib/maple_tree.c
> @@ -149,13 +149,12 @@ struct maple_subtree_state {
> /* Functions */
> static inline struct maple_node *mt_alloc_one(gfp_t gfp)
> {
> - return kmem_cache_alloc(maple_node_cache, gfp | __GFP_ZERO);
> + return kmem_cache_alloc(maple_node_cache, gfp);
> }
>
> static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
> {
> - return kmem_cache_alloc_bulk(maple_node_cache, gfp | __GFP_ZERO, size,
> - nodes);
> + return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
> }
>
> static inline void mt_free_bulk(size_t size, void __rcu **nodes)
> @@ -1127,9 +1126,10 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas)
> {
> struct maple_alloc *ret, *node = mas->alloc;
> unsigned long total = mas_allocated(mas);
> + unsigned int req = mas_alloc_req(mas);
>
> /* nothing or a request pending. */
> - if (unlikely(!total))
> + if (WARN_ON(!total))

Hmm, isn't WARN_ON() here too much?

> return NULL;
>
> if (total == 1) {
> @@ -1139,27 +1139,25 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas)
> goto single_node;
> }
>
> - if (!node->node_count) {
> + if (node->node_count == 1) {
> /* Single allocation in this node. */
> mas->alloc = node->slot[0];
> - node->slot[0] = NULL;
> mas->alloc->total = node->total - 1;
> ret = node;
> goto new_head;
> }
> -
> node->total--;
> - ret = node->slot[node->node_count];
> - node->slot[node->node_count--] = NULL;
> + ret = node->slot[--node->node_count];
> + node->slot[node->node_count] = NULL;
>
> single_node:
> new_head:
> - ret->total = 0;
> - ret->node_count = 0;
> - if (ret->request_count) {
> - mas_set_alloc_req(mas, ret->request_count + 1);
> - ret->request_count = 0;
> + if (req) {
> + req++;
> + mas_set_alloc_req(mas, req);
> }
> +
> + memset(ret, 0, sizeof(*ret));
> return (struct maple_node *)ret;
> }
>
> @@ -1178,21 +1176,20 @@ static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
> unsigned long count;
> unsigned int requested = mas_alloc_req(mas);
>
> - memset(reuse, 0, sizeof(*reuse));
> count = mas_allocated(mas);
>
> - if (count && (head->node_count < MAPLE_ALLOC_SLOTS - 1)) {
> - if (head->slot[0])
> - head->node_count++;
> - head->slot[head->node_count] = reuse;
> + reuse->request_count = 0;
> + reuse->node_count = 0;
> + if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) {
> + head->slot[head->node_count++] = reuse;
> head->total++;
> goto done;
> }
>
> reuse->total = 1;
> if ((head) && !((unsigned long)head & 0x1)) {
> - head->request_count = 0;
> reuse->slot[0] = head;
> + reuse->node_count = 1;
> reuse->total += head->total;
> }
>
> @@ -1211,7 +1208,6 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
> {
> struct maple_alloc *node;
> unsigned long allocated = mas_allocated(mas);
> - unsigned long success = allocated;
> unsigned int requested = mas_alloc_req(mas);
> unsigned int count;
> void **slots = NULL;
> @@ -1227,24 +1223,29 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
> WARN_ON(!allocated);
> }
>
> - if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS - 1) {
> + if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
> node = (struct maple_alloc *)mt_alloc_one(gfp);
> if (!node)
> goto nomem_one;
>
> - if (allocated)
> + if (allocated) {
> node->slot[0] = mas->alloc;
> + node->node_count = 1;
> + } else {
> + node->node_count = 0;
> + }
>
> - success++;
> mas->alloc = node;
> + node->total = ++allocated;
> requested--;
> }
>
> node = mas->alloc;
> + node->request_count = 0;
> while (requested) {
> max_req = MAPLE_ALLOC_SLOTS;
> - if (node->slot[0]) {
> - unsigned int offset = node->node_count + 1;
> + if (node->node_count) {
> + unsigned int offset = node->node_count;
>
> slots = (void **)&node->slot[offset];
> max_req -= offset;
> @@ -1258,15 +1259,13 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
> goto nomem_bulk;
>
> node->node_count += count;
> - /* zero indexed. */
> - if (slots == (void **)&node->slot)
> - node->node_count--;
> -
> - success += count;
> + allocated += count;
> node = node->slot[0];
> + node->node_count = 0;
> + node->request_count = 0;
> requested -= count;
> }
> - mas->alloc->total = success;
> + mas->alloc->total = allocated;
> return;
>
> nomem_bulk:
> @@ -1275,7 +1274,7 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
> nomem_one:
> mas_set_alloc_req(mas, requested);
> if (mas->alloc && !(((unsigned long)mas->alloc & 0x1)))
> - mas->alloc->total = success;
> + mas->alloc->total = allocated;
> mas_set_err(mas, -ENOMEM);
> return;
>
> @@ -5745,6 +5744,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
> void mas_destroy(struct ma_state *mas)
> {
> struct maple_alloc *node;
> + unsigned long total;
>
> /*
> * When using mas_for_each() to insert an expected number of elements,
> @@ -5767,14 +5767,20 @@ void mas_destroy(struct ma_state *mas)
> }
> mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
>
> - while (mas->alloc && !((unsigned long)mas->alloc & 0x1)) {
> + total = mas_allocated(mas);
> + while (total) {
> node = mas->alloc;
> mas->alloc = node->slot[0];
> - if (node->node_count > 0)
> - mt_free_bulk(node->node_count,
> - (void __rcu **)&node->slot[1]);
> + if (node->node_count > 1) {
> + size_t count = node->node_count - 1;
> +
> + mt_free_bulk(count, (void __rcu **)&node->slot[1]);
> + total -= count;
> + }
> kmem_cache_free(maple_node_cache, node);
> + total--;
> }
> +
> mas->alloc = NULL;
> }
> EXPORT_SYMBOL_GPL(mas_destroy);
> diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
> index 81fa7ec2e66a..1f36bc1c5d36 100644
> --- a/tools/testing/radix-tree/maple.c
> +++ b/tools/testing/radix-tree/maple.c
> @@ -173,11 +173,11 @@ static noinline void check_new_node(struct maple_tree *mt)
>
> if (!MAPLE_32BIT) {
> if (i >= 35)
> - e = i - 35;
> + e = i - 34;
> else if (i >= 5)
> - e = i - 5;
> + e = i - 4;
> else if (i >= 2)
> - e = i - 2;
> + e = i - 1;
> } else {
> if (i >= 4)
> e = i - 4;
> @@ -305,17 +305,17 @@ static noinline void check_new_node(struct maple_tree *mt)
> MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
> MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
> MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
> - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
> + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
>
> mn = mas_pop_node(&mas); /* get the next node. */
> MT_BUG_ON(mt, mn == NULL);
> MT_BUG_ON(mt, not_empty(mn));
> MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS);
> - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 2);
> + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
>
> mas_push_node(&mas, mn);
> MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
> - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
> + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
>
> /* Check the limit of pop/push/pop */
> mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); /* Request */
> @@ -323,14 +323,14 @@ static noinline void check_new_node(struct maple_tree *mt)
> MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
> MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
> MT_BUG_ON(mt, mas_alloc_req(&mas));
> - MT_BUG_ON(mt, mas.alloc->node_count);
> + MT_BUG_ON(mt, mas.alloc->node_count != 1);
> MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
> mn = mas_pop_node(&mas);
> MT_BUG_ON(mt, not_empty(mn));
> MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
> - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
> + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
> mas_push_node(&mas, mn);
> - MT_BUG_ON(mt, mas.alloc->node_count);
> + MT_BUG_ON(mt, mas.alloc->node_count != 1);
> MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
> mn = mas_pop_node(&mas);
> MT_BUG_ON(mt, not_empty(mn));
> --
> 2.35.1
>

--
Sincerely yours,
Mike.

2023-01-06 19:05:09

by Liam R. Howlett

Subject: Re: [PATCH] maple_tree: Remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_alloc_bulk()

* Mike Rapoport <[email protected]> [230106 02:28]:
> On Thu, Jan 05, 2023 at 04:05:34PM +0000, Liam Howlett wrote:
> > Preallocations are common in the VMA code to avoid allocating under
> > certain locking conditions. The preallocations must also cover the
> > worst-case scenario. Removing the GFP_ZERO flag from the
> > kmem_cache_alloc() (and bulk variant) calls will reduce the amount of
> > time spent zeroing memory that may not be used. Only zero out the
> > necessary area to keep track of the allocations in the maple state.
> > Zero the entire node prior to using it in the tree.
> >
> > This required internal changes to node counting on allocation, so the
> > test code is also updated.
> >
> > This restores some micro-benchmark performance:
> > up to +9% in mmtests mmap1 by my testing
> > +10% to +20% in mmap, mmapaddr, mmapmany tests reported by Red Hat
> >
> > Link: https://bugzilla.redhat.com/show_bug.cgi?id=2149636
> > Reported-by: Jirka Hladky <[email protected]>
> > Suggested-by: Matthew Wilcox (Oracle) <[email protected]>
> > Signed-off-by: Liam Howlett <[email protected]>
> > ---
> > lib/maple_tree.c | 80 +++++++++++++++++---------------
> > tools/testing/radix-tree/maple.c | 18 +++----
> > 2 files changed, 52 insertions(+), 46 deletions(-)
> >
> > diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> > index 26e2045d3cda..82a8121fe49b 100644
> > --- a/lib/maple_tree.c
> > +++ b/lib/maple_tree.c
> > @@ -149,13 +149,12 @@ struct maple_subtree_state {
> > /* Functions */
> > static inline struct maple_node *mt_alloc_one(gfp_t gfp)
> > {
> > - return kmem_cache_alloc(maple_node_cache, gfp | __GFP_ZERO);
> > + return kmem_cache_alloc(maple_node_cache, gfp);
> > }
> >
> > static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
> > {
> > - return kmem_cache_alloc_bulk(maple_node_cache, gfp | __GFP_ZERO, size,
> > - nodes);
> > + return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
> > }
> >
> > static inline void mt_free_bulk(size_t size, void __rcu **nodes)
> > @@ -1127,9 +1126,10 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas)
> > {
> > struct maple_alloc *ret, *node = mas->alloc;
> > unsigned long total = mas_allocated(mas);
> > + unsigned int req = mas_alloc_req(mas);
> >
> > /* nothing or a request pending. */
> > - if (unlikely(!total))
> > + if (WARN_ON(!total))
>
> Hmm, isn't WARN_ON() here too much?

I don't think so. If we get to the point of asking for a node while we
don't have any to give, then it's too late. It means we (I) have
calculated the necessary nodes incorrectly and we won't have enough
memory to fit things into the tree. It should never happen.
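
For reference, the expected calling pattern is roughly the following (a
simplified sketch with assumed local variables mm and vma, not the exact
VMA call sites):

	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end - 1);

	/* Preallocate the worst-case number of nodes before taking the lock. */
	if (mas_preallocate(&mas, vma, GFP_KERNEL))
		return -ENOMEM;

	/*
	 * Tree operations now draw nodes from mas.alloc via mas_pop_node();
	 * running out here means the worst-case estimate was wrong, which is
	 * exactly what the WARN_ON() is meant to catch.
	 */
	mas_store_prealloc(&mas, vma);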

>
> > return NULL;
> >
> > if (total == 1) {
> > @@ -1139,27 +1139,25 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas)
> > goto single_node;
> > }
> >
> > - if (!node->node_count) {
> > + if (node->node_count == 1) {
> > /* Single allocation in this node. */
> > mas->alloc = node->slot[0];
> > - node->slot[0] = NULL;
> > mas->alloc->total = node->total - 1;
> > ret = node;
> > goto new_head;
> > }
> > -
> > node->total--;
> > - ret = node->slot[node->node_count];
> > - node->slot[node->node_count--] = NULL;
> > + ret = node->slot[--node->node_count];
> > + node->slot[node->node_count] = NULL;
> >
> > single_node:
> > new_head:
> > - ret->total = 0;
> > - ret->node_count = 0;
> > - if (ret->request_count) {
> > - mas_set_alloc_req(mas, ret->request_count + 1);
> > - ret->request_count = 0;
> > + if (req) {
> > + req++;
> > + mas_set_alloc_req(mas, req);
> > }
> > +
> > + memset(ret, 0, sizeof(*ret));
> > return (struct maple_node *)ret;
> > }
> >
> > @@ -1178,21 +1176,20 @@ static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
> > unsigned long count;
> > unsigned int requested = mas_alloc_req(mas);
> >
> > - memset(reuse, 0, sizeof(*reuse));
> > count = mas_allocated(mas);
> >
> > - if (count && (head->node_count < MAPLE_ALLOC_SLOTS - 1)) {
> > - if (head->slot[0])
> > - head->node_count++;
> > - head->slot[head->node_count] = reuse;
> > + reuse->request_count = 0;
> > + reuse->node_count = 0;
> > + if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) {
> > + head->slot[head->node_count++] = reuse;
> > head->total++;
> > goto done;
> > }
> >
> > reuse->total = 1;
> > if ((head) && !((unsigned long)head & 0x1)) {
> > - head->request_count = 0;
> > reuse->slot[0] = head;
> > + reuse->node_count = 1;
> > reuse->total += head->total;
> > }
> >
> > @@ -1211,7 +1208,6 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
> > {
> > struct maple_alloc *node;
> > unsigned long allocated = mas_allocated(mas);
> > - unsigned long success = allocated;
> > unsigned int requested = mas_alloc_req(mas);
> > unsigned int count;
> > void **slots = NULL;
> > @@ -1227,24 +1223,29 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
> > WARN_ON(!allocated);
> > }
> >
> > - if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS - 1) {
> > + if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
> > node = (struct maple_alloc *)mt_alloc_one(gfp);
> > if (!node)
> > goto nomem_one;
> >
> > - if (allocated)
> > + if (allocated) {
> > node->slot[0] = mas->alloc;
> > + node->node_count = 1;
> > + } else {
> > + node->node_count = 0;
> > + }
> >
> > - success++;
> > mas->alloc = node;
> > + node->total = ++allocated;
> > requested--;
> > }
> >
> > node = mas->alloc;
> > + node->request_count = 0;
> > while (requested) {
> > max_req = MAPLE_ALLOC_SLOTS;
> > - if (node->slot[0]) {
> > - unsigned int offset = node->node_count + 1;
> > + if (node->node_count) {
> > + unsigned int offset = node->node_count;
> >
> > slots = (void **)&node->slot[offset];
> > max_req -= offset;
> > @@ -1258,15 +1259,13 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
> > goto nomem_bulk;
> >
> > node->node_count += count;
> > - /* zero indexed. */
> > - if (slots == (void **)&node->slot)
> > - node->node_count--;
> > -
> > - success += count;
> > + allocated += count;
> > node = node->slot[0];
> > + node->node_count = 0;
> > + node->request_count = 0;
> > requested -= count;
> > }
> > - mas->alloc->total = success;
> > + mas->alloc->total = allocated;
> > return;
> >
> > nomem_bulk:
> > @@ -1275,7 +1274,7 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
> > nomem_one:
> > mas_set_alloc_req(mas, requested);
> > if (mas->alloc && !(((unsigned long)mas->alloc & 0x1)))
> > - mas->alloc->total = success;
> > + mas->alloc->total = allocated;
> > mas_set_err(mas, -ENOMEM);
> > return;
> >
> > @@ -5745,6 +5744,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
> > void mas_destroy(struct ma_state *mas)
> > {
> > struct maple_alloc *node;
> > + unsigned long total;
> >
> > /*
> > * When using mas_for_each() to insert an expected number of elements,
> > @@ -5767,14 +5767,20 @@ void mas_destroy(struct ma_state *mas)
> > }
> > mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
> >
> > - while (mas->alloc && !((unsigned long)mas->alloc & 0x1)) {
> > + total = mas_allocated(mas);
> > + while (total) {
> > node = mas->alloc;
> > mas->alloc = node->slot[0];
> > - if (node->node_count > 0)
> > - mt_free_bulk(node->node_count,
> > - (void __rcu **)&node->slot[1]);
> > + if (node->node_count > 1) {
> > + size_t count = node->node_count - 1;
> > +
> > + mt_free_bulk(count, (void __rcu **)&node->slot[1]);
> > + total -= count;
> > + }
> > kmem_cache_free(maple_node_cache, node);
> > + total--;
> > }
> > +
> > mas->alloc = NULL;
> > }
> > EXPORT_SYMBOL_GPL(mas_destroy);
> > diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
> > index 81fa7ec2e66a..1f36bc1c5d36 100644
> > --- a/tools/testing/radix-tree/maple.c
> > +++ b/tools/testing/radix-tree/maple.c
> > @@ -173,11 +173,11 @@ static noinline void check_new_node(struct maple_tree *mt)
> >
> > if (!MAPLE_32BIT) {
> > if (i >= 35)
> > - e = i - 35;
> > + e = i - 34;
> > else if (i >= 5)
> > - e = i - 5;
> > + e = i - 4;
> > else if (i >= 2)
> > - e = i - 2;
> > + e = i - 1;
> > } else {
> > if (i >= 4)
> > e = i - 4;
> > @@ -305,17 +305,17 @@ static noinline void check_new_node(struct maple_tree *mt)
> > MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
> > MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
> > MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
> > - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
> > + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
> >
> > mn = mas_pop_node(&mas); /* get the next node. */
> > MT_BUG_ON(mt, mn == NULL);
> > MT_BUG_ON(mt, not_empty(mn));
> > MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS);
> > - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 2);
> > + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
> >
> > mas_push_node(&mas, mn);
> > MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
> > - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
> > + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
> >
> > /* Check the limit of pop/push/pop */
> > mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); /* Request */
> > @@ -323,14 +323,14 @@ static noinline void check_new_node(struct maple_tree *mt)
> > MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM));
> > MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL));
> > MT_BUG_ON(mt, mas_alloc_req(&mas));
> > - MT_BUG_ON(mt, mas.alloc->node_count);
> > + MT_BUG_ON(mt, mas.alloc->node_count != 1);
> > MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
> > mn = mas_pop_node(&mas);
> > MT_BUG_ON(mt, not_empty(mn));
> > MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1);
> > - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1);
> > + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS);
> > mas_push_node(&mas, mn);
> > - MT_BUG_ON(mt, mas.alloc->node_count);
> > + MT_BUG_ON(mt, mas.alloc->node_count != 1);
> > MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
> > mn = mas_pop_node(&mas);
> > MT_BUG_ON(mt, not_empty(mn));
> > --
> > 2.35.1
> >
>
> --
> Sincerely yours,
> Mike.

2023-01-24 13:03:20

by kernel test robot

Subject: Re: [PATCH] maple_tree: Remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_alloc_bulk()


Greetings,

FYI, we noticed a 3.8% improvement of will-it-scale.per_process_ops due to commit:


commit: 180c117d46be304a08b14fb080010773faf50788 ("[PATCH] maple_tree: Remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_alloc_bulk()")
url: https://github.com/intel-lab-lkp/linux/commits/Liam-Howlett/maple_tree-Remove-GFP_ZERO-from-kmem_cache_alloc-and-kmem_cache_alloc_bulk/20230106-000849
base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git 41c03ba9beea760bd2d2ac9250b09a2e192da2dc
patch link: https://lore.kernel.org/all/[email protected]/
patch subject: [PATCH] maple_tree: Remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_alloc_bulk()

in testcase: will-it-scale
on test machine: 104 threads 2 sockets (Skylake) with 192G memory
with following parameters:

nr_task: 16
mode: process
test: mmap1
cpufreq_governor: performance

test-description: Will It Scale takes a testcase and runs it from 1 through to n parallel copies to see if the testcase will scale. It builds both a process and threads based test in order to see any differences between the two.
test-url: https://github.com/antonblanchard/will-it-scale





Details are as below:
-------------------------------------------------------------------------------------------------->


To reproduce:

git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
sudo bin/lkp install job.yaml # job file is attached in this email
bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
sudo bin/lkp run generated-yaml-file

# if come across any failure that blocks the test,
# please remove ~/.lkp and /lkp dir to run from a clean state.

=========================================================================================
compiler/cpufreq_governor/kconfig/mode/nr_task/rootfs/tbox_group/test/testcase:
gcc-11/performance/x86_64-rhel-8.3/process/16/debian-11.1-x86_64-20220510.cgz/lkp-skl-fpga01/mmap1/will-it-scale

commit:
41c03ba9be ("Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost")
180c117d46 ("maple_tree: Remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_alloc_bulk()")

41c03ba9beea760b 180c117d46be304a08b14fb0800
---------------- ---------------------------
%stddev %change %stddev
\ | \
2604393 +3.8% 2702107 will-it-scale.16.processes
162774 +3.8% 168881 will-it-scale.per_process_ops
2604393 +3.8% 2702107 will-it-scale.workload
1.254e+10 +2.6% 1.286e+10 perf-stat.i.branch-instructions
1.251e+10 +2.2% 1.279e+10 perf-stat.i.dTLB-loads
6.283e+09 +1.3% 6.367e+09 perf-stat.i.dTLB-stores
5.838e+10 +2.5% 5.987e+10 perf-stat.i.instructions
301.29 +2.2% 307.85 perf-stat.i.metric.M/sec
0.81 -2.1% 0.79 perf-stat.overall.cpi
6756492 -1.0% 6686622 perf-stat.overall.path-length
1.25e+10 +2.6% 1.282e+10 perf-stat.ps.branch-instructions
1.247e+10 +2.2% 1.275e+10 perf-stat.ps.dTLB-loads
6.263e+09 +1.3% 6.346e+09 perf-stat.ps.dTLB-stores
5.819e+10 +2.5% 5.967e+10 perf-stat.ps.instructions
1.76e+13 +2.7% 1.807e+13 perf-stat.total.instructions
2.31 ± 10% -1.0 1.29 ± 7% perf-profile.calltrace.cycles-pp.mas_preallocate.do_mas_align_munmap.__vm_munmap.__x64_sys_munmap.do_syscall_64
2.24 ± 10% -1.0 1.23 ± 7% perf-profile.calltrace.cycles-pp.mas_alloc_nodes.mas_preallocate.do_mas_align_munmap.__vm_munmap.__x64_sys_munmap
2.28 ± 11% -1.0 1.27 ± 8% perf-profile.calltrace.cycles-pp.mas_preallocate.mmap_region.do_mmap.vm_mmap_pgoff.do_syscall_64
2.24 ± 10% -1.0 1.23 ± 8% perf-profile.calltrace.cycles-pp.mas_alloc_nodes.mas_preallocate.mmap_region.do_mmap.vm_mmap_pgoff
1.52 ± 11% -0.8 0.67 ± 8% perf-profile.calltrace.cycles-pp.kmem_cache_alloc_bulk.mas_alloc_nodes.mas_preallocate.mmap_region.do_mmap
1.49 ± 11% -0.8 0.66 ± 7% perf-profile.calltrace.cycles-pp.kmem_cache_alloc_bulk.mas_alloc_nodes.mas_preallocate.do_mas_align_munmap.__vm_munmap
4.94 ± 10% -2.1 2.83 ± 8% perf-profile.children.cycles-pp.mas_alloc_nodes
4.61 ± 10% -2.0 2.57 ± 8% perf-profile.children.cycles-pp.mas_preallocate
1.96 ± 10% -1.8 0.21 ± 7% perf-profile.children.cycles-pp.memset_erms
3.09 ± 10% -1.7 1.36 ± 7% perf-profile.children.cycles-pp.kmem_cache_alloc_bulk
0.09 ± 13% -0.1 0.03 ±100% perf-profile.children.cycles-pp.vm_area_free
0.07 ± 10% +0.0 0.09 ± 11% perf-profile.children.cycles-pp.mas_wr_modify
0.22 ± 10% +0.1 0.28 ± 7% perf-profile.children.cycles-pp.__might_sleep
0.00 +0.2 0.22 ± 10% perf-profile.children.cycles-pp.mas_pop_node
1.89 ± 11% -1.7 0.21 ± 8% perf-profile.self.cycles-pp.memset_erms
0.72 ± 11% -0.3 0.37 ± 7% perf-profile.self.cycles-pp.kmem_cache_alloc_bulk
0.09 ± 13% -0.1 0.03 ±100% perf-profile.self.cycles-pp.vm_area_free
0.11 ± 7% +0.1 0.17 ± 11% perf-profile.self.cycles-pp.do_mas_munmap
0.00 +0.2 0.21 ± 9% perf-profile.self.cycles-pp.mas_pop_node




Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.


--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests



Attachments:
(No filename) (5.78 kB)
config-6.2.0-rc2-00058-g180c117d46be (162.94 kB)
job-script (7.63 kB)
job.yaml (5.17 kB)
reproduce (344.00 B)

2023-01-24 13:20:00

by David Hildenbrand

Subject: Re: [PATCH] maple_tree: Remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_alloc_bulk()

On 06.01.23 19:36, Liam Howlett wrote:
> * Mike Rapoport <[email protected]> [230106 02:28]:
>> On Thu, Jan 05, 2023 at 04:05:34PM +0000, Liam Howlett wrote:
>>> Preallocations are common in the VMA code to avoid allocating under
>>> certain locking conditions. The preallocations must also cover the
>>> worst-case scenario. Removing the GFP_ZERO flag from the
>>> kmem_cache_alloc() (and bulk variant) calls will reduce the amount of
>>> time spent zeroing memory that may not be used. Only zero out the
>>> necessary area to keep track of the allocations in the maple state.
>>> Zero the entire node prior to using it in the tree.
>>>
>>> This required internal changes to node counting on allocation, so the
>>> test code is also updated.
>>>
>>> This restores some micro-benchmark performance:
>>> up to +9% in mmtests mmap1 by my testing
>>> +10% to +20% in mmap, mmapaddr, mmapmany tests reported by Red Hat
>>>
>>> Link: https://bugzilla.redhat.com/show_bug.cgi?id=2149636
>>> Reported-by: Jirka Hladky <[email protected]>
>>> Suggested-by: Matthew Wilcox (Oracle) <[email protected]>
>>> Signed-off-by: Liam Howlett <[email protected]>
>>> ---
>>> lib/maple_tree.c | 80 +++++++++++++++++---------------
>>> tools/testing/radix-tree/maple.c | 18 +++----
>>> 2 files changed, 52 insertions(+), 46 deletions(-)
>>>
>>> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
>>> index 26e2045d3cda..82a8121fe49b 100644
>>> --- a/lib/maple_tree.c
>>> +++ b/lib/maple_tree.c
>>> @@ -149,13 +149,12 @@ struct maple_subtree_state {
>>> /* Functions */
>>> static inline struct maple_node *mt_alloc_one(gfp_t gfp)
>>> {
>>> - return kmem_cache_alloc(maple_node_cache, gfp | __GFP_ZERO);
>>> + return kmem_cache_alloc(maple_node_cache, gfp);
>>> }
>>>
>>> static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
>>> {
>>> - return kmem_cache_alloc_bulk(maple_node_cache, gfp | __GFP_ZERO, size,
>>> - nodes);
>>> + return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
>>> }
>>>
>>> static inline void mt_free_bulk(size_t size, void __rcu **nodes)
>>> @@ -1127,9 +1126,10 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas)
>>> {
>>> struct maple_alloc *ret, *node = mas->alloc;
>>> unsigned long total = mas_allocated(mas);
>>> + unsigned int req = mas_alloc_req(mas);
>>>
>>> /* nothing or a request pending. */
>>> - if (unlikely(!total))
>>> + if (WARN_ON(!total))
>>
>> Hmm, isn't WARN_ON() here too much?
>
> I don't think so. If we get to the point of asking for a node while we
> don't have any to give, then it's too late. It means we (I) have
> calculated the necessary nodes incorrectly and we won't have enough
> memory to fit things into the tree. It should never happen.

Either way, the suggestion is to use WARN_ON_ONCE() instead of WARN_ON().

... now documented in Documentation/process/coding-style.rst
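
That is, the check would become (a sketch, assuming it stays in
mas_pop_node()):

	/* Warn and bail out, but only emit the backtrace the first time. */
	if (WARN_ON_ONCE(!total))
		return NULL;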

--
Thanks,

David / dhildenb


2023-01-24 15:23:46

by Liam R. Howlett

Subject: Re: [PATCH] maple_tree: Remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_alloc_bulk()

* David Hildenbrand <[email protected]> [230124 08:18]:
> On 06.01.23 19:36, Liam Howlett wrote:
> > * Mike Rapoport <[email protected]> [230106 02:28]:
> > > On Thu, Jan 05, 2023 at 04:05:34PM +0000, Liam Howlett wrote:
> > > > Preallocations are common in the VMA code to avoid allocating under
> > > > certain locking conditions. The preallocations must also cover the
> > > > worst-case scenario. Removing the GFP_ZERO flag from the
> > > > kmem_cache_alloc() (and bulk variant) calls will reduce the amount of
> > > > time spent zeroing memory that may not be used. Only zero out the
> > > > necessary area to keep track of the allocations in the maple state.
> > > > Zero the entire node prior to using it in the tree.
> > > >
> > > > This required internal changes to node counting on allocation, so the
> > > > test code is also updated.
> > > >
> > > > This restores some micro-benchmark performance:
> > > > up to +9% in mmtests mmap1 by my testing
> > > > +10% to +20% in mmap, mmapaddr, mmapmany tests reported by Red Hat
> > > >
> > > > Link: https://bugzilla.redhat.com/show_bug.cgi?id=2149636
> > > > Reported-by: Jirka Hladky <[email protected]>
> > > > Suggested-by: Matthew Wilcox (Oracle) <[email protected]>
> > > > Signed-off-by: Liam Howlett <[email protected]>
> > > > ---
> > > > lib/maple_tree.c | 80 +++++++++++++++++---------------
> > > > tools/testing/radix-tree/maple.c | 18 +++----
> > > > 2 files changed, 52 insertions(+), 46 deletions(-)
> > > >
> > > > diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> > > > index 26e2045d3cda..82a8121fe49b 100644
> > > > --- a/lib/maple_tree.c
> > > > +++ b/lib/maple_tree.c
> > > > @@ -149,13 +149,12 @@ struct maple_subtree_state {
> > > > /* Functions */
> > > > static inline struct maple_node *mt_alloc_one(gfp_t gfp)
> > > > {
> > > > - return kmem_cache_alloc(maple_node_cache, gfp | __GFP_ZERO);
> > > > + return kmem_cache_alloc(maple_node_cache, gfp);
> > > > }
> > > > static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
> > > > {
> > > > - return kmem_cache_alloc_bulk(maple_node_cache, gfp | __GFP_ZERO, size,
> > > > - nodes);
> > > > + return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
> > > > }
> > > > static inline void mt_free_bulk(size_t size, void __rcu **nodes)
> > > > @@ -1127,9 +1126,10 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas)
> > > > {
> > > > struct maple_alloc *ret, *node = mas->alloc;
> > > > unsigned long total = mas_allocated(mas);
> > > > + unsigned int req = mas_alloc_req(mas);
> > > > /* nothing or a request pending. */
> > > > - if (unlikely(!total))
> > > > + if (WARN_ON(!total))
> > >
> > > Hmm, isn't WARN_ON() here too much?
> >
> > I don't think so. If we get to the point of asking for a node while we
> > don't have any to give, then it's too late. It means we (I) have
> > calculated the necessary nodes incorrectly and we won't have enough
> > memory to fit things into the tree. It should never happen.
>
> Either way, the suggestion is to use WARN_ON_ONCE() instead of WARN_ON().
>
> ... now documented in Documentation/process/coding-style.rst
>

Thanks, this is good information. I was actually debating a BUG_ON(),
but it has been documented that WARN_ON() is better, and
WARN_ON_ONCE() is best.

I would like the stack trace for any code path that happens to trigger
it, even if a previous code path has already produced a backtrace.

By this point the memory should all have been obtained, or an earlier
error path should have been taken, so an OOM issue will not cause a
cascade of flooding backtraces.

This would also result in a null pointer dereference today, so I think
it's safe to say it's rare.

Thanks,
Liam