2022-11-29 11:20:46

by xinhui pan

[permalink] [raw]
Subject: [PATCH v4] drm: Optimise for continuous memory allocation

Currently drm-buddy does not have full knowledge of continuous memory.

Lets consider scenario below.
order 1: L R
order 0: LL LR RL RR
for order 1 allocation, it can offer L or R or LR+RL.

For now, we only implement L or R case for continuous memory allocation.
So this patch aims to implement the rest cases.

Adding a new member leaf_link which links all leaf blocks in asceding
order. Now we can find more than 2 sub-order blocks easier.
Say, order 4 can be combined with corresponding order 4, 2+2, 1+2+1,
0+1+2+0, 0+2+1+0.

Signed-off-by: xinhui pan <[email protected]>
---
change from v3:
reworked totally. adding leaf_link.

change from v2:
search continuous block in nearby root if needed

change from v1:
implement top-down continuous allocation
---
drivers/gpu/drm/drm_buddy.c | 108 +++++++++++++++++++++++++++++++++---
include/drm/drm_buddy.h | 1 +
2 files changed, 102 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..8edafb99b02c 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -80,6 +80,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
{
unsigned int i;
u64 offset;
+ LIST_HEAD(leaf);

if (size < chunk_size)
return -EINVAL;
@@ -136,6 +137,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
goto out_free_roots;

mark_free(mm, root);
+ list_add_tail(&root->leaf_link, &leaf);

BUG_ON(i > mm->max_order);
BUG_ON(drm_buddy_block_size(mm, root) < chunk_size);
@@ -147,6 +149,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
i++;
} while (size);

+ list_del(&leaf);
return 0;

out_free_roots:
@@ -205,6 +208,9 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);

+ list_add(&block->right->leaf_link, &block->leaf_link);
+ list_add(&block->left->leaf_link, &block->leaf_link);
+ list_del(&block->leaf_link);
mark_split(block);

return 0;
@@ -256,6 +262,9 @@ static void __drm_buddy_free(struct drm_buddy *mm,
break;

list_del(&buddy->link);
+ list_add(&parent->leaf_link, &block->leaf_link);
+ list_del(&buddy->leaf_link);
+ list_del(&block->leaf_link);

drm_block_free(mm, block);
drm_block_free(mm, buddy);
@@ -386,6 +395,78 @@ alloc_range_bias(struct drm_buddy *mm,
return ERR_PTR(err);
}

+static struct drm_buddy_block *
+find_continuous_blocks(struct drm_buddy *mm,
+ int order,
+ unsigned long flags,
+ struct drm_buddy_block **rblock)
+{
+ struct list_head *head = &mm->free_list[order];
+ struct drm_buddy_block *free_block, *max_block = NULL, *end, *begin;
+ u64 pages = BIT(order + 1);
+ u64 cur_pages;
+
+ list_for_each_entry(free_block, head, link) {
+ if (max_block) {
+ if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION))
+ break;
+
+ if (drm_buddy_block_offset(free_block) <
+ drm_buddy_block_offset(max_block))
+ continue;
+ }
+
+ cur_pages = BIT(order);
+ begin = end = free_block;
+ while (true) {
+ struct drm_buddy_block *prev, *next;
+ int prev_order, next_order;
+
+ prev = list_prev_entry(begin, leaf_link);
+ if (!drm_buddy_block_is_free(prev) ||
+ drm_buddy_block_offset(prev) >
+ drm_buddy_block_offset(begin)) {
+ prev = NULL;
+ }
+ next = list_next_entry(end, leaf_link);
+ if (!drm_buddy_block_is_free(next) ||
+ drm_buddy_block_offset(next) <
+ drm_buddy_block_offset(end)) {
+ next = NULL;
+ }
+ if (!prev && !next)
+ break;
+
+ prev_order = prev ? drm_buddy_block_order(prev) : -1;
+ next_order = next ? drm_buddy_block_order(next) : -1;
+ if (next_order >= prev_order) {
+ BUG_ON(drm_buddy_block_offset(end) +
+ drm_buddy_block_size(mm, end) !=
+ drm_buddy_block_offset(next));
+ end = next;
+ cur_pages += BIT(drm_buddy_block_order(next));
+ }
+ if (prev_order >= next_order) {
+ BUG_ON(drm_buddy_block_offset(prev) +
+ drm_buddy_block_size(mm, prev) !=
+ drm_buddy_block_offset(begin));
+ begin = prev;
+ cur_pages += BIT(drm_buddy_block_order(prev));
+ }
+ if (pages == cur_pages)
+ break;
+ BUG_ON(pages < cur_pages);
+ }
+
+ if (pages > cur_pages)
+ continue;
+
+ *rblock = end;
+ max_block = begin;
+ }
+ return max_block;
+}
+
static struct drm_buddy_block *
get_maxblock(struct list_head *head)
{
@@ -637,7 +718,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
struct list_head *blocks,
unsigned long flags)
{
- struct drm_buddy_block *block = NULL;
+ struct drm_buddy_block *block = NULL, *rblock = NULL;
unsigned int min_order, order;
unsigned long pages;
LIST_HEAD(allocated);
@@ -689,17 +770,30 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
break;

if (order-- == min_order) {
+ if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
+ min_order != 0 && pages == BIT(order + 1)) {
+ block = find_continuous_blocks(mm,
+ order,
+ flags,
+ &rblock);
+ if (block)
+ break;
+ }
err = -ENOSPC;
goto err_free;
}
} while (1);

- mark_allocated(block);
- mm->avail -= drm_buddy_block_size(mm, block);
- kmemleak_update_trace(block);
- list_add_tail(&block->link, &allocated);
-
- pages -= BIT(order);
+ do {
+ mark_allocated(block);
+ mm->avail -= drm_buddy_block_size(mm, block);
+ kmemleak_update_trace(block);
+ list_add_tail(&block->link, &allocated);
+ pages -= BIT(drm_buddy_block_order(block));
+ if (block == rblock || !rblock)
+ break;
+ block = list_next_entry(block, leaf_link);
+ } while (true);

if (!pages)
break;
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 572077ff8ae7..c5437bd4f4f3 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -50,6 +50,7 @@ struct drm_buddy_block {
*/
struct list_head link;
struct list_head tmp_link;
+ struct list_head leaf_link;
};

/* Order-zero must be at least PAGE_SIZE */
--
2.34.1


2022-11-29 12:04:15

by Christian König

[permalink] [raw]
Subject: Re: [PATCH v4] drm: Optimise for continuous memory allocation

Am 29.11.22 um 11:56 schrieb xinhui pan:
> Currently drm-buddy does not have full knowledge of continuous memory.
>
> Lets consider scenario below.
> order 1: L R
> order 0: LL LR RL RR
> for order 1 allocation, it can offer L or R or LR+RL.
>
> For now, we only implement L or R case for continuous memory allocation.
> So this patch aims to implement the rest cases.
>
> Adding a new member leaf_link which links all leaf blocks in asceding
> order. Now we can find more than 2 sub-order blocks easier.
> Say, order 4 can be combined with corresponding order 4, 2+2, 1+2+1,
> 0+1+2+0, 0+2+1+0.

Well that description is a bit confusing and doesn't make to much sense
to me.

When you have two adjacent free order 0 blocks then those should be
automatically combined into an order 1. This is a fundamental property
of the buddy allocator, otherwise the whole algorithm won't work.

When you have the case of a free order 1 block with two adjacent free
order 0 blocks then we a fragmented address space. In this case the best
approach is to fail the allocation and start to swap things out.

So what exactly is the goal here?

Regards,
Christian.

>
> Signed-off-by: xinhui pan <[email protected]>
> ---
> change from v3:
> reworked totally. adding leaf_link.
>
> change from v2:
> search continuous block in nearby root if needed
>
> change from v1:
> implement top-down continuous allocation
> ---
> drivers/gpu/drm/drm_buddy.c | 108 +++++++++++++++++++++++++++++++++---
> include/drm/drm_buddy.h | 1 +
> 2 files changed, 102 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 11bb59399471..8edafb99b02c 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -80,6 +80,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
> {
> unsigned int i;
> u64 offset;
> + LIST_HEAD(leaf);
>
> if (size < chunk_size)
> return -EINVAL;
> @@ -136,6 +137,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
> goto out_free_roots;
>
> mark_free(mm, root);
> + list_add_tail(&root->leaf_link, &leaf);
>
> BUG_ON(i > mm->max_order);
> BUG_ON(drm_buddy_block_size(mm, root) < chunk_size);
> @@ -147,6 +149,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
> i++;
> } while (size);
>
> + list_del(&leaf);
> return 0;
>
> out_free_roots:
> @@ -205,6 +208,9 @@ static int split_block(struct drm_buddy *mm,
> mark_free(mm, block->left);
> mark_free(mm, block->right);
>
> + list_add(&block->right->leaf_link, &block->leaf_link);
> + list_add(&block->left->leaf_link, &block->leaf_link);
> + list_del(&block->leaf_link);
> mark_split(block);
>
> return 0;
> @@ -256,6 +262,9 @@ static void __drm_buddy_free(struct drm_buddy *mm,
> break;
>
> list_del(&buddy->link);
> + list_add(&parent->leaf_link, &block->leaf_link);
> + list_del(&buddy->leaf_link);
> + list_del(&block->leaf_link);
>
> drm_block_free(mm, block);
> drm_block_free(mm, buddy);
> @@ -386,6 +395,78 @@ alloc_range_bias(struct drm_buddy *mm,
> return ERR_PTR(err);
> }
>
> +static struct drm_buddy_block *
> +find_continuous_blocks(struct drm_buddy *mm,
> + int order,
> + unsigned long flags,
> + struct drm_buddy_block **rblock)
> +{
> + struct list_head *head = &mm->free_list[order];
> + struct drm_buddy_block *free_block, *max_block = NULL, *end, *begin;
> + u64 pages = BIT(order + 1);
> + u64 cur_pages;
> +
> + list_for_each_entry(free_block, head, link) {
> + if (max_block) {
> + if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION))
> + break;
> +
> + if (drm_buddy_block_offset(free_block) <
> + drm_buddy_block_offset(max_block))
> + continue;
> + }
> +
> + cur_pages = BIT(order);
> + begin = end = free_block;
> + while (true) {
> + struct drm_buddy_block *prev, *next;
> + int prev_order, next_order;
> +
> + prev = list_prev_entry(begin, leaf_link);
> + if (!drm_buddy_block_is_free(prev) ||
> + drm_buddy_block_offset(prev) >
> + drm_buddy_block_offset(begin)) {
> + prev = NULL;
> + }
> + next = list_next_entry(end, leaf_link);
> + if (!drm_buddy_block_is_free(next) ||
> + drm_buddy_block_offset(next) <
> + drm_buddy_block_offset(end)) {
> + next = NULL;
> + }
> + if (!prev && !next)
> + break;
> +
> + prev_order = prev ? drm_buddy_block_order(prev) : -1;
> + next_order = next ? drm_buddy_block_order(next) : -1;
> + if (next_order >= prev_order) {
> + BUG_ON(drm_buddy_block_offset(end) +
> + drm_buddy_block_size(mm, end) !=
> + drm_buddy_block_offset(next));
> + end = next;
> + cur_pages += BIT(drm_buddy_block_order(next));
> + }
> + if (prev_order >= next_order) {
> + BUG_ON(drm_buddy_block_offset(prev) +
> + drm_buddy_block_size(mm, prev) !=
> + drm_buddy_block_offset(begin));
> + begin = prev;
> + cur_pages += BIT(drm_buddy_block_order(prev));
> + }
> + if (pages == cur_pages)
> + break;
> + BUG_ON(pages < cur_pages);
> + }
> +
> + if (pages > cur_pages)
> + continue;
> +
> + *rblock = end;
> + max_block = begin;
> + }
> + return max_block;
> +}
> +
> static struct drm_buddy_block *
> get_maxblock(struct list_head *head)
> {
> @@ -637,7 +718,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> struct list_head *blocks,
> unsigned long flags)
> {
> - struct drm_buddy_block *block = NULL;
> + struct drm_buddy_block *block = NULL, *rblock = NULL;
> unsigned int min_order, order;
> unsigned long pages;
> LIST_HEAD(allocated);
> @@ -689,17 +770,30 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> break;
>
> if (order-- == min_order) {
> + if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
> + min_order != 0 && pages == BIT(order + 1)) {
> + block = find_continuous_blocks(mm,
> + order,
> + flags,
> + &rblock);
> + if (block)
> + break;
> + }
> err = -ENOSPC;
> goto err_free;
> }
> } while (1);
>
> - mark_allocated(block);
> - mm->avail -= drm_buddy_block_size(mm, block);
> - kmemleak_update_trace(block);
> - list_add_tail(&block->link, &allocated);
> -
> - pages -= BIT(order);
> + do {
> + mark_allocated(block);
> + mm->avail -= drm_buddy_block_size(mm, block);
> + kmemleak_update_trace(block);
> + list_add_tail(&block->link, &allocated);
> + pages -= BIT(drm_buddy_block_order(block));
> + if (block == rblock || !rblock)
> + break;
> + block = list_next_entry(block, leaf_link);
> + } while (true);
>
> if (!pages)
> break;
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index 572077ff8ae7..c5437bd4f4f3 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -50,6 +50,7 @@ struct drm_buddy_block {
> */
> struct list_head link;
> struct list_head tmp_link;
> + struct list_head leaf_link;
> };
>
> /* Order-zero must be at least PAGE_SIZE */

2022-11-29 12:12:52

by xinhui pan

[permalink] [raw]
Subject: 回复: [PATCH v4] drm: Optimise for continuous memory allocation

[AMD Official Use Only - General]

In one ROCM + gdm restart test,
find_continuous_blocks() succeed with ratio 35%.
the cod coverage report is below.

772 3998 : if (order-- == min_order) {
773 352 : if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
774 352 : min_order != 0 && pages == BIT(order + 1)) {
775 79 : block = find_continuous_blocks(mm,
776 : order,
777 : flags,
778 : &rblock);
779 79 : if (block)
780 : break;
781 : }
782 300 : err = -ENOSPC;
783 300 : goto err_free;

thanks
xinhui
________________________________________
??????: Pan, Xinhui <[email protected]>
????ʱ??: 2022??11??29?? 18:56
?ռ???: [email protected]
????: [email protected]; [email protected]; Koenig, Christian; [email protected]; [email protected]; Paneer Selvam, Arunpravin; [email protected]; Pan, Xinhui
????: [PATCH v4] drm: Optimise for continuous memory allocation

Currently drm-buddy does not have full knowledge of continuous memory.

Lets consider scenario below.
order 1: L R
order 0: LL LR RL RR
for order 1 allocation, it can offer L or R or LR+RL.

For now, we only implement L or R case for continuous memory allocation.
So this patch aims to implement the rest cases.

Adding a new member leaf_link which links all leaf blocks in asceding
order. Now we can find more than 2 sub-order blocks easier.
Say, order 4 can be combined with corresponding order 4, 2+2, 1+2+1,
0+1+2+0, 0+2+1+0.

Signed-off-by: xinhui pan <[email protected]>
---
change from v3:
reworked totally. adding leaf_link.

change from v2:
search continuous block in nearby root if needed

change from v1:
implement top-down continuous allocation
---
drivers/gpu/drm/drm_buddy.c | 108 +++++++++++++++++++++++++++++++++---
include/drm/drm_buddy.h | 1 +
2 files changed, 102 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..8edafb99b02c 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -80,6 +80,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
{
unsigned int i;
u64 offset;
+ LIST_HEAD(leaf);

if (size < chunk_size)
return -EINVAL;
@@ -136,6 +137,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
goto out_free_roots;

mark_free(mm, root);
+ list_add_tail(&root->leaf_link, &leaf);

BUG_ON(i > mm->max_order);
BUG_ON(drm_buddy_block_size(mm, root) < chunk_size);
@@ -147,6 +149,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
i++;
} while (size);

+ list_del(&leaf);
return 0;

out_free_roots:
@@ -205,6 +208,9 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);

+ list_add(&block->right->leaf_link, &block->leaf_link);
+ list_add(&block->left->leaf_link, &block->leaf_link);
+ list_del(&block->leaf_link);
mark_split(block);

return 0;
@@ -256,6 +262,9 @@ static void __drm_buddy_free(struct drm_buddy *mm,
break;

list_del(&buddy->link);
+ list_add(&parent->leaf_link, &block->leaf_link);
+ list_del(&buddy->leaf_link);
+ list_del(&block->leaf_link);

drm_block_free(mm, block);
drm_block_free(mm, buddy);
@@ -386,6 +395,78 @@ alloc_range_bias(struct drm_buddy *mm,
return ERR_PTR(err);
}

+static struct drm_buddy_block *
+find_continuous_blocks(struct drm_buddy *mm,
+ int order,
+ unsigned long flags,
+ struct drm_buddy_block **rblock)
+{
+ struct list_head *head = &mm->free_list[order];
+ struct drm_buddy_block *free_block, *max_block = NULL, *end, *begin;
+ u64 pages = BIT(order + 1);
+ u64 cur_pages;
+
+ list_for_each_entry(free_block, head, link) {
+ if (max_block) {
+ if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION))
+ break;
+
+ if (drm_buddy_block_offset(free_block) <
+ drm_buddy_block_offset(max_block))
+ continue;
+ }
+
+ cur_pages = BIT(order);
+ begin = end = free_block;
+ while (true) {
+ struct drm_buddy_block *prev, *next;
+ int prev_order, next_order;
+
+ prev = list_prev_entry(begin, leaf_link);
+ if (!drm_buddy_block_is_free(prev) ||
+ drm_buddy_block_offset(prev) >
+ drm_buddy_block_offset(begin)) {
+ prev = NULL;
+ }
+ next = list_next_entry(end, leaf_link);
+ if (!drm_buddy_block_is_free(next) ||
+ drm_buddy_block_offset(next) <
+ drm_buddy_block_offset(end)) {
+ next = NULL;
+ }
+ if (!prev && !next)
+ break;
+
+ prev_order = prev ? drm_buddy_block_order(prev) : -1;
+ next_order = next ? drm_buddy_block_order(next) : -1;
+ if (next_order >= prev_order) {
+ BUG_ON(drm_buddy_block_offset(end) +
+ drm_buddy_block_size(mm, end) !=
+ drm_buddy_block_offset(next));
+ end = next;
+ cur_pages += BIT(drm_buddy_block_order(next));
+ }
+ if (prev_order >= next_order) {
+ BUG_ON(drm_buddy_block_offset(prev) +
+ drm_buddy_block_size(mm, prev) !=
+ drm_buddy_block_offset(begin));
+ begin = prev;
+ cur_pages += BIT(drm_buddy_block_order(prev));
+ }
+ if (pages == cur_pages)
+ break;
+ BUG_ON(pages < cur_pages);
+ }
+
+ if (pages > cur_pages)
+ continue;
+
+ *rblock = end;
+ max_block = begin;
+ }
+ return max_block;
+}
+
static struct drm_buddy_block *
get_maxblock(struct list_head *head)
{
@@ -637,7 +718,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
struct list_head *blocks,
unsigned long flags)
{
- struct drm_buddy_block *block = NULL;
+ struct drm_buddy_block *block = NULL, *rblock = NULL;
unsigned int min_order, order;
unsigned long pages;
LIST_HEAD(allocated);
@@ -689,17 +770,30 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
break;

if (order-- == min_order) {
+ if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
+ min_order != 0 && pages == BIT(order + 1)) {
+ block = find_continuous_blocks(mm,
+ order,
+ flags,
+ &rblock);
+ if (block)
+ break;
+ }
err = -ENOSPC;
goto err_free;
}
} while (1);

- mark_allocated(block);
- mm->avail -= drm_buddy_block_size(mm, block);
- kmemleak_update_trace(block);
- list_add_tail(&block->link, &allocated);
-
- pages -= BIT(order);
+ do {
+ mark_allocated(block);
+ mm->avail -= drm_buddy_block_size(mm, block);
+ kmemleak_update_trace(block);
+ list_add_tail(&block->link, &allocated);
+ pages -= BIT(drm_buddy_block_order(block));
+ if (block == rblock || !rblock)
+ break;
+ block = list_next_entry(block, leaf_link);
+ } while (true);

if (!pages)
break;
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 572077ff8ae7..c5437bd4f4f3 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -50,6 +50,7 @@ struct drm_buddy_block {
*/
struct list_head link;
struct list_head tmp_link;
+ struct list_head leaf_link;
};

/* Order-zero must be at least PAGE_SIZE */
--
2.34.1


Attachments:
winmail.dat (18.27 kB)

2022-11-29 12:35:18

by xinhui pan

[permalink] [raw]
Subject: 回复: [PATCH v4] drm: Optimise for continuous memory allocation

[AMD Official Use Only - General]

comments inline.

________________________________________
??????: Koenig, Christian <[email protected]>
????ʱ??: 2022??11??29?? 19:32
?ռ???: Pan, Xinhui; [email protected]
????: [email protected]; [email protected]; [email protected]; [email protected]; Paneer Selvam, Arunpravin; [email protected]
????: Re: [PATCH v4] drm: Optimise for continuous memory allocation

Am 29.11.22 um 11:56 schrieb xinhui pan:
> Currently drm-buddy does not have full knowledge of continuous memory.
>
> Lets consider scenario below.
> order 1: L R
> order 0: LL LR RL RR
> for order 1 allocation, it can offer L or R or LR+RL.
>
> For now, we only implement L or R case for continuous memory allocation.
> So this patch aims to implement the rest cases.
>
> Adding a new member leaf_link which links all leaf blocks in asceding
> order. Now we can find more than 2 sub-order blocks easier.
> Say, order 4 can be combined with corresponding order 4, 2+2, 1+2+1,
> 0+1+2+0, 0+2+1+0.

Well that description is a bit confusing and doesn't make to much sense
to me.

When you have two adjacent free order 0 blocks then those should be
automatically combined into an order 1. This is a fundamental property
of the buddy allocator, otherwise the whole algorithm won't work.

[xh] sorry, The order above is not 4, should be 3.
order 3 can be combined with corresponding order 3, 2+2, 1+2+1, 0+1+2+0, 0+2+1+0
the order 0 + 1 + 2 + 0 case does not have two same order 0 adjacent. they are in different tree.
looks like below
order 3: L3 R3
order 2: L2 (R2)* L2*
order 1: L1 (R1) L1
order 0: L0 (R0) (L0)
R0 + R1+R2 +L0 with () around combined to be order 3.
R2 + L2 with * followed combined to be order 3.
etc....

When you have the case of a free order 1 block with two adjacent free
order 0 blocks then we a fragmented address space. In this case the best
approach is to fail the allocation and start to swap things out.

[xh] Eviction is expensive.
And if it still fails to find the continuous memory with this approach, then let's evict.

So what exactly is the goal here?

Regards,
Christian.

>
> Signed-off-by: xinhui pan <[email protected]>
> ---
> change from v3:
> reworked totally. adding leaf_link.
>
> change from v2:
> search continuous block in nearby root if needed
>
> change from v1:
> implement top-down continuous allocation
> ---
> drivers/gpu/drm/drm_buddy.c | 108 +++++++++++++++++++++++++++++++++---
> include/drm/drm_buddy.h | 1 +
> 2 files changed, 102 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 11bb59399471..8edafb99b02c 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -80,6 +80,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
> {
> unsigned int i;
> u64 offset;
> + LIST_HEAD(leaf);
>
> if (size < chunk_size)
> return -EINVAL;
> @@ -136,6 +137,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
> goto out_free_roots;
>
> mark_free(mm, root);
> + list_add_tail(&root->leaf_link, &leaf);
>
> BUG_ON(i > mm->max_order);
> BUG_ON(drm_buddy_block_size(mm, root) < chunk_size);
> @@ -147,6 +149,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
> i++;
> } while (size);
>
> + list_del(&leaf);
> return 0;
>
> out_free_roots:
> @@ -205,6 +208,9 @@ static int split_block(struct drm_buddy *mm,
> mark_free(mm, block->left);
> mark_free(mm, block->right);
>
> + list_add(&block->right->leaf_link, &block->leaf_link);
> + list_add(&block->left->leaf_link, &block->leaf_link);
> + list_del(&block->leaf_link);
> mark_split(block);
>
> return 0;
> @@ -256,6 +262,9 @@ static void __drm_buddy_free(struct drm_buddy *mm,
> break;
>
> list_del(&buddy->link);
> + list_add(&parent->leaf_link, &block->leaf_link);
> + list_del(&buddy->leaf_link);
> + list_del(&block->leaf_link);
>
> drm_block_free(mm, block);
> drm_block_free(mm, buddy);
> @@ -386,6 +395,78 @@ alloc_range_bias(struct drm_buddy *mm,
> return ERR_PTR(err);
> }
>
> +static struct drm_buddy_block *
> +find_continuous_blocks(struct drm_buddy *mm,
> + int order,
> + unsigned long flags,
> + struct drm_buddy_block **rblock)
> +{
> + struct list_head *head = &mm->free_list[order];
> + struct drm_buddy_block *free_block, *max_block = NULL, *end, *begin;
> + u64 pages = BIT(order + 1);
> + u64 cur_pages;
> +
> + list_for_each_entry(free_block, head, link) {
> + if (max_block) {
> + if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION))
> + break;
> +
> + if (drm_buddy_block_offset(free_block) <
> + drm_buddy_block_offset(max_block))
> + continue;
> + }
> +
> + cur_pages = BIT(order);
> + begin = end = free_block;
> + while (true) {
> + struct drm_buddy_block *prev, *next;
> + int prev_order, next_order;
> +
> + prev = list_prev_entry(begin, leaf_link);
> + if (!drm_buddy_block_is_free(prev) ||
> + drm_buddy_block_offset(prev) >
> + drm_buddy_block_offset(begin)) {
> + prev = NULL;
> + }
> + next = list_next_entry(end, leaf_link);
> + if (!drm_buddy_block_is_free(next) ||
> + drm_buddy_block_offset(next) <
> + drm_buddy_block_offset(end)) {
> + next = NULL;
> + }
> + if (!prev && !next)
> + break;
> +
> + prev_order = prev ? drm_buddy_block_order(prev) : -1;
> + next_order = next ? drm_buddy_block_order(next) : -1;
> + if (next_order >= prev_order) {
> + BUG_ON(drm_buddy_block_offset(end) +
> + drm_buddy_block_size(mm, end) !=
> + drm_buddy_block_offset(next));
> + end = next;
> + cur_pages += BIT(drm_buddy_block_order(next));
> + }
> + if (prev_order >= next_order) {
> + BUG_ON(drm_buddy_block_offset(prev) +
> + drm_buddy_block_size(mm, prev) !=
> + drm_buddy_block_offset(begin));
> + begin = prev;
> + cur_pages += BIT(drm_buddy_block_order(prev));
> + }
> + if (pages == cur_pages)
> + break;
> + BUG_ON(pages < cur_pages);
> + }
> +
> + if (pages > cur_pages)
> + continue;
> +
> + *rblock = end;
> + max_block = begin;
> + }
> + return max_block;
> +}
> +
> static struct drm_buddy_block *
> get_maxblock(struct list_head *head)
> {
> @@ -637,7 +718,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> struct list_head *blocks,
> unsigned long flags)
> {
> - struct drm_buddy_block *block = NULL;
> + struct drm_buddy_block *block = NULL, *rblock = NULL;
> unsigned int min_order, order;
> unsigned long pages;
> LIST_HEAD(allocated);
> @@ -689,17 +770,30 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> break;
>
> if (order-- == min_order) {
> + if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
> + min_order != 0 && pages == BIT(order + 1)) {
> + block = find_continuous_blocks(mm,
> + order,
> + flags,
> + &rblock);
> + if (block)
> + break;
> + }
> err = -ENOSPC;
> goto err_free;
> }
> } while (1);
>
> - mark_allocated(block);
> - mm->avail -= drm_buddy_block_size(mm, block);
> - kmemleak_update_trace(block);
> - list_add_tail(&block->link, &allocated);
> -
> - pages -= BIT(order);
> + do {
> + mark_allocated(block);
> + mm->avail -= drm_buddy_block_size(mm, block);
> + kmemleak_update_trace(block);
> + list_add_tail(&block->link, &allocated);
> + pages -= BIT(drm_buddy_block_order(block));
> + if (block == rblock || !rblock)
> + break;
> + block = list_next_entry(block, leaf_link);
> + } while (true);
>
> if (!pages)
> break;
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index 572077ff8ae7..c5437bd4f4f3 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -50,6 +50,7 @@ struct drm_buddy_block {
> */
> struct list_head link;
> struct list_head tmp_link;
> + struct list_head leaf_link;
> };
>
> /* Order-zero must be at least PAGE_SIZE */


Attachments:
winmail.dat (18.71 kB)