Currently drm-buddy does not have full knowledge of continuous memory.
Lets consider scenario below.
order 1: L R
order 0: LL LR RL RR
for order 1 allocation, it can offer L or R or LR+RL.
For now, we only implement L or R case for continuous memory allocation.
So this patch aims to implement the LR+RL case.
Signed-off-by: xinhui pan <[email protected]>
---
change from v2:
search continuous block in nearby root if needed
change from v1:
implement top-down continuous allocation
---
drivers/gpu/drm/drm_buddy.c | 78 +++++++++++++++++++++++++++++++++----
1 file changed, 71 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..ff58eb3136d2 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -386,6 +386,58 @@ alloc_range_bias(struct drm_buddy *mm,
return ERR_PTR(err);
}
+static struct drm_buddy_block *
+find_continuous_blocks(struct drm_buddy *mm,
+ int order,
+ unsigned long flags,
+ struct drm_buddy_block **rn)
+{
+ struct list_head *head = &mm->free_list[order];
+ struct drm_buddy_block *node, *parent, *free_node, *max_node = NULL;
+ int i;
+
+ list_for_each_entry(free_node, head, link) {
+ if (max_node) {
+ if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION))
+ break;
+
+ if (drm_buddy_block_offset(free_node) <
+ drm_buddy_block_offset(max_node))
+ continue;
+ }
+
+ parent = free_node;
+ do {
+ node = parent;
+ parent = parent->parent;
+ } while (parent && parent->right == node);
+
+ if (!parent) {
+ for (i = 0; i < mm->n_roots - 1; i++)
+ if (mm->roots[i] == node)
+ break;
+ if (i == mm->n_roots - 1)
+ continue;
+ node = mm->roots[i + 1];
+ } else {
+ node = parent->right;
+ }
+
+ while (drm_buddy_block_is_split(node))
+ node = node->left;
+
+ if (drm_buddy_block_is_free(node) &&
+ drm_buddy_block_order(node) == order) {
+ *rn = node;
+ max_node = free_node;
+ BUG_ON(drm_buddy_block_offset(node) !=
+ drm_buddy_block_offset(max_node) +
+ drm_buddy_block_size(mm, max_node));
+ }
+ }
+ return max_node;
+}
+
static struct drm_buddy_block *
get_maxblock(struct list_head *head)
{
@@ -637,7 +689,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
struct list_head *blocks,
unsigned long flags)
{
- struct drm_buddy_block *block = NULL;
+ struct drm_buddy_block *block = NULL, *rblock = NULL;
unsigned int min_order, order;
unsigned long pages;
LIST_HEAD(allocated);
@@ -689,17 +741,29 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
break;
if (order-- == min_order) {
+ if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
+ min_order != 0 && pages == BIT(order + 1)) {
+ block = find_continuous_blocks(mm,
+ order,
+ flags,
+ &rblock);
+ if (block)
+ break;
+ }
err = -ENOSPC;
goto err_free;
}
} while (1);
- mark_allocated(block);
- mm->avail -= drm_buddy_block_size(mm, block);
- kmemleak_update_trace(block);
- list_add_tail(&block->link, &allocated);
-
- pages -= BIT(order);
+ do {
+ mark_allocated(block);
+ mm->avail -= drm_buddy_block_size(mm, block);
+ kmemleak_update_trace(block);
+ list_add_tail(&block->link, &allocated);
+ pages -= BIT(order);
+ block = rblock;
+ rblock = NULL;
+ } while (block);
if (!pages)
break;
--
2.34.1
Hi Xinhui,
On 11/28/2022 12:04 PM, xinhui pan wrote:
> Currently drm-buddy does not have full knowledge of continuous memory.
>
> Lets consider scenario below.
> order 1: L R
> order 0: LL LR RL RR
> for order 1 allocation, it can offer L or R or LR+RL.
>
> For now, we only implement L or R case for continuous memory allocation.
> So this patch aims to implement the LR+RL case.
>
> Signed-off-by: xinhui pan <[email protected]>
> ---
> change from v2:
> search continuous block in nearby root if needed
>
> change from v1:
> implement top-down continuous allocation
> ---
> drivers/gpu/drm/drm_buddy.c | 78 +++++++++++++++++++++++++++++++++----
> 1 file changed, 71 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 11bb59399471..ff58eb3136d2 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -386,6 +386,58 @@ alloc_range_bias(struct drm_buddy *mm,
> return ERR_PTR(err);
> }
>
> +static struct drm_buddy_block *
> +find_continuous_blocks(struct drm_buddy *mm,
> + int order,
> + unsigned long flags,
> + struct drm_buddy_block **rn)
> +{
> + struct list_head *head = &mm->free_list[order];
> + struct drm_buddy_block *node, *parent, *free_node, *max_node = NULL;
NIT: We usually name the variable as *block or ***_block for drm buddy
and we have *node or ***_node for drm mm manager.
> + int i;
> +
> + list_for_each_entry(free_node, head, link) {
> + if (max_node) {
> + if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION))
> + break;
> +
> + if (drm_buddy_block_offset(free_node) <
> + drm_buddy_block_offset(max_node))
> + continue;
> + }
> +
> + parent = free_node;
> + do {
> + node = parent;
> + parent = parent->parent;
> + } while (parent && parent->right == node);
> +
> + if (!parent) {
> + for (i = 0; i < mm->n_roots - 1; i++)
> + if (mm->roots[i] == node)
> + break;
> + if (i == mm->n_roots - 1)
> + continue;
> + node = mm->roots[i + 1];
> + } else {
> + node = parent->right;
> + }
> +
> + while (drm_buddy_block_is_split(node))
> + node = node->left;
> +
> + if (drm_buddy_block_is_free(node) &&
> + drm_buddy_block_order(node) == order) {
> + *rn = node;
> + max_node = free_node;
> + BUG_ON(drm_buddy_block_offset(node) !=
> + drm_buddy_block_offset(max_node) +
> + drm_buddy_block_size(mm, max_node));
> + }
> + }
> + return max_node;
> +}
> +
> static struct drm_buddy_block *
> get_maxblock(struct list_head *head)
> {
> @@ -637,7 +689,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> struct list_head *blocks,
> unsigned long flags)
> {
> - struct drm_buddy_block *block = NULL;
> + struct drm_buddy_block *block = NULL, *rblock = NULL;
> unsigned int min_order, order;
> unsigned long pages;
> LIST_HEAD(allocated);
> @@ -689,17 +741,29 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> break;
>
> if (order-- == min_order) {
> + if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
> + min_order != 0 && pages == BIT(order + 1)) {
> + block = find_continuous_blocks(mm,
> + order,
> + flags,
> + &rblock);
> + if (block)
> + break;
> + }
> err = -ENOSPC;
> goto err_free;
> }
> } while (1);
>
> - mark_allocated(block);
> - mm->avail -= drm_buddy_block_size(mm, block);
> - kmemleak_update_trace(block);
> - list_add_tail(&block->link, &allocated);
> -
> - pages -= BIT(order);
> + do {
> + mark_allocated(block);
> + mm->avail -= drm_buddy_block_size(mm, block);
> + kmemleak_update_trace(block);
> + list_add_tail(&block->link, &allocated);
> + pages -= BIT(order);
> + block = rblock;
> + rblock = NULL;
> + } while (block);
I think with this approach, if we are lucky enough we may get contiguous
blocks in one order level down in RL
combination from the freelist?
Regards,
Arun
>
> if (!pages)
> break;
[AMD Official Use Only - General]
Hi Arun,
Thanks for your reply. comments are inline.
________________________________________
??????: Paneer Selvam, Arunpravin <[email protected]>
????ʱ??: 2022??11??29?? 1:09
?ռ???: Pan, Xinhui; [email protected]
????: [email protected]; [email protected]; [email protected]; [email protected]; Koenig, Christian
????: Re: [PATCH v3] drm: Optimise for continuous memory allocation
Hi Xinhui,
On 11/28/2022 12:04 PM, xinhui pan wrote:
> Currently drm-buddy does not have full knowledge of continuous memory.
>
> Lets consider scenario below.
> order 1: L R
> order 0: LL LR RL RR
> for order 1 allocation, it can offer L or R or LR+RL.
>
> For now, we only implement L or R case for continuous memory allocation.
> So this patch aims to implement the LR+RL case.
>
> Signed-off-by: xinhui pan <[email protected]>
> ---
> change from v2:
> search continuous block in nearby root if needed
>
> change from v1:
> implement top-down continuous allocation
> ---
> drivers/gpu/drm/drm_buddy.c | 78 +++++++++++++++++++++++++++++++++----
> 1 file changed, 71 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 11bb59399471..ff58eb3136d2 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -386,6 +386,58 @@ alloc_range_bias(struct drm_buddy *mm,
> return ERR_PTR(err);
> }
>
> +static struct drm_buddy_block *
> +find_continuous_blocks(struct drm_buddy *mm,
> + int order,
> + unsigned long flags,
> + struct drm_buddy_block **rn)
> +{
> + struct list_head *head = &mm->free_list[order];
> + struct drm_buddy_block *node, *parent, *free_node, *max_node = NULL;
NIT: We usually name the variable as *block or ***_block for drm buddy
and we have *node or ***_node for drm mm manager.
[xh] Oh, yes. The code naming is important. Will fix it.
> + int i;
> +
> + list_for_each_entry(free_node, head, link) {
> + if (max_node) {
> + if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION))
> + break;
> +
> + if (drm_buddy_block_offset(free_node) <
> + drm_buddy_block_offset(max_node))
> + continue;
> + }
> +
> + parent = free_node;
> + do {
> + node = parent;
> + parent = parent->parent;
> + } while (parent && parent->right == node);
> +
> + if (!parent) {
> + for (i = 0; i < mm->n_roots - 1; i++)
> + if (mm->roots[i] == node)
> + break;
> + if (i == mm->n_roots - 1)
> + continue;
> + node = mm->roots[i + 1];
> + } else {
> + node = parent->right;
> + }
> +
> + while (drm_buddy_block_is_split(node))
> + node = node->left;
> +
> + if (drm_buddy_block_is_free(node) &&
> + drm_buddy_block_order(node) == order) {
> + *rn = node;
> + max_node = free_node;
> + BUG_ON(drm_buddy_block_offset(node) !=
> + drm_buddy_block_offset(max_node) +
> + drm_buddy_block_size(mm, max_node));
> + }
> + }
> + return max_node;
> +}
> +
> static struct drm_buddy_block *
> get_maxblock(struct list_head *head)
> {
> @@ -637,7 +689,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> struct list_head *blocks,
> unsigned long flags)
> {
> - struct drm_buddy_block *block = NULL;
> + struct drm_buddy_block *block = NULL, *rblock = NULL;
> unsigned int min_order, order;
> unsigned long pages;
> LIST_HEAD(allocated);
> @@ -689,17 +741,29 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> break;
>
> if (order-- == min_order) {
> + if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
> + min_order != 0 && pages == BIT(order + 1)) {
> + block = find_continuous_blocks(mm,
> + order,
> + flags,
> + &rblock);
> + if (block)
> + break;
> + }
> err = -ENOSPC;
> goto err_free;
> }
> } while (1);
>
> - mark_allocated(block);
> - mm->avail -= drm_buddy_block_size(mm, block);
> - kmemleak_update_trace(block);
> - list_add_tail(&block->link, &allocated);
> -
> - pages -= BIT(order);
> + do {
> + mark_allocated(block);
> + mm->avail -= drm_buddy_block_size(mm, block);
> + kmemleak_update_trace(block);
> + list_add_tail(&block->link, &allocated);
> + pages -= BIT(order);
> + block = rblock;
> + rblock = NULL;
> + } while (block);
I think with this approach, if we are lucky enough we may get contiguous
blocks in one order level down in RL
combination from the freelist?
[xh] That is just what I did at first time like something below.
list_for_each_entry(node, head, link)
list_for_each_entry_reverse(rnode, head, link)
I do the test with one ROCM application while running gdm restart background which is a very normal scenario.
The test result shows there is about 4% chance to find the continuous blocks in this case. This 4% is really good enough as eviction is much more expensive.
So after that, I need take care of the rest 96% to not introduce too much workload.
Compared with the two loops, walking through the tree is a little cheaper.
thanks
xinhui
Regards,
Arun
>
> if (!pages)
> break;