回复: [PATCH v4] drm: Optimise for continuous memory allocation

Pan, Xinhui Xinhui.Pan at amd.com
Tue Nov 29 11:54:42 UTC 2022


[AMD Official Use Only - General]

comments inline.

________________________________________
发件人: Koenig, Christian <Christian.Koenig at amd.com>
发送时间: 2022年11月29日 19:32
收件人: Pan, Xinhui; amd-gfx at lists.freedesktop.org
抄送: daniel at ffwll.ch; matthew.auld at intel.com; dri-devel at lists.freedesktop.org; linux-kernel at vger.kernel.org; Paneer Selvam, Arunpravin; intel-gfx at lists.freedesktop.org
主题: Re: [PATCH v4] drm: Optimise for continuous memory allocation

Am 29.11.22 um 11:56 schrieb xinhui pan:
> Currently drm-buddy does not have full knowledge of continuous memory.
>
> Lets consider scenario below.
> order 1:    L             R
> order 0: LL   LR      RL      RR
> for order 1 allocation, it can offer L or R or LR+RL.
>
> For now, we only implement L or R case for continuous memory allocation.
> So this patch aims to implement the rest cases.
>
> Adding a new member leaf_link which links all leaf blocks in asceding
> order. Now we can find more than 2 sub-order blocks easier.
> Say, order 4 can be combined with corresponding order 4, 2+2, 1+2+1,
> 0+1+2+0, 0+2+1+0.

Well that description is a bit confusing and doesn't make to much sense
to me.

When you have two adjacent free order 0 blocks then those should be
automatically combined into an order 1. This is a fundamental property
of the buddy allocator, otherwise the whole algorithm won't work.

[xh] sorry, The order above is not 4, should be 3.
order 3 can be combined with corresponding order 3, 2+2, 1+2+1, 0+1+2+0, 0+2+1+0
the order 0 + 1 + 2 + 0 case does not have two same order 0 adjacent. they are in different tree.
looks like below
order 3:                            L3                                               R3
order 2:            L2                              (R2)*                    L2*
order 1:    L1             (R1)                                         L1
order 0: L0   (R0)                                                 (L0)
R0 + R1+R2 +L0 with () around combined to be order 3.
R2 + L2 with * followed combined to be order 3.
etc....

When you have the case of a free order 1 block with two adjacent free
order 0 blocks then we a fragmented address space. In this case the best
approach is to fail the allocation and start to swap things out.

[xh] Eviction is expensive.
And if it still fails to find the continuous memory with this approach, then let's evict.

So what exactly is the goal here?

Regards,
Christian.

>
> Signed-off-by: xinhui pan <xinhui.pan at amd.com>
> ---
> change from v3:
> reworked totally. adding leaf_link.
>
> change from v2:
> search continuous block in nearby root if needed
>
> change from v1:
> implement top-down continuous allocation
> ---
>   drivers/gpu/drm/drm_buddy.c | 108 +++++++++++++++++++++++++++++++++---
>   include/drm/drm_buddy.h     |   1 +
>   2 files changed, 102 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 11bb59399471..8edafb99b02c 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -80,6 +80,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>   {
>       unsigned int i;
>       u64 offset;
> +     LIST_HEAD(leaf);
>
>       if (size < chunk_size)
>               return -EINVAL;
> @@ -136,6 +137,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>                       goto out_free_roots;
>
>               mark_free(mm, root);
> +             list_add_tail(&root->leaf_link, &leaf);
>
>               BUG_ON(i > mm->max_order);
>               BUG_ON(drm_buddy_block_size(mm, root) < chunk_size);
> @@ -147,6 +149,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>               i++;
>       } while (size);
>
> +     list_del(&leaf);
>       return 0;
>
>   out_free_roots:
> @@ -205,6 +208,9 @@ static int split_block(struct drm_buddy *mm,
>       mark_free(mm, block->left);
>       mark_free(mm, block->right);
>
> +     list_add(&block->right->leaf_link, &block->leaf_link);
> +     list_add(&block->left->leaf_link, &block->leaf_link);
> +     list_del(&block->leaf_link);
>       mark_split(block);
>
>       return 0;
> @@ -256,6 +262,9 @@ static void __drm_buddy_free(struct drm_buddy *mm,
>                       break;
>
>               list_del(&buddy->link);
> +             list_add(&parent->leaf_link, &block->leaf_link);
> +             list_del(&buddy->leaf_link);
> +             list_del(&block->leaf_link);
>
>               drm_block_free(mm, block);
>               drm_block_free(mm, buddy);
> @@ -386,6 +395,78 @@ alloc_range_bias(struct drm_buddy *mm,
>       return ERR_PTR(err);
>   }
>
> +static struct drm_buddy_block *
> +find_continuous_blocks(struct drm_buddy *mm,
> +                    int order,
> +                    unsigned long flags,
> +                    struct drm_buddy_block **rblock)
> +{
> +     struct list_head *head = &mm->free_list[order];
> +     struct drm_buddy_block *free_block, *max_block = NULL, *end, *begin;
> +     u64 pages = BIT(order + 1);
> +     u64 cur_pages;
> +
> +     list_for_each_entry(free_block, head, link) {
> +             if (max_block) {
> +                     if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION))
> +                             break;
> +
> +                     if (drm_buddy_block_offset(free_block) <
> +                         drm_buddy_block_offset(max_block))
> +                             continue;
> +             }
> +
> +             cur_pages = BIT(order);
> +             begin = end = free_block;
> +             while (true) {
> +                     struct drm_buddy_block *prev, *next;
> +                     int prev_order, next_order;
> +
> +                     prev = list_prev_entry(begin, leaf_link);
> +                     if (!drm_buddy_block_is_free(prev) ||
> +                         drm_buddy_block_offset(prev) >
> +                         drm_buddy_block_offset(begin)) {
> +                             prev = NULL;
> +                     }
> +                     next = list_next_entry(end, leaf_link);
> +                     if (!drm_buddy_block_is_free(next) ||
> +                         drm_buddy_block_offset(next) <
> +                         drm_buddy_block_offset(end)) {
> +                             next = NULL;
> +                     }
> +                     if (!prev && !next)
> +                             break;
> +
> +                     prev_order = prev ? drm_buddy_block_order(prev) : -1;
> +                     next_order = next ? drm_buddy_block_order(next) : -1;
> +                     if (next_order >= prev_order) {
> +                             BUG_ON(drm_buddy_block_offset(end) +
> +                                    drm_buddy_block_size(mm, end) !=
> +                                    drm_buddy_block_offset(next));
> +                             end = next;
> +                             cur_pages += BIT(drm_buddy_block_order(next));
> +                     }
> +                     if (prev_order >= next_order) {
> +                             BUG_ON(drm_buddy_block_offset(prev) +
> +                                    drm_buddy_block_size(mm, prev) !=
> +                                    drm_buddy_block_offset(begin));
> +                             begin = prev;
> +                             cur_pages += BIT(drm_buddy_block_order(prev));
> +                     }
> +                     if (pages == cur_pages)
> +                             break;
> +                     BUG_ON(pages < cur_pages);
> +             }
> +
> +             if (pages > cur_pages)
> +                     continue;
> +
> +             *rblock = end;
> +             max_block = begin;
> +     }
> +     return max_block;
> +}
> +
>   static struct drm_buddy_block *
>   get_maxblock(struct list_head *head)
>   {
> @@ -637,7 +718,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>                          struct list_head *blocks,
>                          unsigned long flags)
>   {
> -     struct drm_buddy_block *block = NULL;
> +     struct drm_buddy_block *block = NULL, *rblock = NULL;
>       unsigned int min_order, order;
>       unsigned long pages;
>       LIST_HEAD(allocated);
> @@ -689,17 +770,30 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>                               break;
>
>                       if (order-- == min_order) {
> +                             if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
> +                                 min_order != 0 && pages == BIT(order + 1)) {
> +                                     block = find_continuous_blocks(mm,
> +                                                                    order,
> +                                                                    flags,
> +                                                                    &rblock);
> +                                     if (block)
> +                                             break;
> +                             }
>                               err = -ENOSPC;
>                               goto err_free;
>                       }
>               } while (1);
>
> -             mark_allocated(block);
> -             mm->avail -= drm_buddy_block_size(mm, block);
> -             kmemleak_update_trace(block);
> -             list_add_tail(&block->link, &allocated);
> -
> -             pages -= BIT(order);
> +             do {
> +                     mark_allocated(block);
> +                     mm->avail -= drm_buddy_block_size(mm, block);
> +                     kmemleak_update_trace(block);
> +                     list_add_tail(&block->link, &allocated);
> +                     pages -= BIT(drm_buddy_block_order(block));
> +                     if (block == rblock || !rblock)
> +                             break;
> +                     block = list_next_entry(block, leaf_link);
> +             } while (true);
>
>               if (!pages)
>                       break;
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index 572077ff8ae7..c5437bd4f4f3 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -50,6 +50,7 @@ struct drm_buddy_block {
>        */
>       struct list_head link;
>       struct list_head tmp_link;
> +     struct list_head leaf_link;
>   };
>
>   /* Order-zero must be at least PAGE_SIZE */

-------------- next part --------------
A non-text attachment was scrubbed...
Name: winmail.dat
Type: application/ms-tnef
Size: 19160 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20221129/c363d6af/attachment-0001.bin>


More information about the amd-gfx mailing list