[PATCH v2 1/2] drm/buddy: Optimize free block management with RB tree

Arunpravin Paneer Selvam arunpravin.paneerselvam at amd.com
Wed Aug 20 07:56:56 UTC 2025


Hi Matthew,

On 8/14/2025 4:15 PM, Matthew Auld wrote:
> On 24/07/2025 11:46, Arunpravin Paneer Selvam wrote:
>> Replace the freelist (O(n)) used for free block management with a
>> red-black tree, providing more efficient O(log n) search, insert,
>> and delete operations. This improves scalability and performance
>> when managing large numbers of free blocks per order (e.g., hundreds
>> or thousands).
>>
>> In the VK-CTS memory stress subtest, the buddy manager merges
>> fragmented memory and inserts freed blocks into the freelist. Since
>> freelist insertion is O(n), this becomes a bottleneck as fragmentation
>> increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
>> with the freelist, compared to just 0.03% with the RB tree
>> (rbtree_insert.isra.0), despite performing the same sorted insert.
>>
>> This also improves performance in heavily fragmented workloads,
>> such as games or graphics tests that stress memory.
>
> Neat. Also please Cc intel-gfx at lists.freedesktop.org and 
> intel-xe at lists.freedesktop.org on the next revision so our CI can pick 
> this up.
Sure, I will add them on v3.
>
>>
>> Signed-off-by: Arunpravin Paneer Selvam 
>> <Arunpravin.PaneerSelvam at amd.com>
>> ---
>>   drivers/gpu/drm/drm_buddy.c | 141 +++++++++++++++++++++++-------------
>>   include/drm/drm_buddy.h     |  39 +++++++++-
>>   2 files changed, 128 insertions(+), 52 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index a1e652b7631d..19e9773b41be 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -31,6 +31,8 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>>       block->header |= order;
>>       block->parent = parent;
>>
>> +    RB_CLEAR_NODE(&block->rb);
>> +
>>       BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
>>       return block;
>>   }
>> @@ -41,23 +43,53 @@ static void drm_block_free(struct drm_buddy *mm,
>>       kmem_cache_free(slab_blocks, block);
>>   }
>>
>> -static void list_insert_sorted(struct drm_buddy *mm,
>> -                   struct drm_buddy_block *block)
>> +static void rbtree_insert(struct drm_buddy *mm,
>> +              struct drm_buddy_block *block)
>>   {
>> +    struct rb_root *root = &mm->free_tree[drm_buddy_block_order(block)];
>> +    struct rb_node **link = &root->rb_node;
>> +    struct rb_node *parent = NULL;
>>       struct drm_buddy_block *node;
>> -    struct list_head *head;
>> +    u64 offset;
>> +
>> +    offset = drm_buddy_block_offset(block);
>>
>> -    head = &mm->free_list[drm_buddy_block_order(block)];
>> -    if (list_empty(head)) {
>> -        list_add(&block->link, head);
>> -        return;
>> +    while (*link) {
>> +        parent = *link;
>> +        node = rb_entry(parent, struct drm_buddy_block, rb);
>> +
>> +        if (offset < drm_buddy_block_offset(node))
>> +            link = &parent->rb_left;
>> +        else
>> +            link = &parent->rb_right;
>>       }
>>
>> -    list_for_each_entry(node, head, link)
>> -        if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
>> -            break;
>> +    rb_link_node(&block->rb, parent, link);
>> +    rb_insert_color(&block->rb, root);
>> +}
>> +
>> +static void rbtree_remove(struct drm_buddy *mm,
>> +              struct drm_buddy_block *block)
>> +{
>> +    struct rb_root *root;
>>
>> -    __list_add(&block->link, node->link.prev, &node->link);
>> +    root = &mm->free_tree[drm_buddy_block_order(block)];
>> +    rb_erase(&block->rb, root);
>> +
>> +    RB_CLEAR_NODE(&block->rb);
>> +}
>> +
>> +static inline struct drm_buddy_block *
>> +rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
>> +{
>> +    struct rb_node *node = rb_last(&mm->free_tree[order]);
>> +
>> +    return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
>> +}
>> +
>> +static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
>> +{
>> +    return RB_EMPTY_ROOT(&mm->free_tree[order]);
>>   }
>>
>>   static void clear_reset(struct drm_buddy_block *block)
>> @@ -70,12 +102,13 @@ static void mark_cleared(struct drm_buddy_block *block)
>>       block->header |= DRM_BUDDY_HEADER_CLEAR;
>>   }
>>
>> -static void mark_allocated(struct drm_buddy_block *block)
>> +static void mark_allocated(struct drm_buddy *mm,
>> +               struct drm_buddy_block *block)
>>   {
>>       block->header &= ~DRM_BUDDY_HEADER_STATE;
>>       block->header |= DRM_BUDDY_ALLOCATED;
>>
>> -    list_del(&block->link);
>> +    rbtree_remove(mm, block);
>>   }
>>
>>   static void mark_free(struct drm_buddy *mm,
>> @@ -84,15 +117,16 @@ static void mark_free(struct drm_buddy *mm,
>>       block->header &= ~DRM_BUDDY_HEADER_STATE;
>>       block->header |= DRM_BUDDY_FREE;
>>
>> -    list_insert_sorted(mm, block);
>> +    rbtree_insert(mm, block);
>>   }
>>
>> -static void mark_split(struct drm_buddy_block *block)
>> +static void mark_split(struct drm_buddy *mm,
>> +               struct drm_buddy_block *block)
>>   {
>>       block->header &= ~DRM_BUDDY_HEADER_STATE;
>>       block->header |= DRM_BUDDY_SPLIT;
>>
>> -    list_del(&block->link);
>> +    rbtree_remove(mm, block);
>>   }
>>
>>   static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
>> @@ -148,7 +182,7 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
>>                   mark_cleared(parent);
>>           }
>>
>> -        list_del(&buddy->link);
>> +        rbtree_remove(mm, buddy);
>>           if (force_merge && drm_buddy_block_is_clear(buddy))
>>               mm->clear_avail -= drm_buddy_block_size(mm, buddy);
>>
>> @@ -179,12 +213,17 @@ static int __force_merge(struct drm_buddy *mm,
>>           return -EINVAL;
>>
>>       for (i = min_order - 1; i >= 0; i--) {
>> -        struct drm_buddy_block *block, *prev;
>> +        struct drm_buddy_block *block, *prev_block, *first_block;
>> +
>> +        first_block = rb_entry(rb_first(&mm->free_tree[i]), struct drm_buddy_block, rb);
>>
>> -        list_for_each_entry_safe_reverse(block, prev, &mm->free_list[i], link) {
>> +        for_each_rb_entry_reverse_safe(block, prev_block, &mm->free_tree[i], rb) {
>>               struct drm_buddy_block *buddy;
>>               u64 block_start, block_end;
>>
>> +            if (RB_EMPTY_NODE(&block->rb))
>> +                break;
>
> If we got the block from the rb tree, can it be empty here?

I saw a crash earlier without this check while running graphics
workloads, but it is not happening now. I will test with more workloads
and remove the check if everything looks fine.
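
If it does resurface, one option might be to catch the inconsistency at
the source instead of inside the loop, e.g. something like this (rough
sketch only, the WARN_ON is my addition and not part of this patch):

static void rbtree_remove(struct drm_buddy *mm,
                          struct drm_buddy_block *block)
{
        struct rb_root *root = &mm->free_tree[drm_buddy_block_order(block)];

        /* The block must still be linked in the free tree here; warn on
         * a double remove instead of corrupting the tree in rb_erase().
         */
        WARN_ON(RB_EMPTY_NODE(&block->rb));

        rb_erase(&block->rb, root);
        RB_CLEAR_NODE(&block->rb);
}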

>
>> +
>>               if (!block->parent)
>>                   continue;
>>
>> @@ -206,10 +245,14 @@ static int __force_merge(struct drm_buddy *mm,
>>                * block in the next iteration as we would free the
>>                * buddy block as part of the free function.
>>                */
>> -            if (prev == buddy)
>> -                prev = list_prev_entry(prev, link);
>> +            if (prev_block && prev_block == buddy) {
>> +                if (prev_block != first_block)
>> +                    prev_block = rb_entry(rb_prev(&prev_block->rb),
>> +                                  struct drm_buddy_block,
>> +                                  rb);
>> +            }
>>
>> -            list_del(&block->link);
>> +            rbtree_remove(mm, block);
>>               if (drm_buddy_block_is_clear(block))
>>                   mm->clear_avail -= drm_buddy_block_size(mm, block);
>>
>> @@ -258,14 +301,14 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>>
>>       BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
>>
>> -    mm->free_list = kmalloc_array(mm->max_order + 1,
>> -                      sizeof(struct list_head),
>> +    mm->free_tree = kmalloc_array(mm->max_order + 1,
>> +                      sizeof(struct rb_root),
>>                         GFP_KERNEL);
>> -    if (!mm->free_list)
>> +    if (!mm->free_tree)
>>           return -ENOMEM;
>>
>>       for (i = 0; i <= mm->max_order; ++i)
>> -        INIT_LIST_HEAD(&mm->free_list[i]);
>> +        mm->free_tree[i] = RB_ROOT;
>>
>>       mm->n_roots = hweight64(size);
>>
>> @@ -273,7 +316,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>>                     sizeof(struct drm_buddy_block *),
>>                     GFP_KERNEL);
>>       if (!mm->roots)
>> -        goto out_free_list;
>> +        goto out_free_tree;
>>
>>       offset = 0;
>>       i = 0;
>> @@ -312,8 +355,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>>       while (i--)
>>           drm_block_free(mm, mm->roots[i]);
>>       kfree(mm->roots);
>> -out_free_list:
>> -    kfree(mm->free_list);
>> +out_free_tree:
>> +    kfree(mm->free_tree);
>>       return -ENOMEM;
>>   }
>>   EXPORT_SYMBOL(drm_buddy_init);
>> @@ -323,7 +366,7 @@ EXPORT_SYMBOL(drm_buddy_init);
>>    *
>>    * @mm: DRM buddy manager to free
>>    *
>> - * Cleanup memory manager resources and the freelist
>> + * Cleanup memory manager resources and the freetree
>>    */
>>   void drm_buddy_fini(struct drm_buddy *mm)
>>   {
>> @@ -350,7 +393,7 @@ void drm_buddy_fini(struct drm_buddy *mm)
>>       WARN_ON(mm->avail != mm->size);
>>
>>       kfree(mm->roots);
>> -    kfree(mm->free_list);
>> +    kfree(mm->free_tree);
>>   }
>>   EXPORT_SYMBOL(drm_buddy_fini);
>>
>> @@ -383,7 +426,7 @@ static int split_block(struct drm_buddy *mm,
>>           clear_reset(block);
>>       }
>>
>> -    mark_split(block);
>> +    mark_split(mm, block);
>>
>>       return 0;
>>   }
>> @@ -598,7 +641,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
>>       for (i = order; i <= mm->max_order; ++i) {
>>           struct drm_buddy_block *tmp_block;
>>
>> -        list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
>> +        for_each_rb_entry_reverse(tmp_block, &mm->free_tree[i], rb) {
>>               if (block_incompatible(tmp_block, flags))
>>                   continue;
>>
>> @@ -624,7 +667,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
>>   }
>>
>>   static struct drm_buddy_block *
>> -alloc_from_freelist(struct drm_buddy *mm,
>> +alloc_from_freetree(struct drm_buddy *mm,
>>               unsigned int order,
>>               unsigned long flags)
>>   {
>> @@ -641,7 +684,7 @@ alloc_from_freelist(struct drm_buddy *mm,
>>           for (tmp = order; tmp <= mm->max_order; ++tmp) {
>>               struct drm_buddy_block *tmp_block;
>>
>> -            list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
>> +            for_each_rb_entry_reverse(tmp_block, &mm->free_tree[tmp], rb) {
>>                   if (block_incompatible(tmp_block, flags))
>>                       continue;
>>
>> @@ -657,10 +700,8 @@ alloc_from_freelist(struct drm_buddy *mm,
>>       if (!block) {
>>           /* Fallback method */
>>           for (tmp = order; tmp <= mm->max_order; ++tmp) {
>> -            if (!list_empty(&mm->free_list[tmp])) {
>> -                block = list_last_entry(&mm->free_list[tmp],
>> -                            struct drm_buddy_block,
>> -                            link);
>> +            if (!rbtree_is_empty(mm, tmp)) {
>> +                block = rbtree_last_entry(mm, tmp);
>>                   if (block)
>>                       break;
>>               }
>> @@ -728,7 +769,7 @@ static int __alloc_range(struct drm_buddy *mm,
>>
>>           if (contains(start, end, block_start, block_end)) {
>>               if (drm_buddy_block_is_free(block)) {
>> -                mark_allocated(block);
>> +                mark_allocated(mm, block);
>>                   total_allocated += drm_buddy_block_size(mm, block);
>>                   mm->avail -= drm_buddy_block_size(mm, block);
>>                   if (drm_buddy_block_is_clear(block))
>> @@ -806,7 +847,6 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>>   {
>>       u64 rhs_offset, lhs_offset, lhs_size, filled;
>>       struct drm_buddy_block *block;
>> -    struct list_head *list;
>>       LIST_HEAD(blocks_lhs);
>>       unsigned long pages;
>>       unsigned int order;
>>
>> @@ -819,11 +859,10 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>>       if (order == 0)
>>           return -ENOSPC;
>>
>> -    list = &mm->free_list[order];
>> -    if (list_empty(list))
>> +    if (rbtree_is_empty(mm, order))
>>           return -ENOSPC;
>>
>> -    list_for_each_entry_reverse(block, list, link) {
>> +    for_each_rb_entry_reverse(block, &mm->free_tree[order], rb) {
>>           /* Allocate blocks traversing RHS */
>>           rhs_offset = drm_buddy_block_offset(block);
>>           err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
>> @@ -933,7 +972,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>       list_add(&block->tmp_link, &dfs);
>>       err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
>>       if (err) {
>> -        mark_allocated(block);
>> +        mark_allocated(mm, block);
>>           mm->avail -= drm_buddy_block_size(mm, block);
>>           if (drm_buddy_block_is_clear(block))
>>               mm->clear_avail -= drm_buddy_block_size(mm, block);
>> @@ -956,8 +995,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>           return  __drm_buddy_alloc_range_bias(mm, start, end,
>>                                order, flags);
>>       else
>> -        /* Allocate from freelist */
>> -        return alloc_from_freelist(mm, order, flags);
>> +        /* Allocate from freetree */
>> +        return alloc_from_freetree(mm, order, flags);
>>   }
>>
>>   /**
>> @@ -974,8 +1013,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>    * alloc_range_bias() called on range limitations, which traverses
>>    * the tree and returns the desired block.
>>    *
>> - * alloc_from_freelist() called when *no* range restrictions
>> - * are enforced, which picks the block from the freelist.
>> + * alloc_from_freetree() called when *no* range restrictions
>> + * are enforced, which picks the block from the freetree.
>>    *
>>    * Returns:
>>    * 0 on success, error code on failure.
>> @@ -1077,7 +1116,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>               }
>>           } while (1);
>>
>> -        mark_allocated(block);
>> +        mark_allocated(mm, block);
>>           mm->avail -= drm_buddy_block_size(mm, block);
>>           if (drm_buddy_block_is_clear(block))
>>               mm->clear_avail -= drm_buddy_block_size(mm, block);
>> @@ -1161,7 +1200,7 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>>           struct drm_buddy_block *block;
>>           u64 count = 0, free;
>>
>> -        list_for_each_entry(block, &mm->free_list[order], link) {
>> +        for_each_rb_entry(block, &mm->free_tree[order], rb) {
>>               BUG_ON(!drm_buddy_block_is_free(block));
>>               count++;
>>           }
>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index 9689a7c5dd36..a64d108a33b7 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -10,6 +10,7 @@
>>   #include <linux/list.h>
>>   #include <linux/slab.h>
>>   #include <linux/sched.h>
>> +#include <linux/rbtree.h>
>>
>>   #include <drm/drm_print.h>
>>
>> @@ -22,6 +23,41 @@
>>       start__ >= max__ || size__ > max__ - start__; \
>>   })
>>
>> +/*
>> + * for_each_rb_entry() - iterate over an RB tree in order
>> + * @pos:    the struct type * to use as a loop cursor
>> + * @root:    pointer to struct rb_root to iterate
>> + * @member:    name of the rb_node field within the struct
>> + */
>> +#define for_each_rb_entry(pos, root, member) \
>> +    for (pos = rb_entry_safe(rb_first(root), typeof(*pos), member); \
>> +         pos; \
>> +         pos = rb_entry_safe(rb_next(&(pos)->member), typeof(*pos), member))
>> +
>> +/*
>> + * for_each_rb_entry_reverse() - iterate over an RB tree in reverse order
>> + * @pos:    the struct type * to use as a loop cursor
>> + * @root:    pointer to struct rb_root to iterate
>> + * @member:    name of the rb_node field within the struct
>> + */
>> +#define for_each_rb_entry_reverse(pos, root, member) \
>> +    for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member); \
>> +         pos; \
>> +         pos = rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member))
>> +
>> +/**
>> + * for_each_rb_entry_reverse_safe() - safely iterate over an RB tree in reverse order
>> + * @pos:    the struct type * to use as a loop cursor.
>> + * @n:        another struct type * to use as temporary storage.
>> + * @root:    pointer to struct rb_root to iterate.
>> + * @member:    name of the rb_node field within the struct.
>> + */
>> +#define for_each_rb_entry_reverse_safe(pos, n, root, member) \
>
> Would it make sense to give these a less generic name? Something like 
> for_each_rb_free_block_* ?
>
> Also should this be exported or rather kept within .c?
Yes, I will change them.
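
Something along these lines, kept private to drm_buddy.c (just a
sketch, using the name you suggested):

#define for_each_rb_free_block(pos, root, member) \
        for (pos = rb_entry_safe(rb_first(root), typeof(*pos), member); \
             pos; \
             pos = rb_entry_safe(rb_next(&(pos)->member), typeof(*pos), member))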
>
>> +    for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member), \
>> +         n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL; \
>> +         pos; \
>> +         pos = n, n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL)
>
>
>> +
>>   #define DRM_BUDDY_RANGE_ALLOCATION        BIT(0)
>>   #define DRM_BUDDY_TOPDOWN_ALLOCATION        BIT(1)
>>   #define DRM_BUDDY_CONTIGUOUS_ALLOCATION        BIT(2)
>> @@ -53,6 +89,7 @@ struct drm_buddy_block {
>>        * a list, if so desired. As soon as the block is freed with
>>        * drm_buddy_free* ownership is given back to the mm.
>>        */
>> +    struct rb_node rb;
>>       struct list_head link;
>
> I think we can be slippery here and make this a union? They should be 
> mutually exclusive AFAICT?

AFAIK, we need both: the rb_node rb is for managing free blocks, and
the list_head link is for adding allocated blocks to the driver's list.
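
For reference, the union you suggested would look something like the
sketch below; it would only be safe if a block can never sit in the
free tree and on a driver list at the same time:

        union {
                /* links a free block into mm->free_tree[order] */
                struct rb_node rb;
                /* hands an allocated block over to the driver's list */
                struct list_head link;
        };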

>
>>       struct list_head tmp_link;
>
> Otherwise it should be possible to get rid of this instead, and just 
> re-use link? Could be done as separate patch, if this makes sense.

I think we cannot reuse link here, since it is needed to add the
allocated blocks to the driver's list, and tmp_link is already used in
alloc_range() and alloc_range_bias().
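
For reference, this is the existing tmp_link usage in
drm_buddy_block_trim() (quoted from the current code) that would clash
with reusing link, since at that point the block is already allocated
and may still be on the driver's list via link:

        LIST_HEAD(dfs);
        list_add(&block->tmp_link, &dfs);
        err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);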

Regards,

Arun.

>
>>   };
>> @@ -68,7 +105,7 @@ struct drm_buddy_block {
>>    */
>>   struct drm_buddy {
>>       /* Maintain a free tree for each order. */
>> -    struct list_head *free_list;
>> +    struct rb_root *free_tree;
>>
>>       /*
>>        * Maintain explicit binary tree(s) to track the allocation of the
>

