[PATCH v2 1/2] drm/buddy: Optimize free block management with RB tree

Arunpravin Paneer Selvam arunpravin.paneerselvam at amd.com
Wed Aug 20 10:37:16 UTC 2025


On 8/20/2025 3:44 PM, Matthew Auld wrote:
> On 20/08/2025 08:56, Arunpravin Paneer Selvam wrote:
>> Hi Matthew,
>>
>> On 8/14/2025 4:15 PM, Matthew Auld wrote:
>>> On 24/07/2025 11:46, Arunpravin Paneer Selvam wrote:
>>>> Replace the freelist (O(n)) used for free block management with a
>>>> red-black tree, providing more efficient O(log n) search, insert,
>>>> and delete operations. This improves scalability and performance
>>>> when managing large numbers of free blocks per order (e.g., hundreds
>>>> or thousands).
>>>>
>>>> In the VK-CTS memory stress subtest, the buddy manager merges
>>>> fragmented memory and inserts freed blocks into the freelist. Since
>>>> freelist insertion is O(n), this becomes a bottleneck as fragmentation
>>>> increases. Benchmarking shows list_insert_sorted() consumes ~52.69%
>>>> CPU with the freelist, compared to just 0.03% with the RB tree
>>>> (rbtree_insert.isra.0), despite performing the same sorted insert.
>>>>
>>>> This also improves performance in heavily fragmented workloads,
>>>> such as games or graphics tests that stress memory.
>>>
>>> Neat. Also please Cc intel-gfx at lists.freedesktop.org and
>>> intel-xe at lists.freedesktop.org on the next revision so our CI can
>>> pick this up.
>> Sure, I will add them on v3.
>>>
>>>>
>>>> Signed-off-by: Arunpravin Paneer Selvam 
>>>> <Arunpravin.PaneerSelvam at amd.com>
>>>> ---
>>>>   drivers/gpu/drm/drm_buddy.c | 141 +++++++++++++++++++++++-------------
>>>>   include/drm/drm_buddy.h     |  39 +++++++++-
>>>>   2 files changed, 128 insertions(+), 52 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>>>> index a1e652b7631d..19e9773b41be 100644
>>>> --- a/drivers/gpu/drm/drm_buddy.c
>>>> +++ b/drivers/gpu/drm/drm_buddy.c
>>>> @@ -31,6 +31,8 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>>>>       block->header |= order;
>>>>       block->parent = parent;
>>>>
>>>> +    RB_CLEAR_NODE(&block->rb);
>>>> +
>>>>       BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
>>>>       return block;
>>>>   }
>>>> @@ -41,23 +43,53 @@ static void drm_block_free(struct drm_buddy *mm,
>>>>       kmem_cache_free(slab_blocks, block);
>>>>   }
>>>>
>>>> -static void list_insert_sorted(struct drm_buddy *mm,
>>>> -                   struct drm_buddy_block *block)
>>>> +static void rbtree_insert(struct drm_buddy *mm,
>>>> +              struct drm_buddy_block *block)
>>>>   {
>>>> +    struct rb_root *root = &mm->free_tree[drm_buddy_block_order(block)];
>>>> +    struct rb_node **link = &root->rb_node;
>>>> +    struct rb_node *parent = NULL;
>>>>       struct drm_buddy_block *node;
>>>> -    struct list_head *head;
>>>> +    u64 offset;
>>>> +
>>>> +    offset = drm_buddy_block_offset(block);
>>>>
>>>> -    head = &mm->free_list[drm_buddy_block_order(block)];
>>>> -    if (list_empty(head)) {
>>>> -        list_add(&block->link, head);
>>>> -        return;
>>>> +    while (*link) {
>>>> +        parent = *link;
>>>> +        node = rb_entry(parent, struct drm_buddy_block, rb);
>>>> +
>>>> +        if (offset < drm_buddy_block_offset(node))
>>>> +            link = &parent->rb_left;
>>>> +        else
>>>> +            link = &parent->rb_right;
>>>>       }
>>>>
>>>> -    list_for_each_entry(node, head, link)
>>>> -        if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
>>>> -            break;
>>>> +    rb_link_node(&block->rb, parent, link);
>>>> +    rb_insert_color(&block->rb, root);
>>>> +}
>>>> +
>>>> +static void rbtree_remove(struct drm_buddy *mm,
>>>> +              struct drm_buddy_block *block)
>>>> +{
>>>> +    struct rb_root *root;
>>>>
>>>> -    __list_add(&block->link, node->link.prev, &node->link);
>>>> +    root = &mm->free_tree[drm_buddy_block_order(block)];
>>>> +    rb_erase(&block->rb, root);
>>>> +
>>>> +    RB_CLEAR_NODE(&block->rb);
>>>> +}
>>>> +
>>>> +static inline struct drm_buddy_block *
>>>> +rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
>>>> +{
>>>> +    struct rb_node *node = rb_last(&mm->free_tree[order]);
>>>> +
>>>> +    return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
>>>> +}
>>>> +
>>>> +static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
>>>> +{
>>>> +    return RB_EMPTY_ROOT(&mm->free_tree[order]);
>>>>   }
>>>>
>>>>   static void clear_reset(struct drm_buddy_block *block)
>>>> @@ -70,12 +102,13 @@ static void mark_cleared(struct drm_buddy_block *block)
>>>>       block->header |= DRM_BUDDY_HEADER_CLEAR;
>>>>   }
>>>>
>>>> -static void mark_allocated(struct drm_buddy_block *block)
>>>> +static void mark_allocated(struct drm_buddy *mm,
>>>> +               struct drm_buddy_block *block)
>>>>   {
>>>>       block->header &= ~DRM_BUDDY_HEADER_STATE;
>>>>       block->header |= DRM_BUDDY_ALLOCATED;
>>>>
>>>> -    list_del(&block->link);
>>>> +    rbtree_remove(mm, block);
>>>>   }
>>>>
>>>>   static void mark_free(struct drm_buddy *mm,
>>>> @@ -84,15 +117,16 @@ static void mark_free(struct drm_buddy *mm,
>>>>       block->header &= ~DRM_BUDDY_HEADER_STATE;
>>>>       block->header |= DRM_BUDDY_FREE;
>>>>
>>>> -    list_insert_sorted(mm, block);
>>>> +    rbtree_insert(mm, block);
>>>>   }
>>>>
>>>> -static void mark_split(struct drm_buddy_block *block)
>>>> +static void mark_split(struct drm_buddy *mm,
>>>> +               struct drm_buddy_block *block)
>>>>   {
>>>>       block->header &= ~DRM_BUDDY_HEADER_STATE;
>>>>       block->header |= DRM_BUDDY_SPLIT;
>>>>
>>>> -    list_del(&block->link);
>>>> +    rbtree_remove(mm, block);
>>>>   }
>>>>
>>>>   static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
>>>> @@ -148,7 +182,7 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
>>>>                   mark_cleared(parent);
>>>>           }
>>>>
>>>> -        list_del(&buddy->link);
>>>> +        rbtree_remove(mm, buddy);
>>>>           if (force_merge && drm_buddy_block_is_clear(buddy))
>>>>               mm->clear_avail -= drm_buddy_block_size(mm, buddy);
>>>>
>>>> @@ -179,12 +213,17 @@ static int __force_merge(struct drm_buddy *mm,
>>>>           return -EINVAL;
>>>>
>>>>       for (i = min_order - 1; i >= 0; i--) {
>>>> -        struct drm_buddy_block *block, *prev;
>>>> +        struct drm_buddy_block *block, *prev_block, *first_block;
>>>> +
>>>> +        first_block = rb_entry(rb_first(&mm->free_tree[i]), struct drm_buddy_block, rb);
>>>>
>>>> -        list_for_each_entry_safe_reverse(block, prev, &mm->free_list[i], link) {
>>>> +        for_each_rb_entry_reverse_safe(block, prev_block, &mm->free_tree[i], rb) {
>>>>               struct drm_buddy_block *buddy;
>>>>               u64 block_start, block_end;
>>>>
>>>> +            if (RB_EMPTY_NODE(&block->rb))
>>>> +                break;
>>>
>>> If we got the block from the rb tree, can it be empty here?
>>
>> I saw a crash earlier without this check while running graphics
>> workloads, but it is not happening now. I will test with more
>> workloads and remove it if everything looks fine.
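>>
>> For reference, one way I think the cached cursor could go stale (a
>> speculative sketch of the failure mode, not a confirmed root cause):
>> the _safe iterator caches the previous node before the loop body runs,
>> and __drm_buddy_free() can rb_erase() that cached node when it merges
>> the buddy:
>>
>>     for_each_rb_entry_reverse_safe(block, prev_block, &mm->free_tree[i], rb) {
>>         ...
>>         rbtree_remove(mm, block);                  /* block leaves free_tree[i] */
>>         order = __drm_buddy_free(mm, block, true); /* may also erase the buddy */
>>     }
>>
>> The prev_block == buddy fixup covers the immediate buddy, so the
>> RB_EMPTY_NODE() check may only be masking a different stale-cursor
>> case.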
>>
>>>
>>>> +
>>>>               if (!block->parent)
>>>>                   continue;
>>>>
>>>> @@ -206,10 +245,14 @@ static int __force_merge(struct drm_buddy *mm,
>>>>                * block in the next iteration as we would free the
>>>>                * buddy block as part of the free function.
>>>>                */
>>>> -            if (prev == buddy)
>>>> -                prev = list_prev_entry(prev, link);
>>>> +            if (prev_block && prev_block == buddy) {
>>>> +                if (prev_block != first_block)
>>>> +                    prev_block = rb_entry(rb_prev(&prev_block->rb),
>>>> +                                  struct drm_buddy_block,
>>>> +                                  rb);
>>>> +            }
>>>>
>>>> -            list_del(&block->link);
>>>> +            rbtree_remove(mm, block);
>>>>               if (drm_buddy_block_is_clear(block))
>>>>                   mm->clear_avail -= drm_buddy_block_size(mm, block);
>>>>
>>>> @@ -258,14 +301,14 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>>>>
>>>>       BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
>>>>
>>>> -    mm->free_list = kmalloc_array(mm->max_order + 1,
>>>> -                      sizeof(struct list_head),
>>>> +    mm->free_tree = kmalloc_array(mm->max_order + 1,
>>>> +                      sizeof(struct rb_root),
>>>>                       GFP_KERNEL);
>>>> -    if (!mm->free_list)
>>>> +    if (!mm->free_tree)
>>>>           return -ENOMEM;
>>>>
>>>>       for (i = 0; i <= mm->max_order; ++i)
>>>> -        INIT_LIST_HEAD(&mm->free_list[i]);
>>>> +        mm->free_tree[i] = RB_ROOT;
>>>>
>>>>       mm->n_roots = hweight64(size);
>>>>
>>>> @@ -273,7 +316,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>>>>                     sizeof(struct drm_buddy_block *),
>>>>                     GFP_KERNEL);
>>>>       if (!mm->roots)
>>>> -        goto out_free_list;
>>>> +        goto out_free_tree;
>>>>
>>>>       offset = 0;
>>>>       i = 0;
>>>> @@ -312,8 +355,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>>>>       while (i--)
>>>>           drm_block_free(mm, mm->roots[i]);
>>>>       kfree(mm->roots);
>>>> -out_free_list:
>>>> -    kfree(mm->free_list);
>>>> +out_free_tree:
>>>> +    kfree(mm->free_tree);
>>>>       return -ENOMEM;
>>>>   }
>>>>   EXPORT_SYMBOL(drm_buddy_init);
>>>> @@ -323,7 +366,7 @@ EXPORT_SYMBOL(drm_buddy_init);
>>>>    *
>>>>    * @mm: DRM buddy manager to free
>>>>    *
>>>> - * Cleanup memory manager resources and the freelist
>>>> + * Cleanup memory manager resources and the freetree
>>>>    */
>>>>   void drm_buddy_fini(struct drm_buddy *mm)
>>>>   {
>>>> @@ -350,7 +393,7 @@ void drm_buddy_fini(struct drm_buddy *mm)
>>>>       WARN_ON(mm->avail != mm->size);
>>>>
>>>>       kfree(mm->roots);
>>>> -    kfree(mm->free_list);
>>>> +    kfree(mm->free_tree);
>>>>   }
>>>>   EXPORT_SYMBOL(drm_buddy_fini);
>>>>
>>>> @@ -383,7 +426,7 @@ static int split_block(struct drm_buddy *mm,
>>>>           clear_reset(block);
>>>>       }
>>>>
>>>> -    mark_split(block);
>>>> +    mark_split(mm, block);
>>>>
>>>>       return 0;
>>>>   }
>>>> @@ -598,7 +641,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
>>>>       for (i = order; i <= mm->max_order; ++i) {
>>>>           struct drm_buddy_block *tmp_block;
>>>>
>>>> -        list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
>>>> +        for_each_rb_entry_reverse(tmp_block, &mm->free_tree[i], rb) {
>>>>               if (block_incompatible(tmp_block, flags))
>>>>                   continue;
>>>>
>>>> @@ -624,7 +667,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
>>>>   }
>>>>
>>>>   static struct drm_buddy_block *
>>>> -alloc_from_freelist(struct drm_buddy *mm,
>>>> +alloc_from_freetree(struct drm_buddy *mm,
>>>>               unsigned int order,
>>>>               unsigned long flags)
>>>>   {
>>>> @@ -641,7 +684,7 @@ alloc_from_freelist(struct drm_buddy *mm,
>>>>           for (tmp = order; tmp <= mm->max_order; ++tmp) {
>>>>               struct drm_buddy_block *tmp_block;
>>>>
>>>> -            list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
>>>> +            for_each_rb_entry_reverse(tmp_block, &mm->free_tree[tmp], rb) {
>>>>                   if (block_incompatible(tmp_block, flags))
>>>>                       continue;
>>>>
>>>> @@ -657,10 +700,8 @@ alloc_from_freelist(struct drm_buddy *mm,
>>>>       if (!block) {
>>>>           /* Fallback method */
>>>>           for (tmp = order; tmp <= mm->max_order; ++tmp) {
>>>> -            if (!list_empty(&mm->free_list[tmp])) {
>>>> -                block = list_last_entry(&mm->free_list[tmp],
>>>> -                            struct drm_buddy_block,
>>>> -                            link);
>>>> +            if (!rbtree_is_empty(mm, tmp)) {
>>>> +                block = rbtree_last_entry(mm, tmp);
>>>>                   if (block)
>>>>                       break;
>>>>               }
>>>> @@ -728,7 +769,7 @@ static int __alloc_range(struct drm_buddy *mm,
>>>>
>>>>           if (contains(start, end, block_start, block_end)) {
>>>>               if (drm_buddy_block_is_free(block)) {
>>>> -                mark_allocated(block);
>>>> +                mark_allocated(mm, block);
>>>>                   total_allocated += drm_buddy_block_size(mm, block);
>>>>                   mm->avail -= drm_buddy_block_size(mm, block);
>>>>                   if (drm_buddy_block_is_clear(block))
>>>> @@ -806,7 +847,6 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>>>>   {
>>>>       u64 rhs_offset, lhs_offset, lhs_size, filled;
>>>>       struct drm_buddy_block *block;
>>>> -    struct list_head *list;
>>>>       LIST_HEAD(blocks_lhs);
>>>>       unsigned long pages;
>>>>       unsigned int order;
>>>> @@ -819,11 +859,10 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>>>>       if (order == 0)
>>>>           return -ENOSPC;
>>>>
>>>> -    list = &mm->free_list[order];
>>>> -    if (list_empty(list))
>>>> +    if (rbtree_is_empty(mm, order))
>>>>           return -ENOSPC;
>>>>
>>>> -    list_for_each_entry_reverse(block, list, link) {
>>>> +    for_each_rb_entry_reverse(block, &mm->free_tree[order], rb) {
>>>>           /* Allocate blocks traversing RHS */
>>>>           rhs_offset = drm_buddy_block_offset(block);
>>>>           err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
>>>> @@ -933,7 +972,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>>>       list_add(&block->tmp_link, &dfs);
>>>>       err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
>>>>       if (err) {
>>>> -        mark_allocated(block);
>>>> +        mark_allocated(mm, block);
>>>>           mm->avail -= drm_buddy_block_size(mm, block);
>>>>           if (drm_buddy_block_is_clear(block))
>>>>               mm->clear_avail -= drm_buddy_block_size(mm, block);
>>>> @@ -956,8 +995,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>>           return  __drm_buddy_alloc_range_bias(mm, start, end,
>>>>                                order, flags);
>>>>       else
>>>> -        /* Allocate from freelist */
>>>> -        return alloc_from_freelist(mm, order, flags);
>>>> +        /* Allocate from freetree */
>>>> +        return alloc_from_freetree(mm, order, flags);
>>>>   }
>>>>
>>>>   /**
>>>> @@ -974,8 +1013,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>>    * alloc_range_bias() called on range limitations, which traverses
>>>>    * the tree and returns the desired block.
>>>>    *
>>>> - * alloc_from_freelist() called when *no* range restrictions
>>>> - * are enforced, which picks the block from the freelist.
>>>> + * alloc_from_freetree() called when *no* range restrictions
>>>> + * are enforced, which picks the block from the freetree.
>>>>    *
>>>>    * Returns:
>>>>    * 0 on success, error code on failure.
>>>> @@ -1077,7 +1116,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>>               }
>>>>           } while (1);
>>>>
>>>> -        mark_allocated(block);
>>>> +        mark_allocated(mm, block);
>>>>           mm->avail -= drm_buddy_block_size(mm, block);
>>>>           if (drm_buddy_block_is_clear(block))
>>>>               mm->clear_avail -= drm_buddy_block_size(mm, block);
>>>>
>>>> @@ -1161,7 +1200,7 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>>>>           struct drm_buddy_block *block;
>>>>           u64 count = 0, free;
>>>>
>>>> -        list_for_each_entry(block, &mm->free_list[order], link) {
>>>> +        for_each_rb_entry(block, &mm->free_tree[order], rb) {
>>>>               BUG_ON(!drm_buddy_block_is_free(block));
>>>>               count++;
>>>>           }
>>>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>>>> index 9689a7c5dd36..a64d108a33b7 100644
>>>> --- a/include/drm/drm_buddy.h
>>>> +++ b/include/drm/drm_buddy.h
>>>> @@ -10,6 +10,7 @@
>>>>   #include <linux/list.h>
>>>>   #include <linux/slab.h>
>>>>   #include <linux/sched.h>
>>>> +#include <linux/rbtree.h>
>>>>
>>>>   #include <drm/drm_print.h>
>>>>
>>>> @@ -22,6 +23,41 @@
>>>>       start__ >= max__ || size__ > max__ - start__; \
>>>>   })
>>>>
>>>> +/*
>>>> + * for_each_rb_entry() - iterate over an RB tree in order
>>>> + * @pos:    the struct type * to use as a loop cursor
>>>> + * @root:    pointer to struct rb_root to iterate
>>>> + * @member:    name of the rb_node field within the struct
>>>> + */
>>>> +#define for_each_rb_entry(pos, root, member) \
>>>> +    for (pos = rb_entry_safe(rb_first(root), typeof(*pos), member); \
>>>> +         pos; \
>>>> +         pos = rb_entry_safe(rb_next(&(pos)->member), typeof(*pos), member))
>>>> +
>>>> +/*
>>>> + * for_each_rb_entry_reverse() - iterate over an RB tree in reverse order
>>>> + * @pos:    the struct type * to use as a loop cursor
>>>> + * @root:    pointer to struct rb_root to iterate
>>>> + * @member:    name of the rb_node field within the struct
>>>> + */
>>>> +#define for_each_rb_entry_reverse(pos, root, member) \
>>>> +    for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member); \
>>>> +         pos; \
>>>> +         pos = rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member))
>>>> +
>>>> +/**
>>>> + * for_each_rb_entry_reverse_safe() - safely iterate over an RB tree in reverse order
>>>> + * @pos:    the struct type * to use as a loop cursor.
>>>> + * @n:        another struct type * to use as temporary storage.
>>>> + * @root:    pointer to struct rb_root to iterate.
>>>> + * @member:    name of the rb_node field within the struct.
>>>> + */
>>>> +#define for_each_rb_entry_reverse_safe(pos, n, root, member) \
>>>
>>> Would it make sense to give these a less generic name? Something 
>>> like for_each_rb_free_block_* ?
>>>
>>> Also should this be exported or rather kept within .c?
>> yes, I will change it.
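>>
>> Something like this, kept local to drm_buddy.c (a sketch of the
>> rename only; the body is unchanged from the patch):
>>
>> #define for_each_rb_free_block_reverse(block, root, member) \
>>     for (block = rb_entry_safe(rb_last(root), typeof(*block), member); \
>>          block; \
>>          block = rb_entry_safe(rb_prev(&(block)->member), typeof(*block), member))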
>>>
>>>> +    for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member), \
>>>> +         n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL; \
>>>> +         pos; \
>>>> +         pos = n, n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL)
>>>
>>>
>>>> +
>>>>   #define DRM_BUDDY_RANGE_ALLOCATION        BIT(0)
>>>>   #define DRM_BUDDY_TOPDOWN_ALLOCATION        BIT(1)
>>>>   #define DRM_BUDDY_CONTIGUOUS_ALLOCATION        BIT(2)
>>>> @@ -53,6 +89,7 @@ struct drm_buddy_block {
>>>>        * a list, if so desired. As soon as the block is freed with
>>>>        * drm_buddy_free* ownership is given back to the mm.
>>>>        */
>>>> +    struct rb_node rb;
>>>>       struct list_head link;
>>>
>>> I think we can be slippery here and make this a union? They should 
>>> be mutually exclusive AFAICT?
>>
>> AFAIK, we need both: rb_node rb is for handling free blocks, and the
>> list_head link is for adding the allocated blocks into the driver's
>> list.
>
> At a given time I think it is either on the free rb or user block 
> list, not both at the same time, given that a block can't be free and 
> allocated? Just thinking if there is an easy way to trim the size a 
> bit, given that we are adding an entire rb_node, and there can be many 
> of these blocks around.
Yes, you are right. I will add a union for the free rb node and the user block list.
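
Something along these lines (a sketch; the surrounding fields are
abridged and the exact layout would follow in v3):

    struct drm_buddy_block {
        u64 header;
        struct drm_buddy_block *parent;
        ...
        /*
         * A block sits on the per-order free tree while free, or on
         * the driver's list once allocated, never both, so the two
         * entries can share storage:
         */
        union {
            struct rb_node rb;      /* entry in mm->free_tree[order] */
            struct list_head link;  /* entry in the driver's list */
        };
        struct list_head tmp_link;
    };
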
>
>>
>>>
>>>>       struct list_head tmp_link;
>>>
>>> Otherwise it should be possible to get rid of this instead, and just 
>>> re-use link? Could be done as separate patch, if this makes sense.
>>
>> I think we cannot use link here since it is needed to add the
>> allocated blocks to the driver's list, and tmp_link is already used in
>> alloc_range() and alloc_range_bias().
>
> Yeah, I don't think this will work, but a union might still be an option instead.

Sure, we can make a union for the rb node and the list.

Regards,

Arun.

>
>
>>
>> Regards,
>>
>> Arun.
>>
>>>
>>>>   };
>>>> @@ -68,7 +105,7 @@ struct drm_buddy_block {
>>>>    */
>>>>   struct drm_buddy {
>>>>       /* Maintain a free list for each order. */
>>>> -    struct list_head *free_list;
>>>> +    struct rb_root *free_tree;
>>>>
>>>>       /*
>>>>        * Maintain explicit binary tree(s) to track the allocation of the
>>>
>

