[PATCH v2 1/2] drm/buddy: Optimize free block management with RB tree
Arunpravin Paneer Selvam
arunpravin.paneerselvam at amd.com
Wed Aug 20 07:56:56 UTC 2025
Hi Matthew,
On 8/14/2025 4:15 PM, Matthew Auld wrote:
> On 24/07/2025 11:46, Arunpravin Paneer Selvam wrote:
>> Replace the freelist (O(n)) used for free block management with a
>> red-black tree, providing more efficient O(log n) search, insert,
>> and delete operations. This improves scalability and performance
>> when managing large numbers of free blocks per order (e.g., hundreds
>> or thousands).
>>
>> In the VK-CTS memory stress subtest, the buddy manager merges
>> fragmented memory and inserts freed blocks into the freelist. Since
>> freelist insertion is O(n), this becomes a bottleneck as fragmentation
>> increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
>> with the freelist, compared to just 0.03% with the RB tree
>> (rbtree_insert.isra.0), despite performing the same sorted insert.
>>
>> This also improves performance in heavily fragmented workloads,
>> such as games or graphics tests that stress memory.
>
> Neat. Also please Cc intel-gfx at lists.freedesktop.org and
> intel-xe at lists.freedesktop.org on the next revision so our CI can pick
> this up.
Sure, I will add them in v3.
>
>>
>> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>
>> ---
>> drivers/gpu/drm/drm_buddy.c | 141 +++++++++++++++++++++++-------------
>> include/drm/drm_buddy.h | 39 +++++++++-
>> 2 files changed, 128 insertions(+), 52 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index a1e652b7631d..19e9773b41be 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -31,6 +31,8 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>> block->header |= order;
>> block->parent = parent;
>> + RB_CLEAR_NODE(&block->rb);
>> +
>> BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
>> return block;
>> }
>> @@ -41,23 +43,53 @@ static void drm_block_free(struct drm_buddy *mm,
>> kmem_cache_free(slab_blocks, block);
>> }
>> -static void list_insert_sorted(struct drm_buddy *mm,
>> - struct drm_buddy_block *block)
>> +static void rbtree_insert(struct drm_buddy *mm,
>> + struct drm_buddy_block *block)
>> {
>> + struct rb_root *root = &mm->free_tree[drm_buddy_block_order(block)];
>> + struct rb_node **link = &root->rb_node;
>> + struct rb_node *parent = NULL;
>> struct drm_buddy_block *node;
>> - struct list_head *head;
>> + u64 offset;
>> +
>> + offset = drm_buddy_block_offset(block);
>> - head = &mm->free_list[drm_buddy_block_order(block)];
>> - if (list_empty(head)) {
>> - list_add(&block->link, head);
>> - return;
>> + while (*link) {
>> + parent = *link;
>> + node = rb_entry(parent, struct drm_buddy_block, rb);
>> +
>> + if (offset < drm_buddy_block_offset(node))
>> + link = &parent->rb_left;
>> + else
>> + link = &parent->rb_right;
>> }
>> - list_for_each_entry(node, head, link)
>> - if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
>> - break;
>> + rb_link_node(&block->rb, parent, link);
>> + rb_insert_color(&block->rb, root);
>> +}
>> +
>> +static void rbtree_remove(struct drm_buddy *mm,
>> + struct drm_buddy_block *block)
>> +{
>> + struct rb_root *root;
>> - __list_add(&block->link, node->link.prev, &node->link);
>> + root = &mm->free_tree[drm_buddy_block_order(block)];
>> + rb_erase(&block->rb, root);
>> +
>> + RB_CLEAR_NODE(&block->rb);
>> +}
>> +
>> +static inline struct drm_buddy_block *
>> +rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
>> +{
>> + struct rb_node *node = rb_last(&mm->free_tree[order]);
>> +
>> + return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
>> +}
>> +
>> +static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
>> +{
>> + return RB_EMPTY_ROOT(&mm->free_tree[order]);
>> }
>> static void clear_reset(struct drm_buddy_block *block)
>> @@ -70,12 +102,13 @@ static void mark_cleared(struct drm_buddy_block *block)
>> block->header |= DRM_BUDDY_HEADER_CLEAR;
>> }
>> -static void mark_allocated(struct drm_buddy_block *block)
>> +static void mark_allocated(struct drm_buddy *mm,
>> + struct drm_buddy_block *block)
>> {
>> block->header &= ~DRM_BUDDY_HEADER_STATE;
>> block->header |= DRM_BUDDY_ALLOCATED;
>> - list_del(&block->link);
>> + rbtree_remove(mm, block);
>> }
>> static void mark_free(struct drm_buddy *mm,
>> @@ -84,15 +117,16 @@ static void mark_free(struct drm_buddy *mm,
>> block->header &= ~DRM_BUDDY_HEADER_STATE;
>> block->header |= DRM_BUDDY_FREE;
>> - list_insert_sorted(mm, block);
>> + rbtree_insert(mm, block);
>> }
>> -static void mark_split(struct drm_buddy_block *block)
>> +static void mark_split(struct drm_buddy *mm,
>> + struct drm_buddy_block *block)
>> {
>> block->header &= ~DRM_BUDDY_HEADER_STATE;
>> block->header |= DRM_BUDDY_SPLIT;
>> - list_del(&block->link);
>> + rbtree_remove(mm, block);
>> }
>> static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
>> @@ -148,7 +182,7 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
>> mark_cleared(parent);
>> }
>> - list_del(&buddy->link);
>> + rbtree_remove(mm, buddy);
>> if (force_merge && drm_buddy_block_is_clear(buddy))
>> mm->clear_avail -= drm_buddy_block_size(mm, buddy);
>> @@ -179,12 +213,17 @@ static int __force_merge(struct drm_buddy *mm,
>> return -EINVAL;
>> for (i = min_order - 1; i >= 0; i--) {
>> - struct drm_buddy_block *block, *prev;
>> + struct drm_buddy_block *block, *prev_block, *first_block;
>> +
>> + first_block = rb_entry(rb_first(&mm->free_tree[i]), struct drm_buddy_block, rb);
>> - list_for_each_entry_safe_reverse(block, prev, &mm->free_list[i], link) {
>> + for_each_rb_entry_reverse_safe(block, prev_block, &mm->free_tree[i], rb) {
>> struct drm_buddy_block *buddy;
>> u64 block_start, block_end;
>> + if (RB_EMPTY_NODE(&block->rb))
>> + break;
>
> If we got the block from the rb tree, can it be empty here?
I saw a crash here earlier without this check while running graphics
workloads, but it is not reproducible now. I will test with more
workloads and remove the check if everything looks fine.
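For reference, one case I can see where the walk could land on an
already-erased node (hypothetical on my side, to be confirmed by more
testing): when the merged buddy is both the cached prev_block and the
first block in the tree, prev_block is intentionally not advanced:

    for_each_rb_entry_reverse_safe(block, prev_block, &mm->free_tree[i], rb) {
            ...
            if (prev_block && prev_block == buddy) {
                    if (prev_block != first_block)
                            prev_block = rb_entry(rb_prev(&prev_block->rb),
                                                  struct drm_buddy_block,
                                                  rb);
                    /* else: prev_block stays on the buddy, which
                     * __drm_buddy_free() erases below, so the next
                     * iteration relies on the RB_EMPTY_NODE() check
                     * to terminate the walk.
                     */
            }
            rbtree_remove(mm, block);
            ...
    }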
>
>> +
>> if (!block->parent)
>> continue;
>> @@ -206,10 +245,14 @@ static int __force_merge(struct drm_buddy *mm,
>> * block in the next iteration as we would free the
>> * buddy block as part of the free function.
>> */
>> - if (prev == buddy)
>> - prev = list_prev_entry(prev, link);
>> + if (prev_block && prev_block == buddy) {
>> + if (prev_block != first_block)
>> + prev_block = rb_entry(rb_prev(&prev_block->rb),
>> + struct drm_buddy_block,
>> + rb);
>> + }
>> - list_del(&block->link);
>> + rbtree_remove(mm, block);
>> if (drm_buddy_block_is_clear(block))
>> mm->clear_avail -= drm_buddy_block_size(mm, block);
>> @@ -258,14 +301,14 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>> BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
>> - mm->free_list = kmalloc_array(mm->max_order + 1,
>> - sizeof(struct list_head),
>> + mm->free_tree = kmalloc_array(mm->max_order + 1,
>> + sizeof(struct rb_root),
>> GFP_KERNEL);
>> - if (!mm->free_list)
>> + if (!mm->free_tree)
>> return -ENOMEM;
>> for (i = 0; i <= mm->max_order; ++i)
>> - INIT_LIST_HEAD(&mm->free_list[i]);
>> + mm->free_tree[i] = RB_ROOT;
>> mm->n_roots = hweight64(size);
>> @@ -273,7 +316,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>> sizeof(struct drm_buddy_block *),
>> GFP_KERNEL);
>> if (!mm->roots)
>> - goto out_free_list;
>> + goto out_free_tree;
>> offset = 0;
>> i = 0;
>> @@ -312,8 +355,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>> while (i--)
>> drm_block_free(mm, mm->roots[i]);
>> kfree(mm->roots);
>> -out_free_list:
>> - kfree(mm->free_list);
>> +out_free_tree:
>> + kfree(mm->free_tree);
>> return -ENOMEM;
>> }
>> EXPORT_SYMBOL(drm_buddy_init);
>> @@ -323,7 +366,7 @@ EXPORT_SYMBOL(drm_buddy_init);
>> *
>> * @mm: DRM buddy manager to free
>> *
>> - * Cleanup memory manager resources and the freelist
>> + * Cleanup memory manager resources and the freetree
>> */
>> void drm_buddy_fini(struct drm_buddy *mm)
>> {
>> @@ -350,7 +393,7 @@ void drm_buddy_fini(struct drm_buddy *mm)
>> WARN_ON(mm->avail != mm->size);
>> kfree(mm->roots);
>> - kfree(mm->free_list);
>> + kfree(mm->free_tree);
>> }
>> EXPORT_SYMBOL(drm_buddy_fini);
>> @@ -383,7 +426,7 @@ static int split_block(struct drm_buddy *mm,
>> clear_reset(block);
>> }
>> - mark_split(block);
>> + mark_split(mm, block);
>> return 0;
>> }
>> @@ -598,7 +641,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
>> for (i = order; i <= mm->max_order; ++i) {
>> struct drm_buddy_block *tmp_block;
>> - list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
>> + for_each_rb_entry_reverse(tmp_block, &mm->free_tree[i], rb) {
>> if (block_incompatible(tmp_block, flags))
>> continue;
>> @@ -624,7 +667,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
>> }
>> static struct drm_buddy_block *
>> -alloc_from_freelist(struct drm_buddy *mm,
>> +alloc_from_freetree(struct drm_buddy *mm,
>> unsigned int order,
>> unsigned long flags)
>> {
>> @@ -641,7 +684,7 @@ alloc_from_freelist(struct drm_buddy *mm,
>> for (tmp = order; tmp <= mm->max_order; ++tmp) {
>> struct drm_buddy_block *tmp_block;
>> - list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
>> + for_each_rb_entry_reverse(tmp_block, &mm->free_tree[tmp], rb) {
>> if (block_incompatible(tmp_block, flags))
>> continue;
>> @@ -657,10 +700,8 @@ alloc_from_freelist(struct drm_buddy *mm,
>> if (!block) {
>> /* Fallback method */
>> for (tmp = order; tmp <= mm->max_order; ++tmp) {
>> - if (!list_empty(&mm->free_list[tmp])) {
>> - block = list_last_entry(&mm->free_list[tmp],
>> - struct drm_buddy_block,
>> - link);
>> + if (!rbtree_is_empty(mm, tmp)) {
>> + block = rbtree_last_entry(mm, tmp);
>> if (block)
>> break;
>> }
>> @@ -728,7 +769,7 @@ static int __alloc_range(struct drm_buddy *mm,
>> if (contains(start, end, block_start, block_end)) {
>> if (drm_buddy_block_is_free(block)) {
>> - mark_allocated(block);
>> + mark_allocated(mm, block);
>> total_allocated += drm_buddy_block_size(mm, block);
>> mm->avail -= drm_buddy_block_size(mm, block);
>> if (drm_buddy_block_is_clear(block))
>> @@ -806,7 +847,6 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>> {
>> u64 rhs_offset, lhs_offset, lhs_size, filled;
>> struct drm_buddy_block *block;
>> - struct list_head *list;
>> LIST_HEAD(blocks_lhs);
>> unsigned long pages;
>> unsigned int order;
>> @@ -819,11 +859,10 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>> if (order == 0)
>> return -ENOSPC;
>> - list = &mm->free_list[order];
>> - if (list_empty(list))
>> + if (rbtree_is_empty(mm, order))
>> return -ENOSPC;
>> - list_for_each_entry_reverse(block, list, link) {
>> + for_each_rb_entry_reverse(block, &mm->free_tree[order], rb) {
>> /* Allocate blocks traversing RHS */
>> rhs_offset = drm_buddy_block_offset(block);
>> err = __drm_buddy_alloc_range(mm, rhs_offset, size,
>> @@ -933,7 +972,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>> list_add(&block->tmp_link, &dfs);
>> err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
>> if (err) {
>> - mark_allocated(block);
>> + mark_allocated(mm, block);
>> mm->avail -= drm_buddy_block_size(mm, block);
>> if (drm_buddy_block_is_clear(block))
>> mm->clear_avail -= drm_buddy_block_size(mm, block);
>> @@ -956,8 +995,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
>> return __drm_buddy_alloc_range_bias(mm, start, end,
>> order, flags);
>> else
>> - /* Allocate from freelist */
>> - return alloc_from_freelist(mm, order, flags);
>> + /* Allocate from freetree */
>> + return alloc_from_freetree(mm, order, flags);
>> }
>> /**
>> @@ -974,8 +1013,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
>> * alloc_range_bias() called on range limitations, which traverses
>> * the tree and returns the desired block.
>> *
>> - * alloc_from_freelist() called when *no* range restrictions
>> - * are enforced, which picks the block from the freelist.
>> + * alloc_from_freetree() called when *no* range restrictions
>> + * are enforced, which picks the block from the freetree.
>> *
>> * Returns:
>> * 0 on success, error code on failure.
>> @@ -1077,7 +1116,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>> }
>> } while (1);
>> - mark_allocated(block);
>> + mark_allocated(mm, block);
>> mm->avail -= drm_buddy_block_size(mm, block);
>> if (drm_buddy_block_is_clear(block))
>> mm->clear_avail -= drm_buddy_block_size(mm, block);
>> @@ -1161,7 +1200,7 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>> struct drm_buddy_block *block;
>> u64 count = 0, free;
>> - list_for_each_entry(block, &mm->free_list[order], link) {
>> + for_each_rb_entry(block, &mm->free_tree[order], rb) {
>> BUG_ON(!drm_buddy_block_is_free(block));
>> count++;
>> }
>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index 9689a7c5dd36..a64d108a33b7 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -10,6 +10,7 @@
>> #include <linux/list.h>
>> #include <linux/slab.h>
>> #include <linux/sched.h>
>> +#include <linux/rbtree.h>
>> #include <drm/drm_print.h>
>> @@ -22,6 +23,41 @@
>> start__ >= max__ || size__ > max__ - start__; \
>> })
>> +/*
>> + * for_each_rb_entry() - iterate over an RB tree in order
>> + * @pos: the struct type * to use as a loop cursor
>> + * @root: pointer to struct rb_root to iterate
>> + * @member: name of the rb_node field within the struct
>> + */
>> +#define for_each_rb_entry(pos, root, member) \
>> + for (pos = rb_entry_safe(rb_first(root), typeof(*pos), member); \
>> + pos; \
>> + pos = rb_entry_safe(rb_next(&(pos)->member), typeof(*pos), member))
>> +
>> +/*
>> + * for_each_rb_entry_reverse() - iterate over an RB tree in reverse order
>> + * @pos: the struct type * to use as a loop cursor
>> + * @root: pointer to struct rb_root to iterate
>> + * @member: name of the rb_node field within the struct
>> + */
>> +#define for_each_rb_entry_reverse(pos, root, member) \
>> + for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member); \
>> + pos; \
>> + pos = rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member))
>> +
>> +/**
>> + * for_each_rb_entry_reverse_safe() - safely iterate over an RB tree in reverse order
>> + * @pos: the struct type * to use as a loop cursor.
>> + * @n: another struct type * to use as temporary storage.
>> + * @root: pointer to struct rb_root to iterate.
>> + * @member: name of the rb_node field within the struct.
>> + */
>> +#define for_each_rb_entry_reverse_safe(pos, n, root, member) \
>
> Would it make sense to give these a less generic name? Something like
> for_each_rb_free_block_* ?
>
> Also should this be exported or rather kept within .c?
Yes, I will change it.
>
>> + for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member), \
>> + n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL; \
>> + pos; \
>> + pos = n, n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL)
>
>
>> +
>> #define DRM_BUDDY_RANGE_ALLOCATION BIT(0)
>> #define DRM_BUDDY_TOPDOWN_ALLOCATION BIT(1)
>> #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2)
>> @@ -53,6 +89,7 @@ struct drm_buddy_block {
>> * a list, if so desired. As soon as the block is freed with
>> * drm_buddy_free* ownership is given back to the mm.
>> */
>> + struct rb_node rb;
>> struct list_head link;
>
> I think we can be slippery here and make this a union? They should be
> mutually exclusive AFAICT?
AFAIK, we need both: rb_node rb is for managing the free blocks, while
list_head link is used for adding the allocated blocks to the driver's
list.
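For reference, this is roughly (from memory) how the two fields end up
being used:

    /* free blocks are only reachable through the per-order tree */
    mark_free(mm, block);                   /* -> rbtree_insert(mm, block) */

    /* allocated blocks are handed back to the caller on 'link',
     * e.g. at the end of drm_buddy_alloc_blocks():
     */
    mark_allocated(mm, block);              /* -> rbtree_remove(mm, block) */
    list_add_tail(&block->link, &allocated);
    ...
    list_splice_tail(&allocated, blocks);   /* now owned by the driver */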
>
>> struct list_head tmp_link;
>
> Otherwise it should be possible to get rid of this instead, and just
> re-use link? Could be done as separate patch, if this makes sense.
I think we cannot reuse link here, since it is needed to add the
allocated blocks to the driver's list, and tmp_link is already used in
alloc_range() and alloc_range_bias().
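For example, the DFS in those paths keeps its worklist on tmp_link
while link may already be carrying blocks returned to the caller,
roughly:

    list_add(&block->tmp_link, &dfs);
    do {
            block = list_first_entry_or_null(&dfs,
                                             struct drm_buddy_block,
                                             tmp_link);
            if (!block)
                    break;
            list_del(&block->tmp_link);
            ...
    } while (1);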
Regards,
Arun.
>
>> };
>> @@ -68,7 +105,7 @@ struct drm_buddy_block {
>> */
>> struct drm_buddy {
>> /* Maintain a free list for each order. */
>> - struct list_head *free_list;
>> + struct rb_root *free_tree;
>> /*
>> * Maintain explicit binary tree(s) to track the allocation of the
>