[PATCH v3 1/2] drm/buddy: Optimize free block management with RB tree

Matthew Auld matthew.auld at intel.com
Fri Aug 22 08:37:13 UTC 2025


On 21/08/2025 16:06, Arunpravin Paneer Selvam wrote:
> Replace the freelist (O(n)) used for free block management with a
> red-black tree, providing more efficient O(log n) search, insert,
> and delete operations. This improves scalability and performance
> when managing large numbers of free blocks per order (e.g., hundreds
> or thousands).
> 
> In the VK-CTS memory stress subtest, the buddy manager merges
> fragmented memory and inserts freed blocks into the freelist. Since
> freelist insertion is O(n), this becomes a bottleneck as fragmentation
> increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
> with the freelist, compared to just 0.03% with the RB tree
> (rbtree_insert.isra.0), despite performing the same sorted insert.
> 
> This also improves performance in heavily fragmented workloads,
> such as games or graphics tests that stress memory.
> 
> v3 (Matthew):
>    - Remove the RB_EMPTY_NODE check in the force_merge function.
>    - Rename the rb for-loop macros to less generic names and move
>      them to the .c file.
>    - Make the rb node 'rb' and the list 'link' fields a union.
> 
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>

CI is reporting a crash in rb_erase when running the drm_buddy kunit, 
somewhere in drm_test_buddy_alloc_clear it seems.
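
One suspect below: the prev_block handling in __force_merge() can be 
left pointing at the buddy block after the merge frees it, if the buddy 
happens to be the leftmost node in the tree. That would line up with 
rb_erase() blowing up on a stale node. More on that at the hunk itself.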

> ---
>   drivers/gpu/drm/drm_buddy.c | 175 +++++++++++++++++++++++++-----------
>   include/drm/drm_buddy.h     |   9 +-
>   2 files changed, 130 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index a94061f373de..92226a46cc2c 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -14,6 +14,41 @@
>   
>   static struct kmem_cache *slab_blocks;
>   
> +/*
> + * for_each_rb_free_block() - iterate over an RB tree in order
> + * @pos:	the struct type * to use as a loop cursor
> + * @root:	pointer to struct rb_root to iterate
> + * @member:	name of the rb_node field within the struct
> + */
> +#define for_each_rb_free_block(pos, root, member) \
> +	for (pos = rb_entry_safe(rb_first(root), typeof(*pos), member); \
> +	     pos; \
> +	     pos = rb_entry_safe(rb_next(&(pos)->member), typeof(*pos), member))
> +
> +/*
> + * for_each_rb_free_block_reverse() - iterate over an RB tree in reverse order
> + * @pos:	the struct type * to use as a loop cursor
> + * @root:	pointer to struct rb_root to iterate
> + * @member:	name of the rb_node field within the struct
> + */
> +#define for_each_rb_free_block_reverse(pos, root, member) \
> +	for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member); \
> +	     pos; \
> +	     pos = rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member))
> +
> +/*
> + * for_each_rb_free_block_reverse_safe() - safely iterate over an RB tree in reverse order
> + * @pos:	the struct type * to use as a loop cursor
> + * @n:		another struct type * to use as temporary storage
> + * @root:	pointer to struct rb_root to iterate
> + * @member:	name of the rb_node field within the struct
> + */
> +#define for_each_rb_free_block_reverse_safe(pos, n, root, member) \
> +	for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member), \
> +	     n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL; \
> +	     pos; \
> +	     pos = n, n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL)
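
The iterator macros look fine to me. Just to write down my 
understanding: the _safe variant caches the predecessor before the body 
runs, so the body is allowed to erase the current node. Rough usage 
sketch (mm and order are whatever is in scope at the call site; 
should_remove() is a made-up predicate):

	struct drm_buddy_block *pos, *n;

	/* Walks free blocks from highest to lowest offset; the body
	 * may erase pos itself without breaking the walk. */
	for_each_rb_free_block_reverse_safe(pos, n, &mm->free_tree[order], rb) {
		if (should_remove(pos))
			rbtree_remove(mm, pos);
	}

Note it only protects against erasing pos, not the cached n, which is 
what bites in __force_merge() below.
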
> +
>   static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>   					       struct drm_buddy_block *parent,
>   					       unsigned int order,
> @@ -31,6 +66,8 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>   	block->header |= order;
>   	block->parent = parent;
>   
> +	RB_CLEAR_NODE(&block->rb);
> +
>   	BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
>   	return block;
>   }
> @@ -41,23 +78,53 @@ static void drm_block_free(struct drm_buddy *mm,
>   	kmem_cache_free(slab_blocks, block);
>   }
>   
> -static void list_insert_sorted(struct drm_buddy *mm,
> -			       struct drm_buddy_block *block)
> +static void rbtree_insert(struct drm_buddy *mm,
> +			  struct drm_buddy_block *block)
>   {
> +	struct rb_root *root = &mm->free_tree[drm_buddy_block_order(block)];
> +	struct rb_node **link = &root->rb_node;
> +	struct rb_node *parent = NULL;
>   	struct drm_buddy_block *node;
> -	struct list_head *head;
> +	u64 offset;
> +
> +	offset = drm_buddy_block_offset(block);
>   
> -	head = &mm->free_list[drm_buddy_block_order(block)];
> -	if (list_empty(head)) {
> -		list_add(&block->link, head);
> -		return;
> +	while (*link) {
> +		parent = *link;
> +		node = rb_entry(parent, struct drm_buddy_block, rb);
> +
> +		if (offset < drm_buddy_block_offset(node))
> +			link = &parent->rb_left;
> +		else
> +			link = &parent->rb_right;
>   	}
>   
> -	list_for_each_entry(node, head, link)
> -		if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
> -			break;
> +	rb_link_node(&block->rb, parent, link);
> +	rb_insert_color(&block->rb, root);
> +}
>   
> -	__list_add(&block->link, node->link.prev, &node->link);
> +static void rbtree_remove(struct drm_buddy *mm,
> +			  struct drm_buddy_block *block)
> +{
> +	struct rb_root *root;
> +
> +	root = &mm->free_tree[drm_buddy_block_order(block)];
> +	rb_erase(&block->rb, root);
> +
> +	RB_CLEAR_NODE(&block->rb);
> +}
> +
> +static inline struct drm_buddy_block *
> +rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
> +{
> +	struct rb_node *node = rb_last(&mm->free_tree[order]);
> +
> +	return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
> +}
> +
> +static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
> +{
> +	return RB_EMPTY_ROOT(&mm->free_tree[order]);
>   }
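
Not a blocker, but rbtree_insert() could probably just be the generic 
rb_add() with a less() callback. And if the rb_last() in the fallback 
path ever shows up in profiles, switching to rb_root_cached with the 
comparison inverted would make the rightmost lookup O(1) via 
rb_first_cached(). Untested sketch, assuming free_tree becomes an array 
of struct rb_root_cached:

	static bool rbtree_block_greater(struct rb_node *a, const struct rb_node *b)
	{
		return drm_buddy_block_offset(rb_entry(a, struct drm_buddy_block, rb)) >
		       drm_buddy_block_offset(rb_entry(b, struct drm_buddy_block, rb));
	}

	/* Descending order: the cached leftmost node is then the
	 * highest free block of that order. */
	rb_add_cached(&block->rb, &mm->free_tree[order], rbtree_block_greater);

The iteration direction of the macros would need flipping to match, so 
maybe one for a follow-up rather than this series.
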
>   
>   static void clear_reset(struct drm_buddy_block *block)
> @@ -70,12 +137,13 @@ static void mark_cleared(struct drm_buddy_block *block)
>   	block->header |= DRM_BUDDY_HEADER_CLEAR;
>   }
>   
> -static void mark_allocated(struct drm_buddy_block *block)
> +static void mark_allocated(struct drm_buddy *mm,
> +			   struct drm_buddy_block *block)
>   {
>   	block->header &= ~DRM_BUDDY_HEADER_STATE;
>   	block->header |= DRM_BUDDY_ALLOCATED;
>   
> -	list_del(&block->link);
> +	rbtree_remove(mm, block);
>   }
>   
>   static void mark_free(struct drm_buddy *mm,
> @@ -84,15 +152,16 @@ static void mark_free(struct drm_buddy *mm,
>   	block->header &= ~DRM_BUDDY_HEADER_STATE;
>   	block->header |= DRM_BUDDY_FREE;
>   
> -	list_insert_sorted(mm, block);
> +	rbtree_insert(mm, block);
>   }
>   
> -static void mark_split(struct drm_buddy_block *block)
> +static void mark_split(struct drm_buddy *mm,
> +		       struct drm_buddy_block *block)
>   {
>   	block->header &= ~DRM_BUDDY_HEADER_STATE;
>   	block->header |= DRM_BUDDY_SPLIT;
>   
> -	list_del(&block->link);
> +	rbtree_remove(mm, block);
>   }
>   
>   static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
> @@ -148,7 +217,7 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
>   				mark_cleared(parent);
>   		}
>   
> -		list_del(&buddy->link);
> +		rbtree_remove(mm, buddy);
>   		if (force_merge && drm_buddy_block_is_clear(buddy))
>   			mm->clear_avail -= drm_buddy_block_size(mm, buddy);
>   
> @@ -179,9 +248,11 @@ static int __force_merge(struct drm_buddy *mm,
>   		return -EINVAL;
>   
>   	for (i = min_order - 1; i >= 0; i--) {
> -		struct drm_buddy_block *block, *prev;
> +		struct drm_buddy_block *block, *prev_block, *first_block;
>   
> -		list_for_each_entry_safe_reverse(block, prev, &mm->free_list[i], link) {
> +		first_block = rb_entry(rb_first(&mm->free_tree[i]), struct drm_buddy_block, rb);
> +
> +		for_each_rb_free_block_reverse_safe(block, prev_block, &mm->free_tree[i], rb) {
>   			struct drm_buddy_block *buddy;
>   			u64 block_start, block_end;
>   
> @@ -206,10 +277,14 @@ static int __force_merge(struct drm_buddy *mm,
>   			 * block in the next iteration as we would free the
>   			 * buddy block as part of the free function.
>   			 */
> -			if (prev == buddy)
> -				prev = list_prev_entry(prev, link);
> +			if (prev_block && prev_block == buddy) {
> +				if (prev_block != first_block)
> +					prev_block = rb_entry(rb_prev(&prev_block->rb),
> +							      struct drm_buddy_block,
> +							      rb);
> +			}
>   
> -			list_del(&block->link);
> +			rbtree_remove(mm, block);
>   			if (drm_buddy_block_is_clear(block))
>   				mm->clear_avail -= drm_buddy_block_size(mm, block);
>   
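
I think this is where the kunit crash comes from: when the buddy is the 
leftmost block of this order, prev_block is left alone here, but 
__drm_buddy_free() below erases and frees the buddy as part of the 
merge. The next loop iteration then calls rb_prev() on freed memory, 
and the eventual rbtree_remove() hands rb_erase() a stale node. The 
list version was safe because list_prev_entry() just walked onto the 
head sentinel. Something like this should cure it (untested):

	if (prev_block && prev_block == buddy) {
		if (prev_block != first_block)
			prev_block = rb_entry(rb_prev(&prev_block->rb),
					      struct drm_buddy_block,
					      rb);
		else
			/* buddy was the leftmost block */
			prev_block = NULL;
	}

Also, rb_first() returns NULL on an empty tree, so first_block wants 
rb_entry_safe() rather than rb_entry().
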
> @@ -258,14 +333,14 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>   
>   	BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
>   
> -	mm->free_list = kmalloc_array(mm->max_order + 1,
> -				      sizeof(struct list_head),
> +	mm->free_tree = kmalloc_array(mm->max_order + 1,
> +				      sizeof(struct rb_root),
>   				      GFP_KERNEL);
> -	if (!mm->free_list)
> +	if (!mm->free_tree)
>   		return -ENOMEM;
>   
>   	for (i = 0; i <= mm->max_order; ++i)
> -		INIT_LIST_HEAD(&mm->free_list[i]);
> +		mm->free_tree[i] = RB_ROOT;
>   
>   	mm->n_roots = hweight64(size);
>   
> @@ -273,7 +348,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>   				  sizeof(struct drm_buddy_block *),
>   				  GFP_KERNEL);
>   	if (!mm->roots)
> -		goto out_free_list;
> +		goto out_free_tree;
>   
>   	offset = 0;
>   	i = 0;
> @@ -312,8 +387,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>   	while (i--)
>   		drm_block_free(mm, mm->roots[i]);
>   	kfree(mm->roots);
> -out_free_list:
> -	kfree(mm->free_list);
> +out_free_tree:
> +	kfree(mm->free_tree);
>   	return -ENOMEM;
>   }
>   EXPORT_SYMBOL(drm_buddy_init);
> @@ -323,7 +398,7 @@ EXPORT_SYMBOL(drm_buddy_init);
>    *
>    * @mm: DRM buddy manager to free
>    *
> - * Cleanup memory manager resources and the freelist
> + * Cleanup memory manager resources and the freetree
>    */
>   void drm_buddy_fini(struct drm_buddy *mm)
>   {
> @@ -350,7 +425,7 @@ void drm_buddy_fini(struct drm_buddy *mm)
>   	WARN_ON(mm->avail != mm->size);
>   
>   	kfree(mm->roots);
> -	kfree(mm->free_list);
> +	kfree(mm->free_tree);
>   }
>   EXPORT_SYMBOL(drm_buddy_fini);
>   
> @@ -383,7 +458,7 @@ static int split_block(struct drm_buddy *mm,
>   		clear_reset(block);
>   	}
>   
> -	mark_split(block);
> +	mark_split(mm, block);
>   
>   	return 0;
>   }
> @@ -433,7 +508,7 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
>   	for (i = 0; i <= mm->max_order; ++i) {
>   		struct drm_buddy_block *block;
>   
> -		list_for_each_entry_reverse(block, &mm->free_list[i], link) {
> +		for_each_rb_free_block_reverse(block, &mm->free_tree[i], rb) {
>   			if (is_clear != drm_buddy_block_is_clear(block)) {
>   				if (is_clear) {
>   					mark_cleared(block);
> @@ -641,7 +716,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
>   	for (i = order; i <= mm->max_order; ++i) {
>   		struct drm_buddy_block *tmp_block;
>   
> -		list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
> +		for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[i], rb) {
>   			if (block_incompatible(tmp_block, flags))
>   				continue;
>   
> @@ -667,7 +742,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
>   }
>   
>   static struct drm_buddy_block *
> -alloc_from_freelist(struct drm_buddy *mm,
> +alloc_from_freetree(struct drm_buddy *mm,
>   		    unsigned int order,
>   		    unsigned long flags)
>   {
> @@ -684,7 +759,7 @@ alloc_from_freelist(struct drm_buddy *mm,
>   		for (tmp = order; tmp <= mm->max_order; ++tmp) {
>   			struct drm_buddy_block *tmp_block;
>   
> -			list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
> +			for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[tmp], rb) {
>   				if (block_incompatible(tmp_block, flags))
>   					continue;
>   
> @@ -700,10 +775,8 @@ alloc_from_freelist(struct drm_buddy *mm,
>   	if (!block) {
>   		/* Fallback method */
>   		for (tmp = order; tmp <= mm->max_order; ++tmp) {
> -			if (!list_empty(&mm->free_list[tmp])) {
> -				block = list_last_entry(&mm->free_list[tmp],
> -							struct drm_buddy_block,
> -							link);
> +			if (!rbtree_is_empty(mm, tmp)) {
> +				block = rbtree_last_entry(mm, tmp);
>   				if (block)
>   					break;
>   			}
> @@ -771,7 +844,7 @@ static int __alloc_range(struct drm_buddy *mm,
>   
>   		if (contains(start, end, block_start, block_end)) {
>   			if (drm_buddy_block_is_free(block)) {
> -				mark_allocated(block);
> +				mark_allocated(mm, block);
>   				total_allocated += drm_buddy_block_size(mm, block);
>   				mm->avail -= drm_buddy_block_size(mm, block);
>   				if (drm_buddy_block_is_clear(block))
> @@ -849,7 +922,6 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>   {
>   	u64 rhs_offset, lhs_offset, lhs_size, filled;
>   	struct drm_buddy_block *block;
> -	struct list_head *list;
>   	LIST_HEAD(blocks_lhs);
>   	unsigned long pages;
>   	unsigned int order;
> @@ -862,11 +934,10 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>   	if (order == 0)
>   		return -ENOSPC;
>   
> -	list = &mm->free_list[order];
> -	if (list_empty(list))
> +	if (rbtree_is_empty(mm, order))
>   		return -ENOSPC;
>   
> -	list_for_each_entry_reverse(block, list, link) {
> +	for_each_rb_free_block_reverse(block, &mm->free_tree[order], rb) {
>   		/* Allocate blocks traversing RHS */
>   		rhs_offset = drm_buddy_block_offset(block);
>   		err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
> @@ -976,7 +1047,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>   	list_add(&block->tmp_link, &dfs);
>   	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
>   	if (err) {
> -		mark_allocated(block);
> +		mark_allocated(mm, block);
>   		mm->avail -= drm_buddy_block_size(mm, block);
>   		if (drm_buddy_block_is_clear(block))
>   			mm->clear_avail -= drm_buddy_block_size(mm, block);
> @@ -999,8 +1070,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   		return  __drm_buddy_alloc_range_bias(mm, start, end,
>   						     order, flags);
>   	else
> -		/* Allocate from freelist */
> -		return alloc_from_freelist(mm, order, flags);
> +		/* Allocate from freetree */
> +		return alloc_from_freetree(mm, order, flags);
>   }
>   
>   /**
> @@ -1017,8 +1088,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
>    * alloc_range_bias() called on range limitations, which traverses
>    * the tree and returns the desired block.
>    *
> - * alloc_from_freelist() called when *no* range restrictions
> - * are enforced, which picks the block from the freelist.
> + * alloc_from_freetree() called when *no* range restrictions
> + * are enforced, which picks the block from the freetree.
>    *
>    * Returns:
>    * 0 on success, error code on failure.
> @@ -1120,7 +1191,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   			}
>   		} while (1);
>   
> -		mark_allocated(block);
> +		mark_allocated(mm, block);
>   		mm->avail -= drm_buddy_block_size(mm, block);
>   		if (drm_buddy_block_is_clear(block))
>   			mm->clear_avail -= drm_buddy_block_size(mm, block);
> @@ -1204,7 +1275,7 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>   		struct drm_buddy_block *block;
>   		u64 count = 0, free;
>   
> -		list_for_each_entry(block, &mm->free_list[order], link) {
> +		for_each_rb_free_block(block, &mm->free_tree[order], rb) {
>   			BUG_ON(!drm_buddy_block_is_free(block));
>   			count++;
>   		}
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index 513837632b7d..091823592034 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -10,6 +10,7 @@
>   #include <linux/list.h>
>   #include <linux/slab.h>
>   #include <linux/sched.h>
> +#include <linux/rbtree.h>
>   
>   #include <drm/drm_print.h>
>   
> @@ -53,7 +54,11 @@ struct drm_buddy_block {
>   	 * a list, if so desired. As soon as the block is freed with
>   	 * drm_buddy_free* ownership is given back to the mm.
>   	 */
> -	struct list_head link;
> +	union {
> +		struct rb_node rb;
> +		struct list_head link;
> +	};
> +
>   	struct list_head tmp_link;
>   };
>   
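
The union itself makes sense, since a block is either on the mm's free 
tree (rb) or handed back to an owner that may track it via link, never 
both at once. The comment above only talks about the list usage though; 
maybe extend it along these lines:

	/*
	 * While the block is free, @rb links it into the per-order
	 * free tree owned by the mm. Once allocated, the owner may
	 * reuse the same storage as @link to track the block in a
	 * list; ownership returns to the mm on drm_buddy_free*().
	 */
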
> @@ -68,7 +73,7 @@ struct drm_buddy_block {
>    */
>   struct drm_buddy {
>   	/* Maintain a free list for each order. */
> -	struct list_head *free_list;
> +	struct rb_root *free_tree;
>   
>   	/*
>   	 * Maintain explicit binary tree(s) to track the allocation of the
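
nit: the comment here still says "free list":

	/* Maintain a free tree for each order. */
	struct rb_root *free_tree;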