[Intel-gfx] [PATCH] drm: Alloc high address for drm buddy topdown flag
Arunpravin Paneer Selvam
arunpravin.paneerselvam at amd.com
Tue Jan 10 15:26:47 UTC 2023
Hi Matthew,
On 1/10/2023 5:32 PM, Matthew Auld wrote:
> On 07/01/2023 15:15, Arunpravin Paneer Selvam wrote:
>> As we are observing low numbers in viewperf graphics benchmark, we
>> are strictly not allowing the top down flag enabled allocations
>> to steal the memory space from cpu visible region.
>>
>> The approach is, we are sorting each order list entries in
>> ascending order and compare the last entry of each order
>> list in the freelist and return the max block.
>
> Did you also run the selftests? Does everything still pass and
> complete in a reasonable amount of time?
> I will try giving a run
>>
>> This patch improves the viewperf 3D benchmark scores.
>>
>> Signed-off-by: Arunpravin Paneer Selvam
>> <Arunpravin.PaneerSelvam at amd.com>
>> ---
>> drivers/gpu/drm/drm_buddy.c | 81 ++++++++++++++++++++++++-------------
>> 1 file changed, 54 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index 11bb59399471..50916b2f2fc5 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -38,6 +38,25 @@ static void drm_block_free(struct drm_buddy *mm,
>> kmem_cache_free(slab_blocks, block);
>> }
>> +static void list_insert_sorted(struct drm_buddy *mm,
>> + struct drm_buddy_block *block)
>> +{
>> + struct drm_buddy_block *node;
>> + struct list_head *head;
>> +
>> + head = &mm->free_list[drm_buddy_block_order(block)];
>> + if (list_empty(head)) {
>> + list_add(&block->link, head);
>> + return;
>> + }
>> +
>> + list_for_each_entry(node, head, link)
>> + if (drm_buddy_block_offset(block) <
>> drm_buddy_block_offset(node))
>> + break;
>> +
>> + __list_add(&block->link, node->link.prev, &node->link);
>> +}
>> +
>> static void mark_allocated(struct drm_buddy_block *block)
>> {
>> block->header &= ~DRM_BUDDY_HEADER_STATE;
>> @@ -52,8 +71,7 @@ static void mark_free(struct drm_buddy *mm,
>> block->header &= ~DRM_BUDDY_HEADER_STATE;
>> block->header |= DRM_BUDDY_FREE;
>> - list_add(&block->link,
>> - &mm->free_list[drm_buddy_block_order(block)]);
>> + list_insert_sorted(mm, block);
>
> One advantage of not sorting is when splitting down a large block.
> Previously the most-recently-split would be at the start of the list
> for the next order down, where potentially the next allocation could
> use it. So perhaps less fragmentation if it's all part of one BO.
> Otherwise I don't see any other downsides, other than the extra
> overhead of sorting.
>
Allocating from freelist is traversing through right side (i.e top most
address range) and for TOPDOWN flag allocations we just split the top
most large block once and the subsequent TOPDOWN low order allocations
would get block from same already split large block
For the normal allocations, I modified to retrieve the blocks in each
order list from the last entry which has the high probability of getting
top most blocks as we have sorted the blocks in each order list.
Thus the bottom most large blocks are not frequently used, hence we have
more space for the visible region on dGPU.
For APU which has small sized VRAM space, the allocations are now
ordered and we don't allocate randomly from freelist solving
fragmentation issues.
>> }
>> static void mark_split(struct drm_buddy_block *block)
>> @@ -387,20 +405,26 @@ alloc_range_bias(struct drm_buddy *mm,
>> }
>> static struct drm_buddy_block *
>> -get_maxblock(struct list_head *head)
>> +get_maxblock(struct drm_buddy *mm, unsigned int order)
>> {
>> struct drm_buddy_block *max_block = NULL, *node;
>> + unsigned int i;
>> - max_block = list_first_entry_or_null(head,
>> - struct drm_buddy_block,
>> - link);
>> - if (!max_block)
>> - return NULL;
>> + for (i = order; i <= mm->max_order; ++i) {
>> + if (!list_empty(&mm->free_list[i])) {
>> + node = list_last_entry(&mm->free_list[i],
>> + struct drm_buddy_block,
>> + link);
>> + if (!max_block) {
>> + max_block = node;
>> + continue;
>> + }
>> - list_for_each_entry(node, head, link) {
>> - if (drm_buddy_block_offset(node) >
>> - drm_buddy_block_offset(max_block))
>> - max_block = node;
>> + if (drm_buddy_block_offset(node) >
>> + drm_buddy_block_offset(max_block)) {
>
> Formatting doesn't look right here.
I will check.
>
> Going to test this today with some workloads with small-bar and i915
> just to see if this improves/impacts anything for us.
>
>> + max_block = node;
>> + }
>> + }
>> }
>> return max_block;
>> @@ -412,20 +436,23 @@ alloc_from_freelist(struct drm_buddy *mm,
>> unsigned long flags)
>> {
>> struct drm_buddy_block *block = NULL;
>> - unsigned int i;
>> + unsigned int tmp;
>> int err;
>> - for (i = order; i <= mm->max_order; ++i) {
>> - if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
>> - block = get_maxblock(&mm->free_list[i]);
>> - if (block)
>> - break;
>> - } else {
>> - block = list_first_entry_or_null(&mm->free_list[i],
>> - struct drm_buddy_block,
>> - link);
>> - if (block)
>> - break;
>> + if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
>> + block = get_maxblock(mm, order);
>> + if (block)
>> + /* Store the obtained block order */
>> + tmp = drm_buddy_block_order(block);
>> + } else {
>> + for (tmp = order; tmp <= mm->max_order; ++tmp) {
>> + if (!list_empty(&mm->free_list[tmp])) {
>> + block = list_last_entry(&mm->free_list[tmp],
>> + struct drm_buddy_block,
>> + link);
>> + if (block)
>> + break;
>> + }
>> }
>> }
>> @@ -434,18 +461,18 @@ alloc_from_freelist(struct drm_buddy *mm,
>> BUG_ON(!drm_buddy_block_is_free(block));
>> - while (i != order) {
>> + while (tmp != order) {
>> err = split_block(mm, block);
>> if (unlikely(err))
>> goto err_undo;
>> block = block->right;
>> - i--;
>> + tmp--;
>> }
>> return block;
>> err_undo:
>> - if (i != order)
>> + if (tmp != order)
>> __drm_buddy_free(mm, block);
>> return ERR_PTR(err);
>> }
More information about the Intel-gfx
mailing list