[PATCH] drm/amdgpu: further lower VRAM allocation overhead

Wed Jul 14 08:33:42 UTC 2021

Hi Eric,

feel free to push into amd-staging-dkms-5.11, but please don't push it 
into amd-staging-drm-next.

The latter will just cause a merge failure which Alex needs to resolve 
manually.

I can take care of pushing to amd-staging-drm-next as soon as that is 
rebased on latest upstream.

Regards,
Christian.

Am 13.07.21 um 21:19 schrieb Eric Huang:
> Hi Christian/Felix,
>
> If you don't have any objections, it will be pushed into 
> amd-staging-dkms-5.11 and amd-staging-drm-next.
>
> Thanks,
> Eric
>
> On 2021-07-13 3:17 p.m., Eric Huang wrote:
>> For allocations larger than 48MiB we need more than a page for the
>> housekeeping in the worst case, resulting in the usual vmalloc overhead.
>>
>> Try to avoid this by assuming the good case and only falling back to the
>> worst case if this didn't work.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> Signed-off-by: Eric Huang <jinhuieric.huang at amd.com>
>> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 71 +++++++++++++++-----
>>   1 file changed, 53 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> index be4261c4512e..ecbe05e1db66 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> @@ -361,9 +361,11 @@ static void amdgpu_vram_mgr_virt_start(struct 
>> ttm_resource *mem,
>>   static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>                      struct ttm_buffer_object *tbo,
>>                      const struct ttm_place *place,
>> +                   unsigned long num_nodes,
>> +                   unsigned long pages_per_node,
>>                      struct ttm_resource *mem)
>>   {
>> -    unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>> +    unsigned long lpfn, pages_left, pages;
>>       struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>       struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>       uint64_t vis_usage = 0, mem_bytes, max_bytes;
>> @@ -393,21 +395,6 @@ static int amdgpu_vram_mgr_new(struct 
>> ttm_resource_manager *man,
>>           return -ENOSPC;
>>       }
>>   -    if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> -        pages_per_node = ~0ul;
>> -        num_nodes = 1;
>> -    } else {
>> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> -        pages_per_node = HPAGE_PMD_NR;
>> -#else
>> -        /* default to 2MB */
>> -        pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>> -#endif
>> -        pages_per_node = max_t(uint32_t, pages_per_node,
>> -                       mem->page_alignment);
>> -        num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>> -    }
>> -
>>       nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
>>                      GFP_KERNEL | __GFP_ZERO);
>>       if (!nodes) {
>> @@ -435,7 +422,12 @@ static int amdgpu_vram_mgr_new(struct 
>> ttm_resource_manager *man,
>>       i = 0;
>>       spin_lock(&mgr->lock);
>>       while (pages_left) {
>> -        uint32_t alignment = mem->page_alignment;
>> +        unsigned long alignment = mem->page_alignment;
>> +
>> +        if (i >= num_nodes) {
>> +            r = -E2BIG;
>> +            goto error;
>> +        }
>>             if (pages >= pages_per_node)
>>               alignment = pages_per_node;
>> @@ -492,6 +484,49 @@ static int amdgpu_vram_mgr_new(struct 
>> ttm_resource_manager *man,
>>       return r;
>>   }
>>   +/**
>> + * amdgpu_vram_mgr_alloc - allocate new range
>> + *
>> + * @man: TTM memory type manager
>> + * @tbo: TTM BO we need this range for
>> + * @place: placement flags and restrictions
>> + * @mem: the resulting mem object
>> + *
>> + * Allocate VRAM for the given BO.
>> + */
>> +static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
>> +                 struct ttm_buffer_object *tbo,
>> +                 const struct ttm_place *place,
>> +                 struct ttm_resource *mem)
>> +{
>> +    unsigned long num_nodes, pages_per_node;
>> +    int r;
>> +
>> +    if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>> +        return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, mem);
>> +
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +    pages_per_node = HPAGE_PMD_NR;
>> +#else
>> +    /* default to 2MB */
>> +    pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>> +#endif
>> +    pages_per_node = max_t(uint32_t, pages_per_node,
>> +                   mem->page_alignment);
>> +    num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>> +
>> +    if (sizeof(struct drm_mm_node) * num_nodes > PAGE_SIZE) {
>> +        r = amdgpu_vram_mgr_new(man, tbo, place,
>> +                PAGE_SIZE / sizeof(struct drm_mm_node),
>> +                pages_per_node,    mem);
>> +        if (r != -E2BIG)
>> +            return r;
>> +    }
>> +
>> +    return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, 
>> pages_per_node,
>> +                   mem);
>> +}
>> +
>>   /**
>>    * amdgpu_vram_mgr_del - free ranges
>>    *
>> @@ -693,7 +728,7 @@ static void amdgpu_vram_mgr_debug(struct 
>> ttm_resource_manager *man,
>>   }
>>     static const struct ttm_resource_manager_func 
>> amdgpu_vram_mgr_func = {
>> -    .alloc    = amdgpu_vram_mgr_new,
>> +    .alloc    = amdgpu_vram_mgr_alloc,
>>       .free    = amdgpu_vram_mgr_del,
>>       .debug    = amdgpu_vram_mgr_debug
>>   };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx