[PATCH] drm/amd/amdgpu: cover fragment size between 4 and 9 when not aligned

Christian König deathsimple at vodafone.de
Wed Aug 30 06:24:38 UTC 2017


Hi David & Roger,

> I think when you can select the fragment automatically, you shouldn't 
> involve vm_manager.fragment_size in the calculation; then we can use 
> the most appropriate fragment for every segment and get the best performance.
No, that won't work. I've already tried this and it decreases 
performance. The problem is that the L2 on pre Vega10 uses the fragment 
field to decide in which cache segment to put the PTE.

So we should only cover fragment sizes up to whatever the 
vm_manager.fragment_size configuration is currently.

Additionally, the patch needs to be simplified, because another 5-level 
recursion is not something we want, given the limited kernel 
stack (usually only 4K).

If you don't mind Roger I'm going to fix this today, already have a good 
idea how to handle it I think.

Regards,
Christian.

Am 30.08.2017 um 08:15 schrieb zhoucm1:
> Hi Roger,
>
> I think when you can select the fragment automatically, you shouldn't 
> involve vm_manager.fragment_size in the calculation; then we can use 
> the most appropriate fragment for every segment and get the best performance.
>
> So vm_manager.fragment_size should always be -1; if it becomes a 
> valid fragment value, then we should always use it and disable 
> automatic selection, which should only be for validation usage.
>
>
> Regards,
>
> David Zhou
>
>
> On 2017年08月30日 13:01, Roger He wrote:
>> this can get performance improvement for some cases
>>
>> Change-Id: Ibb58bb3099f7e8c4b5da90da73a03544cdb2bcb7
>> Signed-off-by: Roger He <Hongbo.He at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 98 
>> +++++++++++++++++++++++++++-------
>>   1 file changed, 79 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 592c3e7..4e5da5e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1375,7 +1375,7 @@ static int amdgpu_vm_update_ptes(struct 
>> amdgpu_pte_update_params *params,
>>   }
>>     /*
>> - * amdgpu_vm_frag_ptes - add fragment information to PTEs
>> + * amdgpu_vm_update_ptes_helper - add fragment information to PTEs
>>    *
>>    * @params: see amdgpu_pte_update_params definition
>>    * @vm: requested vm
>> @@ -1383,11 +1383,12 @@ static int amdgpu_vm_update_ptes(struct 
>> amdgpu_pte_update_params *params,
>>    * @end: last PTE to handle
>>    * @dst: addr those PTEs should point to
>>    * @flags: hw mapping flags
>> + * @fragment: fragment size
>>    * Returns 0 for success, -EINVAL for failure.
>>    */
>> -static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params    
>> *params,
>> -                uint64_t start, uint64_t end,
>> -                uint64_t dst, uint64_t flags)
>> +static int amdgpu_vm_update_ptes_helper(struct 
>> amdgpu_pte_update_params *params,
>> +                  uint64_t start, uint64_t end, uint64_t dst,
>> +                  uint64_t flags, int fragment)
>>   {
>>       int r;
>>   @@ -1409,41 +1410,100 @@ static int amdgpu_vm_frag_ptes(struct 
>> amdgpu_pte_update_params    *params,
>>        * Userspace can support this by aligning virtual base address and
>>        * allocation size to the fragment size.
>>        */
>> -    unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
>> -    uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
>> -    uint64_t frag_align = 1 << pages_per_frag;
>> +    uint64_t frag_flags, frag_align, frag_start, frag_end;
>>   -    uint64_t frag_start = ALIGN(start, frag_align);
>> -    uint64_t frag_end = end & ~(frag_align - 1);
>> +    if (start > end || fragment < 0)
>> +        return -EINVAL;
>>   -    /* system pages are non continuously */
>> -    if (params->src || !(flags & AMDGPU_PTE_VALID) ||
>> -        (frag_start >= frag_end))
>> -        return amdgpu_vm_update_ptes(params, start, end, dst, flags);
>> +    fragment = min(fragment, max(0, fls64(end - start) - 1));
>> +    frag_flags = AMDGPU_PTE_FRAG(fragment);
>> +    frag_align = 1 << fragment;
>> +    frag_start = ALIGN(start, frag_align);
>> +    frag_end = end & ~(frag_align - 1);
>> +
>> +    if (frag_start >= frag_end) {
>> +        if (fragment <= 4)
>> +            return amdgpu_vm_update_ptes(params, start, end,
>> +                            dst, flags);
>> +        else
>> +            return amdgpu_vm_update_ptes_helper(params, start,
>> +                    end, dst, flags, fragment - 1);
>> +    }
>> +
>> +    if (fragment <= 4) {
>> +        /* handle the 4K area at the beginning */
>> +        if (start != frag_start) {
>> +            r = amdgpu_vm_update_ptes(params, start, frag_start,
>> +                          dst, flags);
>> +            if (r)
>> +                return r;
>> +            dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
>> +        }
>> +
>> +        /* handle the area in the middle */
>> +        r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
>> +                      flags | frag_flags);
>> +        if (r)
>> +            return r;
>> +
>> +        /* handle the 4K area at the end */
>> +        if (frag_end != end) {
>> +            dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
>> +            r = amdgpu_vm_update_ptes(params, frag_end, end,
>> +                            dst, flags);
>> +        }
>> +        return r;
>> +    }
>>   -    /* handle the 4K area at the beginning */
>> +    /* handle the area at the beginning not aligned */
>>       if (start != frag_start) {
>> -        r = amdgpu_vm_update_ptes(params, start, frag_start,
>> -                      dst, flags);
>> +        r = amdgpu_vm_update_ptes_helper(params, start, frag_start,
>> +                        dst, flags, fragment - 1);
>>           if (r)
>>               return r;
>>           dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
>>       }
>>   -    /* handle the area in the middle */
>> +    /* handle the area in the middle aligned*/
>>       r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
>>                     flags | frag_flags);
>>       if (r)
>>           return r;
>>   -    /* handle the 4K area at the end */
>> +    /* handle the area at the end not aligned */
>>       if (frag_end != end) {
>>           dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
>> -        r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
>> +        r = amdgpu_vm_update_ptes_helper(params, frag_end, end,
>> +                        dst, flags, fragment - 1);
>>       }
>>       return r;
>>   }
>>   +/*
>> + * amdgpu_vm_frag_ptes - add fragment information to PTEs
>> + *
>> + * @params: see amdgpu_pte_update_params definition
>> + * @vm: requested vm
>> + * @start: first PTE to handle
>> + * @end: last PTE to handle
>> + * @dst: addr those PTEs should point to
>> + * @flags: hw mapping flags
>> + * Returns 0 for success, -EINVAL for failure.
>> + */
>> +static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params    
>> *params,
>> +                uint64_t start, uint64_t end,
>> +                uint64_t dst, uint64_t flags)
>> +{
>> +    int fragment = params->adev->vm_manager.fragment_size;
>> +    /* system pages are non continuously */
>> +    if (params->src || !(flags & AMDGPU_PTE_VALID))
>> +        return amdgpu_vm_update_ptes(params, start, end, dst, flags);
>> +
>> +    /* vram */
>> +    return amdgpu_vm_update_ptes_helper(params, start, end, dst, flags,
>> +                        fragment);
>> +}
>> +
>>   /**
>>    * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
>>    *
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx




More information about the amd-gfx mailing list