[PATCH 2/2] drm/amdgpu: handle all fragment sizes v3

Thu Aug 31 13:35:33 UTC 2017

>>> +            frag_end = end & ~((1 << max_frag) - 1);
>> You need a cast to uint64_t to make sure your mask is big enough and
>> doesn't wipe out the high address bits:
>>
>>      frag_end = end & ~(uint64_t)((1 << max_frag) - 1)
>
> Good point. Using "frag_end = end & ~((1ULL << max_frag) - 1);" should
> work as well, shouldn't it?

Yes, that would work too.

Regards,
  Felix
________________________________________
From: Koenig, Christian
Sent: Thursday, August 31, 2017 3:31:36 AM
To: Kuehling, Felix; amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amdgpu: handle all fragment sizes v3

Am 31.08.2017 um 01:26 schrieb Felix Kuehling:
> One comment inline. With that fixed, this patch is Reviewed-by: Felix
> Kuehling <Felix.Kuehling at amd.com>
>
>
> On 2017-08-30 09:48 AM, Christian König wrote:
>> From: Roger He <Hongbo.He at amd.com>
>>
>> This can improve performance for some cases.
>>
>> v2 (chk): handle all sizes, simplify the patch quite a bit
>> v3 (chk): adjust dw estimation as well
>>
>> Signed-off-by: Roger He <Hongbo.He at amd.com>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++++++++++++++++++++++------------
>>   1 file changed, 49 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index b08f031..1575657 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
>>                              uint64_t start, uint64_t end,
>>                              uint64_t dst, uint64_t flags)
>>   {
>> -    int r;
>> -
>>      /**
>>       * The MC L1 TLB supports variable sized pages, based on a fragment
>>       * field in the PTE. When this field is set to a non-zero value, page
>> @@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params       *params,
>>       * Userspace can support this by aligning virtual base address and
>>       * allocation size to the fragment size.
>>       */
>> -    unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
>> -    uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
>> -    uint64_t frag_align = 1 << pages_per_frag;
>> +    unsigned max_frag = params->adev->vm_manager.fragment_size;
>> +    uint64_t frag_flags, frag_end;
>> +    unsigned frag;
>>
>> -    uint64_t frag_start = ALIGN(start, frag_align);
>> -    uint64_t frag_end = end & ~(frag_align - 1);
>> +    int r;
>>
>>      /* system pages are non continuously */
>> -    if (params->src || !(flags & AMDGPU_PTE_VALID) ||
>> -        (frag_start >= frag_end))
>> +    if (params->src || !(flags & AMDGPU_PTE_VALID))
>>              return amdgpu_vm_update_ptes(params, start, end, dst, flags);
>>
>> -    /* handle the 4K area at the beginning */
>> -    if (start != frag_start) {
>> -            r = amdgpu_vm_update_ptes(params, start, frag_start,
>> -                                      dst, flags);
>> +    /* Handle the fragments at the beginning */
>> +    while (start != end) {
>> +            /* This intentionally wraps around if no bit is set */
>> +            frag = min(ffs(start), fls64(end - start)) - 1;
>> +            if (frag >= max_frag)
>> +                    break;
>> +
>> +            frag_flags = AMDGPU_PTE_FRAG(frag);
>> +            frag_end = start + (1 << frag);
>> +
>> +            r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
>> +                                      flags | frag_flags);
>>              if (r)
>>                      return r;
>> -            dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
>> +
>> +            dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> +            start = frag_end;
>>      }
>>
>>      /* handle the area in the middle */
>> -    r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
>> -                              flags | frag_flags);
>> -    if (r)
>> -            return r;
>> +    if (start != end) {
>> +            frag_flags = AMDGPU_PTE_FRAG(max_frag);
>> +            frag_end = end & ~((1 << max_frag) - 1);
> You need a cast to uint64_t to make sure your mask is big enough and
> doesn't wipe out the high address bits:
>
>      frag_end = end & ~(uint64_t)((1 << max_frag) - 1)

Good point. Using "frag_end = end & ~((1ULL << max_frag) - 1);" should
work as well, shouldn't it?

Regards,
Christian.

>
>>
>> -    /* handle the 4K area at the end */
>> -    if (frag_end != end) {
>> -            dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
>> -            r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
>> +            r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
>> +                                      flags | frag_flags);
>> +            if (r)
>> +                    return r;
>> +
>> +            dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> +            start = frag_end;
>>      }
>> -    return r;
>> +
>> +    /* Handle the fragments at the end */
>> +    while (start != end) {
>> +            frag = fls64(end - start) - 1;
>> +            frag_flags = AMDGPU_PTE_FRAG(frag);
>> +            frag_end = start + (1 << frag);
>> +
>> +            r = amdgpu_vm_update_ptes(params, start, frag_end,
>> +                                      dst, flags | frag_flags);
>> +            if (r)
>> +                    return r;
>> +
>> +            dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> +            start = frag_end;
>> +    }
>> +
>> +    return 0;
>>   }
>>
>>   /**
>> @@ -1557,8 +1581,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>              /* set page commands needed */
>>              ndw += ncmds * 10;
>>
>> -            /* two extra commands for begin/end of fragment */
>> -            ndw += 2 * 10;
>> +            /* extra commands for begin/end fragments */
>> +            ndw += 2 * 10 * adev->vm_manager.fragment_size;
>>
>>              params.func = amdgpu_vm_do_set_ptes;
>>      }