[PATCH 2/2] drm/amdgpu: handle all fragment sizes v3
Christian König
christian.koenig at amd.com
Thu Aug 31 07:31:36 UTC 2017
Am 31.08.2017 um 01:26 schrieb Felix Kuehling:
> One comment inline. With that fixed, this patch is Reviewed-by: Felix
> Kuehling <Felix.Kuehling at amd.com>
>
>
> On 2017-08-30 09:48 AM, Christian König wrote:
>> From: Roger He <Hongbo.He at amd.com>
>>
>> This can improve performance for some cases.
>>
>> v2 (chk): handle all sizes, simplify the patch quite a bit
>> v3 (chk): adjust dw estimation as well
>>
>> Signed-off-by: Roger He <Hongbo.He at amd.com>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++++++++++++++++++++++------------
>> 1 file changed, 49 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index b08f031..1575657 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
>> uint64_t start, uint64_t end,
>> uint64_t dst, uint64_t flags)
>> {
>> - int r;
>> -
>> /**
>> * The MC L1 TLB supports variable sized pages, based on a fragment
>> * field in the PTE. When this field is set to a non-zero value, page
>> @@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
>> * Userspace can support this by aligning virtual base address and
>> * allocation size to the fragment size.
>> */
>> - unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
>> - uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
>> - uint64_t frag_align = 1 << pages_per_frag;
>> + unsigned max_frag = params->adev->vm_manager.fragment_size;
>> + uint64_t frag_flags, frag_end;
>> + unsigned frag;
>>
>> - uint64_t frag_start = ALIGN(start, frag_align);
>> - uint64_t frag_end = end & ~(frag_align - 1);
>> + int r;
>>
>> /* system pages are non continuously */
>> - if (params->src || !(flags & AMDGPU_PTE_VALID) ||
>> - (frag_start >= frag_end))
>> + if (params->src || !(flags & AMDGPU_PTE_VALID))
>> return amdgpu_vm_update_ptes(params, start, end, dst, flags);
>>
>> - /* handle the 4K area at the beginning */
>> - if (start != frag_start) {
>> - r = amdgpu_vm_update_ptes(params, start, frag_start,
>> - dst, flags);
>> + /* Handle the fragments at the beginning */
>> + while (start != end) {
>> + /* This intentionally wraps around if no bit is set */
>> + frag = min(ffs(start), fls64(end - start)) - 1;
>> + if (frag >= max_frag)
>> + break;
>> +
>> + frag_flags = AMDGPU_PTE_FRAG(frag);
>> + frag_end = start + (1 << frag);
>> +
>> + r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
>> + flags | frag_flags);
>> if (r)
>> return r;
>> - dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
>> +
>> + dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> + start = frag_end;
>> }
>>
>> /* handle the area in the middle */
>> - r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
>> - flags | frag_flags);
>> - if (r)
>> - return r;
>> + if (start != end) {
>> + frag_flags = AMDGPU_PTE_FRAG(max_frag);
>> + frag_end = end & ~((1 << max_frag) - 1);
> You need a cast to uint64_t to make sure your mask is big enough and
> doesn't wipe out the high address bits:
>
> frag_end = end & ~(uint64_t)((1 << max_frag) - 1)
Good point. Using "frag_end = end & ~((1ULL << max_frag) - 1);" should
work as well, shouldn't it?
Regards,
Christian.
>
>>
>> - /* handle the 4K area at the end */
>> - if (frag_end != end) {
>> - dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
>> - r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
>> + r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
>> + flags | frag_flags);
>> + if (r)
>> + return r;
>> +
>> + dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> + start = frag_end;
>> }
>> - return r;
>> +
>> + /* Handle the fragments at the end */
>> + while (start != end) {
>> + frag = fls64(end - start) - 1;
>> + frag_flags = AMDGPU_PTE_FRAG(frag);
>> + frag_end = start + (1 << frag);
>> +
>> + r = amdgpu_vm_update_ptes(params, start, frag_end,
>> + dst, flags | frag_flags);
>> + if (r)
>> + return r;
>> +
>> + dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>> + start = frag_end;
>> + }
>> +
>> + return 0;
>> }
>>
>> /**
>> @@ -1557,8 +1581,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>> /* set page commands needed */
>> ndw += ncmds * 10;
>>
>> - /* two extra commands for begin/end of fragment */
>> - ndw += 2 * 10;
>> + /* extra commands for begin/end fragments */
>> + ndw += 2 * 10 * adev->vm_manager.fragment_size;
>>
>> params.func = amdgpu_vm_do_set_ptes;
>> }
More information about the amd-gfx
mailing list