[PATCH 2/2] drm/amdgpu: handle all fragment sizes v3
Felix Kuehling
felix.kuehling at amd.com
Wed Aug 30 23:26:21 UTC 2017
One comment inline. With that fixed, this patch is Reviewed-by: Felix
Kuehling <Felix.Kuehling at amd.com>
On 2017-08-30 09:48 AM, Christian König wrote:
> From: Roger He <Hongbo.He at amd.com>
>
> This can improve performance for some cases.
>
> v2 (chk): handle all sizes, simplify the patch quite a bit
> v3 (chk): adjust dw estimation as well
>
> Signed-off-by: Roger He <Hongbo.He at amd.com>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++++++++++++++++++++++------------
> 1 file changed, 49 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b08f031..1575657 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
> uint64_t start, uint64_t end,
> uint64_t dst, uint64_t flags)
> {
> - int r;
> -
> /**
> * The MC L1 TLB supports variable sized pages, based on a fragment
> * field in the PTE. When this field is set to a non-zero value, page
> @@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
> * Userspace can support this by aligning virtual base address and
> * allocation size to the fragment size.
> */
> - unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
> - uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
> - uint64_t frag_align = 1 << pages_per_frag;
> + unsigned max_frag = params->adev->vm_manager.fragment_size;
> + uint64_t frag_flags, frag_end;
> + unsigned frag;
>
> - uint64_t frag_start = ALIGN(start, frag_align);
> - uint64_t frag_end = end & ~(frag_align - 1);
> + int r;
>
> /* system pages are non continuously */
> - if (params->src || !(flags & AMDGPU_PTE_VALID) ||
> - (frag_start >= frag_end))
> + if (params->src || !(flags & AMDGPU_PTE_VALID))
> return amdgpu_vm_update_ptes(params, start, end, dst, flags);
>
> - /* handle the 4K area at the beginning */
> - if (start != frag_start) {
> - r = amdgpu_vm_update_ptes(params, start, frag_start,
> - dst, flags);
> + /* Handle the fragments at the beginning */
> + while (start != end) {
> + /* This intentionally wraps around if no bit is set */
> + frag = min(ffs(start), fls64(end - start)) - 1;
> + if (frag >= max_frag)
> + break;
> +
> + frag_flags = AMDGPU_PTE_FRAG(frag);
> + frag_end = start + (1 << frag);
> +
> + r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
> + flags | frag_flags);
> if (r)
> return r;
> - dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
> +
> + dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
> + start = frag_end;
> }
>
> /* handle the area in the middle */
> - r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
> - flags | frag_flags);
> - if (r)
> - return r;
> + if (start != end) {
> + frag_flags = AMDGPU_PTE_FRAG(max_frag);
> + frag_end = end & ~((1 << max_frag) - 1);
You need a cast to uint64_t to make sure your mask is big enough and
doesn't wipe out the high address bits:
frag_end = end & ~(uint64_t)((1 << max_frag) - 1)
>
> - /* handle the 4K area at the end */
> - if (frag_end != end) {
> - dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
> - r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
> + r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
> + flags | frag_flags);
> + if (r)
> + return r;
> +
> + dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
> + start = frag_end;
> }
> - return r;
> +
> + /* Handle the fragments at the end */
> + while (start != end) {
> + frag = fls64(end - start) - 1;
> + frag_flags = AMDGPU_PTE_FRAG(frag);
> + frag_end = start + (1 << frag);
> +
> + r = amdgpu_vm_update_ptes(params, start, frag_end,
> + dst, flags | frag_flags);
> + if (r)
> + return r;
> +
> + dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
> + start = frag_end;
> + }
> +
> + return 0;
> }
>
> /**
> @@ -1557,8 +1581,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> /* set page commands needed */
> ndw += ncmds * 10;
>
> - /* two extra commands for begin/end of fragment */
> - ndw += 2 * 10;
> + /* extra commands for begin/end fragments */
> + ndw += 2 * 10 * adev->vm_manager.fragment_size;
>
> params.func = amdgpu_vm_do_set_ptes;
> }
More information about the amd-gfx
mailing list