[PATCH] drm/amd/amdgpu: cover fragment size between 4 and 9 when not aligned
zhoucm1
david1.zhou at amd.com
Wed Aug 30 06:15:46 UTC 2017
Hi Roger,
I think when the fragment can be selected automatically, you shouldn't
involve vm_manager.fragment_size in the calculation; then we can use the
most suitable fragment for every segment and get the best performance.
So vm_manager.fragment_size should always be -1. If it is set to a valid
fragment value, we should always use it and disable automatic selection;
that should only be used for validation.
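
Something along these lines is what I mean; a minimal user-space sketch of
that policy (pick_fragment() is only for illustration, it is not part of the
patch; the automatic path mirrors the fls64(end - start) - 1 idea):

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative sketch only: if vm_manager.fragment_size defaults to -1, a
 * negative value means "pick the fragment automatically per segment", while
 * any valid value forces that fragment and disables automatic selection.
 */
static int pick_fragment(int configured, uint64_t num_pages)
{
        int automatic = 0;

        if (configured >= 0)
                return configured;      /* explicit setting wins */

        /* automatic: largest power-of-two fragment that fits the segment */
        while ((2ULL << automatic) <= num_pages)
                automatic++;

        return automatic;
}

int main(void)
{
        /* 1 page -> 0, 16 pages -> 4, 600 pages -> 9, forced 5 stays 5 */
        printf("%d %d %d %d\n",
               pick_fragment(-1, 1),
               pick_fragment(-1, 16),
               pick_fragment(-1, 600),
               pick_fragment(5, 600));
        return 0;
}
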
Regards,
David Zhou
On 2017-08-30 13:01, Roger He wrote:
> this can bring a performance improvement for some cases
>
> Change-Id: Ibb58bb3099f7e8c4b5da90da73a03544cdb2bcb7
> Signed-off-by: Roger He <Hongbo.He at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 98 +++++++++++++++++++++++++++-------
> 1 file changed, 79 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 592c3e7..4e5da5e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1375,7 +1375,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> }
>
> /*
> - * amdgpu_vm_frag_ptes - add fragment information to PTEs
> + * amdgpu_vm_update_ptes_helper - add fragment information to PTEs
> *
> * @params: see amdgpu_pte_update_params definition
> * @vm: requested vm
> @@ -1383,11 +1383,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> * @end: last PTE to handle
> * @dst: addr those PTEs should point to
> * @flags: hw mapping flags
> + * @fragment: fragment size
> * Returns 0 for success, -EINVAL for failure.
> */
> -static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
> - uint64_t start, uint64_t end,
> - uint64_t dst, uint64_t flags)
> +static int amdgpu_vm_update_ptes_helper(struct amdgpu_pte_update_params *params,
> + uint64_t start, uint64_t end, uint64_t dst,
> + uint64_t flags, int fragment)
> {
> int r;
>
> @@ -1409,41 +1410,100 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
> * Userspace can support this by aligning virtual base address and
> * allocation size to the fragment size.
> */
> - unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
> - uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
> - uint64_t frag_align = 1 << pages_per_frag;
> + uint64_t frag_flags, frag_align, frag_start, frag_end;
>
> - uint64_t frag_start = ALIGN(start, frag_align);
> - uint64_t frag_end = end & ~(frag_align - 1);
> + if (start > end || fragment < 0)
> + return -EINVAL;
>
> - /* system pages are non continuously */
> - if (params->src || !(flags & AMDGPU_PTE_VALID) ||
> - (frag_start >= frag_end))
> - return amdgpu_vm_update_ptes(params, start, end, dst, flags);
> + fragment = min(fragment, max(0, fls64(end - start) - 1));
> + frag_flags = AMDGPU_PTE_FRAG(fragment);
> + frag_align = 1 << fragment;
> + frag_start = ALIGN(start, frag_align);
> + frag_end = end & ~(frag_align - 1);
> +
> + if (frag_start >= frag_end) {
> + if (fragment <= 4)
> + return amdgpu_vm_update_ptes(params, start, end,
> + dst, flags);
> + else
> + return amdgpu_vm_update_ptes_helper(params, start,
> + end, dst, flags, fragment - 1);
> + }
> +
> + if (fragment <= 4) {
> + /* handle the 4K area at the beginning */
> + if (start != frag_start) {
> + r = amdgpu_vm_update_ptes(params, start, frag_start,
> + dst, flags);
> + if (r)
> + return r;
> + dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
> + }
> +
> + /* handle the area in the middle */
> + r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
> + flags | frag_flags);
> + if (r)
> + return r;
> +
> + /* handle the 4K area at the end */
> + if (frag_end != end) {
> + dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
> + r = amdgpu_vm_update_ptes(params, frag_end, end,
> + dst, flags);
> + }
> + return r;
> + }
>
> - /* handle the 4K area at the beginning */
> + /* handle the unaligned area at the beginning */
> if (start != frag_start) {
> - r = amdgpu_vm_update_ptes(params, start, frag_start,
> - dst, flags);
> + r = amdgpu_vm_update_ptes_helper(params, start, frag_start,
> + dst, flags, fragment - 1);
> if (r)
> return r;
> dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
> }
>
> - /* handle the area in the middle */
> + /* handle the aligned area in the middle */
> r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
> flags | frag_flags);
> if (r)
> return r;
>
> - /* handle the 4K area at the end */
> + /* handle the unaligned area at the end */
> if (frag_end != end) {
> dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
> - r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
> + r = amdgpu_vm_update_ptes_helper(params, frag_end, end,
> + dst, flags, fragment - 1);
> }
> return r;
> }
>
> +/*
> + * amdgpu_vm_frag_ptes - add fragment information to PTEs
> + *
> + * @params: see amdgpu_pte_update_params definition
> + * @vm: requested vm
> + * @start: first PTE to handle
> + * @end: last PTE to handle
> + * @dst: addr those PTEs should point to
> + * @flags: hw mapping flags
> + * Returns 0 for success, -EINVAL for failure.
> + */
> +static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
> + uint64_t start, uint64_t end,
> + uint64_t dst, uint64_t flags)
> +{
> + int fragment = params->adev->vm_manager.fragment_size;
> + /* system pages are not contiguous */
> + if (params->src || !(flags & AMDGPU_PTE_VALID))
> + return amdgpu_vm_update_ptes(params, start, end, dst, flags);
> +
> + /* vram */
> + return amdgpu_vm_update_ptes_helper(params, start, end, dst, flags,
> + fragment);
> +}
> +
> /**
> * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
> *
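
For reference, a minimal user-space sketch of the recursive split done by
amdgpu_vm_update_ptes_helper() in the quoted patch: an unaligned range is cut
into an unaligned head, an aligned middle that gets the fragment flag, and an
unaligned tail, with head and tail recursing at fragment - 1 until the
fragment reaches 4. The actual PTE update is replaced by a printf and
fls64()/ALIGN() are reimplemented, so split() and update_ptes() here are
illustration only, not kernel code.

#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

/* stand-in for the kernel's fls64(): index of the highest set bit, 1-based */
static int fls64_sketch(uint64_t v)
{
        int bits = 0;

        while (v) {
                v >>= 1;
                bits++;
        }
        return bits;
}

/* stand-in for amdgpu_vm_update_ptes(): just report what would be mapped */
static void update_ptes(uint64_t start, uint64_t end, int fragment)
{
        printf("map [%6llu, %6llu) with fragment %d\n",
               (unsigned long long)start, (unsigned long long)end, fragment);
}

static void split(uint64_t start, uint64_t end, int fragment)
{
        uint64_t frag_align, frag_start, frag_end;
        int fit = fls64_sketch(end - start) - 1;

        /* never use a fragment larger than the range itself */
        if (fit >= 0 && fit < fragment)
                fragment = fit;

        frag_align = 1ULL << fragment;
        frag_start = ALIGN_UP(start, frag_align);
        frag_end = end & ~(frag_align - 1);

        if (frag_start >= frag_end) {
                /* nothing in the range is aligned to this fragment */
                if (fragment <= 4)
                        update_ptes(start, end, 0);
                else
                        split(start, end, fragment - 1);
                return;
        }

        /* unaligned head */
        if (start != frag_start) {
                if (fragment <= 4)
                        update_ptes(start, frag_start, 0);
                else
                        split(start, frag_start, fragment - 1);
        }

        /* aligned middle, mapped with the fragment flag */
        update_ptes(frag_start, frag_end, fragment);

        /* unaligned tail */
        if (frag_end != end) {
                if (fragment <= 4)
                        update_ptes(frag_end, end, 0);
                else
                        split(frag_end, end, fragment - 1);
        }
}

int main(void)
{
        /* an unaligned 700-page range, starting the search at fragment 9 */
        split(3, 703, 9);
        return 0;
}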