[PATCH] drm/amdgpu: handle all fragment sizes v2
Christian König
deathsimple at vodafone.de
Wed Aug 30 13:25:01 UTC 2017
From: Roger He <Hongbo.He at amd.com>
This can improve performance for some cases.
v2 (chk): handle all sizes, simplify the patch quite a bit
Signed-off-by: Roger He <Hongbo.He at amd.com>
Signed-off-by: Christian König <christian.koenig at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 70 +++++++++++++++++++++++-----------
1 file changed, 47 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4cdfb70..04815a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
uint64_t start, uint64_t end,
uint64_t dst, uint64_t flags)
{
- int r;
-
/**
* The MC L1 TLB supports variable sized pages, based on a fragment
* field in the PTE. When this field is set to a non-zero value, page
@@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
* Userspace can support this by aligning virtual base address and
* allocation size to the fragment size.
*/
- unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
- uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
- uint64_t frag_align = 1 << pages_per_frag;
+ unsigned max_frag = params->adev->vm_manager.fragment_size;
+ uint64_t frag_flags, frag_end;
+ unsigned frag;
- uint64_t frag_start = ALIGN(start, frag_align);
- uint64_t frag_end = end & ~(frag_align - 1);
+ int r;
/* system pages are non continuously */
- if (params->src || !(flags & AMDGPU_PTE_VALID) ||
- (frag_start >= frag_end))
+ if (params->src || !(flags & AMDGPU_PTE_VALID))
return amdgpu_vm_update_ptes(params, start, end, dst, flags);
- /* handle the 4K area at the beginning */
- if (start != frag_start) {
- r = amdgpu_vm_update_ptes(params, start, frag_start,
- dst, flags);
+ /* Handle the fragments at the beginning */
+ while (start != end) {
+ /* This intentionally wraps around if no bit is set */
+ frag = min(ffs(start), fls64(end - start)) - 1;
+ if (frag >= max_frag)
+ break;
+
+ frag_flags = AMDGPU_PTE_FRAG(frag);
+ frag_end = start + (1 << frag);
+
+ r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
+ flags | frag_flags);
if (r)
return r;
- dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
+
+ dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+ start = frag_end;
}
/* handle the area in the middle */
- r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
- flags | frag_flags);
- if (r)
- return r;
+ if (start != end) {
+ frag_flags = AMDGPU_PTE_FRAG(max_frag);
+ frag_end = end & ~((1 << max_frag) - 1);
- /* handle the 4K area at the end */
- if (frag_end != end) {
- dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
- r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+ r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
+ flags | frag_flags);
+ if (r)
+ return r;
+
+ dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+ start = frag_end;
}
- return r;
+
+ /* Handle the fragments at the end */
+ while (start != end) {
+ frag = fls64(end - start) - 1;
+ frag_flags = AMDGPU_PTE_FRAG(frag);
+ frag_end = start + (1 << frag);
+
+ r = amdgpu_vm_update_ptes(params, start, frag_end,
+ dst, flags | frag_flags);
+ if (r)
+ return r;
+
+ dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+ start = frag_end;
+ }
+
+ return 0;
}
/**
--
2.7.4
More information about the amd-gfx
mailing list