[PATCH 7/7] drm/amdgpu: enable huge page handling in the VM v2
Felix Kuehling
felix.kuehling at amd.com
Tue May 23 19:29:45 UTC 2017
Do you need something in amdgpu_vm_update_level to stop it from
overwriting huge page PTEs?
Regards,
Felix
On 17-05-23 12:52 PM, Christian König wrote:
> From: Christian König <christian.koenig at amd.com>
>
> The hardware can use huge pages to map 2MB of address space with only one PDE.
>
> v2: few cleanups and rebased
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 95 +++++++++++++++++++++++++---------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++
> 2 files changed, 75 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index f7afdfa..f07c9b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -325,6 +325,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>
> entry->bo = pt;
> entry->addr = 0;
> + entry->huge_page = false;
> }
>
> if (level < adev->vm_manager.num_level) {
> @@ -1013,7 +1014,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>
> pt = amdgpu_bo_gpu_offset(bo);
> pt = amdgpu_gart_get_vm_pde(adev, pt);
> - if (parent->entries[pt_idx].addr == pt)
> + if (parent->entries[pt_idx].addr == pt ||
> + parent->entries[pt_idx].huge_page)
> continue;
>
> parent->entries[pt_idx].addr = pt;
> @@ -1145,29 +1147,69 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
> }
>
> /**
> - * amdgpu_vm_find_pt - find the page table for an address
> + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
> *
> * @p: see amdgpu_pte_update_params definition
> * @addr: virtual address in question
> + * @nptes: number of PTEs updated with this operation
> + * @dst: destination address where the PTEs should point to
> + * @flags: access flags fro the PTEs
> + * @bo: resulting page tables BO
> *
> - * Find the page table BO for a virtual address, return NULL when none found.
> + * Check if we can update the PD with a huge page. Also finds the page table
> + * BO for a virtual address, returns -ENOENT when nothing found.
> */
> -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
> - uint64_t addr)
> +static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
> + uint64_t addr, unsigned nptes,
> + uint64_t dst, uint64_t flags,
> + struct amdgpu_bo **bo)
> {
> - struct amdgpu_vm_pt *entry = &p->vm->root;
> - unsigned idx, level = p->adev->vm_manager.num_level;
> + unsigned pt_idx, level = p->adev->vm_manager.num_level;
> + struct amdgpu_vm_pt *entry = &p->vm->root, *parent;
> + uint64_t pd_addr, pde;
>
> - while (entry->entries) {
> - idx = addr >> (p->adev->vm_manager.block_size * level--);
> - idx %= amdgpu_bo_size(entry->bo) / 8;
> - entry = &entry->entries[idx];
> - }
> + do {
> + pt_idx = addr >> (p->adev->vm_manager.block_size * level--);
> + pt_idx %= amdgpu_bo_size(entry->bo) / 8;
> + parent = entry;
> + entry = &entry->entries[pt_idx];
> + } while (entry->entries);
>
> if (level)
> - return NULL;
> + return -ENOENT;
> +
> + *bo = entry->bo;
> +
> + /* In the case of a mixed PT the PDE must point to it*/
> + if (p->adev->asic_type < CHIP_VEGA10 ||
> + nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
> + p->func != amdgpu_vm_do_set_ptes ||
> + !(flags & AMDGPU_PTE_VALID)) {
> +
> + dst = amdgpu_bo_gpu_offset(*bo);
> + dst = amdgpu_gart_get_vm_pde(p->adev, dst);
> + flags = AMDGPU_PTE_VALID;
> + } else {
> + flags |= AMDGPU_PDE_PTE;
> + }
>
> - return entry->bo;
> + if (entry->addr == dst &&
> + entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
> + return 0;
> +
> + entry->addr = dst;
> + entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
> +
> + if (parent->bo->shadow) {
> + pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
> + pde = pd_addr + pt_idx * 8;
> + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
> + }
> +
> + pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> + pde = pd_addr + pt_idx * 8;
> + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
> + return 0;
> }
>
> /**
> @@ -1193,14 +1235,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> uint64_t addr, pe_start;
> struct amdgpu_bo *pt;
> unsigned nptes;
> + int r;
>
> /* walk over the address space and update the page tables */
> for (addr = start; addr < end; addr += nptes) {
> - pt = amdgpu_vm_get_pt(params, addr);
> - if (!pt) {
> - pr_err("PT not found, aborting update_ptes\n");
> - return -EINVAL;
> - }
> +
> + if ((addr & ~mask) == (end & ~mask))
> + nptes = end - addr;
> + else
> + nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
> +
> + r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
> + dst, flags, &pt);
> + if (r)
> + return r;
>
> if (params->shadow) {
> if (!pt->shadow)
> @@ -1208,11 +1256,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> pt = pt->shadow;
> }
>
> - if ((addr & ~mask) == (end & ~mask))
> - nptes = end - addr;
> - else
> - nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
> -
> pe_start = amdgpu_bo_gpu_offset(pt);
> pe_start += (addr & mask) * 8;
>
> @@ -1353,6 +1396,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> /* padding, etc. */
> ndw = 64;
>
> + /* one PDE write for each huge page */
> + ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
> +
> if (src) {
> /* only copy commands needed */
> ndw += ncmds * 7;
> @@ -1437,6 +1483,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>
> error_free:
> amdgpu_job_free(job);
> + amdgpu_vm_invalidate_level(&vm->root);
> return r;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 0f83fe3..2940d66 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
> /* TILED for VEGA10, reserved for older ASICs */
> #define AMDGPU_PTE_PRT (1ULL << 51)
>
> +/* PDE is handled as PTE for VEGA10 */
> +#define AMDGPU_PDE_PTE (1ULL << 54)
> +
> /* VEGA10 only */
> #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
> #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
> @@ -92,6 +95,7 @@ struct amdgpu_bo_list_entry;
> struct amdgpu_vm_pt {
> struct amdgpu_bo *bo;
> uint64_t addr;
> + bool huge_page;
>
> /* array of page tables, one for each directory entry */
> struct amdgpu_vm_pt *entries;
More information about the amd-gfx
mailing list