[PATCH 7/7] drm/amdgpu: enable huge page handling in the VM

Thu May 18 05:30:12 UTC 2017

On 2017年05月17日 17:22, Christian König wrote:
> From: Christian König <christian.koenig at amd.com>
>
> The hardware can use huge pages to map 2MB of address space with only one PDE.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 96 +++++++++++++++++++++++++---------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 ++
>   2 files changed, 76 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 860a669..8be1d7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -325,6 +325,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   
>   			entry->bo = pt;
>   			entry->addr = 0;
> +			entry->huge_page = false;
>   		}
>   
>   		if (level < adev->vm_manager.num_level) {
> @@ -1014,7 +1015,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   
>   		pt = bo->tbo.mem.start << PAGE_SHIFT;
>   		pt = amdgpu_gart_get_vm_pde(adev, pt);
> -		if (parent->entries[pt_idx].addr == pt)
> +		if (parent->entries[pt_idx].addr == pt ||
> +		    parent->entries[pt_idx].huge_page)
>   			continue;
>   
>   		parent->entries[pt_idx].addr = pt;
> @@ -1146,29 +1148,70 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>   }
>   
>   /**
> - * amdgpu_vm_find_pt - find the page table for an address
> + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
>    *
>    * @p: see amdgpu_pte_update_params definition
>    * @addr: virtual address in question
> + * @nptes: number of PTEs updated with this operation
> + * @dst: destination address where the PTEs should point to
> + * @flags: access flags fro the PTEs
> + * @bo: resulting page tables BO
>    *
> - * Find the page table BO for a virtual address, return NULL when none found.
> + * Check if we can update the PD with a huge page. Also finds the page table
> + * BO for a virtual address, returns -ENOENT when nothing found.
>    */
> -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
> -					  uint64_t addr)
> +static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
> +				       uint64_t addr, unsigned nptes,
> +				       uint64_t dst, uint64_t flags,
> +				       struct amdgpu_bo **bo)
>   {
> -	struct amdgpu_vm_pt *entry = &p->vm->root;
> -	unsigned idx, level = p->adev->vm_manager.num_level;
> +	unsigned pt_idx, level = p->adev->vm_manager.num_level;
> +	struct amdgpu_vm_pt *entry = &p->vm->root, *parent;
> +	uint64_t pd_addr, pde, pt;
>   
> -	while (entry->entries) {
> -		idx = addr >> (p->adev->vm_manager.block_size * level--);
> -		idx %= amdgpu_bo_size(entry->bo) / 8;
> -		entry = &entry->entries[idx];
> -	}
> +	do {
> +		pt_idx = addr >> (p->adev->vm_manager.block_size * level--);
> +		pt_idx %= amdgpu_bo_size(entry->bo) / 8;
> +		parent = entry;
> +		entry = &entry->entries[pt_idx];
> +	} while (entry->entries);
>   
>   	if (level)
> -		return NULL;
> +		return -ENOENT;
> +
> +	*bo = entry->bo;
> +
> +	/* In the case of a mixed PT the PDE must point to it*/
> +	if (p->adev->asic_type < CHIP_VEGA10 ||
> +	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
> +	    p->func != amdgpu_vm_do_set_ptes ||
> +	    !(flags & AMDGPU_PTE_VALID)) {
> +
> +		pt = (*bo)->tbo.mem.start << PAGE_SHIFT;
> +		pt = amdgpu_gart_get_vm_pde(p->adev, pt);
> +		flags = AMDGPU_PTE_VALID;
This case should already be handled when updating the levels, so can we just return directly here?
> +	} else {
> +		pt = amdgpu_gart_get_vm_pde(p->adev, dst);
> +		flags |= AMDGPU_PDE_PTE;
> +	}
>   
> -	return entry->bo;
> +	if (entry->addr == pt &&
> +	    entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
> +		return 0;
> +
> +	entry->addr = pt;
> +	entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
> +
> +	if (parent->bo->shadow) {
> +		pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
> +		pde = pd_addr + pt_idx * 8;
> +		amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
From the spec: "any PDE in the chain can itself take on the format of a
PTE and point directly to an aligned block of logical address space by
setting the P bit."
So here we should pass addr into the PDE instead of pt.
> +	}
> +
> +	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +	pde = pd_addr + pt_idx * 8;
> +	amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
Here, too, addr should be passed into the PDE instead of pt.

> +	return 0;
>   }
>   
>   /**
> @@ -1194,14 +1237,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>   	uint64_t addr, pe_start;
>   	struct amdgpu_bo *pt;
>   	unsigned nptes;
> +	int r;
>   
>   	/* walk over the address space and update the page tables */
>   	for (addr = start; addr < end; addr += nptes) {
> -		pt = amdgpu_vm_get_pt(params, addr);
> -		if (!pt) {
> -			pr_err("PT not found, aborting update_ptes\n");
> -			return -EINVAL;
> -		}
> +
> +		if ((addr & ~mask) == (end & ~mask))
> +			nptes = end - addr;
> +		else
> +			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
> +
> +		r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
> +						dst, flags, &pt);
If a huge page is used, its sub-PTEs don't need to be updated any more; they
cannot be indexed through the page table when that PDE is itself a PTE, right?

Btw: is this the "BigK" that people often talk about?

Regards,
David Zhou
> +		if (r)
> +			return r;
>   
>   		if (params->shadow) {
>   			if (!pt->shadow)
> @@ -1209,11 +1258,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>   			pt = pt->shadow;
>   		}
>   
> -		if ((addr & ~mask) == (end & ~mask))
> -			nptes = end - addr;
> -		else
> -			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
> -
>   		pe_start = amdgpu_bo_gpu_offset(pt);
>   		pe_start += (addr & mask) * 8;
>   
> @@ -1353,6 +1397,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	/* padding, etc. */
>   	ndw = 64;
>   
> +	/* one PDE write for each huge page */
> +	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
> +
>   	if (src) {
>   		/* only copy commands needed */
>   		ndw += ncmds * 7;
> @@ -1437,6 +1484,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   
>   error_free:
>   	amdgpu_job_free(job);
> +	amdgpu_vm_invalidate_level(&vm->root);
>   	return r;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index afe9073..1c5e0ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -68,6 +68,9 @@ struct amdgpu_bo_list_entry;
>   /* TILED for VEGA10, reserved for older ASICs  */
>   #define AMDGPU_PTE_PRT		(1ULL << 51)
>   
> +/* PDE is handled as PTE for VEGA10 */
> +#define AMDGPU_PDE_PTE		(1ULL << 54)
> +
>   /* VEGA10 only */
>   #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
>   #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
> @@ -90,6 +93,7 @@ struct amdgpu_bo_list_entry;
>   struct amdgpu_vm_pt {
>   	struct amdgpu_bo	*bo;
>   	uint64_t		addr;
> +	bool			huge_page;
>   
>   	/* array of page tables, one for each directory entry */
>   	struct amdgpu_vm_pt	*entries;