[PATCH 3/4] drm/amdgpu: Support page directory update via CPU

Fri May 12 08:23:32 UTC 2017

Am 12.05.2017 um 04:39 schrieb Harish Kasiviswanathan:
> If amdgpu.vm_update_context param is set to use CPU, then Page
> Directories will be updated by CPU instead of SDMA
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 141 +++++++++++++++++++++++----------
>   1 file changed, 99 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ff6cf33..63f0572 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -271,6 +271,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   				  uint64_t saddr, uint64_t eaddr,
>   				  unsigned level)
>   {
> +	u64 flags;
>   	unsigned shift = (adev->vm_manager.num_level - level) *
>   		adev->vm_manager.block_size;
>   	unsigned pt_idx, from, to;
> @@ -299,6 +300,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   	saddr = saddr & ((1 << shift) - 1);
>   	eaddr = eaddr & ((1 << shift) - 1);
>   
> +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> +			AMDGPU_GEM_CREATE_VRAM_CLEARED;
> +	if (vm->is_vm_update_mode_cpu)
> +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> +	else
> +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> +				AMDGPU_GEM_CREATE_SHADOW);
> +
>   	/* walk over the address space and allocate the page tables */
>   	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
>   		struct reservation_object *resv = vm->root.bo->tbo.resv;
> @@ -310,10 +319,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   					     amdgpu_vm_bo_size(adev, level),
>   					     AMDGPU_GPU_PAGE_SIZE, true,
>   					     AMDGPU_GEM_DOMAIN_VRAM,
> -					     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> -					     AMDGPU_GEM_CREATE_SHADOW |
> -					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> -					     AMDGPU_GEM_CREATE_VRAM_CLEARED,
> +					     flags,
>   					     NULL, resv, &pt);
>   			if (r)
>   				return r;
> @@ -953,6 +959,34 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
>   	return result;
>   }
>   
> +/**
> + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
> + *
> + * @params: see amdgpu_pte_update_params definition
> + * @pe: kmap addr of the page entry
> + * @addr: dst addr to write into pe
> + * @count: number of page entries to update
> + * @incr: increase next addr by incr bytes
> + * @flags: hw access flags
> + */
> +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
> +				   uint64_t pe, uint64_t addr,
> +				   unsigned count, uint32_t incr,
> +				   uint64_t flags)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < count; i++) {
> +		amdgpu_gart_set_pte_pde(params->adev, (void *)pe,
> +					i, addr, flags);
> +		addr += incr;
> +	}
> +
> +	mb();
> +	amdgpu_gart_flush_gpu_tlb(params->adev, 0);
> +}
> +
> +
>   /*
>    * amdgpu_vm_update_level - update a single level in the hierarchy
>    *
> @@ -982,34 +1016,49 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   
>   	if (!parent->entries)
>   		return 0;
> -	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
>   
> -	/* padding, etc. */
> -	ndw = 64;
> +	memset(&params, 0, sizeof(params));
> +	params.adev = adev;
> +	shadow = parent->bo->shadow;
>   
> -	/* assume the worst case */
> -	ndw += parent->last_entry_used * 6;
> +	WARN_ON(vm->is_vm_update_mode_cpu && shadow);
> +	if (vm->is_vm_update_mode_cpu && !shadow) {
> +		r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
> +		if (r)
> +			return r;

Over night I thought more about this and to also enable the CPU update 
mode for GFX we indeed need the sync code here and in patch #4 as well.

Sorry for the confusion, I didn't thought about that use case initially. 
I suggest to just add a helper for this to use here and in patch #4.

With that fixed the patch is Reviewed-by: Christian König 
<christian.koenig at amd.com>

Regards,
Christian.

> +		params.func = amdgpu_vm_cpu_set_ptes;
> +	} else {
> +		if (shadow) {
> +			r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> +			if (r)
> +				return r;
> +		}
> +		ring = container_of(vm->entity.sched, struct amdgpu_ring,
> +				    sched);
>   
> -	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +		/* padding, etc. */
> +		ndw = 64;
>   
> -	shadow = parent->bo->shadow;
> -	if (shadow) {
> -		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> +		/* assume the worst case */
> +		ndw += parent->last_entry_used * 6;
> +
> +		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +
> +		if (shadow) {
> +			shadow_addr = amdgpu_bo_gpu_offset(shadow);
> +			ndw *= 2;
> +		} else {
> +			shadow_addr = 0;
> +		}
> +
> +		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
>   		if (r)
>   			return r;
> -		shadow_addr = amdgpu_bo_gpu_offset(shadow);
> -		ndw *= 2;
> -	} else {
> -		shadow_addr = 0;
> -	}
>   
> -	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> -	if (r)
> -		return r;
> +		params.ib = &job->ibs[0];
> +		params.func = amdgpu_vm_do_set_ptes;
> +	}
>   
> -	memset(&params, 0, sizeof(params));
> -	params.adev = adev;
> -	params.ib = &job->ibs[0];
>   
>   	/* walk over the address space and update the directory */
>   	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> @@ -1044,15 +1093,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   					amdgpu_vm_adjust_mc_addr(adev, last_pt);
>   
>   				if (shadow)
> -					amdgpu_vm_do_set_ptes(&params,
> -							      last_shadow,
> -							      pt_addr, count,
> -							      incr,
> -							      AMDGPU_PTE_VALID);
> -
> -				amdgpu_vm_do_set_ptes(&params, last_pde,
> -						      pt_addr, count, incr,
> -						      AMDGPU_PTE_VALID);
> +					params.func(&params,
> +						    last_shadow,
> +						    pt_addr, count,
> +						    incr,
> +						    AMDGPU_PTE_VALID);
> +
> +				params.func(&params, last_pde,
> +					    pt_addr, count, incr,
> +					    AMDGPU_PTE_VALID);
>   			}
>   
>   			count = 1;
> @@ -1068,14 +1117,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   		uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
>   
>   		if (vm->root.bo->shadow)
> -			amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
> -					      count, incr, AMDGPU_PTE_VALID);
> +			params.func(&params, last_shadow, pt_addr,
> +				    count, incr, AMDGPU_PTE_VALID);
>   
> -		amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
> -				      count, incr, AMDGPU_PTE_VALID);
> +		params.func(&params, last_pde, pt_addr,
> +			    count, incr, AMDGPU_PTE_VALID);
>   	}
>   
> -	if (params.ib->length_dw == 0) {
> +	if (params.func == amdgpu_vm_cpu_set_ptes)
> +		amdgpu_bo_kunmap(parent->bo);
> +	else if (params.ib->length_dw == 0) {
>   		amdgpu_job_free(job);
>   	} else {
>   		amdgpu_ring_pad_ib(ring, params.ib);
> @@ -2310,6 +2361,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	struct amdgpu_ring *ring;
>   	struct amd_sched_rq *rq;
>   	int r, i;
> +	u64 flags;
>   
>   	vm->va = RB_ROOT;
>   	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
> @@ -2337,12 +2389,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   
>   	vm->last_dir_update = NULL;
>   
> +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> +			AMDGPU_GEM_CREATE_VRAM_CLEARED;
> +	if (vm->is_vm_update_mode_cpu)
> +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> +	else
> +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> +				AMDGPU_GEM_CREATE_SHADOW);
> +
>   	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
>   			     AMDGPU_GEM_DOMAIN_VRAM,
> -			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> -			     AMDGPU_GEM_CREATE_SHADOW |
> -			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> -			     AMDGPU_GEM_CREATE_VRAM_CLEARED,
> +			     flags,
>   			     NULL, NULL, &vm->root.bo);
>   	if (r)
>   		goto error_free_sched_entity;