[PATCH 2/4] drm/amdgpu: add support for per VM BOs v2

Felix Kuehling felix.kuehling at amd.com
Wed Aug 30 22:50:00 UTC 2017


Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>

Some more thoughts inline, but nothing that should be addressed in this
change.

Regards,
  Felix


On 2017-08-30 11:00 AM, Christian König wrote:
> From: Christian König <christian.koenig at amd.com>
>
> Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
> need to be specified in the BO lists.
>
> v2: validate PDs/PTs first
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++----------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
>  3 files changed, 60 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index f68ac56..48e18cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -813,7 +813,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>  
>  	}
>  
> -	r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
> +	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
>  
>  	if (amdgpu_vm_debug && p->bo_list) {
>  		/* Invalidate all BOs to test for userspace bugs */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 4cdfb70..6cd20e7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  			spin_unlock(&glob->lru_lock);
>  		}
>  
> -		if (vm->use_cpu_for_update) {
> +		if (bo->tbo.type == ttm_bo_type_kernel &&
> +		    vm->use_cpu_for_update) {
>  			r = amdgpu_bo_kmap(bo, NULL);
>  			if (r)
>  				return r;
>  		}
>  
>  		spin_lock(&vm->status_lock);
> -		list_move(&bo_base->vm_status, &vm->relocated);
> +		if (bo->tbo.type != ttm_bo_type_kernel)
> +			list_move(&bo_base->vm_status, &vm->moved);
> +		else
> +			list_move(&bo_base->vm_status, &vm->relocated);
>  	}
>  	spin_unlock(&vm->status_lock);
>  
> @@ -1994,20 +1998,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>  }
>  
>  /**
> - * amdgpu_vm_clear_moved - clear moved BOs in the PT
> + * amdgpu_vm_handle_moved - handle moved BOs in the PT
>   *
>   * @adev: amdgpu_device pointer
>   * @vm: requested vm
> + * @sync: sync object to add fences to
>   *
> - * Make sure all moved BOs are cleared in the PT.
> + * Make sure all BOs which are moved are updated in the PTs.
>   * Returns 0 for success.
>   *
> - * PTs have to be reserved and mutex must be locked!
> + * PTs have to be reserved!
>   */
> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -			    struct amdgpu_sync *sync)
> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
> +			   struct amdgpu_vm *vm,
> +			   struct amdgpu_sync *sync)
>  {
>  	struct amdgpu_bo_va *bo_va = NULL;
> +	bool clear;
>  	int r = 0;
>  
>  	spin_lock(&vm->status_lock);
> @@ -2016,7 +2023,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  			struct amdgpu_bo_va, base.vm_status);
>  		spin_unlock(&vm->status_lock);
>  
> -		r = amdgpu_vm_bo_update(adev, bo_va, true);
> +		/* Per VM BOs never need to be cleared in the page tables */
> +		clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
> +
> +		r = amdgpu_vm_bo_update(adev, bo_va, clear);

Just thinking out loud: Per VM BOs don't need to be cleared because they
have separate lists for evicted and moved (but valid) BOs. If we had a
similar thing for non-VM BOs (but without the automatic validation),
this case could be optimized. Then moved but valid BOs could be updated
in the page table instead of clearing them here and updating the page
table again later.

>  		if (r)
>  			return r;
>  
> @@ -2068,6 +2078,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>  	return bo_va;
>  }
>  
> +
> +/**
> + * amdgpu_vm_bo_insert_map - insert a new mapping
> + *
> + * @adev: amdgpu_device pointer
> + * @bo_va: bo_va to store the address
> + * @mapping: the mapping to insert
> + *
> + * Insert a new mapping into all structures.
> + */
> +static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
> +				    struct amdgpu_bo_va *bo_va,
> +				    struct amdgpu_bo_va_mapping *mapping)
> +{
> +	struct amdgpu_vm *vm = bo_va->base.vm;
> +	struct amdgpu_bo *bo = bo_va->base.bo;
> +
> +	list_add(&mapping->list, &bo_va->invalids);
> +	amdgpu_vm_it_insert(mapping, &vm->va);
> +
> +	if (mapping->flags & AMDGPU_PTE_PRT)
> +		amdgpu_vm_prt_get(adev);
> +
> +	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
> +		spin_lock(&vm->status_lock);
> +		list_move(&bo_va->base.vm_status, &vm->moved);
> +		spin_unlock(&vm->status_lock);
> +	}
> +	trace_amdgpu_vm_bo_map(bo_va, mapping);
> +}
> +
>  /**
>   * amdgpu_vm_bo_map - map bo inside a vm
>   *
> @@ -2119,18 +2160,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
>  	if (!mapping)
>  		return -ENOMEM;
>  
> -	INIT_LIST_HEAD(&mapping->list);
>  	mapping->start = saddr;
>  	mapping->last = eaddr;
>  	mapping->offset = offset;
>  	mapping->flags = flags;
>  
> -	list_add(&mapping->list, &bo_va->invalids);
> -	amdgpu_vm_it_insert(mapping, &vm->va);
> -
> -	if (flags & AMDGPU_PTE_PRT)
> -		amdgpu_vm_prt_get(adev);
> -	trace_amdgpu_vm_bo_map(bo_va, mapping);
> +	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>  
>  	return 0;
>  }
> @@ -2157,7 +2192,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>  {
>  	struct amdgpu_bo_va_mapping *mapping;
>  	struct amdgpu_bo *bo = bo_va->base.bo;
> -	struct amdgpu_vm *vm = bo_va->base.vm;
>  	uint64_t eaddr;
>  	int r;
>  
> @@ -2191,12 +2225,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>  	mapping->offset = offset;
>  	mapping->flags = flags;
>  
> -	list_add(&mapping->list, &bo_va->invalids);
> -	amdgpu_vm_it_insert(mapping, &vm->va);
> -
> -	if (flags & AMDGPU_PTE_PRT)
> -		amdgpu_vm_prt_get(adev);
> -	trace_amdgpu_vm_bo_map(bo_va, mapping);
> +	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
>  
>  	return 0;
>  }
> @@ -2411,7 +2440,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>  		bo_base->moved = true;
>  		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
>  			spin_lock(&bo_base->vm->status_lock);
> -			list_move(&bo_base->vm_status, &vm->evicted);
> +			if (bo->tbo.type == ttm_bo_type_kernel)
> +				list_move(&bo_base->vm_status, &vm->evicted);
> +			else
> +				list_move_tail(&bo_base->vm_status,
> +					       &vm->evicted);
>  			spin_unlock(&bo_base->vm->status_lock);
>  			continue;
>  		}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index c3753af..90b7741 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>  			  struct amdgpu_vm *vm,
>  			  struct dma_fence **fence);
> -int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -			  struct amdgpu_sync *sync);
> +int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
> +			   struct amdgpu_vm *vm,
> +			   struct amdgpu_sync *sync);
>  int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>  			struct amdgpu_bo_va *bo_va,
>  			bool clear);



More information about the amd-gfx mailing list