[PATCH v2] drm/amdgpu: Get rid of dep_sync as a separate object.

Christian König ckoenig.leichtzumerken at gmail.com
Fri Dec 1 19:59:40 UTC 2017


On 01.12.2017 at 20:39, Andrey Grodzovsky wrote:
> Instead, mark the fence as explicit in its amdgpu_sync_entry.
>
> v2:
> Fix use after free bug and add new parameter description.
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  1 -
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 14 +++++++-------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  | 26 ++++++++++++--------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 23 +++++++++++++++++------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h |  6 +++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 16 ++++++++--------
>   7 files changed, 48 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index f8657c3..c56a986 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1121,7 +1121,6 @@ struct amdgpu_job {
>   	struct amdgpu_vm	*vm;
>   	struct amdgpu_ring	*ring;
>   	struct amdgpu_sync	sync;
> -	struct amdgpu_sync	dep_sync;
>   	struct amdgpu_sync	sched_sync;
>   	struct amdgpu_ib	*ibs;
>   	struct dma_fence	*fence; /* the hw fence */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index d15836b..b694d35 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -788,7 +788,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>   		return r;
>   
>   	r = amdgpu_sync_fence(adev, &p->job->sync,
> -			      fpriv->prt_va->last_pt_update);
> +			      fpriv->prt_va->last_pt_update, false);
>   	if (r)
>   		return r;
>   
> @@ -802,7 +802,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>   			return r;
>   
>   		f = bo_va->last_pt_update;
> -		r = amdgpu_sync_fence(adev, &p->job->sync, f);
> +		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
>   		if (r)
>   			return r;
>   	}
> @@ -825,7 +825,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>   				return r;
>   
>   			f = bo_va->last_pt_update;
> -			r = amdgpu_sync_fence(adev, &p->job->sync, f);
> +			r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
>   			if (r)
>   				return r;
>   		}
> @@ -836,7 +836,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>   	if (r)
>   		return r;
>   
> -	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update);
> +	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
>   	if (r)
>   		return r;
>   
> @@ -1040,8 +1040,8 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
>   			amdgpu_ctx_put(ctx);
>   			return r;
>   		} else if (fence) {
> -			r = amdgpu_sync_fence(p->adev, &p->job->dep_sync,
> -					      fence);
> +			r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
> +					true);
>   			dma_fence_put(fence);
>   			amdgpu_ctx_put(ctx);
>   			if (r)
> @@ -1060,7 +1060,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
>   	if (r)
>   		return r;
>   
> -	r = amdgpu_sync_fence(p->adev, &p->job->dep_sync, fence);
> +	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
>   	dma_fence_put(fence);
>   
>   	return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 659997b..0cf86eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -164,7 +164,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   	}
>   
>   	if (ring->funcs->emit_pipeline_sync && job &&
> -	    ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
> +	    ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
>   	     amdgpu_vm_need_pipeline_sync(ring, job))) {
>   		need_pipe_sync = true;
>   		dma_fence_put(tmp);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 18770a8..61fb934 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -60,7 +60,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
>   	(*job)->num_ibs = num_ibs;
>   
>   	amdgpu_sync_create(&(*job)->sync);
> -	amdgpu_sync_create(&(*job)->dep_sync);
>   	amdgpu_sync_create(&(*job)->sched_sync);
>   	(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
>   
> @@ -104,7 +103,6 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
>   	amdgpu_ring_priority_put(job->ring, s_job->s_priority);
>   	dma_fence_put(job->fence);
>   	amdgpu_sync_free(&job->sync);
> -	amdgpu_sync_free(&job->dep_sync);
>   	amdgpu_sync_free(&job->sched_sync);
>   	kfree(job);
>   }
> @@ -115,7 +113,6 @@ void amdgpu_job_free(struct amdgpu_job *job)
>   
>   	dma_fence_put(job->fence);
>   	amdgpu_sync_free(&job->sync);
> -	amdgpu_sync_free(&job->dep_sync);
>   	amdgpu_sync_free(&job->sched_sync);
>   	kfree(job);
>   }
> @@ -149,17 +146,18 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job,
>   {
>   	struct amdgpu_job *job = to_amdgpu_job(sched_job);
>   	struct amdgpu_vm *vm = job->vm;
> -
> -	struct dma_fence *fence = amdgpu_sync_get_fence(&job->dep_sync);
> +	bool explicit = false;
>   	int r;
> -
> -	if (amd_sched_dependency_optimized(fence, s_entity)) {
> -		r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence);
> -		if (r)
> -			DRM_ERROR("Error adding fence to sync (%d)\n", r);
> +	struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync, &explicit);
> +
> +	if (fence && explicit) {
> +		if (amd_sched_dependency_optimized(fence, s_entity)) {
> +			r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence, false);
> +			if (r)
> +				DRM_ERROR("Error adding fence to sync (%d)\n", r);
> +		}
>   	}
> -	if (!fence)
> -		fence = amdgpu_sync_get_fence(&job->sync);
> +
>   	while (fence == NULL && vm && !job->vm_id) {
>   		struct amdgpu_ring *ring = job->ring;
>   
> @@ -169,7 +167,7 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job,
>   		if (r)
>   			DRM_ERROR("Error getting VM ID (%d)\n", r);
>   
> -		fence = amdgpu_sync_get_fence(&job->sync);
> +		fence = amdgpu_sync_get_fence(&job->sync, NULL);
>   	}
>   
>   	return fence;
> @@ -190,7 +188,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
>   	finished = &job->base.s_fence->finished;
>   	adev = job->adev;
>   
> -	BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
> +	BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL, NULL));
>   
>   	trace_amdgpu_sched_run_job(job);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index a4bf21f..6c3dda1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -35,6 +35,7 @@
>   struct amdgpu_sync_entry {
>   	struct hlist_node	node;
>   	struct dma_fence	*fence;
> +	bool	explicit;
>   };
>   
>   static struct kmem_cache *amdgpu_sync_slab;
> @@ -141,7 +142,7 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
>    *
>    */
>   int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
> -		      struct dma_fence *f)
> +		      struct dma_fence *f, bool explicit)
>   {
>   	struct amdgpu_sync_entry *e;
>   
> @@ -159,6 +160,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
>   	if (!e)
>   		return -ENOMEM;
>   
> +	e->explicit = explicit;
> +
>   	hash_add(sync->fences, &e->node, f->context);
>   	e->fence = dma_fence_get(f);
>   	return 0;
> @@ -189,7 +192,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
>   
>   	/* always sync to the exclusive fence */
>   	f = reservation_object_get_excl(resv);
> -	r = amdgpu_sync_fence(adev, sync, f);
> +	r = amdgpu_sync_fence(adev, sync, f, false);
>   
>   	if (explicit_sync)
>   		return r;
> @@ -220,7 +223,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
>   				continue;
>   		}
>   
> -		r = amdgpu_sync_fence(adev, sync, f);
> +		r = amdgpu_sync_fence(adev, sync, f, false);
>   		if (r)
>   			break;
>   	}
> @@ -232,12 +235,14 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
>    *
>    * @sync: the sync object
>    * @ring: optional ring to use for test
> + * @explicit: true if the next fence is explicit
>    *
>    * Returns the next fence not signaled yet without removing it from the sync
>    * object.
>    */
>   struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
> -					 struct amdgpu_ring *ring)
> +					 struct amdgpu_ring *ring,
> +					 bool *explicit)
>   {
>   	struct amdgpu_sync_entry *e;
>   	struct hlist_node *tmp;
> @@ -247,6 +252,10 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
>   		struct dma_fence *f = e->fence;
>   		struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
>   
> +
> +		if (explicit)
> +			*explicit = e->explicit;
> +

We don't need this for _peek_fence, do we?
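
All call sites in the patch pass NULL for the new parameter anyway, so
as far as I can see _peek_fence could simply keep its old signature.
Untested sketch, just to illustrate:

struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
					 struct amdgpu_ring *ring);

That would also get rid of the extra NULL arguments in amdgpu_vm.c and
amdgpu_job.c again.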

Apart from that the patch is Reviewed-by: Christian König 
<christian.koenig at amd.com>.

Now the next step for you is to think a bit about how we can further 
optimize this.

For example, we don't need to keep the implicit fence around any more 
when we add an explicit one which is newer.
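
Completely untested sketch of what I have in mind, assuming
amdgpu_sync_fence() passes its explicit flag through to
amdgpu_sync_add_later():

static bool amdgpu_sync_add_later(struct amdgpu_sync *sync,
				  struct dma_fence *f, bool explicit)
{
	struct amdgpu_sync_entry *e;

	hash_for_each_possible(sync->fences, e, node, f->context) {
		if (unlikely(e->fence->context != f->context))
			continue;

		/* Keep only the newer of the two fences. */
		amdgpu_sync_keep_later(&e->fence, f);

		/* An explicit dependency supersedes the implicit one,
		 * so never downgrade the entry back to implicit.
		 */
		e->explicit |= explicit;
		return true;
	}

	return false;
}

With that the older implicit fence for a context is dropped as soon as
a newer one is added, and an entry once marked explicit stays explicit.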

Regards,
Christian.

>   		if (dma_fence_is_signaled(f)) {
>   			hash_del(&e->node);
>   			dma_fence_put(f);
> @@ -275,19 +284,21 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
>    * amdgpu_sync_get_fence - get the next fence from the sync object
>    *
>    * @sync: sync object to use
> + * @explicit: true if the next fence is explicit
>    *
>    * Get and removes the next fence from the sync object not signaled yet.
>    */
> -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
> +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit)
>   {
>   	struct amdgpu_sync_entry *e;
>   	struct hlist_node *tmp;
>   	struct dma_fence *f;
>   	int i;
> -
>   	hash_for_each_safe(sync->fences, i, tmp, e, node) {
>   
>   		f = e->fence;
> +		if (explicit)
> +			*explicit = e->explicit;
>   
>   		hash_del(&e->node);
>   		kmem_cache_free(amdgpu_sync_slab, e);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> index 70d7e3a..82c614d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> @@ -41,15 +41,15 @@ struct amdgpu_sync {
>   
>   void amdgpu_sync_create(struct amdgpu_sync *sync);
>   int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
> -		      struct dma_fence *f);
> +		      struct dma_fence *f, bool explicit);
>   int amdgpu_sync_resv(struct amdgpu_device *adev,
>   		     struct amdgpu_sync *sync,
>   		     struct reservation_object *resv,
>   		     void *owner,
>   		     bool explicit_sync);
>   struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
> -				     struct amdgpu_ring *ring);
> -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
> +				     struct amdgpu_ring *ring, bool *explicit);
> +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
>   int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
>   void amdgpu_sync_free(struct amdgpu_sync *sync);
>   int amdgpu_sync_init(void);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 7de519b..6a332f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -486,9 +486,9 @@ static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
>   		needs_flush = true;
>   		/* to prevent one context starved by another context */
>   		id->pd_gpu_addr = 0;
> -		tmp = amdgpu_sync_peek_fence(&id->active, ring);
> +		tmp = amdgpu_sync_peek_fence(&id->active, ring, NULL);
>   		if (tmp) {
> -			r = amdgpu_sync_fence(adev, sync, tmp);
> +			r = amdgpu_sync_fence(adev, sync, tmp, false);
>   			return r;
>   		}
>   	}
> @@ -496,7 +496,7 @@ static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
>   	/* Good we can use this VMID. Remember this submission as
>   	* user of the VMID.
>   	*/
> -	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
> +	r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
>   	if (r)
>   		goto out;
>   
> @@ -556,7 +556,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>   	/* Check if we have an idle VMID */
>   	i = 0;
>   	list_for_each_entry(idle, &id_mgr->ids_lru, list) {
> -		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
> +		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring, NULL);
>   		if (!fences[i])
>   			break;
>   		++i;
> @@ -583,7 +583,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>   		}
>   
>   
> -		r = amdgpu_sync_fence(ring->adev, sync, &array->base);
> +		r = amdgpu_sync_fence(ring->adev, sync, &array->base, false);
>   		dma_fence_put(&array->base);
>   		if (r)
>   			goto error;
> @@ -626,7 +626,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>   		/* Good we can use this VMID. Remember this submission as
>   		 * user of the VMID.
>   		 */
> -		r = amdgpu_sync_fence(ring->adev, &id->active, fence);
> +		r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
>   		if (r)
>   			goto error;
>   
> @@ -646,7 +646,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>   	id = idle;
>   
>   	/* Remember this submission as user of the VMID */
> -	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
> +	r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
>   	if (r)
>   		goto error;
>   
> @@ -1657,7 +1657,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   		addr = 0;
>   	}
>   
> -	r = amdgpu_sync_fence(adev, &job->sync, exclusive);
> +	r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
>   	if (r)
>   		goto error_free;
>   


