[PATCH 2/3] drm/amdgpu: fix dependency issue

Zhang, Jerry (Junwei) Jerry.Zhang at amd.com
Tue May 9 08:38:01 UTC 2017


On 05/09/2017 04:14 PM, Chunming Zhou wrote:
> The problem is that executing the jobs in the right order doesn't give you the right result,
> because consecutive jobs executed on the same engine are pipelined.
> In other words, job B does its buffer read before job A has written its result.
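
To make the hazard concrete, here is a tiny user-space model (pthreads, with
hypothetical names; in the driver the ordering is enforced by the engine's
pipeline sync packet, not a mutex):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static int buffer;                 /* written by job A, read by job B */
static bool a_done;                /* models job A's writes landing */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

/* Drain prior work: without this, B may read stale data even though it
 * was submitted after A, because the two jobs overlap in the pipe. */
static void pipeline_sync(void)
{
	pthread_mutex_lock(&lock);
	while (!a_done)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

static void *job_a(void *arg)
{
	buffer = 42;               /* job A's result */
	pthread_mutex_lock(&lock);
	a_done = true;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

static void *job_b(void *arg)
{
	pipeline_sync();           /* drop this and B can print 0 */
	printf("job B read %d\n", buffer);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	/* Starting A "first" doesn't help: both run concurrently, just
	 * like back-to-back jobs pipelined on one engine. */
	pthread_create(&a, NULL, job_a, NULL);
	pthread_create(&b, NULL, job_b, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}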
>
> Change-Id: I9065abf5bafbda7a92702ca11477315d25c9a347
> Signed-off-by: Chunming Zhou <David1.Zhou at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c        |  2 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  4 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  2 +-
>   drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 17 +++++++++++++++++
>   drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  2 ++
>   6 files changed, 27 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index dd2bd9a..787acd7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1169,6 +1169,7 @@ struct amdgpu_job {
>   	void			*owner;
>   	uint64_t		fence_ctx; /* the fence_context this job uses */
>   	bool                    vm_needs_flush;
> +	bool			need_pipeline_sync;
>   	unsigned		vm_id;
>   	uint64_t		vm_pd_addr;
>   	uint32_t		gds_base, gds_size;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 06202e2..2c6624d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -167,6 +167,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   		return r;
>   	}
>
> +	if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync)
> +		amdgpu_ring_emit_pipeline_sync(ring);
>   	if (vm) {
>   		amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go too fast than DE */
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 690ef3d..cfa97ab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -57,6 +57,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
>   	(*job)->vm = vm;
>   	(*job)->ibs = (void *)&(*job)[1];
>   	(*job)->num_ibs = num_ibs;
> +	(*job)->need_pipeline_sync = false;
>
>   	amdgpu_sync_create(&(*job)->sync);
>
> @@ -152,6 +153,9 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
>   		fence = amdgpu_sync_get_fence(&job->sync);
>   	}
>
> +	if (amd_sched_dependency_optimized(fence, sched_job->s_entity))
> +		job->need_pipeline_sync = true;
> +
>   	return fence;
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b35b853..b4f83fc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -738,7 +738,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
>   	if (ring->funcs->init_cond_exec)
>   		patch_offset = amdgpu_ring_init_cond_exec(ring);
>
> -	if (ring->funcs->emit_pipeline_sync)
> +	if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync)

Perhaps we could emit the pipeline sync regardless of the need_pipeline_sync flag.
Could you explain the reasoning behind this change?
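
For context, here is my reading of the intended flow as a compilable toy
sketch (hypothetical helper names, not the actual driver code):

#include <stdbool.h>
#include <stdio.h>

struct job {
	bool need_pipeline_sync;  /* set when a same-ring dependency was optimized away */
};

static void emit_pipeline_sync(const char *where)
{
	printf("pipeline sync emitted in %s\n", where);
}

static void vm_flush(struct job *job)      /* models amdgpu_vm_flush() */
{
	if (!job->need_pipeline_sync)
		emit_pipeline_sync("vm_flush");
	/* else: already emitted by ib_schedule(), avoid a double sync */
}

static void ib_schedule(struct job *job)   /* models amdgpu_ib_schedule() */
{
	if (job->need_pipeline_sync)
		emit_pipeline_sync("ib_schedule");  /* the new early emit */
	vm_flush(job);
}

int main(void)
{
	struct job optimized = { .need_pipeline_sync = true };
	struct job normal    = { .need_pipeline_sync = false };

	ib_schedule(&optimized);   /* one sync, emitted early */
	ib_schedule(&normal);      /* one sync, emitted in vm_flush */
	return 0;
}

If that reading is right, the check only avoids emitting the same sync twice;
I'd like to understand whether skipping it in vm_flush is required for
correctness or just saves a redundant packet.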

Jerry

>   		amdgpu_ring_emit_pipeline_sync(ring);
>
>   	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> index 3c258c3..fec870a 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> @@ -235,6 +235,23 @@ static void amd_sched_entity_clear_dep(struct fence *f, struct fence_cb *cb)
>   	fence_put(f);
>   }
>
> +bool amd_sched_dependency_optimized(struct fence* fence,
> +				    struct amd_sched_entity *entity)
> +{
> +	struct amd_gpu_scheduler *sched = entity->sched;
> +	struct amd_sched_fence *s_fence;
> +
> +	if (!fence || fence_is_signaled(fence))
> +		return false;
> +	if (fence->context == entity->fence_context)
> +		return true;
> +	s_fence = to_amd_sched_fence(fence);
> +	if (s_fence && s_fence->sched == sched)
> +		return true;
> +
> +	return false;
> +}
> +
>   static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
>   {
>   	struct amd_gpu_scheduler *sched = entity->sched;
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> index 2531b41..bfda90f 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> @@ -162,4 +162,6 @@ int amd_sched_job_init(struct amd_sched_job *job,
>   		       void *owner);
>   void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
>   void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
> +bool amd_sched_dependency_optimized(struct fence* fence,
> +				    struct amd_sched_entity *entity);
>   #endif
>

