[PATCH] drm/amdgpu: Get rid of amdgpu_job->external_hw_fence

Christian König christian.koenig at amd.com
Wed Jul 13 17:33:06 UTC 2022


Am 13.07.22 um 19:13 schrieb Andrey Grodzovsky:
> This is a follow-up cleanup to [1]. See bellow refcount balancing
> for calling amdgpu_job_submit_direct after this cleanup as far
> as I calculated.
>
> amdgpu_fence_emit
> 	dma_fence_init 1
> 	dma_fence_get(fence) 2
> 	rcu_assign_pointer(*ptr, dma_fence_get(fence) 3
>
> ---> amdgpu_job_submit_direct completes before fence signaled
> 			amdgpu_sa_bo_free
> 				(*sa_bo)->fence = dma_fence_get(fence) 4
>
> 			amdgpu_job_free
> 				dma_fence_put 3
>
> 			amdgpu_vcn_enc_get_destroy_msg
> 				*fence = dma_fence_get(f) 4
> 				dma_fence_put(f); 3
>
> 			amdgpu_vcn_enc_ring_test_ib
> 				dma_fence_put(fence) 2
>
> 			amdgpu_fence_process
> 				dma_fence_put 1
>
> 			amdgpu_sa_bo_remove_locked
> 				dma_fence_put 0
>
> ---> amdgpu_job_submit_direct completes after fence signaled
> 			amdgpu_fence_process
> 				dma_fence_put 2
>
> 			amdgpu_job_free
> 				dma_fence_put 1
>
> 			amdgpu_vcn_enc_get_destroy_msg
> 				*fence = dma_fence_get(f) 2
> 				dma_fence_put(f); 1
>
> 			amdgpu_vcn_enc_ring_test_ib
> 				dma_fence_put(fence) 0
>
> [1] - https://patchwork.kernel.org/project/dri-devel/cover/20220624180955.485440-1-andrey.grodzovsky@amd.com/
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> Suggested-by: Christian König <christian.koenig at amd.com>

Of hand that looks correct to me, but could be that I'm missing 
something as well.

Anyway I think I can give an Reviewed-by: Christian König 
<christian.koenig at amd.com> for this.

Thanks,
Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 27 ++++------------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.h    |  1 -
>   3 files changed, 6 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 16faea7ed1cd..b79ee4ffb879 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -5229,8 +5229,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>   	 *
>   	 * job->base holds a reference to parent fence
>   	 */
> -	if (job && (job->hw_fence.ops != NULL) &&
> -	    dma_fence_is_signaled(&job->hw_fence)) {
> +	if (job && dma_fence_is_signaled(&job->hw_fence)) {
>   		job_signaled = true;
>   		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
>   		goto skip_hw_reset;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 6fa381ee5fa0..10fdd12cf853 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -134,16 +134,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
>   {
>   	struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
>   	struct dma_fence *f;
> -	struct dma_fence *hw_fence;
>   	unsigned i;
>   
> -	if (job->hw_fence.ops == NULL)
> -		hw_fence = job->external_hw_fence;
> -	else
> -		hw_fence = &job->hw_fence;
> -
>   	/* use sched fence if available */
> -	f = job->base.s_fence ? &job->base.s_fence->finished : hw_fence;
> +	f = job->base.s_fence ? &job->base.s_fence->finished :  &job->hw_fence;
>   	for (i = 0; i < job->num_ibs; ++i)
>   		amdgpu_ib_free(ring->adev, &job->ibs[i], f);
>   }
> @@ -157,11 +151,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>   	amdgpu_sync_free(&job->sync);
>   	amdgpu_sync_free(&job->sched_sync);
>   
> -    /* only put the hw fence if has embedded fence */
> -	if (job->hw_fence.ops != NULL)
> -		dma_fence_put(&job->hw_fence);
> -	else
> -		kfree(job);
> +	dma_fence_put(&job->hw_fence);
>   }
>   
>   void amdgpu_job_free(struct amdgpu_job *job)
> @@ -170,11 +160,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
>   	amdgpu_sync_free(&job->sync);
>   	amdgpu_sync_free(&job->sched_sync);
>   
> -	/* only put the hw fence if has embedded fence */
> -	if (job->hw_fence.ops != NULL)
> -		dma_fence_put(&job->hw_fence);
> -	else
> -		kfree(job);
> +	dma_fence_put(&job->hw_fence);
>   }
>   
>   int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
> @@ -204,15 +190,12 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
>   	int r;
>   
>   	job->base.sched = &ring->sched;
> -	r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence);
> -	/* record external_hw_fence for direct submit */
> -	job->external_hw_fence = dma_fence_get(*fence);
> +	r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence);
> +
>   	if (r)
>   		return r;
>   
>   	amdgpu_job_free(job);
> -	dma_fence_put(*fence);
> -
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> index d599c0540b46..babc0af751c2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> @@ -50,7 +50,6 @@ struct amdgpu_job {
>   	struct amdgpu_sync	sync;
>   	struct amdgpu_sync	sched_sync;
>   	struct dma_fence	hw_fence;
> -	struct dma_fence	*external_hw_fence;
>   	uint32_t		preamble_status;
>   	uint32_t                preemption_status;
>   	bool                    vm_needs_flush;



More information about the amd-gfx mailing list