[PATCH 1/8] drm/xe/oa: Separate batch submission from waiting for completion

Cavitt, Jonathan jonathan.cavitt at intel.com
Thu Aug 8 20:18:21 UTC 2024


-----Original Message-----
From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of Ashutosh Dixit
Sent: Thursday, August 8, 2024 10:42 AM
To: intel-xe at lists.freedesktop.org
Cc: Nerlige Ramappa, Umesh <umesh.nerlige.ramappa at intel.com>; Souza, Jose <jose.souza at intel.com>; Landwerlin, Lionel G <lionel.g.landwerlin at intel.com>
Subject: [PATCH 1/8] drm/xe/oa: Separate batch submission from waiting for completion
> 
> When we introduce xe_syncs, we don't wait for internal OA programming
> batches to complete. That is, xe_syncs are signaled asynchronously. In
> anticipation of this, separate out batch submission from waiting for
> completion of those batches.
> 
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>

The added goto exit in xe_oa_emit_oa_config had me briefly
worried due to the fence variable not getting freed, but it
seems the new organization of the series never sets the
fence variable in the case of an error.
So, LGTM.
Reviewed-by: Jonathan Cavitt <jonathan.cavitt at intel.com>
-Jonathan Cavitt
> ---
>  drivers/gpu/drm/xe/xe_oa.c | 45 ++++++++++++++++++++++++--------------
>  1 file changed, 28 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> index 3ef92eb8fbb1e..d842c801fb9f1 100644
> --- a/drivers/gpu/drm/xe/xe_oa.c
> +++ b/drivers/gpu/drm/xe/xe_oa.c
> @@ -563,11 +563,10 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
>  	return ret;
>  }
>  
> -static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
> +static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb,
> +			   struct dma_fence **fence)
>  {
>  	struct xe_sched_job *job;
> -	struct dma_fence *fence;
> -	long timeout;
>  	int err = 0;
>  
>  	/* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */
> @@ -578,15 +577,8 @@ static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
>  	}
>  
>  	xe_sched_job_arm(job);
> -	fence = dma_fence_get(&job->drm.s_fence->finished);
> +	*fence = dma_fence_get(&job->drm.s_fence->finished);
>  	xe_sched_job_push(job);
> -
> -	timeout = dma_fence_wait_timeout(fence, false, HZ);
> -	dma_fence_put(fence);
> -	if (timeout < 0)
> -		err = timeout;
> -	else if (!timeout)
> -		err = -ETIME;
>  exit:
>  	return err;
>  }
> @@ -652,6 +644,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
>  static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc,
>  				  const struct flex *flex, u32 count)
>  {
> +	struct dma_fence *fence;
>  	struct xe_bb *bb;
>  	int err;
>  
> @@ -663,14 +656,16 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr
>  
>  	xe_oa_store_flex(stream, lrc, bb, flex, count);
>  
> -	err = xe_oa_submit_bb(stream, bb);
> -	xe_bb_free(bb, NULL);
> +	err = xe_oa_submit_bb(stream, bb, &fence);
> +	xe_bb_free(bb, fence);
> +	dma_fence_put(fence);
>  exit:
>  	return err;
>  }
>  
>  static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
>  {
> +	struct dma_fence *fence;
>  	struct xe_bb *bb;
>  	int err;
>  
> @@ -682,8 +677,9 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re
>  
>  	write_cs_mi_lri(bb, reg_lri, 1);
>  
> -	err = xe_oa_submit_bb(stream, bb);
> -	xe_bb_free(bb, NULL);
> +	err = xe_oa_submit_bb(stream, bb, &fence);
> +	xe_bb_free(bb, fence);
> +	dma_fence_put(fence);
>  exit:
>  	return err;
>  }
> @@ -913,15 +909,30 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config
>  {
>  #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
>  	struct xe_oa_config_bo *oa_bo;
> -	int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
> +	int err = 0, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
> +	struct dma_fence *fence;
> +	long timeout;
>  
> +	/* Emit OA configuration batch */
>  	oa_bo = xe_oa_alloc_config_buffer(stream, config);
>  	if (IS_ERR(oa_bo)) {
>  		err = PTR_ERR(oa_bo);
>  		goto exit;
>  	}
>  
> -	err = xe_oa_submit_bb(stream, oa_bo->bb);
> +	err = xe_oa_submit_bb(stream, oa_bo->bb, &fence);
> +	if (err)
> +		goto exit;
> +
> +	/* Wait till all previous batches have executed */
> +	timeout = dma_fence_wait_timeout(fence, false, 5 * HZ);
> +	dma_fence_put(fence);
> +	if (timeout < 0)
> +		err = timeout;
> +	else if (!timeout)
> +		err = -ETIME;
> +	if (err)
> +		drm_dbg(&stream->oa->xe->drm, "dma_fence_wait_timeout err %d\n", err);
>  
>  	/* Additional empirical delay needed for NOA programming after registers are written */
>  	usleep_range(us, 2 * us);
> -- 
> 2.41.0
> 
> 


More information about the Intel-xe mailing list