[PATCH 4/7] drm/xe/oa: Signal output fences

Matthew Brost matthew.brost at intel.com
Fri Aug 30 22:45:17 UTC 2024


On Fri, Aug 30, 2024 at 03:16:15PM -0700, Ashutosh Dixit wrote:
> Introduce 'struct xe_oa_fence' which includes the dma_fence used to signal
> output fences in the xe_sync array. The fences are signaled
> asynchronously. When there are no output fences to signal, the OA
> configuration wait is synchronously re-introduced into the ioctl.
> 
> v2: Don't wait in the work, use callback + delayed work (Matt B)
>     Use a single, not a per-fence spinlock (Matt Brost)
> v3: Move ofence alloc before job submission (Matt)
>     Assert, don't fail, from dma_fence_add_callback (Matt)
>     Additional dma_fence_get for dma_fence_wait (Matt)
>     Change dma_fence_wait to non-interruptible (Matt)
> v4: Introduce last_fence to prevent uaf if stream is closed with
>     pending OA config jobs

The usage looks correct, so RB holds.

One suggestion which can be done in a follow up. I had to check the
locking to make sure this was correct (i.e. a mutex at the outer
layer was held to make this serial) and didn't notice any lockdep
annotations in your code. Lockdep is good at catching problems but also
self-documenting, which helps to review / maintain code. If possible
maybe sprinkle in lockdep annotations in the OA code.

Matt

> 
> Suggested-by: Matthew Brost <matthew.brost at intel.com>
> Reviewed-by: Jonathan Cavitt <jonathan.cavitt at intel.com>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
> Reviewed-by: Matthew Brost <matthew.brost at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_oa.c       | 117 ++++++++++++++++++++++++++-----
>  drivers/gpu/drm/xe/xe_oa_types.h |   6 ++
>  2 files changed, 106 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> index b4b68019d35b7..56f95fd431bf9 100644
> --- a/drivers/gpu/drm/xe/xe_oa.c
> +++ b/drivers/gpu/drm/xe/xe_oa.c
> @@ -100,6 +100,15 @@ struct xe_oa_config_bo {
>  	struct xe_bb *bb;
>  };
>  
> +struct xe_oa_fence {
> +	/* @base: dma fence base */
> +	struct dma_fence base;
> +	/* @work: work to signal @base */
> +	struct delayed_work work;
> +	/* @cb: callback to schedule @work */
> +	struct dma_fence_cb cb;
> +};
> +
>  #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x
>  
>  static const struct xe_oa_format oa_formats[] = {
> @@ -172,10 +181,10 @@ static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_se
>  	return oa_config;
>  }
>  
> -static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo)
> +static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence)
>  {
>  	xe_oa_config_put(oa_bo->oa_config);
> -	xe_bb_free(oa_bo->bb, NULL);
> +	xe_bb_free(oa_bo->bb, last_fence);
>  	kfree(oa_bo);
>  }
>  
> @@ -648,7 +657,8 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream)
>  
>  	xe_oa_config_put(stream->oa_config);
>  	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
> -		free_oa_config_bo(oa_bo);
> +		free_oa_config_bo(oa_bo, stream->last_fence);
> +	dma_fence_put(stream->last_fence);
>  }
>  
>  static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
> @@ -945,40 +955,112 @@ xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_c
>  	return oa_bo;
>  }
>  
> +static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence)
> +{
> +	dma_fence_put(stream->last_fence);
> +	stream->last_fence = dma_fence_get(fence);
> +}
> +
> +static void xe_oa_fence_work_fn(struct work_struct *w)
> +{
> +	struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work);
> +
> +	/* Signal fence to indicate new OA configuration is active */
> +	dma_fence_signal(&ofence->base);
> +	dma_fence_put(&ofence->base);
> +}
> +
> +static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
> +{
> +	/* Additional empirical delay needed for NOA programming after registers are written */
> +#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
> +
> +	struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb);
> +
> +	INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn);
> +	queue_delayed_work(system_unbound_wq, &ofence->work,
> +			   usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US));
> +	dma_fence_put(fence);
> +}
> +
> +static const char *xe_oa_get_driver_name(struct dma_fence *fence)
> +{
> +	return "xe_oa";
> +}
> +
> +static const char *xe_oa_get_timeline_name(struct dma_fence *fence)
> +{
> +	return "unbound";
> +}
> +
> +static const struct dma_fence_ops xe_oa_fence_ops = {
> +	.get_driver_name = xe_oa_get_driver_name,
> +	.get_timeline_name = xe_oa_get_timeline_name,
> +};
> +
>  static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config)
>  {
>  #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
>  	struct xe_oa_config_bo *oa_bo;
> -	int err = 0, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
> +	struct xe_oa_fence *ofence;
> +	int i, err, num_signal = 0;
>  	struct dma_fence *fence;
> -	long timeout;
>  
> -	/* Emit OA configuration batch */
> +	ofence = kzalloc(sizeof(*ofence), GFP_KERNEL);
> +	if (!ofence) {
> +		err = -ENOMEM;
> +		goto exit;
> +	}
> +
>  	oa_bo = xe_oa_alloc_config_buffer(stream, config);
>  	if (IS_ERR(oa_bo)) {
>  		err = PTR_ERR(oa_bo);
>  		goto exit;
>  	}
>  
> +	/* Emit OA configuration batch */
>  	fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb);
>  	if (IS_ERR(fence)) {
>  		err = PTR_ERR(fence);
>  		goto exit;
>  	}
>  
> -	/* Wait till all previous batches have executed */
> -	timeout = dma_fence_wait_timeout(fence, false, 5 * HZ);
> -	dma_fence_put(fence);
> -	if (timeout < 0)
> -		err = timeout;
> -	else if (!timeout)
> -		err = -ETIME;
> -	if (err)
> -		drm_dbg(&stream->oa->xe->drm, "dma_fence_wait_timeout err %d\n", err);
> +	/* Point of no return: initialize and set fence to signal */
> +	dma_fence_init(&ofence->base, &xe_oa_fence_ops, &stream->oa_fence_lock, 0, 0);
>  
> -	/* Additional empirical delay needed for NOA programming after registers are written */
> -	usleep_range(us, 2 * us);
> +	for (i = 0; i < stream->num_syncs; i++) {
> +		if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL)
> +			num_signal++;
> +		xe_sync_entry_signal(&stream->syncs[i], &ofence->base);
> +	}
> +
> +	/* Additional dma_fence_get in case we dma_fence_wait */
> +	if (!num_signal)
> +		dma_fence_get(&ofence->base);
> +
> +	/* Update last fence too before adding callback */
> +	xe_oa_update_last_fence(stream, fence);
> +
> +	/* Add job fence callback to schedule work to signal ofence->base */
> +	err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb);
> +	xe_gt_assert(stream->gt, !err || err == -ENOENT);
> +	if (err == -ENOENT)
> +		xe_oa_config_cb(fence, &ofence->cb);
> +
> +	/* If nothing needs to be signaled we wait synchronously */
> +	if (!num_signal) {
> +		dma_fence_wait(&ofence->base, false);
> +		dma_fence_put(&ofence->base);
> +	}
> +
> +	/* Done with syncs */
> +	for (i = 0; i < stream->num_syncs; i++)
> +		xe_sync_entry_cleanup(&stream->syncs[i]);
> +	kfree(stream->syncs);
> +
> +	return 0;
>  exit:
> +	kfree(ofence);
>  	return err;
>  }
>  
> @@ -1479,6 +1561,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
>  		goto err_free_oa_buf;
>  	}
>  
> +	spin_lock_init(&stream->oa_fence_lock);
>  	ret = xe_oa_enable_metric_set(stream);
>  	if (ret) {
>  		drm_dbg(&stream->oa->xe->drm, "Unable to enable metric set\n");
> diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
> index 99f4b2d4bdcf6..935f98772a948 100644
> --- a/drivers/gpu/drm/xe/xe_oa_types.h
> +++ b/drivers/gpu/drm/xe/xe_oa_types.h
> @@ -239,6 +239,12 @@ struct xe_oa_stream {
>  	/** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */
>  	u32 no_preempt;
>  
> +	/** @last_fence: fence to use in stream destroy when needed */
> +	struct dma_fence *last_fence;
> +
> +	/** @oa_fence_lock: Lock for struct xe_oa_fence */
> +	spinlock_t oa_fence_lock;
> +
>  	/** @num_syncs: size of @syncs array */
>  	u32 num_syncs;
>  
> -- 
> 2.41.0
> 


More information about the Intel-xe mailing list