[Intel-xe] [PATCH] drm/xe: Add child contexts to the GuC context lookup

Matthew Brost matthew.brost at intel.com
Wed Sep 6 05:02:58 UTC 2023


On Wed, Aug 30, 2023 at 05:29:31PM -0700, Daniele Ceraolo Spurio wrote:
> The CAT_ERROR message from the GuC provides the guc id of the context
> that caused the problem, which can be a child context. We therefore
> need to be able to match that id to the exec_queue that owns it, which
> we do by adding child contexts to the context lookup.
> 
> While at it, fix the error path of the guc id allocation code to
> correctly free the ids allocated for parallel queues.
> 
> Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/590
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>

Reviewed-by: Matthew Brost <matthew.brost at intel.com>

> Cc: John Harrison <John.C.Harrison at Intel.com>
> ---
>  drivers/gpu/drm/xe/xe_guc_submit.c | 44 ++++++++++++++++++++----------
>  1 file changed, 30 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index c6a9e17d6889..f912b6fd51ad 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -245,10 +245,28 @@ int xe_guc_submit_init(struct xe_guc *guc)
>  	return 0;
>  }
>  
> +static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
> +{
> +	int i;
> +
> +	lockdep_assert_held(&guc->submission_state.lock);
> +
> +	for (i = 0; i < xa_count; ++i)
> +		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);
> +
> +	if (xe_exec_queue_is_parallel(q))
> +		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
> +				      q->guc->id - GUC_ID_START_MLRC,
> +				      order_base_2(q->width));
> +	else
> +		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
> +}
> +
>  static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
>  {
>  	int ret;
>  	void *ptr;
> +	int i;
>  
>  	/*
>  	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
> @@ -275,30 +293,27 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
>  	if (xe_exec_queue_is_parallel(q))
>  		q->guc->id += GUC_ID_START_MLRC;
>  
> -	ptr = xa_store(&guc->submission_state.exec_queue_lookup,
> -		       q->guc->id, q, GFP_NOWAIT);
> -	if (IS_ERR(ptr)) {
> -		ret = PTR_ERR(ptr);
> -		goto err_release;
> +	for (i = 0; i < q->width; ++i) {
> +		ptr = xa_store(&guc->submission_state.exec_queue_lookup,
> +			       q->guc->id + i, q, GFP_NOWAIT);
> +		if (IS_ERR(ptr)) {
> +			ret = PTR_ERR(ptr);
> +			goto err_release;
> +		}
>  	}
>  
>  	return 0;
>  
>  err_release:
> -	ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
> +	__release_guc_id(guc, q, i);
> +
>  	return ret;
>  }
>  
>  static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
>  {
>  	mutex_lock(&guc->submission_state.lock);
> -	xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id);
> -	if (xe_exec_queue_is_parallel(q))
> -		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
> -				      q->guc->id - GUC_ID_START_MLRC,
> -				      order_base_2(q->width));
> -	else
> -		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
> +	__release_guc_id(guc, q, q->width);
>  	mutex_unlock(&guc->submission_state.lock);
>  }
>  
> @@ -1473,7 +1488,8 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
>  		return NULL;
>  	}
>  
> -	XE_WARN_ON(q->guc->id != guc_id);
> +	XE_WARN_ON(guc_id < q->guc->id);
> +	XE_WARN_ON(guc_id > (q->guc->id + q->width - 1));
>  
>  	return q;
>  }
> -- 
> 2.41.0
> 


More information about the Intel-xe mailing list