[Intel-gfx] [PATCH 2/2] drm/i915/guc: Remove hacks for reset and schedule disable G2H being received out of order

John Harrison john.c.harrison at intel.com
Thu Jan 13 17:35:31 UTC 2022


On 1/11/2022 15:11, Matthew Brost wrote:
> In the i915 there are several hacks in place to make request cancelation
> work with an old version of the GuC which delivered the G2H indicating
> schedule disable is done before G2H indicating a context reset. Version
> 69 fixes this, so we can remove these hacks.
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
Reviewed-by: John Harrison <John.C.Harrison at Intel.com>

> ---
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 30 ++-----------------
>   1 file changed, 2 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 23a40f10d376d..3918f1be114fa 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -1533,7 +1533,6 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
>   	unsigned long flags;
>   	u32 head;
>   	int i, number_children = ce->parallel.number_children;
> -	bool skip = false;
>   	struct intel_context *parent = ce;
>   
>   	GEM_BUG_ON(intel_context_is_child(ce));
> @@ -1544,23 +1543,10 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
>   	 * GuC will implicitly mark the context as non-schedulable when it sends
>   	 * the reset notification. Make sure our state reflects this change. The
>   	 * context will be marked enabled on resubmission.
> -	 *
> -	 * XXX: If the context is reset as a result of the request cancellation
> -	 * this G2H is received after the schedule disable complete G2H which is
> -	 * wrong as this creates a race between the request cancellation code
> -	 * re-submitting the context and this G2H handler. This is a bug in the
> -	 * GuC but can be worked around in the meantime but converting this to a
> -	 * NOP if a pending enable is in flight as this indicates that a request
> -	 * cancellation has occurred.
>   	 */
>   	spin_lock_irqsave(&ce->guc_state.lock, flags);
> -	if (likely(!context_pending_enable(ce)))
> -		clr_context_enabled(ce);
> -	else
> -		skip = true;
> +	clr_context_enabled(ce);
>   	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> -	if (unlikely(skip))
> -		goto out_put;
>   
>   	/*
>   	 * For each context in the relationship find the hanging request
> @@ -1592,7 +1578,6 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
>   	}
>   
>   	__unwind_incomplete_requests(parent);
> -out_put:
>   	intel_context_put(parent);
>   }
>   
> @@ -2531,12 +2516,6 @@ static void guc_context_cancel_request(struct intel_context *ce,
>   					true);
>   		}
>   
> -		/*
> -		 * XXX: Racey if context is reset, see comment in
> -		 * __guc_reset_context().
> -		 */
> -		flush_work(&ce_to_guc(ce)->ct.requests.worker);
> -
>   		guc_context_unblock(block_context);
>   		intel_context_put(ce);
>   	}
> @@ -3971,12 +3950,7 @@ static void guc_handle_context_reset(struct intel_guc *guc,
>   {
>   	trace_intel_context_reset(ce);
>   
> -	/*
> -	 * XXX: Racey if request cancellation has occurred, see comment in
> -	 * __guc_reset_context().
> -	 */
> -	if (likely(!intel_context_is_banned(ce) &&
> -		   !context_blocked(ce))) {
> +	if (likely(!intel_context_is_banned(ce))) {
>   		capture_error_state(guc, ce);
>   		guc_context_replay(ce);
>   	} else {



More information about the Intel-gfx mailing list