[Intel-gfx] [PATCH v5 02/25] drm/i915/guc: Fix outstanding G2H accounting

John Harrison john.c.harrison at intel.com
Thu Sep 2 22:11:34 UTC 2021


On 9/1/2021 17:49, Daniele Ceraolo Spurio wrote:
> From: Matthew Brost <matthew.brost at intel.com>
>
> A small race that could result in incorrect accounting of the number
> of outstanding G2H. Basically prior to this patch we did not increment
> the number of outstanding G2H if we encoutered a GT reset while sending
> a H2G. This was incorrect as the context state had already been updated
> to anticipate a G2H response thus the counter should be incremented.
>
> As part of this change we remove a legacy (now unused) path that was the
> last caller requiring a G2H response that was not guaranteed to loop.
> This allows us to simplify the accounting as we don't need to handle the
> case where the send fails due to the channel being busy.
>
> Also always use helper when decrementing this value.
>
> v2 (Daniele): update GEM_BUG_ON check, pull in dead code removal from
> later patch, remove loop param from context_deregister.
>
> Fixes: f4eb1f3fe946 ("drm/i915/guc: Ensure G2H response has space in buffer")
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: <stable at vger.kernel.org>
Reviewed-by: John Harrison <John.C.Harrison at Intel.com>

> ---
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 79 +++++++++----------
>   1 file changed, 37 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 69faa39da178..aff5dd247a88 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -352,20 +352,29 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
>   	xa_unlock_irqrestore(&guc->context_lookup, flags);
>   }
>   
> +static void decr_outstanding_submission_g2h(struct intel_guc *guc)
> +{
> +	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
> +		wake_up_all(&guc->ct.wq);
> +}
> +
>   static int guc_submission_send_busy_loop(struct intel_guc *guc,
>   					 const u32 *action,
>   					 u32 len,
>   					 u32 g2h_len_dw,
>   					 bool loop)
>   {
> -	int err;
> -
> -	err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
> +	/*
> +	 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
> +	 * so we don't handle the case where we don't get a reply because we
> +	 * aborted the send due to the channel being busy.
> +	 */
> +	GEM_BUG_ON(g2h_len_dw && !loop);
>   
> -	if (!err && g2h_len_dw)
> +	if (g2h_len_dw)
>   		atomic_inc(&guc->outstanding_submission_g2h);
>   
> -	return err;
> +	return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
>   }
>   
>   int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
> @@ -616,7 +625,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
>   		init_sched_state(ce);
>   
>   		if (pending_enable || destroyed || deregister) {
> -			atomic_dec(&guc->outstanding_submission_g2h);
> +			decr_outstanding_submission_g2h(guc);
>   			if (deregister)
>   				guc_signal_context_fence(ce);
>   			if (destroyed) {
> @@ -635,7 +644,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
>   				intel_engine_signal_breadcrumbs(ce->engine);
>   			}
>   			intel_context_sched_disable_unpin(ce);
> -			atomic_dec(&guc->outstanding_submission_g2h);
> +			decr_outstanding_submission_g2h(guc);
>   			spin_lock_irqsave(&ce->guc_state.lock, flags);
>   			guc_blocked_fence_complete(ce);
>   			spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> @@ -1233,8 +1242,7 @@ static int register_context(struct intel_context *ce, bool loop)
>   }
>   
>   static int __guc_action_deregister_context(struct intel_guc *guc,
> -					   u32 guc_id,
> -					   bool loop)
> +					   u32 guc_id)
>   {
>   	u32 action[] = {
>   		INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
> @@ -1243,16 +1251,16 @@ static int __guc_action_deregister_context(struct intel_guc *guc,
>   
>   	return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
>   					     G2H_LEN_DW_DEREGISTER_CONTEXT,
> -					     loop);
> +					     true);
>   }
>   
> -static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
> +static int deregister_context(struct intel_context *ce, u32 guc_id)
>   {
>   	struct intel_guc *guc = ce_to_guc(ce);
>   
>   	trace_intel_context_deregister(ce);
>   
> -	return __guc_action_deregister_context(guc, guc_id, loop);
> +	return __guc_action_deregister_context(guc, guc_id);
>   }
>   
>   static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
> @@ -1340,26 +1348,23 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
>   	 * registering this context.
>   	 */
>   	if (context_registered) {
> +		bool disabled;
> +		unsigned long flags;
> +
>   		trace_intel_context_steal_guc_id(ce);
> -		if (!loop) {
> +		GEM_BUG_ON(!loop);
> +
> +		/* Seal race with Reset */
> +		spin_lock_irqsave(&ce->guc_state.lock, flags);
> +		disabled = submission_disabled(guc);
> +		if (likely(!disabled)) {
>   			set_context_wait_for_deregister_to_register(ce);
>   			intel_context_get(ce);
> -		} else {
> -			bool disabled;
> -			unsigned long flags;
> -
> -			/* Seal race with Reset */
> -			spin_lock_irqsave(&ce->guc_state.lock, flags);
> -			disabled = submission_disabled(guc);
> -			if (likely(!disabled)) {
> -				set_context_wait_for_deregister_to_register(ce);
> -				intel_context_get(ce);
> -			}
> -			spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> -			if (unlikely(disabled)) {
> -				reset_lrc_desc(guc, desc_idx);
> -				return 0;	/* Will get registered later */
> -			}
> +		}
> +		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> +		if (unlikely(disabled)) {
> +			reset_lrc_desc(guc, desc_idx);
> +			return 0;	/* Will get registered later */
>   		}
>   
>   		/*
> @@ -1367,13 +1372,9 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
>   		 * context whose guc_id was stolen.
>   		 */
>   		with_intel_runtime_pm(runtime_pm, wakeref)
> -			ret = deregister_context(ce, ce->guc_id, loop);
> -		if (unlikely(ret == -EBUSY)) {
> -			clr_context_wait_for_deregister_to_register(ce);
> -			intel_context_put(ce);
> -		} else if (unlikely(ret == -ENODEV)) {
> +			ret = deregister_context(ce, ce->guc_id);
> +		if (unlikely(ret == -ENODEV))
>   			ret = 0;	/* Will get registered later */
> -		}
>   	} else {
>   		with_intel_runtime_pm(runtime_pm, wakeref)
>   			ret = register_context(ce, loop);
> @@ -1730,7 +1731,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
>   	GEM_BUG_ON(context_enabled(ce));
>   
>   	clr_context_registered(ce);
> -	deregister_context(ce, ce->guc_id, true);
> +	deregister_context(ce, ce->guc_id);
>   }
>   
>   static void __guc_context_destroy(struct intel_context *ce)
> @@ -2583,12 +2584,6 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
>   	return ce;
>   }
>   
> -static void decr_outstanding_submission_g2h(struct intel_guc *guc)
> -{
> -	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
> -		wake_up_all(&guc->ct.wq);
> -}
> -
>   int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
>   					  const u32 *msg,
>   					  u32 len)



More information about the Intel-gfx mailing list