[Intel-gfx] [PATCH] drm/i915/guc: Check ctx while waiting for response

Teres Alexis, Alan Previn alan.previn.teres.alexis at intel.com
Tue Jun 14 15:34:59 UTC 2022


Can't see anything wrong with this.
I consider this only a NIT, so feel : not sure if -ECANCELLED is reflective of the "ct service being temporarily down"
as opposed to the "requester cancelling". Perhaps a -EPIPE or -EAGAIN (if we got this far, we know we are probably mid-
reset) ?? (if not already used elsewhere along this callstack). Else :

Reviewed-by: Alan Previn <alan.previn.teres.alexis at intel.com>

...alan


On Thu, 2022-06-02 at 10:21 -0700, Zhanjun Dong wrote:
> We are seeing error message of "No response for request". Some cases happened
> while waiting for response and reset/suspend action was triggered. In this
> case, no response is not an error, active requests will be cancelled.
> 
> This patch will handle this condition and change the error message into
> debug message.
> 
> Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 21 ++++++++++++++-------
>  1 file changed, 14 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> index f01325cd1b62..a30a388877e2 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> @@ -467,7 +467,7 @@ static int ct_write(struct intel_guc_ct *ct,
>   * *	0 response received (status is valid)
>   * *	-ETIMEDOUT no response within hardcoded timeout
>   */
> -static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
> +static int wait_for_ct_request_update(struct ct_request *req, u32 *status, struct intel_guc_ct *ct)
>  {
>  	int err;
>  
> @@ -481,12 +481,14 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
>  #define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10
>  #define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000
>  #define done \
> -	(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
> +	(!intel_guc_ct_enabled(ct) || FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
>  	 GUC_HXG_ORIGIN_GUC)
>  	err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS);
>  	if (err)
>  		err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS);
>  #undef done
> +	if (!intel_guc_ct_enabled(ct))
> +		err = -ECANCELED;
>  
>  	*status = req->status;
>  	return err;
> @@ -703,11 +705,15 @@ static int ct_send(struct intel_guc_ct *ct,
>  
>  	intel_guc_notify(ct_to_guc(ct));
>  
> -	err = wait_for_ct_request_update(&request, status);
> +	err = wait_for_ct_request_update(&request, status, ct);
>  	g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
>  	if (unlikely(err)) {
> -		CT_ERROR(ct, "No response for request %#x (fence %u)\n",
> -			 action[0], request.fence);
> +		if (unlikely(err == ECANCELED))
> +			CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n",
> +				action[0], request.fence);
> +		else
> +			CT_ERROR(ct, "No response for request %#x (fence %u)\n",
> +				action[0], request.fence);
>  		goto unlink;
>  	}
>  
> @@ -771,8 +777,9 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
>  
>  	ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
>  	if (unlikely(ret < 0)) {
> -		CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
> -			 action[0], ERR_PTR(ret), status);
> +		if (likely(ret != ECANCELED))
> +			CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
> +				action[0], ERR_PTR(ret), status);
>  	} else if (unlikely(ret)) {
>  		CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
>  			 action[0], ret, ret);
> -- 
> 2.36.0
> 



More information about the Intel-gfx mailing list