[PATCH v3 2/4] drm/xe/guc: Ignore GuC CT errors when wedged

Tue Jun 3 17:34:57 UTC 2025

On Mon, 2025-06-02 at 16:44 -0700, Vinay Belgaumkar wrote:
> Messaging to GuC may get canceled when device is wedged. Don't
> flag this as an error in xe_guc_pc code.

So if we're already wedged, we're already in an error state, right? I
can understand that flagging additional errors might give a false
negative, or rather would prompt us to look at the earlier errors to
make sure these aren't just cascading, but do we really need to check
for this?

Thanks,
Stuart

> 
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_guc_pc.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c
> b/drivers/gpu/drm/xe/xe_guc_pc.c
> index cb0563494fcc..793df3486d1f 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
> @@ -154,7 +154,7 @@ static int pc_action_reset(struct xe_guc_pc *pc)
>         int ret;
>  
>         ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
> -       if (ret)
> +       if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -
> ECANCELED))
>                 xe_gt_err(pc_to_gt(pc), "GuC PC reset failed: %pe\n",
>                           ERR_PTR(ret));
>  
> @@ -178,7 +178,7 @@ static int pc_action_query_task_state(struct
> xe_guc_pc *pc)
>  
>         /* Blocking here to ensure the results are ready before
> reading them */
>         ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action));
> -       if (ret)
> +       if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -
> ECANCELED))
>                 xe_gt_err(pc_to_gt(pc), "GuC PC query task state
> failed: %pe\n",
>                           ERR_PTR(ret));
>  
> @@ -201,7 +201,7 @@ static int pc_action_set_param(struct xe_guc_pc
> *pc, u8 id, u32 value)
>                 return -EAGAIN;
>  
>         ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
> -       if (ret)
> +       if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -
> ECANCELED))
>                 xe_gt_err(pc_to_gt(pc), "GuC PC set param[%u]=%u
> failed: %pe\n",
>                           id, value, ERR_PTR(ret));
>  
> @@ -223,7 +223,7 @@ static int pc_action_unset_param(struct xe_guc_pc
> *pc, u8 id)
>                 return -EAGAIN;
>  
>         ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
> -       if (ret)
> +       if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -
> ECANCELED))
>                 xe_gt_err(pc_to_gt(pc), "GuC PC unset param failed:
> %pe",
>                           ERR_PTR(ret));
>  
> @@ -240,7 +240,7 @@ static int pc_action_setup_gucrc(struct xe_guc_pc
> *pc, u32 mode)
>         int ret;
>  
>         ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
> -       if (ret)
> +       if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -
> ECANCELED))
>                 xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u
> failed: %pe\n",
>                           mode, ERR_PTR(ret));
>         return ret;