[PATCH v3 2/4] drm/xe/guc: Ignore GuC CT errors when wedged

Tue Jun 3 18:42:54 UTC 2025

On 6/3/2025 10:34 AM, Summers, Stuart wrote:
> On Mon, 2025-06-02 at 16:44 -0700, Vinay Belgaumkar wrote:
>> Messaging to GuC may get canceled when device is wedged. Don't
>> flag this as an error in xe_guc_pc code.
> So if we're wedged already we are already in an error state right? I
> can understand flagging additional errors maybe gives a false negative,
> or rather would prompt us to look at the earlier errors to make sure
> these aren't just cascading, but do we really need to check for this?

Yes, to avoid false CI errors. This change was actually made to address a
CI failure seen with the previous patch.

Thanks,

Vinay.

>
> Thanks,
> Stuart
>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_guc_pc.c | 10 +++++-----
>>   1 file changed, 5 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c
>> b/drivers/gpu/drm/xe/xe_guc_pc.c
>> index cb0563494fcc..793df3486d1f 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
>> @@ -154,7 +154,7 @@ static int pc_action_reset(struct xe_guc_pc *pc)
>>          int ret;
>>   
>>          ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
>> -       if (ret)
>> +       if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -
>> ECANCELED))
>>                  xe_gt_err(pc_to_gt(pc), "GuC PC reset failed: %pe\n",
>>                            ERR_PTR(ret));
>>   
>> @@ -178,7 +178,7 @@ static int pc_action_query_task_state(struct
>> xe_guc_pc *pc)
>>   
>>          /* Blocking here to ensure the results are ready before
>> reading them */
>>          ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action));
>> -       if (ret)
>> +       if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -
>> ECANCELED))
>>                  xe_gt_err(pc_to_gt(pc), "GuC PC query task state
>> failed: %pe\n",
>>                            ERR_PTR(ret));
>>   
>> @@ -201,7 +201,7 @@ static int pc_action_set_param(struct xe_guc_pc
>> *pc, u8 id, u32 value)
>>                  return -EAGAIN;
>>   
>>          ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
>> -       if (ret)
>> +       if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -
>> ECANCELED))
>>                  xe_gt_err(pc_to_gt(pc), "GuC PC set param[%u]=%u
>> failed: %pe\n",
>>                            id, value, ERR_PTR(ret));
>>   
>> @@ -223,7 +223,7 @@ static int pc_action_unset_param(struct xe_guc_pc
>> *pc, u8 id)
>>                  return -EAGAIN;
>>   
>>          ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
>> -       if (ret)
>> +       if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -
>> ECANCELED))
>>                  xe_gt_err(pc_to_gt(pc), "GuC PC unset param failed:
>> %pe",
>>                            ERR_PTR(ret));
>>   
>> @@ -240,7 +240,7 @@ static int pc_action_setup_gucrc(struct xe_guc_pc
>> *pc, u32 mode)
>>          int ret;
>>   
>>          ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
>> -       if (ret)
>> +       if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -
>> ECANCELED))
>>                  xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u
>> failed: %pe\n",
>>                            mode, ERR_PTR(ret));
>>          return ret;