[Intel-gfx] [PATCH 11/12] drm/i915/guc: Don't abort on CTB_UNUSED status
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jul 18 12:36:41 UTC 2022
On 13/07/2022 00:31, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
>
> When the KMD sends a CLIENT_RESET request to GuC (as part of the
> suspend sequence), GuC marks the CTB buffer as 'UNUSED'. If the
> KMD then checks the CTB queue, it sees a non-zero status value
> and reports the buffer as corrupted.
>
> Technically, no G2H messages should be received once the CLIENT_RESET
> has been sent. However, if a context was outstanding on an engine then
> it would get reset and a reset notification would be sent. So, don't
> actually treat UNUSED as a catastrophic error. Just flag it up as
> unexpected and keep going.
Does it need a Fixes: tag?
Regards,
Tvrtko
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
> .../i915/gt/uc/abi/guc_communication_ctb_abi.h | 8 +++++---
> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 18 ++++++++++++++++--
> 2 files changed, 21 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
> index df83c1cc7c7a6..28b8387f97b77 100644
> --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
> +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
> @@ -37,6 +37,7 @@
> * | | | - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large) |
> * | | | - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message) |
> * | | | - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified) |
> + * | | | - _`GUC_CTB_STATUS_UNUSED` = 8 (CTB is not in use) |
> * +---+-------+--------------------------------------------------------------+
> * |...| | RESERVED = MBZ |
> * +---+-------+--------------------------------------------------------------+
> @@ -49,9 +50,10 @@ struct guc_ct_buffer_desc {
> u32 tail;
> u32 status;
> #define GUC_CTB_STATUS_NO_ERROR 0
> -#define GUC_CTB_STATUS_OVERFLOW (1 << 0)
> -#define GUC_CTB_STATUS_UNDERFLOW (1 << 1)
> -#define GUC_CTB_STATUS_MISMATCH (1 << 2)
> +#define GUC_CTB_STATUS_OVERFLOW BIT(0)
> +#define GUC_CTB_STATUS_UNDERFLOW BIT(1)
> +#define GUC_CTB_STATUS_MISMATCH BIT(2)
> +#define GUC_CTB_STATUS_UNUSED BIT(3)
> u32 reserved[13];
> } __packed;
> static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> index f01325cd1b625..11b5d4ddb19ce 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> @@ -816,8 +816,22 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
> if (unlikely(ctb->broken))
> return -EPIPE;
>
> - if (unlikely(desc->status))
> - goto corrupted;
> + if (unlikely(desc->status)) {
> + u32 status = desc->status;
> +
> + if (status & GUC_CTB_STATUS_UNUSED) {
> + /*
> + * Potentially valid if a CLIENT_RESET request resulted in
> + * contexts/engines being reset. But should never happen as
> + * no contexts should be active when CLIENT_RESET is sent.
> + */
> + CT_ERROR(ct, "Unexpected G2H after GuC has stopped!\n");
> + status &= ~GUC_CTB_STATUS_UNUSED;
> + }
> +
> + if (status)
> + goto corrupted;
> + }
>
> GEM_BUG_ON(head > size);
>
More information about the Intel-gfx mailing list