[PATCH v2] drm/xe: Always check force_wake_get return code
Upadhyay, Tejas
tejas.upadhyay at intel.com
Mon Mar 18 16:41:50 UTC 2024
> -----Original Message-----
> From: Ceraolo Spurio, Daniele <daniele.ceraolospurio at intel.com>
> Sent: Monday, March 18, 2024 9:19 PM
> To: intel-xe at lists.freedesktop.org
> Cc: Ceraolo Spurio, Daniele <daniele.ceraolospurio at intel.com>; Upadhyay,
> Tejas <tejas.upadhyay at intel.com>; Roper, Matthew D
> <matthew.d.roper at intel.com>
> Subject: [PATCH v2] drm/xe: Always check force_wake_get return code
>
> A force_wake_get failure means that the HW might not be awake for the
> access we're doing; this can lead to an immediate error or it can be a more
> subtle problem (e.g. a register read might return an incorrect value that is still
> valid, leading the driver to make a wrong choice instead of flagging an error).
> We avoid an error from the force_wake function because callers might handle
> or tolerate the error, but this only works if all callers are checking the error
> code. The majority already do, but a few are not.
> These are mainly falling into 3 categories, which are each handled
> differently:
>
> 1) error capture: in this case we want to continue the capture, but we
> log an info message in dmesg to notify the user that the capture
> might have incorrect data.
>
> 2) ioctl: in this case we return a -EIO error to userspace
>
> 3) unabortable actions: these are scenarios where we can't simply abort
> and retry and so it's better to just try it anyway because there is a
> chance the HW is awake even with the failure. In this case we throw a
> warning so we know there was a forcewake problem if something fails
> down the line.
>
> v2: use gt_WARN_ON where appropriate
>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Tejas Upadhyay <tejas.upadhyay at intel.com>
> Reviewed-by: Matt Roper <matthew.d.roper at intel.com>
> ---
> drivers/gpu/drm/xe/xe_devcoredump.c | 9 +++++++--
> drivers/gpu/drm/xe/xe_gsc.c | 2 +-
> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 +-
> drivers/gpu/drm/xe/xe_guc.c | 5 +++--
> drivers/gpu/drm/xe/xe_guc_pc.c | 2 +-
> drivers/gpu/drm/xe/xe_guc_submit.c | 4 +++-
> drivers/gpu/drm/xe/xe_query.c | 3 ++-
> 7 files changed, 18 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c
> b/drivers/gpu/drm/xe/xe_devcoredump.c
> index 0fcd30680323..7d3aa6bd3524 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
> @@ -13,6 +13,7 @@
> #include "xe_exec_queue.h"
> #include "xe_force_wake.h"
> #include "xe_gt.h"
> +#include "xe_gt_printk.h"
> #include "xe_guc_ct.h"
> #include "xe_guc_submit.h"
> #include "xe_hw_engine.h"
> @@ -64,7 +65,9 @@ static void xe_devcoredump_deferred_snap_work(struct
> work_struct *work) {
> struct xe_devcoredump_snapshot *ss = container_of(work,
> typeof(*ss), work);
>
> - xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
> + /* keep going if fw fails as we still want to save the memory and SW
> data */
> + if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
> + xe_gt_info(ss->gt, "failed to get forcewake for coredump
> capture\n");
> xe_vm_snapshot_capture_delayed(ss->vm);
> xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
> xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); @@ -
> 180,7 +183,9 @@ static void devcoredump_snapshot(struct xe_devcoredump
> *coredump,
> }
> }
>
> - xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
> + /* keep going if fw fails as we still want to save the memory and SW
> data */
> + if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL))
> + xe_gt_info(ss->gt, "failed to get forcewake for coredump
> capture\n");
>
> coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct,
> true);
> coredump->snapshot.ge =
> xe_guc_exec_queue_snapshot_capture(job);
> diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index
> 09f6e7899921..8339b0b49dfb 100644
> --- a/drivers/gpu/drm/xe/xe_gsc.c
> +++ b/drivers/gpu/drm/xe/xe_gsc.c
> @@ -326,7 +326,7 @@ static void gsc_work(struct work_struct *work)
> spin_unlock_irq(&gsc->lock);
>
> xe_pm_runtime_get(xe);
> - xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
> + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC));
>
> if (actions & GSC_ACTION_ER_COMPLETE) {
> ret = gsc_er_complete(gt);
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> index a3c4ffba679d..25b4111097bc 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> @@ -247,7 +247,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
>
> xe_gt_tlb_invalidation_wait(gt, seqno);
> } else if (xe_device_uc_enabled(xe)) {
> - xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt),
> XE_FW_GT));
> if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
> xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
>
> PVC_GUC_TLB_INV_DESC1_INVALIDATE);
> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index
> caa86ccbe9e7..9ed939c20602 100644
> --- a/drivers/gpu/drm/xe/xe_guc.c
> +++ b/drivers/gpu/drm/xe/xe_guc.c
> @@ -250,10 +250,11 @@ static void guc_write_params(struct xe_guc *guc)
> static void guc_fini(struct drm_device *drm, void *arg) {
> struct xe_guc *guc = arg;
> + struct xe_gt *gt = guc_to_gt(guc);
>
> - xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
> + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt),
> XE_FORCEWAKE_ALL));
> xe_uc_fini_hw(&guc_to_gt(guc)->uc);
> - xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
> + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
> }
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c
> b/drivers/gpu/drm/xe/xe_guc_pc.c index f4b031b8d9de..b4b39bfcb5d5
> 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
> @@ -927,7 +927,7 @@ static void xe_guc_pc_fini(struct drm_device *drm,
> void *arg)
> return;
> }
>
> - xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
> + XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)),
> +XE_FORCEWAKE_ALL));
Why no XE_gt_WARN_ON() here?
Thanks,
Tejas
> XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
> XE_WARN_ON(xe_guc_pc_stop(pc));
> xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c
> b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 82c955a2a15c..d51cb9a4a6b7 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -833,7 +833,9 @@ static void simple_error_capture(struct
> xe_exec_queue *q)
> }
> }
>
> - xe_force_wake_get(gt_to_fw(guc_to_gt(guc)),
> XE_FORCEWAKE_ALL);
> + if (xe_force_wake_get(gt_to_fw(guc_to_gt(guc)),
> XE_FORCEWAKE_ALL))
> + xe_gt_info(guc_to_gt(guc),
> + "failed to get forcewake for error capture");
> xe_guc_ct_print(&guc->ct, &p, true);
> guc_exec_queue_print(q, &p);
> for_each_hw_engine(hwe, guc_to_gt(guc), id) { diff --git
> a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index
> e80321b34918..fcd8680d2ccc 100644
> --- a/drivers/gpu/drm/xe/xe_query.c
> +++ b/drivers/gpu/drm/xe/xe_query.c
> @@ -148,7 +148,8 @@ query_engine_cycles(struct xe_device *xe,
> if (!hwe)
> return -EINVAL;
>
> - xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
> + if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL))
> + return -EIO;
>
> __read_timestamps(gt,
> RING_TIMESTAMP(hwe->mmio_base),
> --
> 2.43.0
More information about the Intel-xe
mailing list