[PATCH 1/6] drm/i915/guc: Handle GuC interaction in reset/suspend/unload scenarios
Michal Wajdeczko
michal.wajdeczko at intel.com
Thu Aug 3 10:27:16 UTC 2017
On Thu, Aug 03, 2017 at 01:50:19PM +0530, Sagar Arun Kamble wrote:
> Tearing down of guc_ggtt_invalidate/guc_interrupts/guc_communication
> setup should happen towards end of reset/suspend/unload as these are
> setup back again during recovery/resume/load.
>
> Prepared helper intel_guc_finish that will do teardown of this setup
> along with optional notification to GuC about suspension.
> Then, during unload we can rely on intel_guc_finish being done as part
> of gem_suspend for disabling these interfaces.
> But will have to disable GuC submission prior to suspend as that involves
> communication with GuC to destroy doorbell.
> Another helper intel_guc_prepare will be useful for rebuilding the setup
> which optionally resume GuC in case of RPM resume.
>
> drv_hangman at error-state-basic and gem_exec_suspend at basic-s3 caught
> below kernel bugs with GuC.
> kernel BUG at drivers/gpu/drm/i915/i915_gem_gtt.c:3086!
> GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
>
> This was happening since ggtt_disable_guc was not done prior to reset
> and suspend.
>
> drv_module_reload at basic-reload failure due to attempt to release doorbell
> post gem_suspend that resets GuC too.
> [drm] INTEL_GUC_SEND: Action 0x20 failed; ret=-110 status=0x00000020 response=0x00000000
>
> Also moving intel_guc_suspend and intel_guc_resume to intel_uc.c.
Maybe we should create separate intel_guc.c file and start moving guc-only
stuff there. The original purpose of intel_uc.c was to keep there code that
is related/reusable to all UC (uc_fw, uc_hw_init)
>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Tomi Sarvela <tomi.p.sarvela at intel.com>
> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.c | 4 +-
> drivers/gpu/drm/i915/i915_gem.c | 11 ++-
> drivers/gpu/drm/i915/i915_guc_submission.c | 58 ++--------------
> drivers/gpu/drm/i915/intel_guc_log.c | 3 +
> drivers/gpu/drm/i915/intel_pm.c | 2 +
> drivers/gpu/drm/i915/intel_uc.c | 106 +++++++++++++++++++++++++----
> drivers/gpu/drm/i915/intel_uc.h | 4 ++
> 7 files changed, 118 insertions(+), 70 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 214555e..9eee99e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -2485,7 +2485,7 @@ static int intel_runtime_suspend(struct device *kdev)
> */
> i915_gem_runtime_suspend(dev_priv);
>
> - intel_guc_suspend(dev_priv);
> + intel_guc_finish(dev_priv);
>
> intel_runtime_pm_disable_interrupts(dev_priv);
>
> @@ -2570,7 +2570,7 @@ static int intel_runtime_resume(struct device *kdev)
> if (intel_uncore_unclaimed_mmio(dev_priv))
> DRM_DEBUG_DRIVER("Unclaimed access during suspend, bios?\n");
>
> - intel_guc_resume(dev_priv);
> + intel_guc_prepare(dev_priv);
>
> if (IS_GEN9_LP(dev_priv)) {
> bxt_disable_dc9(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 000a764..89ff702 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2840,6 +2840,11 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
>
> i915_gem_revoke_fences(dev_priv);
>
> + if (i915.enable_guc_loading) {
> + intel_guc_finish(dev_priv);
> + dev_priv->guc.fw.load_status = INTEL_UC_FIRMWARE_PENDING;
Please move above "if(enable_guc_loading)" check and "load_status" change
into intel_guc_finish() function ...
> + }
> +
> return err;
> }
>
> @@ -4553,9 +4558,13 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
>
> assert_kernel_context_is_current(dev_priv);
> i915_gem_contexts_lost(dev_priv);
> + i915_guc_submission_disable(dev_priv);
> mutex_unlock(&dev->struct_mutex);
>
> - intel_guc_suspend(dev_priv);
> + if (i915.enable_guc_loading) {
> + intel_guc_finish(dev_priv);
> + dev_priv->guc.fw.load_status = INTEL_UC_FIRMWARE_PENDING;
> + }
... so this will be simpler
>
> cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
> cancel_delayed_work_sync(&dev_priv->gt.retire_work);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 48a1e93..48250c4 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -1280,63 +1280,15 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
> {
> struct intel_guc *guc = &dev_priv->guc;
>
> + if (!i915.enable_guc_submission)
> + return;
> +
> guc_interrupts_release(dev_priv);
>
> /* Revert back to manual ELSP submission */
> intel_engines_reset_default_submission(dev_priv);
>
> - guc_client_free(guc->execbuf_client);
> + if (guc->execbuf_client)
> + guc_client_free(guc->execbuf_client);
> guc->execbuf_client = NULL;
> }
> -
> -/**
> - * intel_guc_suspend() - notify GuC entering suspend state
> - * @dev_priv: i915 device private
> - */
> -int intel_guc_suspend(struct drm_i915_private *dev_priv)
> -{
> - struct intel_guc *guc = &dev_priv->guc;
> - struct i915_gem_context *ctx;
> - u32 data[3];
> -
> - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
> - return 0;
> -
> - gen9_disable_guc_interrupts(dev_priv);
> -
> - ctx = dev_priv->kernel_context;
> -
> - data[0] = INTEL_GUC_ACTION_ENTER_S_STATE;
> - /* any value greater than GUC_POWER_D0 */
> - data[1] = GUC_POWER_D1;
> - /* first page is shared data with GuC */
> - data[2] = guc_ggtt_offset(ctx->engine[RCS].state);
> -
> - return intel_guc_send(guc, data, ARRAY_SIZE(data));
> -}
> -
> -/**
> - * intel_guc_resume() - notify GuC resuming from suspend state
> - * @dev_priv: i915 device private
> - */
> -int intel_guc_resume(struct drm_i915_private *dev_priv)
> -{
> - struct intel_guc *guc = &dev_priv->guc;
> - struct i915_gem_context *ctx;
> - u32 data[3];
> -
> - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
> - return 0;
> -
> - if (i915.guc_log_level >= 0)
> - gen9_enable_guc_interrupts(dev_priv);
> -
> - ctx = dev_priv->kernel_context;
> -
> - data[0] = INTEL_GUC_ACTION_EXIT_S_STATE;
> - data[1] = GUC_POWER_D0;
> - /* first page is shared data with GuC */
> - data[2] = guc_ggtt_offset(ctx->engine[RCS].state);
> -
> - return intel_guc_send(guc, data, ARRAY_SIZE(data));
> -}
> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
> index 16d3b87..f18aac0 100644
> --- a/drivers/gpu/drm/i915/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
> @@ -655,8 +655,11 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
> return;
>
> mutex_lock(&dev_priv->drm.struct_mutex);
> + intel_runtime_pm_get(dev_priv);
> /* GuC logging is currently the only user of Guc2Host interrupts */
> gen9_disable_guc_interrupts(dev_priv);
> + intel_runtime_pm_put(dev_priv);
> +
> guc_log_runtime_destroy(&dev_priv->guc);
> mutex_unlock(&dev_priv->drm.struct_mutex);
> }
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 8711c1f..7a87bca 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7769,6 +7769,8 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
> intel_disable_gt_powersave(dev_priv);
>
> gen6_reset_rps_interrupts(dev_priv);
> + if (i915.enable_guc_loading)
> + gen9_disable_guc_interrupts(dev_priv);
> }
>
> void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 27e072c..5999b75 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -328,19 +328,106 @@ static void guc_disable_communication(struct intel_guc *guc)
> guc->send = intel_guc_send_nop;
> }
>
> -int intel_uc_init_hw(struct drm_i915_private *dev_priv)
> +/**
> + * intel_guc_suspend() - notify GuC entering suspend state
> + * @dev_priv: i915 device private
> + */
> +int intel_guc_suspend(struct drm_i915_private *dev_priv)
Note that all intel_guc_xxx() functions take intel_guc* as param, not dev_priv.
> {
> struct intel_guc *guc = &dev_priv->guc;
> - int ret, attempts;
> + struct i915_gem_context *ctx;
> + u32 data[3];
> +
> + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
> + return 0;
> +
> + ctx = dev_priv->kernel_context;
> +
> + data[0] = INTEL_GUC_ACTION_ENTER_S_STATE;
> + /* any value greater than GUC_POWER_D0 */
> + data[1] = GUC_POWER_D1;
> + /* first page is shared data with GuC */
> + data[2] = guc_ggtt_offset(ctx->engine[RCS].state);
> +
> + return intel_guc_send(guc, data, ARRAY_SIZE(data));
> +}
> +
> +/*
> + * intel_guc_finish() - Teardown GuC communication and related
> + * functionality. Also sends action to GuC to suspend. Hence,
> + * caller of this should ensure GuC is alive.
This requirement for caller is inconsistent with code below where you
check for "enable_guc_loading" flag. And I think it is better to make
all checks here, not to rely on the caller.
> + */
> +int intel_guc_finish(struct drm_i915_private *dev_priv)
Hmm, maybe better name for this function would be "intel_uc_pause_hw()" ?
Otherwise we'll have two similar functions:
int intel_guc_finish(struct drm_i915_private *dev_priv)
int intel_uc_fini_hw(struct drm_i915_private *dev_priv)
> +{
> + int ret = 0;
>
> if (!i915.enable_guc_loading)
> + return ret;
> +
> + if (dev_priv->guc.fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
> + ret = intel_guc_suspend(dev_priv);
> +
> + gen9_disable_guc_interrupts(dev_priv);
> + guc_disable_communication(&dev_priv->guc);
We should disable communication *before* disabling irqs
> + i915_ggtt_disable_guc(dev_priv);
> +
> + return ret;
> +}
> +
> +/**
> + * intel_guc_resume() - notify GuC resuming from suspend state
> + * @dev_priv: i915 device private
> + */
> +int intel_guc_resume(struct drm_i915_private *dev_priv)
Use intel_guc* as param, and likely this function can be declared
as "static"
> +{
> + struct intel_guc *guc = &dev_priv->guc;
> + struct i915_gem_context *ctx;
> + u32 data[3];
> +
> + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
> return 0;
>
> - guc_disable_communication(guc);
> - gen9_reset_guc_interrupts(dev_priv);
> + ctx = dev_priv->kernel_context;
> +
> + data[0] = INTEL_GUC_ACTION_EXIT_S_STATE;
> + data[1] = GUC_POWER_D0;
> + /* first page is shared data with GuC */
> + data[2] = guc_ggtt_offset(ctx->engine[RCS].state);
> +
> + return intel_guc_send(guc, data, ARRAY_SIZE(data));
> +}
> +
> +/*
> + * intel_guc_prepare() - Bring up GuC communication and related
> + * functionality. Sends action to GuC to resume.
> + */
> +int intel_guc_prepare(struct drm_i915_private *dev_priv)
Maybe "intel_uc_resume_hw()"
> +{
> + int ret = 0;
> +
> + if (!i915.enable_guc_loading)
> + return ret;
>
> - /* We need to notify the guc whenever we change the GGTT */
> i915_ggtt_enable_guc(dev_priv);
> + guc_enable_communication(&dev_priv->guc);
> + if (i915.guc_log_level >= 0)
> + gen9_enable_guc_interrupts(dev_priv);
> +
> + if (dev_priv->guc.fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
> + ret = intel_guc_resume(dev_priv);
> +
> + return ret;
> +}
> +
> +int intel_uc_init_hw(struct drm_i915_private *dev_priv)
> +{
> + struct intel_guc *guc = &dev_priv->guc;
> + int ret, attempts;
> +
> + if (!i915.enable_guc_loading)
> + return 0;
> +
> + intel_guc_prepare(dev_priv);
>
> if (i915.enable_guc_submission) {
> /*
> @@ -447,16 +534,7 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
> return;
>
> if (i915.enable_guc_submission)
> - i915_guc_submission_disable(dev_priv);
> -
> - guc_disable_communication(&dev_priv->guc);
> -
> - if (i915.enable_guc_submission) {
> - gen9_disable_guc_interrupts(dev_priv);
> i915_guc_submission_fini(dev_priv);
> - }
> -
> - i915_ggtt_disable_guc(dev_priv);
> }
>
> int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len)
> diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
> index 22ae52b..5d7cc86 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -229,6 +229,10 @@ struct intel_huc {
> int intel_guc_sample_forcewake(struct intel_guc *guc);
> int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);
> int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len);
> +int intel_guc_suspend(struct drm_i915_private *dev_priv);
> +int intel_guc_resume(struct drm_i915_private *dev_priv);
> +int intel_guc_finish(struct drm_i915_private *dev_priv);
> +int intel_guc_prepare(struct drm_i915_private *dev_priv);
I'm not sure that we need all functions declared as public.
Thanks,
Michal
>
> static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
> {
> --
> 1.9.1
>
More information about the Intel-gfx-trybot
mailing list