[PATCH 1/6] drm/i915/guc: Handle GuC interaction in reset/suspend/unload scenarios
Kamble, Sagar A
sagar.a.kamble at intel.com
Fri Aug 4 06:46:31 UTC 2017
-----Original Message-----
From: Wajdeczko, Michal
Sent: Thursday, August 3, 2017 3:57 PM
To: Kamble, Sagar A <sagar.a.kamble at intel.com>
Cc: Intel-gfx-trybot at lists.freedesktop.org; Chris Wilson <chris at chris-wilson.co.uk>; Ceraolo Spurio, Daniele <daniele.ceraolospurio at intel.com>; Sarvela, Tomi P <tomi.p.sarvela at intel.com>
Subject: Re: [PATCH 1/6] drm/i915/guc: Handle GuC interaction in reset/suspend/unload scenarios
On Thu, Aug 03, 2017 at 01:50:19PM +0530, Sagar Arun Kamble wrote:
> Tearing down of guc_ggtt_invalidate/guc_interrupts/guc_communication
> setup should happen towards end of reset/suspend/unload as these are
> setup back again during recovery/resume/load.
>
> Prepared helper intel_guc_finish that will do teardown of this setup
> along with optional notification to GuC about suspension.
> Then, during unload we can rely on intel_guc_finish being done as part
> of gem_suspend for disabling these interfaces.
> But will have to disable GuC submission prior to suspend as that
> involves communication with GuC to destroy doorbell.
> Another helper intel_guc_prepare will be useful for rebuilding the
> setup which optionally resume GuC in case of RPM resume.
>
> drv_hangman at error-state-basic and gem_exec_suspend at basic-s3 caught
> below kernel bugs with GuC.
> kernel BUG at drivers/gpu/drm/i915/i915_gem_gtt.c:3086!
> GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
>
> This was happening since ggtt_disable_guc was not done prior to reset
> and suspend.
>
> drv_module_reload at basic-reload failure due to attempt to release
> doorbell post gem_suspend that resets GuC too.
> [drm] INTEL_GUC_SEND: Action 0x20 failed; ret=-110 status=0x00000020
> response=0x00000000
>
> Also moving intel_guc_suspend and intel_guc_resume to intel_uc.c.
Maybe we should create separate intel_guc.c file and start moving guc-only stuff there. The original purpose of intel_uc.c was to keep there code that is related/reusable to all UC (uc_fw, uc_hw_init)
<Sagar> Yes. Will do this.
>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Tomi Sarvela <tomi.p.sarvela at intel.com>
> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.c | 4 +-
> drivers/gpu/drm/i915/i915_gem.c | 11 ++-
> drivers/gpu/drm/i915/i915_guc_submission.c | 58 ++--------------
> drivers/gpu/drm/i915/intel_guc_log.c | 3 +
> drivers/gpu/drm/i915/intel_pm.c | 2 +
> drivers/gpu/drm/i915/intel_uc.c | 106 +++++++++++++++++++++++++----
> drivers/gpu/drm/i915/intel_uc.h | 4 ++
> 7 files changed, 118 insertions(+), 70 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c
> b/drivers/gpu/drm/i915/i915_drv.c index 214555e..9eee99e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -2485,7 +2485,7 @@ static int intel_runtime_suspend(struct device *kdev)
> */
> i915_gem_runtime_suspend(dev_priv);
>
> - intel_guc_suspend(dev_priv);
> + intel_guc_finish(dev_priv);
>
> intel_runtime_pm_disable_interrupts(dev_priv);
>
> @@ -2570,7 +2570,7 @@ static int intel_runtime_resume(struct device *kdev)
> if (intel_uncore_unclaimed_mmio(dev_priv))
> DRM_DEBUG_DRIVER("Unclaimed access during suspend, bios?\n");
>
> - intel_guc_resume(dev_priv);
> + intel_guc_prepare(dev_priv);
>
> if (IS_GEN9_LP(dev_priv)) {
> bxt_disable_dc9(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c
> b/drivers/gpu/drm/i915/i915_gem.c index 000a764..89ff702 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2840,6 +2840,11 @@ int i915_gem_reset_prepare(struct
> drm_i915_private *dev_priv)
>
> i915_gem_revoke_fences(dev_priv);
>
> + if (i915.enable_guc_loading) {
> + intel_guc_finish(dev_priv);
> + dev_priv->guc.fw.load_status = INTEL_UC_FIRMWARE_PENDING;
Please move above "if(enable_guc_loading)" check and "load_status" change into intel_guc_finish() function ...
<Sagar> ACK
> + }
> +
> return err;
> }
>
> @@ -4553,9 +4558,13 @@ int i915_gem_suspend(struct drm_i915_private
> *dev_priv)
>
> assert_kernel_context_is_current(dev_priv);
> i915_gem_contexts_lost(dev_priv);
> + i915_guc_submission_disable(dev_priv);
> mutex_unlock(&dev->struct_mutex);
>
> - intel_guc_suspend(dev_priv);
> + if (i915.enable_guc_loading) {
> + intel_guc_finish(dev_priv);
> + dev_priv->guc.fw.load_status = INTEL_UC_FIRMWARE_PENDING;
> + }
... so this will be simpler
<Sagar> ACK
>
> cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
> cancel_delayed_work_sync(&dev_priv->gt.retire_work);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
> b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 48a1e93..48250c4 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -1280,63 +1280,15 @@ void i915_guc_submission_disable(struct
> drm_i915_private *dev_priv) {
> struct intel_guc *guc = &dev_priv->guc;
>
> + if (!i915.enable_guc_submission)
> + return;
> +
> guc_interrupts_release(dev_priv);
>
> /* Revert back to manual ELSP submission */
> intel_engines_reset_default_submission(dev_priv);
>
> - guc_client_free(guc->execbuf_client);
> + if (guc->execbuf_client)
> + guc_client_free(guc->execbuf_client);
> guc->execbuf_client = NULL;
> }
> -
> -/**
> - * intel_guc_suspend() - notify GuC entering suspend state
> - * @dev_priv: i915 device private
> - */
> -int intel_guc_suspend(struct drm_i915_private *dev_priv) -{
> - struct intel_guc *guc = &dev_priv->guc;
> - struct i915_gem_context *ctx;
> - u32 data[3];
> -
> - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
> - return 0;
> -
> - gen9_disable_guc_interrupts(dev_priv);
> -
> - ctx = dev_priv->kernel_context;
> -
> - data[0] = INTEL_GUC_ACTION_ENTER_S_STATE;
> - /* any value greater than GUC_POWER_D0 */
> - data[1] = GUC_POWER_D1;
> - /* first page is shared data with GuC */
> - data[2] = guc_ggtt_offset(ctx->engine[RCS].state);
> -
> - return intel_guc_send(guc, data, ARRAY_SIZE(data));
> -}
> -
> -/**
> - * intel_guc_resume() - notify GuC resuming from suspend state
> - * @dev_priv: i915 device private
> - */
> -int intel_guc_resume(struct drm_i915_private *dev_priv) -{
> - struct intel_guc *guc = &dev_priv->guc;
> - struct i915_gem_context *ctx;
> - u32 data[3];
> -
> - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
> - return 0;
> -
> - if (i915.guc_log_level >= 0)
> - gen9_enable_guc_interrupts(dev_priv);
> -
> - ctx = dev_priv->kernel_context;
> -
> - data[0] = INTEL_GUC_ACTION_EXIT_S_STATE;
> - data[1] = GUC_POWER_D0;
> - /* first page is shared data with GuC */
> - data[2] = guc_ggtt_offset(ctx->engine[RCS].state);
> -
> - return intel_guc_send(guc, data, ARRAY_SIZE(data));
> -}
> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c
> b/drivers/gpu/drm/i915/intel_guc_log.c
> index 16d3b87..f18aac0 100644
> --- a/drivers/gpu/drm/i915/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
> @@ -655,8 +655,11 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
> return;
>
> mutex_lock(&dev_priv->drm.struct_mutex);
> + intel_runtime_pm_get(dev_priv);
> /* GuC logging is currently the only user of Guc2Host interrupts */
> gen9_disable_guc_interrupts(dev_priv);
> + intel_runtime_pm_put(dev_priv);
> +
> guc_log_runtime_destroy(&dev_priv->guc);
> mutex_unlock(&dev_priv->drm.struct_mutex);
> }
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c index 8711c1f..7a87bca 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7769,6 +7769,8 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
> intel_disable_gt_powersave(dev_priv);
>
> gen6_reset_rps_interrupts(dev_priv);
> + if (i915.enable_guc_loading)
> + gen9_disable_guc_interrupts(dev_priv);
> }
>
> void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_uc.c
> b/drivers/gpu/drm/i915/intel_uc.c index 27e072c..5999b75 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -328,19 +328,106 @@ static void guc_disable_communication(struct intel_guc *guc)
> guc->send = intel_guc_send_nop;
> }
>
> -int intel_uc_init_hw(struct drm_i915_private *dev_priv)
> +/**
> + * intel_guc_suspend() - notify GuC entering suspend state
> + * @dev_priv: i915 device private
> + */
> +int intel_guc_suspend(struct drm_i915_private *dev_priv)
Note that all intel_guc_xxx() functions take intel_guc* as param, not dev_priv.
<Sagar> ACK
> {
> struct intel_guc *guc = &dev_priv->guc;
> - int ret, attempts;
> + struct i915_gem_context *ctx;
> + u32 data[3];
> +
> + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
> + return 0;
> +
> + ctx = dev_priv->kernel_context;
> +
> + data[0] = INTEL_GUC_ACTION_ENTER_S_STATE;
> + /* any value greater than GUC_POWER_D0 */
> + data[1] = GUC_POWER_D1;
> + /* first page is shared data with GuC */
> + data[2] = guc_ggtt_offset(ctx->engine[RCS].state);
> +
> + return intel_guc_send(guc, data, ARRAY_SIZE(data)); }
> +
> +/*
> + * intel_guc_finish() - Teardown GuC communication and related
> + * functionality. Also sends action to GuC to suspend. Hence,
> + * caller of this should ensure GuC is alive.
This requirement for caller is inconsistent with code below where you check for "enable_guc_loading" flag. And I think it is better to make all checks here, not to rely on the caller.
<Sagar> ACK
> + */
> +int intel_guc_finish(struct drm_i915_private *dev_priv)
Hmm, maybe better name for this function would be "intel_uc_pause_hw()" ?
Otherwise we'll have two similar functions:
<Sagar> ACK
int intel_guc_finish(struct drm_i915_private *dev_priv)
int intel_uc_fini_hw(struct drm_i915_private *dev_priv)
> +{
> + int ret = 0;
>
> if (!i915.enable_guc_loading)
> + return ret;
> +
> + if (dev_priv->guc.fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
> + ret = intel_guc_suspend(dev_priv);
> +
> + gen9_disable_guc_interrupts(dev_priv);
> + guc_disable_communication(&dev_priv->guc);
We should disable communication *before* disabling irqs
<Sagar> ACK
> + i915_ggtt_disable_guc(dev_priv);
> +
> + return ret;
> +}
> +
> +/**
> + * intel_guc_resume() - notify GuC resuming from suspend state
> + * @dev_priv: i915 device private
> + */
> +int intel_guc_resume(struct drm_i915_private *dev_priv)
Use intel_guc* as param, and likely this function can be declared as "static"
<Sagar> ACK
> +{
> + struct intel_guc *guc = &dev_priv->guc;
> + struct i915_gem_context *ctx;
> + u32 data[3];
> +
> + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
> return 0;
>
> - guc_disable_communication(guc);
> - gen9_reset_guc_interrupts(dev_priv);
> + ctx = dev_priv->kernel_context;
> +
> + data[0] = INTEL_GUC_ACTION_EXIT_S_STATE;
> + data[1] = GUC_POWER_D0;
> + /* first page is shared data with GuC */
> + data[2] = guc_ggtt_offset(ctx->engine[RCS].state);
> +
> + return intel_guc_send(guc, data, ARRAY_SIZE(data)); }
> +
> +/*
> + * intel_guc_prepare() - Bring up GuC communication and related
> + * functionality. Sends action to GuC to resume.
> + */
> +int intel_guc_prepare(struct drm_i915_private *dev_priv)
Maybe "intel_uc_resume_hw()"
<Sagar> ACK
> +{
> + int ret = 0;
> +
> + if (!i915.enable_guc_loading)
> + return ret;
>
> - /* We need to notify the guc whenever we change the GGTT */
> i915_ggtt_enable_guc(dev_priv);
> + guc_enable_communication(&dev_priv->guc);
> + if (i915.guc_log_level >= 0)
> + gen9_enable_guc_interrupts(dev_priv);
> +
> + if (dev_priv->guc.fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
> + ret = intel_guc_resume(dev_priv);
> +
> + return ret;
> +}
> +
> +int intel_uc_init_hw(struct drm_i915_private *dev_priv) {
> + struct intel_guc *guc = &dev_priv->guc;
> + int ret, attempts;
> +
> + if (!i915.enable_guc_loading)
> + return 0;
> +
> + intel_guc_prepare(dev_priv);
>
> if (i915.enable_guc_submission) {
> /*
> @@ -447,16 +534,7 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
> return;
>
> if (i915.enable_guc_submission)
> - i915_guc_submission_disable(dev_priv);
> -
> - guc_disable_communication(&dev_priv->guc);
> -
> - if (i915.enable_guc_submission) {
> - gen9_disable_guc_interrupts(dev_priv);
> i915_guc_submission_fini(dev_priv);
> - }
> -
> - i915_ggtt_disable_guc(dev_priv);
> }
>
> int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32
> len) diff --git a/drivers/gpu/drm/i915/intel_uc.h
> b/drivers/gpu/drm/i915/intel_uc.h index 22ae52b..5d7cc86 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -229,6 +229,10 @@ struct intel_huc { int
> intel_guc_sample_forcewake(struct intel_guc *guc); int
> intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);
> int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32
> len);
> +int intel_guc_suspend(struct drm_i915_private *dev_priv); int
> +intel_guc_resume(struct drm_i915_private *dev_priv); int
> +intel_guc_finish(struct drm_i915_private *dev_priv); int
> +intel_guc_prepare(struct drm_i915_private *dev_priv);
I'm not sure that we need all functions declared as public.
<Sagar> Will expose needed ones.
Thanks,
Michal
>
> static inline int intel_guc_send(struct intel_guc *guc, const u32
> *action, u32 len) {
> --
> 1.9.1
>
More information about the Intel-gfx-trybot
mailing list