[PATCH 1/6] drm/i915/guc: Handle GuC interaction in reset/suspend/unload scenarios

Kamble, Sagar A sagar.a.kamble at intel.com
Fri Aug 4 06:46:31 UTC 2017



-----Original Message-----
From: Wajdeczko, Michal 
Sent: Thursday, August 3, 2017 3:57 PM
To: Kamble, Sagar A <sagar.a.kamble at intel.com>
Cc: Intel-gfx-trybot at lists.freedesktop.org; Chris Wilson <chris at chris-wilson.co.uk>; Ceraolo Spurio, Daniele <daniele.ceraolospurio at intel.com>; Sarvela, Tomi P <tomi.p.sarvela at intel.com>
Subject: Re: [PATCH 1/6] drm/i915/guc: Handle GuC interaction in reset/suspend/unload scenarios

On Thu, Aug 03, 2017 at 01:50:19PM +0530, Sagar Arun Kamble wrote:
> Tearing down of guc_ggtt_invalidate/guc_interrupts/guc_communication
> setup should happen towards end of reset/suspend/unload as these are 
> setup back again during recovery/resume/load.
> 
> Prepared helper intel_guc_finish that will do teardown of this setup 
> along with optional notification to GuC about suspension.
> Then, during unload we can rely on intel_guc_finish being done as part 
> of gem_suspend for disabling these interfaces.
> But will have to disable GuC submission prior to suspend as that 
> involves communication with GuC to destroy doorbell.
> Another helper intel_guc_prepare will be useful for rebuilding the 
> setup which optionally resume GuC in case of RPM resume.
> 
> drv_hangman at error-state-basic and gem_exec_suspend at basic-s3 caught 
> below kernel bugs with GuC.
> kernel BUG at drivers/gpu/drm/i915/i915_gem_gtt.c:3086!
> GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
> 
> This was happening since ggtt_disable_guc was not done prior to reset 
> and suspend.
> 
> drv_module_reload at basic-reload failure due to attempt to release 
> doorbell post gem_suspend that resets GuC too.
> [drm] INTEL_GUC_SEND: Action 0x20 failed; ret=-110 status=0x00000020 
> response=0x00000000
> 
> Also moving intel_guc_suspend and intel_guc_resume to intel_uc.c.

Maybe we should create separate intel_guc.c file and start moving guc-only stuff there. The original purpose of intel_uc.c was to keep there code that is related/reusable to all UC (uc_fw, uc_hw_init)

<Sagar> Yes. Will do this.

> 
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Tomi Sarvela <tomi.p.sarvela at intel.com>
> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.c            |   4 +-
>  drivers/gpu/drm/i915/i915_gem.c            |  11 ++-
>  drivers/gpu/drm/i915/i915_guc_submission.c |  58 ++--------------
>  drivers/gpu/drm/i915/intel_guc_log.c       |   3 +
>  drivers/gpu/drm/i915/intel_pm.c            |   2 +
>  drivers/gpu/drm/i915/intel_uc.c            | 106 +++++++++++++++++++++++++----
>  drivers/gpu/drm/i915/intel_uc.h            |   4 ++
>  7 files changed, 118 insertions(+), 70 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
> b/drivers/gpu/drm/i915/i915_drv.c index 214555e..9eee99e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -2485,7 +2485,7 @@ static int intel_runtime_suspend(struct device *kdev)
>  	 */
>  	i915_gem_runtime_suspend(dev_priv);
>  
> -	intel_guc_suspend(dev_priv);
> +	intel_guc_finish(dev_priv);
>  
>  	intel_runtime_pm_disable_interrupts(dev_priv);
>  
> @@ -2570,7 +2570,7 @@ static int intel_runtime_resume(struct device *kdev)
>  	if (intel_uncore_unclaimed_mmio(dev_priv))
>  		DRM_DEBUG_DRIVER("Unclaimed access during suspend, bios?\n");
>  
> -	intel_guc_resume(dev_priv);
> +	intel_guc_prepare(dev_priv);
>  
>  	if (IS_GEN9_LP(dev_priv)) {
>  		bxt_disable_dc9(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> b/drivers/gpu/drm/i915/i915_gem.c index 000a764..89ff702 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2840,6 +2840,11 @@ int i915_gem_reset_prepare(struct 
> drm_i915_private *dev_priv)
>  
>  	i915_gem_revoke_fences(dev_priv);
>  
> +	if (i915.enable_guc_loading) {
> +		intel_guc_finish(dev_priv);
> +		dev_priv->guc.fw.load_status = INTEL_UC_FIRMWARE_PENDING;

Please move above "if(enable_guc_loading)" check and "load_status" change into intel_guc_finish() function ...

<Sagar> ACK
> +	}
> +
>  	return err;
>  }
>  
> @@ -4553,9 +4558,13 @@ int i915_gem_suspend(struct drm_i915_private 
> *dev_priv)
>  
>  	assert_kernel_context_is_current(dev_priv);
>  	i915_gem_contexts_lost(dev_priv);
> +	i915_guc_submission_disable(dev_priv);
>  	mutex_unlock(&dev->struct_mutex);
>  
> -	intel_guc_suspend(dev_priv);
> +	if (i915.enable_guc_loading) {
> +		intel_guc_finish(dev_priv);
> +		dev_priv->guc.fw.load_status = INTEL_UC_FIRMWARE_PENDING;
> +	}

... so this will be simpler
<Sagar> ACK
>  
>  	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
>  	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
> b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 48a1e93..48250c4 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -1280,63 +1280,15 @@ void i915_guc_submission_disable(struct 
> drm_i915_private *dev_priv)  {
>  	struct intel_guc *guc = &dev_priv->guc;
>  
> +	if (!i915.enable_guc_submission)
> +		return;
> +
>  	guc_interrupts_release(dev_priv);
>  
>  	/* Revert back to manual ELSP submission */
>  	intel_engines_reset_default_submission(dev_priv);
>  
> -	guc_client_free(guc->execbuf_client);
> +	if (guc->execbuf_client)
> +		guc_client_free(guc->execbuf_client);
>  	guc->execbuf_client = NULL;
>  }
> -
> -/**
> - * intel_guc_suspend() - notify GuC entering suspend state
> - * @dev_priv:	i915 device private
> - */
> -int intel_guc_suspend(struct drm_i915_private *dev_priv) -{
> -	struct intel_guc *guc = &dev_priv->guc;
> -	struct i915_gem_context *ctx;
> -	u32 data[3];
> -
> -	if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
> -		return 0;
> -
> -	gen9_disable_guc_interrupts(dev_priv);
> -
> -	ctx = dev_priv->kernel_context;
> -
> -	data[0] = INTEL_GUC_ACTION_ENTER_S_STATE;
> -	/* any value greater than GUC_POWER_D0 */
> -	data[1] = GUC_POWER_D1;
> -	/* first page is shared data with GuC */
> -	data[2] = guc_ggtt_offset(ctx->engine[RCS].state);
> -
> -	return intel_guc_send(guc, data, ARRAY_SIZE(data));
> -}
> -
> -/**
> - * intel_guc_resume() - notify GuC resuming from suspend state
> - * @dev_priv:	i915 device private
> - */
> -int intel_guc_resume(struct drm_i915_private *dev_priv) -{
> -	struct intel_guc *guc = &dev_priv->guc;
> -	struct i915_gem_context *ctx;
> -	u32 data[3];
> -
> -	if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
> -		return 0;
> -
> -	if (i915.guc_log_level >= 0)
> -		gen9_enable_guc_interrupts(dev_priv);
> -
> -	ctx = dev_priv->kernel_context;
> -
> -	data[0] = INTEL_GUC_ACTION_EXIT_S_STATE;
> -	data[1] = GUC_POWER_D0;
> -	/* first page is shared data with GuC */
> -	data[2] = guc_ggtt_offset(ctx->engine[RCS].state);
> -
> -	return intel_guc_send(guc, data, ARRAY_SIZE(data));
> -}
> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c 
> b/drivers/gpu/drm/i915/intel_guc_log.c
> index 16d3b87..f18aac0 100644
> --- a/drivers/gpu/drm/i915/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
> @@ -655,8 +655,11 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
>  		return;
>  
>  	mutex_lock(&dev_priv->drm.struct_mutex);
> +	intel_runtime_pm_get(dev_priv);
>  	/* GuC logging is currently the only user of Guc2Host interrupts */
>  	gen9_disable_guc_interrupts(dev_priv);
> +	intel_runtime_pm_put(dev_priv);
> +
>  	guc_log_runtime_destroy(&dev_priv->guc);
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_pm.c 
> b/drivers/gpu/drm/i915/intel_pm.c index 8711c1f..7a87bca 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7769,6 +7769,8 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
>  	intel_disable_gt_powersave(dev_priv);
>  
>  	gen6_reset_rps_interrupts(dev_priv);
> +	if (i915.enable_guc_loading)
> +		gen9_disable_guc_interrupts(dev_priv);
>  }
>  
>  void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) 
> diff --git a/drivers/gpu/drm/i915/intel_uc.c 
> b/drivers/gpu/drm/i915/intel_uc.c index 27e072c..5999b75 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -328,19 +328,106 @@ static void guc_disable_communication(struct intel_guc *guc)
>  	guc->send = intel_guc_send_nop;
>  }
>  
> -int intel_uc_init_hw(struct drm_i915_private *dev_priv)
> +/**
> + * intel_guc_suspend() - notify GuC entering suspend state
> + * @dev_priv:	i915 device private
> + */
> +int intel_guc_suspend(struct drm_i915_private *dev_priv)

Note that all intel_guc_xxx() functions take intel_guc* as param, not dev_priv.
<Sagar> ACK
>  {
>  	struct intel_guc *guc = &dev_priv->guc;
> -	int ret, attempts;
> +	struct i915_gem_context *ctx;
> +	u32 data[3];
> +
> +	if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
> +		return 0;
> +
> +	ctx = dev_priv->kernel_context;
> +
> +	data[0] = INTEL_GUC_ACTION_ENTER_S_STATE;
> +	/* any value greater than GUC_POWER_D0 */
> +	data[1] = GUC_POWER_D1;
> +	/* first page is shared data with GuC */
> +	data[2] = guc_ggtt_offset(ctx->engine[RCS].state);
> +
> +	return intel_guc_send(guc, data, ARRAY_SIZE(data)); }
> +
> +/*
> + * intel_guc_finish() - Teardown GuC communication and related
> + * functionality. Also sends action to GuC to suspend. Hence,
> + * caller of this should ensure GuC is alive.

This requirement for caller is inconsistent with code below where you check for "enable_guc_loading" flag. And I think it is better to make all checks here, not to rely on the caller.
<Sagar> ACK
> + */
> +int intel_guc_finish(struct drm_i915_private *dev_priv)

Hmm, maybe better name for this function would be "intel_uc_pause_hw()" ?
Otherwise we'll have two similar functions:
<Sagar> ACK
	int intel_guc_finish(struct drm_i915_private *dev_priv)
	int intel_uc_fini_hw(struct drm_i915_private *dev_priv)

> +{
> +	int ret = 0;
>  
>  	if (!i915.enable_guc_loading)
> +		return ret;
> +
> +	if (dev_priv->guc.fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
> +		ret = intel_guc_suspend(dev_priv);
> +
> +	gen9_disable_guc_interrupts(dev_priv);
> +	guc_disable_communication(&dev_priv->guc);

We should disable communication *before* disabling irqs
<Sagar> ACK
> +	i915_ggtt_disable_guc(dev_priv);
> +
> +	return ret;
> +}
> +
> +/**
> + * intel_guc_resume() - notify GuC resuming from suspend state
> + * @dev_priv:	i915 device private
> + */
> +int intel_guc_resume(struct drm_i915_private *dev_priv)

Use intel_guc* as param, and likely this function can be declared as "static"
<Sagar> ACK
> +{
> +	struct intel_guc *guc = &dev_priv->guc;
> +	struct i915_gem_context *ctx;
> +	u32 data[3];
> +
> +	if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
>  		return 0;
>  
> -	guc_disable_communication(guc);
> -	gen9_reset_guc_interrupts(dev_priv);
> +	ctx = dev_priv->kernel_context;
> +
> +	data[0] = INTEL_GUC_ACTION_EXIT_S_STATE;
> +	data[1] = GUC_POWER_D0;
> +	/* first page is shared data with GuC */
> +	data[2] = guc_ggtt_offset(ctx->engine[RCS].state);
> +
> +	return intel_guc_send(guc, data, ARRAY_SIZE(data)); }
> +
> +/*
> + * intel_guc_prepare() - Bring up GuC communication and related
> + * functionality. Sends action to GuC to resume.
> + */
> +int intel_guc_prepare(struct drm_i915_private *dev_priv)

Maybe "intel_uc_resume_hw()"
<Sagar> ACK
> +{
> +	int ret = 0;
> +
> +	if (!i915.enable_guc_loading)
> +		return ret;
>  
> -	/* We need to notify the guc whenever we change the GGTT */
>  	i915_ggtt_enable_guc(dev_priv);
> +	guc_enable_communication(&dev_priv->guc);
> +	if (i915.guc_log_level >= 0)
> +		gen9_enable_guc_interrupts(dev_priv);
> +
> +	if (dev_priv->guc.fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
> +		ret = intel_guc_resume(dev_priv);
> +
> +	return ret;
> +}
> +
> +int intel_uc_init_hw(struct drm_i915_private *dev_priv) {
> +	struct intel_guc *guc = &dev_priv->guc;
> +	int ret, attempts;
> +
> +	if (!i915.enable_guc_loading)
> +		return 0;
> +
> +	intel_guc_prepare(dev_priv);
>  
>  	if (i915.enable_guc_submission) {
>  		/*
> @@ -447,16 +534,7 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
>  		return;
>  
>  	if (i915.enable_guc_submission)
> -		i915_guc_submission_disable(dev_priv);
> -
> -	guc_disable_communication(&dev_priv->guc);
> -
> -	if (i915.enable_guc_submission) {
> -		gen9_disable_guc_interrupts(dev_priv);
>  		i915_guc_submission_fini(dev_priv);
> -	}
> -
> -	i915_ggtt_disable_guc(dev_priv);
>  }
>  
>  int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 
> len) diff --git a/drivers/gpu/drm/i915/intel_uc.h 
> b/drivers/gpu/drm/i915/intel_uc.h index 22ae52b..5d7cc86 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -229,6 +229,10 @@ struct intel_huc {  int 
> intel_guc_sample_forcewake(struct intel_guc *guc);  int 
> intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);  
> int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 
> len);
> +int intel_guc_suspend(struct drm_i915_private *dev_priv); int 
> +intel_guc_resume(struct drm_i915_private *dev_priv); int 
> +intel_guc_finish(struct drm_i915_private *dev_priv); int 
> +intel_guc_prepare(struct drm_i915_private *dev_priv);

I'm not sure that we need all functions declared as public.

<Sagar> Will expose needed ones.

Thanks,
Michal

>  
>  static inline int intel_guc_send(struct intel_guc *guc, const u32 
> *action, u32 len)  {
> --
> 1.9.1
> 


More information about the Intel-gfx-trybot mailing list