[Intel-xe] [PATCH 2/2] RFC drm/xe: Disable GuC CT communication for D3Hot Transition

Ceraolo Spurio, Daniele daniele.ceraolospurio at intel.com
Thu Aug 31 19:02:31 UTC 2023



On 8/22/2023 10:09 PM, Riana Tauro wrote:
> During Runtime suspend, GuC is reset for both D0->D3hot/D3Cold
> transistions. It is not necessary for GuC to reset for D0 -> D3hot,
> only enable/disable ctb communication.
>
> Add a function that enables/disables CT communication when d3cold
> is not allowed.
>
> Signed-off-by: Riana Tauro <riana.tauro at intel.com>

xe_gt_suspend and xe_gt_resume do more things than just resetting the 
GuC (e.g. marking submission as disabled). Shouldn't we need at least 
some of that in the runtime suspend/resume scenario as well?

Also, which paths and how much we're actually looking to optimize? Are 
we interested in optimizing the full suspend as well?
To elaborate, we're not actually killing the GuC in the suspend flow, 
we're just resetting its SW state (i.e. putting it back as if it had 
just been loaded); the actual reset happens in the resume path in 
xe_uc_init_hw. If the GuC and the LMEM have survived the suspend/resume 
flow, we could theoretically just restart the SW side of things 
(re-register the CTBs and start SLPC) in the resume path instead of 
resetting and reloading the FW; this would be a lesser benefit to the 
runtime path compared to what you're proposing, but it could benefit the 
full resume path as well and it'd have the added benefit of keeping the 
2 paths the same. I'm not sure though if the LMEM management could end 
up breaking this approach by moving the memory around during suspend.


A couple of minor comments inline

> ---
>   drivers/gpu/drm/xe/xe_gt.c | 56 ++++++++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/xe/xe_gt.h |  2 ++
>   drivers/gpu/drm/xe/xe_pm.c | 10 +++++--
>   drivers/gpu/drm/xe/xe_uc.c | 18 ++++++++++++
>   drivers/gpu/drm/xe/xe_uc.h |  2 ++
>   5 files changed, 85 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 13320af4ddd3..0d52621ce64d 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -676,6 +676,62 @@ int xe_gt_resume(struct xe_gt *gt)
>   	return err;
>   }
>   
> +int xe_gt_runtime_suspend(struct xe_gt *gt)
> +{
> +	struct xe_device *xe = gt_to_xe(gt);
> +	int err;
> +
> +	if (xe->d3cold.allowed)
> +		return xe_gt_suspend(gt);
> +
> +	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> +	if (err)
> +		return err;
> +
> +	err = xe_uc_disable_communication(&gt->uc);
> +	if (err)
> +		goto err_force_wake;

uc_stop() might already cover what's needed (or could be expanded to do 
so), although unfortunately uc_start seems to not be matching and 
therefore not usable as-is for the resume side.

> +
> +	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
> +	xe_gt_info(gt, "suspended\n");
> +
> +	return 0;
> +
> +err_force_wake:
> +	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
> +	xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
> +
> +	return err;
> +}
> +
> +int xe_gt_runtime_resume(struct xe_gt *gt)
> +{
> +	struct xe_device *xe = gt_to_xe(gt);
> +	int err;
> +
> +	if (xe->d3cold.allowed)
> +		return xe_gt_resume(gt);
> +
> +	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> +	if (err)
> +		return err;
> +
> +	err = xe_uc_resume(&gt->uc);
> +	if (err)
> +		goto err_force_wake;
> +
> +	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
> +	xe_gt_info(gt, "resumed\n");
> +
> +	return 0;
> +
> +err_force_wake:
> +	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
> +	xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));
> +
> +	return err;
> +}
> +
>   struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
>   				     enum xe_engine_class class,
>   				     u16 instance, bool logical)
> diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
> index caded203a8a0..e6574e51004f 100644
> --- a/drivers/gpu/drm/xe/xe_gt.h
> +++ b/drivers/gpu/drm/xe/xe_gt.h
> @@ -37,6 +37,8 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt);
>   void xe_gt_suspend_prepare(struct xe_gt *gt);
>   int xe_gt_suspend(struct xe_gt *gt);
>   int xe_gt_resume(struct xe_gt *gt);
> +int xe_gt_runtime_suspend(struct xe_gt *gt);
> +int xe_gt_runtime_resume(struct xe_gt *gt);
>   void xe_gt_reset_async(struct xe_gt *gt);
>   void xe_gt_sanitize(struct xe_gt *gt);
>   
> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
> index 0f06d8304e17..6bc01bb45fc2 100644
> --- a/drivers/gpu/drm/xe/xe_pm.c
> +++ b/drivers/gpu/drm/xe/xe_pm.c
> @@ -245,7 +245,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
>   	}
>   
>   	for_each_gt(gt, xe, id) {
> -		err = xe_gt_suspend(gt);
> +		err = xe_gt_runtime_suspend(gt);
>   		if (err)
>   			goto out;
>   	}
> @@ -294,14 +294,18 @@ int xe_pm_runtime_resume(struct xe_device *xe)
>   
>   	xe_irq_resume(xe);
>   
> -	for_each_gt(gt, xe, id)
> -		xe_gt_resume(gt);
> +	for_each_gt(gt, xe, id) {
> +		err = xe_gt_runtime_resume(gt);
> +		if (err)
> +			goto out;
> +	}
>   
>   	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
>   		err = xe_bo_restore_user(xe);
>   		if (err)
>   			goto out;
>   	}
> +
>   out:
>   	lock_map_release(&xe_device_mem_access_lockdep_map);
>   	xe_pm_write_callback_task(xe, NULL);
> diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
> index addd6f2681b9..b5b53c8c3edb 100644
> --- a/drivers/gpu/drm/xe/xe_uc.c
> +++ b/drivers/gpu/drm/xe/xe_uc.c
> @@ -216,6 +216,15 @@ static void uc_reset_wait(struct xe_uc *uc)
>   		goto again;
>   }
>   
> +int xe_uc_disable_communication(struct xe_uc *uc)
> +{
> +	/* GuC submission not enabled, nothing to do */
> +	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
> +		return 0;
> +
> +	return xe_guc_disable_communication(&uc->guc);
> +}
> +
>   int xe_uc_suspend(struct xe_uc *uc)
>   {
>   	int ret;
> @@ -232,3 +241,12 @@ int xe_uc_suspend(struct xe_uc *uc)
>   
>   	return xe_guc_suspend(&uc->guc);
>   }
> +
> +int xe_uc_resume(struct xe_uc *uc)

This should be called runtime_resume.

Daniele

> +{
> +	/* GuC submission not enabled, nothing to do */
> +	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
> +		return 0;
> +
> +	return xe_guc_enable_communication(&uc->guc);
> +}
> diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
> index 42219b361df5..29bd692d8800 100644
> --- a/drivers/gpu/drm/xe/xe_uc.h
> +++ b/drivers/gpu/drm/xe/xe_uc.h
> @@ -12,8 +12,10 @@ int xe_uc_init(struct xe_uc *uc);
>   int xe_uc_init_hwconfig(struct xe_uc *uc);
>   int xe_uc_init_post_hwconfig(struct xe_uc *uc);
>   int xe_uc_init_hw(struct xe_uc *uc);
> +int xe_uc_disable_communication(struct xe_uc *uc);
>   void xe_uc_gucrc_disable(struct xe_uc *uc);
>   int xe_uc_reset_prepare(struct xe_uc *uc);
> +int xe_uc_resume(struct xe_uc *uc);
>   void xe_uc_stop_prepare(struct xe_uc *uc);
>   int xe_uc_stop(struct xe_uc *uc);
>   int xe_uc_start(struct xe_uc *uc);



More information about the Intel-xe mailing list