[Intel-xe] [PATCH 2/2] RFC drm/xe: Disable GuC CT communication for D3Hot Transition

Rodrigo Vivi rodrigo.vivi at intel.com
Thu Aug 31 19:21:18 UTC 2023


On Thu, Aug 31, 2023 at 12:02:31PM -0700, Ceraolo Spurio, Daniele wrote:
> 
> 
> On 8/22/2023 10:09 PM, Riana Tauro wrote:
> > During Runtime suspend, GuC is reset for both D0->D3hot/D3Cold
> > transistions. It is not necessary for GuC to reset for D0 -> D3hot,
> > only enable/disable ctb communication.
> > 
> > Add a function that enables/disables CT communication when d3cold
> > is not allowed.
> > 
> > Signed-off-by: Riana Tauro <riana.tauro at intel.com>
> 
> xe_gt_suspend and xe_gt_resume do more things than just resetting the GuC
> (e.g. marking submission as disabled). Shouldn't we need at least some of
> that in the runtime suspend/resume scenario as well?

I was asking myself the same thing...

> 
> Also, which paths and how much we're actually looking to optimize? Are we
> interested in optimizing the full suspend as well?
> To elaborate, we're not actually killing the GuC in the suspend flow, we're
> just resetting its SW state (i.e. putting it back as if it had just been
> loaded); the actual reset happens in the resume path in xe_uc_init_hw. If
> the GuC and the LMEM have survived the suspend/resume flow, we could

On S3/S2idle/S4, GuC and LMEM will lose power.

> theoretically just restart the SW side of things (re-register the CTBs and
> start SLPC) in the resume path instead of resetting and reloading the FW;
> this would be a lesser benefit to the runtime path compared to what you're
> proposing, but it could benefit the full resume path as well and it'd have
> the added benefit of keeping the 2 paths the same. I'm not sure though if
> the LMEM management could end up breaking this approach by moving the memory
> around during suspend.

this memory movement is indeed the hardest part with all the locking...

> 
> 
> A couple of minor comments inline
> 
> > ---
> >   drivers/gpu/drm/xe/xe_gt.c | 56 ++++++++++++++++++++++++++++++++++++++
> >   drivers/gpu/drm/xe/xe_gt.h |  2 ++
> >   drivers/gpu/drm/xe/xe_pm.c | 10 +++++--
> >   drivers/gpu/drm/xe/xe_uc.c | 18 ++++++++++++
> >   drivers/gpu/drm/xe/xe_uc.h |  2 ++
> >   5 files changed, 85 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> > index 13320af4ddd3..0d52621ce64d 100644
> > --- a/drivers/gpu/drm/xe/xe_gt.c
> > +++ b/drivers/gpu/drm/xe/xe_gt.c
> > @@ -676,6 +676,62 @@ int xe_gt_resume(struct xe_gt *gt)
> >   	return err;
> >   }
> > +int xe_gt_runtime_suspend(struct xe_gt *gt)
> > +{
> > +	struct xe_device *xe = gt_to_xe(gt);
> > +	int err;
> > +
> > +	if (xe->d3cold.allowed)
> > +		return xe_gt_suspend(gt);
> > +
> > +	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> > +	if (err)
> > +		return err;
> > +
> > +	err = xe_uc_disable_communication(&gt->uc);
> > +	if (err)
> > +		goto err_force_wake;
> 
> uc_stop() might already cover what's needed (or could be expanded to do so),
> although unfortunately uc_start seems to not be matching and therefore not
> usable as-is for the resume side.
> 
> > +
> > +	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
> > +	xe_gt_info(gt, "suspended\n");
> > +
> > +	return 0;
> > +
> > +err_force_wake:
> > +	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
> > +	xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
> > +
> > +	return err;
> > +}
> > +
> > +int xe_gt_runtime_resume(struct xe_gt *gt)
> > +{
> > +	struct xe_device *xe = gt_to_xe(gt);
> > +	int err;
> > +
> > +	if (xe->d3cold.allowed)
> > +		return xe_gt_resume(gt);
> > +
> > +	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> > +	if (err)
> > +		return err;
> > +
> > +	err = xe_uc_resume(&gt->uc);
> > +	if (err)
> > +		goto err_force_wake;
> > +
> > +	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
> > +	xe_gt_info(gt, "resumed\n");
> > +
> > +	return 0;
> > +
> > +err_force_wake:
> > +	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
> > +	xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));
> > +
> > +	return err;
> > +}
> > +
> >   struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
> >   				     enum xe_engine_class class,
> >   				     u16 instance, bool logical)
> > diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
> > index caded203a8a0..e6574e51004f 100644
> > --- a/drivers/gpu/drm/xe/xe_gt.h
> > +++ b/drivers/gpu/drm/xe/xe_gt.h
> > @@ -37,6 +37,8 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt);
> >   void xe_gt_suspend_prepare(struct xe_gt *gt);
> >   int xe_gt_suspend(struct xe_gt *gt);
> >   int xe_gt_resume(struct xe_gt *gt);
> > +int xe_gt_runtime_suspend(struct xe_gt *gt);
> > +int xe_gt_runtime_resume(struct xe_gt *gt);
> >   void xe_gt_reset_async(struct xe_gt *gt);
> >   void xe_gt_sanitize(struct xe_gt *gt);
> > diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
> > index 0f06d8304e17..6bc01bb45fc2 100644
> > --- a/drivers/gpu/drm/xe/xe_pm.c
> > +++ b/drivers/gpu/drm/xe/xe_pm.c
> > @@ -245,7 +245,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
> >   	}
> >   	for_each_gt(gt, xe, id) {
> > -		err = xe_gt_suspend(gt);
> > +		err = xe_gt_runtime_suspend(gt);
> >   		if (err)
> >   			goto out;
> >   	}
> > @@ -294,14 +294,18 @@ int xe_pm_runtime_resume(struct xe_device *xe)
> >   	xe_irq_resume(xe);
> > -	for_each_gt(gt, xe, id)
> > -		xe_gt_resume(gt);
> > +	for_each_gt(gt, xe, id) {
> > +		err = xe_gt_runtime_resume(gt);
> > +		if (err)
> > +			goto out;
> > +	}
> >   	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
> >   		err = xe_bo_restore_user(xe);
> >   		if (err)
> >   			goto out;
> >   	}
> > +
> >   out:
> >   	lock_map_release(&xe_device_mem_access_lockdep_map);
> >   	xe_pm_write_callback_task(xe, NULL);
> > diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
> > index addd6f2681b9..b5b53c8c3edb 100644
> > --- a/drivers/gpu/drm/xe/xe_uc.c
> > +++ b/drivers/gpu/drm/xe/xe_uc.c
> > @@ -216,6 +216,15 @@ static void uc_reset_wait(struct xe_uc *uc)
> >   		goto again;
> >   }
> > +int xe_uc_disable_communication(struct xe_uc *uc)
> > +{
> > +	/* GuC submission not enabled, nothing to do */
> > +	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
> > +		return 0;
> > +
> > +	return xe_guc_disable_communication(&uc->guc);
> > +}
> > +
> >   int xe_uc_suspend(struct xe_uc *uc)
> >   {
> >   	int ret;
> > @@ -232,3 +241,12 @@ int xe_uc_suspend(struct xe_uc *uc)
> >   	return xe_guc_suspend(&uc->guc);
> >   }
> > +
> > +int xe_uc_resume(struct xe_uc *uc)
> 
> This should be called runtime_resume.
> 
> Daniele
> 
> > +{
> > +	/* GuC submission not enabled, nothing to do */
> > +	if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
> > +		return 0;
> > +
> > +	return xe_guc_enable_communication(&uc->guc);
> > +}
> > diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
> > index 42219b361df5..29bd692d8800 100644
> > --- a/drivers/gpu/drm/xe/xe_uc.h
> > +++ b/drivers/gpu/drm/xe/xe_uc.h
> > @@ -12,8 +12,10 @@ int xe_uc_init(struct xe_uc *uc);
> >   int xe_uc_init_hwconfig(struct xe_uc *uc);
> >   int xe_uc_init_post_hwconfig(struct xe_uc *uc);
> >   int xe_uc_init_hw(struct xe_uc *uc);
> > +int xe_uc_disable_communication(struct xe_uc *uc);
> >   void xe_uc_gucrc_disable(struct xe_uc *uc);
> >   int xe_uc_reset_prepare(struct xe_uc *uc);
> > +int xe_uc_resume(struct xe_uc *uc);
> >   void xe_uc_stop_prepare(struct xe_uc *uc);
> >   int xe_uc_stop(struct xe_uc *uc);
> >   int xe_uc_start(struct xe_uc *uc);
> 


More information about the Intel-xe mailing list