[PATCH v2 10/12] drm/xe/pxp: add PXP PM support

John Harrison john.c.harrison at intel.com
Wed Oct 9 01:12:58 UTC 2024


On 8/16/2024 12:00, Daniele Ceraolo Spurio wrote:
> The HW suspend flow kills all PXP HWDRM sessions, so if there was any
> PXP activity before the suspend we need to trigger a full termination on
> suspend.
>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_pm.c        | 42 +++++++++++---
>   drivers/gpu/drm/xe/xe_pxp.c       | 92 ++++++++++++++++++++++++++++++-
>   drivers/gpu/drm/xe/xe_pxp.h       |  3 +
>   drivers/gpu/drm/xe/xe_pxp_types.h |  9 ++-
>   4 files changed, 134 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
> index 9f3c14fd9f33..1e1f87ec03a2 100644
> --- a/drivers/gpu/drm/xe/xe_pm.c
> +++ b/drivers/gpu/drm/xe/xe_pm.c
> @@ -20,6 +20,7 @@
>   #include "xe_guc.h"
>   #include "xe_irq.h"
>   #include "xe_pcode.h"
> +#include "xe_pxp.h"
>   #include "xe_trace.h"
>   #include "xe_wa.h"
>   
> @@ -90,22 +91,24 @@ int xe_pm_suspend(struct xe_device *xe)
>   	drm_dbg(&xe->drm, "Suspending device\n");
>   	trace_xe_pm_suspend(xe, __builtin_return_address(0));
>   
> +	err = xe_pxp_pm_suspend(xe->pxp);
> +	if (err)
> +		goto err;
> +
>   	for_each_gt(gt, xe, id)
>   		xe_gt_suspend_prepare(gt);
>   
>   	/* FIXME: Super racey... */
>   	err = xe_bo_evict_all(xe);
>   	if (err)
> -		goto err;
> +		goto err_pxp;
>   
>   	xe_display_pm_suspend(xe, false);
>   
>   	for_each_gt(gt, xe, id) {
>   		err = xe_gt_suspend(gt);
> -		if (err) {
> -			xe_display_pm_resume(xe, false);
> -			goto err;
> -		}
> +		if (err)
> +			goto err_display;
>   	}
>   
>   	xe_irq_suspend(xe);
> @@ -114,6 +117,11 @@ int xe_pm_suspend(struct xe_device *xe)
>   
>   	drm_dbg(&xe->drm, "Device suspended\n");
>   	return 0;
> +
> +err_display:
> +	xe_display_pm_resume(xe, false);
> +err_pxp:
> +	xe_pxp_pm_resume(xe->pxp);
>   err:
>   	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
>   	return err;
> @@ -163,6 +171,8 @@ int xe_pm_resume(struct xe_device *xe)
>   	if (err)
>   		goto err;
>   
> +	xe_pxp_pm_resume(xe->pxp);
> +
>   	drm_dbg(&xe->drm, "Device resumed\n");
>   	return 0;
>   err:
> @@ -356,6 +366,10 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
>   	 */
>   	lock_map_acquire(&xe_pm_runtime_lockdep_map);
>   
> +	err = xe_pxp_pm_suspend(xe->pxp);
> +	if (err)
> +		goto out;
> +
>   	/*
>   	 * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
>   	 * also checks and delets bo entry from user fault list.
> @@ -369,23 +383,30 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
>   	if (xe->d3cold.allowed) {
>   		err = xe_bo_evict_all(xe);
>   		if (err)
> -			goto out;
> +			goto out_pxp;
>   		xe_display_pm_suspend(xe, true);
>   	}
>   
>   	for_each_gt(gt, xe, id) {
>   		err = xe_gt_suspend(gt);
>   		if (err)
> -			goto out;
> +			goto out_display;
>   	}
>   
>   	xe_irq_suspend(xe);
>   
>   	if (xe->d3cold.allowed)
>   		xe_display_pm_suspend_late(xe);
> +
> +	lock_map_release(&xe_pm_runtime_lockdep_map);
> +	xe_pm_write_callback_task(xe, NULL);
> +	return 0;
> +
> +out_display:
> +	xe_display_pm_resume(xe, true);
> +out_pxp:
> +	xe_pxp_pm_resume(xe->pxp);
>   out:
> -	if (err)
> -		xe_display_pm_resume(xe, true);
>   	lock_map_release(&xe_pm_runtime_lockdep_map);
>   	xe_pm_write_callback_task(xe, NULL);
>   	return err;
> @@ -436,6 +457,9 @@ int xe_pm_runtime_resume(struct xe_device *xe)
>   		if (err)
>   			goto out;
>   	}
> +
> +	xe_pxp_pm_resume(xe->pxp);
> +
>   out:
>   	lock_map_release(&xe_pm_runtime_lockdep_map);
>   	xe_pm_write_callback_task(xe, NULL);
> diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c
> index 640e62d1d5d7..78373cbbe0d4 100644
> --- a/drivers/gpu/drm/xe/xe_pxp.c
> +++ b/drivers/gpu/drm/xe/xe_pxp.c
> @@ -137,6 +137,13 @@ static void pxp_terminate(struct xe_pxp *pxp)
>   	if (pxp->status == XE_PXP_ACTIVE)
>   		pxp->key_instance++;
>   
> +	/*
> +	 * we'll mark the status as needing termination on resume, so no need to
> +	 * emit a termination now.
> +	 */
> +	if (pxp->status == XE_PXP_SUSPENDED)
> +		return;
> +
>   	/*
>   	 * If we have a termination already in progress, we need to wait for
>   	 * it to complete before queueing another one. We update the state
> @@ -181,17 +188,19 @@ static void pxp_terminate(struct xe_pxp *pxp)
>   static void pxp_terminate_complete(struct xe_pxp *pxp)
>   {
>   	/*
> -	 * We expect PXP to be in one of 2 states when we get here:
> +	 * We expect PXP to be in one of 3 states when we get here:
>   	 * - XE_PXP_TERMINATION_IN_PROGRESS: a single termination event was
>   	 * requested and it is now completing, so we're ready to start.
>   	 * - XE_PXP_NEEDS_TERMINATION: a second termination was requested while
>   	 * the first one was still being processed; we don't update the state
>   	 * in this case so the pxp_start code will automatically issue that
>   	 * second termination.
> +	 * - XE_PXP_SUSPENDED: PXP is now suspended, so we defer everything to
> +	 * when we come back on resume.
>   	 */
>   	if (pxp->status == XE_PXP_TERMINATION_IN_PROGRESS)
>   		pxp->status = XE_PXP_READY_TO_START;
> -	else if (pxp->status != XE_PXP_NEEDS_TERMINATION)
> +	else if (pxp->status != XE_PXP_NEEDS_TERMINATION && pxp->status != XE_PXP_SUSPENDED)
>   		drm_err(&pxp->xe->drm,
>   			"PXP termination complete while status was %u\n",
>   			pxp->status);
> @@ -505,6 +514,7 @@ int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q)
>   		pxp_terminate(pxp);
>   		mutex_unlock(&pxp->mutex);
>   		goto wait_for_termination;
> +	case XE_PXP_SUSPENDED:
>   	default:
>   		drm_err(&pxp->xe->drm, "unexpected state during PXP start: %u", pxp->status);
>   		ret = -EIO;
> @@ -648,3 +658,81 @@ int xe_pxp_key_check(struct xe_pxp *pxp, struct xe_bo *bo)
>   	return 0;
>   }
>   
> +int xe_pxp_pm_suspend(struct xe_pxp *pxp)
> +{
> +	int ret = 0;
> +
> +	if (!xe_pxp_is_enabled(pxp))
> +		return 0;
> +
> +	mutex_lock(&pxp->mutex);
> +
> +	/* if the termination is already in progress, no need to re-emit it */
> +	if (!completion_done(&pxp->termination))
> +		goto mark_suspended;
> +
> +	switch (pxp->status) {
> +	case XE_PXP_ERROR:
> +	case XE_PXP_READY_TO_START:
> +	case XE_PXP_SUSPENDED:
> +		/* nothing to cleanup */
> +		break;
> +	case XE_PXP_NEEDS_TERMINATION:
> +		/* If PXP was never used we can skip the cleanup */
> +		if (pxp->key_instance == pxp->last_suspend_key_instance)
Again, there is the possibility of this check being confused by
key_instance rollover.

> +			break;
> +		fallthrough;
> +	case XE_PXP_ACTIVE:
> +		pxp_terminate(pxp);
> +		break;
> +	default:
> +		drm_err(&pxp->xe->drm, "unexpected state during PXP suspend: %u",
> +			pxp->status);
> +		ret = -EIO;
> +		goto out;
> +	}
> +
> +mark_suspended:
> +	/*
> +	 * We set this even if we were in error state, hoping the suspend clears
> +	 * the error. Worse case we fail again and go in error state again.
> +	 */
> +	pxp->status = XE_PXP_SUSPENDED;
> +
> +	mutex_unlock(&pxp->mutex);
> +
> +	/*
> +	 * if there is a termination in progress, wait for it.
> +	 * We need to wait outside the lock because the completion is done from
> +	 * within the lock
> +	 */
> +	if (!wait_for_completion_timeout(&pxp->termination,
> +					 msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS)))
> +		ret = -ETIMEDOUT;
> +
> +	pxp->last_suspend_key_instance = pxp->key_instance;
> +
> +out:
> +	return ret;
> +}
> +
> +void xe_pxp_pm_resume(struct xe_pxp *pxp)
> +{
> +	int err;
> +
> +	if (!xe_pxp_is_enabled(pxp))
> +		return;
> +
> +	err = kcr_pxp_enable(pxp);
> +
> +	mutex_lock(&pxp->mutex);
> +
> +	xe_assert(pxp->xe, pxp->status == XE_PXP_SUSPENDED);
> +
> +	if (err)
> +		pxp->status = XE_PXP_ERROR;
> +	else
> +		pxp->status = XE_PXP_NEEDS_TERMINATION;
> +
> +	mutex_unlock(&pxp->mutex);
> +}
> diff --git a/drivers/gpu/drm/xe/xe_pxp.h b/drivers/gpu/drm/xe/xe_pxp.h
> index 2d22a6e6ab27..af32c2616641 100644
> --- a/drivers/gpu/drm/xe/xe_pxp.h
> +++ b/drivers/gpu/drm/xe/xe_pxp.h
> @@ -20,6 +20,9 @@ int xe_pxp_get_readiness_status(struct xe_pxp *pxp);
>   int xe_pxp_init(struct xe_device *xe);
>   void xe_pxp_irq_handler(struct xe_device *xe, u16 iir);
>   
> +int xe_pxp_pm_suspend(struct xe_pxp *pxp);
> +void xe_pxp_pm_resume(struct xe_pxp *pxp);
> +
>   int xe_pxp_exec_queue_set_type(struct xe_pxp *pxp, struct xe_exec_queue *q, u8 type);
>   int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q);
>   void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q);
> diff --git a/drivers/gpu/drm/xe/xe_pxp_types.h b/drivers/gpu/drm/xe/xe_pxp_types.h
> index 1bb747837f86..942f2fa40a58 100644
> --- a/drivers/gpu/drm/xe/xe_pxp_types.h
> +++ b/drivers/gpu/drm/xe/xe_pxp_types.h
> @@ -24,7 +24,8 @@ enum xe_pxp_status {
>   	XE_PXP_NEEDS_TERMINATION = 0, /* starting status */
>   	XE_PXP_TERMINATION_IN_PROGRESS,
>   	XE_PXP_READY_TO_START,
> -	XE_PXP_ACTIVE
> +	XE_PXP_ACTIVE,
You can add a trailing comma even on the last enum value to avoid such 
unnecessary deltas.

John.

> +	XE_PXP_SUSPENDED
>   };
>   
>   /**
> @@ -111,6 +112,12 @@ struct xe_pxp {
>   
>   	/** @key_instance: keep track of the current iteration of the PXP key */
>   	u32 key_instance;
> +	/**
> +	 * @last_suspend_key_instance: value of key_instance at the last
> +	 * suspend. Used to check if any PXP session has been created between
> +	 * suspend cycles.
> +	 */
> +	u32 last_suspend_key_instance;
>   };
>   
>   #endif /* __XE_PXP_TYPES_H__ */



More information about the Intel-xe mailing list