[PATCH v2 10/12] drm/xe/pxp: add PXP PM support

Tue Nov 12 22:27:10 UTC 2024

On 10/8/24 18:12, John Harrison wrote:
> On 8/16/2024 12:00, Daniele Ceraolo Spurio wrote:
>> The HW suspend flow kills all PXP HWDRM sessions, so if there was any
>> PXP activity before the suspend we need to trigger a full termination on
>> suspend.
>>
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_pm.c        | 42 +++++++++++---
>>   drivers/gpu/drm/xe/xe_pxp.c       | 92 ++++++++++++++++++++++++++++++-
>>   drivers/gpu/drm/xe/xe_pxp.h       |  3 +
>>   drivers/gpu/drm/xe/xe_pxp_types.h |  9 ++-
>>   4 files changed, 134 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
>> index 9f3c14fd9f33..1e1f87ec03a2 100644
>> --- a/drivers/gpu/drm/xe/xe_pm.c
>> +++ b/drivers/gpu/drm/xe/xe_pm.c
>> @@ -20,6 +20,7 @@
>>   #include "xe_guc.h"
>>   #include "xe_irq.h"
>>   #include "xe_pcode.h"
>> +#include "xe_pxp.h"
>>   #include "xe_trace.h"
>>   #include "xe_wa.h"
>>   @@ -90,22 +91,24 @@ int xe_pm_suspend(struct xe_device *xe)
>>       drm_dbg(&xe->drm, "Suspending device\n");
>>       trace_xe_pm_suspend(xe, __builtin_return_address(0));
>>   +    err = xe_pxp_pm_suspend(xe->pxp);
>> +    if (err)
>> +        goto err;
>> +
>>       for_each_gt(gt, xe, id)
>>           xe_gt_suspend_prepare(gt);
>>         /* FIXME: Super racey... */
>>       err = xe_bo_evict_all(xe);
>>       if (err)
>> -        goto err;
>> +        goto err_pxp;
>>         xe_display_pm_suspend(xe, false);
>>         for_each_gt(gt, xe, id) {
>>           err = xe_gt_suspend(gt);
>> -        if (err) {
>> -            xe_display_pm_resume(xe, false);
>> -            goto err;
>> -        }
>> +        if (err)
>> +            goto err_display;
>>       }
>>         xe_irq_suspend(xe);
>> @@ -114,6 +117,11 @@ int xe_pm_suspend(struct xe_device *xe)
>>         drm_dbg(&xe->drm, "Device suspended\n");
>>       return 0;
>> +
>> +err_display:
>> +    xe_display_pm_resume(xe, false);
>> +err_pxp:
>> +    xe_pxp_pm_resume(xe->pxp);
>>   err:
>>       drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
>>       return err;
>> @@ -163,6 +171,8 @@ int xe_pm_resume(struct xe_device *xe)
>>       if (err)
>>           goto err;
>>   +    xe_pxp_pm_resume(xe->pxp);
>> +
>>       drm_dbg(&xe->drm, "Device resumed\n");
>>       return 0;
>>   err:
>> @@ -356,6 +366,10 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
>>        */
>>       lock_map_acquire(&xe_pm_runtime_lockdep_map);
>>   +    err = xe_pxp_pm_suspend(xe->pxp);
>> +    if (err)
>> +        goto out;
>> +
>>       /*
>>        * Applying lock for entire list op as xe_ttm_bo_destroy and 
>> xe_bo_move_notify
>>        * also checks and delets bo entry from user fault list.
>> @@ -369,23 +383,30 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
>>       if (xe->d3cold.allowed) {
>>           err = xe_bo_evict_all(xe);
>>           if (err)
>> -            goto out;
>> +            goto out_pxp;
>>           xe_display_pm_suspend(xe, true);
>>       }
>>         for_each_gt(gt, xe, id) {
>>           err = xe_gt_suspend(gt);
>>           if (err)
>> -            goto out;
>> +            goto out_display;
>>       }
>>         xe_irq_suspend(xe);
>>         if (xe->d3cold.allowed)
>>           xe_display_pm_suspend_late(xe);
>> +
>> +    lock_map_release(&xe_pm_runtime_lockdep_map);
>> +    xe_pm_write_callback_task(xe, NULL);
>> +    return 0;
>> +
>> +out_display:
>> +    xe_display_pm_resume(xe, true);
>> +out_pxp:
>> +    xe_pxp_pm_resume(xe->pxp);
>>   out:
>> -    if (err)
>> -        xe_display_pm_resume(xe, true);
>>       lock_map_release(&xe_pm_runtime_lockdep_map);
>>       xe_pm_write_callback_task(xe, NULL);
>>       return err;
>> @@ -436,6 +457,9 @@ int xe_pm_runtime_resume(struct xe_device *xe)
>>           if (err)
>>               goto out;
>>       }
>> +
>> +    xe_pxp_pm_resume(xe->pxp);
>> +
>>   out:
>>       lock_map_release(&xe_pm_runtime_lockdep_map);
>>       xe_pm_write_callback_task(xe, NULL);
>> diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c
>> index 640e62d1d5d7..78373cbbe0d4 100644
>> --- a/drivers/gpu/drm/xe/xe_pxp.c
>> +++ b/drivers/gpu/drm/xe/xe_pxp.c
>> @@ -137,6 +137,13 @@ static void pxp_terminate(struct xe_pxp *pxp)
>>       if (pxp->status == XE_PXP_ACTIVE)
>>           pxp->key_instance++;
>>   +    /*
>> +     * we'll mark the status as needing termination on resume, so no 
>> need to
>> +     * emit a termination now.
>> +     */
>> +    if (pxp->status == XE_PXP_SUSPENDED)
>> +        return;
>> +
>>       /*
>>        * If we have a termination already in progress, we need to 
>> wait for
>>        * it to complete before queueing another one. We update the state
>> @@ -181,17 +188,19 @@ static void pxp_terminate(struct xe_pxp *pxp)
>>   static void pxp_terminate_complete(struct xe_pxp *pxp)
>>   {
>>       /*
>> -     * We expect PXP to be in one of 2 states when we get here:
>> +     * We expect PXP to be in one of 3 states when we get here:
>>        * - XE_PXP_TERMINATION_IN_PROGRESS: a single termination event 
>> was
>>        * requested and it is now completing, so we're ready to start.
>>        * - XE_PXP_NEEDS_TERMINATION: a second termination was 
>> requested while
>>        * the first one was still being processed; we don't update the 
>> state
>>        * in this case so the pxp_start code will automatically issue 
>> that
>>        * second termination.
>> +     * - XE_PXP_SUSPENDED: PXP is now suspended, so we defer 
>> everything to
>> +     * when we come back on resume.
>>        */
>>       if (pxp->status == XE_PXP_TERMINATION_IN_PROGRESS)
>>           pxp->status = XE_PXP_READY_TO_START;
>> -    else if (pxp->status != XE_PXP_NEEDS_TERMINATION)
>> +    else if (pxp->status != XE_PXP_NEEDS_TERMINATION && pxp->status 
>> != XE_PXP_SUSPENDED)
>>           drm_err(&pxp->xe->drm,
>>               "PXP termination complete while status was %u\n",
>>               pxp->status);
>> @@ -505,6 +514,7 @@ int xe_pxp_exec_queue_add(struct xe_pxp *pxp, 
>> struct xe_exec_queue *q)
>>           pxp_terminate(pxp);
>>           mutex_unlock(&pxp->mutex);
>>           goto wait_for_termination;
>> +    case XE_PXP_SUSPENDED:
>>       default:
>>           drm_err(&pxp->xe->drm, "unexpected state during PXP start: 
>> %u", pxp->status);
>>           ret = -EIO;
>> @@ -648,3 +658,81 @@ int xe_pxp_key_check(struct xe_pxp *pxp, struct 
>> xe_bo *bo)
>>       return 0;
>>   }
>>   +int xe_pxp_pm_suspend(struct xe_pxp *pxp)
>> +{
>> +    int ret = 0;
>> +
>> +    if (!xe_pxp_is_enabled(pxp))
>> +        return 0;
>> +
>> +    mutex_lock(&pxp->mutex);
>> +
>> +    /* if the termination is already in progress, no need to re-emit 
>> it */
>> +    if (!completion_done(&pxp->termination))
>> +        goto mark_suspended;
>> +
>> +    switch (pxp->status) {
>> +    case XE_PXP_ERROR:
>> +    case XE_PXP_READY_TO_START:
>> +    case XE_PXP_SUSPENDED:
>> +        /* nothing to cleanup */
>> +        break;
>> +    case XE_PXP_NEEDS_TERMINATION:
>> +        /* If PXP was never used we can skip the cleanup */
>> +        if (pxp->key_instance == pxp->last_suspend_key_instance)
> Again, there is the possibility of this being confused by key_instance 
> roll over.

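I don't believe it is possible for it to actually roll over even if the 
system was never rebooted in its lifetime: key_instance is a u32 that 
pxp_terminate() bumps only when an active session is being terminated, 
and the check here would only be fooled if it advanced by exactly 2^32 
between two suspends.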

Daniele

>
>> +            break;
>> +        fallthrough;
>> +    case XE_PXP_ACTIVE:
>> +        pxp_terminate(pxp);
>> +        break;
>> +    default:
>> +        drm_err(&pxp->xe->drm, "unexpected state during PXP suspend: 
>> %u",
>> +            pxp->status);
>> +        ret = -EIO;
>> +        goto out;
>> +    }
>> +
>> +mark_suspended:
>> +    /*
>> +     * We set this even if we were in error state, hoping the 
>> suspend clears
>> +     * the error. Worst case we fail again and go in error state again.
>> +     */
>> +    pxp->status = XE_PXP_SUSPENDED;
>> +
>> +    mutex_unlock(&pxp->mutex);
>> +
>> +    /*
>> +     * if there is a termination in progress, wait for it.
>> +     * We need to wait outside the lock because the completion is 
>> done from
>> +     * within the lock
>> +     */
>> +    if (!wait_for_completion_timeout(&pxp->termination,
>> + msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS)))
>> +        ret = -ETIMEDOUT;
>> +
>> +    pxp->last_suspend_key_instance = pxp->key_instance;
>> +
>> +out:
>> +    return ret;
>> +}
>> +
>> +void xe_pxp_pm_resume(struct xe_pxp *pxp)
>> +{
>> +    int err;
>> +
>> +    if (!xe_pxp_is_enabled(pxp))
>> +        return;
>> +
>> +    err = kcr_pxp_enable(pxp);
>> +
>> +    mutex_lock(&pxp->mutex);
>> +
>> +    xe_assert(pxp->xe, pxp->status == XE_PXP_SUSPENDED);
>> +
>> +    if (err)
>> +        pxp->status = XE_PXP_ERROR;
>> +    else
>> +        pxp->status = XE_PXP_NEEDS_TERMINATION;
>> +
>> +    mutex_unlock(&pxp->mutex);
>> +}
>> diff --git a/drivers/gpu/drm/xe/xe_pxp.h b/drivers/gpu/drm/xe/xe_pxp.h
>> index 2d22a6e6ab27..af32c2616641 100644
>> --- a/drivers/gpu/drm/xe/xe_pxp.h
>> +++ b/drivers/gpu/drm/xe/xe_pxp.h
>> @@ -20,6 +20,9 @@ int xe_pxp_get_readiness_status(struct xe_pxp *pxp);
>>   int xe_pxp_init(struct xe_device *xe);
>>   void xe_pxp_irq_handler(struct xe_device *xe, u16 iir);
>>   +int xe_pxp_pm_suspend(struct xe_pxp *pxp);
>> +void xe_pxp_pm_resume(struct xe_pxp *pxp);
>> +
>>   int xe_pxp_exec_queue_set_type(struct xe_pxp *pxp, struct 
>> xe_exec_queue *q, u8 type);
>>   int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue 
>> *q);
>>   void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct 
>> xe_exec_queue *q);
>> diff --git a/drivers/gpu/drm/xe/xe_pxp_types.h 
>> b/drivers/gpu/drm/xe/xe_pxp_types.h
>> index 1bb747837f86..942f2fa40a58 100644
>> --- a/drivers/gpu/drm/xe/xe_pxp_types.h
>> +++ b/drivers/gpu/drm/xe/xe_pxp_types.h
>> @@ -24,7 +24,8 @@ enum xe_pxp_status {
>>       XE_PXP_NEEDS_TERMINATION = 0, /* starting status */
>>       XE_PXP_TERMINATION_IN_PROGRESS,
>>       XE_PXP_READY_TO_START,
>> -    XE_PXP_ACTIVE
>> +    XE_PXP_ACTIVE,
> You can add a trailing comma even on the last enum value to avoid such 
> unnecessary deltas.
>
> John.
>
>> +    XE_PXP_SUSPENDED
>>   };
>>     /**
>> @@ -111,6 +112,12 @@ struct xe_pxp {
>>         /** @key_instance: keep track of the current iteration of the 
>> PXP key */
>>       u32 key_instance;
>> +    /**
>> +     * @last_suspend_key_instance: value of key_instance at the last
>> +     * suspend. Used to check if any PXP session has been created 
>> between
>> +     * suspend cycles.
>> +     */
>> +    u32 last_suspend_key_instance;
>>   };
>>     #endif /* __XE_PXP_TYPES_H__ */
>