[PATCH v2 04/12] drm/xe/pxp: Add GSC session invalidation support

Thu Nov 7 00:15:46 UTC 2024

On 10/7/24 13:05, John Harrison wrote:
> On 8/16/2024 12:00, Daniele Ceraolo Spurio wrote:
>> After a session is terminated, we need to inform the GSC so that it can
>> clean up its side of the allocation. This is done by sending an
>> invalidation command with the session ID.
>>
>> Note that this patch is meant to be squashed with the follow-up patches
>> that implement the other pieces of the termination flow. It is separate
>> for now for ease of review.
>>
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> ---
>>   drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h |  12 +
>>   drivers/gpu/drm/xe/xe_pxp_submit.c            | 215 ++++++++++++++++++
>>   drivers/gpu/drm/xe/xe_pxp_submit.h            |   3 +
>>   3 files changed, 230 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h 
>> b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
>> index f3c4cf10ba20..4a59c564a0d0 100644
>> --- a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
>> +++ b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
>> @@ -49,6 +49,7 @@ struct pxp_cmd_header {
>>       u32 buffer_len;
>>   } __packed;
>>   +#define PXP43_CMDID_INVALIDATE_STREAM_KEY 0x00000007
>>   #define PXP43_CMDID_NEW_HUC_AUTH 0x0000003F /* MTL+ */
>>     /* PXP-Input-Packet: HUC Auth-only */
>> @@ -63,4 +64,15 @@ struct pxp43_huc_auth_out {
>>       struct pxp_cmd_header header;
>>   } __packed;
>>   +/* PXP-Input-Packet: Invalidate Stream Key */
>> +struct pxp43_inv_stream_key_in {
>> +    struct pxp_cmd_header header;
>> +    u32 rsvd[3];
>> +} __packed;
>> +
>> +/* PXP-Output-Packet: Invalidate Stream Key */
>> +struct pxp43_inv_stream_key_out {
>> +    struct pxp_cmd_header header;
>> +    u32 rsvd;
>> +} __packed;
>>   #endif
>> diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c 
>> b/drivers/gpu/drm/xe/xe_pxp_submit.c
>> index 3b69dcc0a00f..41684d666376 100644
>> --- a/drivers/gpu/drm/xe/xe_pxp_submit.c
>> +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c
>> @@ -15,9 +15,13 @@
>>   #include "xe_gsc_submit.h"
>>   #include "xe_gt.h"
>>   #include "xe_lrc.h"
>> +#include "xe_map.h"
>>   #include "xe_pxp_types.h"
>>   #include "xe_sched_job.h"
>>   #include "xe_vm.h"
>> +#include "abi/gsc_command_header_abi.h"
>> +#include "abi/gsc_pxp_commands_abi.h"
>> +#include "instructions/xe_gsc_commands.h"
>>   #include "instructions/xe_mfx_commands.h"
>>   #include "instructions/xe_mi_commands.h"
>>   #include "regs/xe_gt_regs.h"
>> @@ -307,3 +311,214 @@ int xe_pxp_submit_session_termination(struct 
>> xe_pxp *pxp, u32 id)
>>         return 0;
>>   }
>> +
>> +static bool
>> +is_fw_err_platform_config(u32 type)
>> +{
>> +    switch (type) {
>> +    case PXP_STATUS_ERROR_API_VERSION:
>> +    case PXP_STATUS_PLATFCONFIG_KF1_NOVERIF:
>> +    case PXP_STATUS_PLATFCONFIG_KF1_BAD:
>> +        return true;
>> +    default:
>> +        break;
>> +    }
>> +    return false;
>> +}
>> +
>> +static const char *
>> +fw_err_to_string(u32 type)
>> +{
>> +    switch (type) {
>> +    case PXP_STATUS_ERROR_API_VERSION:
>> +        return "ERR_API_VERSION";
>> +    case PXP_STATUS_NOT_READY:
>> +        return "ERR_NOT_READY";
>> +    case PXP_STATUS_PLATFCONFIG_KF1_NOVERIF:
> Is it not worth having a separate string for this error?

Not really, the aim here is to communicate that the platform doesn't 
have a valid configuration (which in theory should never happen without 
a debug bios), it doesn't really matter which specific error it is. We 
also print the error code anyway.

>
>> +    case PXP_STATUS_PLATFCONFIG_KF1_BAD:
>> +        return "ERR_PLATFORM_CONFIG";
>> +    default:
>> +        break;
>> +    }
>> +    return NULL;
>> +}
>> +
>> +static int pxp_pkt_submit(struct xe_exec_queue *q, u64 batch_addr)
>> +{
>> +    struct xe_gt *gt = q->gt;
>> +    struct xe_device *xe = gt_to_xe(gt);
>> +    struct xe_sched_job *job;
>> +    struct dma_fence *fence;
>> +    long timeout;
>> +
>> +    xe_assert(xe, q->hwe->engine_id == XE_HW_ENGINE_GSCCS0);
>> +
>> +    job =  xe_sched_job_create(q, &batch_addr);
> Double space.
>
>> +    if (IS_ERR(job))
>> +        return PTR_ERR(job);
>> +
>> +    xe_sched_job_arm(job);
>> +    fence = dma_fence_get(&job->drm.s_fence->finished);
>> +    xe_sched_job_push(job);
>> +
>> +    timeout = dma_fence_wait_timeout(fence, false, HZ);
>> +    dma_fence_put(fence);
>> +    if (timeout < 0)
>> +        return timeout;
>> +    else if (!timeout)
>> +        return -ETIME;
>> +
>> +    return 0;
>> +}
>> +
>> +static void emit_pxp_heci_cmd(struct xe_device *xe, struct iosys_map 
>> *batch,
>> +                  u64 addr_in, u32 size_in, u64 addr_out, u32 size_out)
>> +{
>> +    u32 len = 0;
>> +
>> +    xe_map_wr(xe, batch, len++ * sizeof(u32), u32, GSC_HECI_CMD_PKT);
>> +    xe_map_wr(xe, batch, len++ * sizeof(u32), u32, 
>> lower_32_bits(addr_in));
>> +    xe_map_wr(xe, batch, len++ * sizeof(u32), u32, 
>> upper_32_bits(addr_in));
>> +    xe_map_wr(xe, batch, len++ * sizeof(u32), u32, size_in);
>> +    xe_map_wr(xe, batch, len++ * sizeof(u32), u32, 
>> lower_32_bits(addr_out));
>> +    xe_map_wr(xe, batch, len++ * sizeof(u32), u32, 
>> upper_32_bits(addr_out));
>> +    xe_map_wr(xe, batch, len++ * sizeof(u32), u32, size_out);
>> +    xe_map_wr(xe, batch, len++ * sizeof(u32), u32, 0);
>> +    xe_map_wr(xe, batch, len++ * sizeof(u32), u32, 
>> MI_BATCH_BUFFER_END);
>> +}
>> +
>> +#define GSC_PENDING_RETRY_MAXCOUNT 40
>> +#define GSC_PENDING_RETRY_PAUSE_MS 50
>> +static int gsccs_send_message(struct xe_pxp_gsc_client_resources 
>> *gsc_res,
>> +                  void *msg_in, size_t msg_in_size,
>> +                  void *msg_out, size_t msg_out_size_max)
>> +{
>> +    struct xe_device *xe = gsc_res->vm->xe;
>> +    const size_t max_msg_size = gsc_res->inout_size - sizeof(struct 
>> intel_gsc_mtl_header);
>> +    u32 wr_offset = 0;
>> +    u32 rd_offset = 0;
> The intialisation is not necessary here. rd_offset has the appearance 
> of requiring it but doesn't really, and wr_offset is re-assigned 
> almost immediately.
>
>> +    u32 reply_size;
>> +    u32 min_reply_size = 0;
>> +    int ret = 0;
> Also not necessary to be pre-initialised.
>
>> +    int retry = GSC_PENDING_RETRY_MAXCOUNT;
>> +
>> +    if (msg_in_size > max_msg_size || msg_out_size_max > max_msg_size)
>> +        return -ENOSPC;
>> +
>> +    wr_offset = xe_gsc_emit_header(xe, &gsc_res->msg_in, 0,
>> +                       HECI_MEADDRESS_PXP,
>> +                       gsc_res->host_session_handle,
>> +                       msg_in_size);
>> +
>> +    /* NOTE: zero size packets are used for session-cleanups */
>> +    if (msg_in && msg_in_size) {
>> +        xe_map_memcpy_to(xe, &gsc_res->msg_in, wr_offset,
>> +                 msg_in, msg_in_size);
>> +        min_reply_size = sizeof(struct pxp_cmd_header);
>> +    }
>> +
>> +    /* Make sure the reply header does not contain stale data */
>> +    xe_gsc_poison_header(xe, &gsc_res->msg_out, 0);
>> +
>> +    emit_pxp_heci_cmd(xe, &gsc_res->batch, PXP_BB_SIZE,
>> +              wr_offset + msg_in_size, PXP_BB_SIZE + 
>> gsc_res->inout_size,
>> +              msg_out_size_max + wr_offset);
> Is this correct? It is passing in the batch buffer allocation size as 
> the address. Shouldn't there be some kind of base address included? 
> The in/out buffer is after the BB in the same allocation but that 
> allocation is not guaranteed to be at address zero, is it?

It is guaranteed to be ad address zero, because when we call bind_bo() 
we specify the address. I can make that a define if you think it'd make 
things clearer.

>
> Also, it would be more consistent to use 'wr_offset + out_size_max' to 
> match the input calculation rather than flipping the terms around.
>
>> +
>> +    xe_device_wmb(xe);
>> +
> Might be worth a comment here to say why retries are required and how 
> many/how long is expected normally versus worst case?

will do.

>
>> +    do {
>> +        ret = pxp_pkt_submit(gsc_res->q, 0);
>> +        if (ret)
>> +            break;
>> +
>> +        if (xe_gsc_check_and_update_pending(xe, &gsc_res->msg_in, 0,
>> +                            &gsc_res->msg_out, 0)) {
>> +            ret = -EAGAIN;
>> +            msleep(GSC_PENDING_RETRY_PAUSE_MS);
>> +        }
>> +    } while (--retry && ret == -EAGAIN);
>> +
>> +    if (ret) {
>> +        drm_err(&xe->drm, "failed to submit GSC PXP message: %d\n", 
>> ret);
>> +        return ret;
>> +    }
>> +
>> +    ret = xe_gsc_read_out_header(xe, &gsc_res->msg_out, 0,
>> +                     min_reply_size, &rd_offset);
>> +    if (ret) {
>> +        drm_err(&xe->drm, "invalid GSC reply for PXP (err=%d)\n", ret);
> Should be %pe for the error code?
>
>> +        return ret;
>> +    }
>> +
>> +    if (msg_out && min_reply_size) {
>> +        reply_size = xe_map_rd_field(xe, &gsc_res->msg_out, rd_offset,
>> +                         struct pxp_cmd_header, buffer_len);
>> +        reply_size += sizeof(struct pxp_cmd_header);
>> +
>> +        if (reply_size > msg_out_size_max) {
>> +            drm_warn(&xe->drm, "caller with insufficient PXP reply 
>> size %u (%ld)\n",
>> +                 reply_size, msg_out_size_max);
> I would maybe go with 'reply size overflow'. Took me a moment to work 
> out why 'size > max' becomes 'insufficient reply size'.
>
>> +            reply_size = msg_out_size_max;
> Is it useful to return a partial message?

The caller can check the header and see if there is any error set there.

>
>> +        }
>> +
>> +        xe_map_memcpy_from(xe, msg_out, &gsc_res->msg_out,
>> +                   rd_offset, reply_size);
>> +    }
>> +
>> +    xe_gsc_poison_header(xe, &gsc_res->msg_in, 0);
>> +
>> +    return ret;
>> +}
>> +
>> +/**
>> + * xe_pxp_submit_session_invalidation - submits a PXP GSC invalidation
>> + * @gsc_res: the pxp client resources
>> + * @id: the session to invalidate
>> + *
>> + * Submit a message to the GSC FW to notify it that a session has been
>> + * terminated and is therefore invalid.
>> + *
>> + * Returns 0 if the submission is successful, an errno value otherwise.
>> + */
>> +int xe_pxp_submit_session_invalidation(struct 
>> xe_pxp_gsc_client_resources *gsc_res,
>> +                       u32 id)
> Is this really over 100 columns if not wrapped?

nope, will remove the wrapping

>
>> +{
>> +    struct xe_device *xe = gsc_res->vm->xe;
>> +    struct pxp43_inv_stream_key_in msg_in = {0};
>> +    struct pxp43_inv_stream_key_out msg_out = {0};
>> +    int ret = 0;
>> +
>> +    /*
>> +     * Stream key invalidation reuses the same version 4.2 input/output
>> +     * command format but firmware requires 4.3 API interaction
>> +     */
>> +    msg_in.header.api_version = PXP_APIVER(4, 3);
>> +    msg_in.header.command_id = PXP43_CMDID_INVALIDATE_STREAM_KEY;
>> +    msg_in.header.buffer_len = sizeof(msg_in) - sizeof(msg_in.header);
>> +
>> +    msg_in.header.stream_id = 
>> FIELD_PREP(PXP_CMDHDR_EXTDATA_SESSION_VALID, 1);
>> +    msg_in.header.stream_id |= 
>> FIELD_PREP(PXP_CMDHDR_EXTDATA_APP_TYPE, 0);
>> +    msg_in.header.stream_id |= 
>> FIELD_PREP(PXP_CMDHDR_EXTDATA_SESSION_ID, id);
>> +
>> +    ret = gsccs_send_message(gsc_res, &msg_in, sizeof(msg_in),
>> +                 &msg_out, sizeof(msg_out));
>> +    if (ret) {
>> +        drm_err(&xe->drm, "Failed to inv-stream-key-%u, ret=[%d]\n",
> Would be clearer to say "failed to invalidate stream-key-%u"? The 
> message current reads as "failed to <name-of-variable>" which doesn't 
> make much sense. Same comment for the other two prints below.
>
> Also, %pe for the return code?

Ack.

Daniele

>
> John.
>
>> +            id, ret);
>> +    } else if (msg_out.header.status != 0) {
>> +        if (is_fw_err_platform_config(msg_out.header.status)) {
>> +            drm_info_once(&xe->drm,
>> +                      "PXP inv-stream-key-%u failed due to BIOS/SOC 
>> :0x%08x:%s\n",
>> +                      id, msg_out.header.status,
>> +                      fw_err_to_string(msg_out.header.status));
>> +        } else {
>> +            drm_dbg(&xe->drm, "PXP inv-stream-key-%u failed 
>> 0x%08x:%s:\n",
>> +                id, msg_out.header.status,
>> +                fw_err_to_string(msg_out.header.status));
>> +            drm_dbg(&xe->drm, "     cmd-detail: 
>> ID=[0x%08x],API-Ver-[0x%08x]\n",
>> +                msg_in.header.command_id, msg_in.header.api_version);
>> +        }
>> +    }
>> +
>> +    return ret;
>> +}
>> diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.h 
>> b/drivers/gpu/drm/xe/xe_pxp_submit.h
>> index 4ee8c0acfed9..48fdc9b09116 100644
>> --- a/drivers/gpu/drm/xe/xe_pxp_submit.h
>> +++ b/drivers/gpu/drm/xe/xe_pxp_submit.h
>> @@ -9,10 +9,13 @@
>>   #include <linux/types.h>
>>     struct xe_pxp;
>> +struct xe_pxp_gsc_client_resources;
>>     int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp);
>>   void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp);
>>     int xe_pxp_submit_session_termination(struct xe_pxp *pxp, u32 id);
>> +int xe_pxp_submit_session_invalidation(struct 
>> xe_pxp_gsc_client_resources *gsc_res,
>> +                       u32 id);
>>     #endif /* __XE_PXP_SUBMIT_H__ */
>