[Intel-gfx] [PATCH v6 4/8] drm/i915/pxp: Add GSC-CS backend to send GSC fw messages
Ceraolo Spurio, Daniele
daniele.ceraolospurio at intel.com
Sat Mar 4 01:07:03 UTC 2023
On 2/27/2023 6:21 PM, Alan Previn wrote:
> Add GSC engine based method for sending PXP firmware packets
> to the GSC firmware for MTL (and future) products.
>
> Use the newly added helpers to populate the GSC-CS memory
> header and send the message packet to the FW by dispatching
> the GSC_HECI_CMD_PKT instruction on the GSC engine.
>
> We use non-priveleged batches for submission to GSC engine
> which require two buffers for the request:
> - a buffer for the HECI packet that contains PXP FW commands
> - a batch-buffer that contains the engine instruction for
> sending the HECI packet to the GSC firmware.
>
> Thus, add the allocation and freeing of these buffers in gsccs
> init and fini.
>
> The GSC-fw may reply to commands with a SUCCESS but with an
> additional pending-bit set in the reply packet. This bit
> means the GSC-FW is currently busy and the caller needs to
> try again with the gsc_message_handle the fw gave. The
> GSC-fw requires a non-zero host_session_handle provided
> by the caller to enable gsc_message_handle tracking.
>
> Thus, allocate the host_session_handle at init and destroy it
> at fini (the latter requiring an FYI to the gsc-firmware).
> Also ensure the send-message function allows replay of the
> gsc_message_handle.
>
> Signed-off-by: Alan Previn <alan.previn.teres.alexis at intel.com>
> ---
> .../drm/i915/pxp/intel_pxp_cmd_interface_43.h | 4 +
> drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c | 239 +++++++++++++++++-
> drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h | 4 +
> drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 6 +
> 4 files changed, 251 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h
> index ad67e3f49c20..b2523d6918c7 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h
> @@ -12,6 +12,10 @@
> /* PXP-Cmd-Op definitions */
> #define PXP43_CMDID_START_HUC_AUTH 0x0000003A
>
> +/* PXP-Packet sizes for MTL's GSCCS-HECI instruction */
> +#define PXP43_MAX_HECI_IN_SIZE (SZ_32K)
> +#define PXP43_MAX_HECI_OUT_SIZE (SZ_32K)
> +
> /* PXP-Input-Packet: HUC-Authentication */
> struct pxp43_start_huc_auth_in {
> struct pxp_cmd_header header;
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
> index 13693e78b57e..30aa660a975f 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
> @@ -6,19 +6,226 @@
> #include "gem/i915_gem_internal.h"
>
> #include "gt/intel_context.h"
> +#include "gt/uc/intel_gsc_uc_heci_cmd_submit.h"
>
> #include "i915_drv.h"
> #include "intel_pxp_cmd_interface_43.h"
> #include "intel_pxp_gsccs.h"
> #include "intel_pxp_types.h"
>
> +static int
> +gsccs_send_message(struct intel_pxp *pxp,
> + void *msg_in, size_t msg_in_size,
> + void *msg_out, size_t msg_out_size_max,
> + size_t *msg_out_len,
> + u64 *gsc_msg_handle_retry)
> +{
> + struct intel_gt *gt = pxp->ctrl_gt;
> + struct drm_i915_private *i915 = gt->i915;
> + struct gsccs_session_resources *exec = &pxp->gsccs_res;
in the alloc/free functions you called this object *strm_res; IMO better
to use a consistent naming so it is clear they're the same object
> + struct intel_gsc_mtl_header *header = exec->pkt_vaddr;
> + struct intel_gsc_heci_non_priv_pkt pkt;
> + bool null_pkt = !msg_in && !msg_out;
> + size_t max_msg_size;
> + u32 reply_size;
> + int ret;
> +
> + if (!exec->ce)
> + return -ENODEV;
> +
> + max_msg_size = PXP43_MAX_HECI_IN_SIZE - sizeof(*header);
Using the same max_msg_size for both in and out only works if
PXP43_MAX_HECI_IN_SIZE == PXP43_MAX_HECI_OUT_SIZE. This is true now, but
I'd add a:
BUILD_BUG_ON(PXP43_MAX_HECI_IN_SIZE != PXP43_MAX_HECI_OUT_SIZE);
just to be safe. Potentially also a:
GEM_BUG_ON(exec->pkt_vma->size < (PXP43_MAX_HECI_IN_SIZE +
PXP43_MAX_HECI_OUT_SIZE));
After checking that exec->pkt_vma exists.
> +
> + if (msg_in_size > max_msg_size || msg_out_size_max > max_msg_size)
> + return -ENOSPC;
> +
> + if (!exec->pkt_vma || !exec->bb_vma)
> + return -ENOENT;
> +
> + mutex_lock(&pxp->tee_mutex);
> +
> + memset(header, 0, sizeof(*header));
> + intel_gsc_uc_heci_cmd_emit_mtl_header(header, GSC_HECI_MEADDRESS_PXP,
> + msg_in_size + sizeof(*header),
> + exec->host_session_handle);
> +
> + /* check if this is a host-session-handle cleanup call */
> + if (null_pkt)
nit: can't you just use if (!msg_in && !msg_out) instead of a local var?
not a blocker.
> + header->flags |= GSC_HECI_FLAG_MSG_CLEANUP;
> +
> + /* copy caller provided gsc message handle if this is polling for a prior msg completion */
> + header->gsc_message_handle = *gsc_msg_handle_retry;
> +
> + /* NOTE: zero size packets are used for session-cleanups */
> + if (msg_in && msg_in_size)
> + memcpy(exec->pkt_vaddr + sizeof(*header), msg_in, msg_in_size);
> +
> + pkt.addr_in = i915_vma_offset(exec->pkt_vma);
> + pkt.size_in = header->message_size;
> + pkt.addr_out = pkt.addr_in + PXP43_MAX_HECI_IN_SIZE;
> + pkt.size_out = msg_out_size_max + sizeof(*header);
> + pkt.heci_pkt_vma = exec->pkt_vma;
> + pkt.bb_vma = exec->bb_vma;
> +
> + ret = intel_gsc_uc_heci_cmd_submit_nonpriv(>->uc.gsc,
> + exec->ce, &pkt, exec->bb_vaddr,
> + GSC_REPLY_LATENCY_MS);
> + if (ret) {
> + drm_err(&i915->drm, "failed to send gsc PXP msg (%d)\n", ret);
> + goto unlock;
> + }
> +
> + /* we keep separate location for reply, so get the response header loc first */
> + header = exec->pkt_vaddr + PXP43_MAX_HECI_IN_SIZE;
> +
> + /* Response validity marker, status and busyness */
> + if (header->validity_marker != GSC_HECI_VALIDITY_MARKER) {
AFAICS you're not clearing the reply header when you re-send the same
packets after the pending bit, so this marker might be stale data. Same
for the other fields below.
> + drm_err(&i915->drm, "gsc PXP reply with invalid validity marker\n");
> + ret = -EINVAL;
> + goto unlock;
> + }
> + if (header->status != 0) {
> + drm_dbg(&i915->drm, "gsc PXP reply status has error = 0x%08x\n",
> + header->status);
> + ret = -EINVAL;
> + goto unlock;
> + }
> + if (header->flags & GSC_HECI_FLAG_MSG_PENDING) {
> + drm_dbg(&i915->drm, "gsc PXP reply is busy\n");
> + /*
> + * When the GSC firmware replies with pending bit, it means that the requested
> + * operation has begun but the completion is pending and the caller needs
> + * to re-request with the gsc_message_handle that was returned by the firmware.
> + * until the pending bit is turned off.
> + */
> + *gsc_msg_handle_retry = header->gsc_message_handle;
Non blocking question: would it be worth it to copy the value to the
header_in directly, instead of returning the value to the caller and
copying it on resubmit? Just a thought, I see pro and cons with both
approaches.
> + ret = -EAGAIN;
> + goto unlock;
> + }
> +
> + reply_size = header->message_size - sizeof(*header);
> + if (reply_size > msg_out_size_max) {
> + drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%ld)\n",
> + reply_size, msg_out_size_max);
> + reply_size = msg_out_size_max;
> + } else if (reply_size != msg_out_size_max) {
> + drm_dbg(&i915->drm, "caller unexpected PXP reply size %u (%ld)\n",
> + reply_size, msg_out_size_max);
Are we expecting all caller to always pass the exact size? Not a
complain, but if that's the case then maybe rename msg_out_size_max to
msg_out_expected_size, so it's clearer. size_max sounds like any size
smaller than it is allowed. My personal preference would be to leave
this as a size_max and have the caller decide if the actual returned
size matches the expectations (via msg_out_len)
> + }
> +
> + if (msg_out)
> + memcpy(msg_out, exec->pkt_vaddr + PXP43_MAX_HECI_IN_SIZE + sizeof(*header),
> + reply_size);
> + if (msg_out_len)
> + *msg_out_len = reply_size;
> +
> +unlock:
> + mutex_unlock(&pxp->tee_mutex);
> + return ret;
> +}
> +
> +static int
> +gsccs_send_message_retry_complete(struct intel_pxp *pxp,
> + void *msg_in, size_t msg_in_size,
> + void *msg_out, size_t msg_out_size_max,
> + size_t *msg_out_len)
> +{
> + u64 gsc_session_retry = 0;
> + int ret, tries = 0;
> +
> + /*
> + * Keep sending request if GSC firmware was busy. Based on fw specs +
> + * sw overhead (and testing) we expect a worst case pending-bit delay of
> + * GSC_PENDING_RETRY_MAXCOUNT x GSC_PENDING_RETRY_PAUSE_MS millisecs.
> + */
> + do {
> + ret = gsccs_send_message(pxp, msg_in, msg_in_size, msg_out, msg_out_size_max,
> + msg_out_len, &gsc_session_retry);
> + /* Only try again if gsc says so */
> + if (ret != -EAGAIN)
> + break;
> +
> + msleep(GSC_PENDING_RETRY_PAUSE_MS);
> + } while (++tries < GSC_PENDING_RETRY_MAXCOUNT);
> +
> + return ret;
> +}
> +
> +static int
> +gsccs_create_buffer(struct intel_gt *gt,
> + const char *bufname, size_t size,
> + struct i915_vma **vma, void **map)
> +{
> + struct drm_i915_private *i915 = gt->i915;
> + struct drm_i915_gem_object *obj;
> + int err = 0;
> +
> + obj = i915_gem_object_create_internal(i915, size);
> + if (IS_ERR(obj)) {
> + drm_err(&i915->drm, "Failed to allocate gsccs backend %s.\n", bufname);
> + err = PTR_ERR(obj);
> + goto out_none;
> + }
> +
> + *vma = i915_vma_instance(obj, gt->vm, NULL);
> + if (IS_ERR(*vma)) {
> + drm_err(&i915->drm, "Failed to vma-instance gsccs backend %s.\n", bufname);
> + err = PTR_ERR(*vma);
> + goto out_put;
> + }
> +
> + /* return a virtual pointer */
> + *map = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(i915, obj, true));
> + if (IS_ERR(*map)) {
> + drm_err(&i915->drm, "Failed to map gsccs backend %s.\n", bufname);
> + err = PTR_ERR(*map);
> + goto out_put;
> + }
> +
> + /* all PXP sessions commands are treated as non-priveleged */
typo priveleged
> + err = i915_vma_pin(*vma, 0, 0, PIN_USER);
> + if (err) {
> + drm_err(&i915->drm, "Failed to vma-pin gsccs backend %s.\n", bufname);
> + goto out_unmap;
> + }
> +
> + return 0;
> +
> +out_unmap:
> + i915_gem_object_unpin_map(obj);
> +out_put:
> + i915_gem_object_put(obj);
> +out_none:
> + *vma = NULL;
> + *map = NULL;
> +
> + return err;
> +}
> +
nit: maybe move gsccs_create_buffer after the cleanup/destruction
functions? so we can group all the creation functions close together.
> +static void
> +gsccs_cleanup_fw_host_session_handle(struct intel_pxp *pxp)
> +{
> + struct drm_i915_private *i915 = pxp->ctrl_gt->i915;
> + int ret;
> +
> + ret = gsccs_send_message_retry_complete(pxp, NULL, 0, NULL, 0, NULL);
> + if (ret)
> + drm_dbg(&i915->drm, "Failed to send gsccs msg host-session-cleanup: ret=[%d]\n",
> + ret);
> +}
> +
> static void
> gsccs_destroy_execution_resource(struct intel_pxp *pxp)
> {
> struct gsccs_session_resources *strm_res = &pxp->gsccs_res;
>
> + if (strm_res->host_session_handle)
> + gsccs_cleanup_fw_host_session_handle(pxp);
> if (strm_res->ce)
> intel_context_put(strm_res->ce);
> + if (strm_res->bb_vma)
> + i915_vma_unpin_and_release(&strm_res->bb_vma, I915_VMA_RELEASE_MAP);
> + if (strm_res->pkt_vma)
> + i915_vma_unpin_and_release(&strm_res->pkt_vma, I915_VMA_RELEASE_MAP);
>
> memset(strm_res, 0, sizeof(*strm_res));
> }
> @@ -30,6 +237,7 @@ gsccs_allocate_execution_resource(struct intel_pxp *pxp)
> struct gsccs_session_resources *strm_res = &pxp->gsccs_res;
> struct intel_engine_cs *engine = gt->engine[GSC0];
> struct intel_context *ce;
> + int err = 0;
>
> /*
> * First, ensure the GSC engine is present.
> @@ -38,16 +246,43 @@ gsccs_allocate_execution_resource(struct intel_pxp *pxp)
> if (!engine)
> return -ENODEV;
>
> + /*
> + * Now, allocate, pin and map two objects, one for the heci message packet
> + * and another for the batch buffer we submit into GSC engine (that includes the packet).
> + * NOTE: GSC-CS backend is currently only supported on MTL, so we allocate shmem.
> + */
> + err = gsccs_create_buffer(pxp->ctrl_gt, "Heci Packet",
> + PXP43_MAX_HECI_IN_SIZE + PXP43_MAX_HECI_OUT_SIZE,
> + &strm_res->pkt_vma, &strm_res->pkt_vaddr);
> + if (err)
> + return err;
> +
> + err = gsccs_create_buffer(pxp->ctrl_gt, "Batch Buffer", PAGE_SIZE,
> + &strm_res->bb_vma, &strm_res->bb_vaddr);
> + if (err)
> + goto free_pkt;
> +
> /* Finally, create an intel_context to be used during the submission */
> ce = intel_context_create(engine);
> if (IS_ERR(ce)) {
> drm_err(>->i915->drm, "Failed creating gsccs backend ctx\n");
> - return PTR_ERR(ce);
> + err = PTR_ERR(ce);
> + goto free_batch;
> }
> -
> strm_res->ce = ce;
>
> + /* initialize host-session-handle (for all i915-to-gsc-firmware PXP cmds) */
> + get_random_bytes(&strm_res->host_session_handle, sizeof(strm_res->host_session_handle));
This does not guarantee that each host session handle is unique
(although getting the same u64 twice is going to be extremely extremely
unlikely). Not sure if it is a problem.
> +
> return 0;
> +
> +free_pkt:
> + i915_vma_unpin_and_release(&strm_res->pkt_vma, I915_VMA_RELEASE_MAP);
> +free_batch:
> + i915_vma_unpin_and_release(&strm_res->bb_vma, I915_VMA_RELEASE_MAP);
those gotos are the wrong way around, the pkt is allocated first and
therefore it should be freed second
Daniele
> + memset(strm_res, 0, sizeof(*strm_res));
> +
> + return err;
> }
>
> void intel_pxp_gsccs_fini(struct intel_pxp *pxp)
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h
> index 354ea9a8f940..bd1c028bc80f 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h
> @@ -10,6 +10,10 @@
>
> struct intel_pxp;
>
> +#define GSC_REPLY_LATENCY_MS 200
> +#define GSC_PENDING_RETRY_MAXCOUNT 40
> +#define GSC_PENDING_RETRY_PAUSE_MS 50
> +
> #ifdef CONFIG_DRM_I915_PXP
> void intel_pxp_gsccs_fini(struct intel_pxp *pxp);
> int intel_pxp_gsccs_init(struct intel_pxp *pxp);
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> index fdd98911968d..73392fbab7ee 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> @@ -38,6 +38,12 @@ struct intel_pxp {
> struct gsccs_session_resources {
> u64 host_session_handle; /* used by firmware to link commands to sessions */
> struct intel_context *ce; /* context for gsc command submission */
> +
> + struct i915_vma *pkt_vma; /* GSC FW cmd packet vma */
> + void *pkt_vaddr; /* GSC FW cmd packet virt pointer */
> +
> + struct i915_vma *bb_vma; /* HECI_PKT batch buffer vma */
> + void *bb_vaddr; /* HECI_PKT batch buffer virt pointer */
> } gsccs_res;
>
> /**
More information about the Intel-gfx
mailing list