[Intel-gfx] [PATCH v6 4/8] drm/i915/pxp: Add GSC-CS backend to send GSC fw messages

Sat Mar 4 01:07:03 UTC 2023

On 2/27/2023 6:21 PM, Alan Previn wrote:
> Add GSC engine based method for sending PXP firmware packets
> to the GSC firmware for MTL (and future) products.
>
> Use the newly added helpers to populate the GSC-CS memory
> header and send the message packet to the FW by dispatching
> the GSC_HECI_CMD_PKT instruction on the GSC engine.
>
> We use non-priveleged batches for submission to GSC engine
> which require two buffers for the request:
>       - a buffer for the HECI packet that contains PXP FW commands
>       - a batch-buffer that contains the engine instruction for
>         sending the HECI packet to the GSC firmware.
>
> Thus, add the allocation and freeing of these buffers in gsccs
> init and fini.
>
> The GSC-fw may reply to commands with a SUCCESS but with an
> additional pending-bit set in the reply packet. This bit
> means the GSC-FW is currently busy and the caller needs to
> try again with the gsc_message_handle the fw gave. The
> GSC-fw requires a non-zero host_session_handle provided
> by the caller to enable gsc_message_handle tracking.
>
> Thus, allocate the host_session_handle at init and destroy it
> at fini (the latter requiring an FYI to the gsc-firmware).
> Also ensure the send-message function allows replay of the
> gsc_message_handle.
>
> Signed-off-by: Alan Previn <alan.previn.teres.alexis at intel.com>
> ---
>   .../drm/i915/pxp/intel_pxp_cmd_interface_43.h |   4 +
>   drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c    | 239 +++++++++++++++++-
>   drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h    |   4 +
>   drivers/gpu/drm/i915/pxp/intel_pxp_types.h    |   6 +
>   4 files changed, 251 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h
> index ad67e3f49c20..b2523d6918c7 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h
> @@ -12,6 +12,10 @@
>   /* PXP-Cmd-Op definitions */
>   #define PXP43_CMDID_START_HUC_AUTH 0x0000003A
>   
> +/* PXP-Packet sizes for MTL's GSCCS-HECI instruction */
> +#define PXP43_MAX_HECI_IN_SIZE (SZ_32K)
> +#define PXP43_MAX_HECI_OUT_SIZE (SZ_32K)
> +
>   /* PXP-Input-Packet: HUC-Authentication */
>   struct pxp43_start_huc_auth_in {
>   	struct pxp_cmd_header header;
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
> index 13693e78b57e..30aa660a975f 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c
> @@ -6,19 +6,226 @@
>   #include "gem/i915_gem_internal.h"
>   
>   #include "gt/intel_context.h"
> +#include "gt/uc/intel_gsc_uc_heci_cmd_submit.h"
>   
>   #include "i915_drv.h"
>   #include "intel_pxp_cmd_interface_43.h"
>   #include "intel_pxp_gsccs.h"
>   #include "intel_pxp_types.h"
>   
> +static int
> +gsccs_send_message(struct intel_pxp *pxp,
> +		   void *msg_in, size_t msg_in_size,
> +		   void *msg_out, size_t msg_out_size_max,
> +		   size_t *msg_out_len,
> +		   u64 *gsc_msg_handle_retry)
> +{
> +	struct intel_gt *gt = pxp->ctrl_gt;
> +	struct drm_i915_private *i915 = gt->i915;
> +	struct gsccs_session_resources *exec =  &pxp->gsccs_res;

in the alloc/free functions you called this object *strm_res; IMO better 
to use a consistent naming so it is clear they're the same object

> +	struct intel_gsc_mtl_header *header = exec->pkt_vaddr;
> +	struct intel_gsc_heci_non_priv_pkt pkt;
> +	bool null_pkt = !msg_in && !msg_out;
> +	size_t max_msg_size;
> +	u32 reply_size;
> +	int ret;
> +
> +	if (!exec->ce)
> +		return -ENODEV;
> +
> +	max_msg_size = PXP43_MAX_HECI_IN_SIZE - sizeof(*header);

Using the same max_msg_size for both in and out only works if 
PXP43_MAX_HECI_IN_SIZE == PXP43_MAX_HECI_OUT_SIZE. This is true now, but 
I'd add a:

BUILD_BUG_ON(PXP43_MAX_HECI_IN_SIZE  != PXP43_MAX_HECI_OUT_SIZE);

just to be safe. Potentially also a:

GEM_BUG_ON(exec->pkt_vma->size < (PXP43_MAX_HECI_IN_SIZE + 
PXP43_MAX_HECI_OUT_SIZE));

After checking that exec->pkt_vma exists.

> +
> +	if (msg_in_size > max_msg_size || msg_out_size_max > max_msg_size)
> +		return -ENOSPC;
> +
> +	if (!exec->pkt_vma || !exec->bb_vma)
> +		return -ENOENT;
> +
> +	mutex_lock(&pxp->tee_mutex);
> +
> +	memset(header, 0, sizeof(*header));
> +	intel_gsc_uc_heci_cmd_emit_mtl_header(header, GSC_HECI_MEADDRESS_PXP,
> +					      msg_in_size + sizeof(*header),
> +					      exec->host_session_handle);
> +
> +	/* check if this is a host-session-handle cleanup call */
> +	if (null_pkt)

nit: can't you just use if (!msg_in && !msg_out) instead of a local var? 
not a blocker.

> +		header->flags |= GSC_HECI_FLAG_MSG_CLEANUP;
> +
> +	/* copy caller provided gsc message handle if this is polling for a prior msg completion */
> +	header->gsc_message_handle = *gsc_msg_handle_retry;
> +
> +	/* NOTE: zero size packets are used for session-cleanups */
> +	if (msg_in && msg_in_size)
> +		memcpy(exec->pkt_vaddr + sizeof(*header), msg_in, msg_in_size);
> +
> +	pkt.addr_in = i915_vma_offset(exec->pkt_vma);
> +	pkt.size_in = header->message_size;
> +	pkt.addr_out = pkt.addr_in + PXP43_MAX_HECI_IN_SIZE;
> +	pkt.size_out = msg_out_size_max + sizeof(*header);
> +	pkt.heci_pkt_vma = exec->pkt_vma;
> +	pkt.bb_vma = exec->bb_vma;
> +
> +	ret = intel_gsc_uc_heci_cmd_submit_nonpriv(&gt->uc.gsc,
> +						   exec->ce, &pkt, exec->bb_vaddr,
> +						   GSC_REPLY_LATENCY_MS);
> +	if (ret) {
> +		drm_err(&i915->drm, "failed to send gsc PXP msg (%d)\n", ret);
> +		goto unlock;
> +	}
> +
> +	/* we keep separate location for reply, so get the response header loc first */
> +	header = exec->pkt_vaddr + PXP43_MAX_HECI_IN_SIZE;
> +
> +	/* Response validity marker, status and busyness */
> +	if (header->validity_marker != GSC_HECI_VALIDITY_MARKER) {

AFAICS you're not clearing the reply header when you re-send the same 
packets after the pending bit, so this marker might be stale data. Same 
for the other fields below.

> +		drm_err(&i915->drm, "gsc PXP reply with invalid validity marker\n");
> +		ret = -EINVAL;
> +		goto unlock;
> +	}
> +	if (header->status != 0) {
> +		drm_dbg(&i915->drm, "gsc PXP reply status has error = 0x%08x\n",
> +			header->status);
> +		ret = -EINVAL;
> +		goto unlock;
> +	}
> +	if (header->flags & GSC_HECI_FLAG_MSG_PENDING) {
> +		drm_dbg(&i915->drm, "gsc PXP reply is busy\n");
> +		/*
> +		 * When the GSC firmware replies with pending bit, it means that the requested
> +		 * operation has begun but the completion is pending and the caller needs
> +		 * to re-request with the gsc_message_handle that was returned by the firmware.
> +		 * until the pending bit is turned off.
> +		 */
> +		*gsc_msg_handle_retry = header->gsc_message_handle;

Non blocking question: would it be worth it to copy the value to the 
header_in directly, instead of returning the value to the caller and 
copying it on resubmit? Just a thought, I see pro and cons with both 
approaches.

> +		ret = -EAGAIN;
> +		goto unlock;
> +	}
> +
> +	reply_size = header->message_size - sizeof(*header);
> +	if (reply_size > msg_out_size_max) {
> +		drm_warn(&i915->drm, "caller with insufficient PXP reply size %u (%ld)\n",
> +			 reply_size, msg_out_size_max);
> +		reply_size = msg_out_size_max;
> +	} else if (reply_size != msg_out_size_max) {
> +		drm_dbg(&i915->drm, "caller unexpected PXP reply size %u (%ld)\n",
> +			reply_size, msg_out_size_max);

Are we expecting all caller to always pass the exact size? Not a 
complain, but if that's the case then maybe rename msg_out_size_max to 
msg_out_expected_size, so it's clearer. size_max sounds like any size 
smaller than it is allowed. My personal preference would be to leave 
this as a size_max and have the caller decide if the actual returned 
size matches the expectations (via msg_out_len)

> +	}
> +
> +	if (msg_out)
> +		memcpy(msg_out, exec->pkt_vaddr + PXP43_MAX_HECI_IN_SIZE + sizeof(*header),
> +		       reply_size);
> +	if (msg_out_len)
> +		*msg_out_len = reply_size;
> +
> +unlock:
> +	mutex_unlock(&pxp->tee_mutex);
> +	return ret;
> +}
> +
> +static int
> +gsccs_send_message_retry_complete(struct intel_pxp *pxp,
> +				  void *msg_in, size_t msg_in_size,
> +				  void *msg_out, size_t msg_out_size_max,
> +				  size_t *msg_out_len)
> +{
> +	u64 gsc_session_retry = 0;
> +	int ret, tries = 0;
> +
> +	/*
> +	 * Keep sending request if GSC firmware was busy. Based on fw specs +
> +	 * sw overhead (and testing) we expect a worst case pending-bit delay of
> +	 * GSC_PENDING_RETRY_MAXCOUNT x GSC_PENDING_RETRY_PAUSE_MS millisecs.
> +	 */
> +	do {
> +		ret = gsccs_send_message(pxp, msg_in, msg_in_size, msg_out, msg_out_size_max,
> +					 msg_out_len, &gsc_session_retry);
> +		/* Only try again if gsc says so */
> +		if (ret != -EAGAIN)
> +			break;
> +
> +		msleep(GSC_PENDING_RETRY_PAUSE_MS);
> +	} while (++tries < GSC_PENDING_RETRY_MAXCOUNT);
> +
> +	return ret;
> +}
> +
> +static int
> +gsccs_create_buffer(struct intel_gt *gt,
> +		    const char *bufname, size_t size,
> +		    struct i915_vma **vma, void **map)
> +{
> +	struct drm_i915_private *i915 = gt->i915;
> +	struct drm_i915_gem_object *obj;
> +	int err = 0;
> +
> +	obj = i915_gem_object_create_internal(i915, size);
> +	if (IS_ERR(obj)) {
> +		drm_err(&i915->drm, "Failed to allocate gsccs backend %s.\n", bufname);
> +		err = PTR_ERR(obj);
> +		goto out_none;
> +	}
> +
> +	*vma = i915_vma_instance(obj, gt->vm, NULL);
> +	if (IS_ERR(*vma)) {
> +		drm_err(&i915->drm, "Failed to vma-instance gsccs backend %s.\n", bufname);
> +		err = PTR_ERR(*vma);
> +		goto out_put;
> +	}
> +
> +	/* return a virtual pointer */
> +	*map = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(i915, obj, true));
> +	if (IS_ERR(*map)) {
> +		drm_err(&i915->drm, "Failed to map gsccs backend %s.\n", bufname);
> +		err = PTR_ERR(*map);
> +		goto out_put;
> +	}
> +
> +	/* all PXP sessions commands are treated as non-priveleged */

typo priveleged

> +	err = i915_vma_pin(*vma, 0, 0, PIN_USER);
> +	if (err) {
> +		drm_err(&i915->drm, "Failed to vma-pin gsccs backend %s.\n", bufname);
> +		goto out_unmap;
> +	}
> +
> +	return 0;
> +
> +out_unmap:
> +	i915_gem_object_unpin_map(obj);
> +out_put:
> +	i915_gem_object_put(obj);
> +out_none:
> +	*vma = NULL;
> +	*map = NULL;
> +
> +	return err;
> +}
> +

nit: maybe move gsccs_create_buffer after the cleanup/destruction 
functions? so we can group all the creation functions close together.

> +static void
> +gsccs_cleanup_fw_host_session_handle(struct intel_pxp *pxp)
> +{
> +	struct drm_i915_private *i915 = pxp->ctrl_gt->i915;
> +	int ret;
> +
> +	ret = gsccs_send_message_retry_complete(pxp, NULL, 0, NULL, 0, NULL);
> +	if (ret)
> +		drm_dbg(&i915->drm, "Failed to send gsccs msg host-session-cleanup: ret=[%d]\n",
> +			ret);
> +}
> +
>   static void
>   gsccs_destroy_execution_resource(struct intel_pxp *pxp)
>   {
>   	struct gsccs_session_resources *strm_res = &pxp->gsccs_res;
>   
> +	if (strm_res->host_session_handle)
> +		gsccs_cleanup_fw_host_session_handle(pxp);
>   	if (strm_res->ce)
>   		intel_context_put(strm_res->ce);
> +	if (strm_res->bb_vma)
> +		i915_vma_unpin_and_release(&strm_res->bb_vma, I915_VMA_RELEASE_MAP);
> +	if (strm_res->pkt_vma)
> +		i915_vma_unpin_and_release(&strm_res->pkt_vma, I915_VMA_RELEASE_MAP);
>   
>   	memset(strm_res, 0, sizeof(*strm_res));
>   }
> @@ -30,6 +237,7 @@ gsccs_allocate_execution_resource(struct intel_pxp *pxp)
>   	struct gsccs_session_resources *strm_res = &pxp->gsccs_res;
>   	struct intel_engine_cs *engine = gt->engine[GSC0];
>   	struct intel_context *ce;
> +	int err = 0;
>   
>   	/*
>   	 * First, ensure the GSC engine is present.
> @@ -38,16 +246,43 @@ gsccs_allocate_execution_resource(struct intel_pxp *pxp)
>   	if (!engine)
>   		return -ENODEV;
>   
> +	/*
> +	 * Now, allocate, pin and map two objects, one for the heci message packet
> +	 * and another for the batch buffer we submit into GSC engine (that includes the packet).
> +	 * NOTE: GSC-CS backend is currently only supported on MTL, so we allocate shmem.
> +	 */
> +	err = gsccs_create_buffer(pxp->ctrl_gt, "Heci Packet",
> +				  PXP43_MAX_HECI_IN_SIZE + PXP43_MAX_HECI_OUT_SIZE,
> +				  &strm_res->pkt_vma, &strm_res->pkt_vaddr);
> +	if (err)
> +		return err;
> +
> +	err = gsccs_create_buffer(pxp->ctrl_gt, "Batch Buffer", PAGE_SIZE,
> +				  &strm_res->bb_vma, &strm_res->bb_vaddr);
> +	if (err)
> +		goto free_pkt;
> +
>   	/* Finally, create an intel_context to be used during the submission */
>   	ce = intel_context_create(engine);
>   	if (IS_ERR(ce)) {
>   		drm_err(&gt->i915->drm, "Failed creating gsccs backend ctx\n");
> -		return PTR_ERR(ce);
> +		err = PTR_ERR(ce);
> +		goto free_batch;
>   	}
> -
>   	strm_res->ce = ce;
>   
> +	/* initialize host-session-handle (for all i915-to-gsc-firmware PXP cmds) */
> +	get_random_bytes(&strm_res->host_session_handle, sizeof(strm_res->host_session_handle));

This does not guarantee that each host session handle is unique 
(although getting the same u64 twice is going to be extremely extremely 
unlikely). Not sure if it is a problem.

> +
>   	return 0;
> +
> +free_pkt:
> +	i915_vma_unpin_and_release(&strm_res->pkt_vma, I915_VMA_RELEASE_MAP);
> +free_batch:
> +	i915_vma_unpin_and_release(&strm_res->bb_vma, I915_VMA_RELEASE_MAP);

those gotos are the wrong way around, the pkt is allocated first and 
therefore it should be freed second

Daniele

> +	memset(strm_res, 0, sizeof(*strm_res));
> +
> +	return err;
>   }
>   
>   void intel_pxp_gsccs_fini(struct intel_pxp *pxp)
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h
> index 354ea9a8f940..bd1c028bc80f 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h
> @@ -10,6 +10,10 @@
>   
>   struct intel_pxp;
>   
> +#define GSC_REPLY_LATENCY_MS 200
> +#define GSC_PENDING_RETRY_MAXCOUNT 40
> +#define GSC_PENDING_RETRY_PAUSE_MS 50
> +
>   #ifdef CONFIG_DRM_I915_PXP
>   void intel_pxp_gsccs_fini(struct intel_pxp *pxp);
>   int intel_pxp_gsccs_init(struct intel_pxp *pxp);
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> index fdd98911968d..73392fbab7ee 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> @@ -38,6 +38,12 @@ struct intel_pxp {
>   	struct gsccs_session_resources {
>   		u64 host_session_handle; /* used by firmware to link commands to sessions */
>   		struct intel_context *ce; /* context for gsc command submission */
> +
> +		struct i915_vma *pkt_vma; /* GSC FW cmd packet vma */
> +		void *pkt_vaddr;  /* GSC FW cmd packet virt pointer */
> +
> +		struct i915_vma *bb_vma; /* HECI_PKT batch buffer vma */
> +		void *bb_vaddr; /* HECI_PKT batch buffer virt pointer */
>   	} gsccs_res;
>   
>   	/**