[PATCH v2 03/12] drm/xe/pxp: Add VCS inline termination support

John Harrison john.c.harrison at intel.com
Fri Oct 4 22:25:48 UTC 2024


On 8/16/2024 12:00, Daniele Ceraolo Spurio wrote:
> The key termination is done with a specific submission to the VCS
> engine.
>
> Note that this patch is meant to be squashed with the follow-up patches
> that implement the other pieces of the termination flow. It is separate
> for now for ease of review.
>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---
>   .../gpu/drm/xe/instructions/xe_instr_defs.h   |   1 +
>   .../gpu/drm/xe/instructions/xe_mfx_commands.h |  29 +++++
>   .../gpu/drm/xe/instructions/xe_mi_commands.h  |   5 +
>   drivers/gpu/drm/xe/xe_lrc.h                   |   3 +-
>   drivers/gpu/drm/xe/xe_pxp_submit.c            | 108 ++++++++++++++++++
>   drivers/gpu/drm/xe/xe_pxp_submit.h            |   2 +
>   drivers/gpu/drm/xe/xe_ring_ops.c              |   4 +-
>   7 files changed, 149 insertions(+), 3 deletions(-)
>   create mode 100644 drivers/gpu/drm/xe/instructions/xe_mfx_commands.h
>
> diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
> index fd2ce7ace510..e559969468c4 100644
> --- a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
> +++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
> @@ -16,6 +16,7 @@
>   #define XE_INSTR_CMD_TYPE		GENMASK(31, 29)
>   #define   XE_INSTR_MI			REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0)
>   #define   XE_INSTR_GSC			REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2)
> +#define   XE_INSTR_VIDEOPIPE		REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3)
>   #define   XE_INSTR_GFXPIPE		REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3)
>   #define   XE_INSTR_GFX_STATE		REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x4)
>   
> diff --git a/drivers/gpu/drm/xe/instructions/xe_mfx_commands.h b/drivers/gpu/drm/xe/instructions/xe_mfx_commands.h
> new file mode 100644
> index 000000000000..686ca3b1d9e8
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/instructions/xe_mfx_commands.h
> @@ -0,0 +1,29 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2024 Intel Corporation
> + */
> +
> +#ifndef _XE_MFX_COMMANDS_H_
> +#define _XE_MFX_COMMANDS_H_
> +
> +#include "instructions/xe_instr_defs.h"
> +
> +#define MFX_CMD_SUBTYPE		REG_GENMASK(28, 27) /* A.K.A cmd pipe */
> +#define MFX_CMD_OPCODE		REG_GENMASK(26, 24)
> +#define MFX_CMD_SUB_OPCODE	REG_GENMASK(23, 16)
> +#define MFX_FLAGS_AND_LEN	REG_GENMASK(15, 0)
> +
> +#define XE_MFX_INSTR(subtype, op, sub_op, flags) \
> +	(XE_INSTR_VIDEOPIPE | \
> +	 REG_FIELD_PREP(MFX_CMD_SUBTYPE, subtype) | \
> +	 REG_FIELD_PREP(MFX_CMD_OPCODE, op) | \
> +	 REG_FIELD_PREP(MFX_CMD_SUB_OPCODE, sub_op) | \
> +	 REG_FIELD_PREP(MFX_FLAGS_AND_LEN, flags))
> +
> +#define MFX_WAIT				XE_MFX_INSTR(1, 0, 0, 0)
> +#define MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG	REG_BIT(9)
> +#define MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG	REG_BIT(8)
> +
> +#define CRYPTO_KEY_EXCHANGE			XE_MFX_INSTR(2, 6, 9, 0)
> +
> +#endif
> diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> index 10ec2920d31b..167fb0f742de 100644
> --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> @@ -48,6 +48,7 @@
>   #define   MI_LRI_LEN(x)			(((x) & 0xff) + 1)
>   
>   #define MI_FLUSH_DW			__MI_INSTR(0x26)
> +#define   MI_FLUSH_DW_PROTECTED_MEM_EN	REG_BIT(22)
>   #define   MI_FLUSH_DW_STORE_INDEX	REG_BIT(21)
>   #define   MI_INVALIDATE_TLB		REG_BIT(18)
>   #define   MI_FLUSH_DW_CCS		REG_BIT(16)
> @@ -66,4 +67,8 @@
>   
>   #define MI_BATCH_BUFFER_START		__MI_INSTR(0x31)
>   
> +#define MI_SET_APPID			__MI_INSTR(0x0e)
> +#define MI_SET_APPID_SESSION_ID_MASK	REG_GENMASK(6, 0)
> +#define MI_SET_APPID_SESSION_ID(x)	REG_FIELD_PREP(MI_SET_APPID_SESSION_ID_MASK, x)
> +
>   #endif
> diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
> index c24542e89318..d411c3fbcbc6 100644
> --- a/drivers/gpu/drm/xe/xe_lrc.h
> +++ b/drivers/gpu/drm/xe/xe_lrc.h
> @@ -20,7 +20,8 @@ struct xe_lrc;
>   struct xe_lrc_snapshot;
>   struct xe_vm;
>   
> -#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
> +#define LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR (0x34 * 4)
> +#define LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR (0x40 * 4)
>   
>   struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
>   			     u32 ring_size);
> diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c
> index b777b0765c8a..3b69dcc0a00f 100644
> --- a/drivers/gpu/drm/xe/xe_pxp_submit.c
> +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c
> @@ -6,14 +6,20 @@
>   #include "xe_pxp_submit.h"
>   
>   #include <drm/xe_drm.h>
> +#include <linux/delay.h>
>   
>   #include "xe_device_types.h"
> +#include "xe_bb.h"
>   #include "xe_bo.h"
>   #include "xe_exec_queue.h"
>   #include "xe_gsc_submit.h"
>   #include "xe_gt.h"
> +#include "xe_lrc.h"
>   #include "xe_pxp_types.h"
> +#include "xe_sched_job.h"
>   #include "xe_vm.h"
> +#include "instructions/xe_mfx_commands.h"
> +#include "instructions/xe_mi_commands.h"
>   #include "regs/xe_gt_regs.h"
>   
>   /*
> @@ -199,3 +205,105 @@ void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp)
>   	destroy_vcs_execution_resources(pxp);
>   }
>   
> +#define emit_cmd(xe_, map_, offset_, val_) \
> +	xe_map_wr(xe_, map_, (offset_) * sizeof(u32), u32, val_)
> +
> +/* stall until prior PXP and MFX/HCP/HUC objects are cmopleted */
completed

> +#define MFX_WAIT_PXP (MFX_WAIT | \
> +		      MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG | \
> +		      MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG)
Why define an XE_MFX_INSTR macro that takes a flags word only to 
manually OR the flags in outside of the macro?

> +static u32 pxp_emit_wait(struct xe_device *xe, struct iosys_map *batch, u32 offset)
> +{
> +	/* wait for cmds to go through */
> +	emit_cmd(xe, batch, offset++, MFX_WAIT_PXP);
> +	emit_cmd(xe, batch, offset++, 0);
This zero is just padding to ensure 64bit alignment of future instructions?

> +
> +	return offset;
> +}
> +
> +static u32 pxp_emit_session_selection(struct xe_device *xe, struct iosys_map *batch,
> +				      u32 offset, u32 idx)
> +{
> +	offset = pxp_emit_wait(xe, batch, offset);
> +
> +	/* pxp off */
> +	emit_cmd(xe, batch, offset++, MI_FLUSH_DW | MI_FLUSH_IMM_DW);
> +	emit_cmd(xe, batch, offset++, 0);
> +	emit_cmd(xe, batch, offset++, 0);
> +	emit_cmd(xe, batch, offset++, 0);
> +
> +	/* select session */
> +	emit_cmd(xe, batch, offset++, MI_SET_APPID | MI_SET_APPID_SESSION_ID(idx));
> +	emit_cmd(xe, batch, offset++, MFX_WAIT_PXP);
Seems odd to define a helper function to emit this instruction and then 
only use it for some instances.

> +
> +	/* pxp on */
> +	emit_cmd(xe, batch, offset++, MI_FLUSH_DW |
> +				      MI_FLUSH_DW_PROTECTED_MEM_EN |
> +				      MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX |
> +				      MI_FLUSH_IMM_DW);
> +	emit_cmd(xe, batch, offset++, LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR |
> +				      MI_FLUSH_DW_USE_GTT);
> +	emit_cmd(xe, batch, offset++, 0);
> +	emit_cmd(xe, batch, offset++, 0);
> +
> +	offset = pxp_emit_wait(xe, batch, offset);
> +
> +	return offset;
> +}
> +
> +static u32 pxp_emit_inline_termination(struct xe_device *xe,
> +				       struct iosys_map *batch, u32 offset)
> +{
> +	/* session inline termination */
> +	emit_cmd(xe, batch, offset++, CRYPTO_KEY_EXCHANGE);
> +	emit_cmd(xe, batch, offset++, 0);
> +
> +	return offset;
> +}
> +
> +static u32 pxp_emit_session_termination(struct xe_device *xe, struct iosys_map *batch,
> +					u32 offset, u32 idx)
> +{
> +	offset = pxp_emit_session_selection(xe, batch, offset, idx);
> +	offset = pxp_emit_inline_termination(xe, batch, offset);
> +
> +	return offset;
> +}
> +
> +/**
> + * xe_pxp_submit_session_termination - submits a PXP inline termination
> + * @pxp: the xe_pxp structure
> + * @id: the session to terminate
> + *
> + * Emit an inline termination via the VCS engine to terminate a session.
> + *
> + * Returns 0 if the submission is successful, an errno value otherwise.
> + */
> +int xe_pxp_submit_session_termination(struct xe_pxp *pxp, u32 id)
> +{
> +	struct xe_sched_job *job;
> +	struct dma_fence *fence;
> +	long timeout;
> +	u32 offset = 0;
> +	u64 addr = xe_bo_ggtt_addr(pxp->vcs_exec.bo);
> +
> +	offset = pxp_emit_session_termination(pxp->xe, &pxp->vcs_exec.bo->vmap, offset, id);
> +	offset = pxp_emit_wait(pxp->xe, &pxp->vcs_exec.bo->vmap, offset);
> +	emit_cmd(pxp->xe, &pxp->vcs_exec.bo->vmap, offset, MI_BATCH_BUFFER_END);
> +
> +	job =  xe_sched_job_create(pxp->vcs_exec.q, &addr);
Double space

> +	if (IS_ERR(job))
> +		return PTR_ERR(job);
> +
> +	xe_sched_job_arm(job);
> +	fence = dma_fence_get(&job->drm.s_fence->finished);
> +	xe_sched_job_push(job);
> +
> +	timeout = dma_fence_wait_timeout(fence, false, HZ);
> +
> +	dma_fence_put(fence);
> +	if (timeout <= 0)
> +		return -EAGAIN;
Does it not matter what the error was? Why/how would this fail in a way 
that needs to be re-tried?

Although looking at the later patches, the return value from this 
function is just treated as a pass/fail bool anyway. So why bother 
munging it at all?

John.

> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.h b/drivers/gpu/drm/xe/xe_pxp_submit.h
> index 1a971fadc081..4ee8c0acfed9 100644
> --- a/drivers/gpu/drm/xe/xe_pxp_submit.h
> +++ b/drivers/gpu/drm/xe/xe_pxp_submit.h
> @@ -13,4 +13,6 @@ struct xe_pxp;
>   int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp);
>   void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp);
>   
> +int xe_pxp_submit_session_termination(struct xe_pxp *pxp, u32 id);
> +
>   #endif /* __XE_PXP_SUBMIT_H__ */
> diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> index 0be4f489d3e1..a4b5a0f68a32 100644
> --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> @@ -118,7 +118,7 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
>   	dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
>   		MI_FLUSH_DW_STORE_INDEX;
>   
> -	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
> +	dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
>   	dw[i++] = 0;
>   	dw[i++] = ~0U;
>   
> @@ -156,7 +156,7 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
>   
>   	flags &= ~mask_flags;
>   
> -	return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_SCRATCH_ADDR, 0);
> +	return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0);
>   }
>   
>   static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,



More information about the Intel-xe mailing list