[PATCH v2 03/12] drm/xe/pxp: Add VCS inline termination support
John Harrison
john.c.harrison at intel.com
Fri Oct 4 22:25:48 UTC 2024
On 8/16/2024 12:00, Daniele Ceraolo Spurio wrote:
> The key termination is done with a specific submission to the VCS
> engine.
>
> Note that this patch is meant to be squashed with the follow-up patches
> that implement the other pieces of the termination flow. It is separate
> for now for ease of review.
>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---
> .../gpu/drm/xe/instructions/xe_instr_defs.h | 1 +
> .../gpu/drm/xe/instructions/xe_mfx_commands.h | 29 +++++
> .../gpu/drm/xe/instructions/xe_mi_commands.h | 5 +
> drivers/gpu/drm/xe/xe_lrc.h | 3 +-
> drivers/gpu/drm/xe/xe_pxp_submit.c | 108 ++++++++++++++++++
> drivers/gpu/drm/xe/xe_pxp_submit.h | 2 +
> drivers/gpu/drm/xe/xe_ring_ops.c | 4 +-
> 7 files changed, 149 insertions(+), 3 deletions(-)
> create mode 100644 drivers/gpu/drm/xe/instructions/xe_mfx_commands.h
>
> diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
> index fd2ce7ace510..e559969468c4 100644
> --- a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
> +++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
> @@ -16,6 +16,7 @@
> #define XE_INSTR_CMD_TYPE GENMASK(31, 29)
> #define XE_INSTR_MI REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0)
> #define XE_INSTR_GSC REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2)
> +#define XE_INSTR_VIDEOPIPE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3)
> #define XE_INSTR_GFXPIPE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3)
> #define XE_INSTR_GFX_STATE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x4)
>
> diff --git a/drivers/gpu/drm/xe/instructions/xe_mfx_commands.h b/drivers/gpu/drm/xe/instructions/xe_mfx_commands.h
> new file mode 100644
> index 000000000000..686ca3b1d9e8
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/instructions/xe_mfx_commands.h
> @@ -0,0 +1,29 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2024 Intel Corporation
> + */
> +
> +#ifndef _XE_MFX_COMMANDS_H_
> +#define _XE_MFX_COMMANDS_H_
> +
> +#include "instructions/xe_instr_defs.h"
> +
> +#define MFX_CMD_SUBTYPE REG_GENMASK(28, 27) /* A.K.A cmd pipe */
> +#define MFX_CMD_OPCODE REG_GENMASK(26, 24)
> +#define MFX_CMD_SUB_OPCODE REG_GENMASK(23, 16)
> +#define MFX_FLAGS_AND_LEN REG_GENMASK(15, 0)
> +
> +#define XE_MFX_INSTR(subtype, op, sub_op, flags) \
> + (XE_INSTR_VIDEOPIPE | \
> + REG_FIELD_PREP(MFX_CMD_SUBTYPE, subtype) | \
> + REG_FIELD_PREP(MFX_CMD_OPCODE, op) | \
> + REG_FIELD_PREP(MFX_CMD_SUB_OPCODE, sub_op) | \
> + REG_FIELD_PREP(MFX_FLAGS_AND_LEN, flags))
> +
> +#define MFX_WAIT XE_MFX_INSTR(1, 0, 0, 0)
> +#define MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG REG_BIT(9)
> +#define MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG REG_BIT(8)
> +
> +#define CRYPTO_KEY_EXCHANGE XE_MFX_INSTR(2, 6, 9, 0)
> +
> +#endif
> diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> index 10ec2920d31b..167fb0f742de 100644
> --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> @@ -48,6 +48,7 @@
> #define MI_LRI_LEN(x) (((x) & 0xff) + 1)
>
> #define MI_FLUSH_DW __MI_INSTR(0x26)
> +#define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22)
> #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21)
> #define MI_INVALIDATE_TLB REG_BIT(18)
> #define MI_FLUSH_DW_CCS REG_BIT(16)
> @@ -66,4 +67,8 @@
>
> #define MI_BATCH_BUFFER_START __MI_INSTR(0x31)
>
> +#define MI_SET_APPID __MI_INSTR(0x0e)
> +#define MI_SET_APPID_SESSION_ID_MASK REG_GENMASK(6, 0)
> +#define MI_SET_APPID_SESSION_ID(x) REG_FIELD_PREP(MI_SET_APPID_SESSION_ID_MASK, x)
> +
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
> index c24542e89318..d411c3fbcbc6 100644
> --- a/drivers/gpu/drm/xe/xe_lrc.h
> +++ b/drivers/gpu/drm/xe/xe_lrc.h
> @@ -20,7 +20,8 @@ struct xe_lrc;
> struct xe_lrc_snapshot;
> struct xe_vm;
>
> -#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
> +#define LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR (0x34 * 4)
> +#define LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR (0x40 * 4)
>
> struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
> u32 ring_size);
> diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c
> index b777b0765c8a..3b69dcc0a00f 100644
> --- a/drivers/gpu/drm/xe/xe_pxp_submit.c
> +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c
> @@ -6,14 +6,20 @@
> #include "xe_pxp_submit.h"
>
> #include <drm/xe_drm.h>
> +#include <linux/delay.h>
>
> #include "xe_device_types.h"
> +#include "xe_bb.h"
> #include "xe_bo.h"
> #include "xe_exec_queue.h"
> #include "xe_gsc_submit.h"
> #include "xe_gt.h"
> +#include "xe_lrc.h"
> #include "xe_pxp_types.h"
> +#include "xe_sched_job.h"
> #include "xe_vm.h"
> +#include "instructions/xe_mfx_commands.h"
> +#include "instructions/xe_mi_commands.h"
> #include "regs/xe_gt_regs.h"
>
> /*
> @@ -199,3 +205,105 @@ void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp)
> destroy_vcs_execution_resources(pxp);
> }
>
> +#define emit_cmd(xe_, map_, offset_, val_) \
> + xe_map_wr(xe_, map_, (offset_) * sizeof(u32), u32, val_)
> +
> +/* stall until prior PXP and MFX/HCP/HUC objects are cmopleted */
completed
> +#define MFX_WAIT_PXP (MFX_WAIT | \
> + MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG | \
> + MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG)
Why define an XE_MFX_INSTR macro that takes a flags word only to
manually OR the flags in outside of the macro?
> +static u32 pxp_emit_wait(struct xe_device *xe, struct iosys_map *batch, u32 offset)
> +{
> + /* wait for cmds to go through */
> + emit_cmd(xe, batch, offset++, MFX_WAIT_PXP);
> + emit_cmd(xe, batch, offset++, 0);
This zero is just padding to ensure 64bit alignment of future instructions?
> +
> + return offset;
> +}
> +
> +static u32 pxp_emit_session_selection(struct xe_device *xe, struct iosys_map *batch,
> + u32 offset, u32 idx)
> +{
> + offset = pxp_emit_wait(xe, batch, offset);
> +
> + /* pxp off */
> + emit_cmd(xe, batch, offset++, MI_FLUSH_DW | MI_FLUSH_IMM_DW);
> + emit_cmd(xe, batch, offset++, 0);
> + emit_cmd(xe, batch, offset++, 0);
> + emit_cmd(xe, batch, offset++, 0);
> +
> + /* select session */
> + emit_cmd(xe, batch, offset++, MI_SET_APPID | MI_SET_APPID_SESSION_ID(idx));
> + emit_cmd(xe, batch, offset++, MFX_WAIT_PXP);
Seems odd to define a helper function to emit this instruction and then
only use it for some instances.
> +
> + /* pxp on */
> + emit_cmd(xe, batch, offset++, MI_FLUSH_DW |
> + MI_FLUSH_DW_PROTECTED_MEM_EN |
> + MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX |
> + MI_FLUSH_IMM_DW);
> + emit_cmd(xe, batch, offset++, LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR |
> + MI_FLUSH_DW_USE_GTT);
> + emit_cmd(xe, batch, offset++, 0);
> + emit_cmd(xe, batch, offset++, 0);
> +
> + offset = pxp_emit_wait(xe, batch, offset);
> +
> + return offset;
> +}
> +
> +static u32 pxp_emit_inline_termination(struct xe_device *xe,
> + struct iosys_map *batch, u32 offset)
> +{
> + /* session inline termination */
> + emit_cmd(xe, batch, offset++, CRYPTO_KEY_EXCHANGE);
> + emit_cmd(xe, batch, offset++, 0);
> +
> + return offset;
> +}
> +
> +static u32 pxp_emit_session_termination(struct xe_device *xe, struct iosys_map *batch,
> + u32 offset, u32 idx)
> +{
> + offset = pxp_emit_session_selection(xe, batch, offset, idx);
> + offset = pxp_emit_inline_termination(xe, batch, offset);
> +
> + return offset;
> +}
> +
> +/**
> + * xe_pxp_submit_session_termination - submits a PXP inline termination
> + * @pxp: the xe_pxp structure
> + * @id: the session to terminate
> + *
> + * Emit an inline termination via the VCS engine to terminate a session.
> + *
> + * Returns 0 if the submission is successful, an errno value otherwise.
> + */
> +int xe_pxp_submit_session_termination(struct xe_pxp *pxp, u32 id)
> +{
> + struct xe_sched_job *job;
> + struct dma_fence *fence;
> + long timeout;
> + u32 offset = 0;
> + u64 addr = xe_bo_ggtt_addr(pxp->vcs_exec.bo);
> +
> + offset = pxp_emit_session_termination(pxp->xe, &pxp->vcs_exec.bo->vmap, offset, id);
> + offset = pxp_emit_wait(pxp->xe, &pxp->vcs_exec.bo->vmap, offset);
> + emit_cmd(pxp->xe, &pxp->vcs_exec.bo->vmap, offset, MI_BATCH_BUFFER_END);
> +
> + job = xe_sched_job_create(pxp->vcs_exec.q, &addr);
Double space
> + if (IS_ERR(job))
> + return PTR_ERR(job);
> +
> + xe_sched_job_arm(job);
> + fence = dma_fence_get(&job->drm.s_fence->finished);
> + xe_sched_job_push(job);
> +
> + timeout = dma_fence_wait_timeout(fence, false, HZ);
> +
> + dma_fence_put(fence);
> + if (timeout <= 0)
> + return -EAGAIN;
Does it not matter what the error was? Why/how would this fail in a way
that needs to be re-tried?
Although looking at the later patches, the return value from this
function is just treated as a pass/fail bool anyway. So why bother
munging it at all?
John.
> +
> + return 0;
> +}
> diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.h b/drivers/gpu/drm/xe/xe_pxp_submit.h
> index 1a971fadc081..4ee8c0acfed9 100644
> --- a/drivers/gpu/drm/xe/xe_pxp_submit.h
> +++ b/drivers/gpu/drm/xe/xe_pxp_submit.h
> @@ -13,4 +13,6 @@ struct xe_pxp;
> int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp);
> void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp);
>
> +int xe_pxp_submit_session_termination(struct xe_pxp *pxp, u32 id);
> +
> #endif /* __XE_PXP_SUBMIT_H__ */
> diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> index 0be4f489d3e1..a4b5a0f68a32 100644
> --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> @@ -118,7 +118,7 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
> dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
> MI_FLUSH_DW_STORE_INDEX;
>
> - dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
> + dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
> dw[i++] = 0;
> dw[i++] = ~0U;
>
> @@ -156,7 +156,7 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
>
> flags &= ~mask_flags;
>
> - return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_SCRATCH_ADDR, 0);
> + return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0);
> }
>
> static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
More information about the Intel-xe
mailing list