[PATCH v2 03/12] drm/xe/pxp: Add VCS inline termination support
John Harrison
john.c.harrison at intel.com
Thu Nov 14 18:46:31 UTC 2024
On 11/6/2024 15:49, Daniele Ceraolo Spurio wrote:
> On 10/4/24 15:25, John Harrison wrote:
>> On 8/16/2024 12:00, Daniele Ceraolo Spurio wrote:
>>> The key termination is done with a specific submission to the VCS
>>> engine.
>>>
>>> Note that this patch is meant to be squashed with the follow-up patches
>>> that implement the other pieces of the termination flow. It is separate
>>> for now for ease of review.
>>>
>>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>>> ---
>>> .../gpu/drm/xe/instructions/xe_instr_defs.h | 1 +
>>> .../gpu/drm/xe/instructions/xe_mfx_commands.h | 29 +++++
>>> .../gpu/drm/xe/instructions/xe_mi_commands.h | 5 +
>>> drivers/gpu/drm/xe/xe_lrc.h | 3 +-
>>> drivers/gpu/drm/xe/xe_pxp_submit.c | 108 ++++++++++++++++++
>>> drivers/gpu/drm/xe/xe_pxp_submit.h | 2 +
>>> drivers/gpu/drm/xe/xe_ring_ops.c | 4 +-
>>> 7 files changed, 149 insertions(+), 3 deletions(-)
>>> create mode 100644 drivers/gpu/drm/xe/instructions/xe_mfx_commands.h
>>>
>>> diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
>>> index fd2ce7ace510..e559969468c4 100644
>>> --- a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
>>> +++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
>>> @@ -16,6 +16,7 @@
>>> #define XE_INSTR_CMD_TYPE GENMASK(31, 29)
>>> #define XE_INSTR_MI REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0)
>>> #define XE_INSTR_GSC REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2)
>>> +#define XE_INSTR_VIDEOPIPE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3)
>>> #define XE_INSTR_GFXPIPE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3)
>>> #define XE_INSTR_GFX_STATE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x4)
>>> diff --git a/drivers/gpu/drm/xe/instructions/xe_mfx_commands.h b/drivers/gpu/drm/xe/instructions/xe_mfx_commands.h
>>> new file mode 100644
>>> index 000000000000..686ca3b1d9e8
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/xe/instructions/xe_mfx_commands.h
>>> @@ -0,0 +1,29 @@
>>> +/* SPDX-License-Identifier: MIT */
>>> +/*
>>> + * Copyright © 2024 Intel Corporation
>>> + */
>>> +
>>> +#ifndef _XE_MFX_COMMANDS_H_
>>> +#define _XE_MFX_COMMANDS_H_
>>> +
>>> +#include "instructions/xe_instr_defs.h"
>>> +
>>> +#define MFX_CMD_SUBTYPE REG_GENMASK(28, 27) /* A.K.A cmd pipe */
>>> +#define MFX_CMD_OPCODE REG_GENMASK(26, 24)
>>> +#define MFX_CMD_SUB_OPCODE REG_GENMASK(23, 16)
>>> +#define MFX_FLAGS_AND_LEN REG_GENMASK(15, 0)
>>> +
>>> +#define XE_MFX_INSTR(subtype, op, sub_op, flags) \
>>> + (XE_INSTR_VIDEOPIPE | \
>>> + REG_FIELD_PREP(MFX_CMD_SUBTYPE, subtype) | \
>>> + REG_FIELD_PREP(MFX_CMD_OPCODE, op) | \
>>> + REG_FIELD_PREP(MFX_CMD_SUB_OPCODE, sub_op) | \
>>> + REG_FIELD_PREP(MFX_FLAGS_AND_LEN, flags))
>>> +
>>> +#define MFX_WAIT XE_MFX_INSTR(1, 0, 0, 0)
>>> +#define MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG REG_BIT(9)
>>> +#define MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG REG_BIT(8)
>>> +
>>> +#define CRYPTO_KEY_EXCHANGE XE_MFX_INSTR(2, 6, 9, 0)
>>> +
>>> +#endif
>>> diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>>> index 10ec2920d31b..167fb0f742de 100644
>>> --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>>> +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>>> @@ -48,6 +48,7 @@
>>> #define MI_LRI_LEN(x) (((x) & 0xff) + 1)
>>> #define MI_FLUSH_DW __MI_INSTR(0x26)
>>> +#define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22)
>>> #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21)
>>> #define MI_INVALIDATE_TLB REG_BIT(18)
>>> #define MI_FLUSH_DW_CCS REG_BIT(16)
>>> @@ -66,4 +67,8 @@
>>> #define MI_BATCH_BUFFER_START __MI_INSTR(0x31)
>>> +#define MI_SET_APPID __MI_INSTR(0x0e)
>>> +#define MI_SET_APPID_SESSION_ID_MASK REG_GENMASK(6, 0)
>>> +#define MI_SET_APPID_SESSION_ID(x) REG_FIELD_PREP(MI_SET_APPID_SESSION_ID_MASK, x)
>>> +
>>> #endif
>>> diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
>>> index c24542e89318..d411c3fbcbc6 100644
>>> --- a/drivers/gpu/drm/xe/xe_lrc.h
>>> +++ b/drivers/gpu/drm/xe/xe_lrc.h
>>> @@ -20,7 +20,8 @@ struct xe_lrc;
>>> struct xe_lrc_snapshot;
>>> struct xe_vm;
>>> -#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
>>> +#define LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR (0x34 * 4)
>>> +#define LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR (0x40 * 4)
>>> struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
>>> u32 ring_size);
>>> diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c
>>> index b777b0765c8a..3b69dcc0a00f 100644
>>> --- a/drivers/gpu/drm/xe/xe_pxp_submit.c
>>> +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c
>>> @@ -6,14 +6,20 @@
>>> #include "xe_pxp_submit.h"
>>> #include <drm/xe_drm.h>
>>> +#include <linux/delay.h>
>>> #include "xe_device_types.h"
>>> +#include "xe_bb.h"
>>> #include "xe_bo.h"
>>> #include "xe_exec_queue.h"
>>> #include "xe_gsc_submit.h"
>>> #include "xe_gt.h"
>>> +#include "xe_lrc.h"
>>> #include "xe_pxp_types.h"
>>> +#include "xe_sched_job.h"
>>> #include "xe_vm.h"
>>> +#include "instructions/xe_mfx_commands.h"
>>> +#include "instructions/xe_mi_commands.h"
>>> #include "regs/xe_gt_regs.h"
>>> /*
>>> @@ -199,3 +205,105 @@ void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp)
>>> destroy_vcs_execution_resources(pxp);
>>> }
>>> +#define emit_cmd(xe_, map_, offset_, val_) \
>>> + xe_map_wr(xe_, map_, (offset_) * sizeof(u32), u32, val_)
>>> +
>>> +/* stall until prior PXP and MFX/HCP/HUC objects are cmopleted */
>> completed
>>
>>> +#define MFX_WAIT_PXP (MFX_WAIT | \
>>> + MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG | \
>>> + MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG)
>> Why define an XE_MFX_INSTR macro that takes a flags word only to
>> manually OR the flags in outside of the macro?
>
> Maybe flags is a misnomer here, but I couldn't think of anything
> clearer. Some commands have bits that are always set within that flags
> field, so for those we would add them at definition time; other
> commands, like MFX_WAIT, instead have optional flags, which we can set
> as needed in the code.
>
> Given that I haven't defined any XE_MFX_INSTR command that requires a
> value in the flags field, I'll just remove that field from the
> XE_MFX_INSTR macro for now to make things clearer.
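> Roughly something like this (just a sketch, nothing tested yet; only
> the flags field changes, all the other names are as in this patch):
>
> #define XE_MFX_INSTR(subtype, op, sub_op) \
>     (XE_INSTR_VIDEOPIPE | \
>      REG_FIELD_PREP(MFX_CMD_SUBTYPE, subtype) | \
>      REG_FIELD_PREP(MFX_CMD_OPCODE, op) | \
>      REG_FIELD_PREP(MFX_CMD_SUB_OPCODE, sub_op))
>
> #define MFX_WAIT XE_MFX_INSTR(1, 0, 0)
> #define CRYPTO_KEY_EXCHANGE XE_MFX_INSTR(2, 6, 9)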
>
>
>>
>>> +static u32 pxp_emit_wait(struct xe_device *xe, struct iosys_map *batch, u32 offset)
>>> +{
>>> + /* wait for cmds to go through */
>>> + emit_cmd(xe, batch, offset++, MFX_WAIT_PXP);
>>> + emit_cmd(xe, batch, offset++, 0);
>> This zero is just padding to ensure 64bit alignment of future
>> instructions?
>
> yes
>
>
>>
>>> +
>>> + return offset;
>>> +}
>>> +
>>> +static u32 pxp_emit_session_selection(struct xe_device *xe, struct iosys_map *batch,
>>> + u32 offset, u32 idx)
>>> +{
>>> + offset = pxp_emit_wait(xe, batch, offset);
>>> +
>>> + /* pxp off */
>>> + emit_cmd(xe, batch, offset++, MI_FLUSH_DW | MI_FLUSH_IMM_DW);
>>> + emit_cmd(xe, batch, offset++, 0);
>>> + emit_cmd(xe, batch, offset++, 0);
>>> + emit_cmd(xe, batch, offset++, 0);
>>> +
>>> + /* select session */
>>> + emit_cmd(xe, batch, offset++, MI_SET_APPID | MI_SET_APPID_SESSION_ID(idx));
>>> + emit_cmd(xe, batch, offset++, MFX_WAIT_PXP);
>> Seems odd to define a helper function to emit this instruction and
>> then only use it for some instances.
>
>
> I didn't want the extra padding here, or to make the function
> conditionally add it only when needed. I'll just add it; it's not like
> we're short on memory (the BO is one page and the termination only uses
> a few dwords).
I guess what we really want is an emit function that looks at the size
of the instruction being added and pre-pads as appropriate. Then you
wouldn't need to worry about optional padding after an instruction
depending on what may or may not come next! But it's probably not worth
the complication. As you say, a few extra dwords of padding in a
non-critical buffer is no biggie.
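
Something along these lines, perhaps (completely untested sketch, just
to illustrate the idea, re-using the emit_cmd() helper from this patch
and MI_NOOP from xe_mi_commands.h):

static u32 emit_instr(struct xe_device *xe, struct iosys_map *batch,
                      u32 offset, const u32 *dws, u32 num_dws)
{
        u32 i;

        /*
         * Pre-pad with a NOP so every instruction starts QWORD aligned;
         * this only kicks in when the previous instruction had an odd
         * dword count.
         */
        if (offset & 1)
                emit_cmd(xe, batch, offset++, MI_NOOP);

        for (i = 0; i < num_dws; i++)
                emit_cmd(xe, batch, offset++, dws[i]);

        return offset;
}

Then pxp_emit_wait() and friends would just pass their dwords through
this and the alignment handling would live in one place. But as said,
not something to hold the series up for.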
John.
>
>
>>
>>> +
>>> + /* pxp on */
>>> + emit_cmd(xe, batch, offset++, MI_FLUSH_DW |
>>> + MI_FLUSH_DW_PROTECTED_MEM_EN |
>>> + MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX |
>>> + MI_FLUSH_IMM_DW);
>>> + emit_cmd(xe, batch, offset++, LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR |
>>> + MI_FLUSH_DW_USE_GTT);
>>> + emit_cmd(xe, batch, offset++, 0);
>>> + emit_cmd(xe, batch, offset++, 0);
>>> +
>>> + offset = pxp_emit_wait(xe, batch, offset);
>>> +
>>> + return offset;
>>> +}
>>> +
>>> +static u32 pxp_emit_inline_termination(struct xe_device *xe,
>>> + struct iosys_map *batch, u32 offset)
>>> +{
>>> + /* session inline termination */
>>> + emit_cmd(xe, batch, offset++, CRYPTO_KEY_EXCHANGE);
>>> + emit_cmd(xe, batch, offset++, 0);
>>> +
>>> + return offset;
>>> +}
>>> +
>>> +static u32 pxp_emit_session_termination(struct xe_device *xe, struct iosys_map *batch,
>>> + u32 offset, u32 idx)
>>> +{
>>> + offset = pxp_emit_session_selection(xe, batch, offset, idx);
>>> + offset = pxp_emit_inline_termination(xe, batch, offset);
>>> +
>>> + return offset;
>>> +}
>>> +
>>> +/**
>>> + * xe_pxp_submit_session_termination - submits a PXP inline termination
>>> + * @pxp: the xe_pxp structure
>>> + * @id: the session to terminate
>>> + *
>>> + * Emit an inline termination via the VCS engine to terminate a session.
>>> + *
>>> + * Returns 0 if the submission is successful, an errno value otherwise.
>>> + */
>>> +int xe_pxp_submit_session_termination(struct xe_pxp *pxp, u32 id)
>>> +{
>>> + struct xe_sched_job *job;
>>> + struct dma_fence *fence;
>>> + long timeout;
>>> + u32 offset = 0;
>>> + u64 addr = xe_bo_ggtt_addr(pxp->vcs_exec.bo);
>>> +
>>> + offset = pxp_emit_session_termination(pxp->xe, &pxp->vcs_exec.bo->vmap, offset, id);
>>> + offset = pxp_emit_wait(pxp->xe, &pxp->vcs_exec.bo->vmap, offset);
>>> + emit_cmd(pxp->xe, &pxp->vcs_exec.bo->vmap, offset, MI_BATCH_BUFFER_END);
>>> +
>>> + job = xe_sched_job_create(pxp->vcs_exec.q, &addr);
>> Double space
>>
>>> + if (IS_ERR(job))
>>> + return PTR_ERR(job);
>>> +
>>> + xe_sched_job_arm(job);
>>> + fence = dma_fence_get(&job->drm.s_fence->finished);
>>> + xe_sched_job_push(job);
>>> +
>>> + timeout = dma_fence_wait_timeout(fence, false, HZ);
>>> +
>>> + dma_fence_put(fence);
>>> + if (timeout <= 0)
>>> + return -EAGAIN;
>> Does it not matter what the error was? Why/how would this fail in a
>> way that needs to be re-tried?
>>
>> Although looking at the later patches, the return value from this
>> function is just treated as a pass/fail bool anyway. So why bother
>> munging it at all?
>
>
> Timeout can be 0 on failure, so we can't return that directly. I'll
> change it to return timeout if it's negative and an error if it
> actually timed out.
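> i.e. roughly (-ETIMEDOUT below is just a placeholder for whatever
> errno we settle on for the timed-out case):
>
>     timeout = dma_fence_wait_timeout(fence, false, HZ);
>
>     dma_fence_put(fence);
>
>     if (!timeout)
>         return -ETIMEDOUT;
>     else if (timeout < 0)
>         return timeout;
>
>     return 0;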
>
> Daniele
>
>
>>
>> John.
>>
>>> +
>>> + return 0;
>>> +}
>>> diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.h b/drivers/gpu/drm/xe/xe_pxp_submit.h
>>> index 1a971fadc081..4ee8c0acfed9 100644
>>> --- a/drivers/gpu/drm/xe/xe_pxp_submit.h
>>> +++ b/drivers/gpu/drm/xe/xe_pxp_submit.h
>>> @@ -13,4 +13,6 @@ struct xe_pxp;
>>> int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp);
>>> void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp);
>>> +int xe_pxp_submit_session_termination(struct xe_pxp *pxp, u32 id);
>>> +
>>> #endif /* __XE_PXP_SUBMIT_H__ */
>>> diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
>>> index 0be4f489d3e1..a4b5a0f68a32 100644
>>> --- a/drivers/gpu/drm/xe/xe_ring_ops.c
>>> +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
>>> @@ -118,7 +118,7 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
>>> dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
>>> MI_FLUSH_DW_STORE_INDEX;
>>> - dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
>>> + dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
>>> dw[i++] = 0;
>>> dw[i++] = ~0U;
>>> @@ -156,7 +156,7 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
>>> flags &= ~mask_flags;
>>> - return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_SCRATCH_ADDR, 0);
>>> + return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0);
>>> }
>>> static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
>>