[PATCH] drm/xe: use non-posted writes for user fence signalling
Andrzej Hajda
andrzej.hajda at intel.com
Mon Jun 3 08:06:20 UTC 2024
Moving the flush before user fence signalling fixes the issue of prior writes
not being visible when the fence is signalled. However, that change should
also be accompanied by switching the signalling to a non-posted write; there
is no need for a flush, as the writes are coherent with the CPU cache.
Fixes: 38007fa96419 ("drm/xe: flush gtt before signalling user fence on all engines")
Signed-off-by: Andrzej Hajda <andrzej.hajda at intel.com>
---
drivers/gpu/drm/xe/instructions/xe_mi_commands.h | 1 +
drivers/gpu/drm/xe/xe_ring_ops.c | 23 +++++++++++------------
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
index c74ceb550dce..0712a8dee0e2 100644
--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -35,6 +35,7 @@
#define MI_STORE_DATA_IMM __MI_INSTR(0x20)
#define MI_SDI_GGTT REG_BIT(22)
+#define MI_SDI_FORCE_WRITE_COMPLETION REG_BIT(10)
#define MI_SDI_LEN_DW GENMASK(9, 0)
#define MI_SDI_NUM_DW(x) REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2)
#define MI_SDI_NUM_QW(x) (REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 2a607c141d65..00b2cbc15284 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -149,10 +149,9 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_SCRATCH_ADDR, 0);
}
-static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
- u32 *dw, int i)
+static int emit_store_imm_ppgtt(u64 addr, u64 value, u32 flags, u32 *dw, int i)
{
- dw[i++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(1);
+ dw[i++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(1) | flags;
dw[i++] = lower_32_bits(addr);
dw[i++] = upper_32_bits(addr);
dw[i++] = lower_32_bits(value);
@@ -237,9 +236,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
if (job->user_fence.used)
- i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
- job->user_fence.value,
- dw, i);
+ i = emit_store_imm_ppgtt(job->user_fence.addr,
+ job->user_fence.value,
+ MI_SDI_FORCE_WRITE_COMPLETION, dw, i);
i = emit_user_interrupt(dw, i);
@@ -296,9 +295,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
if (job->user_fence.used)
- i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
- job->user_fence.value,
- dw, i);
+ i = emit_store_imm_ppgtt(job->user_fence.addr,
+ job->user_fence.value,
+ MI_SDI_FORCE_WRITE_COMPLETION, dw, i);
i = emit_user_interrupt(dw, i);
@@ -341,9 +340,9 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
i = emit_render_cache_flush(job, dw, i);
if (job->user_fence.used)
- i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
- job->user_fence.value,
- dw, i);
+ i = emit_store_imm_ppgtt(job->user_fence.addr,
+ job->user_fence.value,
+ 0, dw, i);
i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i);
---
base-commit: fe3d637a9c72b22297da0c731fa5e217bd182d2d
change-id: 20240603-fix_user_fence_posted-ca56c79c0662
Best regards,
--
Andrzej Hajda <andrzej.hajda at intel.com>
More information about the Intel-xe
mailing list