[Intel-gfx] [CI 2/3] drm/i915/execlists: Use per-process HWSP as scratch
Chris Wilson
chris at chris-wilson.co.uk
Thu Sep 26 13:31:41 UTC 2019
From: Michał Winiarski <michal.winiarski at intel.com>
Some of our commands (MI_FLUSH_DW / PIPE_CONTROL) require a post-sync write
operation to be performed. Currently we're using dedicated VMA for
PIPE_CONTROL and global HWSP for MI_FLUSH_DW.
On execlists platforms, each of our contexts has an area that can be
used as scratch space. Let's use that instead.
Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 --
drivers/gpu/drm/i915/gt/intel_lrc.c | 45 +++++++-----------------
drivers/gpu/drm/i915/gt/intel_lrc.h | 4 +++
3 files changed, 17 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 3039cef64b11..e64db4c13df6 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -89,9 +89,6 @@ enum intel_gt_scratch_field {
/* 8 bytes */
INTEL_GT_SCRATCH_FIELD_DEFAULT = 0,
- /* 8 bytes */
- INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA = 128,
-
/* 8 bytes */
INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index ab725a6ca0ac..fa385218ce92 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2308,12 +2308,6 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
return batch;
}
-static u32 slm_offset(struct intel_engine_cs *engine)
-{
- return intel_gt_scratch_offset(engine->gt,
- INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA);
-}
-
/*
* Typically we only have one indirect_ctx and per_ctx batch buffer which are
* initialized at the beginning and shared across all contexts but this field
@@ -2342,10 +2336,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* Actual scratch location is at 128 bytes offset */
batch = gen8_emit_pipe_control(batch,
PIPE_CONTROL_FLUSH_L3 |
- PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_STORE_DATA_INDEX |
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_QW_WRITE,
- slm_offset(engine));
+ LRC_PPHWSP_SCRATCH_ADDR);
*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -3052,7 +3046,7 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
}
*cs++ = cmd;
- *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
*cs++ = 0; /* upper addr */
*cs++ = 0; /* value */
intel_ring_advance(request, cs);
@@ -3063,10 +3057,6 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
static int gen8_emit_flush_render(struct i915_request *request,
u32 mode)
{
- struct intel_engine_cs *engine = request->engine;
- u32 scratch_addr =
- intel_gt_scratch_offset(engine->gt,
- INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
bool vf_flush_wa = false, dc_flush_wa = false;
u32 *cs, flags = 0;
int len;
@@ -3088,7 +3078,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
/*
* On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
@@ -3121,7 +3111,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
0);
- cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
if (dc_flush_wa)
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
@@ -3134,11 +3124,6 @@ static int gen8_emit_flush_render(struct i915_request *request,
static int gen11_emit_flush_render(struct i915_request *request,
u32 mode)
{
- struct intel_engine_cs *engine = request->engine;
- const u32 scratch_addr =
- intel_gt_scratch_offset(engine->gt,
- INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
-
if (mode & EMIT_FLUSH) {
u32 *cs;
u32 flags = 0;
@@ -3151,13 +3136,13 @@ static int gen11_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
cs = intel_ring_begin(request, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(request, cs);
}
@@ -3175,13 +3160,13 @@ static int gen11_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
cs = intel_ring_begin(request, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(request, cs);
}
@@ -3196,10 +3181,6 @@ static u32 preparser_disable(bool state)
static int gen12_emit_flush_render(struct i915_request *request,
u32 mode)
{
- const u32 scratch_addr =
- intel_gt_scratch_offset(request->engine->gt,
- INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
-
if (mode & EMIT_FLUSH) {
u32 flags = 0;
u32 *cs;
@@ -3210,7 +3191,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_CS_STALL;
@@ -3219,7 +3200,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(request, cs);
}
@@ -3235,7 +3216,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_CS_STALL;
@@ -3251,7 +3232,7 @@ static int gen12_emit_flush_render(struct i915_request *request,
*/
*cs++ = preparser_disable(true);
- cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
*cs++ = preparser_disable(false);
intel_ring_advance(request, cs);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index dc0252e0589e..66ac616361c1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -104,6 +104,10 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine);
*/
#define LRC_HEADER_PAGES LRC_PPHWSP_PN
+/* Space within PPHWSP reserved to be used as scratch */
+#define LRC_PPHWSP_SCRATCH 0x34
+#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32))
+
void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
void intel_lr_context_reset(struct intel_engine_cs *engine,
--
2.23.0
More information about the Intel-gfx
mailing list