[PATCH 7/8] drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
Chris Wilson
chris at chris-wilson.co.uk
Tue Dec 18 12:06:40 UTC 2018
In preparation for removing the manual EMIT_FLUSH prior to emitting the
breadcrumb implement the flush inline with writing the breadcrumb for
execlists.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/intel_ringbuffer.c | 119 ++++++++++++++++++------
1 file changed, 89 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d5a9edbcf1be..dd078ed3bd06 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -300,6 +300,37 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
return 0;
}
+static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+ /* First we do the post_sync_nonzero_flush w/a */
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+ *cs++ = 0;
+ *cs++ = 0; /* low dword */
+
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = PIPE_CONTROL_QW_WRITE;
+ *cs++ = 0;
+ *cs++ = 0;
+
+ /* Finally we can flush and emit the breadcrumb */
+ *cs++ = GFX_OP_PIPE_CONTROL(5);
+ *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_QW_WRITE |
+ PIPE_CONTROL_CS_STALL);
+ *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = rq->global_seqno;
+ *cs++ = 0;
+
+ *cs++ = MI_USER_INTERRUPT;
+
+ rq->tail = intel_ring_offset(rq, cs);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+}
+static const int gen6_rcs_emit_breadcrumb_sz = 14;
+
static int
gen7_render_ring_cs_stall_wa(struct i915_request *rq)
{
@@ -379,6 +410,39 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
return 0;
}
+static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+ *cs++ = GFX_OP_PIPE_CONTROL(5);
+ *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE |
+ PIPE_CONTROL_QW_WRITE |
+ PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_CS_STALL);
+ *cs++ = intel_hws_seqno_address(rq->engine);
+ *cs++ = rq->global_seqno;
+ *cs++ = 0;
+
+ *cs++ = MI_USER_INTERRUPT;
+
+ rq->tail = intel_ring_offset(rq, cs);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+}
+static const int gen7_rcs_emit_breadcrumb_sz = 6;
+
+static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+ *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
+ *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
+ *cs++ = rq->global_seqno;
+ *cs++ = MI_USER_INTERRUPT;
+
+ rq->tail = intel_ring_offset(rq, cs);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+}
+static const int gen6_xcs_emit_breadcrumb_sz = 4;
+
static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
{
/*
@@ -777,16 +841,19 @@ static void i9xx_submit_request(struct i915_request *request)
static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
+ *cs++ = MI_FLUSH;
*cs++ = MI_STORE_DWORD_INDEX;
*cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
*cs++ = rq->global_seqno;
+
+ *cs++ = MI_FLUSH;
*cs++ = MI_USER_INTERRUPT;
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
}
-static const int i9xx_emit_breadcrumb_sz = 4;
+static const int i9xx_emit_breadcrumb_sz = 6;
static void
gen5_seqno_barrier(struct intel_engine_cs *engine)
@@ -806,31 +873,6 @@ gen5_seqno_barrier(struct intel_engine_cs *engine)
usleep_range(125, 250);
}
-static void
-gen6_seqno_barrier(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- /* Workaround to force correct ordering between irq and seqno writes on
- * ivb (and maybe also on snb) by reading from a CS register (like
- * ACTHD) before reading the status page.
- *
- * Note that this effectively stalls the read by the time it takes to
- * do a memory transaction, which more or less ensures that the write
- * from the GPU has sufficient time to invalidate the CPU cacheline.
- * Alternatively we could delay the interrupt from the CS ring to give
- * the write time to land, but that would incur a delay after every
- * batch i.e. much more frequent than a delay when waiting for the
- * interrupt (with the same net latency).
- *
- * Also note that to prevent whole machine hangs on gen7, we have to
- * take the spinlock to guard against concurrent cacheline access.
- */
- spin_lock_irq(&dev_priv->uncore.lock);
- POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
- spin_unlock_irq(&dev_priv->uncore.lock);
-}
-
static void
gen5_irq_enable(struct intel_engine_cs *engine)
{
@@ -2050,7 +2092,6 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
if (INTEL_GEN(dev_priv) >= 6) {
engine->irq_enable = gen6_irq_enable;
engine->irq_disable = gen6_irq_disable;
- engine->irq_seqno_barrier = gen6_seqno_barrier;
} else if (INTEL_GEN(dev_priv) >= 5) {
engine->irq_enable = gen5_irq_enable;
engine->irq_disable = gen5_irq_disable;
@@ -2122,11 +2163,16 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
- if (INTEL_GEN(dev_priv) >= 6) {
+ if (INTEL_GEN(dev_priv) >= 7) {
engine->init_context = intel_rcs_ctx_init;
engine->emit_flush = gen7_render_ring_flush;
- if (IS_GEN(dev_priv, 6))
- engine->emit_flush = gen6_render_ring_flush;
+ engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb;
+ engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz;
+ } else if (IS_GEN(dev_priv, 6)) {
+ engine->init_context = intel_rcs_ctx_init;
+ engine->emit_flush = gen6_render_ring_flush;
+ engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb;
+ engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz;
} else if (IS_GEN(dev_priv, 5)) {
engine->emit_flush = gen4_render_ring_flush;
} else {
@@ -2161,6 +2207,9 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
engine->set_default_submission = gen6_bsd_set_default_submission;
engine->emit_flush = gen6_bsd_ring_flush;
engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
+
+ engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+ engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
} else {
engine->emit_flush = bsd_ring_flush;
if (IS_GEN(dev_priv, 5))
@@ -2176,11 +2225,16 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
+ GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
+
intel_ring_default_vfuncs(dev_priv, engine);
engine->emit_flush = gen6_ring_flush;
engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
+ engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+ engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
+
return intel_init_ring_buffer(engine);
}
@@ -2188,6 +2242,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
+ GEM_BUG_ON(INTEL_GEN(dev_priv) < 7);
+
intel_ring_default_vfuncs(dev_priv, engine);
engine->emit_flush = gen6_ring_flush;
@@ -2195,5 +2251,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
engine->irq_enable = hsw_vebox_irq_enable;
engine->irq_disable = hsw_vebox_irq_disable;
+ engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+ engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
+
return intel_init_ring_buffer(engine);
}
--
2.20.0
More information about the Intel-gfx-trybot
mailing list