[Intel-gfx] [PATCH 3/3] drm/i915/rq: Ironlake is coherent for MI_STORE_DWORD
Chris Wilson
chris at chris-wilson.co.uk
Sat Sep 6 11:28:29 CEST 2014
So had we just used the good old method for writing the seqno before the
old-fashioned MI_USER_INTERRUPT, we would have had coherent request
completion - at least as far as the resolution of our testing goes,
which is extremely good for this particular issue. Removing the
PIPE_CONTROL notify for Ironlake removes a good chunk of special cased
code and vfuncs.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_debugfs.c | 4 +-
drivers/gpu/drm/i915/i915_gem.c | 22 +++---
drivers/gpu/drm/i915/i915_gpu_error.c | 2 +-
drivers/gpu/drm/i915/i915_irq.c | 2 +-
drivers/gpu/drm/i915/i915_trace.h | 2 +-
drivers/gpu/drm/i915/intel_lrc.c | 10 +++
drivers/gpu/drm/i915/intel_ringbuffer.c | 114 +++-----------------------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 21 +++---
8 files changed, 44 insertions(+), 133 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4d0b5cff5291..1d385b399643 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -553,7 +553,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
seq_printf(m, "Flip queued on %s at seqno %u, next seqno %u [current breadcrumb %u], completed? %d\n",
rq->engine->name,
rq->seqno, rq->i915->next_seqno,
- rq->engine->get_seqno(rq->engine),
+ intel_engine_get_seqno(rq->engine),
__i915_request_complete__wa(rq));
} else
seq_printf(m, "Flip not associated with any ring\n");
@@ -625,7 +625,7 @@ static void i915_engine_seqno_info(struct seq_file *m,
{
seq_printf(m, "Current sequence (%s): seqno=%u, tag=%u [last breadcrumb %u, last request %u], next seqno=%u, next tag=%u\n",
engine->name,
- engine->get_seqno(engine),
+ intel_engine_get_seqno(engine),
engine->tag,
engine->breadcrumb[engine->id],
engine->last_request ? engine->last_request->seqno : 0,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 46d3aced7a50..dcb3e790cd24 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2148,20 +2148,20 @@ i915_gem_retire_requests__engine(struct intel_engine_cs *engine)
if (engine->last_request == NULL)
return;
- if (!intel_engine_retire(engine, engine->get_seqno(engine)))
+ if (!intel_engine_retire(engine, intel_engine_get_seqno(engine)))
return;
- while (!list_empty(&engine->write_list)) {
+ while (!list_empty(&engine->read_list)) {
struct drm_i915_gem_object *obj;
- obj = list_first_entry(&engine->write_list,
+ obj = list_first_entry(&engine->read_list,
struct drm_i915_gem_object,
- last_write.engine_list);
+ last_read[engine->id].engine_list);
- if (!obj->last_write.request->completed)
+ if (!obj->last_read[engine->id].request->completed)
break;
- i915_gem_object_retire__write(obj);
+ i915_gem_object_retire__read(obj, engine);
}
while (!list_empty(&engine->fence_list)) {
@@ -2177,17 +2177,17 @@ i915_gem_retire_requests__engine(struct intel_engine_cs *engine)
i915_gem_object_retire__fence(obj);
}
- while (!list_empty(&engine->read_list)) {
+ while (!list_empty(&engine->write_list)) {
struct drm_i915_gem_object *obj;
- obj = list_first_entry(&engine->read_list,
+ obj = list_first_entry(&engine->write_list,
struct drm_i915_gem_object,
- last_read[engine->id].engine_list);
+ last_write.engine_list);
- if (!obj->last_read[engine->id].request->completed)
+ if (!obj->last_write.request->completed)
break;
- i915_gem_object_retire__read(obj, engine);
+ i915_gem_object_retire__write(obj);
}
}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index adb6358a8f6e..b596c9365818 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -914,7 +914,7 @@ static void i915_record_ring_state(struct drm_device *dev,
ering->waiting = waitqueue_active(&engine->irq_queue);
ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
ering->acthd = intel_engine_get_active_head(engine);
- ering->seqno = engine->get_seqno(engine);
+ ering->seqno = intel_engine_get_seqno(engine);
ering->request = engine->last_request ? engine->last_request->seqno : 0;
ering->hangcheck = engine->hangcheck.seqno;
memcpy(ering->breadcrumb, engine->breadcrumb, sizeof(ering->breadcrumb));
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 71bdd9b3784f..daf1664e380d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3271,7 +3271,7 @@ static void i915_hangcheck_elapsed(unsigned long data)
semaphore_clear_deadlocks(dev_priv);
acthd = intel_engine_get_active_head(engine);
- seqno = engine->get_seqno(engine);
+ seqno = intel_engine_get_seqno(engine);
interrupts = atomic_read(&engine->interrupts);
if (engine_idle(engine)) {
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 8bb51dcb10f3..195ce54d7694 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -474,7 +474,7 @@ TRACE_EVENT(i915_gem_ring_complete,
TP_fast_assign(
__entry->dev = ring->i915->dev->primary->index;
__entry->ring = ring->id;
- __entry->seqno = ring->get_seqno(ring);
+ __entry->seqno = intel_engine_get_seqno(ring);
),
TP_printk("dev=%u, ring=%u, seqno=%x",
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d47af931d5ab..7959b7b1f531 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -643,6 +643,15 @@ static int execlists_resume(struct intel_engine_cs *engine)
/* XXX */
I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
+ I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
+
+ /* We need to disable the AsyncFlip performance optimisations in order
+ * to use MI_WAIT_FOR_EVENT within the CS. It should already be
+ * programmed to '1' on all products.
+ *
+ * WaDisableAsyncFlipPerfMode:bdw
+ */
+ I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
I915_WRITE(RING_MODE_GEN7(engine),
_MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
@@ -674,6 +683,7 @@ static void execlists_reset(struct intel_engine_cs *engine)
spin_lock_irqsave(&engine->irqlock, flags);
list_splice_tail_init(&engine->pending, &engine->submitted);
+ list_splice_tail_init(&engine->submitted, &engine->requests);
spin_unlock_irqrestore(&engine->irqlock, flags);
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ae02b1757745..0147bd04c8e8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -666,7 +666,7 @@ static int engine_add_request(struct i915_gem_request *rq)
static bool engine_rq_is_complete(struct i915_gem_request *rq)
{
- return __i915_seqno_passed(rq->engine->get_seqno(rq->engine),
+ return __i915_seqno_passed(intel_engine_get_seqno(rq->engine),
rq->seqno);
}
@@ -816,7 +816,7 @@ static int chv_render_init_context(struct i915_gem_request *rq)
{
int ret;
- ret = emit_lri(rq, 8,
+ ret = emit_lri(rq, 3,
GEN8_ROW_CHICKEN,
/* WaDisablePartialInstShootdown:chv */
@@ -1058,89 +1058,6 @@ gen6_emit_wait(struct i915_gem_request *waiter,
return 0;
}
-#define PIPE_CONTROL_FLUSH(ring__, addr__) \
-do { \
- intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \
- PIPE_CONTROL_DEPTH_STALL); \
- intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT); \
- intel_ring_emit(ring__, 0); \
- intel_ring_emit(ring__, 0); \
-} while (0)
-
-static int
-gen5_emit_breadcrumb(struct i915_gem_request *rq)
-{
- u32 scratch_addr = rq->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
- struct intel_ringbuffer *ring;
-
- /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
- * incoherent with writes to memory, i.e. completely fubar,
- * so we need to use PIPE_NOTIFY instead.
- *
- * However, we also need to workaround the qword write
- * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
- * memory before requesting an interrupt.
- */
- ring = intel_ring_begin(rq, 32);
- if (IS_ERR(ring))
- return PTR_ERR(ring);
-
- intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
- PIPE_CONTROL_WRITE_FLUSH |
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
- intel_ring_emit(ring, rq->engine->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
- intel_ring_emit(ring, rq->seqno);
- intel_ring_emit(ring, 0);
-
- PIPE_CONTROL_FLUSH(ring, scratch_addr);
- scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
- PIPE_CONTROL_FLUSH(ring, scratch_addr);
- scratch_addr += 2 * CACHELINE_BYTES;
- PIPE_CONTROL_FLUSH(ring, scratch_addr);
- scratch_addr += 2 * CACHELINE_BYTES;
- PIPE_CONTROL_FLUSH(ring, scratch_addr);
- scratch_addr += 2 * CACHELINE_BYTES;
- PIPE_CONTROL_FLUSH(ring, scratch_addr);
- scratch_addr += 2 * CACHELINE_BYTES;
- PIPE_CONTROL_FLUSH(ring, scratch_addr);
-
- intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
- PIPE_CONTROL_WRITE_FLUSH |
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_NOTIFY);
- intel_ring_emit(ring, rq->engine->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
- intel_ring_emit(ring, rq->seqno);
- intel_ring_emit(ring, 0);
-
- intel_ring_advance(ring);
-
- return 0;
-}
-
-static u32
-ring_get_seqno(struct intel_engine_cs *engine)
-{
- return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
-}
-
-static void
-ring_set_seqno(struct intel_engine_cs *engine, u32 seqno)
-{
- intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
-}
-
-static u32
-gen5_render_get_seqno(struct intel_engine_cs *engine)
-{
- return engine->scratch.cpu_page[0];
-}
-
-static void
-gen5_render_set_seqno(struct intel_engine_cs *engine, u32 seqno)
-{
- engine->scratch.cpu_page[0] = seqno;
-}
-
static bool
gen5_irq_get(struct intel_engine_cs *engine)
{
@@ -1256,7 +1173,7 @@ bsd_emit_flush(struct i915_gem_request *rq,
}
static int
-i9xx_emit_breadcrumb(struct i915_gem_request *rq)
+emit_breadcrumb(struct i915_gem_request *rq)
{
struct intel_ringbuffer *ring;
@@ -1332,8 +1249,6 @@ hsw_vebox_irq_get(struct intel_engine_cs *engine)
if (!dev_priv->dev->irq_enabled)
return false;
- gen6_gt_force_wake_get(dev_priv, engine->power_domains);
-
spin_lock_irqsave(&dev_priv->irq_lock, flags);
if (engine->irq_refcount++ == 0) {
I915_WRITE_IMR(engine,
@@ -1343,6 +1258,8 @@ hsw_vebox_irq_get(struct intel_engine_cs *engine)
}
spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+ gen6_gt_force_wake_get(dev_priv, engine->power_domains);
+
return true;
}
@@ -1352,14 +1269,14 @@ hsw_vebox_irq_put(struct intel_engine_cs *engine)
struct drm_i915_private *dev_priv = engine->i915;
unsigned long flags;
+ gen6_gt_force_wake_put(dev_priv, engine->power_domains);
+
spin_lock_irqsave(&dev_priv->irq_lock, flags);
if (--engine->irq_refcount == 0) {
I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
gen6_disable_pm_irq(dev_priv, engine->irq_enable_mask);
}
spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
-
- gen6_gt_force_wake_put(dev_priv, engine->power_domains);
}
static bool
@@ -1668,10 +1585,8 @@ static int intel_engine_init(struct intel_engine_cs *engine,
engine->resume = engine_resume;
engine->cleanup = engine_cleanup;
- engine->get_seqno = ring_get_seqno;
- engine->set_seqno = ring_set_seqno;
-
engine->irq_barrier = nop_irq_barrier;
+ engine->emit_breadcrumb = emit_breadcrumb;
engine->get_ring = engine_get_ring;
engine->put_ring = engine_put_ring;
@@ -1947,14 +1862,12 @@ int intel_init_render_engine(struct drm_i915_private *dev_priv)
engine->init_context = chv_render_init_context;
else
engine->init_context = bdw_render_init_context;
- engine->emit_breadcrumb = i9xx_emit_breadcrumb;
engine->emit_flush = gen8_render_emit_flush;
engine->irq_get = gen8_irq_get;
engine->irq_put = gen8_irq_put;
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
gen8_engine_init_semaphore(engine);
} else if (INTEL_INFO(dev_priv)->gen >= 6) {
- engine->emit_breadcrumb = i9xx_emit_breadcrumb;
engine->emit_flush = gen7_render_emit_flush;
if (INTEL_INFO(dev_priv)->gen == 6)
engine->emit_flush = gen6_render_emit_flush;
@@ -1984,17 +1897,13 @@ int intel_init_render_engine(struct drm_i915_private *dev_priv)
engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
}
} else if (IS_GEN5(dev_priv)) {
- engine->emit_breadcrumb = gen5_emit_breadcrumb;
engine->emit_flush = gen4_emit_flush;
- engine->get_seqno = gen5_render_get_seqno;
- engine->set_seqno = gen5_render_set_seqno;
engine->irq_get = gen5_irq_get;
engine->irq_put = gen5_irq_put;
engine->irq_enable_mask =
GT_RENDER_USER_INTERRUPT |
GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
} else {
- engine->emit_breadcrumb = i9xx_emit_breadcrumb;
if (INTEL_INFO(dev_priv)->gen < 4)
engine->emit_flush = gen2_emit_flush;
else
@@ -2044,7 +1953,7 @@ int intel_init_render_engine(struct drm_i915_private *dev_priv)
engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
}
- if (INTEL_INFO(dev_priv)->gen >= 5) {
+ if (INTEL_INFO(dev_priv)->gen >= 6) {
ret = init_pipe_control(engine);
if (ret)
return ret;
@@ -2073,7 +1982,6 @@ int intel_init_bsd_engine(struct drm_i915_private *dev_priv)
if (IS_GEN6(dev_priv))
engine->write_tail = gen6_bsd_ring_write_tail;
engine->emit_flush = gen6_bsd_emit_flush;
- engine->emit_breadcrumb = i9xx_emit_breadcrumb;
if (INTEL_INFO(dev_priv)->gen >= 8) {
engine->irq_enable_mask =
GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
@@ -2106,7 +2014,6 @@ int intel_init_bsd_engine(struct drm_i915_private *dev_priv)
engine->mmio_base = BSD_RING_BASE;
engine->emit_flush = bsd_emit_flush;
- engine->emit_breadcrumb = i9xx_emit_breadcrumb;
if (IS_GEN5(dev_priv)) {
engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
engine->irq_get = gen5_irq_get;
@@ -2146,7 +2053,6 @@ int intel_init_bsd2_engine(struct drm_i915_private *dev_priv)
engine->mmio_base = GEN8_BSD2_RING_BASE;
engine->emit_flush = gen6_bsd_emit_flush;
- engine->emit_breadcrumb = i9xx_emit_breadcrumb;
engine->emit_batchbuffer = gen8_emit_batchbuffer;
engine->irq_enable_mask =
GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
@@ -2172,7 +2078,6 @@ int intel_init_blt_engine(struct drm_i915_private *dev_priv)
engine->mmio_base = BLT_RING_BASE;
engine->emit_flush = gen6_blt_emit_flush;
- engine->emit_breadcrumb = i9xx_emit_breadcrumb;
if (INTEL_INFO(dev_priv)->gen >= 8) {
engine->irq_enable_mask =
GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
@@ -2227,7 +2132,6 @@ int intel_init_vebox_engine(struct drm_i915_private *dev_priv)
engine->mmio_base = VEBOX_RING_BASE;
engine->emit_flush = gen6_blt_emit_flush;
- engine->emit_breadcrumb = i9xx_emit_breadcrumb;
if (INTEL_INFO(dev_priv)->gen >= 8) {
engine->irq_enable_mask =
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 46c8d2288821..1289714351dc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -21,8 +21,9 @@
* cacheline, the Head Pointer must not be greater than the Tail
* Pointer."
*
- * To also accommodate errata on 830/845 which makes the last pair of cachlines
- * in the ringbuffer unavailable, reduce the available space further.
+ * To also accommodate errata on 830/845 which makes the last pair of
+ * cachelines in the ringbuffer unavailable, reduce the available space
+ * further.
*/
#define I915_RING_RSVD (2*CACHELINE_BYTES)
@@ -177,16 +178,6 @@ struct intel_engine_cs {
int (*resume)(struct intel_engine_cs *engine);
void (*cleanup)(struct intel_engine_cs *engine);
- /* Some chipsets are not quite as coherent as advertised and need
- * an expensive kick to force a true read of the up-to-date seqno.
- * However, the up-to-date seqno is not always required and the last
- * seen value is good enough. Note that the seqno will always be
- * monotonic, even if not coherent.
- */
- u32 (*get_seqno)(struct intel_engine_cs *engine);
- void (*set_seqno)(struct intel_engine_cs *engine,
- u32 seqno);
-
int (*init_context)(struct i915_gem_request *rq);
int __must_check (*emit_flush)(struct i915_gem_request *rq,
@@ -371,6 +362,12 @@ intel_write_status_page(struct intel_engine_cs *engine,
#define I915_GEM_HWS_SCRATCH_INDEX 0x30
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
+static inline u32
+intel_engine_get_seqno(struct intel_engine_cs *engine)
+{
+ return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
+}
+
struct intel_ringbuffer *
intel_engine_alloc_ring(struct intel_engine_cs *engine,
struct intel_context *ctx,
--
2.1.0
More information about the Intel-gfx
mailing list