[Intel-gfx] [PATCH 2/2] drm/i915: Remove obsolete ringbuffer emission for gen8+

Chris Wilson chris at chris-wilson.co.uk
Thu Oct 26 21:17:11 UTC 2017


Since removing the module parameter to force selection of ringbuffer
emission for gen8, the code is defunct. Remove it.

To put the difference into perspective, a couple of microbenchmarks
(bdw i7-5557u, 20170324):
                                        ring          execlists
exec continuous nops on all rings:   1.491us            2.223us
exec sequential nops on each ring:  12.508us           53.682us
single nop + sync:                   9.272us           30.291us

vblank_mode=0 glxgears:            ~11000fps           ~9000fps

Since the earlier submission, gen8 ringbuffer submission has fallen
further and further behind in features. So while ringbuffer may hold the
throughput crown, in terms of interactive latency, execlists is much
better. Alas, we have no convenient metrics for such, other than
demonstrating things we can do with execlists but can not using
legacy ringbuffer submission.

We have made a few improvements to lowlevel execlists throughput,
and ringbuffer currently panics on boot! (bdw i7-5557u, 20171026):

                                        ring          execlists
exec continuous nops on all rings:       n/a            1.921us
exec sequential nops on each ring:       n/a           44.621us
single nop + sync:                       n/a           21.953us

vblank_mode=0 glxgears:                  n/a          ~18500fps

References: https://bugs.freedesktop.org/show_bug.cgi?id=87725
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Once-upon-a-time-Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  44 +---
 drivers/gpu/drm/i915/i915_drv.h         |   2 -
 drivers/gpu/drm/i915/i915_gem.c         |   2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c   |  39 +--
 drivers/gpu/drm/i915/intel_engine_cs.c  |  12 -
 drivers/gpu/drm/i915/intel_hangcheck.c  |  44 +---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 431 +++++---------------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  25 +-
 8 files changed, 89 insertions(+), 510 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 726f3a8b2cd1..b83fee536a2d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3216,44 +3216,12 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
 		return ret;
 	intel_runtime_pm_get(dev_priv);
 
-	if (IS_BROADWELL(dev_priv)) {
-		struct page *page;
-		uint64_t *seqno;
-
-		page = i915_gem_object_get_page(dev_priv->semaphore->obj, 0);
-
-		seqno = (uint64_t *)kmap_atomic(page);
-		for_each_engine(engine, dev_priv, id) {
-			uint64_t offset;
-
-			seq_printf(m, "%s\n", engine->name);
-
-			seq_puts(m, "  Last signal:");
-			for (j = 0; j < num_rings; j++) {
-				offset = id * I915_NUM_ENGINES + j;
-				seq_printf(m, "0x%08llx (0x%02llx) ",
-					   seqno[offset], offset * 8);
-			}
-			seq_putc(m, '\n');
-
-			seq_puts(m, "  Last wait:  ");
-			for (j = 0; j < num_rings; j++) {
-				offset = id + (j * I915_NUM_ENGINES);
-				seq_printf(m, "0x%08llx (0x%02llx) ",
-					   seqno[offset], offset * 8);
-			}
-			seq_putc(m, '\n');
-
-		}
-		kunmap_atomic(seqno);
-	} else {
-		seq_puts(m, "  Last signal:");
-		for_each_engine(engine, dev_priv, id)
-			for (j = 0; j < num_rings; j++)
-				seq_printf(m, "0x%08x\n",
-					   I915_READ(engine->semaphore.mbox.signal[j]));
-		seq_putc(m, '\n');
-	}
+	seq_puts(m, "  Last signal:");
+	for_each_engine(engine, dev_priv, id)
+		for (j = 0; j < num_rings; j++)
+			seq_printf(m, "0x%08x\n",
+				   I915_READ(engine->semaphore.mbox.signal[j]));
+	seq_putc(m, '\n');
 
 	intel_runtime_pm_put(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f7c1f203aad1..1124ff4ca5b1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -933,7 +933,6 @@ struct i915_gpu_state {
 	u64 fence[I915_MAX_NUM_FENCES];
 	struct intel_overlay_error_state *overlay;
 	struct intel_display_error_state *display;
-	struct drm_i915_error_object *semaphore;
 	struct drm_i915_error_object *guc_log;
 
 	struct drm_i915_error_engine {
@@ -2282,7 +2281,6 @@ struct drm_i915_private {
 	struct i915_gem_context *kernel_context;
 	/* Context only to be used for injecting preemption commands */
 	struct i915_gem_context *preempt_context;
-	struct i915_vma *semaphore;
 
 	struct drm_dma_handle *status_page_dmah;
 	struct resource mch_res;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 23e2eb3e60fe..c7a7a9e39daf 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4957,7 +4957,7 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
 
 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
 {
-	if (INTEL_INFO(dev_priv)->gen < 6)
+	if (INTEL_GEN(dev_priv) < 6)
 		return false;
 
 	/* TODO: make semaphores and Execlists play nicely together */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 653fb69e7ecb..9b6e73015387 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -761,8 +761,6 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 				"WA batchbuffer", ee->wa_batchbuffer);
 	}
 
-	print_error_obj(m, NULL, "Semaphores", error->semaphore);
-
 	print_error_obj(m, NULL, "GuC log buffer", error->guc_log);
 
 	if (error->overlay)
@@ -856,7 +854,6 @@ void __i915_gpu_state_free(struct kref *error_ref)
 			kfree(ee->waiters);
 	}
 
-	i915_error_object_free(error->semaphore);
 	i915_error_object_free(error->guc_log);
 
 	for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
@@ -1071,34 +1068,6 @@ gen8_engine_sync_index(struct intel_engine_cs *engine,
 	return idx;
 }
 
-static void gen8_record_semaphore_state(struct i915_gpu_state *error,
-					struct intel_engine_cs *engine,
-					struct drm_i915_error_engine *ee)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	struct intel_engine_cs *to;
-	enum intel_engine_id id;
-
-	if (!error->semaphore)
-		return;
-
-	for_each_engine(to, dev_priv, id) {
-		int idx;
-		u16 signal_offset;
-		u32 *tmp;
-
-		if (engine == to)
-			continue;
-
-		signal_offset =
-			(GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4;
-		tmp = error->semaphore->pages[0];
-		idx = gen8_engine_sync_index(engine, to);
-
-		ee->semaphore_mboxes[idx] = tmp[signal_offset];
-	}
-}
-
 static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
 					struct drm_i915_error_engine *ee)
 {
@@ -1173,10 +1142,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 	if (INTEL_GEN(dev_priv) >= 6) {
 		ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base));
 		ee->fault_reg = I915_READ(RING_FAULT_REG(engine));
-		if (INTEL_GEN(dev_priv) >= 8)
-			gen8_record_semaphore_state(error, engine, ee);
-		else
-			gen6_record_semaphore_state(engine, ee);
+		gen6_record_semaphore_state(engine, ee);
 	}
 
 	if (INTEL_GEN(dev_priv) >= 4) {
@@ -1406,9 +1372,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	int i;
 
-	error->semaphore =
-		i915_error_object_create(dev_priv, dev_priv->semaphore);
-
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
 		struct intel_engine_cs *engine = dev_priv->engine[i];
 		struct drm_i915_error_engine *ee = &error->engine[i];
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 73c401468b0c..d2ab52fde031 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -354,18 +354,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 		if (HAS_VEBOX(dev_priv))
 			I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
 	}
-	if (dev_priv->semaphore) {
-		struct page *page = i915_vma_first_page(dev_priv->semaphore);
-		void *semaphores;
-
-		/* Semaphores are in noncoherent memory, flush to be safe */
-		semaphores = kmap_atomic(page);
-		memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
-		       0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
-		drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
-				       I915_NUM_ENGINES * gen8_semaphore_seqno_size);
-		kunmap_atomic(semaphores);
-	}
 
 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
 	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 12ac270a5f93..c8383c30b142 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -27,13 +27,9 @@
 static bool
 ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr)
 {
-	if (INTEL_GEN(engine->i915) >= 8) {
-		return (ipehr >> 23) == 0x1c;
-	} else {
-		ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
-		return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
-				 MI_SEMAPHORE_REGISTER);
-	}
+	ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
+	return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
+			 MI_SEMAPHORE_REGISTER);
 }
 
 static struct intel_engine_cs *
@@ -41,31 +37,20 @@ semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr,
 				 u64 offset)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
+	u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;
 	struct intel_engine_cs *signaller;
 	enum intel_engine_id id;
 
-	if (INTEL_GEN(dev_priv) >= 8) {
-		for_each_engine(signaller, dev_priv, id) {
-			if (engine == signaller)
-				continue;
-
-			if (offset == signaller->semaphore.signal_ggtt[engine->hw_id])
-				return signaller;
-		}
-	} else {
-		u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;
-
-		for_each_engine(signaller, dev_priv, id) {
-			if(engine == signaller)
-				continue;
+	for_each_engine(signaller, dev_priv, id) {
+		if(engine == signaller)
+			continue;
 
-			if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
-				return signaller;
-		}
+		if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
+			return signaller;
 	}
 
-	DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x, offset 0x%016llx\n",
-			 engine->name, ipehr, offset);
+	DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n",
+			 engine->name, ipehr);
 
 	return ERR_PTR(-ENODEV);
 }
@@ -135,11 +120,6 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
 		return NULL;
 
 	*seqno = ioread32(vaddr + head + 4) + 1;
-	if (INTEL_GEN(dev_priv) >= 8) {
-		offset = ioread32(vaddr + head + 12);
-		offset <<= 32;
-		offset |= ioread32(vaddr + head + 8);
-	}
 	return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
 }
 
@@ -273,7 +253,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 		return ENGINE_WAIT_KICK;
 	}
 
-	if (INTEL_GEN(dev_priv) >= 6 && tmp & RING_WAIT_SEMAPHORE) {
+	if (IS_GEN(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) {
 		switch (semaphore_passed(engine)) {
 		default:
 			return ENGINE_DEAD;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 05e01446b00b..ffaf9996feb5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -337,50 +337,6 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 	return 0;
 }
 
-static int
-gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
-{
-	u32 flags;
-	u32 *cs;
-
-	cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	flags = PIPE_CONTROL_CS_STALL;
-
-	if (mode & EMIT_FLUSH) {
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-		flags |= PIPE_CONTROL_FLUSH_ENABLE;
-	}
-	if (mode & EMIT_INVALIDATE) {
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
-
-		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
-		cs = gen8_emit_pipe_control(cs,
-					    PIPE_CONTROL_CS_STALL |
-					    PIPE_CONTROL_STALL_AT_SCOREBOARD,
-					    0);
-	}
-
-	cs = gen8_emit_pipe_control(cs, flags,
-				    i915_ggtt_offset(req->engine->scratch) +
-				    2 * CACHELINE_BYTES);
-
-	intel_ring_advance(req, cs);
-
-	return 0;
-}
-
 static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -424,7 +380,6 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
 	} else if (IS_GEN6(dev_priv)) {
 		mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
 	} else {
-		/* XXX: gen8 returns to sanity */
 		mmio = RING_HWS_PGA(engine->mmio_base);
 	}
 
@@ -434,13 +389,7 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
 	I915_WRITE(mmio, engine->status_page.ggtt_offset);
 	POSTING_READ(mmio);
 
-	/*
-	 * Flush the TLB for this page
-	 *
-	 * FIXME: These two bits have disappeared on gen8, so a question
-	 * arises: do we still need this and if so how should we go about
-	 * invalidating the TLB?
-	 */
+	/* Flush the TLB for this page */
 	if (IS_GEN(dev_priv, 6, 7)) {
 		i915_reg_t reg = RING_INSTPM(engine->mmio_base);
 
@@ -604,8 +553,6 @@ static void reset_ring_common(struct intel_engine_cs *engine,
 		struct intel_context *ce = &request->ctx->engine[engine->id];
 		struct i915_hw_ppgtt *ppgtt;
 
-		/* FIXME consider gen8 reset */
-
 		if (ce->state) {
 			I915_WRITE(CCID,
 				   i915_ggtt_offset(ce->state) |
@@ -706,62 +653,6 @@ static int init_render_ring(struct intel_engine_cs *engine)
 	return init_workarounds_ring(engine);
 }
 
-static void render_ring_cleanup(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	i915_vma_unpin_and_release(&dev_priv->semaphore);
-}
-
-static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs)
-{
-	struct drm_i915_private *dev_priv = req->i915;
-	struct intel_engine_cs *waiter;
-	enum intel_engine_id id;
-
-	for_each_engine(waiter, dev_priv, id) {
-		u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
-		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
-			continue;
-
-		*cs++ = GFX_OP_PIPE_CONTROL(6);
-		*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE |
-			PIPE_CONTROL_CS_STALL;
-		*cs++ = lower_32_bits(gtt_offset);
-		*cs++ = upper_32_bits(gtt_offset);
-		*cs++ = req->global_seqno;
-		*cs++ = 0;
-		*cs++ = MI_SEMAPHORE_SIGNAL |
-			MI_SEMAPHORE_TARGET(waiter->hw_id);
-		*cs++ = 0;
-	}
-
-	return cs;
-}
-
-static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs)
-{
-	struct drm_i915_private *dev_priv = req->i915;
-	struct intel_engine_cs *waiter;
-	enum intel_engine_id id;
-
-	for_each_engine(waiter, dev_priv, id) {
-		u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
-		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
-			continue;
-
-		*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
-		*cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT;
-		*cs++ = upper_32_bits(gtt_offset);
-		*cs++ = req->global_seqno;
-		*cs++ = MI_SEMAPHORE_SIGNAL |
-			MI_SEMAPHORE_TARGET(waiter->hw_id);
-		*cs++ = 0;
-	}
-
-	return cs;
-}
-
 static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs)
 {
 	struct drm_i915_private *dev_priv = req->i915;
@@ -844,70 +735,6 @@ static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
 				    req->engine->semaphore.signal(req, cs));
 }
 
-static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req,
-					u32 *cs)
-{
-	struct intel_engine_cs *engine = req->engine;
-
-	if (engine->semaphore.signal)
-		cs = engine->semaphore.signal(req, cs);
-
-	*cs++ = GFX_OP_PIPE_CONTROL(6);
-	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
-		PIPE_CONTROL_QW_WRITE;
-	*cs++ = intel_hws_seqno_address(engine);
-	*cs++ = 0;
-	*cs++ = req->global_seqno;
-	/* We're thrashing one dword of HWS. */
-	*cs++ = 0;
-	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
-
-	req->tail = intel_ring_offset(req, cs);
-	assert_ring_tail_valid(req->ring, req->tail);
-}
-
-static const int gen8_render_emit_breadcrumb_sz = 8;
-
-/**
- * intel_ring_sync - sync the waiter to the signaller on seqno
- *
- * @waiter - ring that is waiting
- * @signaller - ring which has, or will signal
- * @seqno - seqno which the waiter will block on
- */
-
-static int
-gen8_ring_sync_to(struct drm_i915_gem_request *req,
-		  struct drm_i915_gem_request *signal)
-{
-	struct drm_i915_private *dev_priv = req->i915;
-	u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id);
-	struct i915_hw_ppgtt *ppgtt;
-	u32 *cs;
-
-	cs = intel_ring_begin(req, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT |
-		MI_SEMAPHORE_SAD_GTE_SDD;
-	*cs++ = signal->global_seqno;
-	*cs++ = lower_32_bits(offset);
-	*cs++ = upper_32_bits(offset);
-	intel_ring_advance(req, cs);
-
-	/* When the !RCS engines idle waiting upon a semaphore, they lose their
-	 * pagetables and we must reload them before executing the batch.
-	 * We do this on the i915_switch_context() following the wait and
-	 * before the dispatch.
-	 */
-	ppgtt = req->ctx->ppgtt;
-	if (ppgtt && req->engine->id != RCS)
-		ppgtt->pd_dirty_rings |= intel_engine_flag(req->engine);
-	return 0;
-}
-
 static int
 gen6_ring_sync_to(struct drm_i915_gem_request *req,
 		  struct drm_i915_gem_request *signal)
@@ -1083,25 +910,6 @@ hsw_vebox_irq_disable(struct intel_engine_cs *engine)
 	gen6_mask_pm_irq(dev_priv, engine->irq_enable_mask);
 }
 
-static void
-gen8_irq_enable(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	I915_WRITE_IMR(engine,
-		       ~(engine->irq_enable_mask |
-			 engine->irq_keep_mask));
-	POSTING_READ_FW(RING_IMR(engine->mmio_base));
-}
-
-static void
-gen8_irq_disable(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
-}
-
 static int
 i965_emit_bb_start(struct drm_i915_gem_request *req,
 		   u64 offset, u32 length,
@@ -1748,8 +1556,6 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 		return PTR_ERR(cs);
 
 	cmd = MI_FLUSH_DW;
-	if (INTEL_GEN(req->i915) >= 8)
-		cmd += 1;
 
 	/* We always require a command barrier so that subsequent
 	 * commands, such as breadcrumb interrupts, are strictly ordered
@@ -1769,38 +1575,9 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 
 	*cs++ = cmd;
 	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
-	if (INTEL_GEN(req->i915) >= 8) {
-		*cs++ = 0; /* upper addr */
-		*cs++ = 0; /* value */
-	} else  {
-		*cs++ = 0;
-		*cs++ = MI_NOOP;
-	}
-	intel_ring_advance(req, cs);
-	return 0;
-}
-
-static int
-gen8_emit_bb_start(struct drm_i915_gem_request *req,
-		   u64 offset, u32 len,
-		   unsigned int dispatch_flags)
-{
-	bool ppgtt = USES_PPGTT(req->i915) &&
-			!(dispatch_flags & I915_DISPATCH_SECURE);
-	u32 *cs;
-
-	cs = intel_ring_begin(req, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	/* FIXME(BDW): Address space and security selectors. */
-	*cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags &
-		I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
-	*cs++ = lower_32_bits(offset);
-	*cs++ = upper_32_bits(offset);
+	*cs++ = 0;
 	*cs++ = MI_NOOP;
 	intel_ring_advance(req, cs);
-
 	return 0;
 }
 
@@ -1857,8 +1634,6 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 		return PTR_ERR(cs);
 
 	cmd = MI_FLUSH_DW;
-	if (INTEL_GEN(req->i915) >= 8)
-		cmd += 1;
 
 	/* We always require a command barrier so that subsequent
 	 * commands, such as breadcrumb interrupts, are strictly ordered
@@ -1877,13 +1652,8 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 		cmd |= MI_INVALIDATE_TLB;
 	*cs++ = cmd;
 	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
-	if (INTEL_GEN(req->i915) >= 8) {
-		*cs++ = 0; /* upper addr */
-		*cs++ = 0; /* value */
-	} else  {
-		*cs++ = 0;
-		*cs++ = MI_NOOP;
-	}
+	*cs++ = 0;
+	*cs++ = MI_NOOP;
 	intel_ring_advance(req, cs);
 
 	return 0;
@@ -1892,110 +1662,61 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
 				       struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_object *obj;
-	int ret, i;
+	int i;
 
 	if (!i915_modparams.semaphores)
 		return;
 
-	if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) {
-		struct i915_vma *vma;
-
-		obj = i915_gem_object_create(dev_priv, PAGE_SIZE);
-		if (IS_ERR(obj))
-			goto err;
-
-		vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
-		if (IS_ERR(vma))
-			goto err_obj;
-
-		ret = i915_gem_object_set_to_gtt_domain(obj, false);
-		if (ret)
-			goto err_obj;
-
-		ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
-		if (ret)
-			goto err_obj;
-
-		dev_priv->semaphore = vma;
-	}
-
-	if (INTEL_GEN(dev_priv) >= 8) {
-		u32 offset = i915_ggtt_offset(dev_priv->semaphore);
-
-		engine->semaphore.sync_to = gen8_ring_sync_to;
-		engine->semaphore.signal = gen8_xcs_signal;
-
-		for (i = 0; i < I915_NUM_ENGINES; i++) {
-			u32 ring_offset;
-
-			if (i != engine->id)
-				ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i);
-			else
-				ring_offset = MI_SEMAPHORE_SYNC_INVALID;
+	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
+	engine->semaphore.sync_to = gen6_ring_sync_to;
+	engine->semaphore.signal = gen6_signal;
 
-			engine->semaphore.signal_ggtt[i] = ring_offset;
-		}
-	} else if (INTEL_GEN(dev_priv) >= 6) {
-		engine->semaphore.sync_to = gen6_ring_sync_to;
-		engine->semaphore.signal = gen6_signal;
-
-		/*
-		 * The current semaphore is only applied on pre-gen8
-		 * platform.  And there is no VCS2 ring on the pre-gen8
-		 * platform. So the semaphore between RCS and VCS2 is
-		 * initialized as INVALID.  Gen8 will initialize the
-		 * sema between VCS2 and RCS later.
-		 */
-		for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
-			static const struct {
-				u32 wait_mbox;
-				i915_reg_t mbox_reg;
-			} sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
-				[RCS_HW] = {
-					[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
-					[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
-					[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
-				},
-				[VCS_HW] = {
-					[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
-					[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
-					[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
-				},
-				[BCS_HW] = {
-					[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
-					[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
-					[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
-				},
-				[VECS_HW] = {
-					[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
-					[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
-					[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
-				},
-			};
+	/*
+	 * The current semaphore is only applied on pre-gen8
+	 * platform.  And there is no VCS2 ring on the pre-gen8
+	 * platform. So the semaphore between RCS and VCS2 is
+	 * initialized as INVALID.
+	 */
+	for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
+		static const struct {
 			u32 wait_mbox;
 			i915_reg_t mbox_reg;
+		} sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
+			[RCS_HW] = {
+				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
+				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
+				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
+			},
+			[VCS_HW] = {
+				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
+				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
+				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
+			},
+			[BCS_HW] = {
+				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
+				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
+				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
+			},
+			[VECS_HW] = {
+				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
+				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
+				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
+			},
+		};
+		u32 wait_mbox;
+		i915_reg_t mbox_reg;
 
-			if (i == engine->hw_id) {
-				wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
-				mbox_reg = GEN6_NOSYNC;
-			} else {
-				wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
-				mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
-			}
-
-			engine->semaphore.mbox.wait[i] = wait_mbox;
-			engine->semaphore.mbox.signal[i] = mbox_reg;
+		if (i == engine->hw_id) {
+			wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
+			mbox_reg = GEN6_NOSYNC;
+		} else {
+			wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
+			mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
 		}
-	}
-
-	return;
 
-err_obj:
-	i915_gem_object_put(obj);
-err:
-	DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n");
-	i915_modparams.semaphores = 0;
+		engine->semaphore.mbox.wait[i] = wait_mbox;
+		engine->semaphore.mbox.signal[i] = mbox_reg;
+	}
 }
 
 static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
@@ -2003,11 +1724,7 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
 {
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift;
 
-	if (INTEL_GEN(dev_priv) >= 8) {
-		engine->irq_enable = gen8_irq_enable;
-		engine->irq_disable = gen8_irq_disable;
-		engine->irq_seqno_barrier = gen6_seqno_barrier;
-	} else if (INTEL_GEN(dev_priv) >= 6) {
+	if (INTEL_GEN(dev_priv) >= 6) {
 		engine->irq_enable = gen6_irq_enable;
 		engine->irq_disable = gen6_irq_disable;
 		engine->irq_seqno_barrier = gen6_seqno_barrier;
@@ -2042,6 +1759,9 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 				      struct intel_engine_cs *engine)
 {
+	/* gen8+ are only supported with execlists */
+	GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
+
 	intel_ring_init_irq(dev_priv, engine);
 	intel_ring_init_semaphores(dev_priv, engine);
 
@@ -2061,20 +1781,14 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 		engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
 
 		num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
-		if (INTEL_GEN(dev_priv) >= 8) {
-			engine->emit_breadcrumb_sz += num_rings * 6;
-		} else {
-			engine->emit_breadcrumb_sz += num_rings * 3;
-			if (num_rings & 1)
-				engine->emit_breadcrumb_sz++;
-		}
+		engine->emit_breadcrumb_sz += num_rings * 3;
+		if (num_rings & 1)
+			engine->emit_breadcrumb_sz++;
 	}
 
 	engine->set_default_submission = i9xx_set_default_submission;
 
-	if (INTEL_GEN(dev_priv) >= 8)
-		engine->emit_bb_start = gen8_emit_bb_start;
-	else if (INTEL_GEN(dev_priv) >= 6)
+	if (INTEL_GEN(dev_priv) >= 6)
 		engine->emit_bb_start = gen6_emit_bb_start;
 	else if (INTEL_GEN(dev_priv) >= 4)
 		engine->emit_bb_start = i965_emit_bb_start;
@@ -2094,20 +1808,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
 	if (HAS_L3_DPF(dev_priv))
 		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
 
-	if (INTEL_GEN(dev_priv) >= 8) {
-		engine->init_context = intel_rcs_ctx_init;
-		engine->emit_breadcrumb = gen8_render_emit_breadcrumb;
-		engine->emit_breadcrumb_sz = gen8_render_emit_breadcrumb_sz;
-		engine->emit_flush = gen8_render_ring_flush;
-		if (i915_modparams.semaphores) {
-			int num_rings;
-
-			engine->semaphore.signal = gen8_rcs_signal;
-
-			num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
-			engine->emit_breadcrumb_sz += num_rings * 8;
-		}
-	} else if (INTEL_GEN(dev_priv) >= 6) {
+	if (INTEL_GEN(dev_priv) >= 6) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen7_render_ring_flush;
 		if (IS_GEN6(dev_priv))
@@ -2126,7 +1827,6 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
 		engine->emit_bb_start = hsw_emit_bb_start;
 
 	engine->init_hw = init_render_ring;
-	engine->cleanup = render_ring_cleanup;
 
 	ret = intel_init_ring_buffer(engine);
 	if (ret)
@@ -2156,8 +1856,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
 		if (IS_GEN6(dev_priv))
 			engine->set_default_submission = gen6_bsd_set_default_submission;
 		engine->emit_flush = gen6_bsd_ring_flush;
-		if (INTEL_GEN(dev_priv) < 8)
-			engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
+		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 	} else {
 		engine->mmio_base = BSD_RING_BASE;
 		engine->emit_flush = bsd_ring_flush;
@@ -2177,8 +1876,7 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
 	intel_ring_default_vfuncs(dev_priv, engine);
 
 	engine->emit_flush = gen6_ring_flush;
-	if (INTEL_GEN(dev_priv) < 8)
-		engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
+	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 
 	return intel_init_ring_buffer(engine);
 }
@@ -2190,12 +1888,9 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
 	intel_ring_default_vfuncs(dev_priv, engine);
 
 	engine->emit_flush = gen6_ring_flush;
-
-	if (INTEL_GEN(dev_priv) < 8) {
-		engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
-		engine->irq_enable = hsw_vebox_irq_enable;
-		engine->irq_disable = hsw_vebox_irq_disable;
-	}
+	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
+	engine->irq_enable = hsw_vebox_irq_enable;
+	engine->irq_disable = hsw_vebox_irq_disable;
 
 	return intel_init_ring_buffer(engine);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 69ad875fd011..e7a5d90e3d52 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -46,16 +46,6 @@ struct intel_hw_status_page {
 /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  */
-#define gen8_semaphore_seqno_size sizeof(uint64_t)
-#define GEN8_SEMAPHORE_OFFSET(__from, __to)			     \
-	(((__from) * I915_NUM_ENGINES  + (__to)) * gen8_semaphore_seqno_size)
-#define GEN8_SIGNAL_OFFSET(__ring, to)			     \
-	(dev_priv->semaphore->node.start + \
-	 GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
-#define GEN8_WAIT_OFFSET(__ring, from)			     \
-	(dev_priv->semaphore->node.start + \
-	 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
-
 enum intel_engine_hangcheck_action {
 	ENGINE_IDLE = 0,
 	ENGINE_WAIT,
@@ -464,18 +454,15 @@ struct intel_engine_cs {
 	 *  ie. transpose of f(x, y)
 	 */
 	struct {
-		union {
 #define GEN6_SEMAPHORE_LAST	VECS_HW
 #define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
 #define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
-			struct {
-				/* our mbox written by others */
-				u32		wait[GEN6_NUM_SEMAPHORES];
-				/* mboxes this ring signals to */
-				i915_reg_t	signal[GEN6_NUM_SEMAPHORES];
-			} mbox;
-			u64		signal_ggtt[I915_NUM_ENGINES];
-		};
+		struct {
+			/* our mbox written by others */
+			u32		wait[GEN6_NUM_SEMAPHORES];
+			/* mboxes this ring signals to */
+			i915_reg_t	signal[GEN6_NUM_SEMAPHORES];
+		} mbox;
 
 		/* AKA wait() */
 		int	(*sync_to)(struct drm_i915_gem_request *req,
-- 
2.15.0.rc2



More information about the Intel-gfx mailing list