[Intel-gfx] [PATCH] drm/i915: test fix for missed irqs on IVB

Tue Apr 17 07:44:31 CEST 2012

This is based on a workaround implemented in the windows driver. I've
tried a similar fix for Sandybridge with no luck, but it is a bit
different for IVB. This is pretty experimental at this point; hopefully
it helps anyone having any missed IRQ issues ;)

Cc: Jesse Barnes <jesse.barnes at intel.com>
Cc: Michael Larabel <Michael at phoronix.com>
Signed-off-by: Ben Widawsky <benjamin.widawsky at intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h         |    2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c |   47 +++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index cb55444..e925324 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -210,6 +210,7 @@
 #define   MI_MEM_VIRTUAL	(1 << 22) /* 965+ only */
 #define MI_STORE_DWORD_INDEX	MI_INSTR(0x21, 1)
 #define   MI_STORE_DWORD_INDEX_SHIFT 2
+#define MI_STORE_REGISTER_MEM	MI_INSTR(0x24, 1)
 /* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:
  * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw
  *   simply ignores the register load under certain conditions.
@@ -217,6 +218,7 @@
  *   address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
  */
 #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*x-1)
+#define MI_LOAD_REGISTER_MEM	MI_INSTR(0x29, 1)
 #define MI_FLUSH_DW		MI_INSTR(0x26, 1) /* for GEN6 */
 #define   MI_INVALIDATE_TLB	(1<<18)
 #define   MI_INVALIDATE_BSD	(1<<7)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 465a7da..40976d6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -464,6 +464,50 @@ gen6_add_request(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+static int
+ivb_render_add_request(struct intel_ring_buffer *ring,
+		       u32 *seqno)
+{
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 1024;
+	u32 mbox1_reg;
+	u32 mbox2_reg;
+	int ret;
+
+	ret = intel_ring_begin(ring, 16);
+	if (ret)
+		return ret;
+
+	mbox1_reg = ring->signal_mbox[0];
+	mbox2_reg = ring->signal_mbox[1];
+
+	*seqno = i915_gem_next_request_seqno(ring);
+
+	update_mboxes(ring, *seqno, mbox1_reg);
+	update_mboxes(ring, *seqno, mbox2_reg);
+	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
+	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
+	intel_ring_emit(ring, *seqno);
+
+	/* Experimental workaround. Doing a load/store of the same reg should
+	 * cause the HW to figure out the hazard and stall the pipeline.
+	 * The choice of CCID is just a random reg of < 40000 that doesn't have
+	 * much impact. scratch_addr is also random at this point.
+	 */
+	intel_ring_emit(ring, MI_STORE_REGISTER_MEM);
+	intel_ring_emit(ring, CCID);
+	intel_ring_emit(ring, scratch_addr);
+	intel_ring_emit(ring, MI_LOAD_REGISTER_MEM);
+	intel_ring_emit(ring, CCID);
+	intel_ring_emit(ring, scratch_addr);
+
+	/* The user interrupt is queued after the load/store pair above. */
+	intel_ring_emit(ring, MI_USER_INTERRUPT);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
 /**
  * intel_ring_sync - sync the waiter to the signaller on seqno
  *
@@ -1459,6 +1503,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->get_seqno = pc_render_get_seqno;
 	}
 
+	if (INTEL_INFO(dev)->gen == 7)
+		ring->add_request = ivb_render_add_request;
+
 	if (!I915_NEED_GFX_HWS(dev)) {
 		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
 		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
-- 
1.7.10