[Intel-gfx] [PATCH v3 6/6] drm/i915/gen8: Add WaRsRestoreWithPerCtxtBb workaround
Arun Siluvery
arun.siluvery at linux.intel.com
Fri Jun 5 03:34:06 PDT 2015
In the per-context w/a batch buffer, add
+WaRsRestoreWithPerCtxtBb
v2: The first version modified the definitions of MI_LOAD_REGISTER_MEM and
MI_LOAD_REGISTER_REG; add GEN8-specific defines for these instructions
instead, so as not to break any future users of the existing definitions
(Michel)
Signed-off-by: Rafael Barbalho <rafael.barbalho at intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
---
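Note (not part of the commit message): regarding the v2 comment on
MI_LOAD_REGISTER_MEM/MI_LOAD_REGISTER_REG, the small standalone sketch below
illustrates why the GEN8 variants carry different DWord-length fields than the
existing defines; a second sketch after the diff walks through the cacheline
constraint on the LRM/LRR/BB_END tail. Only MI_INSTR() and the two *_GEN8
defines are mirrored from i915_reg.h, everything else (buffer contents,
register value, addresses) is made up for illustration.

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Illustration only: MI_INSTR() packs the MI opcode into bits 28:23 and
     * the "DWord Length" field (total DWords minus two) into the low bits,
     * as in i915_reg.h.  On GEN8 the LRM payload carries a 64-bit memory
     * address (4 DWords total) and LRR carries source + destination
     * registers (3 DWords total), hence the new *_GEN8 defines with length
     * fields 2 and 1 rather than changing the existing definitions.
     */
    #define MI_INSTR(opcode, flags)    (((opcode) << 23) | (flags))
    #define MI_LOAD_REGISTER_MEM_GEN8  MI_INSTR(0x29, 2)   /* 4 DWords */
    #define MI_LOAD_REGISTER_REG_GEN8  MI_INSTR(0x2A, 1)   /* 3 DWords */

    int main(void)
    {
            /* hypothetical 4-DWord GEN8 LRM: header, register, addr low, addr high */
            uint32_t lrm[4] = { MI_LOAD_REGISTER_MEM_GEN8,
                                0x20c0 /* INSTPM */, 0x1000, 0 };

            printf("LRM header 0x%08x, length field %u => %u DWords\n",
                   (unsigned)lrm[0], (unsigned)(lrm[0] & 0xff),
                   (unsigned)(lrm[0] & 0xff) + 2);
            return 0;
    }
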
drivers/gpu/drm/i915/i915_reg.h | 26 ++++++++++++++++++
drivers/gpu/drm/i915/intel_lrc.c | 59 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 85 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 33b0ff1..6928162 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -347,6 +347,26 @@
#define MI_INVALIDATE_BSD (1<<7)
#define MI_FLUSH_DW_USE_GTT (1<<2)
#define MI_FLUSH_DW_USE_PPGTT (0<<2)
+#define MI_ATOMIC(len) MI_INSTR(0x2F, (len-2))
+#define MI_ATOMIC_MEMORY_TYPE_GGTT (1<<22)
+#define MI_ATOMIC_INLINE_DATA (1<<18)
+#define MI_ATOMIC_CS_STALL (1<<17)
+#define MI_ATOMIC_RETURN_DATA_CTL (1<<16)
+#define MI_ATOMIC_OP_MASK(op) ((op) << 8)
+#define MI_ATOMIC_AND MI_ATOMIC_OP_MASK(0x01)
+#define MI_ATOMIC_OR MI_ATOMIC_OP_MASK(0x02)
+#define MI_ATOMIC_XOR MI_ATOMIC_OP_MASK(0x03)
+#define MI_ATOMIC_MOVE MI_ATOMIC_OP_MASK(0x04)
+#define MI_ATOMIC_INC MI_ATOMIC_OP_MASK(0x05)
+#define MI_ATOMIC_DEC MI_ATOMIC_OP_MASK(0x06)
+#define MI_ATOMIC_ADD MI_ATOMIC_OP_MASK(0x07)
+#define MI_ATOMIC_SUB MI_ATOMIC_OP_MASK(0x08)
+#define MI_ATOMIC_RSUB MI_ATOMIC_OP_MASK(0x09)
+#define MI_ATOMIC_IMAX MI_ATOMIC_OP_MASK(0x0A)
+#define MI_ATOMIC_IMIN MI_ATOMIC_OP_MASK(0x0B)
+#define MI_ATOMIC_UMAX MI_ATOMIC_OP_MASK(0x0C)
+#define MI_ATOMIC_UMIN MI_ATOMIC_OP_MASK(0x0D)
+
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -453,6 +473,10 @@
#define MI_REPORT_PERF_COUNT_GGTT (1<<0)
#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 0)
#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0)
+#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
+#define MI_LRM_USE_GLOBAL_GTT (1<<22)
+#define MI_LRM_ASYNC_MODE_ENABLE (1<<21)
+#define MI_LOAD_REGISTER_REG_GEN8 MI_INSTR(0x2A, 1)
#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0)
#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0)
#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0)
@@ -1799,6 +1823,8 @@ enum skl_disp_power_wells {
#define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12)
#define GEN8_FF_DOP_CLOCK_GATE_DISABLE (1<<10)
+#define GEN8_RS_PREEMPT_STATUS 0x215C
+
/* Fuse readout registers for GT */
#define CHV_FUSE_GT (VLV_DISPLAY_BASE + 0x2168)
#define CHV_FGT_DISABLE_SS0 (1 << 10)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index bca137e..61b1e22 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1155,6 +1155,13 @@ static int gen8_init_perctx_bb(struct intel_engine_cs *ring)
int end;
struct page *page;
uint32_t *reg_state;
+ u32 scratch_addr;
+ unsigned long flags = 0;
+
+ if (ring->scratch.obj == NULL) {
+ DRM_ERROR("scratch page not allocated for %s\n", ring->name);
+ return -EINVAL;
+ }
page = i915_gem_object_get_page(ring->ctx_wa.obj, 0);
reg_state = kmap_atomic(page);
@@ -1169,9 +1176,61 @@ static int gen8_init_perctx_bb(struct intel_engine_cs *ring)
return -EINVAL;
}
+ /* The actual scratch location is at a 128-byte offset */
+ scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
+ scratch_addr |= PIPE_CONTROL_GLOBAL_GTT;
+
/* WaDisableCtxRestoreArbitration:bdw,chv */
reg_state[index++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ /*
+ * As per BSpec, to work around a known HW issue, SW must perform the
+ * below programming sequence prior to programming MI_BATCH_BUFFER_END.
+ *
+ * This is only applicable for Gen8.
+ */
+
+ /* WaRsRestoreWithPerCtxtBb:bdw,chv */
+ reg_state[index++] = MI_LOAD_REGISTER_IMM(1);
+ reg_state[index++] = INSTPM;
+ reg_state[index++] = _MASKED_BIT_DISABLE(INSTPM_FORCE_ORDERING);
+
+ flags = MI_ATOMIC_MEMORY_TYPE_GGTT |
+ MI_ATOMIC_INLINE_DATA |
+ MI_ATOMIC_CS_STALL |
+ MI_ATOMIC_RETURN_DATA_CTL |
+ MI_ATOMIC_MOVE;
+
+ reg_state[index++] = MI_ATOMIC(5) | flags;
+ reg_state[index++] = scratch_addr;
+ reg_state[index++] = 0;
+ reg_state[index++] = _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING);
+ reg_state[index++] = _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING);
+
+ /*
+ * BSpec says MI_LOAD_REGISTER_MEM, MI_LOAD_REGISTER_REG and
+ * MI_BATCH_BUFFER_END instructions in this sequence need to be
+ * in the same cacheline.
+ */
+ while (((unsigned long) (reg_state + index) % CACHELINE_BYTES) != 0)
+ reg_state[index++] = MI_NOOP;
+
+ reg_state[index++] = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_LRM_USE_GLOBAL_GTT |
+ MI_LRM_ASYNC_MODE_ENABLE;
+ reg_state[index++] = INSTPM;
+ reg_state[index++] = scratch_addr;
+ reg_state[index++] = 0;
+
+ /*
+ * BSpec says there should not be any commands programmed
+ * between MI_LOAD_REGISTER_REG and MI_BATCH_BUFFER_END, so
+ * do not add any new commands here.
+ */
+ reg_state[index++] = MI_LOAD_REGISTER_REG_GEN8;
+ reg_state[index++] = GEN8_RS_PREEMPT_STATUS;
+ reg_state[index++] = GEN8_RS_PREEMPT_STATUS;
+
/* padding */
while (index < end)
reg_state[index++] = MI_NOOP;
--
2.3.0
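
Note (not part of the patch): for readers following the BSpec comments in the
per-context batch above, the fragment below is a minimal, self-contained
sketch of the tail of the sequence, showing only the cacheline arithmetic.
The 4-DWord LRM, the 3-DWord LRR and the 1-DWord MI_BATCH_BUFFER_END total
32 bytes, so padding the first of them to a 64-byte boundary is enough to
keep all three in one cacheline. The #defines mirror i915_reg.h;
emit_wa_tail(), the starting index and the scratch address are hypothetical
and are not the driver's code, and where the driver really places
MI_BATCH_BUFFER_END is decided elsewhere in the series.

    #include <assert.h>
    #include <stdint.h>

    #define CACHELINE_BYTES            64
    #define MI_INSTR(opcode, flags)    (((opcode) << 23) | (flags))
    #define MI_NOOP                    MI_INSTR(0, 0)
    #define MI_BATCH_BUFFER_END        MI_INSTR(0x0a, 0)
    #define MI_LOAD_REGISTER_MEM_GEN8  MI_INSTR(0x29, 2)
    #define MI_LRM_USE_GLOBAL_GTT      (1 << 22)
    #define MI_LRM_ASYNC_MODE_ENABLE   (1 << 21)
    #define MI_LOAD_REGISTER_REG_GEN8  MI_INSTR(0x2a, 1)
    #define INSTPM                     0x20c0   /* render INSTPM, as in i915_reg.h */
    #define GEN8_RS_PREEMPT_STATUS     0x215c

    /*
     * Pad with MI_NOOPs until the next DWord starts a 64-byte cacheline,
     * then emit LRM + LRR + BB_END back to back: 4 + 3 + 1 = 8 DWords =
     * 32 bytes, so all three commands land in the same cacheline.  cmd[]
     * is assumed to start cacheline-aligned (true for a page), and
     * scratch_addr is the GGTT address that the earlier MI_ATOMIC MOVE
     * filled with the masked INSTPM value.
     */
    static uint32_t emit_wa_tail(uint32_t *cmd, uint32_t index,
                                 uint32_t scratch_addr)
    {
            uint32_t start;

            /* index counts DWords; stop once the byte offset is 64B aligned */
            while ((index * sizeof(uint32_t)) % CACHELINE_BYTES)
                    cmd[index++] = MI_NOOP;
            start = index;

            /* reload INSTPM from the scratch location written by MI_ATOMIC */
            cmd[index++] = MI_LOAD_REGISTER_MEM_GEN8 |
                           MI_LRM_USE_GLOBAL_GTT |
                           MI_LRM_ASYNC_MODE_ENABLE;
            cmd[index++] = INSTPM;
            cmd[index++] = scratch_addr;    /* address low  */
            cmd[index++] = 0;               /* address high */

            /* LRR of GEN8_RS_PREEMPT_STATUS onto itself, as in the patch */
            cmd[index++] = MI_LOAD_REGISTER_REG_GEN8;
            cmd[index++] = GEN8_RS_PREEMPT_STATUS;
            cmd[index++] = GEN8_RS_PREEMPT_STATUS;

            cmd[index++] = MI_BATCH_BUFFER_END;

            /* all eight DWords share one cacheline */
            assert(start * sizeof(uint32_t) / CACHELINE_BYTES ==
                   (index - 1) * sizeof(uint32_t) / CACHELINE_BYTES);
            return index;
    }

    int main(void)
    {
            uint32_t cmd[64] = { MI_NOOP };  /* stand-in for the per-ctx BB page */

            /* starting index and scratch GGTT offset are arbitrary examples */
            (void)emit_wa_tail(cmd, 5, 0x2000);
            return 0;
    }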