[Intel-gfx] [PATCH] [v2] drm/i915: Exec flag to force non IA-Coherent cache for Gen9+

Artur Harasimiuk artur.harasimiuk at intel.com
Fri Jan 15 00:00:41 PST 2016


Starting from Gen9 we can use IA-Coherent caches. Generally, coherency
can be programmed using RENDER_SURFACE_STATE or BTI 255, depending if
surface state model or stateless model is used. It is important to control
whether IA or GPU cache coherency should be used, especially for non-LLC
devices. However this control is complicated when stateless memory access
model is in action. It would require dedicated ISA code depending on
coherency requirement.

By setting HDC_FORCE_NON_COHERENT we *Force* data port to ignore these
attributes and all caches are GPU-Coherent. This register is part of HW
context, however it is private and cannot be programmed from
non-privileged batch buffer.

Default operation mode is as programmed by workaround. When
WaForceEnableNonCoherent is in place caches are GPU-Coherent and we
should not change it back to IA-Coherent because this can lead to GPU
hangs (as workaround description says).

A new device parameter is to inform user space about kernel capability.
It tells if can request to disable IA-Coherency.

Exec flag is to allow UMD to decide whether IA-Coherency is not needed
for submitted batch buffer. Exec flag behavior:
    1. flag is not set - use system default
    2. flag is set but WaForceEnableNonCoherent is
       a) not programmed - *Force* GPU-Coherent cache by setting
          HDC_FORCE_NON_COHERENT prior to bb_start and clearing after
       b) programmed - do nothing, GPU-Coherent is already in place

v2: Ringbufer handling fixes (Chris)
    Moved workarounds to common place (Chris)
    Removed flag cleanup (Dave)
    Updated commit message to reflect comments (Chris,Dave)

Signed-off-by: Artur Harasimiuk <artur.harasimiuk at intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c            |  4 ++++
 drivers/gpu/drm/i915/i915_drv.h            |  4 ++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 ++++
 drivers/gpu/drm/i915/intel_lrc.c           | 38 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  2 ++
 include/uapi/drm/i915_drm.h                |  8 ++++++-
 6 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 44a896c..f735e56 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -172,6 +172,10 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_SOFTPIN:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_EXEC_FORCE_NON_COHERENT:
+		value = !dev_priv->workarounds.WaForceEnableNonCoherent &&
+			INTEL_INFO(dev)->gen >= 9;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 104bd18..793be854 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1658,6 +1658,10 @@ struct i915_wa_reg {
 struct i915_workarounds {
 	struct i915_wa_reg reg[I915_MAX_WA_REGS];
 	u32 count;
+
+	struct {
+		unsigned int WaForceEnableNonCoherent:1;
+	};
 };
 
 struct i915_virtual_gpu {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d469c47..5db3806 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1400,6 +1400,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (!i915_gem_check_execbuffer(args))
 		return -EINVAL;
 
+	if ((args->flags & I915_EXEC_FORCE_NON_COHERENT) &&
+		INTEL_INFO(dev)->gen < 9)
+		return -EINVAL;
+
 	ret = validate_exec_list(dev, exec, args->buffer_count);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ab344e0..f37d12f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -879,6 +879,36 @@ int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request)
 	return intel_logical_ring_begin(request, 0);
 }
 
+static inline int
+intel_lr_emit_force_non_coherent(struct i915_execbuffer_params *params,
+		struct drm_i915_gem_execbuffer2 *args, bool force)
+{
+	struct drm_device       *dev = params->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	if (dev_priv->workarounds.WaForceEnableNonCoherent)
+		return 0;
+
+	if (args->flags & I915_EXEC_FORCE_NON_COHERENT) {
+		struct intel_ringbuffer *ringbuf = params->request->ringbuf;
+
+		ret = intel_logical_ring_begin(params->request, 4);
+		if (ret)
+			return ret;
+
+		intel_logical_ring_emit(ringbuf, MI_NOOP);
+		intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
+		intel_logical_ring_emit(ringbuf, HDC_CHICKEN0.reg);
+		intel_logical_ring_emit(ringbuf, force ?
+				_MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT) :
+				_MASKED_BIT_DISABLE(HDC_FORCE_NON_COHERENT));
+		intel_logical_ring_advance(ringbuf);
+	}
+
+	return 0;
+}
+
 /**
  * execlists_submission() - submit a batchbuffer for execution, Execlists style
  * @dev: DRM device.
@@ -959,6 +989,10 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
 		dev_priv->relative_constants_mode = instp_mode;
 	}
 
+	ret = intel_lr_emit_force_non_coherent(params, args, true);
+	if (ret)
+		return ret;
+
 	exec_start = params->batch_obj_vm_offset +
 		     args->batch_start_offset;
 
@@ -966,6 +1000,10 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
 	if (ret)
 		return ret;
 
+	ret = intel_lr_emit_force_non_coherent(params, args, false);
+	if (ret)
+		return ret;
+
 	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
 
 	i915_gem_execbuffer_move_to_active(vmas, params->request);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 4060acf..456421e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -806,6 +806,7 @@ static int gen8_init_workarounds(struct intel_engine_cs *ring)
 	 * invalidation occurs during a PSD flush.
 	 */
 	/* WaForceEnableNonCoherent:bdw,chv */
+	dev_priv->workarounds.WaForceEnableNonCoherent = 1;
 	/* WaHdcDisableFetchWhenMasked:bdw,chv */
 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
 			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
@@ -1049,6 +1050,7 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)
 		 * a TLB invalidation occurs during a PSD flush.
 		 */
 		/* WaForceEnableNonCoherent:skl */
+		dev_priv->workarounds.WaForceEnableNonCoherent = 1;
 		WA_SET_BIT_MASKED(HDC_CHICKEN0,
 				  HDC_FORCE_NON_COHERENT);
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index acf2102..c425e80 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -357,6 +357,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_GPU_RESET	 35
 #define I915_PARAM_HAS_RESOURCE_STREAMER 36
 #define I915_PARAM_HAS_EXEC_SOFTPIN	 37
+#define I915_PARAM_HAS_EXEC_FORCE_NON_COHERENT 38
 
 typedef struct drm_i915_getparam {
 	__s32 param;
@@ -782,7 +783,12 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_RESOURCE_STREAMER     (1<<15)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
+/**
+ * Tell the kernel that the batch buffer requires to disable IA-Coherency
+ */
+#define I915_EXEC_FORCE_NON_COHERENT    (1<<16)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_FORCE_NON_COHERENT<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.9.1



More information about the Intel-gfx mailing list