[Intel-gfx] [PATCH 3/4] drm/i915: HSW GT3 Slices: exec flag to warn kernel that userspace is using predication

Rodrigo Vivi rodrigo.vivi at gmail.com
Tue Nov 5 23:44:15 CET 2013


If Userspace isn't using MI_PREDICATE all slices must be enabled for
backward compatibility.

If I915_EXEC_USE_PREDICATE isn't set and the default is set to half, the kernel
will force all slices on.

v2: fix the inverted logic for backwards compatibility
    USE_PREDICATE unset forces gt_full when default is half
    instead of GT_FULL flag.

v3: Accepting Chris's suggestions: better variable names;
    better logic around state_default x legacy_userspace_busy;
    remove unnecessary mutex;

v4: Accepting more suggestions from Chris:
    * Send all LRIs in only one block and don't ignore if it fails.
    * function name and cleaner code on forcing_full.

v5: fix mutex_lock use by Chris.

v6: change state machine logic to fix locks and use number of slices on (1,2) instead of 0
    for half and 1 for full.
    Init gt slices out of init_hw to avoid resetting values during execution.

CC: Chris Wilson <chris at chris-wilson.co.uk>
CC: Eric Anholt <eric at anholt.net>
CC: Kenneth Graunke <kenneth at whitecape.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi at gmail.com>
---
 drivers/gpu/drm/i915/i915_drv.h            | 14 ++++++
 drivers/gpu/drm/i915/i915_gem.c            |  4 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 68 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_reg.h            | 11 +++++
 drivers/gpu/drm/i915/intel_display.c       |  7 +++
 drivers/gpu/drm/i915/intel_drv.h           |  1 +
 drivers/gpu/drm/i915/intel_pm.c            | 63 ++++++++++++++++++++++++---
 include/uapi/drm/i915_drm.h                |  8 +++-
 8 files changed, 167 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5bd8d6f..74284e6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1272,6 +1272,18 @@ struct intel_pipe_crc {
 	wait_queue_head_t wq;
 };
 
+enum gt_slices_state {
+	LEGACY_BUSY = 0,
+	ONE_SLICE,
+	TWO_SLICES,
+};
+
+struct i915_gt_slices {
+	u32 predicate_result_2; /* avoid reads and minimize mutex on execbuf */
+	enum gt_slices_state state;
+	struct mutex lock; /* locks access to slices state and registers */
+};
+
 typedef struct drm_i915_private {
 	struct drm_device *dev;
 	struct kmem_cache *slab;
@@ -1474,6 +1486,8 @@ typedef struct drm_i915_private {
 
 	struct i915_package_c8 pc8;
 
+	struct i915_gt_slices gt_slices;
+
 	/* Old dri1 support infrastructure, beware the dragons ya fools entering
 	 * here! */
 	struct i915_dri1_state dri1;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f9c32d1..cfb0687 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4439,8 +4439,6 @@ i915_gem_init_hw(struct drm_device *dev)
 	if (dev_priv->ellc_size)
 		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
 
-	intel_init_gt_slices(dev);
-
 	if (HAS_PCH_NOP(dev)) {
 		u32 temp = I915_READ(GEN7_MSG_CTL);
 		temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
@@ -4495,6 +4493,8 @@ int i915_gem_init(struct drm_device *dev)
 		return ret;
 	}
 
+	intel_init_gt_slices(dev);
+
 	/* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		dev_priv->dri1.allow_batchbuffer = 1;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0ce0d47..4911690 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -922,6 +922,62 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
 	return 0;
 }
 
+static int gt_legacy_userspace_busy(struct intel_ring_buffer *ring)
+{
+	drm_i915_private_t *dev_priv = ring->dev->dev_private;
+	int ret;
+
+	ret = intel_ring_begin(ring, 18);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, HSW_GT_SLICE_INFO);
+	intel_ring_emit(ring, SLICE_SEL_BOTH);
+
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, MI_PREDICATE_RESULT_2);
+	intel_ring_emit(ring, LOWER_SLICE_ENABLED);
+	dev_priv->gt_slices.predicate_result_2 = LOWER_SLICE_ENABLED;
+
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, HSW_SLICESHUTDOWN);
+	intel_ring_emit(ring, ~SLICE_SHUTDOWN);
+
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
+	intel_ring_emit(ring, CS_IDLE_COUNT_1US);
+
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, WAIT_FOR_RC6_EXIT);
+	intel_ring_emit(ring, _MASKED_BIT_ENABLE(WAIT_RC6_EXIT));
+
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
+	intel_ring_emit(ring, CS_IDLE_COUNT_5US);
+
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static bool gt_legacy_userspace(struct intel_ring_buffer *ring,
+				struct drm_i915_gem_execbuffer2 *args)
+{
+	drm_i915_private_t *dev_priv = ring->dev->dev_private;
+
+	if (ring->id == BCS)
+		return false;
+
+	if (!HAS_SLICE_SHUTDOWN(ring->dev))
+		return false;
+
+	if (dev_priv->gt_slices.predicate_result_2 == LOWER_SLICE_ENABLED)
+		return false;
+
+	return (args->flags & I915_EXEC_USE_PREDICATE) == 0;
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_file *file,
@@ -999,6 +1055,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	if (gt_legacy_userspace(ring, args)) {
+		mutex_lock(&dev_priv->gt_slices.lock);
+		if (dev_priv->gt_slices.state == ONE_SLICE) {
+			ret = gt_legacy_userspace_busy(ring);
+			if (ret == 0)
+				dev_priv->gt_slices.state = LEGACY_BUSY;
+		}
+		mutex_unlock(&dev_priv->gt_slices.lock);
+		if (ret)
+			return ret;
+	}
+
 	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
 	mask = I915_EXEC_CONSTANTS_MASK;
 	switch (mode) {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index a2e7deb..4617880 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -278,6 +278,17 @@
 #define   SLICE_STATUS_MAIN_ON	(2<<0)
 #define   SLICE_STATUS_BOTH_ON	(3<<0)
 
+#define HSW_SLICESHUTDOWN	0xA190
+#define   SLICE_SHUTDOWN	(1<<0)
+
+#define RC_IDLE_MAX_COUNT	0x2054
+#define   CS_IDLE_COUNT_1US	(1<<1)
+#define   CS_IDLE_COUNT_5US	(1<<3)
+
+#define WAIT_FOR_RC6_EXIT	0x20CC
+#define   WAIT_RC6_EXIT		(1<<0)
+#define   MASK_WAIT_RC6_EXIT	(1<<16)
+
 /*
  * 3D instructions used by the kernel
  */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f34252d..55f2a6e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7878,6 +7878,13 @@ void intel_mark_idle(struct drm_device *dev)
 
 	if (dev_priv->info->gen >= 6)
 		gen6_rps_idle(dev->dev_private);
+
+	mutex_lock(&dev_priv->gt_slices.lock);
+	if (dev_priv->gt_slices.state == LEGACY_BUSY) {
+		dev_priv->gt_slices.state = ONE_SLICE;
+		intel_set_gt_half_async(dev);
+	}
+	mutex_unlock(&dev_priv->gt_slices.lock);
 }
 
 void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index cf37741..deb9464 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -837,6 +837,7 @@ void intel_enable_gt_powersave(struct drm_device *dev);
 void intel_disable_gt_powersave(struct drm_device *dev);
 void ironlake_teardown_rc6(struct drm_device *dev);
 int intel_set_gt_slices(struct drm_device *dev, int slices);
+void intel_set_gt_half_async(struct drm_device *dev);
 void intel_init_gt_slices(struct drm_device *dev);
 void gen6_update_ring_freq(struct drm_device *dev);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 40ab76a..643ab05 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3877,18 +3877,22 @@ static int intel_set_gt_full(struct drm_device *dev)
 
 	/* Slices are enabled on RC6 exit */
 	gen6_gt_force_wake_get(dev_priv);
-
 	if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) ==
 		      SLICE_STATUS_BOTH_ON), 2000)) {
 		DRM_ERROR("Timeout enabling full gt slices\n");
+
 		I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
 		I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+		dev_priv->gt_slices.predicate_result_2 = LOWER_SLICE_DISABLED;
+
 		gen6_gt_force_wake_put(dev_priv);
 		return -ETIMEDOUT;
 	}
+
 	I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED);
-	gen6_gt_force_wake_put(dev_priv);
+	dev_priv->gt_slices.predicate_result_2 = LOWER_SLICE_ENABLED;
 
+	gen6_gt_force_wake_put(dev_priv);
 	return 0;
 }
 
@@ -3904,28 +3908,71 @@ static int intel_set_gt_half(struct drm_device *dev)
 	if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) ==
 		      SLICE_STATUS_MAIN_ON), 2000)) {
 		DRM_ERROR("Timed out disabling half gt slices\n");
+
 		I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH);
 		I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED);
+		dev_priv->gt_slices.predicate_result_2 = LOWER_SLICE_ENABLED;
+
 		gen6_gt_force_wake_put(dev_priv);
 		return -ETIMEDOUT;
 	}
+
 	I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+	dev_priv->gt_slices.predicate_result_2 = LOWER_SLICE_DISABLED;
+
 	gen6_gt_force_wake_put(dev_priv);
 	return 0;
 }
 
 int intel_set_gt_slices(struct drm_device *dev, int slices)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret = 0;
+
 	if (!HAS_SLICE_SHUTDOWN(dev))
 		return -ENODEV;
 
 	switch (slices) {
-	case 1: return intel_set_gt_half(dev);
-	case 2: return intel_set_gt_full(dev);
+	case 1:
+		mutex_lock(&dev_priv->gt_slices.lock);
+		if (dev_priv->gt_slices.state == TWO_SLICES) {
+			ret = intel_set_gt_half(dev);
+			if (ret == 0)
+				dev_priv->gt_slices.state = ONE_SLICE;
+		}
+		mutex_unlock(&dev_priv->gt_slices.lock);
+		return ret;
+	case 2:
+		mutex_lock(&dev_priv->gt_slices.lock);
+		if (dev_priv->gt_slices.state == ONE_SLICE) {
+			ret = intel_set_gt_full(dev);
+			if (ret == 0)
+				dev_priv->gt_slices.state = TWO_SLICES;
+		} else if (dev_priv->gt_slices.state == LEGACY_BUSY)
+			dev_priv->gt_slices.state = TWO_SLICES;
+		mutex_unlock(&dev_priv->gt_slices.lock);
+		return ret;
 	default: return -EINVAL;
 	}
 }
 
+/**
+ * On Haswell, slices on/off transitions are done via RC6 sequence.
+ * This async function allows you to request slices shutdown without waiting.
+ * Slices will be disabled on next RC6 exit.
+ */
+void intel_set_gt_half_async(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!HAS_SLICE_SHUTDOWN(dev))
+		return;
+
+	I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
+	I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+	dev_priv->gt_slices.predicate_result_2 = LOWER_SLICE_DISABLED;
+}
+
 void intel_init_gt_slices(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3936,9 +3983,13 @@ void intel_init_gt_slices(struct drm_device *dev)
 	if (!HAS_SLICE_SHUTDOWN(dev))
 		return;
 
+	dev_priv->gt_slices.state = TWO_SLICES;
+	dev_priv->gt_slices.predicate_result_2 = LOWER_SLICE_ENABLED;
+	mutex_init(&dev_priv->gt_slices.lock);
+
 	if (i915_gt_slices == 1) {
-		I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
-		I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+		intel_set_gt_half_async(dev);
+		dev_priv->gt_slices.state = ONE_SLICE;
 		return;
 	}
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3a4e97b..3fa3e24 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -731,7 +731,13 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_HANDLE_LUT		(1<<12)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
+/* If this flag is set, userspace is using predication and half of the slices
+ * can be left disabled for power saving. Otherwise all slices are used, even
+ * when disabled by boot parameter or via the sysfs interface.
+ */
+#define I915_EXEC_USE_PREDICATE		(1<<13)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_USE_PREDICATE<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.7.11.7




More information about the Intel-gfx mailing list