[Intel-gfx] [PATCH] drm/i915: HSW GT3 Slices: exec flag to warn kernel that userspace is using predication
Rodrigo Vivi
rodrigo.vivi at gmail.com
Fri Nov 1 00:07:09 CET 2013
If userspace isn't using MI_PREDICATE, all slices must be enabled for
backward compatibility.
If I915_EXEC_USE_PREDICATE isn't set and the default is set to half, the
kernel will force all slices on.
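For reference, opting in from userspace is just a matter of setting the new
flag on the execbuffer2 ioctl. A minimal sketch follows (assuming libdrm's
drmIoctl() wrapper and headers; the helper name, fd, exec object list and
batch length are illustrative placeholders set up by the caller as usual):

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Sketch only: submit a render batch that uses MI_PREDICATE and tell the
 * kernel so, allowing it to keep half of the slices powered off. */
static int submit_predicated_batch(int fd,
				   struct drm_i915_gem_exec_object2 *objects,
				   unsigned int count, uint32_t batch_len)
{
	struct drm_i915_gem_execbuffer2 execbuf;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)objects;
	execbuf.buffer_count = count;
	execbuf.batch_len = batch_len;
	/* Without I915_EXEC_USE_PREDICATE the kernel forces all slices on. */
	execbuf.flags = I915_EXEC_RENDER | I915_EXEC_USE_PREDICATE;

	return drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}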
v2: Fix the inverted logic for backwards compatibility:
    when USE_PREDICATE is unset and the default is half, force gt_full
    instead of relying on a GT_FULL flag.
v3: Accept Chris's suggestions: better variable names;
    better logic around state_default vs legacy_userspace_busy;
    remove unnecessary mutex.
v4: Accept more suggestions from Chris:
    * Send all LRIs in a single block and don't ignore failures.
    * Better function name and cleaner code for forcing full slices.
v5: Fix mutex_lock usage, as pointed out by Chris.
CC: Chris Wilson <chris at chris-wilson.co.uk>
CC: Eric Anholt <eric at anholt.net>
CC: Kenneth Graunke <kenneth at whitecape.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi at gmail.com>
---
drivers/gpu/drm/i915/i915_drv.h | 8 ++++
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 64 ++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_reg.h | 11 +++++
drivers/gpu/drm/i915/i915_sysfs.c | 7 ++++
drivers/gpu/drm/i915/intel_display.c | 17 ++++++++
drivers/gpu/drm/i915/intel_drv.h | 1 +
drivers/gpu/drm/i915/intel_pm.c | 41 ++++++++++++++++++-
include/uapi/drm/i915_drm.h | 8 +++-
8 files changed, 154 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 685fb1d..67bbbce 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1219,6 +1219,12 @@ struct i915_package_c8 {
} regsave;
};
+struct i915_gt_slices {
+ int state_default;
+ int legacy_userspace_busy;
+ struct mutex lock; /* locks access to this struct and slice registers */
+};
+
typedef struct drm_i915_private {
struct drm_device *dev;
struct kmem_cache *slab;
@@ -1418,6 +1424,8 @@ typedef struct drm_i915_private {
struct i915_package_c8 pc8;
+ struct i915_gt_slices gt_slices;
+
/* Old dri1 support infrastructure, beware the dragons ya fools entering
* here! */
struct i915_dri1_state dri1;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0ce0d47..3ada5b4 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -922,6 +922,56 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
return 0;
}
+static int gt_legacy_userspace_busy(struct intel_ring_buffer *ring)
+{
+ int ret;
+
+ ret = intel_ring_begin(ring, 18);
+ if (ret)
+ return ret;
+
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, HSW_GT_SLICE_INFO);
+ intel_ring_emit(ring, SLICE_SEL_BOTH);
+
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, MI_PREDICATE_RESULT_2);
+ intel_ring_emit(ring, LOWER_SLICE_ENABLED);
+
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, HSW_SLICESHUTDOWN);
+ intel_ring_emit(ring, ~SLICE_SHUTDOWN);
+
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
+ intel_ring_emit(ring, CS_IDLE_COUNT_1US);
+
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, WAIT_FOR_RC6_EXIT);
+ intel_ring_emit(ring, _MASKED_BIT_ENABLE(WAIT_RC6_EXIT));
+
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
+ intel_ring_emit(ring, CS_IDLE_COUNT_5US);
+
+ intel_ring_advance(ring);
+ return 0;
+}
+
+static bool gt_legacy_userspace(struct intel_ring_buffer *ring,
+ struct drm_i915_gem_execbuffer2 *args)
+{
+ drm_i915_private_t *dev_priv = ring->dev->dev_private;
+
+ if (ring->id == BCS)
+ return false;
+
+ if (!HAS_SLICE_SHUTDOWN(ring->dev))
+ return false;
+
+ return (args->flags & I915_EXEC_USE_PREDICATE) == 0;
+}
+
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file,
@@ -935,6 +985,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_clip_rect *cliprects = NULL;
struct intel_ring_buffer *ring;
struct i915_ctx_hang_stats *hs;
+ struct i915_gt_slices *gt_slices = &dev_priv->gt_slices;
u32 ctx_id = i915_execbuffer2_get_context_id(*args);
u32 exec_start, exec_len;
u32 mask, flags;
@@ -999,6 +1050,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
return -EINVAL;
}
+ if (gt_legacy_userspace(ring, args)) {
+ mutex_lock(&gt_slices->lock);
+ if (gt_slices->state_default == 0 &&
+ !gt_slices->legacy_userspace_busy) {
+ ret = gt_legacy_userspace_busy(ring);
+ if (ret == 0)
+ gt_slices->legacy_userspace_busy = true;
+ }
+ mutex_unlock(&gt_slices->lock);
+ if (ret)
+ return ret;
+ }
+
mode = args->flags & I915_EXEC_CONSTANTS_MASK;
mask = I915_EXEC_CONSTANTS_MASK;
switch (mode) {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 497c441..0146bef 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -277,6 +277,17 @@
#define SLICE_STATUS_MAIN_ON (2<<0)
#define SLICE_STATUS_BOTH_ON (3<<0)
+#define HSW_SLICESHUTDOWN 0xA190
+#define SLICE_SHUTDOWN (1<<0)
+
+#define RC_IDLE_MAX_COUNT 0x2054
+#define CS_IDLE_COUNT_1US (1<<1)
+#define CS_IDLE_COUNT_5US (1<<3)
+
+#define WAIT_FOR_RC6_EXIT 0x20CC
+#define WAIT_RC6_EXIT (1<<0)
+#define MASK_WAIT_RC6_EXIT (1<<16)
+
/*
* 3D instructions used by the kernel
*/
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 86ccd52..a821499 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -135,16 +135,23 @@ static ssize_t gt_slice_config_store(struct device *kdev,
{
struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
struct drm_device *dev = minor->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
int ret;
if (!strncmp(buf, "full", sizeof("full") - 1)) {
ret = intel_set_gt_full(dev);
if (ret)
return ret;
+ mutex_lock(&dev_priv->gt_slices.lock);
+ dev_priv->gt_slices.state_default = 1;
+ mutex_unlock(&dev_priv->gt_slices.lock);
} else if (!strncmp(buf, "half", sizeof("half") - 1)) {
ret = intel_set_gt_half(dev);
if (ret)
return ret;
+ mutex_lock(&dev_priv->gt_slices.lock);
+ dev_priv->gt_slices.state_default = 0;
+ mutex_unlock(&dev_priv->gt_slices.lock);
} else
return -EINVAL;
return count;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4f1b636..eec4c0e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7759,6 +7759,20 @@ void intel_mark_busy(struct drm_device *dev)
i915_update_gfx_val(dev_priv);
}
+static bool intel_need_shutdown_slices(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ mutex_lock(&dev_priv->gt_slices.lock);
+ if (dev_priv->gt_slices.legacy_userspace_busy) {
+ dev_priv->gt_slices.legacy_userspace_busy = false;
+ mutex_unlock(&dev_priv->gt_slices.lock);
+ return true;
+ }
+ mutex_unlock(&dev_priv->gt_slices.lock);
+ return false;
+}
+
void intel_mark_idle(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -7778,6 +7792,9 @@ void intel_mark_idle(struct drm_device *dev)
if (dev_priv->info->gen >= 6)
gen6_rps_idle(dev->dev_private);
+
+ if (intel_need_shutdown_slices(dev))
+ intel_set_gt_half_async(dev);
}
void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a9abbb5..98cd63e 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -836,6 +836,7 @@ void intel_disable_gt_powersave(struct drm_device *dev);
void ironlake_teardown_rc6(struct drm_device *dev);
int intel_set_gt_full(struct drm_device *dev);
int intel_set_gt_half(struct drm_device *dev);
+void intel_set_gt_half_async(struct drm_device *dev);
void intel_init_gt_slices(struct drm_device *dev);
void gen6_update_ring_freq(struct drm_device *dev);
void gen6_rps_idle(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 63af075..b3bd70f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3873,6 +3873,7 @@ int intel_set_gt_full(struct drm_device *dev)
if (!HAS_SLICE_SHUTDOWN(dev))
return -ENODEV;
+ mutex_lock(&dev_priv->gt_slices.lock);
I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH);
/* Slices are enabled on RC6 exit */
@@ -3881,13 +3882,18 @@ int intel_set_gt_full(struct drm_device *dev)
if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) ==
SLICE_STATUS_BOTH_ON), 2000)) {
DRM_ERROR("Timeout enabling full gt slices\n");
+
I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+
gen6_gt_force_wake_put(dev_priv);
+ mutex_unlock(&dev_priv->gt_slices.lock);
return -ETIMEDOUT;
}
+
I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED);
gen6_gt_force_wake_put(dev_priv);
+ mutex_unlock(&dev_priv->gt_slices.lock);
return 0;
}
@@ -3899,6 +3905,7 @@ int intel_set_gt_half(struct drm_device *dev)
if (!HAS_SLICE_SHUTDOWN(dev))
return -ENODEV;
+ mutex_lock(&dev_priv->gt_slices.lock);
I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
/* Slices are disabled on RC6 exit */
@@ -3907,16 +3914,42 @@ int intel_set_gt_half(struct drm_device *dev)
if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) ==
SLICE_STATUS_MAIN_ON), 2000)) {
DRM_ERROR("Timed out disabling half gt slices\n");
+
I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH);
I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED);
+
gen6_gt_force_wake_put(dev_priv);
+ mutex_unlock(&dev_priv->gt_slices.lock);
return -ETIMEDOUT;
}
+
I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
gen6_gt_force_wake_put(dev_priv);
+
+ mutex_unlock(&dev_priv->gt_slices.lock);
return 0;
}
+/**
+ * On Haswell, slice on/off transitions are done via the RC6 sequence.
+ * This async function allows you to request slice shutdown without waiting.
+ * Slices will be disabled on the next RC6 exit.
+ */
+void intel_set_gt_half_async(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ if (!HAS_SLICE_SHUTDOWN(dev))
+ return;
+
+ mutex_lock(&dev_priv->gt_slices.lock);
+ if (dev_priv->gt_slices.state_default == 0) {
+ I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
+ I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+ }
+ mutex_unlock(&dev_priv->gt_slices.lock);
+}
+
void intel_init_gt_slices(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3927,9 +3960,13 @@ void intel_init_gt_slices(struct drm_device *dev)
if (!HAS_SLICE_SHUTDOWN(dev))
return;
+ dev_priv->gt_slices.state_default = 1;
+ dev_priv->gt_slices.legacy_userspace_busy = false;
+ mutex_init(&dev_priv->gt_slices.lock);
+
if (!i915_gt_slice_config) {
- I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
- I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+ dev_priv->gt_slices.state_default = 0;
+ intel_set_gt_half_async(dev);
}
}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3a4e97b..3fa3e24 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -731,7 +731,13 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_HANDLE_LUT (1<<12)
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
+/* If this flag is set, userspace is using predication and half of the slices
+ * can be left disabled for power saving. Otherwise all slices are used, even
+ * when disabled by the boot parameter or via the sysfs interface.
+ */
+#define I915_EXEC_USE_PREDICATE (1<<13)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_USE_PREDICATE<<1)
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
--
1.7.11.7