[PATCH 81/87] drm/i915: "Race-to-idle" on switching to the kernel context

Chris Wilson <chris@chris-wilson.co.uk>
Sat Mar 24 11:41:57 UTC 2018


During suspend we want to flush out all active contexts and their
rendering. To do so we queue a request from the kernel's context;
once we know that request is done, we know the GPU is completely
idle. To speed up that switch, bump the GPU clocks.
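
In code, the gist of the boost (a condensed sketch of the
i915_gem_switch_to_kernel_context() hunk below; request allocation
and submission are elided and paraphrased in comments):

	bool active = false;

	/* rq = request on engine from dev_priv->kernel_context (elided) */

	/* Serialise the switch after every other timeline on this
	 * engine; a return > 0 from the await means we really had to
	 * wait on somebody, i.e. the engine was not already idle.
	 */
	active |= i915_sw_fence_await_sw_fence_gfp(&rq->submit,
						   &prev->submit,
						   I915_FENCE_GFP) > 0;

	if (active)
		intel_rps_boost(rq, NULL); /* race-to-idle */

	/* ... submit rq; its completion means the engine is idle */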

Switching to the kernel context prior to idling is also used to
enforce a barrier before changing OA properties, and when evicting
active rendering from the global GTT. These are all cases where we
do want to race-to-idle.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: David Weinehall <david.weinehall@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Tested-by: David Weinehall <david.weinehall@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  2 +
 drivers/gpu/drm/i915/i915_gem.c         |  1 +
 drivers/gpu/drm/i915/i915_gem_context.c | 82 +++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_context.h |  5 ++
 drivers/gpu/drm/i915/intel_lrc.c        |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  2 +-
 6 files changed, 83 insertions(+), 11 deletions(-)
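
Most of the diff adds a small cache of freed context-state objects
(i915->contexts.freed_objects) so that allocating a new context image
can reuse a recently freed one instead of going back to the allocator.
In rough outline (condensed from the hunks below):

	/* context free: park the last reference instead of freeing */
	obj->mm.madv = I915_MADV_DONTNEED;
	list_add(&obj->batch_pool_link, &i915->contexts.freed_objects);

	/* context alloc: prefer a cached object of the same size */
	obj = i915_gem_context_create_object(i915, size);

	/* GPU idle / module unload: drop whatever is still cached */
	i915_gem_contexts_park(i915);

The cache is emptied from the idle worker, so objects are only kept
while the GPU remains busy cycling contexts.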

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 465f57082468..a6f8e536cdb4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1829,6 +1829,8 @@ struct drm_i915_private {
 		struct llist_head free_list;
 		struct work_struct free_work;
 
+		struct list_head freed_objects;
+
 		/* The hw wants to have a stable context identifier for the
 		 * lifetime of the context (for OA, PASID, faults, etc).
 		 * This is limited in execlists to 21 bits.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3c203310357f..6e569efd6535 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3485,6 +3485,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 
 	intel_engines_park(dev_priv);
 	i915_timelines_park(dev_priv);
+	i915_gem_contexts_park(dev_priv);
 
 	i915_pmu_gt_parked(dev_priv);
 	/* Discard all currently unused caching of obj->mm.pages */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 7d0fa0199586..a56430bb3b5d 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -95,6 +95,51 @@
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
+struct drm_i915_gem_object *
+i915_gem_context_create_object(struct drm_i915_private *i915,
+			       unsigned long size)
+{
+	struct drm_i915_gem_object *obj, *on;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	list_for_each_entry_safe(obj, on,
+				 &i915->contexts.freed_objects,
+				 batch_pool_link) {
+		if (IS_ERR(obj->mm.pages)) {
+			list_del(&obj->batch_pool_link);
+			GEM_BUG_ON(i915_gem_object_is_active(obj));
+			i915_gem_object_put(obj);
+			continue;
+		}
+
+		if (obj->base.size == size) {
+			list_del(&obj->batch_pool_link);
+			GEM_BUG_ON(obj->mm.madv != I915_MADV_DONTNEED);
+			obj->mm.madv = I915_MADV_WILLNEED;
+			return obj;
+		}
+	}
+
+	return i915_gem_object_create(i915, size);
+}
+
+void i915_gem_contexts_park(struct drm_i915_private *i915)
+{
+	struct drm_i915_gem_object *obj, *on;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	list_for_each_entry_safe(obj, on,
+				 &i915->contexts.freed_objects,
+				 batch_pool_link) {
+		GEM_BUG_ON(i915_gem_object_is_active(obj));
+		i915_gem_object_put(obj);
+	}
+
+	INIT_LIST_HEAD(&i915->contexts.freed_objects);
+}
+
 static void lut_close(struct i915_gem_context *ctx)
 {
 	struct i915_lut_handle *lut, *ln;
@@ -120,9 +165,10 @@ static void lut_close(struct i915_gem_context *ctx)
 
 static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
+	struct drm_i915_private *i915 = ctx->i915;
 	int i;
 
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
+	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
 	i915_ppgtt_put(ctx->ppgtt);
@@ -130,14 +176,23 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
 		struct intel_context *ce = &ctx->engine[i];
 
-		if (!ce->state)
-			continue;
+		GEM_BUG_ON(ce->pin_count);
 
-		WARN_ON(ce->pin_count);
 		if (ce->ring)
 			intel_ring_free(ce->ring);
 
-		__i915_gem_object_release_unless_active(ce->state->obj);
+		/* Keep the objects around for quick reuse */
+		if (ce->state) {
+			struct drm_i915_gem_object *obj = ce->state->obj;
+
+			if (!refcount_dec_not_one(&obj->base.refcount.refcount)) {
+				GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED);
+				obj->mm.madv = I915_MADV_DONTNEED;
+
+			list_add(&obj->batch_pool_link,
+				 &i915->contexts.freed_objects);
+			}
+		}
 	}
 
 	if (ctx->timeline)
@@ -149,7 +204,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	list_del(&ctx->link);
 
 	if (!list_empty(&ctx->hw_id_link)) {
-		ida_simple_remove(&ctx->i915->contexts.hw_ida, ctx->hw_id);
+		ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
 		list_del(&ctx->hw_id_link);
 	}
 
@@ -469,6 +524,8 @@ static void init_contexts(struct drm_i915_private *i915)
 
 	INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
 	init_llist_head(&i915->contexts.free_list);
+
+	INIT_LIST_HEAD(&i915->contexts.freed_objects);
 }
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
@@ -541,6 +598,9 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915)
 		destroy_kernel_context(&i915->preempt_context);
 	destroy_kernel_context(&i915->kernel_context);
 
+	i915_gem_contexts_park(i915);
+	GEM_BUG_ON(!list_empty(&i915->contexts.freed_objects));
+
 	/* Must free all deferred contexts (via flush_workqueue) first */
 	GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
 	ida_destroy(&i915->contexts.hw_ida);
@@ -625,6 +685,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 
 	for_each_engine(engine, dev_priv, id) {
 		struct i915_request *rq;
+		bool active = false;
 
 		if (engine_has_idle_kernel_context(engine))
 			continue;
@@ -639,11 +700,14 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 
 			prev = last_timeline_request(timeline, engine);
 			if (prev)
-				i915_sw_fence_await_sw_fence_gfp(&rq->submit,
-								 &prev->submit,
-								 I915_FENCE_GFP);
+				active |= i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+									   &prev->submit,
+									   I915_FENCE_GFP) > 0;
 		}
 
+		if (active)
+			intel_rps_boost(rq, NULL);
+
 		/*
 		 * Force a flush after the switch to ensure that all rendering
 		 * and operations prior to switching to the kernel context hits
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 36d6eb781120..13b377c3de6c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -298,6 +298,11 @@ void i915_gem_context_release(struct kref *ctx_ref);
 struct i915_gem_context *
 i915_gem_context_create_gvt(struct drm_device *dev);
 
+struct drm_i915_gem_object *
+i915_gem_context_create_object(struct drm_i915_private *i915,
+			       unsigned long size);
+void i915_gem_contexts_park(struct drm_i915_private *i915);
+
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file);
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 13ccf4273245..88a30b514a2e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2606,7 +2606,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 	 */
 	context_size += LRC_HEADER_PAGES * PAGE_SIZE;
 
-	ctx_obj = i915_gem_object_create(ctx->i915, context_size);
+	ctx_obj = i915_gem_context_create_object(ctx->i915, context_size);
 	if (IS_ERR(ctx_obj)) {
 		ret = PTR_ERR(ctx_obj);
 		goto error_deref_obj;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d1813761aa4e..0559d56c3861 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1213,7 +1213,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create(i915, engine->context_size);
+	obj = i915_gem_context_create_object(i915, engine->context_size);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
-- 
2.16.3
