[Intel-gfx] [PATCH 6/8] drm/i915: Add global barrier support

Tvrtko Ursulin tursulin at ursulin.net
Tue Aug 14 14:40:56 UTC 2018


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Global barrier is a facility to allow serialization between different
timelines.

After calling i915_gem_set_global_barrier on a request, all following
submissions on any engine will be set up as depending on this global
barrier. Once the global barrier has been completed it automatically gets
cleared and things continue as normal.

This facility will be used by the upcoming context SSEU code.

-------------------------------------------------------------------------
This code was part of the larger SSEU patch but I extracted it to be
separate for ease of review and clarity. I think it originates from Chris
Wilson, so, permission pending, I will change the author and add the
appropriate S-o-B.
-------------------------------------------------------------------------

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h     | 27 +++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem.c     |  2 ++
 drivers/gpu/drm/i915/i915_request.c | 16 ++++++++++++++++
 3 files changed, 45 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c12d2676435..643089ba01b9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2098,6 +2098,16 @@ struct drm_i915_private {
 		u32 active_requests;
 		u32 request_serial;
 
+		/**
+		 * Global barrier for the ability to serialize ordering between
+		 * different timelines.
+		 *
+		 * Users can call i915_gem_set_global_barrier, which will make
+		 * all subsequent submissions execute only after this barrier
+		 * has been completed.
+		 */
+		struct i915_gem_active global_barrier;
+
 		/**
 		 * Is the GPU currently considered idle, or busy executing
 		 * userspace requests? Whilst idle, we allow runtime power
@@ -3230,6 +3240,23 @@ i915_vm_to_ppgtt(struct i915_address_space *vm)
 	return container_of(vm, struct i915_hw_ppgtt, vm);
 }
 
+/**
+ * i915_gem_set_global_barrier - orders submission on different timelines
+ * @i915: i915 device private
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed-in request as the serialization point for all subsequent
+ * submissions, regardless of the engine/timeline. Subsequent requests will
+ * not be submitted to the GPU until the global barrier has been completed.
+ */
+static inline void
+i915_gem_set_global_barrier(struct drm_i915_private *i915,
+			    struct i915_request *rq)
+{
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	i915_gem_active_set(&i915->gt.global_barrier, rq);
+}
+
 /* i915_gem_fence_reg.c */
 struct drm_i915_fence_reg *
 i915_reserve_fence(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0453eb42a1a3..be462ef65786 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5752,6 +5752,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 	if (!dev_priv->priorities)
 		goto err_dependencies;
 
+	init_request_active(&dev_priv->gt.global_barrier, NULL);
+
 	INIT_LIST_HEAD(&dev_priv->gt.timelines);
 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 09ed48833b54..8b45f74dc748 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -644,6 +644,18 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static int add_global_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier;
+
+	barrier = i915_gem_active_raw(&rq->i915->gt.global_barrier,
+				      &rq->i915->drm.struct_mutex);
+	if (barrier)
+		return i915_request_await_dma_fence(rq, &barrier->fence);
+
+	return 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -806,6 +818,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_global_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	/* Unconditionally invalidate GPU caches and TLBs. */
 	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 	if (ret)
-- 
2.17.1



More information about the Intel-gfx mailing list