[Intel-gfx] [PATCH v5 35/35] drm/i915: Allow scheduler to manage inter-ring object synchronisation

Thu Feb 18 14:27:23 UTC 2016

From: John Harrison <John.C.Harrison at Intel.com>

The scheduler has always tracked batch buffer dependencies based on
DRM object usage. This means that it will not submit a batch on one
ring that has outstanding dependencies still executing on other rings.
This is exactly the same synchronisation performed by
i915_gem_object_sync() using hardware semaphores where available and
CPU stalls where not (e.g. in execlist mode and/or on Gen8 hardware).

Unfortunately, when a batch buffer is submitted to the driver the
_object_sync() call happens first. Thus in case where hardware
semaphores are disabled, the driver has already stalled until the
dependency has been resolved.

This patch adds an optimisation to _object_sync() to ignore the
synchronisation in the case where it will subsequently be handled by
the scheduler. This removes the driver stall and (in the single
application case) provides near hardware semaphore performance even
when hardware semaphores are disabled. In a busy system where there is
other work that can be executed on the stalling ring, it provides
better than hardware semaphore performance as it removes the stall
from both the driver and from the hardware. There is also a theory
that this method should improve power usage as hardware semaphores are
apparently not very power efficient - the stalled ring does not go
into as low a power a state as when it is genuinely idle.

The optimisation is to check whether both ends of the synchronisation
are batch buffer requests. If they are, then the scheduler will have
the inter-dependency tracked and managed. If one or other end is not a
batch buffer request (e.g. a page flip) then the code falls back to
the CPU stall or hardware semaphore as appropriate.

To check whether the existing usage is a batch buffer, the code simply
calls the 'are you tracking this request' function of the scheduler on
the object's last_read_req member. To check whether the new usage is a
batch buffer, a flag is passed in from the caller.

Issue: VIZ-5566
Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  2 +-
 drivers/gpu/drm/i915/i915_gem.c            | 17 ++++++++++++++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 +-
 drivers/gpu/drm/i915/intel_display.c       |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c           |  2 +-
 5 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5d02f44..207ac16 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3011,7 +3011,7 @@ int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 #endif
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
 			 struct intel_engine_cs *to,
-			 struct drm_i915_gem_request **to_req);
+			 struct drm_i915_gem_request **to_req, bool to_batch);
 void i915_vma_move_to_active(struct i915_vma *vma,
 			     struct drm_i915_gem_request *req);
 int i915_gem_dumb_create(struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a2c136d..b14e384 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3577,7 +3577,7 @@ static int
 __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 		       struct intel_engine_cs *to,
 		       struct drm_i915_gem_request *from_req,
-		       struct drm_i915_gem_request **to_req)
+		       struct drm_i915_gem_request **to_req, bool to_batch)
 {
 	struct intel_engine_cs *from;
 	int ret;
@@ -3589,6 +3589,15 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 	if (i915_gem_request_completed(from_req))
 		return 0;
 
+	/*
+	 * The scheduler will manage inter-ring object dependencies
+	 * as long as both to and from requests are scheduler managed
+	 * (i.e. batch buffers).
+	 */
+	if (to_batch &&
+	    i915_scheduler_is_request_tracked(from_req, NULL, NULL))
+		return 0;
+
 	if (!i915_semaphore_is_enabled(obj->base.dev)) {
 		struct drm_i915_private *i915 = to_i915(obj->base.dev);
 		ret = __i915_wait_request(from_req,
@@ -3639,6 +3648,8 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
  * @to_req: request we wish to use the object for. See below.
  *          This will be allocated and returned if a request is
  *          required but not passed in.
+ * @to_batch: is the sync request on behalf of batch buffer submission?
+ * If so then the scheduler can (potentially) manage the synchronisation.
  *
  * This code is meant to abstract object synchronization with the GPU.
  * Calling with NULL implies synchronizing the object with the CPU
@@ -3669,7 +3680,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
 		     struct intel_engine_cs *to,
-		     struct drm_i915_gem_request **to_req)
+		     struct drm_i915_gem_request **to_req, bool to_batch)
 {
 	const bool readonly = obj->base.pending_write_domain == 0;
 	struct drm_i915_gem_request *req[I915_NUM_RINGS];
@@ -3691,7 +3702,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 				req[n++] = obj->last_read_req[i];
 	}
 	for (i = 0; i < n; i++) {
-		ret = __i915_gem_object_sync(obj, to, req[i], to_req);
+		ret = __i915_gem_object_sync(obj, to, req[i], to_req, to_batch);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0b8c61e..2923dcd 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -950,7 +950,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (obj->active & other_rings) {
-			ret = i915_gem_object_sync(obj, req->ring, &req);
+			ret = i915_gem_object_sync(obj, req->ring, &req, true);
 			if (ret)
 				return ret;
 		}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 5953590..e768026 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11693,7 +11693,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	 * into the display plane and skip any waits.
 	 */
 	if (!mmio_flip) {
-		ret = i915_gem_object_sync(obj, ring, &request);
+		ret = i915_gem_object_sync(obj, ring, &request, false);
 		if (ret)
 			goto cleanup_pending;
 	}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5fbeb0e..0a97261 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -686,7 +686,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (obj->active & other_rings) {
-			ret = i915_gem_object_sync(obj, req->ring, &req);
+			ret = i915_gem_object_sync(obj, req->ring, &req, true);
 			if (ret)
 				return ret;
 		}
-- 
1.9.1