[PATCH] drm/i915: Throttle execbuffer hogs

Chris Wilson chris at chris-wilson.co.uk
Tue Feb 5 21:15:14 UTC 2019


Apply backpressure to hogs that emit requests faster than the GPU can
process them by waiting for their ring to be less than half-full before
proceeding to take the struct_mutex.
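
A minimal sketch of that half-full test, mirroring the
__eb_check_space() helper added below (the function name here is
illustrative only):

	static bool ring_mostly_idle(const struct intel_ring *ring)
	{
		/*
		 * Illustrative stand-in for __eb_check_space(); the
		 * ring counts as uncongested while more than half of
		 * it is free. READ_ONCE() as ring->space is updated
		 * concurrently by request retirement.
		 */
		return 2 * READ_ONCE(ring->space) > ring->size;
	}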

This is a gross hack to apply throttling backpressure; the long-term
goal is to remove the struct_mutex contention so that each client
naturally waits for its own resources and never blocks another, at
least within the driver. (Realtime priority goals would extend to
ensuring that resource contention favours high-priority clients as
well.)

This only limits excessive request production and does not attempt to
throttle clients that block waiting for eviction (of either global GTT
or system memory); see above for the long-term goal.
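
In outline: the submitter polls that predicate with a 100ms timeout
(HZ / 10), retiring its own completed requests on each timeout in case
nobody else does, while the retirement side rate-limits its wakeups.
The wake condition added to advance_ring() below, with the heuristic
spelled out:

	/*
	 * Heuristic: only wake waiters when bit PAGE_SIZE of the
	 * recomputed space flips, i.e. roughly once per page of ring
	 * space reclaimed, rather than on every retired request.
	 */
	if ((intel_ring_update_space(ring) ^ old_space) & PAGE_SIZE)
		wake_up_all(&ring->waitqueue);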

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 60 ++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_request.c        |  4 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  2 +
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  2 +
 4 files changed, 68 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8eedf7cac493..a403d5fd3e80 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -753,6 +753,61 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	return 0;
 }
 
+static bool __eb_check_space(const struct intel_context *ce)
+{
+	return 2 * READ_ONCE(ce->ring->space) > ce->ring->size;
+}
+
+static bool __eb_flush_ring(const struct i915_execbuffer *eb,
+			    const struct intel_context *ce)
+{
+	if (__eb_check_space(ce))
+		return true;
+
+	if (mutex_trylock(&eb->i915->drm.struct_mutex)) {
+		struct i915_request *rq;
+
+		list_for_each_entry(rq, &ce->ring->request_list, ring_link) {
+			if (!i915_request_completed(rq)) {
+				i915_request_retire_upto(rq);
+				break;
+			}
+		}
+
+		mutex_unlock(&eb->i915->drm.struct_mutex);
+	}
+
+	return __eb_check_space(ce);
+}
+
+static int eb_wait_for_space(const struct i915_execbuffer *eb)
+{
+	const struct intel_context *ce;
+	int ret;
+
+	ce = to_intel_context(eb->ctx, eb->engine);
+	if (!ce->ring) /* first use, assume empty! */
+		return 0;
+
+	if (__eb_check_space(ce))
+		return 0;
+
+	do {
+		/*
+		 * Nobody is forced to retire our requests on our behalf.
+		 * There is a background flusher that tries to grab the mutex
+		 * and backs off under contention, but otherwise there is no
+		 * guarantee that somebody wakes us up in a timely fashion!
+		 * So set a timer and try flushing ourselves.
+		 */
+		ret = wait_event_interruptible_timeout(ce->ring->waitqueue,
+						       __eb_flush_ring(eb, ce),
+						       HZ / 10);
+	} while (ret == 0);
+
+	return ret < 0 ? ret : 0;
+}
+
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
 	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
@@ -2278,6 +2333,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
+	err = eb_wait_for_space(&eb);
+	if (unlikely(err))
+		goto err_context;
+
 	/*
 	 * Take a local wakeref for preparing to dispatch the execbuf as
 	 * we expect to access the hardware fairly frequently in the
@@ -2438,6 +2497,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
 	intel_runtime_pm_put(eb.i915, wakeref);
+err_context:
 	i915_gem_context_put(eb.ctx);
 err_destroy:
 	eb_destroy(&eb);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c2a5c48c7541..efb42f79056b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -129,6 +129,7 @@ static void unreserve_gt(struct drm_i915_private *i915)
 static void advance_ring(struct i915_request *request)
 {
 	struct intel_ring *ring = request->ring;
+	unsigned int old_space = ring->space;
 	unsigned int tail;
 
 	/*
@@ -159,6 +160,9 @@ static void advance_ring(struct i915_request *request)
 	list_del_init(&request->ring_link);
 
 	ring->head = tail;
+
+	if ((intel_ring_update_space(ring) ^ old_space) & PAGE_SIZE)
+		wake_up_all(&ring->waitqueue);
 }
 
 static void free_capture_list(struct i915_request *request)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b889b27f8aeb..a94edd50500c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1337,6 +1337,8 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&ring->request_list);
+	init_waitqueue_head(&ring->waitqueue);
+
 	ring->timeline = i915_timeline_get(timeline);
 
 	ring->size = size;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 1398eb81dee6..406dc1ffc855 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -139,6 +139,8 @@ struct intel_ring {
 	u32 space;
 	u32 size;
 	u32 effective_size;
+
+	wait_queue_head_t waitqueue;
 };
 
 struct i915_gem_context;
-- 
2.20.1