[PATCH 14/15] drm/i915: Priority boost for locked waits

Chris Wilson chris at chris-wilson.co.uk
Wed Jan 3 13:01:01 UTC 2018


We always try to do an unlocked wait before resorting to having a
blocking wait under the mutex - so we very rarely have to sleep under
the struct_mutex. However, when we do we want that wait to be as short
as possible as the struct_mutex is our BKL that will stall the driver and
all clients.

There should be no impact for all typical workloads.

v2: Move down a layer to apply to all waits.
v3: Make the priority boost explicit. This makes the paths where we want
boosting under the mutex clear and prevents boosting priority uselessly
for when we are waiting for idle.
v4: Reduce the priority bump such that it only rearranges within one
userspace level and does not reorder userspace itself.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  2 +-
 drivers/gpu/drm/i915/i915_gem.c         |  9 ++++++++-
 drivers/gpu/drm/i915/i915_gem_context.c |  6 +++---
 drivers/gpu/drm/i915/i915_gem_request.c | 12 ++++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h | 12 ++++++++++--
 drivers/gpu/drm/i915/intel_lrc.c        |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  4 +++-
 7 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index caebd5825279..f9bd50563db8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3371,7 +3371,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  unsigned int flags,
 				  int priority);
-#define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX
+#define I915_PRIORITY_DISPLAY (I915_PRIORITY_MAX << I915_PRIORITY_SHIFT)
 
 int __must_check
 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9d59d64df0a3..a04dbafe2d46 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -805,7 +805,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED,
+				   I915_WAIT_LOCKED |
+				   I915_WAIT_PRIORITY,
 				   MAX_SCHEDULE_TIMEOUT,
 				   NULL);
 	if (ret)
@@ -858,6 +859,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_LOCKED |
+				   I915_WAIT_PRIORITY |
 				   I915_WAIT_ALL,
 				   MAX_SCHEDULE_TIMEOUT,
 				   NULL);
@@ -3467,6 +3469,8 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 {
 	int ret;
 
+	GEM_BUG_ON(flags & I915_WAIT_PRIORITY);
+
 	/* If the device is asleep, we have no requests outstanding */
 	if (!READ_ONCE(i915->gt.awake))
 		return 0;
@@ -3594,6 +3598,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_LOCKED |
+				   I915_WAIT_PRIORITY |
 				   (write ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT,
 				   NULL);
@@ -3710,6 +3715,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		ret = i915_gem_object_wait(obj,
 					   I915_WAIT_INTERRUPTIBLE |
 					   I915_WAIT_LOCKED |
+					   I915_WAIT_PRIORITY |
 					   I915_WAIT_ALL,
 					   MAX_SCHEDULE_TIMEOUT,
 					   NULL);
@@ -3987,6 +3993,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_LOCKED |
+				   I915_WAIT_PRIORITY |
 				   (write ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT,
 				   NULL);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 648e7536ff51..f5c6641a9b21 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -428,7 +428,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 		return ctx;
 
 	i915_gem_context_clear_bannable(ctx);
-	ctx->priority = prio;
+	ctx->priority = prio << I915_PRIORITY_SHIFT;
 	ctx->ring_size = PAGE_SIZE;
 
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
@@ -743,7 +743,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
 	case I915_CONTEXT_PARAM_PRIORITY:
-		args->value = ctx->priority;
+		args->value = ctx->priority >> I915_PRIORITY_SHIFT;
 		break;
 	default:
 		ret = -EINVAL;
@@ -816,7 +816,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				 !capable(CAP_SYS_NICE))
 				ret = -EPERM;
 			else
-				ctx->priority = priority;
+				ctx->priority = priority << I915_PRIORITY_SHIFT;
 		}
 		break;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 836db90ef81b..eefb73285b95 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1200,6 +1200,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		   !!lockdep_is_held(&req->i915->drm.struct_mutex) !=
 		   !!(flags & I915_WAIT_LOCKED));
 #endif
+	GEM_BUG_ON((flags & I915_WAIT_PRIORITY) && !(flags & I915_WAIT_LOCKED));
 	GEM_BUG_ON(timeout < 0);
 
 	if (i915_gem_request_completed(req))
@@ -1210,6 +1211,17 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	trace_i915_gem_request_wait_begin(req, flags);
 
+	/*
+	 * Very rarely do we wait whilst holding the mutex, as we try to always
+	 * do an unlocked wait before using a locked wait. However, when we
+	 * have to resort to a locked wait, we want that wait to be as short
+	 * as possible as the struct_mutex is our BKL that will stall the
+	 * driver and all clients.
+	 */
+	if (flags & I915_WAIT_PRIORITY && req->engine->schedule)
+		req->engine->schedule(req,
+				      req->priotree.priority | I915_PRIORITY_LOCKED);
+
 	add_wait_queue(&req->execute, &exec);
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 6c607f8dbf92..e0615a1ea262 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -81,6 +81,11 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
+#define I915_PRIORITY_SHIFT 8
+#define I915_PRIORITY_MASK (-BIT(I915_PRIORITY_SHIFT))
+
+#define I915_PRIORITY_LOCKED BIT(7)
+
 struct i915_gem_capture_list {
 	struct i915_gem_capture_list *next;
 	struct i915_vma *vma;
@@ -297,7 +302,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
+#define I915_WAIT_PRIORITY	BIT(2) /* struct_mutex held, boost priority */
+#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
@@ -698,7 +704,9 @@ i915_gem_active_retire(struct i915_gem_active *active,
 		return 0;
 
 	ret = i915_wait_request(request,
-				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				I915_WAIT_INTERRUPTIBLE |
+				I915_WAIT_LOCKED |
+				I915_WAIT_PRIORITY,
 				MAX_SCHEDULE_TIMEOUT);
 	if (ret < 0)
 		return ret;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 144a7e90e773..5daa57712a75 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1048,7 +1048,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 	 */
 	if (request->priotree.priority == I915_PRIORITY_INVALID) {
 		GEM_BUG_ON(!list_empty(&request->priotree.link));
-		request->priotree.priority = prio;
+		request->priotree.priority = prio & I915_PRIORITY_MASK;
 		if (stack.dfs_link.next == stack.dfs_link.prev)
 			return;
 		__list_del_entry(&stack.dfs_link);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e2085820b586..ba150793d94d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1614,7 +1614,9 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 		return -ENOSPC;
 
 	timeout = i915_wait_request(target,
-				    I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				    I915_WAIT_INTERRUPTIBLE |
+				    I915_WAIT_LOCKED |
+				    I915_WAIT_PRIORITY,
 				    MAX_SCHEDULE_TIMEOUT);
 	if (timeout < 0)
 		return timeout;
-- 
2.15.1



More information about the Intel-gfx-trybot mailing list