[PATCH 16/17] drm/i915: Priority boost for locked waits
Chris Wilson
chris at chris-wilson.co.uk
Wed Jan 3 09:37:05 UTC 2018
We always try to do an unlocked wait before resorting to having a
blocking wait under the mutex - so we very rarely have to sleep under
the struct_mutex. However, when we do we want that wait to be as short
as possible as the struct_mutex is our BKL that will stall the driver and
all clients.
There should be no impact on typical workloads.
v2: Move down a layer to apply to all waits.
v3: Make the priority boost explicit. This makes the paths where we want
boosting under the mutex clear, and prevents uselessly boosting priority
when we are merely waiting for idle.
v4: Reduce the priority bump such that it only rearranges within one
userspace level and does not reorder userspace itself.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 2 +-
drivers/gpu/drm/i915/i915_gem.c | 9 ++++++++-
drivers/gpu/drm/i915/i915_gem_request.c | 13 ++++++++++++-
drivers/gpu/drm/i915/i915_gem_request.h | 9 +++++++--
drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +++-
5 files changed, 31 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index caebd5825279..9544202d411b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3371,7 +3371,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj,
int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
unsigned int flags,
int priority);
-#define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX
+#define I915_PRIORITY_DISPLAY (I915_PRIORITY_MAX << 8)
int __must_check
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9d59d64df0a3..a04dbafe2d46 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -805,7 +805,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_LOCKED,
+ I915_WAIT_LOCKED |
+ I915_WAIT_PRIORITY,
MAX_SCHEDULE_TIMEOUT,
NULL);
if (ret)
@@ -858,6 +859,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
+ I915_WAIT_PRIORITY |
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT,
NULL);
@@ -3467,6 +3469,8 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
{
int ret;
+ GEM_BUG_ON(flags & I915_WAIT_PRIORITY);
+
/* If the device is asleep, we have no requests outstanding */
if (!READ_ONCE(i915->gt.awake))
return 0;
@@ -3594,6 +3598,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
+ I915_WAIT_PRIORITY |
(write ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT,
NULL);
@@ -3710,6 +3715,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
+ I915_WAIT_PRIORITY |
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT,
NULL);
@@ -3987,6 +3993,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
+ I915_WAIT_PRIORITY |
(write ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT,
NULL);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 9cd19dd890aa..2690d35199fb 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1054,7 +1054,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
* run at the earliest possible convenience.
*/
if (engine->schedule)
- engine->schedule(request, request->ctx->priority);
+ engine->schedule(request, request->ctx->priority << 8);
local_bh_disable();
i915_sw_fence_commit(&request->submit);
@@ -1200,6 +1200,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
!!lockdep_is_held(&req->i915->drm.struct_mutex) !=
!!(flags & I915_WAIT_LOCKED));
#endif
+ GEM_BUG_ON((flags & I915_WAIT_PRIORITY) && !(flags & I915_WAIT_LOCKED));
GEM_BUG_ON(timeout < 0);
if (i915_gem_request_completed(req))
@@ -1210,6 +1211,16 @@ long i915_wait_request(struct drm_i915_gem_request *req,
trace_i915_gem_request_wait_begin(req, flags);
+ /* Very rarely do we wait whilst holding the mutex. We try to always
+ * do an unlocked wait before using a locked wait. However, when we
+ * have to resort to a locked wait, we want that wait to be as short
+ * as possible as the struct_mutex is our BKL that will stall the
+ * driver and all clients.
+ */
+ if (flags & I915_WAIT_PRIORITY && req->engine->schedule)
+ req->engine->schedule(req,
+ req->priotree.priority | I915_PRIORITY_LOCKED);
+
add_wait_queue(&req->execute, &exec);
if (flags & I915_WAIT_LOCKED)
add_wait_queue(errq, &reset);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 5753ab3aaff5..be42b9462e65 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -81,6 +81,8 @@ enum {
I915_PRIORITY_INVALID = INT_MIN
};
+#define I915_PRIORITY_LOCKED BIT(7)
+
struct i915_gem_capture_list {
struct i915_gem_capture_list *next;
struct i915_vma *vma;
@@ -303,7 +305,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
__attribute__((nonnull(1)));
#define I915_WAIT_INTERRUPTIBLE BIT(0)
#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */
+#define I915_WAIT_PRIORITY BIT(2) /* struct_mutex held, boost priority */
+#define I915_WAIT_ALL BIT(3) /* used by i915_gem_object_wait() */
static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
@@ -696,7 +699,9 @@ i915_gem_active_retire(struct i915_gem_active *active,
return 0;
ret = i915_wait_request(request,
- I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ I915_WAIT_PRIORITY,
MAX_SCHEDULE_TIMEOUT);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e2085820b586..ba150793d94d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1614,7 +1614,9 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
return -ENOSPC;
timeout = i915_wait_request(target,
- I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ I915_WAIT_PRIORITY,
MAX_SCHEDULE_TIMEOUT);
if (timeout < 0)
return timeout;
--
2.15.1
More information about the Intel-gfx-trybot
mailing list