[Intel-gfx] [PATCH 24/38] drm/i915: Avoid presumption of execution ordering for kernel context switching
Chris Wilson
chris at chris-wilson.co.uk
Fri Jan 18 14:00:55 UTC 2019
For future GuC implementations, the execution order of individual
requests will be opaque. As such, we will not have a single execution
timeline and will not know the last request/context to be run on each
engine. The major consequence of this is that we do not know which
context is still volatile on the HW and which contexts have been saved
and can be swapped out. The only point at which we can know is after a
synchronous switch to the kernel context, which we perform on idling.
So we must keep each context pinned from its first use until the next
time we idle the GPU. One consequence of this is that context eviction
requires switching to the kernel context and idling!
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
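A condensed sketch of the resulting context lifecycle may help review.
It uses the names from the patch below, but is illustration only, not
the literal code:

/*
 * i915_request_add()
 *   pin_active_context(ce)
 *     - on the first request since the last park, take an extra pin
 *       and track ce on i915->gt.active_contexts
 *     - then drop the pin taken for the request itself, so a context
 *       holds exactly one "active" pin however many requests it has
 *
 * i915_gem_idle_work_handler()
 *   i915_gem_switch_to_kernel_context()     // flush user contexts
 *   i915_gem_wait_for_idle(... | I915_WAIT_FOR_IDLE_PARK, ...)
 *     __i915_gem_park()
 *       intel_contexts_park()               // unpin every tracked context
 */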
drivers/gpu/drm/i915/i915_drv.h | 4 +
drivers/gpu/drm/i915/i915_gem.c | 151 +++++++-----------
drivers/gpu/drm/i915/i915_gem_context.c | 93 +++--------
drivers/gpu/drm/i915/i915_gem_context.h | 10 +-
drivers/gpu/drm/i915/i915_gem_evict.c | 20 +--
drivers/gpu/drm/i915/i915_request.c | 41 ++---
drivers/gpu/drm/i915/i915_request.h | 1 +
drivers/gpu/drm/i915/intel_engine_cs.c | 60 +------
drivers/gpu/drm/i915/intel_ringbuffer.h | 14 --
drivers/gpu/drm/i915/selftests/i915_gem.c | 4 +-
.../gpu/drm/i915/selftests/i915_gem_context.c | 60 +++----
.../gpu/drm/i915/selftests/i915_gem_evict.c | 148 -----------------
.../gpu/drm/i915/selftests/intel_hangcheck.c | 2 +-
drivers/gpu/drm/i915/selftests/intel_lrc.c | 8 +-
.../drm/i915/selftests/intel_workarounds.c | 2 +-
drivers/gpu/drm/i915/selftests/mock_context.c | 1 +
drivers/gpu/drm/i915/selftests/mock_engine.c | 11 +-
.../gpu/drm/i915/selftests/mock_gem_device.c | 15 +-
18 files changed, 177 insertions(+), 468 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cd2ea6e13fa6..74bccb153359 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1984,8 +1984,10 @@ struct drm_i915_private {
struct list_head hwsp_free_list;
} timelines;
+ struct list_head active_contexts;
struct list_head active_rings;
struct list_head closed_vma;
+ unsigned long active_engines;
u32 active_requests;
/**
@@ -2095,6 +2097,8 @@ static inline struct drm_i915_private *huc_to_i915(struct intel_huc *huc)
(tmp__) ? \
((engine__) = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : \
0;)
+#define for_each_active_engine(engine__, i915__, tmp__) \
+ for_each_engine_masked((engine__), (i915__), (i915__)->gt.active_engines, (tmp__))
enum hdmi_force_audio {
HDMI_AUDIO_OFF_DVI = -2, /* no aux data for HDMI-DVI converter */
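The new for_each_active_engine() only visits engines whose bit is set
in gt.active_engines, i.e. engines that have seen a request since the
last park. A minimal (hypothetical) caller, in the same shape as the
switch-to-kernel-context rewrite later in this patch:

	struct intel_engine_cs *engine;
	unsigned long tmp;

	/* Engines idle since the last park are skipped entirely. */
	for_each_active_engine(engine, i915, tmp) {
		/* e.g. emit a request on i915->kernel_context here */
	}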
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9d6edb0c8a75..96b33f8ba9a9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -168,11 +168,19 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
synchronize_irq(i915->drm.irq);
intel_engines_park(i915);
+ intel_contexts_park(i915);
i915_timelines_park(i915);
i915_pmu_gt_parked(i915);
i915_vma_parked(i915);
+ /*
+ * A crude hack for mock selftesting to not release its fake powerwell.
+ * A better hack would be to give it a real fake powerwell.
+ */
+ if (I915_SELFTEST_ONLY(i915->gt.awake == -1 && i915->gt.epoch == -1))
+ return i915->gt.epoch;
+
wakeref = fetch_and_zero(&i915->gt.awake);
GEM_BUG_ON(!wakeref);
@@ -2986,43 +2994,23 @@ static void __sleep_rcu(struct rcu_head *rcu)
}
}
-static inline bool
-new_requests_since_last_retire(const struct drm_i915_private *i915)
-{
- return (READ_ONCE(i915->gt.active_requests) ||
- work_pending(&i915->gt.idle_work.work));
-}
-
-static void assert_kernel_context_is_current(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- if (i915_terminally_wedged(&i915->gpu_error))
- return;
-
- GEM_BUG_ON(i915->gt.active_requests);
- for_each_engine(engine, i915, id) {
- GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request));
- GEM_BUG_ON(engine->last_retired_context !=
- to_intel_context(i915->kernel_context, engine));
- }
-}
-
static void
i915_gem_idle_work_handler(struct work_struct *work)
{
- struct drm_i915_private *dev_priv =
- container_of(work, typeof(*dev_priv), gt.idle_work.work);
+ struct drm_i915_private *i915 =
+ container_of(work, typeof(*i915), gt.idle_work.work);
unsigned int epoch = I915_EPOCH_INVALID;
bool rearm_hangcheck;
- if (!READ_ONCE(dev_priv->gt.awake))
+ if (!READ_ONCE(i915->gt.awake))
return;
- if (READ_ONCE(dev_priv->gt.active_requests))
+ if (READ_ONCE(i915->gt.active_requests))
return;
+ rearm_hangcheck =
+ cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
+
/*
* Flush out the last user context, leaving only the pinned
* kernel context resident. When we are idling on the kernel_context,
@@ -3031,56 +3019,41 @@ i915_gem_idle_work_handler(struct work_struct *work)
* always called at least twice before idling (and if the system is
* idle that implies a round trip through the retire worker).
*/
- mutex_lock(&dev_priv->drm.struct_mutex);
- i915_gem_switch_to_kernel_context(dev_priv);
- mutex_unlock(&dev_priv->drm.struct_mutex);
-
- GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n",
- READ_ONCE(dev_priv->gt.active_requests));
-
- /*
- * Wait for last execlists context complete, but bail out in case a
- * new request is submitted. As we don't trust the hardware, we
- * continue on if the wait times out. This is necessary to allow
- * the machine to suspend even if the hardware dies, and we will
- * try to recover in resume (after depriving the hardware of power,
- * it may be in a better mmod).
- */
- __wait_for(if (new_requests_since_last_retire(dev_priv)) return,
- intel_engines_are_idle(dev_priv),
- I915_IDLE_ENGINES_TIMEOUT * 1000,
- 10, 500);
-
- rearm_hangcheck =
- cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
-
- if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
+ if (!mutex_trylock(&i915->drm.struct_mutex)) {
/* Currently busy, come back later */
- mod_delayed_work(dev_priv->wq,
- &dev_priv->gt.idle_work,
+ mod_delayed_work(i915->wq,
+ &i915->gt.idle_work,
msecs_to_jiffies(50));
goto out_rearm;
}
- /*
- * New request retired after this work handler started, extend active
- * period until next instance of the work.
- */
- if (new_requests_since_last_retire(dev_priv))
- goto out_unlock;
+ if (!i915->gt.active_requests &&
+ !work_pending(&i915->gt.idle_work.work)) {
+ ++i915->gt.active_requests; /* don't requeue idle! */
- epoch = __i915_gem_park(dev_priv);
+ if (i915_gem_switch_to_kernel_context(i915) ||
+ i915_gem_wait_for_idle(i915,
+ I915_WAIT_LOCKED,
+ HZ / 10)) {
+ dev_err(i915->drm.dev,
+ "Failed to idle engines, declaring wedged!\n");
+ GEM_TRACE_DUMP();
+ i915_gem_set_wedged(i915);
+ i915_retire_requests(i915);
+ }
- assert_kernel_context_is_current(dev_priv);
+ if (!--i915->gt.active_requests) {
+ epoch = __i915_gem_park(i915);
+ rearm_hangcheck = false;
+ }
+ }
- rearm_hangcheck = false;
-out_unlock:
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ mutex_unlock(&i915->drm.struct_mutex);
out_rearm:
if (rearm_hangcheck) {
- GEM_BUG_ON(!dev_priv->gt.awake);
- i915_queue_hangcheck(dev_priv);
+ GEM_BUG_ON(!i915->gt.awake);
+ i915_queue_hangcheck(i915);
}
/*
@@ -3091,11 +3064,11 @@ i915_gem_idle_work_handler(struct work_struct *work)
* period, and then queue a task (that will run last on the wq) to
* shrink and re-optimize the caches.
*/
- if (same_epoch(dev_priv, epoch)) {
+ if (same_epoch(i915, epoch)) {
struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
if (s) {
init_rcu_head(&s->rcu);
- s->i915 = dev_priv;
+ s->i915 = i915;
s->epoch = epoch;
call_rcu(&s->rcu, __sleep_rcu);
}
@@ -3244,8 +3217,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
- /* If the device is asleep, we have no requests outstanding */
- if (!READ_ONCE(i915->gt.awake))
+ if (!READ_ONCE(i915->gt.active_requests))
return 0;
mutex_lock(&i915->gt.timelines.mutex);
@@ -3298,7 +3270,11 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
return err;
i915_retire_requests(i915);
- GEM_BUG_ON(i915->gt.active_requests);
+ if (flags & I915_WAIT_FOR_IDLE_PARK) {
+ GEM_BUG_ON(i915->gt.active_requests);
+ __i915_gem_park(i915);
+ GEM_BUG_ON(i915->gt.active_engines);
+ }
}
return 0;
@@ -4504,10 +4480,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
intel_runtime_pm_put(i915, wakeref);
-
- mutex_lock(&i915->drm.struct_mutex);
- i915_gem_contexts_lost(i915);
- mutex_unlock(&i915->drm.struct_mutex);
}
int i915_gem_suspend(struct drm_i915_private *i915)
@@ -4541,12 +4513,11 @@ int i915_gem_suspend(struct drm_i915_private *i915)
ret = i915_gem_wait_for_idle(i915,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
- I915_WAIT_FOR_IDLE_BOOST,
+ I915_WAIT_FOR_IDLE_BOOST |
+ I915_WAIT_FOR_IDLE_PARK,
MAX_SCHEDULE_TIMEOUT);
if (ret && ret != -EIO)
goto err_unlock;
-
- assert_kernel_context_is_current(i915);
}
i915_retire_requests(i915); /* ensure we flush after wedging */
@@ -4826,6 +4797,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
for_each_engine(engine, i915, id) {
struct i915_request *rq;
+ if (!engine->context_size)
+ continue;
+
rq = i915_request_alloc(engine, ctx);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
@@ -4845,21 +4819,19 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
if (err)
goto err_active;
- if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) {
- i915_gem_set_wedged(i915);
- err = -EIO; /* Caller will declare us wedged */
- goto err_active;
- }
-
- assert_kernel_context_is_current(i915);
-
/*
* Immediately park the GPU so that we enable powersaving and
* treat it as idle. The next time we issue a request, we will
* unpark and start using the engine->pinned_default_state, otherwise
* it is in limbo and an early reset may fail.
*/
- __i915_gem_park(i915);
+ if (i915_gem_wait_for_idle(i915,
+ I915_WAIT_LOCKED | I915_WAIT_FOR_IDLE_PARK,
+ HZ / 5)) {
+ i915_gem_set_wedged(i915);
+ err = -EIO; /* Caller will declare us wedged */
+ goto err_active;
+ }
for_each_engine(engine, i915, id) {
struct i915_vma *state;
@@ -4934,11 +4906,11 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
goto out_ctx;
if (WARN_ON(i915_gem_wait_for_idle(i915,
- I915_WAIT_LOCKED,
+ I915_WAIT_LOCKED |
+ I915_WAIT_FOR_IDLE_PARK,
MAX_SCHEDULE_TIMEOUT)))
goto out_ctx;
- i915_gem_contexts_lost(i915);
goto out_ctx;
}
@@ -5280,6 +5252,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
if (!dev_priv->priorities)
goto err_dependencies;
+ INIT_LIST_HEAD(&dev_priv->gt.active_contexts);
INIT_LIST_HEAD(&dev_priv->gt.active_rings);
INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
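Note how I915_WAIT_FOR_IDLE_PARK is threaded through this file: since
contexts are now only unpinned when the GPU is parked, a caller that
previously relied on idling alone to release contexts must ask the
wait to park as well. For example (mirroring the suspend path above):

	err = i915_gem_wait_for_idle(i915,
				     I915_WAIT_LOCKED |
				     I915_WAIT_FOR_IDLE_PARK,
				     MAX_SCHEDULE_TIMEOUT);
	/* on success the GPU is parked and all contexts are unpinned */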
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 054d3e1bfe00..ecba3ee204a6 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -342,6 +342,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
struct intel_context *ce = &ctx->__engine[n];
ce->gem_context = ctx;
+ INIT_LIST_HEAD(&ce->active_link);
INIT_LIST_HEAD(&ce->signal_link);
INIT_LIST_HEAD(&ce->signals);
}
@@ -576,17 +577,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
return 0;
}
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
- for_each_engine(engine, dev_priv, id)
- intel_engine_lost_context(engine);
-}
-
void i915_gem_contexts_fini(struct drm_i915_private *i915)
{
lockdep_assert_held(&i915->drm.struct_mutex);
@@ -660,80 +650,28 @@ last_request_on_engine(struct i915_timeline *timeline,
return NULL;
}
-static bool engine_has_kernel_context_barrier(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- const struct intel_context * const ce =
- to_intel_context(i915->kernel_context, engine);
- struct i915_timeline *barrier = ce->ring->timeline;
- struct intel_ring *ring;
- bool any_active = false;
-
- lockdep_assert_held(&i915->drm.struct_mutex);
- list_for_each_entry(ring, &i915->gt.active_rings, active_link) {
- struct i915_request *rq;
-
- rq = last_request_on_engine(ring->timeline, engine);
- if (!rq)
- continue;
-
- any_active = true;
-
- if (rq->hw_context == ce)
- continue;
-
- /*
- * Was this request submitted after the previous
- * switch-to-kernel-context?
- */
- if (!i915_timeline_sync_is_later(barrier, &rq->fence)) {
- GEM_TRACE("%s needs barrier for %llx:%lld\n",
- ring->timeline->name,
- rq->fence.context,
- rq->fence.seqno);
- return false;
- }
-
- GEM_TRACE("%s has barrier after %llx:%lld\n",
- ring->timeline->name,
- rq->fence.context,
- rq->fence.seqno);
- }
-
- /*
- * If any other timeline was still active and behind the last barrier,
- * then our last switch-to-kernel-context must still be queued and
- * will run last (leaving the engine in the kernel context when it
- * eventually idles).
- */
- if (any_active)
- return true;
-
- /* The engine is idle; check that it is idling in the kernel context. */
- return engine->last_retired_context == ce;
-}
-
int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
{
struct intel_engine_cs *engine;
- enum intel_engine_id id;
+ unsigned long tmp;
GEM_TRACE("awake?=%s\n", yesno(i915->gt.awake));
lockdep_assert_held(&i915->drm.struct_mutex);
GEM_BUG_ON(!i915->kernel_context);
- i915_retire_requests(i915);
+ /* Inoperable, so presume the GPU is safely pointing into the void! */
+ if (i915_terminally_wedged(&i915->gpu_error))
+ return 0;
- for_each_engine(engine, i915, id) {
+ for_each_active_engine(engine, i915, tmp) {
struct intel_ring *ring;
struct i915_request *rq;
- GEM_BUG_ON(!to_intel_context(i915->kernel_context, engine));
- if (engine_has_kernel_context_barrier(engine))
+ if (!engine->context_size)
continue;
- GEM_TRACE("emit barrier on %s\n", engine->name);
+ GEM_BUG_ON(!to_intel_context(i915->kernel_context, engine));
rq = i915_request_alloc(engine, i915->kernel_context);
if (IS_ERR(rq))
@@ -757,7 +695,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
i915_sw_fence_await_sw_fence_gfp(&rq->submit,
&prev->submit,
I915_FENCE_GFP);
- i915_timeline_sync_set(rq->timeline, &prev->fence);
}
i915_request_add(rq);
@@ -1024,6 +961,20 @@ int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
return err;
}
+void intel_contexts_park(struct drm_i915_private *i915)
+{
+ struct intel_context *ce, *cn;
+
+ lockdep_assert_held(&i915->drm.struct_mutex);
+
+ list_for_each_entry_safe(ce, cn,
+ &i915->gt.active_contexts, active_link) {
+ INIT_LIST_HEAD(&ce->active_link);
+ intel_context_unpin(ce);
+ }
+ INIT_LIST_HEAD(&i915->gt.active_contexts);
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_context.c"
#include "selftests/i915_gem_context.c"
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index e5eca29cd373..1e41a97b8007 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -164,6 +164,7 @@ struct i915_gem_context {
struct intel_context {
struct i915_gem_context *gem_context;
struct intel_engine_cs *active;
+ struct list_head active_link;
struct list_head signal_link;
struct list_head signals;
struct i915_vma *state;
@@ -314,6 +315,12 @@ static inline void __intel_context_pin(struct intel_context *ce)
ce->pin_count++;
}
+static inline void __intel_context_unpin(struct intel_context *ce)
+{
+ GEM_BUG_ON(!ce->pin_count);
+ ce->pin_count--;
+}
+
static inline void intel_context_unpin(struct intel_context *ce)
{
GEM_BUG_ON(!ce->pin_count);
@@ -324,9 +331,10 @@ static inline void intel_context_unpin(struct intel_context *ce)
ce->ops->unpin(ce);
}
+void intel_contexts_park(struct drm_i915_private *i915);
+
/* i915_gem_context.c */
int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
int i915_gem_context_open(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index dc137701acb8..d4a7d460f8e3 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -32,24 +32,9 @@
#include "intel_drv.h"
#include "i915_trace.h"
-I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
- bool fail_if_busy:1;
-} igt_evict_ctl;)
-
static bool ggtt_is_idle(struct drm_i915_private *i915)
{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- if (i915->gt.active_requests)
- return false;
-
- for_each_engine(engine, i915, id) {
- if (!intel_engine_has_kernel_context(engine))
- return false;
- }
-
- return true;
+ return !i915->gt.active_requests;
}
static int ggtt_flush(struct drm_i915_private *i915)
@@ -235,9 +220,6 @@ i915_gem_evict_something(struct i915_address_space *vm,
* the kernel's there is no more we can evict.
*/
if (!ggtt_is_idle(dev_priv)) {
- if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
- return -EBUSY;
-
ret = ggtt_flush(dev_priv);
if (ret)
return ret;
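With the per-engine kernel-context tracking gone, ggtt_is_idle()
collapses to an active_requests check, and reclaiming space held by
pinned contexts goes through ggtt_flush(), i.e. a switch to the kernel
context followed by a wait, as the commit message notes. The caller
shape (matching the hunk above; sketch only):

	if (!ggtt_is_idle(dev_priv)) {
		/*
		 * Flush the GPU: switching to the kernel context and
		 * idling is what lets the pinned contexts be released
		 * and their GGTT nodes become evictable.
		 */
		ret = ggtt_flush(dev_priv);
		if (ret)
			return ret;
	}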
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 5ad14409b52d..6ff0c47c3a0d 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -210,18 +210,6 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
spin_unlock(&rq->lock);
local_irq_enable();
-
- /*
- * The backing object for the context is done after switching to the
- * *next* context. Therefore we cannot retire the previous context until
- * the next context has already started running. However, since we
- * cannot take the required locks at i915_request_submit() we
- * defer the unpinning of the active context to now, retirement of
- * the subsequent request.
- */
- if (engine->last_retired_context)
- intel_context_unpin(engine->last_retired_context);
- engine->last_retired_context = rq->hw_context;
}
static void __retire_engine_upto(struct intel_engine_cs *engine,
@@ -294,7 +282,6 @@ static void i915_request_retire(struct i915_request *request)
/* Retirement decays the ban score as it is a sign of ctx progress */
atomic_dec_if_positive(&request->gem_context->ban_score);
- intel_context_unpin(request->hw_context);
__retire_engine_upto(request->engine, request);
@@ -556,8 +543,15 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
* ourselves.
*/
ce = intel_context_pin(ctx, engine);
- if (IS_ERR(ce))
- return ERR_CAST(ce);
+ if (IS_ERR(ce)) {
+ i915_gem_wait_for_idle(i915,
+ I915_WAIT_LOCKED |
+ I915_WAIT_FOR_IDLE_PARK,
+ MAX_SCHEDULE_TIMEOUT);
+ ce = intel_context_pin(ctx, engine);
+ if (IS_ERR(ce))
+ return ERR_CAST(ce);
+ }
reserve_gt(i915);
@@ -663,9 +657,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
if (ret)
goto err_unwind;
- /* Keep a second pin for the dual retirement along engine and ring */
- __intel_context_pin(ce);
-
rq->infix = rq->ring->emit; /* end of header; start of user payload */
/* Check that we didn't interrupt ourselves with a new request */
@@ -860,6 +851,17 @@ void i915_request_skip(struct i915_request *rq, int error)
memset(vaddr + head, 0, rq->postfix - head);
}
+static void pin_active_context(struct drm_i915_private *i915,
+ struct intel_context *ce)
+{
+ if (unlikely(list_empty(&ce->active_link))) {
+ list_add(&ce->active_link, &i915->gt.active_contexts);
+ __intel_context_pin(ce);
+ }
+ __intel_context_unpin(ce);
+ GEM_BUG_ON(!ce->pin_count);
+}
+
/*
* NB: This function is not allowed to fail. Doing so would mean that the
* request is not being tracked for completion but the work itself is
@@ -935,6 +937,9 @@ void i915_request_add(struct i915_request *request)
GEM_TRACE("marking %s as active\n", ring->timeline->name);
list_add(&ring->active_link, &request->i915->gt.active_rings);
}
+ pin_active_context(request->i915, request->hw_context);
+ request->i915->gt.active_engines |= BIT(engine->id);
+
request->emitted_jiffies = jiffies;
/*
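The pin transfer in pin_active_context() above is easy to misread:
i915_request_alloc() pins the context for the request, and
i915_request_add() converts that into a single pin owned by the
gt.active_contexts list, however many requests follow. The accounting,
as a sketch:

	/*
	 * i915_request_alloc():                 pin_count n -> n + 1
	 * pin_active_context(), first request:  +1 (list), -1 (request)
	 * pin_active_context(), later requests: -1 (request pin returned)
	 * intel_contexts_park():                -1 (list pin, back to 0)
	 */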
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index b6d473923506..dd413d51cc61 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -278,6 +278,7 @@ long i915_request_wait(struct i915_request *rq,
#define I915_WAIT_PRIORITY BIT(2) /* small priority bump for the request */
#define I915_WAIT_ALL BIT(3) /* used by i915_gem_object_wait() */
#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
+#define I915_WAIT_FOR_IDLE_PARK BIT(5)
/**
* Returns true if seq1 is later than seq2.
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 81349258c4d6..b7f129504014 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -613,8 +613,8 @@ int intel_engine_setup_common(struct intel_engine_cs *engine)
return err;
}
-static void __intel_context_unpin(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+static void context_unpin(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
{
intel_context_unpin(to_intel_context(ctx, engine));
}
@@ -666,7 +666,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
return 0;
err_unpin_kernel:
- __intel_context_unpin(i915->kernel_context, engine);
+ context_unpin(i915->kernel_context, engine);
return ret;
}
@@ -691,8 +691,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
i915_gem_object_put(engine->default_state);
if (i915->preempt_context)
- __intel_context_unpin(i915->preempt_context, engine);
- __intel_context_unpin(i915->kernel_context, engine);
+ context_unpin(i915->preempt_context, engine);
+ context_unpin(i915->kernel_context, engine);
i915_timeline_fini(&engine->timeline);
@@ -1006,34 +1006,6 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
return true;
}
-/**
- * intel_engine_has_kernel_context:
- * @engine: the engine
- *
- * Returns true if the last context to be executed on this engine, or has been
- * executed if the engine is already idle, is the kernel context
- * (#i915.kernel_context).
- */
-bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
-{
- const struct intel_context *kernel_context =
- to_intel_context(engine->i915->kernel_context, engine);
- struct i915_request *rq;
-
- lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
- /*
- * Check the last context seen by the engine. If active, it will be
- * the last request that remains in the timeline. When idle, it is
- * the last executed context as tracked by retirement.
- */
- rq = __i915_gem_active_peek(&engine->timeline.last_request);
- if (rq)
- return rq->hw_context == kernel_context;
- else
- return engine->last_retired_context == kernel_context;
-}
-
void intel_engines_reset_default_submission(struct drm_i915_private *i915)
{
struct intel_engine_cs *engine;
@@ -1121,6 +1093,8 @@ void intel_engines_park(struct drm_i915_private *i915)
i915_gem_batch_pool_fini(&engine->batch_pool);
engine->execlists.no_priolist = false;
}
+
+ i915->gt.active_engines = 0;
}
/**
@@ -1152,26 +1126,6 @@ void intel_engines_unpark(struct drm_i915_private *i915)
}
}
-/**
- * intel_engine_lost_context: called when the GPU is reset into unknown state
- * @engine: the engine
- *
- * We have either reset the GPU or otherwise about to lose state tracking of
- * the current GPU logical state (e.g. suspend). On next use, it is therefore
- * imperative that we make no presumptions about the current state and load
- * from scratch.
- */
-void intel_engine_lost_context(struct intel_engine_cs *engine)
-{
- struct intel_context *ce;
-
- lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
- ce = fetch_and_zero(&engine->last_retired_context);
- if (ce)
- intel_context_unpin(ce);
-}
-
bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
{
switch (INTEL_GEN(engine->i915)) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 126c5285fd01..76e06ca0490b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -497,17 +497,6 @@ struct intel_engine_cs {
struct intel_engine_execlists execlists;
- /* Contexts are pinned whilst they are active on the GPU. The last
- * context executed remains active whilst the GPU is idle - the
- * switch away and write to the context object only occurs on the
- * next execution. Contexts are only unpinned on retirement of the
- * following request ensuring that we can always write to the object
- * on the context switch even after idling. Across suspend, we switch
- * to the kernel context and trash it as the save may not happen
- * before the hardware is powered down.
- */
- struct intel_context *last_retired_context;
-
/* status_notifier: list of callbacks for context-switch changes */
struct atomic_notifier_head context_status_notifier;
@@ -969,9 +958,6 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
-bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
-void intel_engine_lost_context(struct intel_engine_cs *engine);
-
void intel_engines_park(struct drm_i915_private *i915);
void intel_engines_unpark(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index e77b7ed449ae..378ad0adf2c6 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -164,7 +164,7 @@ static int igt_gem_suspend(void *arg)
mutex_lock(&i915->drm.struct_mutex);
err = switch_to_context(i915, ctx);
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(i915, I915_WAIT_LOCKED | I915_WAIT_FOR_IDLE_PARK))
err = -EIO;
mutex_unlock(&i915->drm.struct_mutex);
out:
@@ -205,7 +205,7 @@ static int igt_gem_hibernate(void *arg)
mutex_lock(&i915->drm.struct_mutex);
err = switch_to_context(i915, ctx);
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(i915, I915_WAIT_LOCKED | I915_WAIT_FOR_IDLE_PARK))
err = -EIO;
mutex_unlock(&i915->drm.struct_mutex);
out:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 2cca234fd291..6a241745e78a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1097,7 +1097,7 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
{
struct intel_engine_cs *engine;
unsigned int tmp;
- int err;
+ int pass;
GEM_TRACE("Testing %s\n", __engine_name(i915, engines));
for_each_engine_masked(engine, i915, engines, tmp) {
@@ -1110,50 +1110,34 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
i915_request_add(rq);
}
- err = i915_gem_switch_to_kernel_context(i915);
- if (err)
- return err;
+ for (pass = 0; pass < 2; pass++) { /* Once busy; once idle */
+ int err;
- for_each_engine_masked(engine, i915, engines, tmp) {
- if (!engine_has_kernel_context_barrier(engine)) {
- pr_err("kernel context not last on engine %s!\n",
- engine->name);
- return -EINVAL;
- }
- }
+ err = i915_gem_switch_to_kernel_context(i915);
+ if (err)
+ return err;
- err = i915_gem_wait_for_idle(i915,
- I915_WAIT_LOCKED,
- MAX_SCHEDULE_TIMEOUT);
- if (err)
- return err;
+ if (!pass) {
+ err = i915_gem_wait_for_idle(i915,
+ I915_WAIT_LOCKED |
+ I915_WAIT_FOR_IDLE_PARK,
+ MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ return err;
+ }
- GEM_BUG_ON(i915->gt.active_requests);
- for_each_engine_masked(engine, i915, engines, tmp) {
- if (engine->last_retired_context->gem_context != i915->kernel_context) {
- pr_err("engine %s not idling in kernel context!\n",
- engine->name);
+ if (i915->gt.active_requests) {
+ pr_err("%d active requests remain after switching to kernel context while %s\n",
+ i915->gt.active_requests,
+ pass ? "idle" : "busy");
return -EINVAL;
}
- }
- err = i915_gem_switch_to_kernel_context(i915);
- if (err)
- return err;
-
- if (i915->gt.active_requests) {
- pr_err("switch-to-kernel-context emitted %d requests even though it should already be idling in the kernel context\n",
- i915->gt.active_requests);
- return -EINVAL;
+ /* XXX Bonus points for proving we are the kernel context! */
}
- for_each_engine_masked(engine, i915, engines, tmp) {
- if (!intel_engine_has_kernel_context(engine)) {
- pr_err("kernel context not last on engine %s!\n",
- engine->name);
- return -EINVAL;
- }
- }
+ if (igt_flush_test(i915, I915_WAIT_LOCKED | I915_WAIT_FOR_IDLE_PARK))
+ return -EIO;
return 0;
}
@@ -1197,8 +1181,6 @@ static int igt_switch_to_kernel_context(void *arg)
out_unlock:
GEM_TRACE_DUMP_ON(err);
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
intel_runtime_pm_put(i915, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index c8deb961a020..1e9c9f53a830 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -371,153 +371,6 @@ static int igt_evict_vm(void *arg)
return err;
}
-static int igt_evict_contexts(void *arg)
-{
- const u64 PRETEND_GGTT_SIZE = 16ull << 20;
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- struct reserved {
- struct drm_mm_node node;
- struct reserved *next;
- } *reserved = NULL;
- intel_wakeref_t wakeref;
- struct drm_mm_node hole;
- unsigned long count;
- int err;
-
- /*
- * The purpose of this test is to verify that we will trigger an
- * eviction in the GGTT when constructing a request that requires
- * additional space in the GGTT for pinning the context. This space
- * is not directly tied to the request so reclaiming it requires
- * extra work.
- *
- * As such this test is only meaningful for full-ppgtt environments
- * where the GTT space of the request is separate from the GGTT
- * allocation required to build the request.
- */
- if (!HAS_FULL_PPGTT(i915))
- return 0;
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(i915);
-
- /* Reserve a block so that we know we have enough to fit a few rq */
- memset(&hole, 0, sizeof(hole));
- err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole,
- PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE,
- 0, i915->ggtt.vm.total,
- PIN_NOEVICT);
- if (err)
- goto out_locked;
-
- /* Make the GGTT appear small by filling it with unevictable nodes */
- count = 0;
- do {
- struct reserved *r;
-
- r = kcalloc(1, sizeof(*r), GFP_KERNEL);
- if (!r) {
- err = -ENOMEM;
- goto out_locked;
- }
-
- if (i915_gem_gtt_insert(&i915->ggtt.vm, &r->node,
- 1ul << 20, 0, I915_COLOR_UNEVICTABLE,
- 0, i915->ggtt.vm.total,
- PIN_NOEVICT)) {
- kfree(r);
- break;
- }
-
- r->next = reserved;
- reserved = r;
-
- count++;
- } while (1);
- drm_mm_remove_node(&hole);
- mutex_unlock(&i915->drm.struct_mutex);
- pr_info("Filled GGTT with %lu 1MiB nodes\n", count);
-
- /* Overfill the GGTT with context objects and so try to evict one. */
- for_each_engine(engine, i915, id) {
- struct i915_sw_fence fence;
- struct drm_file *file;
-
- file = mock_file(i915);
- if (IS_ERR(file)) {
- err = PTR_ERR(file);
- break;
- }
-
- count = 0;
- mutex_lock(&i915->drm.struct_mutex);
- onstack_fence_init(&fence);
- do {
- struct i915_request *rq;
- struct i915_gem_context *ctx;
-
- ctx = live_context(i915, file);
- if (!ctx)
- break;
-
- /* We will need some GGTT space for the rq's context */
- igt_evict_ctl.fail_if_busy = true;
- rq = i915_request_alloc(engine, ctx);
- igt_evict_ctl.fail_if_busy = false;
-
- if (IS_ERR(rq)) {
- /* When full, fail_if_busy will trigger EBUSY */
- if (PTR_ERR(rq) != -EBUSY) {
- pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n",
- ctx->hw_id, engine->name,
- (int)PTR_ERR(rq));
- err = PTR_ERR(rq);
- }
- break;
- }
-
- /* Keep every request/ctx pinned until we are full */
- err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
- &fence,
- GFP_KERNEL);
- if (err < 0)
- break;
-
- i915_request_add(rq);
- count++;
- err = 0;
- } while(1);
- mutex_unlock(&i915->drm.struct_mutex);
-
- onstack_fence_fini(&fence);
- pr_info("Submitted %lu contexts/requests on %s\n",
- count, engine->name);
-
- mock_file_free(i915, file);
- if (err)
- break;
- }
-
- mutex_lock(&i915->drm.struct_mutex);
-out_locked:
- while (reserved) {
- struct reserved *next = reserved->next;
-
- drm_mm_remove_node(&reserved->node);
- kfree(reserved);
-
- reserved = next;
- }
- if (drm_mm_node_allocated(&hole))
- drm_mm_remove_node(&hole);
- intel_runtime_pm_put(i915, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
-
- return err;
-}
-
int i915_gem_evict_mock_selftests(void)
{
static const struct i915_subtest tests[] = {
@@ -548,7 +401,6 @@ int i915_gem_evict_mock_selftests(void)
int i915_gem_evict_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
- SUBTEST(igt_evict_contexts),
};
if (i915_terminally_wedged(&i915->gpu_error))
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 128d37bba1ac..04d66b4303ab 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -1669,7 +1669,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
err = i915_subtests(tests, i915);
mutex_lock(&i915->drm.struct_mutex);
- igt_flush_test(i915, I915_WAIT_LOCKED);
+ igt_flush_test(i915, I915_WAIT_LOCKED | I915_WAIT_FOR_IDLE_PARK);
mutex_unlock(&i915->drm.struct_mutex);
i915_modparams.enable_hangcheck = saved_hangcheck;
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 2b2ecd76c2ac..607c969ea605 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -67,7 +67,7 @@ static int live_sanitycheck(void *arg)
err_spin:
igt_spinner_fini(&spin);
err_unlock:
- igt_flush_test(i915, I915_WAIT_LOCKED);
+ igt_flush_test(i915, I915_WAIT_LOCKED | I915_WAIT_FOR_IDLE_PARK);
intel_runtime_pm_put(i915, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
return err;
@@ -161,7 +161,7 @@ static int live_preempt(void *arg)
err_spin_hi:
igt_spinner_fini(&spin_hi);
err_unlock:
- igt_flush_test(i915, I915_WAIT_LOCKED);
+ igt_flush_test(i915, I915_WAIT_LOCKED | I915_WAIT_FOR_IDLE_PARK);
intel_runtime_pm_put(i915, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
return err;
@@ -255,7 +255,7 @@ static int live_late_preempt(void *arg)
err_spin_hi:
igt_spinner_fini(&spin_hi);
err_unlock:
- igt_flush_test(i915, I915_WAIT_LOCKED);
+ igt_flush_test(i915, I915_WAIT_LOCKED | I915_WAIT_FOR_IDLE_PARK);
intel_runtime_pm_put(i915, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
return err;
@@ -379,7 +379,7 @@ static int live_preempt_hang(void *arg)
err_spin_hi:
igt_spinner_fini(&spin_hi);
err_unlock:
- igt_flush_test(i915, I915_WAIT_LOCKED);
+ igt_flush_test(i915, I915_WAIT_LOCKED | I915_WAIT_FOR_IDLE_PARK);
intel_runtime_pm_put(i915, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
return err;
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index b15c4f26c593..e8664bf5c873 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -505,7 +505,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
igt_global_reset_unlock(i915);
kernel_context_close(ctx);
- igt_flush_test(i915, I915_WAIT_LOCKED);
+ igt_flush_test(i915, I915_WAIT_LOCKED | I915_WAIT_FOR_IDLE_PARK);
return ret;
}
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index e4db9a31b510..2009e776b5d8 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -49,6 +49,7 @@ mock_context(struct drm_i915_private *i915,
struct intel_context *ce = &ctx->__engine[n];
ce->gem_context = ctx;
+ INIT_LIST_HEAD(&ce->active_link);
INIT_LIST_HEAD(&ce->signal_link);
INIT_LIST_HEAD(&ce->signals);
}
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index c1cd8b27b32a..0c5649044bc9 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -228,7 +228,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
goto err_free;
i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
+ engine->base.execlists.queue_priority = INT_MIN;
+ engine->base.execlists.queue = RB_ROOT_CACHED;
+
intel_engine_init_breadcrumbs(&engine->base);
+ intel_engine_init_batch_pool(&engine->base);
/* fake hw queue */
spin_lock_init(&engine->hw_lock);
@@ -271,15 +275,10 @@ void mock_engine_free(struct intel_engine_cs *engine)
{
struct mock_engine *mock =
container_of(engine, typeof(*mock), base);
- struct intel_context *ce;
GEM_BUG_ON(timer_pending(&mock->hw_delay));
- ce = fetch_and_zero(&engine->last_retired_context);
- if (ce)
- intel_context_unpin(ce);
-
- __intel_context_unpin(engine->i915->kernel_context, engine);
+ context_unpin(engine->i915->kernel_context, engine);
intel_engine_fini_breadcrumbs(engine);
i915_timeline_fini(&engine->timeline);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 41ae502361d7..42fa8b4cbd38 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -47,6 +47,15 @@ void mock_device_flush(struct drm_i915_private *i915)
GEM_BUG_ON(i915->gt.active_requests);
}
+static void mock_device_park(struct drm_i915_private *i915)
+{
+ intel_engines_park(i915);
+ intel_contexts_park(i915);
+ i915_timelines_park(i915);
+
+ i915_vma_parked(i915);
+}
+
static void mock_device_release(struct drm_device *dev)
{
struct drm_i915_private *i915 = to_i915(dev);
@@ -55,7 +64,7 @@ static void mock_device_release(struct drm_device *dev)
mutex_lock(&i915->drm.struct_mutex);
mock_device_flush(i915);
- i915_gem_contexts_lost(i915);
+ mock_device_park(i915);
mutex_unlock(&i915->drm.struct_mutex);
drain_delayed_work(&i915->gt.retire_work);
@@ -200,7 +209,8 @@ struct drm_i915_private *mock_gem_device(void)
INIT_DELAYED_WORK(&i915->gt.retire_work, mock_retire_work_handler);
INIT_DELAYED_WORK(&i915->gt.idle_work, mock_idle_work_handler);
- i915->gt.awake = true;
+ i915->gt.awake = -1;
+ i915->gt.epoch = -1;
i915->objects = KMEM_CACHE(mock_object, SLAB_HWCACHE_ALIGN);
if (!i915->objects)
@@ -229,6 +239,7 @@ struct drm_i915_private *mock_gem_device(void)
i915_timelines_init(i915);
+ INIT_LIST_HEAD(&i915->gt.active_contexts);
INIT_LIST_HEAD(&i915->gt.active_rings);
INIT_LIST_HEAD(&i915->gt.closed_vma);
--
2.20.1