[Intel-gfx] [PATCH 29/33] drm/i915: Implement an "idle" barrier
Chris Wilson
chris at chris-wilson.co.uk
Fri Jan 25 02:30:01 UTC 2019
We have a number of tasks that we would like to run when idling and
parking the GPU into a powersaving mode. A few of those tasks use the
global idle point as a convenient moment when all previous execution is
known to have been retired (and so we know that the GPU is not still
touching random user memory). However, on a busy system we are unlikely
to see global idle points, and would prefer a much more incremental
scheme whereby tasks can be run after all current execution has
completed.
Enter the idle barrier and idle tasks.
To determine a point in the future when all current tasks are complete,
we schedule a new low priority request that will be executed after all
current work has completed, and impose a barrier so that all future
work is ordered after it. We therefore know that when we retire that
barrier, the GPU is no longer touching any memory released before the
barrier was submitted, allowing us to run a set of idle tasks clear of
any dangling GPU references.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_drv.h | 5 ++
drivers/gpu/drm/i915/i915_gem.c | 90 ++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_request.c | 9 +++
drivers/gpu/drm/i915/i915_timeline.c | 3 +
4 files changed, 107 insertions(+)
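As an illustration of the intended usage (not part of this patch): a
subsystem that wants to release memory only once the GPU has stopped
referencing it could wrap the release in an i915_gem_active and queue
it as an idle task. The my_cache type and function names below are
invented for the example; only init_request_active() and
i915_gem_add_idle_task() come from the tree and this patch.

    /*
     * Hypothetical sketch: defer freeing a cache until the GPU is
     * known to be past all current execution. "my_cache" is invented.
     */
    struct my_cache {
            struct i915_gem_active idle;
            void *pages;
    };

    static void my_cache_retire(struct i915_gem_active *task,
                                struct i915_request *rq)
    {
            /* rq is the idle barrier, or NULL if already idle */
            struct my_cache *cache =
                    container_of(task, typeof(*cache), idle);

            kfree(cache->pages);
            kfree(cache);
    }

    static void my_cache_release(struct drm_i915_private *i915,
                                 struct my_cache *cache)
    {
            lockdep_assert_held(&i915->drm.struct_mutex);

            init_request_active(&cache->idle, my_cache_retire);

            /* Runs now if idle, or after the next idle barrier */
            i915_gem_add_idle_task(i915, &cache->idle);
    }

Note that the retire callback may be invoked with rq == NULL, both from
i915_gem_add_idle_task() when the GPU is already idle and from
call_idle_tasks() when parking, so it must not assume a request.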
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d072f3369ee1..5ca77e2e53fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2021,6 +2021,9 @@ struct drm_i915_private {
*/
struct delayed_work idle_work;
+ struct i915_gem_active idle_barrier;
+ struct list_head idle_tasks;
+
ktime_t last_init_time;
struct i915_vma *scratch;
@@ -3040,6 +3043,8 @@ void i915_gem_fini(struct drm_i915_private *dev_priv);
void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
unsigned int flags, long timeout);
+void i915_gem_add_idle_task(struct drm_i915_private *i915,
+ struct i915_gem_active *idle);
int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
void i915_gem_resume(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 101a0f644787..0a8bcf6e7098 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -141,6 +141,15 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
return 0;
}
+static void call_idle_tasks(struct list_head *tasks)
+{
+ struct i915_gem_active *tsk, *tn;
+
+ list_for_each_entry_safe(tsk, tn, tasks, link)
+ tsk->retire(tsk, NULL);
+ INIT_LIST_HEAD(tasks);
+}
+
static u32 __i915_gem_park(struct drm_i915_private *i915)
{
intel_wakeref_t wakeref;
@@ -169,6 +178,8 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
*/
synchronize_irq(i915->drm.irq);
+ call_idle_tasks(&i915->gt.idle_tasks);
+
intel_engines_park(i915);
i915_timelines_park(i915);
@@ -2906,6 +2917,81 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
return active;
}
+static void idle_barrier(struct drm_i915_private *i915)
+{
+ struct i915_gt_timelines *gt = &i915->gt.timelines;
+ struct i915_timeline *tl;
+ struct i915_request *rq;
+ int err = 0;
+
+ if (list_empty(&i915->gt.idle_tasks))
+ return;
+
+ if (!i915->gt.active_requests) {
+ call_idle_tasks(&i915->gt.idle_tasks);
+ return;
+ }
+
+ /* Keep just one idle barrier in flight, amalgamating tasks instead */
+ if (i915_gem_active_isset(&i915->gt.idle_barrier))
+ return;
+
+ GEM_TRACE("adding idle barrier\n");
+
+ rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context);
+ if (IS_ERR(rq))
+ return;
+
+ /* run after all current requests have executed, but before any new */
+ mutex_lock(&gt->mutex);
+ list_for_each_entry(tl, &gt->active_list, link) {
+ struct i915_request *last;
+
+ if (tl == rq->timeline)
+ continue;
+
+ err = i915_timeline_set_barrier(tl, rq);
+ if (err == -EEXIST)
+ continue;
+ if (err)
+ break;
+
+ last = i915_gem_active_raw(&tl->last_request,
+ &i915->drm.struct_mutex);
+ if (!last)
+ continue;
+
+ mutex_unlock(&gt->mutex); /* allocation ahead! */
+ err = i915_request_await_dma_fence(rq, &last->fence);
+ mutex_lock(&gt->mutex);
+ if (err)
+ break;
+
+ /* restart after reacquiring the lock */
+ tl = list_entry(&gt->active_list, typeof(*tl), link);
+ }
+ mutex_unlock(&gt->mutex);
+
+ if (err == 0) {
+ list_splice_init(&i915->gt.idle_tasks, &rq->active_list);
+ i915_gem_active_set(&i915->gt.idle_barrier, rq);
+ }
+
+ i915_request_add(rq);
+}
+
+void i915_gem_add_idle_task(struct drm_i915_private *i915,
+ struct i915_gem_active *task)
+{
+ lockdep_assert_held(&i915->drm.struct_mutex);
+ GEM_TRACE("adding idle task hint:%pS\n", task->retire);
+
+ if (i915->gt.active_requests)
+ list_add(&task->link, &i915->gt.idle_tasks);
+ else
+ task->retire(task, NULL);
+}
+
static void
i915_gem_retire_work_handler(struct work_struct *work)
{
@@ -2916,6 +3002,7 @@ i915_gem_retire_work_handler(struct work_struct *work)
/* Come back later if the device is busy... */
if (mutex_trylock(&dev->struct_mutex)) {
i915_retire_requests(dev_priv);
+ idle_barrier(dev_priv);
mutex_unlock(&dev->struct_mutex);
}
@@ -5182,6 +5269,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
/* Flush any outstanding unpin_work. */
i915_gem_drain_workqueue(dev_priv);
+ GEM_BUG_ON(!list_empty(&dev_priv->gt.idle_tasks));
mutex_lock(&dev_priv->drm.struct_mutex);
intel_uc_fini_hw(dev_priv);
@@ -5302,6 +5390,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
INIT_LIST_HEAD(&dev_priv->gt.active_rings);
INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
+ INIT_LIST_HEAD(&dev_priv->gt.idle_tasks);
+ init_request_active(&dev_priv->gt.idle_barrier, NULL);
i915_gem_init__mm(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c09a6644a2ab..b397155fe8a7 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -532,6 +532,11 @@ static int add_barrier(struct i915_request *rq, struct i915_gem_active *active)
return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
}
+static int add_idle_barrier(struct i915_request *rq)
+{
+ return add_barrier(rq, &rq->i915->gt.idle_barrier);
+}
+
static int add_timeline_barrier(struct i915_request *rq)
{
return add_barrier(rq, &rq->timeline->barrier);
@@ -679,6 +684,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
*/
rq->head = rq->ring->emit;
+ ret = add_idle_barrier(rq);
+ if (ret)
+ goto err_unwind;
+
ret = add_timeline_barrier(rq);
if (ret)
goto err_unwind;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 8f5c57304064..60b2e1c3abf4 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -270,6 +270,9 @@ int i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq)
/* Must maintain ordering wrt existing barriers */
old = i915_gem_active_raw(&tl->barrier, &rq->i915->drm.struct_mutex);
if (old) {
+ if (old == rq)
+ return -EEXIST;
+
err = i915_request_await_dma_fence(rq, &old->fence);
if (err)
return err;
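For review, a rough sketch of the ordering the above is intended to
provide (illustrative only; assumes the single barrier in flight that
i915_gem_active_isset() enforces):

    /*
     *  requests A, B in flight
     *  i915_gem_add_idle_task()      -> task queued on gt.idle_tasks
     *  retire worker: idle_barrier() -> barrier request R awaits A, B;
     *                                   tasks spliced onto R's active_list
     *  new request C allocated       -> add_idle_barrier(): C awaits R
     *  A, B complete; R executes and retires -> idle tasks are called
     */

By the time the tasks are called, everything that was in flight when
they were queued has been retired, which is the property the commit
message relies on.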
--
2.20.1