[PATCH 30/33] drm/i915: Keep timeline HWSP allocated until the system is idle

Chris Wilson chris at chris-wilson.co.uk
Fri Jan 25 01:42:27 UTC 2019


In preparation for enabling HW semaphores, we need to keep the in-flight
timeline HWSP alive until the entire system is idle, as any other
timeline active on the GPU may still refer back to the already retired
timeline. We therefore have to delay both recycling available cachelines
and unpinning the old HWSP until the next idle point (i.e. on parking).

Having to keep the HWSP alive for external references on the HW raises
an interesting conundrum. On a busy system, we may never see a global
idle point, essentially meaning the resource keeps leaking until we are
forced to sleep. What we need is a set of RCU primitives for the GPU!
This should also help mitigate the resource starvation issues
stemming from keeping all logical state pinned until idle (instead
of only until the next context switch, as currently handled).

v2: Use idle barriers to free the stale HWSP as soon as all current
requests are idle, rather than relying on the system reaching a global
idle point. (Tvrtko)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c  |  33 ++++----
 drivers/gpu/drm/i915/i915_timeline.c | 118 +++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_timeline.h |   1 +
 3 files changed, 121 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index b397155fe8a7..690fb4eb6d15 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -331,11 +331,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (tmp != rq);
 }
 
-static u32 timeline_get_seqno(struct i915_timeline *tl)
-{
-	return tl->seqno += 1 + tl->has_initial_breadcrumb;
-}
-
 static void move_to_timeline(struct i915_request *request,
 			     struct i915_timeline *timeline)
 {
@@ -555,8 +550,10 @@ struct i915_request *
 i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct i915_request *rq;
 	struct intel_context *ce;
+	struct i915_timeline *tl;
+	struct i915_request *rq;
+	u32 seqno;
 	int ret;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -631,7 +628,15 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		}
 	}
 
-	rq->rcustate = get_state_synchronize_rcu();
+	tl = ce->ring->timeline;
+	GEM_BUG_ON(tl == &engine->timeline);
+	ret = i915_timeline_get_seqno(tl, &seqno);
+	if (ret)
+		goto err_free;
+
+	spin_lock_init(&rq->lock);
+	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
+		       tl->fence_context, seqno);
 
 	INIT_LIST_HEAD(&rq->active_list);
 	rq->i915 = i915;
@@ -639,16 +644,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->gem_context = ctx;
 	rq->hw_context = ce;
 	rq->ring = ce->ring;
-	rq->timeline = ce->ring->timeline;
-	GEM_BUG_ON(rq->timeline == &engine->timeline);
-	rq->hwsp_seqno = rq->timeline->hwsp_seqno;
-
-	spin_lock_init(&rq->lock);
-	dma_fence_init(&rq->fence,
-		       &i915_fence_ops,
-		       &rq->lock,
-		       rq->timeline->fence_context,
-		       timeline_get_seqno(rq->timeline));
+	rq->timeline = tl;
+	rq->hwsp_seqno = tl->hwsp_seqno;
+	rq->rcustate = get_state_synchronize_rcu();
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
@@ -713,6 +711,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
+err_free:
 	kmem_cache_free(i915->requests, rq);
 err_unreserve:
 	unreserve_gt(i915);
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 54ea82f2f235..284c121199bd 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -10,8 +10,9 @@
 #include "i915_syncmap.h"
 
 struct i915_timeline_hwsp {
-	struct i915_vma *vma;
+	struct i915_gt_timelines *gt;
 	struct list_head free_link;
+	struct i915_vma *vma;
 	u64 free_bitmap;
 };
 
@@ -65,6 +66,7 @@ static struct i915_vma *hwsp_alloc(struct i915_timeline *timeline, int *offset)
 		vma->private = hwsp;
 		hwsp->vma = vma;
 		hwsp->free_bitmap = ~0ull;
+		hwsp->gt = gt;
 
 		spin_lock(&gt->hwsp_lock);
 		list_add(&hwsp->free_link, &gt->hwsp_free_list);
@@ -84,14 +86,9 @@ static struct i915_vma *hwsp_alloc(struct i915_timeline *timeline, int *offset)
 	return hwsp->vma;
 }
 
-static void hwsp_free(struct i915_timeline *timeline)
+static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int offset)
 {
-	struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
-	struct i915_timeline_hwsp *hwsp;
-
-	hwsp = i915_timeline_hwsp(timeline);
-	if (!hwsp) /* leave global HWSP alone! */
-		return;
+	struct i915_gt_timelines *gt = hwsp->gt;
 
 	spin_lock(&gt->hwsp_lock);
 
@@ -99,7 +96,7 @@ static void hwsp_free(struct i915_timeline *timeline)
 	if (!hwsp->free_bitmap)
 		list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
 
-	hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+	hwsp->free_bitmap |= BIT_ULL(offset / CACHELINE_BYTES);
 
 	/* And if no one is left using it, give the page back to the system */
 	if (hwsp->free_bitmap == ~0ull) {
@@ -143,7 +140,9 @@ int i915_timeline_init(struct drm_i915_private *i915,
 
 	vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
 	if (IS_ERR(vaddr)) {
-		hwsp_free(timeline);
+		if (!i915_timeline_is_global(timeline))
+			__idle_hwsp_free(i915_timeline_hwsp(timeline),
+					 timeline->hwsp_offset);
 		i915_vma_put(hwsp);
 		return PTR_ERR(vaddr);
 	}
@@ -228,10 +227,8 @@ void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(timeline->pin_count);
 	GEM_BUG_ON(!list_empty(&timeline->requests));
-	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
-	hwsp_free(timeline);
 
 	i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
 	i915_vma_put(timeline->hwsp_ggtt);
@@ -303,6 +300,85 @@ int i915_timeline_pin(struct i915_timeline *tl)
 	return err;
 }
 
+static u32 timeline_advance(struct i915_timeline *tl)
+{
+	GEM_BUG_ON(!tl->pin_count);
+	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
+
+	return tl->seqno += 1 + tl->has_initial_breadcrumb;
+}
+
+static void timeline_rollback(struct i915_timeline *tl)
+{
+	tl->seqno -= 1 + tl->has_initial_breadcrumb;
+}
+
+struct idle_hwsp_free {
+	struct i915_gem_active active;
+	struct i915_timeline_hwsp *hwsp;
+	int offset;
+};
+
+static void idle_hwsp_free(struct i915_gem_active *active,
+			   struct i915_request *rq)
+{
+	struct idle_hwsp_free *idle =
+		container_of(active, typeof(*idle), active);
+
+	i915_vma_unpin(idle->hwsp->vma);
+	__idle_hwsp_free(idle->hwsp, idle->offset);
+
+	kfree(idle);
+}
+
+static noinline int
+__i915_timeline_get_seqno(struct i915_timeline *tl, u32 *seqno)
+{
+	struct idle_hwsp_free *idle;
+	struct i915_vma *vma;
+	int offset;
+
+	idle = kmalloc(sizeof(*idle), GFP_KERNEL);
+	if (!idle)
+		return -ENOMEM;
+
+	init_request_active(&idle->active, idle_hwsp_free);
+	idle->hwsp = i915_timeline_hwsp(tl);
+	idle->offset = tl->hwsp_offset;
+
+	vma = hwsp_alloc(tl, &offset);
+	if (IS_ERR(vma)) {
+		timeline_rollback(tl);
+		kfree(idle);
+		return PTR_ERR(vma);
+	}
+
+	/*
+	 * We need to keep the previous HWSP cacheline alive until the GPU
+	 * is idle so that we can be sure that any inflight references to it
+	 * have been retired.
+	 */
+	i915_gem_add_idle_task(tl->i915, &idle->active);
+
+	tl->hwsp_ggtt = i915_vma_get(vma);
+	tl->hwsp_offset = offset;
+	__i915_vma_pin(tl->hwsp_ggtt);
+
+	*seqno = timeline_advance(tl);
+	return 0;
+}
+
+int i915_timeline_get_seqno(struct i915_timeline *tl, u32 *seqno)
+{
+	*seqno = timeline_advance(tl);
+
+	/* Replace the HWSP on wraparound for HW semaphores */
+	if (unlikely(!*seqno && !i915_timeline_is_global(tl)))
+		return __i915_timeline_get_seqno(tl, seqno);
+
+	return 0;
+}
+
 void i915_timeline_unpin(struct i915_timeline *tl)
 {
 	GEM_BUG_ON(!tl->pin_count);
@@ -321,15 +397,29 @@ void i915_timeline_unpin(struct i915_timeline *tl)
 	__i915_vma_unpin(tl->hwsp_ggtt);
 }
 
-void __i915_timeline_free(struct kref *kref)
+static void idle_timeline_free(struct i915_gem_active *active,
+			       struct i915_request *rq)
 {
 	struct i915_timeline *timeline =
-		container_of(kref, typeof(*timeline), kref);
+		container_of(active, typeof(*timeline), barrier);
+
+	if (!i915_timeline_is_global(timeline))
+		__idle_hwsp_free(i915_timeline_hwsp(timeline),
+				 timeline->hwsp_offset);
 
 	i915_timeline_fini(timeline);
 	kfree(timeline);
 }
 
+void __i915_timeline_free(struct kref *kref)
+{
+	struct i915_timeline *timeline =
+		container_of(kref, typeof(*timeline), kref);
+
+	init_request_active(&timeline->barrier, idle_timeline_free);
+	i915_gem_add_idle_task(timeline->i915, &timeline->barrier);
+}
+
 void i915_timelines_fini(struct drm_i915_private *i915)
 {
 	struct i915_gt_timelines *gt = &i915->gt.timelines;
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index d167e04073c5..37529981d909 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -159,6 +159,7 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 }
 
 int i915_timeline_pin(struct i915_timeline *tl);
+int i915_timeline_get_seqno(struct i915_timeline *tl, u32 *seqno);
 void i915_timeline_unpin(struct i915_timeline *tl);
 
 void i915_timelines_init(struct drm_i915_private *i915);
-- 
2.20.1


