[PATCH 30/36] drm/i915: Keep timeline HWSP allocated until the system is idle

Chris Wilson <chris at chris-wilson.co.uk>
Sun Jan 20 23:56:44 UTC 2019


In preparation for enabling HW semaphores, we need to keep the
in-flight timeline HWSP (hardware status page) alive until the entire
system is idle, as any other timeline still active on the GPU may refer
back to the already retired timeline. We therefore have to delay both
the recycling of available cachelines and the unpinning of the old HWSP
until the next idle point (i.e. on parking).
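
As a rough sketch of the scheme (simplified from the code below, with
the locking, lists and VMA handling elided), each HWSP page tracks its
64 cachelines with two bitmaps: a cacheline released by a retired
timeline is first marked dead, and only merged back into the free set
once the GPU has parked:

	struct hwsp_page {
		u64 free_bitmap;  /* cachelines available for reuse */
		u64 dead_bitmap;  /* cachelines retired, awaiting idle */
	};

	static void hwsp_release_cacheline(struct hwsp_page *p,
					   unsigned int cl)
	{
		/* Defer recycling; the HW may still read this seqno */
		p->dead_bitmap |= BIT_ULL(cl);
	}

	static void hwsp_park(struct hwsp_page *p)
	{
		/* GPU is idle; nothing can reference the old seqno */
		p->free_bitmap |= p->dead_bitmap;
		p->dead_bitmap = 0;
	}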

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
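Note: a hedged sketch of how a caller now obtains its seqno, mirroring
the i915_request_alloc() hunk below (error handling abbreviated). On
wraparound of a non-global timeline the HWSP is replaced underneath the
timeline, so the seqno must be fetched before the fence is initialised:

	u32 seqno;
	int err;

	err = i915_timeline_get_seqno(tl, &seqno);
	if (err) /* e.g. -ENOMEM if a replacement HWSP cannot be found */
		return err;

	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
		       tl->fence_context, seqno);
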
 drivers/gpu/drm/i915/i915_drv.h      |   2 +
 drivers/gpu/drm/i915/i915_request.c  |  34 +++---
 drivers/gpu/drm/i915/i915_timeline.c | 150 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_timeline.h |   1 +
 drivers/gpu/drm/i915/i915_vma.h      |   5 +
 5 files changed, 146 insertions(+), 46 deletions(-)
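
Note: the pin-until-park pattern in __i915_timeline_pin_hwsp() below
deserves a word. The unlocked list_empty() test is an optimistic fast
path, re-checked under the spinlock before the extra pin is taken; the
pin is only dropped in i915_timelines_park(), once the GPU is known to
be idle. A condensed sketch (names as in the patch, surrounding
structures elided):

	if (hwsp && list_empty(&hwsp->pin_link)) {
		spin_lock(&gt->hwsp_lock);
		if (list_empty(&hwsp->pin_link)) { /* recheck under lock */
			list_add(&hwsp->pin_link, &gt->hwsp_pin_list);
			__i915_vma_pin(hwsp->vma); /* held until parking */
		}
		spin_unlock(&gt->hwsp_lock);
	}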

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 35ac12e06103..1200677d0b04 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1981,7 +1981,9 @@ struct drm_i915_private {
 
 			/* Pack multiple timelines' seqnos into the same page */
 			spinlock_t hwsp_lock;
+			struct list_head hwsp_pin_list;
 			struct list_head hwsp_free_list;
+			struct list_head hwsp_dead_list;
 		} timelines;
 
 		struct list_head active_rings;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index cca437ac8a7e..099c6f994b99 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -331,12 +331,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (tmp != rq);
 }
 
-static u32 timeline_get_seqno(struct i915_timeline *tl)
-{
-	tl->seqno += tl->has_initial_breadcrumb;
-	return ++tl->seqno;
-}
-
 static void move_to_timeline(struct i915_request *request,
 			     struct i915_timeline *timeline)
 {
@@ -538,8 +532,10 @@ struct i915_request *
 i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct i915_request *rq;
 	struct intel_context *ce;
+	struct i915_timeline *tl;
+	struct i915_request *rq;
+	u32 seqno;
 	int ret;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -614,7 +610,15 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		}
 	}
 
-	rq->rcustate = get_state_synchronize_rcu();
+	tl = ce->ring->timeline;
+	GEM_BUG_ON(tl == &engine->timeline);
+	ret = i915_timeline_get_seqno(tl, &seqno);
+	if (ret)
+		goto err_free;
+
+	spin_lock_init(&rq->lock);
+	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
+		       tl->fence_context, seqno);
 
 	INIT_LIST_HEAD(&rq->active_list);
 	rq->i915 = i915;
@@ -622,16 +626,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->gem_context = ctx;
 	rq->hw_context = ce;
 	rq->ring = ce->ring;
-	rq->timeline = ce->ring->timeline;
-	GEM_BUG_ON(rq->timeline == &engine->timeline);
-	rq->hwsp_seqno = rq->timeline->hwsp_seqno;
-
-	spin_lock_init(&rq->lock);
-	dma_fence_init(&rq->fence,
-		       &i915_fence_ops,
-		       &rq->lock,
-		       rq->timeline->fence_context,
-		       timeline_get_seqno(rq->timeline));
+	rq->timeline = tl;
+	rq->hwsp_seqno = tl->hwsp_seqno;
+	rq->rcustate = get_state_synchronize_rcu();
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
@@ -688,6 +685,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
+err_free:
 	kmem_cache_free(i915->requests, rq);
 err_unreserve:
 	unreserve_gt(i915);
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 1118a2de89ba..db8b16a42786 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -11,8 +11,11 @@
 
 struct i915_timeline_hwsp {
 	struct i915_vma *vma;
+	struct list_head pin_link;
 	struct list_head free_link;
+	struct list_head dead_link;
 	u64 free_bitmap;
+	u64 dead_bitmap;
 };
 
 static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915)
@@ -33,7 +36,7 @@ static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915)
 	return vma;
 }
 
-static int hwsp_alloc(struct i915_timeline *timeline)
+static struct i915_vma *hwsp_alloc(struct i915_timeline *timeline, int *out)
 {
 	struct drm_i915_private *i915 = timeline->i915;
 	struct i915_gt_timelines *gt = &i915->gt.timelines;
@@ -54,17 +57,18 @@ static int hwsp_alloc(struct i915_timeline *timeline)
 
 		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
 		if (!hwsp)
-			return -ENOMEM;
+			return ERR_PTR(-ENOMEM);
 
 		vma = __hwsp_alloc(i915);
 		if (IS_ERR(vma)) {
 			kfree(hwsp);
-			return PTR_ERR(vma);
+			return vma;
 		}
 
 		vma->private = hwsp;
 		hwsp->vma = vma;
 		hwsp->free_bitmap = ~0ull;
+		hwsp->dead_bitmap = 0;
 
 		spin_lock(&gt->hwsp_lock);
 		list_add(&hwsp->free_link, &gt->hwsp_free_list);
@@ -78,12 +82,8 @@ static int hwsp_alloc(struct i915_timeline *timeline)
 
 	spin_unlock(&gt->hwsp_lock);
 
-	timeline->hwsp_ggtt = i915_vma_get(hwsp->vma);
-	timeline->hwsp_offset = offset * CACHELINE_BYTES;
-
-	GEM_BUG_ON(timeline->hwsp_ggtt->private != hwsp);
-
-	return 0;
+	*out = offset * CACHELINE_BYTES;
+	return hwsp->vma;
 }
 
 static void hwsp_free(struct i915_timeline *timeline)
@@ -97,18 +97,11 @@ static void hwsp_free(struct i915_timeline *timeline)
 
 	spin_lock(&gt->hwsp_lock);
 
-	/* As a cacheline becomes available, publish the HWSP on the freelist */
-	if (!hwsp->free_bitmap)
-		list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
-
-	hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+	/* Defer recycling the HWSP cacheline until after the GPU is idle. */
+	if (!hwsp->dead_bitmap)
+		list_add_tail(&hwsp->dead_link, &gt->hwsp_dead_list);
 
-	/* And if no one is left using it, give the page back to the system */
-	if (hwsp->free_bitmap == ~0ull) {
-		i915_vma_put(hwsp->vma);
-		list_del(&hwsp->free_link);
-		kfree(hwsp);
-	}
+	hwsp->dead_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
 
 	spin_unlock(&gt->hwsp_lock);
 }
@@ -119,7 +112,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
 		       struct i915_vma *global_hwsp)
 {
 	void *vaddr;
-	int err;
 
 	/*
 	 * Ideally we want a set of engines on a single leaf as we expect
@@ -134,14 +126,13 @@ int i915_timeline_init(struct drm_i915_private *i915,
 	timeline->pin_count = 0;
 	timeline->has_initial_breadcrumb = !global_hwsp;
 
-	if (global_hwsp) {
-		timeline->hwsp_ggtt = i915_vma_get(global_hwsp);
-		timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
-	} else {
-		err = hwsp_alloc(timeline);
-		if (err)
-			return err;
+	timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
+	if (!global_hwsp) {
+		global_hwsp = hwsp_alloc(timeline, &timeline->hwsp_offset);
+		if (IS_ERR(global_hwsp))
+			return PTR_ERR(global_hwsp);
 	}
+	timeline->hwsp_ggtt = i915_vma_get(global_hwsp);
 
 	vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB);
 	if (IS_ERR(vaddr)) {
@@ -175,7 +166,9 @@ void i915_timelines_init(struct drm_i915_private *i915)
 	INIT_LIST_HEAD(&gt->active_list);
 
 	spin_lock_init(&gt->hwsp_lock);
+	INIT_LIST_HEAD(&gt->hwsp_pin_list);
 	INIT_LIST_HEAD(&gt->hwsp_free_list);
+	INIT_LIST_HEAD(&gt->hwsp_dead_list);
 
 	/* via i915_gem_wait_for_idle() */
 	i915_gem_shrinker_taints_mutex(i915, &gt->mutex);
@@ -212,6 +205,7 @@ static void timeline_inactive(struct i915_timeline *tl)
 void i915_timelines_park(struct drm_i915_private *i915)
 {
 	struct i915_gt_timelines *gt = &i915->gt.timelines;
+	struct i915_timeline_hwsp *hwsp, *hn;
 	struct i915_timeline *timeline;
 
 	mutex_lock(&gt->mutex);
@@ -225,6 +219,38 @@ void i915_timelines_park(struct drm_i915_private *i915)
 		i915_syncmap_free(&timeline->sync);
 	}
 	mutex_unlock(&gt->mutex);
+
+	/*
+	 * Now the system is idle, we can be sure that there are no more
+	 * references to our old HWSP pages remaining on the HW, so we
+	 * can return the pages back to the system.
+	 */
+	spin_lock(&gt->hwsp_lock);
+
+	list_for_each_entry_safe(hwsp, hn, &gt->hwsp_pin_list, pin_link) {
+		INIT_LIST_HEAD(&hwsp->pin_link);
+		i915_vma_unpin(hwsp->vma);
+	}
+	INIT_LIST_HEAD(&gt->hwsp_pin_list);
+
+	list_for_each_entry_safe(hwsp, hn, &gt->hwsp_dead_list, dead_link) {
+		GEM_BUG_ON(!hwsp->dead_bitmap);
+
+		if (!hwsp->free_bitmap)
+			list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
+
+		hwsp->free_bitmap |= hwsp->dead_bitmap;
+		hwsp->dead_bitmap = 0;
+
+		if (hwsp->free_bitmap == ~0ull) {
+			list_del(&hwsp->free_link);
+			i915_vma_put(hwsp->vma);
+			kfree(hwsp);
+		}
+	}
+	INIT_LIST_HEAD(&gt->hwsp_dead_list);
+
+	spin_unlock(&gt->hwsp_lock);
 }
 
 void i915_timeline_fini(struct i915_timeline *timeline)
@@ -262,6 +288,24 @@ i915_timeline_create(struct drm_i915_private *i915,
 	return timeline;
 }
 
+static void
+__i915_timeline_pin_hwsp(struct i915_timeline *tl,
+			 struct i915_timeline_hwsp *hwsp)
+{
+	GEM_BUG_ON(!tl->pin_count);
+
+	if (hwsp && list_empty(&hwsp->pin_link)) {
+		struct i915_gt_timelines *gt = &tl->i915->gt.timelines;
+
+		spin_lock(&gt->hwsp_lock);
+		if (list_empty(&hwsp->pin_link)) {
+			list_add(&hwsp->pin_link, &gt->hwsp_pin_list);
+			__i915_vma_pin(hwsp->vma);
+		}
+		spin_unlock(&gt->hwsp_lock);
+	}
+}
+
 int i915_timeline_pin(struct i915_timeline *tl)
 {
 	int err;
@@ -274,6 +318,7 @@ int i915_timeline_pin(struct i915_timeline *tl)
 	if (err)
 		goto unpin;
 
+	__i915_timeline_pin_hwsp(tl, tl->hwsp_ggtt->private);
 	timeline_active(tl);
 
 	return 0;
@@ -283,6 +328,51 @@ int i915_timeline_pin(struct i915_timeline *tl)
 	return err;
 }
 
+static u32 timeline_advance(struct i915_timeline *tl)
+{
+	GEM_BUG_ON(!tl->pin_count);
+	tl->seqno += tl->has_initial_breadcrumb;
+	return ++tl->seqno;
+}
+
+static void timeline_rollback(struct i915_timeline *tl)
+{
+	tl->seqno--;
+	tl->seqno -= tl->has_initial_breadcrumb;
+}
+
+static noinline int
+__i915_timeline_get_seqno(struct i915_timeline *tl, u32 *seqno)
+{
+	struct i915_vma *vma;
+	int offset;
+
+	vma = hwsp_alloc(tl, &offset);
+	if (IS_ERR(vma)) {
+		timeline_rollback(tl);
+		return PTR_ERR(vma);
+	}
+	hwsp_free(tl);
+
+	tl->hwsp_ggtt = i915_vma_get(vma);
+	tl->hwsp_offset = offset;
+	__i915_timeline_pin_hwsp(tl, vma->private);
+
+	*seqno = timeline_advance(tl);
+	return 0;
+}
+
+int i915_timeline_get_seqno(struct i915_timeline *tl, u32 *seqno)
+{
+	*seqno = timeline_advance(tl);
+
+	/* Replace the HWSP on wraparound for HW semaphores */
+	if (unlikely(!*seqno && !i915_timeline_is_global(tl)))
+		return __i915_timeline_get_seqno(tl, seqno);
+
+	return 0;
+}
+
 void i915_timeline_unpin(struct i915_timeline *tl)
 {
 	GEM_BUG_ON(!tl->pin_count);
@@ -314,8 +404,12 @@ void i915_timelines_fini(struct drm_i915_private *i915)
 {
 	struct i915_gt_timelines *gt = &i915->gt.timelines;
 
+	i915_timelines_park(i915);
+
 	GEM_BUG_ON(!list_empty(&gt->active_list));
+	GEM_BUG_ON(!list_empty(&gt->hwsp_pin_list));
 	GEM_BUG_ON(!list_empty(&gt->hwsp_free_list));
+	GEM_BUG_ON(!list_empty(&gt->hwsp_dead_list));
 
 	mutex_destroy(&gt->mutex);
 }
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 421eb34568de..ab26e82b54c9 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -148,6 +148,7 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 }
 
 int i915_timeline_pin(struct i915_timeline *tl);
+int i915_timeline_get_seqno(struct i915_timeline *tl, u32 *seqno);
 void i915_timeline_unpin(struct i915_timeline *tl);
 
 void i915_timelines_init(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 18be786a970d..42bd794b1e72 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -228,6 +228,11 @@ static inline u32 i915_timeline_seqno_address(const struct i915_timeline *tl)
 	return i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset;
 }
 
+static inline bool i915_timeline_is_global(const struct i915_timeline *tl)
+{
+	return !tl->hwsp_ggtt->private;
+}
+
 static inline u32 i915_ggtt_pin_bias(struct i915_vma *vma)
 {
 	return i915_vm_to_ggtt(vma->vm)->pin_bias;
-- 
2.20.1


