[PATCH 57/60] timeline-hwsp

Chris Wilson chris at chris-wilson.co.uk
Fri Dec 21 10:18:45 UTC 2018


---
 drivers/gpu/drm/i915/i915_drv.h      |  4 ++
 drivers/gpu/drm/i915/i915_request.c  | 25 ++++++------
 drivers/gpu/drm/i915/i915_timeline.c | 59 +++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_timeline.h |  6 +++
 drivers/gpu/drm/i915/intel_lrc.c     |  9 +++--
 5 files changed, 87 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2aeea38140b1..06451efdc8f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1961,6 +1961,10 @@ struct drm_i915_private {
 		struct mutex timeline_lock;
 		struct list_head timelines;
 
+		/* Pack multiple timelines' seqnos into the same page */
+		struct i915_vma *timeline_hwsp;
+		u64 timeline_free;
+
 		struct list_head active_rings;
 		struct list_head closed_vma;
 		u32 active_requests;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index f523486d225d..152de321ea9c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -182,11 +182,12 @@ static void free_capture_list(struct i915_request *request)
 static void __retire_engine_request(struct intel_engine_cs *engine,
 				    struct i915_request *rq)
 {
-	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n",
+	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n",
 		  __func__, engine->name,
 		  rq->fence.context, rq->fence.seqno,
 		  lower_32_bits(rq->global_seqno),
-		  i915_request_hwsp(rq));
+		  i915_request_hwsp(rq),
+		  intel_engine_get_seqno(engine));
 
 	GEM_BUG_ON(!i915_request_completed(rq));
 
@@ -244,11 +245,12 @@ static void i915_request_retire(struct i915_request *request)
 {
 	struct i915_gem_active *active, *next;
 
-	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
 		  request->engine->name,
 		  request->fence.context, request->fence.seqno,
 		  lower_32_bits(request->global_seqno),
-		  i915_request_hwsp(request));
+		  i915_request_hwsp(request),
+		  intel_engine_get_seqno(request->engine));
 
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
@@ -307,11 +309,12 @@ void i915_request_retire_upto(struct i915_request *rq)
 	struct intel_ring *ring = rq->ring;
 	struct i915_request *tmp;
 
-	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
 		  rq->engine->name,
 		  rq->fence.context, rq->fence.seqno,
 		  lower_32_bits(rq->global_seqno),
-		  i915_request_hwsp(rq));
+		  i915_request_hwsp(rq),
+		  intel_engine_get_seqno(rq->engine));
 
 	lockdep_assert_held(&rq->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_request_completed(rq));
@@ -348,10 +351,11 @@ void __i915_request_submit(struct i915_request *request)
 	struct intel_engine_cs *engine = request->engine;
 	u32 seqno;
 
-	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
 		  engine->timeline.seqno + 1,
+		  i915_request_hwsp(request),
 		  intel_engine_get_seqno(engine));
 
 	GEM_BUG_ON(!irqs_disabled());
@@ -400,11 +404,12 @@ void __i915_request_unsubmit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 
-	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
 		  lower_32_bits(request->global_seqno),
-		  i915_request_hwsp(request));
+		  i915_request_hwsp(request),
+		  intel_engine_get_seqno(engine));
 
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&engine->timeline.lock);
@@ -421,7 +426,6 @@ void __i915_request_unsubmit(struct i915_request *request)
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 	request->global_seqno = 0;
-	GEM_DEBUG_EXEC(request->hwsp_seqno = POISON_INUSE);
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
 		intel_engine_cancel_signaling(request);
 	spin_unlock(&request->lock);
@@ -604,7 +608,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 
 	/* No zalloc, must clear what we need by hand */
 	rq->global_seqno = 0;
-	GEM_DEBUG_EXEC(rq->hwsp_seqno = POISON_INUSE);
 	rq->signaling.wait.global_seqno = 0;
 	rq->file_priv = NULL;
 	rq->batch = NULL;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index c4e7ad179d86..3aa4e3b33ffc 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -9,6 +9,52 @@
 #include "i915_timeline.h"
 #include "i915_syncmap.h"
 
+/*
+ * Give @timeline a free CACHELINE_BYTES slot for its seqno in the shared
+ * page, replacing the page once every slot is taken. Called with
+ * gt.timeline_lock held; on failure @timeline's hwsp_* fields are untouched.
+ */
+static void get_seqno_locked(struct drm_i915_private *i915,
+			     struct i915_timeline *timeline)
+{
+	struct i915_vma *vma;
+	void *vaddr;
+	int offset;
+
+	offset = __builtin_ffsll(i915->gt.timeline_free); /* 0 if none free */
+	if (offset-- == 0 && i915->gt.timeline_hwsp) {
+		i915_vma_put(i915->gt.timeline_hwsp);
+		i915->gt.timeline_hwsp = NULL;
+	}
+
+	vma = i915->gt.timeline_hwsp;
+	if (!vma) {
+		struct drm_i915_gem_object *bo;
+
+		BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);
+		bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
+		if (IS_ERR(bo)) /* failure is an ERR_PTR, never NULL */
+			return;
+		vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
+		if (IS_ERR(vma)) {
+			i915_gem_object_put(bo); /* don't leak the new page */
+			return;
+		}
+		i915->gt.timeline_hwsp = vma;
+		i915->gt.timeline_free = ~0ull;
+		offset = 0;
+	}
+
+	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+	if (IS_ERR(vaddr))
+		return;
+	timeline->hwsp_ggtt = i915_vma_get(vma);
+	timeline->hwsp_offset = offset * CACHELINE_BYTES;
+	timeline->hwsp_seqno = memset(vaddr + timeline->hwsp_offset, 0,
+				      sizeof(*timeline->hwsp_seqno));
+	i915->gt.timeline_free &= ~BIT_ULL(offset);
+}
+
 void i915_timeline_init(struct drm_i915_private *i915,
 			struct i915_timeline *timeline,
 			const char *name)
@@ -25,6 +71,7 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	timeline->name = name;
 
 	mutex_lock(&i915->gt.timeline_lock);
+	get_seqno_locked(i915, timeline);
 	list_add(&timeline->link, &i915->gt.timelines);
 	mutex_unlock(&i915->gt.timeline_lock);
 
@@ -69,13 +116,35 @@ void i915_timelines_park(struct drm_i915_private *i915)
 
+/*
+ * Tear down @timeline: free its sync map, unlink it from gt.timelines and
+ * give back its seqno slot together with its mapping and vma reference.
+ */
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
+	struct drm_i915_private *i915 = timeline->i915;
+	struct i915_vma *hwsp = timeline->hwsp_ggtt;
+
 	GEM_BUG_ON(!list_empty(&timeline->requests));
 
 	i915_syncmap_free(&timeline->sync);
 
-	mutex_lock(&timeline->i915->gt.timeline_lock);
+	mutex_lock(&i915->gt.timeline_lock);
 	list_del(&timeline->link);
-	mutex_unlock(&timeline->i915->gt.timeline_lock);
+	/* Recycle the slot only if it lives in the page still being packed */
+	if (hwsp && hwsp == i915->gt.timeline_hwsp)
+		i915->gt.timeline_free |=
+			BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+	mutex_unlock(&i915->gt.timeline_lock);
+
+	/*
+	 * get_seqno_locked() returns early on failure, leaving no page to
+	 * release. NOTE(review): relies on hwsp_ggtt being NULL then - confirm
+	 * that timelines are zero-initialised before i915_timeline_init().
+	 */
+	if (!hwsp)
+		return;
+
+	i915_gem_object_unpin_map(hwsp->obj);
+	i915_vma_put(hwsp);
 }
 
 struct i915_timeline *
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index df0ddcd6459d..826f16771a3d 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -32,6 +32,8 @@
 #include "i915_syncmap.h"
 #include "i915_utils.h"
 
+struct i915_vma;
+
 struct i915_timeline {
 	u64 fence_context;
 	u32 seqno;
@@ -41,6 +43,10 @@ struct i915_timeline {
 #define TIMELINE_ENGINE 1
 #define TIMELINE_VIRTUAL 2
 
+	const u32 *hwsp_seqno;
+	struct i915_vma *hwsp_ggtt;
+	u32 hwsp_offset;
+
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
 	 * outstanding.
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7481e394edbb..f7e92d78b649 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -472,11 +472,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
 
-			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
+			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
 				  engine->name, n,
 				  port[n].context_id, count,
 				  lower_32_bits(rq->global_seqno),
 				  rq->fence.context, rq->fence.seqno,
+				  i915_request_hwsp(rq),
 				  intel_engine_get_seqno(engine),
 				  rq_prio(rq));
 		} else {
@@ -904,11 +905,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 	while (num_ports-- && port_isset(port)) {
 		struct i915_request *rq = port_request(port);
 
-		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n",
+		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n",
 			  rq->engine->name,
 			  (unsigned int)(port - execlists->port),
 			  lower_32_bits(rq->global_seqno),
 			  rq->fence.context, rq->fence.seqno,
+			  i915_request_hwsp(rq),
 			  intel_engine_get_seqno(rq->engine));
 
 		GEM_BUG_ON(!execlists->active);
@@ -1132,12 +1134,13 @@ static void process_csb(struct intel_engine_cs *engine)
 						EXECLISTS_ACTIVE_USER));
 
 		rq = port_unpack(port, &count);
-		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
+		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
 			  engine->name,
 			  port->context_id, count,
 			  rq ? lower_32_bits(rq->global_seqno) : 0,
 			  rq ? rq->fence.context : 0,
 			  rq ? rq->fence.seqno : 0,
+			  rq ? i915_request_hwsp(rq) : 0,
 			  intel_engine_get_seqno(engine),
 			  rq ? rq_prio(rq) : 0);
 
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list