[PATCH 14/40] drm/i915: Introduce concept of per-timeline (context) HWSP
Chris Wilson
chris at chris-wilson.co.uk
Sat Jan 19 19:46:22 UTC 2019
Supplement the per-engine HWSP with a per-timeline HWSP. That is a
per-request pointer through which we can check a local seqno,
abstracting away the presumption of a global seqno. In this first step,
we point each request back into the engine's HWSP so everything
continues to work with the global timeline.
v2: s/i915_request_hwsp/hwsp_seqno/ to emphasis that this is the current
HW value and that we are accessing it via i915_request merely as a
convenience.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/i915_request.c | 17 +++++--
drivers/gpu/drm/i915/i915_request.h | 50 +++++++++++++++++---
drivers/gpu/drm/i915/i915_reset.c | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 12 +++--
drivers/gpu/drm/i915/intel_ringbuffer.c | 1 +
drivers/gpu/drm/i915/selftests/mock_engine.c | 1 +
6 files changed, 67 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 80232de8e2be..671ce57ee369 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -182,10 +182,11 @@ static void free_capture_list(struct i915_request *request)
static void __retire_engine_request(struct intel_engine_cs *engine,
struct i915_request *rq)
{
- GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n",
+ GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n",
__func__, engine->name,
rq->fence.context, rq->fence.seqno,
rq->global_seqno,
+ hwsp_seqno(rq),
intel_engine_get_seqno(engine));
GEM_BUG_ON(!i915_request_completed(rq));
@@ -198,6 +199,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
spin_unlock(&engine->timeline.lock);
spin_lock(&rq->lock);
+ i915_request_mark_complete(rq);
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
dma_fence_signal_locked(&rq->fence);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
@@ -244,10 +246,11 @@ static void i915_request_retire(struct i915_request *request)
{
struct i915_gem_active *active, *next;
- GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
+ GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
request->engine->name,
request->fence.context, request->fence.seqno,
request->global_seqno,
+ hwsp_seqno(request),
intel_engine_get_seqno(request->engine));
lockdep_assert_held(&request->i915->drm.struct_mutex);
@@ -307,10 +310,11 @@ void i915_request_retire_upto(struct i915_request *rq)
struct intel_ring *ring = rq->ring;
struct i915_request *tmp;
- GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
+ GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
rq->engine->name,
rq->fence.context, rq->fence.seqno,
rq->global_seqno,
+ hwsp_seqno(rq),
intel_engine_get_seqno(rq->engine));
lockdep_assert_held(&rq->i915->drm.struct_mutex);
@@ -355,10 +359,11 @@ void __i915_request_submit(struct i915_request *request)
struct intel_engine_cs *engine = request->engine;
u32 seqno;
- GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n",
+ GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n",
engine->name,
request->fence.context, request->fence.seqno,
engine->timeline.seqno + 1,
+ hwsp_seqno(request),
intel_engine_get_seqno(engine));
GEM_BUG_ON(!irqs_disabled());
@@ -405,10 +410,11 @@ void __i915_request_unsubmit(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
- GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n",
+ GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n",
engine->name,
request->fence.context, request->fence.seqno,
request->global_seqno,
+ hwsp_seqno(request),
intel_engine_get_seqno(engine));
GEM_BUG_ON(!irqs_disabled());
@@ -616,6 +622,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
rq->ring = ce->ring;
rq->timeline = ce->ring->timeline;
GEM_BUG_ON(rq->timeline == &engine->timeline);
+ rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX];
spin_lock_init(&rq->lock);
dma_fence_init(&rq->fence,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index d014b0605445..5a08c84afed1 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -130,6 +130,13 @@ struct i915_request {
struct i915_sched_node sched;
struct i915_dependency dep;
+ /*
+ * A convenience pointer to the current breadcrumb value stored in
+ * the HW status page (or our timeline's local equivalent). The full
+ * path would be rq->hw_context->ring->timeline->hwsp_seqno.
+ */
+ const u32 *hwsp_seqno;
+
/**
* GEM sequence number associated with this request on the
* global execution timeline. It is zero when the request is not
@@ -280,11 +287,6 @@ long i915_request_wait(struct i915_request *rq,
#define I915_WAIT_ALL BIT(3) /* used by i915_gem_object_wait() */
#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
-static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
- u32 seqno);
-static inline bool intel_engine_has_completed(struct intel_engine_cs *engine,
- u32 seqno);
-
/**
* Returns true if seq1 is later than seq2.
*/
@@ -293,6 +295,35 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
return (s32)(seq1 - seq2) >= 0;
}
+static inline u32 __hwsp_seqno(const struct i915_request *rq)
+{
+ return READ_ONCE(*rq->hwsp_seqno);
+}
+
+/**
+ * hwsp_seqno - the current breadcrumb value in the HW status page
+ * @rq: the request, to chase the relevant HW status page
+ *
+ * The emphasis in naming here is that hwsp_seqno() is not a property of the
+ * request, but an indication of the current HW state (associated with this
+ * request). Its value will change as the GPU executes more requests.
+ *
+ * Returns the current breadcrumb value in the associated HW status page (or
+ * the local timeline's equivalent) for this request. The request itself
+ * has the associated breadcrumb value of rq->fence.seqno, when the HW
+ * status page has that breadcrumb or later, this request is complete.
+ */
+static inline u32 hwsp_seqno(const struct i915_request *rq)
+{
+ u32 seqno;
+
+ rcu_read_lock(); /* the HWSP may be freed at runtime */
+ seqno = __hwsp_seqno(rq);
+ rcu_read_unlock();
+
+ return seqno;
+}
+
/**
* i915_request_started - check if the request has begun being executed
* @rq: the request
@@ -310,14 +341,14 @@ static inline bool i915_request_started(const struct i915_request *rq)
if (!seqno) /* not yet submitted to HW */
return false;
- return intel_engine_has_started(rq->engine, seqno);
+ return i915_seqno_passed(hwsp_seqno(rq), seqno - 1);
}
static inline bool
__i915_request_completed(const struct i915_request *rq, u32 seqno)
{
GEM_BUG_ON(!seqno);
- return intel_engine_has_completed(rq->engine, seqno) &&
+ return i915_seqno_passed(hwsp_seqno(rq), seqno) &&
seqno == i915_request_global_seqno(rq);
}
@@ -332,6 +363,11 @@ static inline bool i915_request_completed(const struct i915_request *rq)
return __i915_request_completed(rq, seqno);
}
+static inline void i915_request_mark_complete(struct i915_request *rq)
+{
+ rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */
+}
+
void i915_retire_requests(struct drm_i915_private *i915);
/*
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 12e5a2bc825c..09edf488f711 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -756,6 +756,7 @@ static void nop_submit_request(struct i915_request *request)
spin_lock_irqsave(&request->engine->timeline.lock, flags);
__i915_request_submit(request);
+ i915_request_mark_complete(request);
intel_engine_write_global_seqno(request->engine, request->global_seqno);
spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 869aeadb044a..dff2edefc61a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -447,11 +447,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
desc = execlists_update_context(rq);
GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
- GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
+ GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
engine->name, n,
port[n].context_id, count,
rq->global_seqno,
rq->fence.context, rq->fence.seqno,
+ hwsp_seqno(rq),
intel_engine_get_seqno(engine),
rq_prio(rq));
} else {
@@ -748,11 +749,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
while (num_ports-- && port_isset(port)) {
struct i915_request *rq = port_request(port);
- GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n",
+ GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n",
rq->engine->name,
(unsigned int)(port - execlists->port),
rq->global_seqno,
rq->fence.context, rq->fence.seqno,
+ hwsp_seqno(rq),
intel_engine_get_seqno(rq->engine));
GEM_BUG_ON(!execlists->active);
@@ -840,6 +842,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
continue;
dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_mark_complete(rq);
}
/* Flush the queued requests to the timeline list (for retiring). */
@@ -852,6 +855,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
dma_fence_set_error(&rq->fence, -EIO);
__i915_request_submit(rq);
+
+ i915_request_mark_complete(rq);
}
rb_erase_cached(&p->node, &execlists->queue);
@@ -976,12 +981,13 @@ static void process_csb(struct intel_engine_cs *engine)
EXECLISTS_ACTIVE_USER));
rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
+ GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
engine->name,
port->context_id, count,
rq ? rq->global_seqno : 0,
rq ? rq->fence.context : 0,
rq ? rq->fence.seqno : 0,
+ rq ? hwsp_seqno(rq) : 0,
intel_engine_get_seqno(engine),
rq ? rq_prio(rq) : 0);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 66dc8e2fa353..d063e8f4ad0c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -881,6 +881,7 @@ static void cancel_requests(struct intel_engine_cs *engine)
continue;
dma_fence_set_error(&request->fence, -EIO);
+ i915_request_mark_complete(request);
}
intel_write_status_page(engine,
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 968a7e139a67..33dc03e61d4c 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -72,6 +72,7 @@ static void advance(struct mock_engine *engine,
{
list_del_init(&request->link);
mock_seqno_advance(&engine->base, request->base.global_seqno);
+ i915_request_mark_complete(&request->base);
}
static void hw_delay_complete(struct timer_list *t)
--
2.20.1
More information about the Intel-gfx-trybot
mailing list