[Intel-gfx] [PATCH 44/46] drm/i915: Track the context's seqno in its own timeline HWSP
Chris Wilson
chris at chris-wilson.co.uk
Mon Jan 7 11:55:07 UTC 2019
Now that we have allocated ourselves a cacheline to store a breadcrumb,
we can emit a write from the GPU into the timeline's HWSP of the
per-context seqno as we complete each request. This drops the mirroring
of the per-engine HWSP and allows each context to operate independently.
We do not need to unwind the per-context timeline, and so requests are
always consistent with the timeline breadcrumb, greatly simplifying the
completion checks as we no longer need to be concerned about the
global_seqno changing mid check.
At this point, we are emitting both per-context and global seqno and
still using the single per-engine execution timeline for resolving
interrupts.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem.c | 2 +-
drivers/gpu/drm/i915/i915_request.c | 2 +-
drivers/gpu/drm/i915/i915_request.h | 29 ++-----
drivers/gpu/drm/i915/i915_reset.c | 1 +
drivers/gpu/drm/i915/i915_vma.h | 7 ++
drivers/gpu/drm/i915/intel_lrc.c | 32 ++++---
drivers/gpu/drm/i915/intel_ringbuffer.c | 91 ++++++++++++++------
drivers/gpu/drm/i915/selftests/mock_engine.c | 8 +-
8 files changed, 108 insertions(+), 64 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 09c7ded8f498..16e56fed07be 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2877,7 +2877,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
*/
spin_lock_irqsave(&engine->timeline.lock, flags);
list_for_each_entry(request, &engine->timeline.requests, link) {
- if (__i915_request_completed(request, request->global_seqno))
+ if (i915_request_completed(request))
continue;
active = request;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 3b69c62d040f..ec4dbc67ef9e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -590,7 +590,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
rq->ring = ce->ring;
rq->timeline = ce->ring->timeline;
GEM_BUG_ON(rq->timeline == &engine->timeline);
- rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX];
+ rq->hwsp_seqno = rq->timeline->hwsp_seqno;
spin_lock_init(&rq->lock);
dma_fence_init(&rq->fence,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index e2b209a26a8e..344e52b53ccd 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -295,6 +295,11 @@ static inline u32 i915_request_hwsp(const struct i915_request *rq)
return READ_ONCE(*rq->hwsp_seqno);
}
+static inline void i915_request_fake_complete(const struct i915_request *rq)
+{
+ *(u32 *)rq->hwsp_seqno = rq->fence.seqno;
+}
+
/**
* i915_request_started - check if the request has begun being executed
* @rq: the request
@@ -306,32 +311,12 @@ static inline u32 i915_request_hwsp(const struct i915_request *rq)
*/
static inline bool i915_request_started(const struct i915_request *rq)
{
- u32 seqno;
-
- seqno = i915_request_global_seqno(rq);
- if (!seqno) /* not yet submitted to HW */
- return false;
-
- return i915_seqno_passed(i915_request_hwsp(rq), seqno - 1);
-}
-
-static inline bool
-__i915_request_completed(const struct i915_request *rq, u32 seqno)
-{
- GEM_BUG_ON(!seqno);
- return i915_seqno_passed(i915_request_hwsp(rq), seqno) &&
- seqno == i915_request_global_seqno(rq);
+ return i915_seqno_passed(i915_request_hwsp(rq), rq->fence.seqno - 1);
}
static inline bool i915_request_completed(const struct i915_request *rq)
{
- u32 seqno;
-
- seqno = i915_request_global_seqno(rq);
- if (!seqno)
- return false;
-
- return __i915_request_completed(rq, seqno);
+ return i915_seqno_passed(i915_request_hwsp(rq), rq->fence.seqno);
}
void i915_retire_requests(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 3e0833221c3a..89d4540ad5e6 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -756,6 +756,7 @@ static void nop_submit_request(struct i915_request *request)
spin_lock_irqsave(&request->engine->timeline.lock, flags);
__i915_request_submit(request);
+ i915_request_fake_complete(request);
intel_engine_write_global_seqno(request->engine, request->global_seqno);
spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
}
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 266b226ebef2..ea9b85576e2d 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -198,6 +198,13 @@ static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
return lower_32_bits(vma->node.start);
}
+/* XXX inline spaghetti */
+static inline u32 i915_timeline_seqno_address(const struct i915_timeline *tl)
+{
+ GEM_BUG_ON(!tl->pin_count);
+ return i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset;
+}
+
static inline u32 i915_ggtt_pin_bias(struct i915_vma *vma)
{
return i915_vm_to_ggtt(vma->vm)->pin_bias;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5927ef124bf9..3757a4d3a205 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -828,10 +828,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
list_for_each_entry(rq, &engine->timeline.requests, link) {
GEM_BUG_ON(!rq->global_seqno);
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
- continue;
+ if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+ dma_fence_set_error(&rq->fence, -EIO);
- dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_fake_complete(rq);
}
/* Flush the queued requests to the timeline list (for retiring). */
@@ -844,6 +844,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
dma_fence_set_error(&rq->fence, -EIO);
__i915_request_submit(rq);
+ i915_request_fake_complete(rq);
}
rb_erase_cached(&p->node, &execlists->queue);
@@ -2023,31 +2024,40 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
- cs = gen8_emit_ggtt_write(cs, request->global_seqno,
+ cs = gen8_emit_ggtt_write(cs,
+ request->fence.seqno,
+ i915_timeline_seqno_address(request->timeline));
+
+ cs = gen8_emit_ggtt_write(cs,
+ request->global_seqno,
intel_hws_seqno_address(request->engine));
+
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail);
gen8_emit_wa_tail(request, cs);
}
-static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
+static const int gen8_emit_breadcrumb_sz = 10 + WA_TAIL_DWORDS;
static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
- /* We're using qword write, seqno should be aligned to 8 bytes. */
- BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
-
cs = gen8_emit_ggtt_write_rcs(cs,
- request->global_seqno,
- intel_hws_seqno_address(request->engine),
+ request->fence.seqno,
+ i915_timeline_seqno_address(request->timeline),
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL);
+ cs = gen8_emit_ggtt_write_rcs(cs,
+ request->global_seqno,
+ intel_hws_seqno_address(request->engine),
+ PIPE_CONTROL_CS_STALL);
+
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -2056,7 +2066,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
gen8_emit_wa_tail(request, cs);
}
-static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;
+static const int gen8_emit_breadcrumb_rcs_sz = 14 + WA_TAIL_DWORDS;
static int gen8_init_rcs_context(struct i915_request *rq)
{
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 24bd6f5b4f57..99d821d6ad3b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -326,6 +326,12 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_CS_STALL);
+ *cs++ = i915_timeline_seqno_address(rq->timeline) |
+ PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = rq->fence.seqno;
+
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
*cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
*cs++ = rq->global_seqno;
@@ -335,7 +341,7 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
}
-static const int gen6_rcs_emit_breadcrumb_sz = 14;
+static const int gen6_rcs_emit_breadcrumb_sz = 18;
static int
gen7_render_ring_cs_stall_wa(struct i915_request *rq)
@@ -426,6 +432,13 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL);
+ *cs++ = i915_timeline_seqno_address(rq->timeline);
+ *cs++ = rq->fence.seqno;
+
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = (PIPE_CONTROL_QW_WRITE |
+ PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_CS_STALL);
*cs++ = intel_hws_seqno_address(rq->engine);
*cs++ = rq->global_seqno;
@@ -435,27 +448,37 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
}
-static const int gen7_rcs_emit_breadcrumb_sz = 6;
+static const int gen7_rcs_emit_breadcrumb_sz = 10;
static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
- *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
- *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
+ *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+ *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = rq->fence.seqno;
+
+ *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+ *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = rq->global_seqno;
+
*cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
}
-static const int gen6_xcs_emit_breadcrumb_sz = 4;
+static const int gen6_xcs_emit_breadcrumb_sz = 8;
#define GEN7_XCS_WA 32
static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
int i;
- *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
- *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
+ *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+ *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = rq->fence.seqno;
+
+ *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+ *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = rq->global_seqno;
for (i = 0; i < GEN7_XCS_WA; i++) {
@@ -469,12 +492,11 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
*cs++ = 0;
*cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
}
-static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;
+static const int gen7_xcs_emit_breadcrumb_sz = 10 + GEN7_XCS_WA * 3;
#undef GEN7_XCS_WA
static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
@@ -734,7 +756,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
rq = NULL;
spin_lock_irqsave(&tl->lock, flags);
list_for_each_entry(pos, &tl->requests, link) {
- if (!__i915_request_completed(pos, pos->global_seqno)) {
+ if (!i915_request_completed(pos)) {
rq = pos;
break;
}
@@ -876,11 +898,10 @@ static void cancel_requests(struct intel_engine_cs *engine)
list_for_each_entry(request, &engine->timeline.requests, link) {
GEM_BUG_ON(!request->global_seqno);
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
- &request->fence.flags))
- continue;
-
- dma_fence_set_error(&request->fence, -EIO);
+ if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &request->fence.flags))
+ dma_fence_set_error(&request->fence, -EIO);
+ i915_request_fake_complete(request);
}
intel_write_status_page(engine,
@@ -904,27 +925,38 @@ static void i9xx_submit_request(struct i915_request *request)
static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
+ GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+
*cs++ = MI_FLUSH;
+ *cs++ = MI_STORE_DWORD_INDEX;
+ *cs++ = I915_GEM_HWS_SEQNO_ADDR;
+ *cs++ = rq->fence.seqno;
+
*cs++ = MI_STORE_DWORD_INDEX;
*cs++ = I915_GEM_HWS_INDEX_ADDR;
*cs++ = rq->global_seqno;
*cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
}
-static const int i9xx_emit_breadcrumb_sz = 6;
+static const int i9xx_emit_breadcrumb_sz = 8;
#define GEN5_WA_STORES 8 /* must be at least 1! */
static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
int i;
+ GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+
*cs++ = MI_FLUSH;
+ *cs++ = MI_STORE_DWORD_INDEX;
+ *cs++ = I915_GEM_HWS_SEQNO_ADDR;
+ *cs++ = rq->fence.seqno;
+
BUILD_BUG_ON(GEN5_WA_STORES < 1);
for (i = 0; i < GEN5_WA_STORES; i++) {
*cs++ = MI_STORE_DWORD_INDEX;
@@ -933,11 +965,12 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
}
*cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
}
-static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;
+static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 6;
#undef GEN5_WA_STORES
static void
@@ -1164,6 +1197,10 @@ int intel_ring_pin(struct intel_ring *ring)
GEM_BUG_ON(ring->vaddr);
+ ret = i915_timeline_pin(ring->timeline);
+ if (ret)
+ return ret;
+
flags = PIN_GLOBAL;
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
@@ -1180,28 +1217,32 @@ int intel_ring_pin(struct intel_ring *ring)
else
ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
if (unlikely(ret))
- return ret;
+ goto unpin_timeline;
}
ret = i915_vma_pin(vma, 0, 0, flags);
if (unlikely(ret))
- return ret;
+ goto unpin_timeline;
if (i915_vma_is_map_and_fenceable(vma))
addr = (void __force *)i915_vma_pin_iomap(vma);
else
addr = i915_gem_object_pin_map(vma->obj, map);
- if (IS_ERR(addr))
- goto err;
+ if (IS_ERR(addr)) {
+ ret = PTR_ERR(addr);
+ goto unpin_ring;
+ }
vma->obj->pin_global++;
ring->vaddr = addr;
return 0;
-err:
+unpin_ring:
i915_vma_unpin(vma);
- return PTR_ERR(addr);
+unpin_timeline:
+ i915_timeline_unpin(ring->timeline);
+ return ret;
}
void intel_ring_reset(struct intel_ring *ring, u32 tail)
@@ -1230,6 +1271,8 @@ void intel_ring_unpin(struct intel_ring *ring)
ring->vma->obj->pin_global--;
i915_vma_unpin(ring->vma);
+
+ i915_timeline_unpin(ring->timeline);
}
static struct i915_vma *
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index acd27c7e807b..b4b61056b227 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -79,6 +79,7 @@ static void advance(struct mock_engine *engine,
struct mock_request *request)
{
list_del_init(&request->link);
+ i915_request_fake_complete(&request->base);
mock_seqno_advance(&engine->base, request->base.global_seqno);
}
@@ -253,16 +254,13 @@ void mock_engine_flush(struct intel_engine_cs *engine)
del_timer_sync(&mock->hw_delay);
spin_lock_irq(&mock->hw_lock);
- list_for_each_entry_safe(request, rn, &mock->hw_queue, link) {
- list_del_init(&request->link);
- mock_seqno_advance(&mock->base, request->base.global_seqno);
- }
+ list_for_each_entry_safe(request, rn, &mock->hw_queue, link)
+ advance(mock, request);
spin_unlock_irq(&mock->hw_lock);
}
void mock_engine_reset(struct intel_engine_cs *engine)
{
- intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0);
}
void mock_engine_free(struct intel_engine_cs *engine)
--
2.20.1
More information about the Intel-gfx
mailing list