[PATCH 5/5] drm/i915: Keep ring tight to avoid garbage over resume
Chris Wilson
chris at chris-wilson.co.uk
Sat Apr 6 21:44:30 UTC 2019
Avoid the requirement to unwind after resume by updating the ring->tail
on retirement.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/gt/intel_engine.h | 18 +--------
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 -
drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 -
drivers/gpu/drm/i915/gt/intel_lrc.c | 29 +-------------
drivers/gpu/drm/i915/gt/intel_lrc.h | 1 -
drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 39 +++++++------------
drivers/gpu/drm/i915/i915_debugfs.c | 4 +-
drivers/gpu/drm/i915/i915_drv.h | 1 -
drivers/gpu/drm/i915/i915_gem.c | 14 +------
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +-
drivers/gpu/drm/i915/i915_request.c | 16 ++------
drivers/gpu/drm/i915/intel_guc_submission.c | 8 ++--
drivers/gpu/drm/i915/selftests/i915_request.c | 2 +-
13 files changed, 31 insertions(+), 106 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index e58d6f04177b..0e23bcf2ac9f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -268,8 +268,6 @@ static inline void intel_ring_put(struct intel_ring *ring)
void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);
-void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
-
int __must_check intel_ring_cacheline_align(struct i915_request *rq);
u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
@@ -284,7 +282,7 @@ static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
* reserved for the command packet (i.e. the value passed to
* intel_ring_begin()).
*/
- GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
+ GEM_BUG_ON((rq->ring->vaddr + rq->ring->tail) != cs);
}
static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
@@ -338,20 +336,6 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
#undef cacheline
}
-static inline unsigned int
-intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
-{
- /* Whilst writes to the tail are strictly order, there is no
- * serialisation between readers and the writers. The tail may be
- * read by i915_request_retire() just as it is being updated
- * by execlists, as although the breadcrumb is complete, the context
- * switch hasn't been seen.
- */
- assert_ring_tail_valid(ring, tail);
- ring->tail = tail;
- return tail;
-}
-
static inline unsigned int
__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index c7ae7d8c1806..1891a666ad09 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1537,8 +1537,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
rq->ring->head);
drm_printf(m, "\t\tring->tail: 0x%08x\n",
rq->ring->tail);
- drm_printf(m, "\t\tring->emit: 0x%08x\n",
- rq->ring->emit);
drm_printf(m, "\t\tring->space: 0x%08x\n",
rq->ring->space);
drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 51faf6d40ba5..d7d2f1b06bfd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -69,7 +69,6 @@ struct intel_ring {
u32 head;
u32 tail;
- u32 emit;
u32 space;
u32 size;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 85bdae1db76c..5b237f7ffd82 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -487,8 +487,7 @@ static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->hw_context;
- ce->lrc_reg_state[CTX_RING_TAIL + 1] =
- intel_ring_set_tail(rq->ring, rq->tail);
+ ce->lrc_reg_state[CTX_RING_TAIL + 1] = rq->tail;
/*
* Make sure the context image is complete before we submit it to HW.
@@ -1381,6 +1380,7 @@ static int execlists_context_pin(struct intel_context *ce)
static const struct intel_context_ops execlists_context_ops = {
.pin = execlists_context_pin,
.unpin = execlists_context_unpin,
+
.destroy = execlists_context_destroy,
};
@@ -2762,31 +2762,6 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
return ret;
}
-void intel_lr_context_resume(struct drm_i915_private *i915)
-{
- struct i915_gem_context *ctx;
- struct intel_context *ce;
-
- /*
- * Because we emit WA_TAIL_DWORDS there may be a disparity
- * between our bookkeeping in ce->ring->head and ce->ring->tail and
- * that stored in context. As we only write new commands from
- * ce->ring->tail onwards, everything before that is junk. If the GPU
- * starts reading from its RING_HEAD from the context, it may try to
- * execute that junk and die.
- *
- * So to avoid that we reset the context images upon resume. For
- * simplicity, we just zero everything out.
- */
- list_for_each_entry(ctx, &i915->contexts.list, link) {
- list_for_each_entry(ce, &ctx->active_engines, active_link) {
- GEM_BUG_ON(!ce->ring);
- intel_ring_reset(ce->ring, 0);
- __execlists_update_reg_state(ce, ce->engine);
- }
- }
-}
-
void intel_execlists_show_requests(struct intel_engine_cs *engine,
struct drm_printer *m,
void (*show_request)(struct drm_printer *m,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index b3274e707423..a598f2d56de3 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -100,7 +100,6 @@ struct drm_printer;
struct drm_i915_private;
-void intel_lr_context_resume(struct drm_i915_private *dev_priv);
void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
void intel_execlists_show_requests(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index be83cb8167f3..e624d5fd3426 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -46,7 +46,7 @@ unsigned int intel_ring_update_space(struct intel_ring *ring)
{
unsigned int space;
- space = __intel_ring_space(ring->head, ring->emit, ring->size);
+ space = __intel_ring_space(ring->head, ring->tail, ring->size);
ring->space = space;
return space;
@@ -902,8 +902,7 @@ static void i9xx_submit_request(struct i915_request *request)
{
i915_request_submit(request);
- ENGINE_WRITE(request->engine, RING_TAIL,
- intel_ring_set_tail(request->ring, request->tail));
+ ENGINE_WRITE(request->engine, RING_TAIL, request->tail);
}
static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
@@ -1216,7 +1215,6 @@ void intel_ring_reset(struct intel_ring *ring, u32 tail)
ring->tail = tail;
ring->head = tail;
- ring->emit = tail;
intel_ring_update_space(ring);
}
@@ -1510,6 +1508,7 @@ static int ring_context_pin(struct intel_context *ce)
static const struct intel_context_ops ring_context_ops = {
.pin = ring_context_pin,
.unpin = ring_context_unpin,
+
.destroy = ring_context_destroy,
};
@@ -1580,16 +1579,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
kfree(engine);
}
-void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- /* Restart from the beginning of the rings for convenience */
- for_each_engine(engine, dev_priv, id)
- intel_ring_reset(engine->buffer, 0);
-}
-
static int load_pd_dir(struct i915_request *rq,
const struct i915_hw_ppgtt *ppgtt)
{
@@ -1917,7 +1906,7 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
list_for_each_entry(target, &ring->request_list, ring_link) {
/* Would completion of this request free enough space? */
if (bytes <= __intel_ring_space(target->postfix,
- ring->emit, ring->size))
+ ring->tail, ring->size))
break;
}
@@ -1940,7 +1929,7 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
{
struct intel_ring *ring = rq->ring;
- const unsigned int remain_usable = ring->effective_size - ring->emit;
+ const unsigned int remain_usable = ring->effective_size - ring->tail;
const unsigned int bytes = num_dwords * sizeof(u32);
unsigned int need_wrap = 0;
unsigned int total_bytes;
@@ -1953,7 +1942,7 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
GEM_BUG_ON(total_bytes > ring->effective_size);
if (unlikely(total_bytes > remain_usable)) {
- const int remain_actual = ring->size - ring->emit;
+ const int remain_actual = ring->size - ring->tail;
if (bytes > remain_usable) {
/*
@@ -1996,20 +1985,20 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
if (unlikely(need_wrap)) {
need_wrap &= ~1;
GEM_BUG_ON(need_wrap > ring->space);
- GEM_BUG_ON(ring->emit + need_wrap > ring->size);
+ GEM_BUG_ON(ring->tail + need_wrap > ring->size);
GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
/* Fill the tail with MI_NOOP */
- memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
+ memset64(ring->vaddr + ring->tail, 0, need_wrap / sizeof(u64));
ring->space -= need_wrap;
- ring->emit = 0;
+ ring->tail = 0;
}
- GEM_BUG_ON(ring->emit > ring->size - bytes);
+ GEM_BUG_ON(ring->tail > ring->size - bytes);
GEM_BUG_ON(ring->space < bytes);
- cs = ring->vaddr + ring->emit;
+ cs = ring->vaddr + ring->tail;
GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
- ring->emit += bytes;
+ ring->tail += bytes;
ring->space -= bytes;
return cs;
@@ -2021,7 +2010,7 @@ int intel_ring_cacheline_align(struct i915_request *rq)
int num_dwords;
void *cs;
- num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
+ num_dwords = (rq->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(u32);
if (num_dwords == 0)
return 0;
@@ -2035,7 +2024,7 @@ int intel_ring_cacheline_align(struct i915_request *rq)
memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
intel_ring_advance(rq, cs);
- GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
+ GEM_BUG_ON(rq->ring->tail & (CACHELINE_BYTES - 1));
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index abf63f6ddde8..28fe6a9cb89b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1869,8 +1869,8 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
{
- seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, emit: %u)",
- ring->space, ring->head, ring->tail, ring->emit);
+ seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u)",
+ ring->space, ring->head, ring->tail);
}
static int i915_context_status(struct seq_file *m, void *unused)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 68b536500a04..53b3339ba861 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2003,7 +2003,6 @@ struct drm_i915_private {
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
struct {
- void (*resume)(struct drm_i915_private *);
void (*cleanup_engine)(struct intel_engine_cs *engine);
struct i915_gt_timelines {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1b1721152c58..b55d55ca325d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4508,13 +4508,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
i915_gem_restore_gtt_mappings(i915);
i915_gem_restore_fences(i915);
- /*
- * As we didn't flush the kernel context before suspend, we cannot
- * guarantee that the context image is complete. So let's just reset
- * it and start again.
- */
- i915->gt.resume(i915);
-
if (i915_gem_init_hw(i915))
goto err_wedged;
@@ -4853,13 +4846,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
- if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
- dev_priv->gt.resume = intel_lr_context_resume;
+ if (HAS_LOGICAL_RING_CONTEXTS(dev_priv))
dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
- } else {
- dev_priv->gt.resume = intel_legacy_submission_resume;
+ else
dev_priv->gt.cleanup_engine = intel_engine_cleanup;
- }
i915_timelines_init(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3d672c9edb94..0f7141ddffa4 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -773,7 +773,7 @@ static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
*/
list_for_each_entry(rq, &ring->request_list, ring_link) {
if (__intel_ring_space(rq->postfix,
- ring->emit, ring->size) > ring->size / 2)
+ ring->tail, ring->size) > ring->size / 2)
break;
}
if (&rq->ring_link == &ring->request_list)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 339e065da554..74135217063a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -158,15 +158,7 @@ static void advance_ring(struct i915_request *request)
*/
GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
if (list_is_last(&request->ring_link, &ring->request_list)) {
- /*
- * We may race here with execlists resubmitting this request
- * as we retire it. The resubmission will move the ring->tail
- * forwards (to request->wa_tail). We either read the
- * current value that was written to hw, or the value that
- * is just about to be. Either works, if we miss the last two
- * noops - they are safe to be replayed on a reset.
- */
- tail = READ_ONCE(request->tail);
+ tail = ring->tail;
list_del(&ring->active_link);
} else {
tail = request->postfix;
@@ -729,7 +721,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
* GPU processing the request, we never over-estimate the
* position of the head.
*/
- rq->head = rq->ring->emit;
+ rq->head = rq->ring->tail;
ret = engine->request_alloc(rq);
if (ret)
@@ -738,7 +730,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
/* Keep a second pin for the dual retirement along engine and ring */
__intel_context_pin(ce);
- rq->infix = rq->ring->emit; /* end of header; start of user payload */
+ rq->infix = rq->ring->tail; /* end of header; start of user payload */
/* Check that we didn't interrupt ourselves with a new request */
lockdep_assert_held(&rq->timeline->mutex);
@@ -748,7 +740,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
return rq;
err_unwind:
- ce->ring->emit = rq->head;
+ ce->ring->tail = rq->head;
/* Make sure we didn't add ourselves to external state before freeing */
GEM_BUG_ON(!list_empty(&rq->active_list));
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 1ed568852395..397622b65497 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -531,7 +531,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
struct intel_guc_client *client = guc->execbuf_client;
struct intel_engine_cs *engine = rq->engine;
u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
- u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
+ u32 ring_tail = rq->tail / sizeof(u64);
spin_lock(&client->wq_lock);
@@ -571,7 +571,7 @@ static void inject_preempt_context(struct work_struct *work)
struct intel_context *ce = intel_context_lookup(client->owner, engine);
u32 data[7];
- if (!ce->ring->emit) { /* recreate upon load/resume */
+ if (!ce->ring->tail) { /* recreate upon load/resume */
u32 addr = intel_hws_preempt_done_address(engine);
u32 *cs;
@@ -592,8 +592,8 @@ static void inject_preempt_context(struct work_struct *work)
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
- ce->ring->emit = GUC_PREEMPT_BREADCRUMB_BYTES;
- GEM_BUG_ON((void *)cs - ce->ring->vaddr != ce->ring->emit);
+ ce->ring->tail = GUC_PREEMPT_BREADCRUMB_BYTES;
+ GEM_BUG_ON((void *)cs - ce->ring->vaddr != ce->ring->tail);
flush_ggtt_writes(ce->ring->vma);
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index e6ffe2240126..7ba916f0f6ed 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -1084,7 +1084,7 @@ max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
ret = rq->ring->size - rq->reserved_space;
i915_request_add(rq);
- sz = rq->ring->emit - rq->head;
+ sz = rq->ring->tail - rq->head;
if (sz < 0)
sz += rq->ring->size;
ret /= sz;
--
2.20.1
More information about the Intel-gfx-trybot
mailing list