[PATCH 11/17] out
Chris Wilson
chris at chris-wilson.co.uk
Tue Sep 13 14:22:13 UTC 2016
---
drivers/gpu/drm/i915/i915_gem_request.c | 26 ++---
drivers/gpu/drm/i915/intel_lrc.c | 121 ++++++++---------------
drivers/gpu/drm/i915/intel_ringbuffer.c | 168 +++++++++++---------------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 10 +-
4 files changed, 112 insertions(+), 213 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index c60d2fe827d3..125449307252 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -630,9 +630,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
struct intel_ring *ring = request->ring;
struct intel_timeline *timeline = request->timeline;
struct drm_i915_gem_request *prev;
- u32 request_start;
- u32 reserved_tail;
- int ret;
+ int err;
trace_i915_gem_request_add(request);
@@ -641,8 +639,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
* should already have been reserved in the ring buffer. Let the ring
* know that it is time to use that space up.
*/
- request_start = ring->tail;
- reserved_tail = request->reserved_space;
request->reserved_space = 0;
/*
@@ -653,10 +649,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
* what.
*/
if (flush_caches) {
- ret = engine->emit_flush(request, EMIT_FLUSH);
+ err = engine->emit_flush(request, EMIT_FLUSH);
/* Not allowed to fail! */
- WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
+ WARN(err, "engine->emit_flush() failed: %d!\n", err);
}
/* Record the position of the start of the breadcrumb so that
@@ -664,20 +660,12 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
* GPU processing the request, we never over-estimate the
* position of the ring's HEAD.
*/
+ err = intel_ring_begin(request, engine->emit_request_sz);
+ GEM_BUG_ON(err);
request->postfix = ring->tail;
- /* Not allowed to fail! */
- ret = engine->emit_request(request);
- WARN(ret, "(%s)->emit_request failed: %d!\n", engine->name, ret);
-
- /* Sanity check that the reserved size was large enough. */
- ret = ring->tail - request_start;
- if (ret < 0)
- ret += ring->size;
- WARN_ONCE(ret > reserved_tail,
- "Not enough space reserved (%d bytes) "
- "for adding the request (%d bytes)\n",
- reserved_tail, ret);
+ engine->emit_request(request, request->ring->vaddr + request->postfix);
+ ring->tail += engine->emit_request_sz * sizeof(u32);
/* Seal the request and mark it as pending execution. Note that
* we may inspect this state, without holding any locks, during
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7e944a0acc10..ff081ce4a2aa 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -360,7 +360,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
u32 *reg_state = ce->lrc_reg_state;
- reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
+ reg_state[CTX_RING_TAIL+1] = rq->tail;
/* True 32b PPGTT with dynamic page allocation: update PDP
* registers and point the unallocated PDPs to scratch page.
@@ -594,6 +594,15 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
spin_lock_irqsave(&engine->execlist_lock, flags);
+ /* We keep the previous context alive until we retire the following
+ * request. This ensures that any the context object is still pinned
+ * for any residual writes the HW makes into it on the context switch
+ * into the next object following the breadcrumb. Otherwise, we may
+ * retire the context too early.
+ */
+ request->previous_context = engine->last_context;
+ engine->last_context = request->ctx;
+
list_add_tail(&request->execlist_link, &engine->execlist_queue);
if (execlists_elsp_idle(engine))
tasklet_hi_schedule(&engine->irq_tasklet);
@@ -663,46 +672,6 @@ err_unpin:
return ret;
}
-/*
- * intel_logical_ring_advance() - advance the tail and prepare for submission
- * @request: Request to advance the logical ringbuffer of.
- *
- * The tail is updated in our logical ringbuffer struct, not in the actual context. What
- * really happens during submission is that the context and current tail will be placed
- * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
- * point, the tail *inside* the context is updated and the ELSP written to.
- */
-static int
-intel_logical_ring_advance(struct drm_i915_gem_request *request)
-{
- struct intel_ring *ring = request->ring;
- struct intel_engine_cs *engine = request->engine;
-
- intel_ring_advance(ring);
- request->tail = ring->tail;
-
- /*
- * Here we add two extra NOOPs as padding to avoid
- * lite restore of a context with HEAD==TAIL.
- *
- * Caller must reserve WA_TAIL_DWORDS for us!
- */
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_advance(ring);
- request->wa_tail = ring->tail;
-
- /* We keep the previous context alive until we retire the following
- * request. This ensures that any the context object is still pinned
- * for any residual writes the HW makes into it on the context switch
- * into the next object following the breadcrumb. Otherwise, we may
- * retire the context too early.
- */
- request->previous_context = engine->last_context;
- engine->last_context = request->ctx;
- return 0;
-}
-
static int intel_lr_context_pin(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
@@ -1548,41 +1517,36 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine)
* restore with HEAD==TAIL (WaIdleLiteRestore).
*/
#define WA_TAIL_DWORDS 2
-
-static int gen8_emit_request(struct drm_i915_gem_request *request)
+static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *out)
{
- struct intel_ring *ring = request->ring;
- int ret;
-
- ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS);
- if (ret)
- return ret;
+ out[0] = MI_NOOP;
+ out[1] = MI_NOOP;
+ request->wa_tail = intel_ring_offset(request->ring,
+ out + WA_TAIL_DWORDS);
+}
+static void gen8_emit_request(struct drm_i915_gem_request *request,
+ u32 *out)
+{
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
- intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
- intel_ring_emit(ring,
- intel_hws_seqno_address(request->engine) |
- MI_FLUSH_DW_USE_GTT);
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, request->global_seqno);
- intel_ring_emit(ring, MI_USER_INTERRUPT);
- intel_ring_emit(ring, MI_NOOP);
- return intel_logical_ring_advance(request);
+ out[0] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+ out[1] = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT;
+ out[2] = 0;
+ out[3] = request->global_seqno;
+ out[4] = MI_USER_INTERRUPT;
+ out[5] = MI_NOOP;
+ request->tail = intel_ring_offset(request->ring, out + 6);
+
+ gen8_emit_wa_tail(request, out + 6);
}
static const int gen8_emit_request_sz = 6 + WA_TAIL_DWORDS;
-static int gen8_emit_request_render(struct drm_i915_gem_request *request)
+static void gen8_emit_request_render(struct drm_i915_gem_request *request,
+ u32 *out)
{
- struct intel_ring *ring = request->ring;
- int ret;
-
- ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS);
- if (ret)
- return ret;
-
/* We're using qword write, seqno should be aligned to 8 bytes. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
@@ -1590,19 +1554,20 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request)
* need a prior CS_STALL, which is emitted by the flush
* following the batch.
*/
- intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
- intel_ring_emit(ring,
- (PIPE_CONTROL_GLOBAL_GTT_IVB |
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_QW_WRITE));
- intel_ring_emit(ring, intel_hws_seqno_address(request->engine));
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, request->global_seqno);
+ out[0] = GFX_OP_PIPE_CONTROL(6);
+ out[1] = (PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_QW_WRITE);
+ out[2] = intel_hws_seqno_address(request->engine);
+ out[3] = 0;
+ out[4] = request->global_seqno;
/* We're thrashing one dword of HWS. */
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, MI_USER_INTERRUPT);
- intel_ring_emit(ring, MI_NOOP);
- return intel_logical_ring_advance(request);
+ out[5] = 0;
+ out[6] = MI_USER_INTERRUPT;
+ out[7] = MI_NOOP;
+ request->tail = intel_ring_offset(request->ring, out + 8);
+
+ gen8_emit_wa_tail(request, out + 8);
}
static const int gen8_emit_request_render_sz = 8 + WA_TAIL_DWORDS;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c8c9ad40fd93..83d2a3f72ce1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1271,89 +1271,61 @@ static void render_ring_cleanup(struct intel_engine_cs *engine)
i915_vma_unpin_and_release(&dev_priv->semaphore);
}
-static int gen8_rcs_signal(struct drm_i915_gem_request *req)
+static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out)
{
- struct intel_ring *ring = req->ring;
struct drm_i915_private *dev_priv = req->i915;
struct intel_engine_cs *waiter;
enum intel_engine_id id;
- int ret, num_rings;
-
- num_rings = INTEL_INFO(dev_priv)->num_rings;
- ret = intel_ring_begin(req, (num_rings-1) * 8);
- if (ret)
- return ret;
for_each_engine_id(waiter, dev_priv, id) {
u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
continue;
- intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
- intel_ring_emit(ring,
- PIPE_CONTROL_GLOBAL_GTT_IVB |
- PIPE_CONTROL_QW_WRITE |
- PIPE_CONTROL_CS_STALL);
- intel_ring_emit(ring, lower_32_bits(gtt_offset));
- intel_ring_emit(ring, upper_32_bits(gtt_offset));
- intel_ring_emit(ring, req->global_seqno);
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring,
- MI_SEMAPHORE_SIGNAL |
- MI_SEMAPHORE_TARGET(waiter->hw_id));
- intel_ring_emit(ring, 0);
+ *out++ = GFX_OP_PIPE_CONTROL(6);
+ *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_QW_WRITE |
+ PIPE_CONTROL_CS_STALL);
+ *out++ = lower_32_bits(gtt_offset);
+ *out++ = upper_32_bits(gtt_offset);
+ *out++ = req->global_seqno;
+ *out++ = 0;
+ *out++ = (MI_SEMAPHORE_SIGNAL |
+ MI_SEMAPHORE_TARGET(waiter->hw_id));
+ *out++ = 0;
}
- intel_ring_advance(ring);
- return 0;
+ return out;
}
-static int gen8_xcs_signal(struct drm_i915_gem_request *req)
+static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out)
{
- struct intel_ring *ring = req->ring;
struct drm_i915_private *dev_priv = req->i915;
struct intel_engine_cs *waiter;
enum intel_engine_id id;
- int ret, num_rings;
-
- num_rings = INTEL_INFO(dev_priv)->num_rings;
- ret = intel_ring_begin(req, (num_rings-1) * 6);
- if (ret)
- return ret;
for_each_engine_id(waiter, dev_priv, id) {
u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
continue;
- intel_ring_emit(ring,
- (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
- intel_ring_emit(ring,
- lower_32_bits(gtt_offset) |
- MI_FLUSH_DW_USE_GTT);
- intel_ring_emit(ring, upper_32_bits(gtt_offset));
- intel_ring_emit(ring, req->global_seqno);
- intel_ring_emit(ring,
- MI_SEMAPHORE_SIGNAL |
- MI_SEMAPHORE_TARGET(waiter->hw_id));
- intel_ring_emit(ring, 0);
+ *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+ *out++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT;
+ *out++ = upper_32_bits(gtt_offset);
+ *out++ = req->global_seqno;
+ *out++ = (MI_SEMAPHORE_SIGNAL |
+ MI_SEMAPHORE_TARGET(waiter->hw_id));
+ *out++ = 0;
}
- intel_ring_advance(ring);
- return 0;
+ return out;
}
-static int gen6_signal(struct drm_i915_gem_request *req)
+static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out)
{
- struct intel_ring *ring = req->ring;
struct drm_i915_private *dev_priv = req->i915;
struct intel_engine_cs *engine;
- int ret, num_rings;
-
- num_rings = INTEL_INFO(dev_priv)->num_rings;
- ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2));
- if (ret)
- return ret;
+ int num_rings = 0;
for_each_engine(engine, dev_priv) {
i915_reg_t mbox_reg;
@@ -1363,46 +1335,34 @@ static int gen6_signal(struct drm_i915_gem_request *req)
mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id];
if (i915_mmio_reg_valid(mbox_reg)) {
- intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
- intel_ring_emit_reg(ring, mbox_reg);
- intel_ring_emit(ring, req->global_seqno);
+ *out++ = MI_LOAD_REGISTER_IMM(1);
+ *out++ = i915_mmio_reg_offset(mbox_reg);
+ *out++ = req->global_seqno;
+ num_rings++;
}
}
+ if (num_rings & 1)
+ *out++ = MI_NOOP;
- /* If num_dwords was rounded, make sure the tail pointer is correct */
- if (num_rings % 2 == 0)
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_advance(ring);
-
- return 0;
+ return out;
}
static void i9xx_submit_request(struct drm_i915_gem_request *request)
{
struct drm_i915_private *dev_priv = request->i915;
- I915_WRITE_TAIL(request->engine,
- intel_ring_offset(request->ring, request->tail));
+ I915_WRITE_TAIL(request->engine, request->tail);
}
-static int i9xx_emit_request(struct drm_i915_gem_request *req)
+static void i9xx_emit_request(struct drm_i915_gem_request *req,
+ u32 *out)
{
- struct intel_ring *ring = req->ring;
- int ret;
-
- ret = intel_ring_begin(req, 4);
- if (ret)
- return ret;
-
- intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
- intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
- intel_ring_emit(ring, req->global_seqno);
- intel_ring_emit(ring, MI_USER_INTERRUPT);
- intel_ring_advance(ring);
-
- req->tail = ring->tail;
+ out[0] = MI_STORE_DWORD_INDEX;
+ out[1] = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
+ out[2] = req->global_seqno;
+ out[3] = MI_USER_INTERRUPT;
- return 0;
+ req->tail = intel_ring_offset(req->ring, out + 4);
}
static const int i9xx_emit_request_sz = 4;
@@ -1415,49 +1375,33 @@ static const int i9xx_emit_request_sz = 4;
* Update the mailbox registers in the *other* rings with the current seqno.
* This acts like a signal in the canonical semaphore.
*/
-static int gen6_sema_emit_request(struct drm_i915_gem_request *req)
+static void gen6_sema_emit_request(struct drm_i915_gem_request *req,
+ u32 *out)
{
- int ret;
-
- ret = req->engine->semaphore.signal(req);
- if (ret)
- return ret;
-
- return i9xx_emit_request(req);
+ return i9xx_emit_request(req, req->engine->semaphore.signal(req, out));
}
-static int gen8_render_emit_request(struct drm_i915_gem_request *req)
+static void gen8_render_emit_request(struct drm_i915_gem_request *req,
+ u32 *out)
{
struct intel_engine_cs *engine = req->engine;
- struct intel_ring *ring = req->ring;
- int ret;
- if (engine->semaphore.signal) {
- ret = engine->semaphore.signal(req);
- if (ret)
- return ret;
- }
-
- ret = intel_ring_begin(req, 8);
- if (ret)
- return ret;
+ if (engine->semaphore.signal)
+ out = engine->semaphore.signal(req, out);
- intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
- intel_ring_emit(ring, (PIPE_CONTROL_GLOBAL_GTT_IVB |
+ out[0] = GFX_OP_PIPE_CONTROL(6);
+ out[1] = (PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_QW_WRITE));
- intel_ring_emit(ring, intel_hws_seqno_address(engine));
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, req->global_seqno);
+ PIPE_CONTROL_QW_WRITE);
+ out[2] = intel_hws_seqno_address(engine);
+ out[3] = 0;
+ out[4] = req->global_seqno;
/* We're thrashing one dword of HWS. */
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, MI_USER_INTERRUPT);
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_advance(ring);
-
- req->tail = ring->tail;
+ out[5] = 0;
+ out[6] = MI_USER_INTERRUPT;
+ out[7] = MI_NOOP;
- return 0;
+ req->tail = intel_ring_offset(req->ring, out + 8);
}
static const int gen8_render_emit_request_sz = 8;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 47ad19f5441d..a2f9c7d60381 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -225,7 +225,8 @@ struct intel_engine_cs {
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
#define I915_DISPATCH_RS BIT(2)
- int (*emit_request)(struct drm_i915_gem_request *req);
+ void (*emit_request)(struct drm_i915_gem_request *req,
+ u32 *out);
int emit_request_sz;
/* Pass the request to the hardware queue (e.g. directly into
@@ -301,7 +302,7 @@ struct intel_engine_cs {
/* AKA wait() */
int (*sync_to)(struct drm_i915_gem_request *req,
struct drm_i915_gem_request *signal);
- int (*signal)(struct drm_i915_gem_request *req);
+ u32 *(*signal)(struct drm_i915_gem_request *req, u32 *out);
} semaphore;
/* Execlists */
@@ -463,10 +464,11 @@ static inline void intel_ring_advance(struct intel_ring *ring)
*/
}
-static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value)
+static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr)
{
/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
- return value & (ring->size - 1);
+ u32 offset = addr - ring->vaddr;
+ return offset & (ring->size - 1);
}
int __intel_ring_space(int head, int tail, int size);
--
2.9.3
More information about the Intel-gfx-trybot
mailing list