[PATCH 74/74] preemption
Chris Wilson
chris at chris-wilson.co.uk
Sun Jul 16 19:01:15 UTC 2017
---
drivers/gpu/drm/i915/intel_lrc.c | 144 +++++++++++++++++++++-----------
drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +-
2 files changed, 97 insertions(+), 49 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6215123b2943..b0410544d684 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -397,9 +397,9 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
return ce->lrc_desc;
}
-static void execlists_submit_ports(struct intel_engine_cs *engine)
+static void execlists_submit_ports(struct intel_engine_cs *engine,
+ struct execlist_port *port)
{
- struct execlist_port *port = engine->execlist_port;
u32 __iomem *elsp =
engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
unsigned int n;
@@ -458,22 +458,12 @@ static void port_assign(struct execlist_port *port,
static void execlists_dequeue(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *last;
- struct execlist_port *port = engine->execlist_port;
+ struct execlist_port *ports = engine->execlist_port;
+ struct execlist_port *port = ports;
+ struct drm_i915_gem_request *last = port_request(port);
struct rb_node *rb;
bool submit = false;
-
- last = port_request(port);
- if (last)
- /* WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
- * as we resubmit the request. See gen8_emit_breadcrumb()
- * for where we prepare the padding after the end of the
- * request.
- */
- last->tail = last->wa_tail;
-
- GEM_BUG_ON(port_isset(&port[1]));
+ bool once = last;
/* Hardware submission is through 2 ports. Conceptually each port
* has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -503,6 +493,48 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
struct drm_i915_gem_request *rq, *rn;
+ if (once) {
+ if (port_count(&port[0]) > 1)
+ goto done;
+
+ if (p->priority > max(last->priotree.priority, 0)) {
+ list_for_each_entry_safe_reverse(rq, rn,
+ &engine->timeline->requests,
+ link) {
+ struct i915_priolist *p;
+
+ if (i915_gem_request_completed(rq))
+ break;
+
+ __i915_gem_request_unsubmit(rq);
+
+ p = lookup_priolist(engine,
+ &rq->priotree,
+ rq->priotree.priority);
+ list_add(&rq->priotree.link,
+ &ptr_mask_bits(p, 1)->requests);
+ }
+
+ ports = engine->execlist_preempt;
+ port = ports;
+ last = NULL;
+ } else {
+ /* WaIdleLiteRestore:bdw,skl
+ * Apply the wa NOOPs to prevent
+ * ring:HEAD == req:TAIL as we resubmit the
+ * request. See gen8_emit_breadcrumb() for
+ * where we prepare the padding after the
+ * end of the request.
+ */
+ last->tail = last->wa_tail;
+ }
+
+ if (port_count(&port[1]))
+ goto done;
+
+ once = false;
+ }
+
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
/*
* Can we combine this request with the current port?
@@ -521,7 +553,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* combine this request with the last, then we
* are done.
*/
- if (port != engine->execlist_port) {
+ if (port != ports) {
__list_del_many(&p->requests,
&rq->priotree.link);
goto done;
@@ -568,14 +600,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
spin_unlock_irq(&engine->timeline->lock);
if (submit)
- execlists_submit_ports(engine);
+ execlists_submit_ports(engine, ports);
}
-static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
+static void switch_to_preempt(struct intel_engine_cs *engine)
{
- const struct execlist_port *port = engine->execlist_port;
-
- return port_count(&port[0]) + port_count(&port[1]) < 2;
+ memcpy(engine->execlist_port,
+ engine->execlist_preempt,
+ sizeof(engine->execlist_preempt));
+ memset(engine->execlist_preempt, 0,
+ sizeof(engine->execlist_preempt));
}
/*
@@ -585,7 +619,7 @@ static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
static void intel_lrc_irq_handler(unsigned long data)
{
struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
- struct execlist_port *port = engine->execlist_port;
+ struct execlist_port * const port = engine->execlist_port;
struct drm_i915_private *dev_priv = engine->i915;
/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -674,6 +708,24 @@ static void intel_lrc_irq_handler(unsigned long data)
/* Check the context/desc id for this event matches */
GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+ if (status & GEN8_CTX_STATUS_PREEMPTED &&
+ !(status & GEN8_CTX_STATUS_LITE_RESTORE)) {
+ int i;
+
+ GEM_BUG_ON(!port_isset(port));
+ GEM_BUG_ON(!port_isset(engine->execlist_preempt));
+ for (i = 0; i < ARRAY_SIZE(engine->execlist_port); i++) {
+ if (!port_isset(&port[i]))
+ break;
+
+ rq = port_request(&port[i]);
+ i915_gem_request_put(rq);
+ }
+
+ switch_to_preempt(engine);
+ continue;
+ }
+
rq = port_unpack(port, &count);
GEM_BUG_ON(count == 0);
if (--count == 0) {
@@ -691,6 +743,10 @@ static void intel_lrc_irq_handler(unsigned long data)
port_set(port, port_pack(rq, count));
}
+ if (!port_isset(port) &&
+ port_isset(engine->execlist_preempt))
+ switch_to_preempt(engine);
+
/* After the final element, the hw should be idle */
GEM_BUG_ON(port_count(port) == 0 &&
!(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
@@ -703,7 +759,7 @@ static void intel_lrc_irq_handler(unsigned long data)
}
}
- if (execlists_elsp_ready(engine))
+ if (!port_isset(engine->execlist_preempt))
execlists_dequeue(engine);
intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
@@ -716,7 +772,7 @@ static void insert_request(struct intel_engine_cs *engine,
struct i915_priolist *p = lookup_priolist(engine, pt, prio);
list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
- if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine))
+ if (ptr_unmask_bits(p, 1) && !port_isset(engine->execlist_preempt))
tasklet_hi_schedule(&engine->irq_tasklet);
}
@@ -837,8 +893,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
}
spin_unlock_irq(&engine->timeline->lock);
-
- /* XXX Do we need to preempt to make room for us and our deps? */
}
static struct intel_ring *
@@ -1075,17 +1129,11 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
* This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding
* to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant.
*/
-static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
- /* WaDisableCtxRestoreArbitration:bdw,chv */
- *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- *batch++ = MI_BATCH_BUFFER_END;
-
- return batch;
-}
static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
@@ -1138,13 +1186,6 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
return batch;
}
-static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
- *batch++ = MI_BATCH_BUFFER_END;
-
- return batch;
-}
-
#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
@@ -1199,11 +1240,11 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
switch (INTEL_GEN(engine->i915)) {
case 9:
wa_bb_fn[0] = gen9_init_indirectctx_bb;
- wa_bb_fn[1] = gen9_init_perctx_bb;
+ wa_bb_fn[1] = NULL;
break;
case 8:
wa_bb_fn[0] = gen8_init_indirectctx_bb;
- wa_bb_fn[1] = gen8_init_perctx_bb;
+ wa_bb_fn[1] = NULL;
break;
default:
MISSING_CASE(INTEL_GEN(engine->i915));
@@ -1272,6 +1313,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
engine->csb_head = -1;
+ if (port_isset(engine->execlist_preempt))
+ switch_to_preempt(engine);
+
submit = false;
for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
if (!port_isset(&port[n]))
@@ -1287,7 +1331,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
}
if (submit && !i915.enable_guc_submission)
- execlists_submit_ports(engine);
+ execlists_submit_ports(engine, port);
return 0;
}
@@ -1447,10 +1491,12 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine);
}
- cs = intel_ring_begin(req, 4);
+ cs = intel_ring_begin(req, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
/* FIXME(BDW): Address space and security selectors. */
*cs++ = MI_BATCH_BUFFER_START_GEN8 |
(flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) |
@@ -1458,6 +1504,8 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
*cs++ = lower_32_bits(offset);
*cs++ = upper_32_bits(offset);
*cs++ = MI_NOOP;
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
intel_ring_advance(req, cs);
return 0;
@@ -1601,7 +1649,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
*cs++ = 0;
*cs++ = request->global_seqno;
*cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail);
@@ -1629,7 +1677,7 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
/* We're thrashing one dword of HWS. */
*cs++ = 0;
*cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 5d73e0a406d5..72545eb7a6e4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -389,7 +389,7 @@ struct intel_engine_cs {
#define port_isset(p) ((p)->request_count)
#define port_index(p, e) ((p) - (e)->execlist_port)
GEM_DEBUG_DECL(u32 context_id);
- } execlist_port[2];
+ } execlist_port[2], execlist_preempt[2];
struct rb_root execlist_queue;
struct rb_node *execlist_first;
unsigned int fw_domains;
--
2.13.2
More information about the Intel-gfx-trybot mailing list