[Intel-gfx] [CI 05/12] drm/i915/execlists: Pack the count into the low bits of the port.request
Chris Wilson
chris at chris-wilson.co.uk
Wed May 17 12:10:00 UTC 2017
add/remove: 1/1 grow/shrink: 5/4 up/down: 391/-578 (-187)
function old new delta
execlists_submit_ports 262 471 +209
port_assign.isra - 136 +136
capture 6344 6359 +15
reset_common_ring 438 452 +14
execlists_submit_request 228 238 +10
gen8_init_common_ring 334 341 +7
intel_engine_is_idle 106 105 -1
i915_engine_info 2314 2290 -24
__i915_gem_set_wedged_BKL 485 411 -74
intel_lrc_irq_handler 1789 1604 -185
execlists_update_context 294 - -294
The most important change there is the improve to the
intel_lrc_irq_handler and excclist_submit_ports (net improvement since
execlists_update_context is now inlined).
v2: Use the port_api() for guc as well (even though currently we do not
pack any counters in there, yet) and hide all port->request_count inside
the helpers.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 32 ++++----
drivers/gpu/drm/i915/i915_gem.c | 6 +-
drivers/gpu/drm/i915/i915_gpu_error.c | 13 +++-
drivers/gpu/drm/i915/i915_guc_submission.c | 34 +++++---
drivers/gpu/drm/i915/intel_engine_cs.c | 2 +-
drivers/gpu/drm/i915/intel_lrc.c | 121 ++++++++++++++++-------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++-
7 files changed, 126 insertions(+), 93 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 76abff186d01..e08ac708e547 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3353,6 +3353,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
if (i915.enable_execlists) {
u32 ptr, read, write;
struct rb_node *rb;
+ unsigned int idx;
seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
I915_READ(RING_EXECLIST_STATUS_LO(engine)),
@@ -3370,8 +3371,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
if (read > write)
write += GEN8_CSB_ENTRIES;
while (read < write) {
- unsigned int idx = ++read % GEN8_CSB_ENTRIES;
-
+ idx = ++read % GEN8_CSB_ENTRIES;
seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
idx,
I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
@@ -3379,21 +3379,19 @@ static int i915_engine_info(struct seq_file *m, void *unused)
}
rcu_read_lock();
- rq = READ_ONCE(engine->execlist_port[0].request);
- if (rq) {
- seq_printf(m, "\t\tELSP[0] count=%d, ",
- engine->execlist_port[0].count);
- print_request(m, rq, "rq: ");
- } else {
- seq_printf(m, "\t\tELSP[0] idle\n");
- }
- rq = READ_ONCE(engine->execlist_port[1].request);
- if (rq) {
- seq_printf(m, "\t\tELSP[1] count=%d, ",
- engine->execlist_port[1].count);
- print_request(m, rq, "rq: ");
- } else {
- seq_printf(m, "\t\tELSP[1] idle\n");
+ for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) {
+ unsigned int count;
+
+ rq = port_unpack(&engine->execlist_port[idx],
+ &count);
+ if (rq) {
+ seq_printf(m, "\t\tELSP[%d] count=%d, ",
+ idx, count);
+ print_request(m, rq, "rq: ");
+ } else {
+ seq_printf(m, "\t\tELSP[%d] idle\n",
+ idx);
+ }
}
rcu_read_unlock();
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ffd20dc71ba9..83518cdb19f3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3019,12 +3019,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
*/
if (i915.enable_execlists) {
+ struct execlist_port *port = engine->execlist_port;
unsigned long flags;
+ unsigned int n;
spin_lock_irqsave(&engine->timeline->lock, flags);
- i915_gem_request_put(engine->execlist_port[0].request);
- i915_gem_request_put(engine->execlist_port[1].request);
+ for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
+ i915_gem_request_put(port_request(&port[n]));
memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
engine->execlist_queue = RB_ROOT;
engine->execlist_first = NULL;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ec526d92f908..e18f350bc364 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1324,12 +1324,17 @@ static void engine_record_requests(struct intel_engine_cs *engine,
static void error_record_engine_execlists(struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee)
{
+ const struct execlist_port *port = engine->execlist_port;
unsigned int n;
- for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
- if (engine->execlist_port[n].request)
- record_request(engine->execlist_port[n].request,
- &ee->execlist[n]);
+ for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+ struct drm_i915_gem_request *rq = port_request(&port[n]);
+
+ if (!rq)
+ break;
+
+ record_request(rq, &ee->execlist[n]);
+ }
}
static void record_context(struct drm_i915_error_context *e,
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 7e85b5ab8ae2..014cbd1a841e 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -653,10 +653,22 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
spin_unlock(&rq->lock);
}
+static void port_assign(struct execlist_port *port,
+ struct drm_i915_gem_request *rq)
+{
+ GEM_BUG_ON(rq == port_request(port));
+
+ if (port_isset(port))
+ i915_gem_request_put(port_request(port));
+
+ port_set(port, i915_gem_request_get(rq));
+ nested_enable_signaling(rq);
+}
+
static bool i915_guc_dequeue(struct intel_engine_cs *engine)
{
struct execlist_port *port = engine->execlist_port;
- struct drm_i915_gem_request *last = port[0].request;
+ struct drm_i915_gem_request *last = port_request(port);
struct rb_node *rb;
bool submit = false;
@@ -670,8 +682,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
if (port != engine->execlist_port)
break;
- i915_gem_request_assign(&port->request, last);
- nested_enable_signaling(last);
+ port_assign(port, last);
port++;
}
@@ -681,13 +692,12 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
rq->priotree.priority = INT_MAX;
i915_guc_submit(rq);
- trace_i915_gem_request_in(rq, port - engine->execlist_port);
+ trace_i915_gem_request_in(rq, port_index(port, engine));
last = rq;
submit = true;
}
if (submit) {
- i915_gem_request_assign(&port->request, last);
- nested_enable_signaling(last);
+ port_assign(port, last);
engine->execlist_first = rb;
}
spin_unlock_irq(&engine->timeline->lock);
@@ -703,17 +713,19 @@ static void i915_guc_irq_handler(unsigned long data)
bool submit;
do {
- rq = port[0].request;
+ rq = port_request(&port[0]);
while (rq && i915_gem_request_completed(rq)) {
trace_i915_gem_request_out(rq);
i915_gem_request_put(rq);
- port[0].request = port[1].request;
- port[1].request = NULL;
- rq = port[0].request;
+
+ port[0] = port[1];
+ memset(&port[1], 0, sizeof(port[1]));
+
+ rq = port_request(&port[0]);
}
submit = false;
- if (!port[1].request)
+ if (!port_count(&port[1]))
submit = i915_guc_dequeue(engine);
} while (submit);
}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 49c23152500e..e312decccaf1 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1233,7 +1233,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
return false;
/* Both ports drained, no more ELSP submission? */
- if (engine->execlist_port[0].request)
+ if (port_request(&engine->execlist_port[0]))
return false;
/* Ring stopped? */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9a1192d61538..53ec0d5713ad 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -337,39 +337,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
static void execlists_submit_ports(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = engine->i915;
struct execlist_port *port = engine->execlist_port;
u32 __iomem *elsp =
- dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
- u64 desc[2];
-
- GEM_BUG_ON(port[0].count > 1);
- if (!port[0].count)
- execlists_context_status_change(port[0].request,
- INTEL_CONTEXT_SCHEDULE_IN);
- desc[0] = execlists_update_context(port[0].request);
- GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0]));
- port[0].count++;
-
- if (port[1].request) {
- GEM_BUG_ON(port[1].count);
- execlists_context_status_change(port[1].request,
- INTEL_CONTEXT_SCHEDULE_IN);
- desc[1] = execlists_update_context(port[1].request);
- GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1]));
- port[1].count = 1;
- } else {
- desc[1] = 0;
- }
- GEM_BUG_ON(desc[0] == desc[1]);
+ engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+ unsigned int n;
- /* You must always write both descriptors in the order below. */
- writel(upper_32_bits(desc[1]), elsp);
- writel(lower_32_bits(desc[1]), elsp);
+ for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
+ struct drm_i915_gem_request *rq;
+ unsigned int count;
+ u64 desc;
+
+ rq = port_unpack(&port[n], &count);
+ if (rq) {
+ GEM_BUG_ON(count > !n);
+ if (!count++)
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+ port_set(&port[n], port_pack(rq, count));
+ desc = execlists_update_context(rq);
+ GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
+ } else {
+ GEM_BUG_ON(!n);
+ desc = 0;
+ }
- writel(upper_32_bits(desc[0]), elsp);
- /* The context is automatically loaded after the following */
- writel(lower_32_bits(desc[0]), elsp);
+ writel(upper_32_bits(desc), elsp);
+ writel(lower_32_bits(desc), elsp);
+ }
}
static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
@@ -390,6 +383,17 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
return true;
}
+static void port_assign(struct execlist_port *port,
+ struct drm_i915_gem_request *rq)
+{
+ GEM_BUG_ON(rq == port_request(port));
+
+ if (port_isset(port))
+ i915_gem_request_put(port_request(port));
+
+ port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *last;
@@ -397,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
struct rb_node *rb;
bool submit = false;
- last = port->request;
+ last = port_request(port);
if (last)
/* WaIdleLiteRestore:bdw,skl
* Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
@@ -407,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
last->tail = last->wa_tail;
- GEM_BUG_ON(port[1].request);
+ GEM_BUG_ON(port_isset(&port[1]));
/* Hardware submission is through 2 ports. Conceptually each port
* has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -464,7 +468,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(last->ctx == cursor->ctx);
- i915_gem_request_assign(&port->request, last);
+ if (submit)
+ port_assign(port, last);
port++;
}
@@ -474,12 +479,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
cursor->priotree.priority = INT_MAX;
__i915_gem_request_submit(cursor);
- trace_i915_gem_request_in(cursor, port - engine->execlist_port);
+ trace_i915_gem_request_in(cursor, port_index(port, engine));
last = cursor;
submit = true;
}
if (submit) {
- i915_gem_request_assign(&port->request, last);
+ port_assign(port, last);
engine->execlist_first = rb;
}
spin_unlock_irq(&engine->timeline->lock);
@@ -488,16 +493,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
execlists_submit_ports(engine);
}
-static bool execlists_elsp_idle(struct intel_engine_cs *engine)
-{
- return !engine->execlist_port[0].request;
-}
-
static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
{
const struct execlist_port *port = engine->execlist_port;
- return port[0].count + port[1].count < 2;
+ return port_count(&port[0]) + port_count(&port[1]) < 2;
}
/*
@@ -547,7 +547,9 @@ static void intel_lrc_irq_handler(unsigned long data)
tail = GEN8_CSB_WRITE_PTR(head);
head = GEN8_CSB_READ_PTR(head);
while (head != tail) {
+ struct drm_i915_gem_request *rq;
unsigned int status;
+ unsigned int count;
if (++head == GEN8_CSB_ENTRIES)
head = 0;
@@ -575,22 +577,26 @@ static void intel_lrc_irq_handler(unsigned long data)
/* Check the context/desc id for this event matches */
GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
- port[0].context_id);
+ port->context_id);
- GEM_BUG_ON(port[0].count == 0);
- if (--port[0].count == 0) {
+ rq = port_unpack(port, &count);
+ GEM_BUG_ON(count == 0);
+ if (--count == 0) {
GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
- GEM_BUG_ON(!i915_gem_request_completed(port[0].request));
- execlists_context_status_change(port[0].request,
- INTEL_CONTEXT_SCHEDULE_OUT);
+ GEM_BUG_ON(!i915_gem_request_completed(rq));
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+
+ trace_i915_gem_request_out(rq);
+ i915_gem_request_put(rq);
- trace_i915_gem_request_out(port[0].request);
- i915_gem_request_put(port[0].request);
port[0] = port[1];
memset(&port[1], 0, sizeof(port[1]));
+ } else {
+ port_set(port, port_pack(rq, count));
}
- GEM_BUG_ON(port[0].count == 0 &&
+ /* After the final element, the hw should be idle */
+ GEM_BUG_ON(port_count(port) == 0 &&
!(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
}
@@ -1147,6 +1153,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
struct drm_i915_private *dev_priv = engine->i915;
struct execlist_port *port = engine->execlist_port;
unsigned int n;
+ bool submit;
int ret;
ret = intel_mocs_init_engine(engine);
@@ -1168,19 +1175,21 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
/* After a GPU reset, we may have requests to replay */
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ submit = false;
for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
- if (!port[n].request)
+ if (!port_isset(&port[n]))
break;
DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
engine->name, n,
- port[n].request->global_seqno);
+ port_request(&port[n])->global_seqno);
/* Discard the current inflight count */
- port[n].count = 0;
+ port_set(&port[n], port_request(&port[n]));
+ submit = true;
}
- if (!i915.enable_guc_submission && !execlists_elsp_idle(engine))
+ if (submit && !i915.enable_guc_submission)
execlists_submit_ports(engine);
return 0;
@@ -1258,13 +1267,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
intel_ring_update_space(request->ring);
/* Catch up with any missed context-switch interrupts */
- if (request->ctx != port[0].request->ctx) {
- i915_gem_request_put(port[0].request);
+ if (request->ctx != port_request(port)->ctx) {
+ i915_gem_request_put(port_request(port));
port[0] = port[1];
memset(&port[1], 0, sizeof(port[1]));
}
- GEM_BUG_ON(request->ctx != port[0].request->ctx);
+ GEM_BUG_ON(request->ctx != port_request(port)->ctx);
/* Reset WaIdleLiteRestore:bdw,skl as well */
request->tail =
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ec16fb6fde62..162f0a9c6abe 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -368,8 +368,15 @@ struct intel_engine_cs {
/* Execlists */
struct tasklet_struct irq_tasklet;
struct execlist_port {
- struct drm_i915_gem_request *request;
- unsigned int count;
+ struct drm_i915_gem_request *request_count;
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
+#define port_set(p, packed) ((p)->request_count = (packed))
+#define port_isset(p) ((p)->request_count)
+#define port_index(p, e) ((p) - (e)->execlist_port)
GEM_DEBUG_DECL(u32 context_id);
} execlist_port[2];
struct rb_root execlist_queue;
--
2.11.0
More information about the Intel-gfx
mailing list