[Intel-gfx] [PATCH 2/2] drm/i915/execlists: Pack the count into the low bits of the port.request
Chris Wilson
chris at chris-wilson.co.uk
Fri Mar 31 14:10:33 UTC 2017
add/remove: 1/1 grow/shrink: 5/4 up/down: 391/-578 (-187)
function old new delta
execlists_submit_ports 262 471 +209
port_assign.isra - 136 +136
capture 6344 6359 +15
reset_common_ring 438 452 +14
execlists_submit_request 228 238 +10
gen8_init_common_ring 334 341 +7
intel_engine_is_idle 106 105 -1
i915_engine_info 2314 2290 -24
__i915_gem_set_wedged_BKL 485 411 -74
intel_lrc_irq_handler 1789 1604 -185
execlists_update_context 294 - -294
The most important change there is the improve to the
intel_lrc_irq_handler and excclist_submit_ports (net improvement since
execlists_update_context is now inlined).
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_debugfs.c | 32 ++++---
drivers/gpu/drm/i915/i915_gem.c | 6 +-
drivers/gpu/drm/i915/i915_gpu_error.c | 13 ++-
drivers/gpu/drm/i915/i915_guc_submission.c | 18 ++--
drivers/gpu/drm/i915/intel_engine_cs.c | 2 +-
drivers/gpu/drm/i915/intel_lrc.c | 133 ++++++++++++++++-------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 8 +-
7 files changed, 120 insertions(+), 92 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d689e511744e..0273591c43ad 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3317,6 +3317,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
if (i915.enable_execlists) {
u32 ptr, read, write;
struct rb_node *rb;
+ unsigned int idx;
seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
I915_READ(RING_EXECLIST_STATUS_LO(engine)),
@@ -3334,8 +3335,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
if (read > write)
write += GEN8_CSB_ENTRIES;
while (read < write) {
- unsigned int idx = ++read % GEN8_CSB_ENTRIES;
-
+ idx = ++read % GEN8_CSB_ENTRIES;
seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
idx,
I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
@@ -3343,21 +3343,19 @@ static int i915_engine_info(struct seq_file *m, void *unused)
}
rcu_read_lock();
- rq = READ_ONCE(engine->execlist_port[0].request);
- if (rq) {
- seq_printf(m, "\t\tELSP[0] count=%d, ",
- engine->execlist_port[0].count);
- print_request(m, rq, "rq: ");
- } else {
- seq_printf(m, "\t\tELSP[0] idle\n");
- }
- rq = READ_ONCE(engine->execlist_port[1].request);
- if (rq) {
- seq_printf(m, "\t\tELSP[1] count=%d, ",
- engine->execlist_port[1].count);
- print_request(m, rq, "rq: ");
- } else {
- seq_printf(m, "\t\tELSP[1] idle\n");
+ for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) {
+ unsigned int count;
+
+ rq = port_unpack(&engine->execlist_port[idx],
+ count);
+ if (rq) {
+ seq_printf(m, "\t\tELSP[%d] count=%d, ",
+ idx, count);
+ print_request(m, rq, "rq: ");
+ } else {
+ seq_printf(m, "\t\tELSP[%d] idle\n",
+ idx);
+ }
}
rcu_read_unlock();
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 82638895ad20..946050bf4282 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2967,12 +2967,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
*/
if (i915.enable_execlists) {
+ struct execlist_port *port = engine->execlist_port;
unsigned long flags;
+ unsigned int n;
spin_lock_irqsave(&engine->timeline->lock, flags);
- i915_gem_request_put(engine->execlist_port[0].request);
- i915_gem_request_put(engine->execlist_port[1].request);
+ for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
+ i915_gem_request_put(port_request(&port[n]));
memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
engine->execlist_queue = RB_ROOT;
engine->execlist_first = NULL;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 8effc59f5cb5..c86a3bd89468 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1316,12 +1316,17 @@ static void engine_record_requests(struct intel_engine_cs *engine,
static void error_record_engine_execlists(struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee)
{
+ const struct execlist_port *port = engine->execlist_port;
unsigned int n;
- for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
- if (engine->execlist_port[n].request)
- record_request(engine->execlist_port[n].request,
- &ee->execlist[n]);
+ for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+ struct drm_i915_gem_request *rq = port_request(&port[n]);
+
+ if (!rq)
+ break;
+
+ record_request(rq, &ee->execlist[n]);
+ }
}
static void record_context(struct drm_i915_error_context *e,
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 1642fff9cf13..370373c97b81 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -658,7 +658,7 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
static bool i915_guc_dequeue(struct intel_engine_cs *engine)
{
struct execlist_port *port = engine->execlist_port;
- struct drm_i915_gem_request *last = port[0].request;
+ struct drm_i915_gem_request *last = port[0].request_count;
struct rb_node *rb;
bool submit = false;
@@ -672,7 +672,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
if (port != engine->execlist_port)
break;
- i915_gem_request_assign(&port->request, last);
+ i915_gem_request_assign(&port->request_count, last);
nested_enable_signaling(last);
port++;
}
@@ -688,7 +688,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
submit = true;
}
if (submit) {
- i915_gem_request_assign(&port->request, last);
+ i915_gem_request_assign(&port->request_count, last);
nested_enable_signaling(last);
engine->execlist_first = rb;
}
@@ -705,17 +705,19 @@ static void i915_guc_irq_handler(unsigned long data)
bool submit;
do {
- rq = port[0].request;
+ rq = port[0].request_count;
while (rq && i915_gem_request_completed(rq)) {
trace_i915_gem_request_out(rq);
i915_gem_request_put(rq);
- port[0].request = port[1].request;
- port[1].request = NULL;
- rq = port[0].request;
+
+ port[0].request_count = port[1].request_count;
+ port[1].request_count = NULL;
+
+ rq = port[0].request_count;
}
submit = false;
- if (!port[1].request)
+ if (!port[1].request_count)
submit = i915_guc_dequeue(engine);
} while (submit);
}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 854e8e0c836b..fca7160a29c2 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1096,7 +1096,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
return false;
/* Both ports drained, no more ELSP submission? */
- if (engine->execlist_port[0].request)
+ if (port_request(&engine->execlist_port[0]))
return false;
/* Ring stopped? */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c8f7c631fc1f..fcc74caaf291 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -342,39 +342,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
static void execlists_submit_ports(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = engine->i915;
struct execlist_port *port = engine->execlist_port;
u32 __iomem *elsp =
- dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
- u64 desc[2];
-
- GEM_BUG_ON(port[0].count > 1);
- if (!port[0].count)
- execlists_context_status_change(port[0].request,
- INTEL_CONTEXT_SCHEDULE_IN);
- desc[0] = execlists_update_context(port[0].request);
- GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0]));
- port[0].count++;
-
- if (port[1].request) {
- GEM_BUG_ON(port[1].count);
- execlists_context_status_change(port[1].request,
- INTEL_CONTEXT_SCHEDULE_IN);
- desc[1] = execlists_update_context(port[1].request);
- GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1]));
- port[1].count = 1;
- } else {
- desc[1] = 0;
- }
- GEM_BUG_ON(desc[0] == desc[1]);
-
- /* You must always write both descriptors in the order below. */
- writel(upper_32_bits(desc[1]), elsp);
- writel(lower_32_bits(desc[1]), elsp);
+ engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+ unsigned int n;
+
+ for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
+ struct drm_i915_gem_request *rq;
+ unsigned int count;
+ u64 desc;
+
+ rq = port_unpack(&port[n], count);
+ if (rq) {
+ GEM_BUG_ON(count > !n);
+ if (!count++)
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+ port[n].request_count = port_pack(rq, count);
+ desc = execlists_update_context(rq);
+ GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
+ } else {
+ GEM_BUG_ON(!n);
+ desc = 0;
+ }
- writel(upper_32_bits(desc[0]), elsp);
- /* The context is automatically loaded after the following */
- writel(lower_32_bits(desc[0]), elsp);
+ writel(upper_32_bits(desc), elsp);
+ writel(lower_32_bits(desc), elsp);
+ }
}
static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
@@ -395,6 +388,18 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
return true;
}
+static void port_assign(struct execlist_port *port,
+ struct drm_i915_gem_request *rq)
+{
+ GEM_BUG_ON(rq == port_request(port));
+
+ if (port->request_count)
+ i915_gem_request_put(port_request(port));
+
+ port->request_count =
+ port_pack(i915_gem_request_get(rq), port_count(port));
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *last;
@@ -402,7 +407,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
struct rb_node *rb;
bool submit = false;
- last = port->request;
+ last = port_request(port);
if (last)
/* WaIdleLiteRestore:bdw,skl
* Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
@@ -412,7 +417,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
last->tail = last->wa_tail;
- GEM_BUG_ON(port[1].request);
+ GEM_BUG_ON(port[1].request_count);
/* Hardware submission is through 2 ports. Conceptually each port
* has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -469,7 +474,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(last->ctx == cursor->ctx);
- i915_gem_request_assign(&port->request, last);
+ if (submit)
+ port_assign(port, last);
port++;
}
@@ -484,7 +490,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
submit = true;
}
if (submit) {
- i915_gem_request_assign(&port->request, last);
+ port_assign(port, last);
engine->execlist_first = rb;
}
spin_unlock_irq(&engine->timeline->lock);
@@ -495,14 +501,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
static bool execlists_elsp_idle(struct intel_engine_cs *engine)
{
- return !engine->execlist_port[0].request;
+ return !port_count(&engine->execlist_port[0]);
}
static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
{
const struct execlist_port *port = engine->execlist_port;
- return port[0].count + port[1].count < 2;
+ return port_count(&port[0]) + port_count(&port[1]) < 2;
}
/*
@@ -543,7 +549,9 @@ static void intel_lrc_irq_handler(unsigned long data)
tail = GEN8_CSB_WRITE_PTR(head);
head = GEN8_CSB_READ_PTR(head);
while (head != tail) {
+ struct drm_i915_gem_request *rq;
unsigned int status;
+ unsigned int count;
if (++head == GEN8_CSB_ENTRIES)
head = 0;
@@ -573,20 +581,24 @@ static void intel_lrc_irq_handler(unsigned long data)
GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
port[0].context_id);
- GEM_BUG_ON(port[0].count == 0);
- if (--port[0].count == 0) {
+ rq = port_unpack(&port[0], count);
+ GEM_BUG_ON(count == 0);
+ if (--count == 0) {
GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
- GEM_BUG_ON(!i915_gem_request_completed(port[0].request));
- execlists_context_status_change(port[0].request,
- INTEL_CONTEXT_SCHEDULE_OUT);
+ GEM_BUG_ON(!i915_gem_request_completed(rq));
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+
+ trace_i915_gem_request_out(rq);
+ i915_gem_request_put(rq);
- trace_i915_gem_request_out(port[0].request);
- i915_gem_request_put(port[0].request);
port[0] = port[1];
memset(&port[1], 0, sizeof(port[1]));
+ } else {
+ port[0].request_count = port_pack(rq, count);
}
- GEM_BUG_ON(port[0].count == 0 &&
+ /* After the final element, the hw should be idle */
+ GEM_BUG_ON(port_count(&port[0]) == 0 &&
!(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
}
@@ -1139,11 +1151,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return ret;
}
-static u32 port_seqno(struct execlist_port *port)
-{
- return port->request ? port->request->global_seqno : 0;
-}
-
static int gen8_init_common_ring(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -1168,12 +1175,22 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
/* After a GPU reset, we may have requests to replay */
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
- DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n",
- engine->name,
- port_seqno(&engine->execlist_port[0]),
- port_seqno(&engine->execlist_port[1]));
- engine->execlist_port[0].count = 0;
- engine->execlist_port[1].count = 0;
+ struct execlist_port *port = engine->execlist_port;
+ unsigned int n;
+
+ for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+ if (!port[n].request_count)
+ break;
+
+ DRM_DEBUG_DRIVER("Restarting %s from 0x%x [%d]\n",
+ engine->name,
+ port_request(&port[n])->global_seqno,
+ n);
+
+ /* Discard the current inflight count */
+ port[n].request_count = port_request(&port[n]);
+ }
+
execlists_submit_ports(engine);
}
@@ -1252,13 +1269,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
intel_ring_update_space(request->ring);
/* Catch up with any missed context-switch interrupts */
- if (request->ctx != port[0].request->ctx) {
- i915_gem_request_put(port[0].request);
+ if (request->ctx != port_request(&port[0])->ctx) {
+ i915_gem_request_put(port_request(&port[0]));
port[0] = port[1];
memset(&port[1], 0, sizeof(port[1]));
}
- GEM_BUG_ON(request->ctx != port[0].request->ctx);
+ GEM_BUG_ON(request->ctx != port_request(&port[0])->ctx);
/* Reset WaIdleLiteRestore:bdw,skl as well */
request->tail =
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a82a0807f64d..51497653c830 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -373,8 +373,12 @@ struct intel_engine_cs {
/* Execlists */
struct tasklet_struct irq_tasklet;
struct execlist_port {
- struct drm_i915_gem_request *request;
- unsigned int count;
+ struct drm_i915_gem_request *request_count;
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
GEM_DEBUG_DECL(u32 context_id);
} execlist_port[2];
struct rb_root execlist_queue;
--
2.11.0
More information about the Intel-gfx
mailing list