[PATCH 44/44] wip-execlists
Chris Wilson
chris@chris-wilson.co.uk
Wed Mar 6 23:45:04 UTC 2019
Replace the packed execlist_port array (a request pointer with the
submission count squeezed into its low bits) with two NULL-terminated
arrays of requests: execlists->inflight[] for what is currently on the
hardware and execlists->pending[] for the next ELSP submission, with
execlists->active as a cursor into inflight[] advanced by the CSB
handler. Context SCHEDULE_IN/OUT notifications are refcounted via
intel_context.active_count, and the dedicated preempt context is no
longer required for execlists (only GuC submission keeps it).

fixme: guc
fixme: stats
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c |   2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c       |  13 +-
 drivers/gpu/drm/i915/i915_scheduler.c       |  15 +-
 drivers/gpu/drm/i915/intel_context_types.h  |   2 +
 drivers/gpu/drm/i915/intel_engine_cs.c      |  50 ++-
 drivers/gpu/drm/i915/intel_engine_types.h   |  54 +--
 drivers/gpu/drm/i915/intel_guc_submission.c | 146 +++----
 drivers/gpu/drm/i915/intel_lrc.c            | 401 +++++---------------
 drivers/gpu/drm/i915/intel_ringbuffer.h     |  54 +--
 9 files changed, 202 insertions(+), 535 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index dac50cf67468..a095ad4d0f17 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -607,7 +607,7 @@ static void init_contexts(struct drm_i915_private *i915)
static bool needs_preempt_context(struct drm_i915_private *i915)
{
- return HAS_LOGICAL_RING_PREEMPTION(i915);
+ return USES_GUC_SUBMISSION(i915);
}
int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 7c7f69114f4d..02ac700ec413 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1304,16 +1304,11 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee)
{
const struct intel_engine_execlists * const execlists = &engine->execlists;
- unsigned int n;
+ struct i915_request **port = execlists->active;
+ unsigned int n = 0;
- for (n = 0; n < execlists_num_ports(execlists); n++) {
- struct i915_request *rq = port_request(&execlists->port[n]);
-
- if (!rq)
- break;
-
- record_request(rq, &ee->execlist[n]);
- }
+ while (*port)
+ record_request(*port++, &ee->execlist[n++]);
ee->num_ports = n;
}
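
For reference, the error capture above now relies on the request
arrays being NULL-terminated rather than carrying an explicit count.
A minimal sketch of that iteration idiom (hypothetical helper, not
part of the patch):

static unsigned int
count_terminated(struct i915_request * const *port)
{
	unsigned int n = 0;

	/* inflight[]/pending[] always end in a NULL sentinel slot */
	while (*port++)
		n++;

	return n;
}

so ee->num_ports above is simply count_terminated(execlists->active).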
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index bb9819dbe313..f43df0ab22b9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -268,18 +268,6 @@ sched_lock_engine(const struct i915_sched_node *node,
return locked;
}
-static bool inflight(const struct i915_request *rq,
- const struct intel_engine_cs *engine)
-{
- const struct i915_request *active;
-
- if (!i915_request_is_active(rq))
- return false;
-
- active = port_request(engine->execlists.port);
- return active->hw_context == rq->hw_context;
-}
-
static void __i915_schedule(struct i915_request *rq,
const struct i915_sched_attr *attr)
{
@@ -410,7 +398,8 @@ static void __i915_schedule(struct i915_request *rq,
* If we are already the currently executing context, don't
* bother evaluating if we should preempt ourselves.
*/
- if (inflight(node_to_request(node), engine))
+ if (execlists_inflight(&engine->execlists,
+ node_to_request(node)))
continue;
/* Defer (tasklet) submission until after all of our updates. */
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 6dc9b4b9067b..3fcdd0d8e1b1 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -38,7 +38,9 @@ struct intel_sseu {
struct intel_context {
struct i915_gem_context *gem_context;
struct intel_engine_cs *engine;
+
struct intel_engine_cs *active;
+ unsigned int active_count;
struct list_head active_link;
struct list_head signal_link;
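
The active/active_count pair replaces the per-port bookkeeping: a
context is considered on the hardware from the moment its first
request is scheduled in until its last request is scheduled out. A
sketch of the assumed pairing (this mirrors execlists_schedule_in/out
in the intel_lrc.c hunk below; the helper names here are hypothetical):

static void context_in(struct intel_context *ce,
		       struct intel_engine_cs *engine)
{
	/* first request of this context to reach the HW */
	if (!ce->active_count++)
		ce->active = engine;
}

static void context_out(struct intel_context *ce)
{
	/* last request of this context has left the HW */
	if (!--ce->active_count)
		ce->active = NULL;
}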
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 4eb0e879b76a..2f2e876eda15 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -461,6 +461,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
execlists->port_mask = 1;
GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
+ execlists->active = &execlists->inflight[0];
execlists->queue_priority_hint = INT_MIN;
execlists->queue = RB_ROOT_CACHED;
@@ -1039,7 +1040,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
return true;
/* Waiting to drain ELSP? */
- if (READ_ONCE(engine->execlists.active)) {
+ if (execlists_active(&engine->execlists)) {
struct tasklet_struct *t = &engine->execlists.tasklet;
local_bh_disable();
@@ -1054,7 +1055,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
/* Otherwise flush the tasklet if it was on another cpu */
tasklet_unlock_wait(t);
- if (READ_ONCE(engine->execlists.active))
+ if (execlists_active(&engine->execlists))
return false;
}
@@ -1379,6 +1380,7 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
}
if (HAS_EXECLISTS(dev_priv)) {
+ struct i915_request **port;
const u32 *hws =
&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
unsigned int idx;
@@ -1413,27 +1415,21 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)));
}
+ idx = 0;
+ port = execlists->active;
rcu_read_lock();
- for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
- struct i915_request *rq;
- unsigned int count;
-
- rq = port_unpack(&execlists->port[idx], &count);
- if (rq) {
- char hdr[80];
-
- snprintf(hdr, sizeof(hdr),
- "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
- idx, count,
- i915_ggtt_offset(rq->ring->vma),
- rq->timeline->hwsp_offset,
- hwsp_seqno(rq));
- print_request(m, rq, hdr);
- } else {
- drm_printf(m, "\t\tELSP[%d] idle\n", idx);
- }
+ while (*port) {
+ struct i915_request *rq = *port++;
+ char hdr[80];
+
+ snprintf(hdr, sizeof(hdr),
+ "\t\tELSP[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
+ idx++,
+ i915_ggtt_offset(rq->ring->vma),
+ rq->timeline->hwsp_offset,
+ hwsp_seqno(rq));
+ print_request(m, rq, hdr);
}
- drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
rcu_read_unlock();
} else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
@@ -1608,16 +1604,18 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
}
if (engine->stats.enabled++ == 0) {
- const struct execlist_port *port = execlists->port;
- unsigned int num_ports = execlists_num_ports(execlists);
+ struct i915_request **port;
engine->stats.enabled_at = ktime_get();
/* XXX submission method oblivious? */
- while (num_ports-- && port_isset(port)) {
+ port = execlists->active;
+ while (*port++)
+ engine->stats.active++;
+
+ port = execlists->pending;
+ while (*port++)
engine->stats.active++;
- port++;
- }
if (engine->stats.active)
engine->stats.start = engine->stats.enabled_at;
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 525e1f1c4479..b55826f6d616 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -148,51 +148,10 @@ struct intel_engine_execlists {
*/
u32 __iomem *ctrl_reg;
- /**
- * @port: execlist port states
- *
- * For each hardware ELSP (ExecList Submission Port) we keep
- * track of the last request and the number of times we submitted
- * that port to hw. We then count the number of times the hw reports
- * a context completion or preemption. As only one context can
- * be active on hw, we limit resubmission of context to port[0]. This
- * is called Lite Restore, of the context.
- */
- struct execlist_port {
- /**
- * @request_count: combined request and submission count
- */
- struct i915_request *request_count;
-#define EXECLIST_COUNT_BITS 2
-#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
-#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
-#define port_set(p, packed) ((p)->request_count = (packed))
-#define port_isset(p) ((p)->request_count)
-#define port_index(p, execlists) ((p) - (execlists)->port)
-
- /**
- * @context_id: context ID for port
- */
- GEM_DEBUG_DECL(u32 context_id);
-
#define EXECLIST_MAX_PORTS 2
- } port[EXECLIST_MAX_PORTS];
-
- /**
- * @active: is the HW active? We consider the HW as active after
- * submitting any context for execution and until we have seen the
- * last context completion event. After that, we do not expect any
- * more events until we submit, and so can park the HW.
- *
- * As we have a small number of different sources from which we feed
- * the HW, we track the state of each inside a single bitfield.
- */
- unsigned int active;
-#define EXECLISTS_ACTIVE_USER 0
-#define EXECLISTS_ACTIVE_PREEMPT 1
-#define EXECLISTS_ACTIVE_HWACK 2
+ struct i915_request **active;
+ struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
+ struct i915_request *pending[EXECLIST_MAX_PORTS + 1];
/**
* @port_mask: number of execlist ports - 1
@@ -233,16 +192,13 @@ struct intel_engine_execlists {
*/
u32 *csb_status;
- /**
- * @preempt_complete_status: expected CSB upon completing preemption
- */
- u32 preempt_complete_status;
-
/**
* @csb_head: context status buffer head
*/
u8 csb_head;
+ bool guc_preempt;
+
I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};
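
Both arrays reserve a trailing slot for a NULL sentinel so callers can
walk them without carrying a count, and execlists->active acts as a
cursor into inflight[] that process_csb() advances as requests
complete. A layout sketch under those assumptions:

/*
 * With EXECLIST_MAX_PORTS == 2:
 *
 *   inflight: [ rq0 ][ rq1 ][ NULL ]
 *                ^
 *             active
 *
 * After rq0 completes, the CSB handler zeroes its slot and advances:
 *
 *   inflight: [ NULL ][ rq1 ][ NULL ]
 *                        ^
 *                     active
 *
 * pending[] is filled by the dequeue and promoted wholesale into
 * inflight[] once the HW acknowledges the ELSP write.
 */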
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index aec2b73cde2c..8be6fcf9ed9c 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -533,15 +533,11 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
- spin_lock(&client->wq_lock);
-
guc_wq_item_append(client, engine->guc_id, ctx_desc,
ring_tail, rq->fence.seqno);
guc_ring_doorbell(client);
client->submissions[engine->id] += 1;
-
- spin_unlock(&client->wq_lock);
}
/*
@@ -626,8 +622,7 @@ static void inject_preempt_context(struct work_struct *work)
data[6] = intel_guc_ggtt_offset(guc, guc->shared_data);
if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) {
- execlists_clear_active(&engine->execlists,
- EXECLISTS_ACTIVE_PREEMPT);
+ engine->execlists.guc_preempt = false;
tasklet_schedule(&engine->execlists.tasklet);
}
@@ -666,7 +661,7 @@ static void complete_preempt_context(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
+ GEM_BUG_ON(!execlists->guc_preempt);
if (inject_preempt_hang(execlists))
return;
@@ -676,42 +671,27 @@ static void complete_preempt_context(struct intel_engine_cs *engine)
wait_for_guc_preempt_report(engine);
intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0);
+
+ execlists->guc_preempt = false;
}
-/**
- * guc_submit() - Submit commands through GuC
- * @engine: engine associated with the commands
- *
- * The only error here arises if the doorbell hardware isn't functioning
- * as expected, which really shouln't happen.
- */
-static void guc_submit(struct intel_engine_cs *engine)
+static void guc_submit(struct intel_engine_cs *engine,
+ struct i915_request **out,
+ struct i915_request **last)
{
struct intel_guc *guc = &engine->i915->guc;
- struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
- unsigned int n;
-
- for (n = 0; n < execlists_num_ports(execlists); n++) {
- struct i915_request *rq;
- unsigned int count;
+ struct intel_guc_client *client = guc->execbuf_client;
- rq = port_unpack(&port[n], &count);
- if (rq && count == 0) {
- port_set(&port[n], port_pack(rq, ++count));
+ spin_lock(&client->wq_lock);
- flush_ggtt_writes(rq->ring->vma);
+ do {
+ struct i915_request *rq = *out++;
- guc_add_request(guc, rq);
- }
- }
-}
+ flush_ggtt_writes(rq->ring->vma);
+ guc_add_request(guc, rq);
+ } while (out != last);
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
-{
- GEM_BUG_ON(port_isset(port));
-
- port_set(port, i915_request_get(rq));
+ spin_unlock(&client->wq_lock);
}
static inline int rq_prio(const struct i915_request *rq)
@@ -719,65 +699,58 @@ static inline int rq_prio(const struct i915_request *rq)
return rq->sched.attr.priority;
}
-static inline int port_prio(const struct execlist_port *port)
-{
- return rq_prio(port_request(port)) | __NO_PREEMPTION;
-}
-
-static bool __guc_dequeue(struct intel_engine_cs *engine)
+static void __guc_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
- struct i915_request *last = NULL;
- const struct execlist_port * const last_port =
- &execlists->port[execlists->port_mask];
+ struct i915_request **first = execlists->inflight;
+ struct i915_request ** const end = first + execlists->port_mask;
+ struct i915_request *cur = first[0];
+ struct i915_request **port;
bool submit = false;
struct rb_node *rb;
lockdep_assert_held(&engine->timeline.lock);
- if (port_isset(port)) {
+ rb = rb_first_cached(&execlists->queue);
+
+ if (cur) {
if (intel_engine_has_preemption(engine)) {
struct guc_preempt_work *preempt_work =
&engine->i915->guc.preempt_work[engine->id];
int prio = execlists->queue_priority_hint;
- if (__execlists_need_preempt(prio, port_prio(port))) {
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT);
+ if (__execlists_need_preempt(prio, rq_prio(cur))) {
+ execlists->guc_preempt = true;
queue_work(engine->i915->guc.preempt_wq,
&preempt_work->work);
- return false;
+ return;
}
}
- port++;
- if (port_isset(port))
- return false;
+ if (*++first)
+ return;
}
- GEM_BUG_ON(port_isset(port));
- while ((rb = rb_first_cached(&execlists->queue))) {
+ port = first;
+ while (rb) {
struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
- if (last && rq->hw_context != last->hw_context) {
- if (port == last_port)
+ if (cur && rq->hw_context != cur->hw_context) {
+ if (port == end)
goto done;
- if (submit)
- port_assign(port, last);
- port++;
+ *port++ = cur;
}
list_del_init(&rq->sched.link);
__i915_request_submit(rq);
- trace_i915_request_in(rq, port_index(port, execlists));
+ trace_i915_request_in(rq, port - execlists->inflight);
- last = rq;
+ cur = rq;
submit = true;
}
@@ -787,58 +760,43 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
done:
execlists->queue_priority_hint =
rb ? to_priolist(rb)->priority : INT_MIN;
- if (submit)
- port_assign(port, last);
- if (last)
- execlists_user_begin(execlists, execlists->port);
-
- /* We must always keep the beast fed if we have work piled up */
- GEM_BUG_ON(port_isset(execlists->port) &&
- !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
- GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
- !port_isset(execlists->port));
-
- return submit;
-}
-
-static void guc_dequeue(struct intel_engine_cs *engine)
-{
- if (__guc_dequeue(engine))
- guc_submit(engine);
+ if (submit) {
+ *port = i915_request_get(cur);
+ guc_submit(engine, first, port);
+ }
+ execlists->active = execlists->inflight;
}
static void guc_submission_tasklet(unsigned long data)
{
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
struct i915_request *rq;
unsigned long flags;
spin_lock_irqsave(&engine->timeline.lock, flags);
- rq = port_request(port);
+ rq = execlists->inflight[0];
while (rq && i915_request_completed(rq)) {
trace_i915_request_out(rq);
i915_request_put(rq);
- port = execlists_port_complete(execlists, port);
- if (port_isset(port)) {
- execlists_user_begin(execlists, port);
- rq = port_request(port);
- } else {
- execlists_user_end(execlists);
- rq = NULL;
- }
+ memmove(execlists->inflight,
+ execlists->inflight + 1,
+ execlists->port_mask * sizeof(*execlists->inflight));
+ memset(execlists->inflight + execlists->port_mask,
+ 0, sizeof(*execlists->inflight));
+
+ rq = execlists->inflight[0];
}
- if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
+ if (execlists->guc_preempt &&
intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==
GUC_PREEMPT_FINISHED)
complete_preempt_context(engine);
- if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
- guc_dequeue(engine);
+ if (!execlists->guc_preempt)
+ __guc_dequeue(engine);
spin_unlock_irqrestore(&engine->timeline.lock, flags);
}
@@ -1310,7 +1268,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
* and it is guaranteed that it will remove the work item from the
* queue before our request is completed.
*/
- BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.port) *
+ BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) *
sizeof(struct guc_wq_item) *
I915_NUM_ENGINES > GUC_WQ_SIZE);
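
With the port packing gone, the GuC path builds its inflight[] span
directly in __guc_dequeue() and hands the whole [first, port) range to
guc_submit(), which now takes client->wq_lock once around the batch
instead of once per request. A simplified sketch of that batching
(names as in the hunk above, structure condensed):

static void submit_span(struct intel_guc *guc,
			struct intel_guc_client *client,
			struct i915_request **out,
			struct i915_request **last)
{
	spin_lock(&client->wq_lock);

	do {
		struct i915_request *rq = *out++;

		flush_ggtt_writes(rq->ring->vma);
		guc_add_request(guc, rq); /* append wq item, ring doorbell */
	} while (out != last);

	spin_unlock(&client->wq_lock);
}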
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 10793dfac7fb..90b6ddcce979 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -345,7 +345,7 @@ assert_priority_queue(const struct i915_request *prev,
* Even with preemption, there are times when we think it is better not
* to preempt and leave an ostensibly lower priority request in flight.
*/
- if (port_request(execlists->port) == prev)
+ if (execlists_active(execlists) == prev)
return true;
return rq_prio(prev) >= rq_prio(next);
@@ -444,8 +444,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
__i915_request_unsubmit(rq);
unwind_wa_tail(rq);
- GEM_BUG_ON(rq->hw_context->active);
-
owner = rq->hw_context->engine;
if (likely(owner == engine)) {
GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
@@ -517,36 +515,37 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
status, rq);
}
-inline void
-execlists_user_begin(struct intel_engine_execlists *execlists,
- const struct execlist_port *port)
+static inline struct i915_request *
+execlists_schedule_in(struct i915_request *rq, int idx)
{
- execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
-}
+ trace_i915_request_in(rq, idx);
-inline void
-execlists_user_end(struct intel_engine_execlists *execlists)
-{
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
-}
-
-static inline void
-execlists_context_schedule_in(struct i915_request *rq)
-{
- GEM_BUG_ON(rq->hw_context->active);
+ if (!rq->hw_context->active_count++) {
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+ intel_engine_context_in(rq->engine);
+ rq->hw_context->active = rq->engine;
+ }
- execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
- intel_engine_context_in(rq->engine);
- rq->hw_context->active = rq->engine;
+ GEM_BUG_ON(rq->hw_context->active != rq->engine);
+ return i915_request_get(rq);
}
static inline void
-execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
+execlists_schedule_out(struct i915_request *rq)
{
- rq->hw_context->active = NULL;
- intel_engine_context_out(rq->engine);
- execlists_context_status_change(rq, status);
trace_i915_request_out(rq);
+
+ GEM_BUG_ON(!rq->hw_context->active_count);
+ if (!--rq->hw_context->active_count) {
+ rq->hw_context->active = NULL;
+ intel_engine_context_out(rq->engine);
+ execlists_context_status_change(rq,
+ i915_request_completed(rq) ?
+ INTEL_CONTEXT_SCHEDULE_OUT :
+ INTEL_CONTEXT_SCHEDULE_PREEMPTED);
+ }
+
+ i915_request_put(rq);
}
static u64 execlists_update_context(struct i915_request *rq)
@@ -589,7 +588,6 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
static void execlists_submit_ports(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
unsigned int n;
/*
@@ -609,22 +607,15 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
* of elsq entries, keep this in mind before changing the loop below.
*/
for (n = execlists_num_ports(execlists); n--; ) {
- struct i915_request *rq;
- unsigned int count;
+ struct i915_request *rq = execlists->pending[n];
u64 desc;
- rq = port_unpack(&port[n], &count);
if (rq) {
- GEM_BUG_ON(count > !n);
- if (!count++)
- execlists_context_schedule_in(rq);
- port_set(&port[n], port_pack(rq, count));
desc = execlists_update_context(rq);
- GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
- GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
+ GEM_TRACE("%s in[%d]: ctx=%d, fence %llx:%lld (current %d), prio=%d\n",
engine->name, n,
- port[n].context_id, count,
+ upper_32_bits(desc),
rq->fence.context, rq->fence.seqno,
hwsp_seqno(rq),
rq_prio(rq));
@@ -639,8 +630,6 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
/* we need to manually load the submit queue */
if (execlists->ctrl_reg)
writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
}
static bool ctx_single_port_submission(const struct intel_context *ce)
@@ -666,64 +655,15 @@ static bool can_merge_rq(const struct i915_request *prev,
{
GEM_BUG_ON(!assert_priority_queue(prev, next));
+ if (i915_request_completed(prev))
+ return true;
+
if (!can_merge_ctx(prev->hw_context, next->hw_context))
return false;
return true;
}
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
-{
- GEM_BUG_ON(rq == port_request(port));
-
- if (port_isset(port))
- i915_request_put(port_request(port));
-
- port_set(port, port_pack(i915_request_get(rq), port_count(port)));
-}
-
-static void inject_preempt_context(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce = engine->preempt_context;
- unsigned int n;
-
- GEM_BUG_ON(execlists->preempt_complete_status !=
- upper_32_bits(ce->lrc_desc));
-
- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
- GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
-
- write_desc(execlists, ce->lrc_desc, n);
-
- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
- execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
-
- (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
-}
-
-static void complete_preempt_context(struct intel_engine_execlists *execlists)
-{
- GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
-
- if (inject_preempt_hang(execlists))
- return;
-
- execlists_cancel_port_requests(execlists);
- __unwind_incomplete_requests(container_of(execlists,
- struct intel_engine_cs,
- execlists));
-}
-
static void virtual_update_register_offsets(u32 *regs,
struct intel_engine_cs *engine)
{
@@ -771,10 +711,10 @@ static void virtual_update_register_offsets(u32 *regs,
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
- const struct execlist_port * const last_port =
- &execlists->port[execlists->port_mask];
- struct i915_request *last = port_request(port);
+ struct i915_request **port = &execlists->pending[0];
+ struct i915_request ** const last_port =
+ &execlists->pending[execlists->port_mask];
+ struct i915_request *last = execlists_active(execlists);
struct rb_node *rb;
bool submit = false;
@@ -824,64 +764,33 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last) {
/*
- * Don't resubmit or switch until all outstanding
- * preemptions (lite-restore) are seen. Then we
- * know the next preemption status we see corresponds
- * to this ELSP update.
- */
- GEM_BUG_ON(!execlists_is_active(execlists,
- EXECLISTS_ACTIVE_USER));
- GEM_BUG_ON(!port_count(&port[0]));
-
- /*
- * If we write to ELSP a second time before the HW has had
- * a chance to respond to the previous write, we can confuse
- * the HW and hit "undefined behaviour". After writing to ELSP,
- * we must then wait until we see a context-switch event from
- * the HW to indicate that it has had a chance to respond.
+ * If the queue is higher priority than the last
+ * request in the currently active context, submit afresh.
+ * We will resubmit again afterwards in case we need to split
+ * the active context to interject the preemption request,
+ * i.e. we will retrigger preemption following the ack in case
+ * of trouble.
*/
- if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
- return;
-
if (need_preempt(engine, last, rb)) {
- inject_preempt_context(engine);
- return;
+ last = __unwind_incomplete_requests(engine);
+ if (last && need_preempt(engine, last, rb))
+ last = NULL;
+ } else {
+ /*
+ * Otherwise if we already have a request pending
+ * for execution after the current one, we can
+ * just wait until the next CS event before
+ * queuing more. In either case we will force a
+ * lite-restore preemption event, but if we wait
+ * we hopefully coalesce several updates into a single
+ * submission.
+ */
+ if (execlists->active[1])
+ return;
}
-
- /*
- * In theory, we could coalesce more requests onto
- * the second port (the first port is active, with
- * no preemptions pending). However, that means we
- * then have to deal with the possible lite-restore
- * of the second port (as we submit the ELSP, there
- * may be a context-switch) but also we may complete
- * the resubmission before the context-switch. Ergo,
- * coalescing onto the second port will cause a
- * preemption event, but we cannot predict whether
- * that will affect port[0] or port[1].
- *
- * If the second port is already active, we can wait
- * until the next context-switch before contemplating
- * new requests. The GPU will be busy and we should be
- * able to resubmit the new ELSP before it idles,
- * avoiding pipeline bubbles (momentary pauses where
- * the driver is unable to keep up the supply of new
- * work). However, we have to double check that the
- * priorities of the ports haven't been switch.
- */
- if (port_count(&port[1]))
- return;
-
- /*
- * WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent
- * ring:HEAD == rq:TAIL as we resubmit the
- * request. See gen8_emit_fini_breadcrumb() for
- * where we prepare the padding after the
- * end of the request.
- */
- last->tail = last->wa_tail;
}
+ if (last)
+ last->tail = last->wa_tail;
while (rb) { /* XXX virtual is always taking precedence */
struct virtual_engine *ve =
@@ -940,7 +849,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
__i915_request_submit(rq);
- trace_i915_request_in(rq, port_index(port, execlists));
submit = true;
last = rq;
}
@@ -967,6 +875,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* hardware about the first.
*/
if (last && !can_merge_rq(last, rq)) {
+ int idx;
+
/*
* If we are on the second port and cannot
* combine this request with the last, then we
@@ -995,20 +905,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
goto done;
- if (submit)
- port_assign(port, last);
- port++;
-
- GEM_BUG_ON(port_isset(port));
+ idx = port - execlists->pending;
+ *port++ = execlists_schedule_in(last, idx);
}
list_del_init(&rq->sched.link);
-
__i915_request_submit(rq);
- trace_i915_request_in(rq, port_index(port, execlists));
-
- last = rq;
submit = true;
+ last = rq;
}
rb_erase_cached(&p->node, &execlists->queue);
@@ -1033,54 +937,29 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* interrupt for secondary ports).
*/
execlists->queue_priority_hint = queue_prio(execlists);
+ GEM_TRACE("%s: queue_priority_hint:%d, submit:%d\n",
+ __func__, execlists->queue_priority_hint, submit);
if (submit) {
- port_assign(port, last);
+ *port = execlists_schedule_in(last, port - execlists->pending);
+ *++port = NULL;
execlists_submit_ports(engine);
}
-
- /* We must always keep the beast fed if we have work piled up */
- GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
- !port_isset(execlists->port));
-
- /* Re-evaluate the executing context setup after each preemptive kick */
- if (last)
- execlists_user_begin(execlists, execlists->port);
-
- /* If the engine is now idle, so should be the flag; and vice versa. */
- GEM_BUG_ON(execlists_is_active(&engine->execlists,
- EXECLISTS_ACTIVE_USER) ==
- !port_isset(engine->execlists.port));
}
void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
{
- struct execlist_port *port = execlists->port;
- unsigned int num_ports = execlists_num_ports(execlists);
-
- while (num_ports-- && port_isset(port)) {
- struct i915_request *rq = port_request(port);
-
- GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
- rq->engine->name,
- (unsigned int)(port - execlists->port),
- rq->fence.context, rq->fence.seqno,
- hwsp_seqno(rq));
+ struct i915_request **port, *rq;
- GEM_BUG_ON(!execlists->active);
- execlists_context_schedule_out(rq,
- i915_request_completed(rq) ?
- INTEL_CONTEXT_SCHEDULE_OUT :
- INTEL_CONTEXT_SCHEDULE_PREEMPTED);
-
- i915_request_put(rq);
-
- memset(port, 0, sizeof(*port));
- port++;
- }
+ for (port = execlists->pending; (rq = *port); port++)
+ execlists_schedule_out(rq);
+ memset(execlists->pending, 0, sizeof(execlists->pending));
- execlists_clear_all_active(execlists);
+ for (port = execlists->active; (rq = *port); port++)
+ execlists_schedule_out(rq);
+ execlists->active =
+ memset(execlists->inflight, 0, sizeof(execlists->inflight));
}
static inline void
@@ -1142,7 +1021,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
/* Cancel the requests on the HW and clear the ELSP tracker. */
execlists_cancel_port_requests(execlists);
- execlists_user_end(execlists);
/* Mark all executing requests as skipped. */
list_for_each_entry(rq, &engine->timeline.requests, link) {
@@ -1172,7 +1050,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
execlists->queue_priority_hint = INT_MIN;
execlists->queue = RB_ROOT_CACHED;
- GEM_BUG_ON(port_isset(execlists->port));
GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
execlists->tasklet.func = nop_submission_tasklet;
@@ -1189,7 +1066,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
static void process_csb(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
const u32 * const buf = execlists->csb_status;
u8 head, tail;
@@ -1222,9 +1098,7 @@ static void process_csb(struct intel_engine_cs *engine)
rmb();
do {
- struct i915_request *rq;
unsigned int status;
- unsigned int count;
if (++head == GEN8_CSB_ENTRIES)
head = 0;
@@ -1247,68 +1121,31 @@ static void process_csb(struct intel_engine_cs *engine)
* status notifier.
*/
- GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+ GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
engine->name, head,
- buf[2 * head + 0], buf[2 * head + 1],
- execlists->active);
+ buf[2 * head + 0], buf[2 * head + 1]);
status = buf[2 * head];
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
-
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
-
- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
-
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
- GEM_TRACE("%s preempt-idle\n", engine->name);
- complete_preempt_context(execlists);
- continue;
- }
-
- if (status & GEN8_CTX_STATUS_PREEMPTED &&
- execlists_is_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT))
- continue;
-
- GEM_BUG_ON(!execlists_is_active(execlists,
- EXECLISTS_ACTIVE_USER));
-
- rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
- engine->name,
- port->context_id, count,
- rq ? rq->fence.context : 0,
- rq ? rq->fence.seqno : 0,
- rq ? hwsp_seqno(rq) : 0,
- rq ? rq_prio(rq) : 0);
-
- /* Check the context/desc id for this event matches */
- GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
-
- GEM_BUG_ON(count == 0);
- if (--count == 0) {
- /*
- * On the final event corresponding to the
- * submission of this context, we expect either
- * an element-switch event or a completion
- * event (and on completion, the active-idle
- * marker). No more preemptions, lite-restore
- * or otherwise.
- */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
- GEM_BUG_ON(port_isset(&port[1]) &&
- !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
- GEM_BUG_ON(!port_isset(&port[1]) &&
- !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+ if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) {
+promote:
+ GEM_BUG_ON(!execlists->pending[0]);
+ execlists->active =
+ memcpy(execlists->inflight,
+ execlists->pending,
+ execlists_num_ports(execlists) *
+ sizeof(*execlists->pending));
+ execlists->pending[0] = NULL;
+ } else if (status & GEN8_CTX_STATUS_PREEMPTED) {
+ struct i915_request **port = execlists->active;
+
+ while (*port)
+ execlists_schedule_out(*port++);
+
+ goto promote;
+ } else if (*execlists->active) {
+ struct i915_request *rq =
+ fetch_and_zero(execlists->active);
+ execlists->active++;
/*
* We rely on the hardware being strongly
@@ -1317,21 +1154,7 @@ static void process_csb(struct intel_engine_cs *engine)
* user interrupt and CSB is processed.
*/
GEM_BUG_ON(!i915_request_completed(rq));
-
- execlists_context_schedule_out(rq,
- INTEL_CONTEXT_SCHEDULE_OUT);
- i915_request_put(rq);
-
- GEM_TRACE("%s completed ctx=%d\n",
- engine->name, port->context_id);
-
- port = execlists_port_complete(execlists, port);
- if (port_isset(port))
- execlists_user_begin(execlists, port);
- else
- execlists_user_end(execlists);
- } else {
- port_set(port, port_pack(rq, count));
+ execlists_schedule_out(rq);
}
} while (head != tail);
@@ -1356,7 +1179,7 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
lockdep_assert_held(&engine->timeline.lock);
process_csb(engine);
- if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
+ if (!engine->execlists.pending[0])
execlists_dequeue(engine);
}
@@ -1369,10 +1192,7 @@ static void execlists_submission_tasklet(unsigned long data)
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
unsigned long flags;
- GEM_TRACE("%s awake?=%d, active=%x\n",
- engine->name,
- !!engine->i915->gt.awake,
- engine->execlists.active);
+ GEM_TRACE("%s awake?=%d\n", engine->name, !!engine->i915->gt.awake);
spin_lock_irqsave(&engine->timeline.lock, flags);
__execlists_submission_tasklet(engine);
@@ -1399,14 +1219,6 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
tasklet_hi_schedule(&execlists->tasklet);
}
-static bool inflight(const struct intel_engine_execlists *execlists,
- const struct i915_request *rq)
-{
- const struct i915_request *active = port_request(execlists->port);
-
- return active && active->hw_context == rq->hw_context;
-}
-
static void submit_queue(struct intel_engine_cs *engine,
const struct i915_request *rq)
{
@@ -1417,7 +1229,7 @@ static void submit_queue(struct intel_engine_cs *engine,
execlists->queue_priority_hint = rq_prio(rq);
- if (!inflight(execlists, rq))
+ if (!execlists_inflight(execlists, rq))
__submit_queue_imm(engine);
}
@@ -2528,7 +2340,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->preempt_context)
+ if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
}
@@ -2629,11 +2441,6 @@ static int logical_ring_init(struct intel_engine_cs *engine)
i915_mmio_reg_offset(RING_ELSP(engine));
}
- execlists->preempt_complete_status = ~0u;
- if (engine->preempt_context)
- execlists->preempt_complete_status =
- upper_32_bits(engine->preempt_context->lrc_desc);
-
execlists->csb_status =
&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
@@ -2641,6 +2448,7 @@ static int logical_ring_init(struct intel_engine_cs *engine)
&engine->status_page.addr[intel_hws_csb_write_index(i915)];
reset_csb_pointers(execlists);
+ execlists->active = execlists->inflight;
return 0;
}
@@ -3005,11 +2813,6 @@ populate_lr_context(struct intel_context *ce,
if (!engine->default_state)
regs[CTX_CONTEXT_CONTROL + 1] |=
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
- if (ce->gem_context == engine->i915->preempt_context &&
- INTEL_GEN(engine->i915) < 11)
- regs[CTX_CONTEXT_CONTROL + 1] |=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
err_unpin_ctx:
i915_gem_object_unpin_map(ctx_obj);
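
The CSB loop above reduces to three cases now that the hardware ack is
no longer tracked in a bitfield. A condensed sketch of that state
machine (promote_pending() and schedule_out_all() are hypothetical
stand-ins for the inline code in the hunk above):

static void csb_step(struct intel_engine_execlists *el, u32 status)
{
	if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) {
		/* ELSP ack: promote pending[] into inflight[] */
		promote_pending(el);
	} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
		/* preemption: retire whatever was inflight, then promote */
		schedule_out_all(el->active);
		promote_pending(el);
	} else if (*el->active) {
		struct i915_request *rq = fetch_and_zero(el->active);

		el->active++; /* advance past the completed request */
		execlists_schedule_out(rq);
	}
}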
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e612bdca9fd9..ca47601188a9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -103,44 +103,25 @@ static inline bool __execlists_need_preempt(int prio, int last)
return prio > max(I915_PRIORITY_NORMAL - 1, last);
}
-static inline void
-execlists_set_active(struct intel_engine_execlists *execlists,
- unsigned int bit)
+static inline struct i915_request *
+execlists_active(const struct intel_engine_execlists *execlists)
{
- __set_bit(bit, (unsigned long *)&execlists->active);
+ return READ_ONCE(*execlists->active);
}
static inline bool
-execlists_set_active_once(struct intel_engine_execlists *execlists,
- unsigned int bit)
+execlists_inflight(const struct intel_engine_execlists *execlists,
+ const struct i915_request *rq)
{
- return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
-}
+ const struct i915_request *active;
-static inline void
-execlists_clear_active(struct intel_engine_execlists *execlists,
- unsigned int bit)
-{
- __clear_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline void
-execlists_clear_all_active(struct intel_engine_execlists *execlists)
-{
- execlists->active = 0;
-}
+ active = execlists_active(execlists);
+ if (!active)
+ active = execlists->pending[0];
-static inline bool
-execlists_is_active(const struct intel_engine_execlists *execlists,
- unsigned int bit)
-{
- return test_bit(bit, (unsigned long *)&execlists->active);
+ return active && active->hw_context == rq->hw_context;
}
-void execlists_user_begin(struct intel_engine_execlists *execlists,
- const struct execlist_port *port);
-void execlists_user_end(struct intel_engine_execlists *execlists);
-
void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
@@ -153,21 +134,6 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
return execlists->port_mask + 1;
}
-static inline struct execlist_port *
-execlists_port_complete(struct intel_engine_execlists * const execlists,
- struct execlist_port * const port)
-{
- const unsigned int m = execlists->port_mask;
-
- GEM_BUG_ON(port_index(port, execlists) != 0);
- GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
-
- memmove(port, port + 1, m * sizeof(struct execlist_port));
- memset(port + m, 0, sizeof(struct execlist_port));
-
- return port;
-}
-
static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
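
A usage sketch for the helpers above, mirroring submit_queue() in the
intel_lrc.c hunk: skip kicking the tasklet when the new request's
context is already on (or headed to) the hardware, since preempting
ourselves is pointless.

static void maybe_kick_submission(struct intel_engine_cs *engine,
				  const struct i915_request *rq)
{
	if (!execlists_inflight(&engine->execlists, rq))
		tasklet_hi_schedule(&engine->execlists.tasklet);
}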
--
2.20.1