[PATCH 77/77] virtual
Chris Wilson
chris at chris-wilson.co.uk
Sat Apr 28 14:04:41 UTC 2018
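Provide a virtual engine: a single submission point that load balances
requests across a set of sibling engines of the same class. Userspace
opts in with the new I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE extension to
the engines context parameter, which installs the virtual engine into
the I915_EXEC_DEFAULT slot.

Each request submitted to the virtual engine is advertised to every
sibling via a per-sibling rbtree of pending virtual requests, and the
first sibling to dequeue it executes it. If that sibling differs from
the engine the context was last bound to, the ring register offsets in
the context image are rewritten to match. As the siblings share a
single logical context and ring, the set must be homogeneous and the
context must use a single timeline across all engines.

A live selftest (live_virtual_engine) wraps each physical engine in a
virtual engine, then load balances nop requests across each engine
class.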
---
drivers/gpu/drm/i915/i915_gem_context.c | 63 +++-
drivers/gpu/drm/i915/i915_gem_context.h | 1 +
drivers/gpu/drm/i915/i915_request.c | 3 +-
drivers/gpu/drm/i915/intel_engine_cs.c | 2 +-
drivers/gpu/drm/i915/intel_lrc.c | 410 ++++++++++++++++++++-
drivers/gpu/drm/i915/intel_lrc.h | 6 +
drivers/gpu/drm/i915/intel_ringbuffer.h | 9 +
drivers/gpu/drm/i915/selftests/intel_lrc.c | 198 ++++++++++
include/uapi/drm/i915_drm.h | 22 ++
9 files changed, 701 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 5efbc45fb22d..f687a56584b7 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -92,6 +92,7 @@
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_gt_pm.h"
+#include "intel_lrc.h"
#include "intel_workarounds.h"
#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -135,7 +136,10 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
ce->ops->destroy(ce);
}
- kfree(ctx->engines);
+ if (ctx->engines) {
+ intel_virtual_engine_put(ctx->engines[0]);
+ kfree(ctx->engines);
+ }
if (ctx->timeline)
i915_timeline_put(ctx->timeline);
@@ -839,6 +843,54 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
return ret;
}
+static int check_user_mbz64(u64 __user *user)
+{
+ u64 mbz;
+
+ if (get_user(mbz, user))
+ return -EFAULT;
+
+ return mbz ? -EINVAL : 0;
+}
+
+static int set_engines__load_balance(struct i915_gem_context *ctx,
+ struct intel_engine_cs **engines,
+ unsigned int nengine,
+ struct i915_user_extension __user *base)
+{
+ struct i915_context_engines_load_balance __user *ext =
+ container_of(base, typeof(*ext), base);
+ struct intel_engine_cs *ve;
+ unsigned int n;
+ int err;
+
+ if (engines[0])
+ return -EEXIST;
+
+ if (!HAS_EXECLISTS(ctx->i915))
+ return -ENODEV;
+
+ if (!ctx->timeline)
+ return -EINVAL;
+
+ err = check_user_mbz64(&ext->flags);
+ if (err)
+ return err;
+
+ for (n = 0; n < ARRAY_SIZE(ext->mbz); n++) {
+ err = check_user_mbz64(&ext->mbz[n]);
+ if (err)
+ return err;
+ }
+
+ ve = intel_execlists_create_virtual(ctx, engines + 1, nengine);
+ if (IS_ERR(ve))
+ return PTR_ERR(ve);
+
+ engines[0] = ve;
+ return 0;
+}
+
static int set_engines__extensions(struct i915_gem_context *ctx,
struct intel_engine_cs **engines,
unsigned int nengine,
@@ -855,6 +907,9 @@ static int set_engines__extensions(struct i915_gem_context *ctx,
err = -EINVAL;
switch (x) {
+ case I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE:
+ err = set_engines__load_balance(ctx, engines, nengine, ext);
+ break;
default:
break;
}
@@ -924,12 +979,16 @@ static int set_engines(struct i915_gem_context *ctx,
err = set_engines__extensions(ctx, engines, nengine,
u64_to_user_ptr(extensions));
if (err) {
+ intel_virtual_engine_put(engines[0]);
kfree(engines);
return err;
}
out:
- kfree(ctx->engines);
+ if (ctx->engines) {
+ intel_virtual_engine_put(ctx->engines[0]);
+ kfree(ctx->engines);
+ }
ctx->engines = engines;
ctx->nengine = nengine + 1;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 78d451f9d44d..4dca7745fe98 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -172,6 +172,7 @@ struct i915_gem_context {
/** engine: per-engine logical HW state */
struct intel_context {
struct i915_gem_context *gem_context;
+ struct intel_engine_cs *active;
struct i915_vma *state;
struct intel_ring *ring;
u32 *lrc_reg_state;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 4239d043f46d..1b98257e4ae7 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -353,6 +353,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
intel_engine_get_seqno(engine));
GEM_BUG_ON(!i915_request_completed(rq));
+ GEM_BUG_ON(intel_engine_is_virtual(engine));
local_irq_disable();
@@ -1097,7 +1098,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches)
prev = i915_gem_active_raw(&timeline->last_request,
&request->i915->drm.struct_mutex);
if (prev && !i915_request_completed(prev)) {
- if (prev->engine == engine)
+ if (prev->engine == engine && !intel_engine_is_virtual(engine))
i915_sw_fence_await_sw_fence(&request->submit,
&prev->submit,
&request->submitq);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e0ed87a1c372..53b97d82df3c 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -564,7 +564,7 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
return ret;
}
-static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
+void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
{
i915_vma_unpin_and_release(&engine->scratch);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6acadd227891..38c62d85a5d0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -164,6 +164,26 @@
#define WA_TAIL_DWORDS 2
#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
+struct virtual_engine {
+ struct intel_engine_cs base;
+
+ struct intel_context context;
+ struct kref kref;
+
+ struct intel_engine_cs *bound;
+
+ struct i915_request *request;
+ struct rb_node node[I915_NUM_ENGINES];
+
+ unsigned int count;
+ struct intel_engine_cs *siblings[0];
+};
+
+static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
+{
+ return container_of(engine, struct virtual_engine, base);
+}
+
static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
struct intel_engine_cs *engine,
struct intel_context *ce);
@@ -476,8 +496,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
if (rq) {
GEM_BUG_ON(!rq->hw_context->pin_count);
GEM_BUG_ON(count > !n);
- if (!count++)
+ if (!count++) {
+ GEM_BUG_ON(rq->hw_context->active);
execlists_context_schedule_in(rq);
+ rq->hw_context->active = engine;
+ }
port_set(&port[n], port_pack(rq, count));
desc = execlists_update_context(rq);
GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
@@ -676,6 +699,50 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
}
+static void virtual_update_register_offsets(u32 *regs,
+ struct intel_engine_cs *engine)
+{
+ u32 base = engine->mmio_base;
+
+ regs[CTX_CONTEXT_CONTROL] =
+ i915_mmio_reg_offset(RING_CONTEXT_CONTROL(engine));
+ regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
+ regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
+ regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
+ regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
+
+ regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
+ regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
+ regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
+ regs[CTX_SECOND_BB_HEAD_U] =
+ i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
+ regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
+ regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
+
+ regs[CTX_CTX_TIMESTAMP] =
+ i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
+ regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 3));
+ regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 3));
+ regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 2));
+ regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 2));
+ regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 1));
+ regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 1));
+ regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
+ regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
+
+ if (engine->class == RENDER_CLASS) {
+ regs[CTX_RCS_INDIRECT_CTX] =
+ i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
+ regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
+ i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
+ regs[CTX_BB_PER_CTX_PTR] =
+ i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
+
+ regs[CTX_R_PWR_CLK_STATE] =
+ i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+ }
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -686,6 +753,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
struct rb_node *rb;
unsigned long flags;
bool submit = false;
+ int prio;
/* Hardware submission is through 2 ports. Conceptually each port
* has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -710,6 +778,29 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
spin_lock_irqsave(&engine->timeline.lock, flags);
+ prio = execlists->queue_priority;
+ for (rb = rb_first_cached(&execlists->virtual); rb; ) {
+ struct virtual_engine *ve =
+ rb_entry(rb, typeof(*ve), node[engine->id]);
+ struct intel_engine_cs *active;
+
+ if (!ve->request) {
+ rb_erase_cached(rb, &execlists->virtual);
+ RB_CLEAR_NODE(rb);
+ rb = rb_first_cached(&execlists->virtual);
+ continue;
+ }
+
+ active = READ_ONCE(ve->context.active);
+ if (active && active != engine) {
+ rb = rb_next(rb);
+ continue;
+ }
+
+ prio = max(prio, rq_prio(ve->request));
+ break;
+ }
+
if (last) {
/*
* Don't resubmit or switch until all outstanding
@@ -733,7 +824,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
goto unlock;
- if (need_preempt(engine, last, execlists->queue_priority)) {
+ if (need_preempt(engine, last, prio)) {
inject_preempt_context(engine);
goto unlock;
}
@@ -773,6 +864,64 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last->tail = last->wa_tail;
}
+ if (rb) { /* XXX virtual always takes precedence */
+ struct virtual_engine *ve =
+ rb_entry(rb, typeof(*ve), node[engine->id]);
+ struct i915_request *rq;
+
+ spin_lock(&ve->base.timeline.lock);
+
+ rq = ve->request;
+ if (!rq)
+ goto remove_virtual;
+
+ if (rq_prio(rq) >= prio) {
+ if (last && !can_merge_rq(rq, last)) {
+ spin_unlock(&ve->base.timeline.lock);
+ goto unlock;
+ }
+
+ GEM_BUG_ON(rq->engine != &ve->base);
+ GEM_BUG_ON(rq->hw_context != &ve->context);
+ rq->engine = engine;
+
+ if (engine != ve->bound) {
+ u32 *regs = ve->context.lrc_reg_state;
+ unsigned int n;
+
+ virtual_update_register_offsets(regs, engine);
+ ve->bound = engine;
+
+ /*
+ * Move the bound engine to the top of the list
+ * for future execution. We then kick this
+ * tasklet first before checking others, so that
+ * we preferentially reuse this set of bound
+ * registers.
+ */
+ for (n = 1; n < ve->count; n++) {
+ if (ve->siblings[n] == engine) {
+ swap(ve->siblings[n],
+ ve->siblings[0]);
+ break;
+ }
+ }
+ }
+
+ __i915_request_submit(rq);
+ trace_i915_request_in(rq, port_index(port, execlists));
+ submit = true;
+ last = rq;
+
+ ve->request = NULL;
+remove_virtual:
+ rb_erase_cached(rb, &execlists->virtual);
+ RB_CLEAR_NODE(rb);
+ }
+
+ spin_unlock(&ve->base.timeline.lock);
+ }
+
while ((rb = rb_first_cached(&execlists->queue))) {
struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
@@ -896,6 +1045,7 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
intel_engine_get_seqno(rq->engine));
GEM_BUG_ON(!execlists->active);
+ rq->hw_context->active = NULL;
intel_engine_context_out(rq->engine);
execlists_context_status_change(rq,
@@ -1198,6 +1348,7 @@ static void process_csb(struct intel_engine_cs *engine)
*/
GEM_BUG_ON(!i915_request_completed(rq));
+ rq->hw_context->active = NULL;
execlists_context_schedule_out(rq);
trace_i915_request_out(rq);
@@ -1320,21 +1471,25 @@ static void submit_queue(struct intel_engine_cs *engine,
__submit_queue(engine, prio, timeout);
}
-static void execlists_submit_request(struct i915_request *request)
+static void __execlists_submit_request(struct intel_engine_cs *engine,
+ struct i915_request *request)
{
- struct intel_engine_cs *engine = request->engine;
- unsigned long flags;
-
- /* Will be called from irq-context when using foreign fences. */
- spin_lock_irqsave(&engine->timeline.lock, flags);
-
queue_request(engine, &request->sched, rq_prio(request));
submit_queue(engine,
rq_prio(request), request->gem_context->preempt_timeout);
GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
GEM_BUG_ON(list_empty(&request->sched.link));
+}
+
+static void execlists_submit_request(struct i915_request *request)
+{
+ struct intel_engine_cs *engine = request->engine;
+ unsigned long flags;
+
+ /* Will be called from irq-context when using foreign fences. */
+ spin_lock_irqsave(&engine->timeline.lock, flags);
+ __execlists_submit_request(engine, request);
spin_unlock_irqrestore(&engine->timeline.lock, flags);
}
@@ -3000,6 +3155,243 @@ void intel_lr_context_resume(struct drm_i915_private *i915)
}
}
+static void virtual_engine_free(struct kref *kref)
+{
+ struct virtual_engine *ve = container_of(kref, typeof(*ve), kref);
+ struct intel_context *ce = &ve->context;
+ unsigned int n;
+
+ GEM_BUG_ON(ve->request);
+ if (GEM_WARN_ON(test_bit(TASKLET_STATE_SCHED,
+ &ve->base.execlists.tasklet.state)))
+ tasklet_kill(&ve->base.execlists.tasklet);
+
+ for (n = 0; n < ve->count; n++) {
+ struct intel_engine_cs *sibling = ve->siblings[n];
+ struct rb_node *node = &ve->node[sibling->id];
+
+ if (RB_EMPTY_NODE(node))
+ continue;
+
+ spin_lock_irq(&sibling->timeline.lock);
+
+ if (!RB_EMPTY_NODE(node))
+ rb_erase_cached(node, &sibling->execlists.virtual);
+
+ spin_unlock_irq(&sibling->timeline.lock);
+
+ tasklet_kill(&ve->siblings[n]->execlists.tasklet);
+ }
+
+ if (ce->state)
+ execlists_context_destroy(ce);
+
+ intel_engine_cleanup_scratch(&ve->base);
+ i915_timeline_fini(&ve->base.timeline);
+ kfree(ve);
+}
+
+static void virtual_context_unpin(struct intel_context *ce)
+{
+ struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+
+ lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex);
+ GEM_BUG_ON(ce->pin_count == 0);
+
+ if (--ce->pin_count)
+ return;
+
+ __execlists_context_unpin(ce);
+ kref_put(&ve->kref, virtual_engine_free);
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+ .unpin = virtual_context_unpin,
+};
+
+static struct intel_context *
+virtual_context_pin(struct intel_engine_cs *engine,
+ struct i915_gem_context *ctx)
+{
+ struct virtual_engine *ve = to_virtual_engine(engine);
+ struct intel_context *ce = &ve->context;
+
+ lockdep_assert_held(&ctx->i915->drm.struct_mutex);
+
+ if (likely(ce->pin_count++))
+ return ce;
+ GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
+
+ kref_get(&ve->kref);
+ ce->ops = &virtual_context_ops;
+
+ return __execlists_context_pin(engine, ctx, ce);
+}
+
+static void virtual_submission_tasklet(unsigned long data)
+{
+ struct virtual_engine * const ve = (struct virtual_engine *)data;
+ unsigned int n;
+ int prio;
+
+ local_irq_disable();
+
+ prio = I915_PRIORITY_INVALID;
+ spin_lock(&ve->base.timeline.lock);
+ if (ve->request)
+ prio = rq_prio(ve->request);
+ spin_unlock(&ve->base.timeline.lock);
+ if (prio == I915_PRIORITY_INVALID)
+ goto out;
+
+ for (n = 0; n < ve->count; n++) {
+ struct intel_engine_cs *sibling = ve->siblings[n];
+ struct rb_node *node = &ve->node[sibling->id];
+ struct rb_node **parent, *rb;
+ bool first = true;
+
+ spin_lock(&sibling->timeline.lock);
+
+ if (!RB_EMPTY_NODE(node))
+ rb_erase_cached(node, &sibling->execlists.virtual);
+
+ rb = NULL;
+ parent = &sibling->execlists.virtual.rb_root.rb_node;
+ while (*parent) {
+ struct virtual_engine *other;
+
+ rb = *parent;
+ other = rb_entry(rb, typeof(*other), node[sibling->id]);
+ if (!other->request || prio > rq_prio(other->request)) {
+ parent = &rb->rb_left;
+ } else {
+ parent = &rb->rb_right;
+ first = false;
+ }
+ }
+
+ rb_link_node(node, rb, parent);
+ rb_insert_color_cached(node,
+ &sibling->execlists.virtual,
+ first);
+ if (first && prio > sibling->execlists.queue_priority)
+ tasklet_hi_schedule(&sibling->execlists.tasklet);
+
+ spin_unlock(&sibling->timeline.lock);
+ }
+
+out:
+ local_irq_enable();
+}
+
+static void virtual_submit_request(struct i915_request *request)
+{
+ struct virtual_engine *ve = to_virtual_engine(request->engine);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ve->base.timeline.lock, flags);
+
+ GEM_BUG_ON(ve->request);
+ ve->request = request;
+
+ spin_unlock_irqrestore(&ve->base.timeline.lock, flags);
+
+ tasklet_hi_schedule(&ve->base.execlists.tasklet);
+}
+
+struct intel_engine_cs *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+ struct intel_engine_cs **siblings,
+ unsigned int count)
+{
+ struct virtual_engine *ve;
+ unsigned int n;
+ int err;
+
+ if (!count)
+ return ERR_PTR(-EINVAL);
+
+ ve = kzalloc(sizeof(*ve) + count * sizeof(*ve->siblings), GFP_KERNEL);
+ if (!ve)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&ve->kref);
+ ve->base.i915 = ctx->i915;
+ ve->base.id = -1;
+ ve->base.class = OTHER_CLASS;
+ ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+
+ snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+ i915_timeline_init(ctx->i915, &ve->base.timeline, ve->base.name);
+ err = intel_engine_create_scratch(&ve->base, 4096);
+ if (err)
+ goto err_put;
+
+ ve->context.gem_context = ctx;
+
+ ve->base.context_pin = virtual_context_pin;
+ ve->base.request_alloc = execlists_request_alloc;
+
+ ve->base.schedule = execlists_schedule;
+ ve->base.submit_request = virtual_submit_request;
+
+ tasklet_init(&ve->base.execlists.tasklet,
+ virtual_submission_tasklet,
+ (unsigned long)ve);
+
+ ve->count = count;
+ for (n = 0; n < count; n++) {
+ struct intel_engine_cs *sibling = siblings[n];
+
+ ve->siblings[n] = sibling;
+
+ if (ve->base.class != OTHER_CLASS) {
+ if (ve->base.class != sibling->class) {
+ err = -EINVAL;
+ ve->count = n;
+ goto err_put;
+ }
+ continue;
+ }
+
+ if (RB_EMPTY_NODE(&ve->node[sibling->id])) {
+ err = -EINVAL;
+ ve->count = n;
+ goto err_put;
+ }
+
+ RB_CLEAR_NODE(&ve->node[sibling->id]);
+
+ ve->base.class = sibling->class;
+ snprintf(ve->base.name, sizeof(ve->base.name),
+ "v%dx%d", ve->base.class, count);
+ ve->base.context_size = sibling->context_size;
+
+ /* XXX single default state per class? */
+ ve->base.default_state =
+ i915_gem_object_get(sibling->default_state);
+
+ ve->base.emit_bb_start = sibling->emit_bb_start;
+ ve->base.emit_flush = sibling->emit_flush;
+ ve->base.emit_breadcrumb = sibling->emit_breadcrumb;
+ ve->base.emit_breadcrumb_sz = sibling->emit_breadcrumb_sz;
+ }
+
+ return &ve->base;
+
+err_put:
+ virtual_engine_free(&ve->kref);
+ return ERR_PTR(err);
+}
+
+void intel_virtual_engine_put(struct intel_engine_cs *engine)
+{
+ if (!engine)
+ return;
+
+ kref_put(&to_virtual_engine(engine)->kref, virtual_engine_free);
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/intel_lrc.c"
#endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194e930c..ec90d9406cf4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -104,4 +104,10 @@ struct i915_gem_context;
void intel_lr_context_resume(struct drm_i915_private *dev_priv);
+struct intel_engine_cs *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+ struct intel_engine_cs **siblings,
+ unsigned int count);
+void intel_virtual_engine_put(struct intel_engine_cs *engine);
+
#endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b43f27bc6e63..505a9690f17f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -293,6 +293,7 @@ struct intel_engine_execlists {
* @queue: queue of requests, in priority lists
*/
struct rb_root_cached queue;
+ struct rb_root_cached virtual;
/**
* @fw_domains: forcewake domains for irq tasklet
@@ -574,6 +575,7 @@ struct intel_engine_cs {
#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
+#define I915_ENGINE_IS_VIRTUAL BIT(3)
unsigned int flags;
/*
@@ -656,6 +658,12 @@ static inline bool __execlists_need_preempt(int prio, int last)
return prio > max(0, last);
}
+static inline bool
+intel_engine_is_virtual(const struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_IS_VIRTUAL;
+}
+
static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
unsigned int bit)
@@ -871,6 +879,7 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
void intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
+void intel_engine_cleanup_scratch(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);
int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 71cadef690f7..f28c46a90862 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -4,6 +4,8 @@
* Copyright © 2018 Intel Corporation
*/
+#include <linux/prime_numbers.h>
+
#include "../i915_selftest.h"
#include "mock_context.h"
@@ -885,6 +887,201 @@ static int live_context_preempt_timeout(void *arg)
return err;
}
+struct live_test {
+ struct drm_i915_private *i915;
+ const char *func;
+ const char *name;
+
+ unsigned int reset_count;
+ bool wedge;
+};
+
+static int begin_live_test(struct live_test *t,
+ struct drm_i915_private *i915,
+ const char *func,
+ const char *name)
+{
+ struct wedge_me wedge;
+ int err;
+
+ t->i915 = i915;
+ t->func = func;
+ t->name = name;
+
+ wedge_on_timeout(&wedge, i915, 10 * HZ) {
+ err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
+ if (err) {
+ pr_err("%s(%s): failed to idle before, with err=%d!",
+ func, name, err);
+ return err;
+ }
+ }
+
+ if (i915_terminally_wedged(&i915->gpu_error))
+ return -EIO;
+
+ i915->gpu_error.missed_irq_rings = 0;
+ t->reset_count = i915_reset_count(&i915->gpu_error);
+
+ return 0;
+}
+
+static int end_live_test(struct live_test *t)
+{
+ struct drm_i915_private *i915 = t->i915;
+ struct wedge_me wedge;
+
+ i915_retire_requests(i915);
+
+ wedge_on_timeout(&wedge, i915, 10 * HZ) {
+ if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED)) {
+ pr_err("%s(%s): failed to idle\n", t->func, t->name);
+ return -EIO;
+ }
+ }
+
+ if (i915_terminally_wedged(&i915->gpu_error)) {
+ pr_err("%s(%s): *** wedged ****\n", t->func, t->name);
+ return -EIO;
+ }
+
+ if (t->reset_count != i915_reset_count(&i915->gpu_error)) {
+ pr_err("%s(%s): GPU was reset %d times!\n",
+ t->func, t->name,
+ i915_reset_count(&i915->gpu_error) - t->reset_count);
+ return -EIO;
+ }
+
+ if (i915->gpu_error.missed_irq_rings) {
+ pr_err("%s(%s): Missed interrupts on engines %lx\n",
+ t->func, t->name, i915->gpu_error.missed_irq_rings);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int nop_virtual_engine(struct drm_i915_private *i915,
+ struct intel_engine_cs **siblings,
+ unsigned int nsibling,
+ unsigned int nctx)
+{
+ IGT_TIMEOUT(end_time);
+ struct i915_request *request[16];
+ struct i915_gem_context *ctx[16];
+ struct intel_engine_cs *ve[16];
+ unsigned long n, prime, nc;
+ ktime_t times[2] = {};
+ struct live_test t;
+ int err;
+
+ GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+
+ for (n = 0; n < nctx; n++) {
+ ctx[n] = kernel_context(i915);
+ if (!ctx[n])
+ return -ENOMEM;
+
+ ve[n] = intel_execlists_create_virtual(ctx[n],
+ siblings, nsibling);
+ if (IS_ERR(ve[n]))
+ return PTR_ERR(ve[n]);
+ }
+
+ err = begin_live_test(&t, i915, __func__, ve[0]->name);
+ if (err)
+ goto out;
+
+ for_each_prime_number_from(prime, 1, 8192) {
+ times[1] = ktime_get_raw();
+
+ for (nc = 0; nc < nctx; nc++) {
+ for (n = 0; n < prime; n++) {
+ request[nc] =
+ i915_request_alloc(ve[nc], ctx[nc]);
+ if (IS_ERR(request[nc])) {
+ err = PTR_ERR(request[nc]);
+ goto out;
+ }
+
+ i915_request_add(request[nc]);
+ }
+ }
+
+ for (nc = 0; nc < nctx; nc++)
+ i915_request_wait(request[nc],
+ I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT);
+
+ times[1] = ktime_sub(ktime_get_raw(), times[1]);
+ if (prime == 1)
+ times[0] = times[1];
+
+ if (__igt_timeout(end_time, NULL))
+ break;
+ }
+
+ err = end_live_test(&t);
+ if (err)
+ goto out;
+
+ pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
+ nctx, ve[0]->name, ktime_to_ns(times[0]),
+ prime, div64_u64(ktime_to_ns(times[1]), prime));
+
+out:
+ for (nc = 0; nc < nctx; nc++) {
+ intel_virtual_engine_put(ve[nc]);
+ kernel_context_close(ctx[nc]);
+ }
+ return err;
+}
+
+static int live_virtual_engine(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned int class, inst;
+ int err = -ENODEV;
+
+ mutex_lock(&i915->drm.struct_mutex);
+
+ for_each_engine(engine, i915, id) {
+ err = nop_virtual_engine(i915, &engine, 1, 1);
+ if (err) {
+ pr_err("Failed to wrap engine %s: err=%d\n",
+ engine->name, err);
+ goto out_unlock;
+ }
+ }
+
+ for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+ int nsibling, n;
+
+ nsibling = 0;
+ for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+ if (!i915->engine_class[class][inst])
+ break;
+
+ siblings[nsibling++] = i915->engine_class[class][inst];
+ }
+ if (nsibling < 2)
+ continue;
+
+ for (n = 1; n <= nsibling + 1; n++) {
+ err = nop_virtual_engine(i915, siblings, nsibling, n);
+ if (err)
+ goto out_unlock;
+ }
+ }
+
+out_unlock:
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
@@ -895,6 +1092,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_preempt_reset),
SUBTEST(live_late_preempt_timeout),
SUBTEST(live_context_preempt_timeout),
+ SUBTEST(live_virtual_engine),
};
return i915_subtests(tests, i915);
}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ac9261b59531..e7a66e914ce1 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1524,8 +1524,30 @@ struct i915_user_extension {
__u64 name;
};
+/*
+ * I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE:
+ *
+ * Enable load balancing across this set of engines.
+ *
+ * A virtual engine is created in the I915_EXEC_DEFAULT slot; when used,
+ * it proxies the execbuffer request onto one of the engines in the set
+ * so as to distribute the load evenly across them.
+ *
+ * The set of engines must be compatible (i.e. all of the same HW class),
+ * as they will share the same logical GPU context and ring.
+ *
+ * The context must be defined to use a single timeline for all engines.
+ */
+struct i915_context_engines_load_balance {
+ struct i915_user_extension base;
+
+ __u64 flags; /* all undefined flags must be zero */
+ __u64 mbz[3]; /* reserved for future use; must be zero */
+};
+
struct i915_context_param_engines {
__u64 extensions;
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0x1
struct {
__u32 class; /* see enum drm_i915_gem_engine_class */
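
For reference, a minimal userspace sketch of opting in. This is not part
of the patch: it assumes the I915_CONTEXT_PARAM_ENGINES setparam from
earlier in this series, and guesses at the per-engine {class, instance}
entry whose definition is truncated in the hunk above:

#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

static int set_load_balancing(int fd, uint32_t ctx_id)
{
	struct i915_context_engines_load_balance balance = {
		.base = { .name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE },
		/* .flags and .mbz[] must remain zero */
	};
	struct {
		__u64 extensions; /* mirrors struct i915_context_param_engines */
		struct { __u32 class, instance; } engines[2]; /* layout assumed */
	} set = {
		.extensions = (uintptr_t)&balance,
		.engines = { /* e.g. balance across two video engines */
			{ I915_ENGINE_CLASS_VIDEO, 0 },
			{ I915_ENGINE_CLASS_VIDEO, 1 },
		},
	};
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id, /* created with a single shared timeline */
		.param = I915_CONTEXT_PARAM_ENGINES, /* earlier in this series */
		.size = sizeof(set),
		.value = (uintptr_t)&set,
	};

	return drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
}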
--
2.17.0