[Intel-gfx] [PATCH 20/45] drm/i915: Apply an execution_mask to the virtual_engine
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Apr 29 14:12:23 UTC 2019
On 25/04/2019 10:19, Chris Wilson wrote:
> Allow the user to direct which physical engines of the virtual engine
> they wish to execute on, as sometimes it is necessary to override the
> load balancing algorithm.
>
> v2: Only kick the virtual engines on context-out if required
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_lrc.c | 67 +++++++++++++
> drivers/gpu/drm/i915/gt/selftest_lrc.c | 131 +++++++++++++++++++++++++
> drivers/gpu/drm/i915/i915_request.c | 1 +
> drivers/gpu/drm/i915/i915_request.h | 3 +
> 4 files changed, 202 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index e8faf3417f9c..8acf9d1c6f2f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -549,6 +549,15 @@ execlists_context_schedule_in(struct i915_request *rq)
> rq->hw_context->active = rq->engine;
> }
>
> +static void kick_siblings(struct i915_request *rq)
> +{
> + struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
> + struct i915_request *next = READ_ONCE(ve->request);
> +
> + if (next && next->execution_mask & ~rq->execution_mask)
> + tasklet_schedule(&ve->base.execlists.tasklet);
> +}
> +
> static inline void
> execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
> {
> @@ -556,6 +565,18 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
> intel_engine_context_out(rq->engine);
> execlists_context_status_change(rq, status);
> trace_i915_request_out(rq);
> +
> + /*
> + * If this is part of a virtual engine, its next request may have
> + * been blocked waiting for access to the active context. We have
> + * to kick all the siblings again in case we need to switch (e.g.
> + * the next request is not runnable on this engine). Hopefully,
> + * we will already have submitted the next request before the
> + * tasklet runs and do not need to rebuild each virtual tree
> + * and kick everyone again.
> + */
> + if (rq->engine != rq->hw_context->engine)
> + kick_siblings(rq);
> }
>
> static u64 execlists_update_context(struct i915_request *rq)
> @@ -783,6 +804,9 @@ static bool virtual_matches(const struct virtual_engine *ve,
> {
> const struct intel_engine_cs *active;
>
> + if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
> + return false;
> +
> /*
> * We track when the HW has completed saving the context image
> * (i.e. when we have seen the final CS event switching out of
> @@ -3141,12 +3165,44 @@ static const struct intel_context_ops virtual_context_ops = {
> .destroy = virtual_context_destroy,
> };
>
> +static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
> +{
> + struct i915_request *rq;
> + intel_engine_mask_t mask;
> +
> + rq = READ_ONCE(ve->request);
> + if (!rq)
> + return 0;
> +
> + /* The rq is ready for submission; rq->execution_mask is now stable. */
> + mask = rq->execution_mask;
> + if (unlikely(!mask)) {
> + /* Invalid selection, submit to a random engine in error */
> + i915_request_skip(rq, -ENODEV);
> + mask = ve->siblings[0]->mask;
> + }
> +
> + GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
> + ve->base.name,
> + rq->fence.context, rq->fence.seqno,
> + mask, ve->base.execlists.queue_priority_hint);
> +
> + return mask;
> +}
> +
> static void virtual_submission_tasklet(unsigned long data)
> {
> struct virtual_engine * const ve = (struct virtual_engine *)data;
> const int prio = ve->base.execlists.queue_priority_hint;
> + intel_engine_mask_t mask;
> unsigned int n;
>
> + rcu_read_lock();
> + mask = virtual_submission_mask(ve);
> + rcu_read_unlock();
> + if (unlikely(!mask))
Is the rcu_lock thing solely for the same protection against wedging in
submit_notify?
Regards,
Tvrtko
> + return;
> +
> local_irq_disable();
> for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
> struct intel_engine_cs *sibling = ve->siblings[n];
> @@ -3154,6 +3210,17 @@ static void virtual_submission_tasklet(unsigned long data)
> struct rb_node **parent, *rb;
> bool first;
>
> + if (unlikely(!(mask & sibling->mask))) {
> + if (!RB_EMPTY_NODE(&node->rb)) {
> + spin_lock(&sibling->timeline.lock);
> + rb_erase_cached(&node->rb,
> + &sibling->execlists.virtual);
> + RB_CLEAR_NODE(&node->rb);
> + spin_unlock(&sibling->timeline.lock);
> + }
> + continue;
> + }
> +
> spin_lock(&sibling->timeline.lock);
>
> if (!RB_EMPTY_NODE(&node->rb)) {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 558399e5c7bb..73308749936c 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1480,6 +1480,136 @@ static int live_virtual_engine(void *arg)
> return err;
> }
>
> +static int mask_virtual_engine(struct drm_i915_private *i915,
> + struct intel_engine_cs **siblings,
> + unsigned int nsibling)
> +{
> + struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
> + struct i915_gem_context *ctx;
> + struct intel_context *ve;
> + struct igt_live_test t;
> + unsigned int n;
> + int err;
> +
> + /*
> + * Check that by setting the execution mask on a request, we can
> + * restrict it to our desired engine within the virtual engine.
> + */
> +
> + ctx = kernel_context(i915);
> + if (!ctx)
> + return -ENOMEM;
> +
> + ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
> + if (IS_ERR(ve)) {
> + err = PTR_ERR(ve);
> + goto out_close;
> + }
> +
> + err = intel_context_pin(ve);
> + if (err)
> + goto out_put;
> +
> + err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
> + if (err)
> + goto out_unpin;
> +
> + for (n = 0; n < nsibling; n++) {
> + request[n] = i915_request_create(ve);
> + if (IS_ERR(request)) {
> + err = PTR_ERR(request);
> + nsibling = n;
> + goto out;
> + }
> +
> + /* Reverse order as it's more likely to be unnatural */
> + request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
> +
> + i915_request_get(request[n]);
> + i915_request_add(request[n]);
> + }
> +
> + for (n = 0; n < nsibling; n++) {
> + if (i915_request_wait(request[n], I915_WAIT_LOCKED, HZ / 10) < 0) {
> + pr_err("%s(%s): wait for %llx:%lld timed out\n",
> + __func__, ve->engine->name,
> + request[n]->fence.context,
> + request[n]->fence.seqno);
> +
> + GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
> + __func__, ve->engine->name,
> + request[n]->fence.context,
> + request[n]->fence.seqno);
> + GEM_TRACE_DUMP();
> + i915_gem_set_wedged(i915);
> + err = -EIO;
> + goto out;
> + }
> +
> + if (request[n]->engine != siblings[nsibling - n - 1]) {
> + pr_err("Executed on wrong sibling '%s', expected '%s'\n",
> + request[n]->engine->name,
> + siblings[nsibling - n - 1]->name);
> + err = -EINVAL;
> + goto out;
> + }
> + }
> +
> + err = igt_live_test_end(&t);
> + if (err)
> + goto out;
> +
> +out:
> + if (igt_flush_test(i915, I915_WAIT_LOCKED))
> + err = -EIO;
> +
> + for (n = 0; n < nsibling; n++)
> + i915_request_put(request[n]);
> +
> +out_unpin:
> + intel_context_unpin(ve);
> +out_put:
> + intel_context_put(ve);
> +out_close:
> + kernel_context_close(ctx);
> + return err;
> +}
> +
> +static int live_virtual_mask(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> + unsigned int class, inst;
> + int err = 0;
> +
> + if (USES_GUC_SUBMISSION(i915))
> + return 0;
> +
> + mutex_lock(&i915->drm.struct_mutex);
> +
> + for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> + unsigned int nsibling;
> +
> + nsibling = 0;
> + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> + if (!i915->engine_class[class][inst])
> + break;
> +
> + siblings[nsibling++] = i915->engine_class[class][inst];
> + }
> + if (nsibling < 2)
> + continue;
> +
> + err = mask_virtual_engine(i915, siblings, nsibling);
> + if (err)
> + goto out_unlock;
> + }
> +
> +out_unlock:
> + mutex_unlock(&i915->drm.struct_mutex);
> + return err;
> +}
> +
> int intel_execlists_live_selftests(struct drm_i915_private *i915)
> {
> static const struct i915_subtest tests[] = {
> @@ -1493,6 +1623,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
> SUBTEST(live_preempt_hang),
> SUBTEST(live_preempt_smoke),
> SUBTEST(live_virtual_engine),
> + SUBTEST(live_virtual_mask),
> };
>
> if (!HAS_EXECLISTS(i915))
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index a5a2ccc23958..06c2b89d0b0a 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -688,6 +688,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
> rq->batch = NULL;
> rq->capture_list = NULL;
> rq->waitboost = false;
> + rq->execution_mask = ALL_ENGINES;
>
> INIT_LIST_HEAD(&rq->active_list);
> INIT_LIST_HEAD(&rq->execute_cb);
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 8025a89b5999..d7f9b2194568 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -28,6 +28,8 @@
> #include <linux/dma-fence.h>
> #include <linux/lockdep.h>
>
> +#include "gt/intel_engine_types.h"
> +
> #include "i915_gem.h"
> #include "i915_scheduler.h"
> #include "i915_selftest.h"
> @@ -156,6 +158,7 @@ struct i915_request {
> */
> struct i915_sched_node sched;
> struct i915_dependency dep;
> + intel_engine_mask_t execution_mask;
>
> /*
> * A convenience pointer to the current breadcrumb value stored in
>
More information about the Intel-gfx
mailing list