[PATCH 52/52] virtual-bond
Chris Wilson
chris at chris-wilson.co.uk
Sun Feb 3 21:22:09 UTC 2019
Don't mention preemption and unsubmit.
Also what about bonding an engine as its own master? It doesn't break
anything internally, so allow the silliness.
---
drivers/gpu/drm/i915/i915_gem_context.c | 45 ++++++
drivers/gpu/drm/i915/i915_request.c | 1 +
drivers/gpu/drm/i915/i915_request.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 98 ++++++++++++
drivers/gpu/drm/i915/intel_lrc.h | 3 +
drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +
drivers/gpu/drm/i915/selftests/intel_lrc.c | 167 +++++++++++++++++++++
include/uapi/drm/i915_drm.h | 18 +++
8 files changed, 340 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 228834280208..d8d6cee333dd 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1348,6 +1348,16 @@ static int set_sseu(struct i915_gem_context *ctx,
return 0;
};
+/* Verify a user-supplied "must be zero" u32 field really is zero. */
+static int check_user_mbz32(u32 __user *user)
+{
+ u32 mbz;
+
+ if (get_user(mbz, user))
+ return -EFAULT;
+
+ /* -EFAULT if the pointer is unreadable, -EINVAL on a non-zero value */
+ return mbz ? -EINVAL : 0;
+}
+
static int check_user_mbz64(u64 __user *user)
{
u64 mbz;
@@ -1431,8 +1441,43 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
return 0;
}
+/*
+ * Parse the I915_CONTEXT_ENGINES_EXT_BOND extension: look up the master
+ * engine by (class, instance) and attach a bond of the selected siblings
+ * to the context's virtual engine (engines[0]). Returns 0 or -errno.
+ */
+static int
+set_engines__bond(struct i915_user_extension __user *base, void *data)
+{
+ struct i915_context_engines_bond __user *ext =
+ container_of_user(base, typeof(*ext), base);
+ const struct set_engines *set = data;
+ struct intel_engine_cs *master;
+ u32 class, instance;
+ u64 siblings;
+ int err;
+
+ /* A bond only makes sense on a virtual engine (slot 0). */
+ if (!set->engines[0])
+ return -EINVAL;
+
+ err = check_user_mbz32(&ext->flags);
+ if (err)
+ return err;
+
+ if (get_user(class, &ext->master_class))
+ return -EFAULT;
+
+ if (get_user(instance, &ext->master_instance))
+ return -EFAULT;
+
+ master = intel_engine_lookup_user(set->ctx->i915, class, instance);
+ if (!master)
+ return -EINVAL;
+
+ /*
+ * sibling_mask is __u64 in the uapi; reading it into a u32 would
+ * silently drop the high 32 bits and let invalid masks through
+ * the range check in intel_virtual_engine_attach_bond().
+ */
+ if (get_user(siblings, &ext->sibling_mask))
+ return -EFAULT;
+
+ return intel_virtual_engine_attach_bond(set->engines[0],
+ master, siblings);
+}
+
static const i915_user_extension_fn set_engines__extensions[] = {
[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
+ [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
};
static int
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 124d4b65261e..a61ea142553e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -733,6 +733,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
rq->batch = NULL;
rq->capture_list = NULL;
rq->waitboost = false;
+ rq->execution_mask = ~0u;
/*
* Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 901aaeb926a8..bd24576e588c 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -154,6 +154,7 @@ struct i915_request {
*/
struct i915_sched_node sched;
struct i915_dependency dep;
+ unsigned int execution_mask;
/*
* A convenience pointer to the current breadcrumb value stored in
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d4b53db74122..3f8b65fc7c89 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -178,6 +178,12 @@ struct virtual_engine {
int prio;
} nodes[I915_NUM_ENGINES];
+ struct ve_bond {
+ struct intel_engine_cs *master;
+ unsigned int sibling_mask;
+ } *bonds;
+ unsigned int nbond;
+
unsigned int count;
struct intel_engine_cs *siblings[0];
};
@@ -3156,6 +3162,7 @@ virtual_context_pin(struct intel_engine_cs *engine,
static void virtual_submission_tasklet(unsigned long data)
{
struct virtual_engine * const ve = (struct virtual_engine *)data;
+ unsigned int mask;
unsigned int n;
int prio;
@@ -3164,12 +3171,30 @@ static void virtual_submission_tasklet(unsigned long data)
return;
local_irq_disable();
+
+ mask = 0;
+ spin_lock(&ve->base.timeline.lock);
+ if (ve->request)
+ mask = ve->request->execution_mask;
+ spin_unlock(&ve->base.timeline.lock);
+
for (n = 0; READ_ONCE(ve->request) && n < ve->count; n++) {
struct intel_engine_cs *sibling = ve->siblings[n];
struct ve_node * const node = &ve->nodes[sibling->id];
struct rb_node **parent, *rb;
bool first;
+ if (unlikely(!(mask & sibling->mask))) {
+ if (!RB_EMPTY_NODE(&node->rb)) {
+ spin_lock(&sibling->timeline.lock);
+ rb_erase_cached(&node->rb,
+ &sibling->execlists.virtual);
+ RB_CLEAR_NODE(&node->rb);
+ spin_unlock(&sibling->timeline.lock);
+ }
+ continue;
+ }
+
spin_lock(&sibling->timeline.lock);
if (!RB_EMPTY_NODE(&node->rb)) {
@@ -3227,6 +3252,30 @@ static void virtual_submit_request(struct i915_request *request)
tasklet_schedule(&ve->base.execlists.tasklet);
}
+/*
+ * Linear scan of the virtual engine's bond table for the entry keyed by
+ * @master; returns NULL if no bond has been registered for that engine.
+ */
+static struct ve_bond *
+virtual_find_bond(struct virtual_engine *ve, struct intel_engine_cs *master)
+{
+ int i;
+
+ for (i = 0; i < ve->nbond; i++) {
+ if (ve->bonds[i].master == master)
+ return &ve->bonds[i];
+ }
+
+ return NULL;
+}
+
+/*
+ * bond_execute callback, invoked when the submit-fence @signal executes:
+ * narrow the bonded request @rq to the sibling engines registered against
+ * the signaler's engine. If no bond exists for that master, the request's
+ * execution_mask is left untouched (it may still run on any sibling).
+ */
+static void
+virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
+{
+ struct virtual_engine *ve = to_virtual_engine(rq->engine);
+ struct ve_bond *bond;
+
+ bond = virtual_find_bond(ve, to_request(signal)->engine);
+ if (bond) /* XXX serialise with rq->lock? */
+ rq->execution_mask &= bond->sibling_mask;
+}
+
struct intel_engine_cs *
intel_execlists_create_virtual(struct i915_gem_context *ctx,
struct intel_engine_cs **siblings,
@@ -3268,6 +3317,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
ve->base.schedule = i915_schedule;
ve->base.submit_request = virtual_submit_request;
+ ve->base.bond_execute = virtual_bond_execute;
ve->base.execlists.queue_priority_hint = INT_MIN;
tasklet_init(&ve->base.execlists.tasklet,
@@ -3327,6 +3377,54 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
return ERR_PTR(err);
}
+/*
+ * Translate a user bitmask over positions in ve->siblings[] into the
+ * union of those engines' global engine->mask bits. for_each_set_bit is
+ * bounded by ve->count, so bits beyond the sibling array are ignored
+ * (the caller rejects them beforehand anyway).
+ */
+static unsigned long
+virtual_execution_mask(struct virtual_engine *ve,
+ unsigned long mask)
+{
+ unsigned long emask = 0;
+ int bit;
+
+ for_each_set_bit(bit, &mask, ve->count)
+ emask |= ve->siblings[bit]->mask;
+
+ return emask;
+}
+
+/*
+ * Register (or extend) a bond on virtual engine @engine: once a bonded
+ * request's submit-fence is signaled by @master, the request is restricted
+ * to the siblings selected by @mask (bit positions index the virtual
+ * engine's sibling array). Repeated calls for the same master accumulate
+ * into one entry. Returns 0 or -errno.
+ */
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+ struct intel_engine_cs *master,
+ unsigned long mask)
+{
+ struct virtual_engine *ve = to_virtual_engine(engine);
+ struct ve_bond *bond;
+
+ /* Reject bits that refer to non-existent siblings. */
+ if (mask >> ve->count)
+ return -EINVAL;
+
+ mask = virtual_execution_mask(ve, mask);
+ if (!mask)
+ return -EINVAL;
+
+ /* Merge into an existing bond for this master, if any. */
+ bond = virtual_find_bond(ve, master);
+ if (bond) {
+ bond->sibling_mask |= mask;
+ return 0;
+ }
+
+ /* On failure, krealloc leaves ve->bonds intact — nothing leaks. */
+ bond = krealloc(ve->bonds,
+ sizeof(*bond) * (ve->nbond + 1),
+ GFP_KERNEL);
+ if (!bond)
+ return -ENOMEM;
+
+ bond[ve->nbond].master = master;
+ bond[ve->nbond].sibling_mask = mask;
+
+ ve->bonds = bond;
+ ve->nbond++;
+
+ return 0;
+}
+
void intel_virtual_engine_put(struct intel_engine_cs *engine)
{
if (!engine || !intel_engine_is_virtual(engine))
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 5779e776cc3f..23ff6583973d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -116,6 +116,9 @@ struct intel_engine_cs *
intel_execlists_create_virtual(struct i915_gem_context *ctx,
struct intel_engine_cs **siblings,
unsigned int count);
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+ struct intel_engine_cs *master,
+ unsigned long siblings);
void intel_virtual_engine_put(struct intel_engine_cs *engine);
u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b738f005fcfa..1df0aabc0823 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -460,6 +460,13 @@ struct intel_engine_cs {
*/
void (*submit_request)(struct i915_request *rq);
+ /*
+ * Called on signaling of a SUBMIT_FENCE, passing along the signaling
+ * request down to the bonded pairs.
+ */
+ void (*bond_execute)(struct i915_request *rq,
+ struct dma_fence *signal);
+
/*
* Call when the priority on a request has changed and it and its
* dependencies may need rescheduling. Note the request itself may
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 1099d2e69cb1..fedc5478a822 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -13,6 +13,7 @@
#include "igt_live_test.h"
#include "igt_spinner.h"
#include "i915_random.h"
+#include "lib_sw_fence.h"
#include "mock_context.h"
@@ -1199,6 +1200,171 @@ static int live_virtual_engine(void *arg)
return err;
}
+/*
+ * For each candidate master engine (outside @class), submit one master
+ * request plus one bonded request per sibling, each on a fresh virtual
+ * engine, tied to the master via a submit-fence and bonded to exactly
+ * one sibling (BIT(n)). Then verify every bonded request executed on
+ * its designated sibling. Returns 0 or -errno.
+ *
+ * rq[0] is the master request; rq[1..nsibling] are the bonded requests;
+ * the array is terminated by an ERR_PTR sentinel for the cleanup loop.
+ */
+static int bond_virtual_engine(struct drm_i915_private *i915,
+ unsigned int class,
+ struct intel_engine_cs **siblings,
+ unsigned int nsibling,
+ unsigned int flags)
+#define BOND_SCHEDULE BIT(0)
+{
+ struct intel_engine_cs *master;
+ struct i915_gem_context *ctx;
+ struct i915_request *rq[16];
+ enum intel_engine_id id;
+ unsigned long n;
+ int err;
+
+ GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
+
+ ctx = kernel_context(i915);
+ if (!ctx)
+ return -ENOMEM;
+
+ err = 0;
+ rq[0] = ERR_PTR(-ENOMEM);
+ for_each_engine(master, i915, id) {
+ struct i915_sw_fence fence;
+
+ if (master->class == class)
+ continue;
+
+ rq[0] = i915_request_alloc(master, ctx);
+ if (IS_ERR(rq[0])) {
+ err = PTR_ERR(rq[0]);
+ goto out;
+ }
+
+ if (flags & BOND_SCHEDULE)
+ onstack_fence_init(&fence);
+
+ i915_request_get(rq[0]);
+ i915_request_add(rq[0]);
+
+ for (n = 0; n < nsibling; n++) {
+ struct intel_engine_cs *engine;
+
+ /*
+ * Terminate the array before any early exit;
+ * otherwise the cleanup loop under out: would
+ * IS_ERR() and put an uninitialised (or stale)
+ * rq[] entry left over from a previous master.
+ */
+ rq[n + 1] = ERR_PTR(-ENOMEM);
+
+ engine = intel_execlists_create_virtual(ctx,
+ siblings,
+ nsibling);
+ if (IS_ERR(engine)) {
+ err = PTR_ERR(engine);
+ goto out;
+ }
+
+ err = intel_virtual_engine_attach_bond(engine,
+ master,
+ BIT(n));
+ if (err) {
+ intel_virtual_engine_put(engine);
+ goto out;
+ }
+
+ rq[n + 1] = i915_request_alloc(engine, ctx);
+ if (IS_ERR(rq[n + 1])) {
+ err = PTR_ERR(rq[n + 1]);
+ intel_virtual_engine_put(engine);
+ goto out;
+ }
+ i915_request_get(rq[n + 1]);
+
+ err = i915_request_await_execution(rq[n + 1],
+ &rq[0]->fence,
+ engine->bond_execute);
+ i915_request_add(rq[n + 1]);
+ intel_virtual_engine_put(engine);
+ if (err < 0)
+ goto out;
+ }
+ rq[n + 1] = ERR_PTR(-EINVAL);
+
+ if (flags & BOND_SCHEDULE)
+ onstack_fence_fini(&fence);
+
+ for (n = 0; n < nsibling; n++) {
+ if (i915_request_wait(rq[n + 1],
+ I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (rq[n + 1]->engine != siblings[n]) {
+ pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
+ siblings[n]->name,
+ rq[n + 1]->engine->name,
+ rq[0]->engine->name);
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ /* Drop this master's requests and reset the sentinel. */
+ for (n = 0; !IS_ERR(rq[n]); n++)
+ i915_request_put(rq[n]);
+ rq[0] = ERR_PTR(-ENOMEM);
+ }
+
+out:
+ for (n = 0; !IS_ERR(rq[n]); n++)
+ i915_request_put(rq[n]);
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+
+ kernel_context_close(ctx);
+ return err;
+}
+
+/*
+ * Selftest entry: for every engine class with at least two instances,
+ * run bond_virtual_engine() over that class's siblings for each phase
+ * (plain submission, then with BOND_SCHEDULE). Skipped entirely under
+ * GuC submission, which does not use the execlists virtual engine.
+ */
+static int live_virtual_bond(void *arg)
+{
+ static const struct phase {
+ const char *name;
+ unsigned int flags;
+ } phases[] = {
+ { "", 0 },
+ { "schedule", BOND_SCHEDULE },
+ { },
+ };
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+ unsigned int class, inst;
+ int err = 0;
+
+ if (USES_GUC_SUBMISSION(i915))
+ return 0;
+
+ mutex_lock(&i915->drm.struct_mutex);
+
+ for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+ const struct phase *p;
+ int nsibling;
+
+ /* Collect the contiguous run of instances in this class. */
+ nsibling = 0;
+ for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+ if (!i915->engine_class[class][inst])
+ break;
+
+ GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
+ siblings[nsibling++] = i915->engine_class[class][inst];
+ }
+ /* Bonding needs a choice of siblings to be meaningful. */
+ if (nsibling < 2)
+ continue;
+
+ for (p = phases; p->name; p++) {
+ err = bond_virtual_engine(i915,
+ class, siblings, nsibling,
+ p->flags);
+ if (err) {
+ pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
+ __func__, p->name, class, nsibling, err);
+ goto out_unlock;
+ }
+ }
+ }
+
+out_unlock:
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
@@ -1211,6 +1377,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_preempt_hang),
SUBTEST(live_preempt_smoke),
SUBTEST(live_virtual_engine),
+ SUBTEST(live_virtual_bond),
};
if (!HAS_EXECLISTS(i915))
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2c54eadc2046..95ae0fa6ce33 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1492,6 +1492,10 @@ struct drm_i915_gem_context_param {
* sized argument, will revert back to default settings.
*
* See struct i915_context_param_engines.
+ *
+ * Extensions:
+ * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
+ * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
*/
#define I915_CONTEXT_PARAM_ENGINES 0x9
@@ -1582,9 +1586,23 @@ struct i915_context_engines_load_balance {
__u64 mbz[4]; /* reserved for future use; must be zero */
};
+/*
+ * i915_context_engines_bond:
+ *
+ * Restrict which siblings of a virtual engine a request may run on once
+ * its submit-fence has been signaled. The master is identified by
+ * (master_class, master_instance); each set bit in sibling_mask selects,
+ * by position, an engine from the virtual engine's sibling array.
+ */
+struct i915_context_engines_bond {
+ struct i915_user_extension base;
+
+ __u16 master_class; /* see enum drm_i915_gem_engine_class */
+ __u16 master_instance;
+ __u32 flags; /* all undefined flags must be zero */
+ __u64 sibling_mask; /* bit positions index the virtual engine's siblings */
+};
+
struct i915_context_param_engines {
__u64 extensions; /* linked chain of extension blocks, 0 terminates */
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
+#define I915_CONTEXT_ENGINES_EXT_BOND 1
struct {
__u16 engine_class; /* see enum drm_i915_gem_engine_class */
--
2.20.1
More information about the Intel-gfx-trybot
mailing list