[Intel-gfx] [PATCH 16/39] drm/i915/execlists: Virtual engine bonding
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Mar 14 17:26:19 UTC 2019
On 13/03/2019 14:43, Chris Wilson wrote:
> Some users require that when a master batch is executed on one particular
> engine, a companion batch is run simultaneously on a specific slave
> engine. For this purpose, we introduce virtual engine bonding, allowing
> maps of master:slaves to be constructed to constrain which physical
> engines a virtual engine may select given a fence on a master engine.
>
> For the moment, we continue to ignore the issue of preemption deferring
> the master request for later. Ideally, we would like to then also remove
> the slave and run something else rather than have it stall the pipeline.
> With load balancing, we should be able to move the workload around it, but
> there is a similar stall on the master pipeline while it may wait for
> the slave to be executed. At the cost of more latency for the bonded
> request, it may be interesting to launch both on their engines in
> lockstep. (Bubbles abound.)
>
> Opens: Also what about bonding an engine as its own master? It doesn't
> break anything internally, so allow the silliness.
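
For anyone reading along, my understanding of the intended userspace flow is
two execbufs tied together with a submit fence, roughly (an untested sketch;
the engine indices, fd plumbing and names are made up):

	struct drm_i915_gem_execbuffer2 master_eb = {
		.flags = MASTER_ENGINE_IDX | I915_EXEC_FENCE_OUT,
		/* buffers etc. elided */
	};
	struct drm_i915_gem_execbuffer2 bonded_eb = {
		.flags = VIRTUAL_ENGINE_IDX | I915_EXEC_FENCE_SUBMIT,
		.rsvd2 = master_out_fence_fd, /* fires on submission, not completion */
	};

The submit fence then funnels into bond_execute below to restrict which
sibling the bonded batch may run on.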
>
> v2: Emancipate the bonds
> v3: Couple in delayed scheduling for the selftests
> v4: Handle invalid mutually exclusive bonding
> v5: Mention what the uapi does
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem_context.c | 50 +++++
> drivers/gpu/drm/i915/i915_request.c | 1 +
> drivers/gpu/drm/i915/i915_request.h | 1 +
> drivers/gpu/drm/i915/intel_engine_types.h | 7 +
> drivers/gpu/drm/i915/intel_lrc.c | 143 ++++++++++++++
> drivers/gpu/drm/i915/intel_lrc.h | 4 +
> drivers/gpu/drm/i915/selftests/intel_lrc.c | 185 ++++++++++++++++++
> drivers/gpu/drm/i915/selftests/lib_sw_fence.c | 3 +
> include/uapi/drm/i915_drm.h | 33 ++++
> 9 files changed, 427 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 98763d3f1b12..0ec78c386473 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1513,8 +1513,58 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
> return 0;
> }
>
> +static int
> +set_engines__bond(struct i915_user_extension __user *base, void *data)
> +{
> + struct i915_context_engines_bond __user *ext =
> + container_of_user(base, typeof(*ext), base);
> + const struct set_engines *set = data;
> + struct intel_engine_cs *master;
> + u32 class, instance, siblings;
These could be u16 class, instance to match the uapi fields, but for no real gain.
> + u16 idx;
> + int err;
> +
> + if (get_user(idx, &ext->virtual_index))
> + return -EFAULT;
> +
> + if (idx >= set->nengine)
> + return -EINVAL;
> +
> + idx = array_index_nospec(idx, set->nengine);
> + if (!set->engines[idx])
> + return -EINVAL;
> +
> + /*
> + * A non-virtual engine has 0 siblings to choose between, and the submit
> + * fence will always be directed to the one engine.
> + */
> + if (!intel_engine_is_virtual(set->engines[idx]))
> + return 0;
> +
> + err = check_user_mbz(&ext->mbz);
> + if (err)
> + return err;
> +
> + if (get_user(class, &ext->master_class))
> + return -EFAULT;
> +
> + if (get_user(instance, &ext->master_instance))
> + return -EFAULT;
> +
> + master = intel_engine_lookup_user(set->ctx->i915, class, instance);
> + if (!master)
> + return -EINVAL;
> +
> + if (get_user(siblings, &ext->sibling_mask))
> + return -EFAULT;
> +
> + return intel_virtual_engine_attach_bond(set->engines[idx],
> + master, siblings);
> +}
> +
> static const i915_user_extension_fn set_engines__extensions[] = {
> [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
> + [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
> };
>
> static int
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 0a46f8113f5c..9ce710baa452 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -743,6 +743,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> rq->batch = NULL;
> rq->capture_list = NULL;
> rq->waitboost = false;
> + rq->execution_mask = ~0u;
>
> /*
> * Reserve space in the ring buffer for all the commands required to
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index d4f6b2940130..862b25930de0 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -145,6 +145,7 @@ struct i915_request {
> */
> struct i915_sched_node sched;
> struct i915_dependency dep;
> + unsigned int execution_mask;
>
> /*
> * A convenience pointer to the current breadcrumb value stored in
> diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
> index 322fbda65190..1da35509d811 100644
> --- a/drivers/gpu/drm/i915/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/intel_engine_types.h
> @@ -384,6 +384,13 @@ struct intel_engine_cs {
> */
> void (*submit_request)(struct i915_request *rq);
>
> + /*
> + * Called on signaling of a SUBMIT_FENCE, passing along the signaling
> + * request down to the bonded pairs.
> + */
> + void (*bond_execute)(struct i915_request *rq,
> + struct dma_fence *signal);
> +
> /*
> * Call when the priority on a request has changed and it and its
> * dependencies may need rescheduling. Note the request itself may
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d54b7cc43633..79ab4bc543fd 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -191,6 +191,18 @@ struct virtual_engine {
> int prio;
> } nodes[I915_NUM_ENGINES];
>
> + /*
> + * Keep track of bonded pairs -- restrictions upon our selection
> + * of physical engines any particular request may be submitted to.
> + * If we receive a submit-fence from a master engine, we will only
> + * use one of sibling_mask physical engines.
> + */
> + struct ve_bond {
> + struct intel_engine_cs *master;
> + unsigned int sibling_mask;
> + } *bonds;
> + unsigned int nbond;
I'd be tempted to pluralize this as well.
> +
> /* And finally, which physical engines this virtual engine maps onto. */
> unsigned int count;
> struct intel_engine_cs *siblings[0];
> @@ -3156,6 +3168,8 @@ static void virtual_context_destroy(struct kref *kref)
> if (ve->context.state)
> __execlists_context_fini(&ve->context);
>
> + kfree(ve->bonds);
> +
> i915_timeline_fini(&ve->base.timeline);
> kfree(ve);
> }
> @@ -3207,9 +3221,28 @@ static const struct intel_context_ops virtual_context_ops = {
> .destroy = virtual_context_destroy,
> };
>
> +static void virtual_submit_error(struct virtual_engine *ve)
> +{
> + struct i915_request *rq = fetch_and_zero(&ve->request);
> + struct intel_engine_cs *engine = ve->siblings[0]; /* any victim */
> +
> + dma_fence_set_error(&rq->fence, -ENODEV);
> + i915_request_mark_complete(rq);
> +
> + spin_lock(&engine->timeline.lock);
> + rq->engine = engine;
> +
> + __i915_request_submit(rq);
> + /* move to start of list to avoid unwind complications */
> + list_move(&rq->link, &engine->timeline.requests);
> +
> + spin_unlock(&engine->timeline.lock);
> +}
> +
> static void virtual_submission_tasklet(unsigned long data)
> {
> struct virtual_engine * const ve = (struct virtual_engine *)data;
> + unsigned int mask;
> unsigned int n;
> int prio;
>
> @@ -3218,12 +3251,35 @@ static void virtual_submission_tasklet(unsigned long data)
> return;
>
> local_irq_disable();
> +
> + mask = 0;
> + spin_lock(&ve->base.timeline.lock);
> + if (ve->request) {
> + mask = ve->request->execution_mask;
> + if (unlikely(!mask))
> + virtual_submit_error(ve);
What clears the mask? And when it has been cleared, virtual_submit_error then fails the request?
> + }
> + spin_unlock(&ve->base.timeline.lock);
> + if (!mask)
> + goto out_irq;
> +
> for (n = 0; READ_ONCE(ve->request) && n < ve->count; n++) {
> struct intel_engine_cs *sibling = ve->siblings[n];
> struct ve_node * const node = &ve->nodes[sibling->id];
> struct rb_node **parent, *rb;
> bool first;
>
> + if (unlikely(!(mask & sibling->mask))) {
> + if (!RB_EMPTY_NODE(&node->rb)) {
> + spin_lock(&sibling->timeline.lock);
> + rb_erase_cached(&node->rb,
> + &sibling->execlists.virtual);
> + RB_CLEAR_NODE(&node->rb);
> + spin_unlock(&sibling->timeline.lock);
> + }
> + continue;
> + }
> +
> spin_lock(&sibling->timeline.lock);
>
> if (!RB_EMPTY_NODE(&node->rb)) {
> @@ -3270,6 +3326,7 @@ static void virtual_submission_tasklet(unsigned long data)
>
> spin_unlock(&sibling->timeline.lock);
> }
> +out_irq:
> local_irq_enable();
> }
>
> @@ -3286,6 +3343,30 @@ static void virtual_submit_request(struct i915_request *request)
> tasklet_schedule(&ve->base.execlists.tasklet);
> }
>
> +static struct ve_bond *
> +virtual_find_bond(struct virtual_engine *ve, struct intel_engine_cs *master)
> +{
> + int i;
> +
> + for (i = 0; i < ve->nbond; i++) {
> + if (ve->bonds[i].master == master)
> + return &ve->bonds[i];
> + }
> +
> + return NULL;
> +}
> +
> +static void
> +virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
> +{
> + struct virtual_engine *ve = to_virtual_engine(rq->engine);
> + struct ve_bond *bond;
> +
> + bond = virtual_find_bond(ve, to_request(signal)->engine);
> + if (bond) /* XXX serialise with rq->lock? */
> + rq->execution_mask &= bond->sibling_mask;
> +}
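
To spell out the flow for anyone skimming: this hook is handed to the submit
fence when the bonded request is constructed, as the selftest below does:

	err = i915_request_await_execution(bonded_rq,
					   &master_rq->fence,
					   engine->bond_execute);

(variable names mine), so by the time the virtual tasklet runs,
execution_mask has already been narrowed to the bonded siblings for that
master.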
> +
> struct intel_engine_cs *
> intel_execlists_create_virtual(struct i915_gem_context *ctx,
> struct intel_engine_cs **siblings,
> @@ -3324,6 +3405,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
>
> ve->base.schedule = i915_schedule;
> ve->base.submit_request = virtual_submit_request;
> + ve->base.bond_execute = virtual_bond_execute;
>
> ve->base.execlists.queue_priority_hint = INT_MIN;
> tasklet_init(&ve->base.execlists.tasklet,
> @@ -3414,9 +3496,70 @@ intel_execlists_clone_virtual(struct i915_gem_context *ctx,
> if (IS_ERR(dst))
> return dst;
>
> + if (se->nbond) {
> + struct virtual_engine *de = to_virtual_engine(dst);
> +
> + de->bonds = kmemdup(se->bonds,
> + sizeof(*se->bonds) * se->nbond,
> + GFP_KERNEL);
> + if (!de->bonds) {
> + intel_virtual_engine_destroy(dst);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + de->nbond = se->nbond;
> + }
> +
> return dst;
> }
>
> +static unsigned long
> +virtual_execution_mask(struct virtual_engine *ve, unsigned long mask)
> +{
> + unsigned long emask = 0;
> + int bit;
> +
> + for_each_set_bit(bit, &mask, ve->count)
> + emask |= ve->siblings[bit]->mask;
> +
> + return emask;
> +}
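
Worth spelling out that this translates from the uapi's sibling-index space
into the global engine->mask space. A worked example with hypothetical
engines:

	/* ve->siblings[] = { vcs0, vcs2 }, user mask = 0b10 */
	emask = vcs2->mask; /* a physical engine bit, not a sibling index */

which is what allows the tasklet to test mask & sibling->mask directly.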
> +
> +int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
> + struct intel_engine_cs *master,
> + unsigned long mask)
> +{
> + struct virtual_engine *ve = to_virtual_engine(engine);
> + struct ve_bond *bond;
> +
> + if (mask >> ve->count)
> + return -EINVAL;
> +
> + mask = virtual_execution_mask(ve, mask);
> + if (!mask)
> + return -EINVAL;
> +
> + bond = virtual_find_bond(ve, master);
> + if (bond) {
> + bond->sibling_mask |= mask;
> + return 0;
> + }
> +
> + bond = krealloc(ve->bonds,
> + sizeof(*bond) * (ve->nbond + 1),
> + GFP_KERNEL);
> + if (!bond)
> + return -ENOMEM;
> +
> + bond[ve->nbond].master = master;
> + bond[ve->nbond].sibling_mask = mask;
> +
> + ve->bonds = bond;
> + ve->nbond++;
> +
> + return 0;
> +}
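
Also worth noting that repeated attaches for the same master accumulate
rather than replace:

	intel_virtual_engine_attach_bond(engine, master, BIT(0));
	intel_virtual_engine_attach_bond(engine, master, BIT(1));
	/* bond->sibling_mask now covers siblings[0] and siblings[1] */

which seems like sensible uapi behaviour for building up the mask piecemeal.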
> +
> void intel_virtual_engine_destroy(struct intel_engine_cs *engine)
> {
> struct virtual_engine *ve = to_virtual_engine(engine);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 9d90dc68e02b..77b85648045a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -121,6 +121,10 @@ struct intel_engine_cs *
> intel_execlists_clone_virtual(struct i915_gem_context *ctx,
> struct intel_engine_cs *src);
>
> +int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
> + struct intel_engine_cs *master,
> + unsigned long mask);
> +
> void intel_virtual_engine_destroy(struct intel_engine_cs *engine);
>
> u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 4b8a339529d1..d2e00175c417 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -13,6 +13,7 @@
> #include "igt_live_test.h"
> #include "igt_spinner.h"
> #include "i915_random.h"
> +#include "lib_sw_fence.h"
>
> #include "mock_context.h"
>
> @@ -1224,6 +1225,189 @@ static int live_virtual_engine(void *arg)
> return err;
> }
>
> +static int bond_virtual_engine(struct drm_i915_private *i915,
> + unsigned int class,
> + struct intel_engine_cs **siblings,
> + unsigned int nsibling,
> + unsigned int flags)
> +#define BOND_SCHEDULE BIT(0)
> +{
> + struct intel_engine_cs *master;
> + struct i915_gem_context *ctx;
> + struct i915_request *rq[16];
> + enum intel_engine_id id;
> + unsigned long n;
> + int err;
> +
> + GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
> +
> + ctx = kernel_context(i915);
> + if (!ctx)
> + return -ENOMEM;
> +
> + err = 0;
> + rq[0] = ERR_PTR(-ENOMEM);
> + for_each_engine(master, i915, id) {
> + struct i915_sw_fence fence = {};
> +
> + if (master->class == class)
> + continue;
> +
> + memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
> +
> + rq[0] = i915_request_alloc(master, ctx);
> + if (IS_ERR(rq[0])) {
> + err = PTR_ERR(rq[0]);
> + goto out;
> + }
> + i915_request_get(rq[0]);
> +
> + if (flags & BOND_SCHEDULE) {
> + onstack_fence_init(&fence);
> + err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
> + &fence,
> + GFP_KERNEL);
> + }
> + i915_request_add(rq[0]);
> + if (err < 0)
> + goto out;
> +
> + for (n = 0; n < nsibling; n++) {
> + struct intel_engine_cs *engine;
> +
> + engine = intel_execlists_create_virtual(ctx,
> + siblings,
> + nsibling);
> + if (IS_ERR(engine)) {
> + err = PTR_ERR(engine);
> + onstack_fence_fini(&fence);
> + goto out;
> + }
> +
> + err = intel_virtual_engine_attach_bond(engine,
> + master,
> + BIT(n));
> + if (err) {
> + intel_virtual_engine_destroy(engine);
> + onstack_fence_fini(&fence);
> + goto out;
> + }
> +
> + rq[n + 1] = i915_request_alloc(engine, ctx);
> + if (IS_ERR(rq[n + 1])) {
> + err = PTR_ERR(rq[n + 1]);
> + intel_virtual_engine_destroy(engine);
> + onstack_fence_fini(&fence);
> + goto out;
> + }
> + i915_request_get(rq[n + 1]);
> +
> + err = i915_request_await_execution(rq[n + 1],
> + &rq[0]->fence,
> + engine->bond_execute);
> + i915_request_add(rq[n + 1]);
> + intel_virtual_engine_destroy(engine);
> + if (err < 0) {
> + onstack_fence_fini(&fence);
> + goto out;
> + }
> + }
> + onstack_fence_fini(&fence);
> +
> + if (i915_request_wait(rq[0],
> + I915_WAIT_LOCKED,
> + HZ / 10) < 0) {
> + pr_err("Master request did not execute (on %s)!\n",
> + rq[0]->engine->name);
> + err = -EIO;
> + goto out;
> + }
> +
> + for (n = 0; n < nsibling; n++) {
> + if (i915_request_wait(rq[n + 1],
> + I915_WAIT_LOCKED,
> + MAX_SCHEDULE_TIMEOUT) < 0) {
> + err = -EIO;
> + goto out;
> + }
> +
> + if (rq[n + 1]->engine != siblings[n]) {
> + pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
> + siblings[n]->name,
> + rq[n + 1]->engine->name,
> + rq[0]->engine->name);
> + err = -EINVAL;
> + goto out;
> + }
> + }
> +
> + for (n = 0; !IS_ERR(rq[n]); n++)
> + i915_request_put(rq[n]);
> + rq[0] = ERR_PTR(-ENOMEM);
> + }
> +
> +out:
> + for (n = 0; !IS_ERR(rq[n]); n++)
> + i915_request_put(rq[n]);
> + if (igt_flush_test(i915, I915_WAIT_LOCKED))
> + err = -EIO;
> +
> + kernel_context_close(ctx);
> + return err;
> +}
> +
> +static int live_virtual_bond(void *arg)
> +{
> + static const struct phase {
> + const char *name;
> + unsigned int flags;
> + } phases[] = {
> + { "", 0 },
> + { "schedule", BOND_SCHEDULE },
> + { },
> + };
> + struct drm_i915_private *i915 = arg;
> + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> + unsigned int class, inst;
> + int err = 0;
> +
> + if (USES_GUC_SUBMISSION(i915))
> + return 0;
> +
> + mutex_lock(&i915->drm.struct_mutex);
> +
> + for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> + const struct phase *p;
> + int nsibling;
> +
> + nsibling = 0;
> + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> + if (!i915->engine_class[class][inst])
> + break;
> +
> + GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
> + siblings[nsibling++] = i915->engine_class[class][inst];
> + }
> + if (nsibling < 2)
> + continue;
> +
> + for (p = phases; p->name; p++) {
> + err = bond_virtual_engine(i915,
> + class, siblings, nsibling,
> + p->flags);
> + if (err) {
> + pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
> + __func__, p->name, class, nsibling, err);
> + goto out_unlock;
> + }
> + }
> + }
> +
> +out_unlock:
> + mutex_unlock(&i915->drm.struct_mutex);
> + return err;
> +}
> +
> int intel_execlists_live_selftests(struct drm_i915_private *i915)
> {
> static const struct i915_subtest tests[] = {
> @@ -1236,6 +1420,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
> SUBTEST(live_preempt_hang),
> SUBTEST(live_preempt_smoke),
> SUBTEST(live_virtual_engine),
> + SUBTEST(live_virtual_bond),
> };
>
> if (!HAS_EXECLISTS(i915))
> diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
> index 2bfa72c1654b..b976c12817c5 100644
> --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
> +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
> @@ -45,6 +45,9 @@ void __onstack_fence_init(struct i915_sw_fence *fence,
>
> void onstack_fence_fini(struct i915_sw_fence *fence)
> {
> + if (!fence->flags)
> + return;
> +
> i915_sw_fence_commit(fence);
> i915_sw_fence_fini(fence);
> }
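
I take it the !fence->flags check is so the new selftest can call
onstack_fence_fini() on its zero-initialised on-stack fence in the phase
where BOND_SCHEDULE is not set and onstack_fence_init() was never run? A
comment to that effect would not hurt, e.g.:

	/* No-op if the fence was never initialised (flags == 0). */
	if (!fence->flags)
		return;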
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index c057536df5cd..ed33b8af8692 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1532,6 +1532,10 @@ struct drm_i915_gem_context_param {
> * sized argument, will revert back to default settings.
> *
> * See struct i915_context_param_engines.
> + *
> + * Extensions:
> + * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
> + * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
> */
> #define I915_CONTEXT_PARAM_ENGINES 0xa
> /* Must be kept compact -- no holes and well documented */
> @@ -1627,9 +1631,38 @@ struct i915_context_engines_load_balance {
> __u64 mbz64[4]; /* reserved for future use; must be zero */
> };
>
> +/*
> + * i915_context_engines_bond:
> + *
> + * Construct bonded pairs for execution within a virtual engine.
> + *
> + * All engines are equal, but some are more equal than others. Given
> + * the distribution of resources in the HW, it may be preferable to run
> + * a request on a given subset of engines in parallel to a request on a
> + * specific engine. We enable this selection of engines within a virtual
> + * engine by specifying bonding pairs; for any given master engine, we will
> + * only execute on one of the corresponding siblings within the virtual engine.
> + *
> + * To execute a request in parallel on the master engine and a sibling requires
> + * coordination with an I915_EXEC_FENCE_SUBMIT.
> + */
> +struct i915_context_engines_bond {
> + struct i915_user_extension base;
> +
> + __u16 virtual_index; /* index of virtual engine in ctx->engines[] */
> + __u16 mbz;
> +
> + __u16 master_class;
> + __u16 master_instance;
> +
> + __u64 sibling_mask; /* bitmask of BIT(sibling_index) wrt the virtual engine */
> + __u64 flags; /* all undefined flags must be zero */
> +};
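
An illustrative chain for setting this up via I915_CONTEXT_PARAM_ENGINES
(untested sketch; the indices and choice of master are made up):

	struct i915_context_engines_bond bond = {
		.base = { .name = I915_CONTEXT_ENGINES_EXT_BOND },
		.virtual_index = 0, /* engines[0] is the virtual engine */
		.master_class = I915_ENGINE_CLASS_RENDER,
		.master_instance = 0,
		.sibling_mask = 1 << 1, /* pair rcs0 only with siblings[1] */
	};

with bond.base chained from i915_context_param_engines.extensions.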
> +
> struct i915_context_param_engines {
> __u64 extensions; /* linked chain of extension blocks, 0 terminates */
> #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
> +#define I915_CONTEXT_ENGINES_EXT_BOND 1
>
> struct {
> __u16 engine_class; /* see enum drm_i915_gem_engine_class */
>
Regards,
Tvrtko