[Intel-gfx] [PATCH 08/21] drm/i915/gem: Disallow bonding of virtual engines

Wed Apr 28 10:13:52 UTC 2021

On Tue, Apr 27, 2021 at 08:51:08AM -0500, Jason Ekstrand wrote:
> On Fri, Apr 23, 2021 at 5:31 PM Jason Ekstrand <jason at jlekstrand.net> wrote:
> >
> > This adds a bunch of complexity which the media driver has never
> > actually used.  The media driver does technically bond a balanced engine
> > to another engine but the balanced engine only has one engine in the
> > sibling set.  This doesn't actually result in a virtual engine.
> >
> > Unless some userspace badly wants it, there's no good reason to support
> > this case.  This makes I915_CONTEXT_ENGINES_EXT_BOND a total no-op.  We
> > leave the validation code in place in case we ever decide we want to do
> > something interesting with the bonding information.
> >
> > Signed-off-by: Jason Ekstrand <jason at jlekstrand.net>
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_context.c   |  18 +-
> >  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |   2 +-
> >  drivers/gpu/drm/i915/gt/intel_engine_types.h  |   7 -
> >  .../drm/i915/gt/intel_execlists_submission.c  | 100 --------
> >  .../drm/i915/gt/intel_execlists_submission.h  |   4 -
> >  drivers/gpu/drm/i915/gt/selftest_execlists.c  | 229 ------------------
> >  6 files changed, 7 insertions(+), 353 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index e8179918fa306..5f8d0faf783aa 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > @@ -1553,6 +1553,12 @@ set_engines__bond(struct i915_user_extension __user *base, void *data)
> >         }
> >         virtual = set->engines->engines[idx]->engine;
> >
> > +       if (intel_engine_is_virtual(virtual)) {
> > +               drm_dbg(&i915->drm,
> > +                       "Bonding with virtual engines not allowed\n");
> > +               return -EINVAL;
> > +       }
> > +
> >         err = check_user_mbz(&ext->flags);
> >         if (err)
> >                 return err;
> > @@ -1593,18 +1599,6 @@ set_engines__bond(struct i915_user_extension __user *base, void *data)
> >                                 n, ci.engine_class, ci.engine_instance);
> >                         return -EINVAL;
> >                 }
> > -
> > -               /*
> > -                * A non-virtual engine has no siblings to choose between; and
> > -                * a submit fence will always be directed to the one engine.
> > -                */
> > -               if (intel_engine_is_virtual(virtual)) {
> > -                       err = intel_virtual_engine_attach_bond(virtual,
> > -                                                              master,
> > -                                                              bond);
> > -                       if (err)
> > -                               return err;
> > -               }
> >         }
> >
> >         return 0;
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > index d640bba6ad9ab..efb2fa3522a42 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > @@ -3474,7 +3474,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> >                 if (args->flags & I915_EXEC_FENCE_SUBMIT)
> >                         err = i915_request_await_execution(eb.request,
> >                                                            in_fence,
> > -                                                          eb.engine->bond_execute);
> > +                                                          NULL);
> >                 else
> >                         err = i915_request_await_dma_fence(eb.request,
> >                                                            in_fence);
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index 883bafc449024..68cfe5080325c 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -446,13 +446,6 @@ struct intel_engine_cs {
> >          */
> >         void            (*submit_request)(struct i915_request *rq);
> >
> > -       /*
> > -        * Called on signaling of a SUBMIT_FENCE, passing along the signaling
> > -        * request down to the bonded pairs.
> > -        */
> > -       void            (*bond_execute)(struct i915_request *rq,
> > -                                       struct dma_fence *signal);
> > -
> >         /*
> >          * Call when the priority on a request has changed and it and its
> >          * dependencies may need rescheduling. Note the request itself may
> > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > index de124870af44d..b6e2b59f133b7 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > @@ -181,18 +181,6 @@ struct virtual_engine {
> >                 int prio;
> >         } nodes[I915_NUM_ENGINES];
> >
> > -       /*
> > -        * Keep track of bonded pairs -- restrictions upon on our selection
> > -        * of physical engines any particular request may be submitted to.
> > -        * If we receive a submit-fence from a master engine, we will only
> > -        * use one of sibling_mask physical engines.
> > -        */
> > -       struct ve_bond {
> > -               const struct intel_engine_cs *master;
> > -               intel_engine_mask_t sibling_mask;
> > -       } *bonds;
> > -       unsigned int num_bonds;
> > -
> >         /* And finally, which physical engines this virtual engine maps onto. */
> >         unsigned int num_siblings;
> >         struct intel_engine_cs *siblings[];
> > @@ -3307,7 +3295,6 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk)
> >         intel_breadcrumbs_free(ve->base.breadcrumbs);
> >         intel_engine_free_request_pool(&ve->base);
> >
> > -       kfree(ve->bonds);
> >         kfree(ve);
> >  }
> >
> > @@ -3560,42 +3547,6 @@ static void virtual_submit_request(struct i915_request *rq)
> >         spin_unlock_irqrestore(&ve->base.active.lock, flags);
> >  }
> >
> > -static struct ve_bond *
> > -virtual_find_bond(struct virtual_engine *ve,
> > -                 const struct intel_engine_cs *master)
> > -{
> > -       int i;
> > -
> > -       for (i = 0; i < ve->num_bonds; i++) {
> > -               if (ve->bonds[i].master == master)
> > -                       return &ve->bonds[i];
> > -       }
> > -
> > -       return NULL;
> > -}
> > -
> > -static void
> > -virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
> > -{
> > -       struct virtual_engine *ve = to_virtual_engine(rq->engine);
> > -       intel_engine_mask_t allowed, exec;
> > -       struct ve_bond *bond;
> > -
> > -       allowed = ~to_request(signal)->engine->mask;
> > -
> > -       bond = virtual_find_bond(ve, to_request(signal)->engine);
> > -       if (bond)
> > -               allowed &= bond->sibling_mask;
> > -
> > -       /* Restrict the bonded request to run on only the available engines */
> > -       exec = READ_ONCE(rq->execution_mask);
> > -       while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
> > -               ;
> > -
> > -       /* Prevent the master from being re-run on the bonded engines */
> > -       to_request(signal)->execution_mask &= ~allowed;
> 
> I sent a v2 of this patch because it turns out I deleted a bit too
> much code.  This function in particular, has to stay, unfortunately.
> When a batch is submitted with a SUBMIT_FENCE, this is used to push
> the work onto a different engine than than the one it's supposed to
> run in parallel with.  This means we can't dead-code this function or
> the bond_execution function pointer and related stuff.

Uh that's disappointing, since if I understand your point correctly, the
sibling engines should all be singletons, not load balancing virtual ones.
So there really should not be any need to pick the right one at execution
time.

At least my understanding is that we're only limiting the engine set
further, so if both signaller and signalled request can only run on
singletons (which must be distinct, or the bonded parameter validation is
busted) there's really nothing to do here.

Also this is the locking code that freaks me out about the current bonded
execlist code ...

Dazzled and confused.
-Daniel

> 
> --Jason
> 
> 
> > -}
> > -
> >  struct intel_context *
> >  intel_execlists_create_virtual(struct intel_engine_cs **siblings,
> >                                unsigned int count)
> > @@ -3649,7 +3600,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
> >
> >         ve->base.schedule = i915_schedule;
> >         ve->base.submit_request = virtual_submit_request;
> > -       ve->base.bond_execute = virtual_bond_execute;
> >
> >         INIT_LIST_HEAD(virtual_queue(ve));
> >         ve->base.execlists.queue_priority_hint = INT_MIN;
> > @@ -3747,59 +3697,9 @@ intel_execlists_clone_virtual(struct intel_engine_cs *src)
> >         if (IS_ERR(dst))
> >                 return dst;
> >
> > -       if (se->num_bonds) {
> > -               struct virtual_engine *de = to_virtual_engine(dst->engine);
> > -
> > -               de->bonds = kmemdup(se->bonds,
> > -                                   sizeof(*se->bonds) * se->num_bonds,
> > -                                   GFP_KERNEL);
> > -               if (!de->bonds) {
> > -                       intel_context_put(dst);
> > -                       return ERR_PTR(-ENOMEM);
> > -               }
> > -
> > -               de->num_bonds = se->num_bonds;
> > -       }
> > -
> >         return dst;
> >  }
> >
> > -int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
> > -                                    const struct intel_engine_cs *master,
> > -                                    const struct intel_engine_cs *sibling)
> > -{
> > -       struct virtual_engine *ve = to_virtual_engine(engine);
> > -       struct ve_bond *bond;
> > -       int n;
> > -
> > -       /* Sanity check the sibling is part of the virtual engine */
> > -       for (n = 0; n < ve->num_siblings; n++)
> > -               if (sibling == ve->siblings[n])
> > -                       break;
> > -       if (n == ve->num_siblings)
> > -               return -EINVAL;
> > -
> > -       bond = virtual_find_bond(ve, master);
> > -       if (bond) {
> > -               bond->sibling_mask |= sibling->mask;
> > -               return 0;
> > -       }
> > -
> > -       bond = krealloc(ve->bonds,
> > -                       sizeof(*bond) * (ve->num_bonds + 1),
> > -                       GFP_KERNEL);
> > -       if (!bond)
> > -               return -ENOMEM;
> > -
> > -       bond[ve->num_bonds].master = master;
> > -       bond[ve->num_bonds].sibling_mask = sibling->mask;
> > -
> > -       ve->bonds = bond;
> > -       ve->num_bonds++;
> > -
> > -       return 0;
> > -}
> > -
> >  void intel_execlists_show_requests(struct intel_engine_cs *engine,
> >                                    struct drm_printer *m,
> >                                    void (*show_request)(struct drm_printer *m,
> > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
> > index fd61dae820e9e..80cec37a56ba9 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
> > @@ -39,10 +39,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
> >  struct intel_context *
> >  intel_execlists_clone_virtual(struct intel_engine_cs *src);
> >
> > -int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
> > -                                    const struct intel_engine_cs *master,
> > -                                    const struct intel_engine_cs *sibling);
> > -
> >  bool
> >  intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
> >
> > diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> > index 1081cd36a2bd3..f03446d587160 100644
> > --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> > @@ -4311,234 +4311,6 @@ static int live_virtual_preserved(void *arg)
> >         return 0;
> >  }
> >
> > -static int bond_virtual_engine(struct intel_gt *gt,
> > -                              unsigned int class,
> > -                              struct intel_engine_cs **siblings,
> > -                              unsigned int nsibling,
> > -                              unsigned int flags)
> > -#define BOND_SCHEDULE BIT(0)
> > -{
> > -       struct intel_engine_cs *master;
> > -       struct i915_request *rq[16];
> > -       enum intel_engine_id id;
> > -       struct igt_spinner spin;
> > -       unsigned long n;
> > -       int err;
> > -
> > -       /*
> > -        * A set of bonded requests is intended to be run concurrently
> > -        * across a number of engines. We use one request per-engine
> > -        * and a magic fence to schedule each of the bonded requests
> > -        * at the same time. A consequence of our current scheduler is that
> > -        * we only move requests to the HW ready queue when the request
> > -        * becomes ready, that is when all of its prerequisite fences have
> > -        * been signaled. As one of those fences is the master submit fence,
> > -        * there is a delay on all secondary fences as the HW may be
> > -        * currently busy. Equally, as all the requests are independent,
> > -        * they may have other fences that delay individual request
> > -        * submission to HW. Ergo, we do not guarantee that all requests are
> > -        * immediately submitted to HW at the same time, just that if the
> > -        * rules are abided by, they are ready at the same time as the
> > -        * first is submitted. Userspace can embed semaphores in its batch
> > -        * to ensure parallel execution of its phases as it requires.
> > -        * Though naturally it gets requested that perhaps the scheduler should
> > -        * take care of parallel execution, even across preemption events on
> > -        * different HW. (The proper answer is of course "lalalala".)
> > -        *
> > -        * With the submit-fence, we have identified three possible phases
> > -        * of synchronisation depending on the master fence: queued (not
> > -        * ready), executing, and signaled. The first two are quite simple
> > -        * and checked below. However, the signaled master fence handling is
> > -        * contentious. Currently we do not distinguish between a signaled
> > -        * fence and an expired fence, as once signaled it does not convey
> > -        * any information about the previous execution. It may even be freed
> > -        * and hence checking later it may not exist at all. Ergo we currently
> > -        * do not apply the bonding constraint for an already signaled fence,
> > -        * as our expectation is that it should not constrain the secondaries
> > -        * and is outside of the scope of the bonded request API (i.e. all
> > -        * userspace requests are meant to be running in parallel). As
> > -        * it imposes no constraint, and is effectively a no-op, we do not
> > -        * check below as normal execution flows are checked extensively above.
> > -        *
> > -        * XXX Is the degenerate handling of signaled submit fences the
> > -        * expected behaviour for userpace?
> > -        */
> > -
> > -       GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
> > -
> > -       if (igt_spinner_init(&spin, gt))
> > -               return -ENOMEM;
> > -
> > -       err = 0;
> > -       rq[0] = ERR_PTR(-ENOMEM);
> > -       for_each_engine(master, gt, id) {
> > -               struct i915_sw_fence fence = {};
> > -               struct intel_context *ce;
> > -
> > -               if (master->class == class)
> > -                       continue;
> > -
> > -               ce = intel_context_create(master);
> > -               if (IS_ERR(ce)) {
> > -                       err = PTR_ERR(ce);
> > -                       goto out;
> > -               }
> > -
> > -               memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
> > -
> > -               rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
> > -               intel_context_put(ce);
> > -               if (IS_ERR(rq[0])) {
> > -                       err = PTR_ERR(rq[0]);
> > -                       goto out;
> > -               }
> > -               i915_request_get(rq[0]);
> > -
> > -               if (flags & BOND_SCHEDULE) {
> > -                       onstack_fence_init(&fence);
> > -                       err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
> > -                                                              &fence,
> > -                                                              GFP_KERNEL);
> > -               }
> > -
> > -               i915_request_add(rq[0]);
> > -               if (err < 0)
> > -                       goto out;
> > -
> > -               if (!(flags & BOND_SCHEDULE) &&
> > -                   !igt_wait_for_spinner(&spin, rq[0])) {
> > -                       err = -EIO;
> > -                       goto out;
> > -               }
> > -
> > -               for (n = 0; n < nsibling; n++) {
> > -                       struct intel_context *ve;
> > -
> > -                       ve = intel_execlists_create_virtual(siblings, nsibling);
> > -                       if (IS_ERR(ve)) {
> > -                               err = PTR_ERR(ve);
> > -                               onstack_fence_fini(&fence);
> > -                               goto out;
> > -                       }
> > -
> > -                       err = intel_virtual_engine_attach_bond(ve->engine,
> > -                                                              master,
> > -                                                              siblings[n]);
> > -                       if (err) {
> > -                               intel_context_put(ve);
> > -                               onstack_fence_fini(&fence);
> > -                               goto out;
> > -                       }
> > -
> > -                       err = intel_context_pin(ve);
> > -                       intel_context_put(ve);
> > -                       if (err) {
> > -                               onstack_fence_fini(&fence);
> > -                               goto out;
> > -                       }
> > -
> > -                       rq[n + 1] = i915_request_create(ve);
> > -                       intel_context_unpin(ve);
> > -                       if (IS_ERR(rq[n + 1])) {
> > -                               err = PTR_ERR(rq[n + 1]);
> > -                               onstack_fence_fini(&fence);
> > -                               goto out;
> > -                       }
> > -                       i915_request_get(rq[n + 1]);
> > -
> > -                       err = i915_request_await_execution(rq[n + 1],
> > -                                                          &rq[0]->fence,
> > -                                                          ve->engine->bond_execute);
> > -                       i915_request_add(rq[n + 1]);
> > -                       if (err < 0) {
> > -                               onstack_fence_fini(&fence);
> > -                               goto out;
> > -                       }
> > -               }
> > -               onstack_fence_fini(&fence);
> > -               intel_engine_flush_submission(master);
> > -               igt_spinner_end(&spin);
> > -
> > -               if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
> > -                       pr_err("Master request did not execute (on %s)!\n",
> > -                              rq[0]->engine->name);
> > -                       err = -EIO;
> > -                       goto out;
> > -               }
> > -
> > -               for (n = 0; n < nsibling; n++) {
> > -                       if (i915_request_wait(rq[n + 1], 0,
> > -                                             MAX_SCHEDULE_TIMEOUT) < 0) {
> > -                               err = -EIO;
> > -                               goto out;
> > -                       }
> > -
> > -                       if (rq[n + 1]->engine != siblings[n]) {
> > -                               pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
> > -                                      siblings[n]->name,
> > -                                      rq[n + 1]->engine->name,
> > -                                      rq[0]->engine->name);
> > -                               err = -EINVAL;
> > -                               goto out;
> > -                       }
> > -               }
> > -
> > -               for (n = 0; !IS_ERR(rq[n]); n++)
> > -                       i915_request_put(rq[n]);
> > -               rq[0] = ERR_PTR(-ENOMEM);
> > -       }
> > -
> > -out:
> > -       for (n = 0; !IS_ERR(rq[n]); n++)
> > -               i915_request_put(rq[n]);
> > -       if (igt_flush_test(gt->i915))
> > -               err = -EIO;
> > -
> > -       igt_spinner_fini(&spin);
> > -       return err;
> > -}
> > -
> > -static int live_virtual_bond(void *arg)
> > -{
> > -       static const struct phase {
> > -               const char *name;
> > -               unsigned int flags;
> > -       } phases[] = {
> > -               { "", 0 },
> > -               { "schedule", BOND_SCHEDULE },
> > -               { },
> > -       };
> > -       struct intel_gt *gt = arg;
> > -       struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> > -       unsigned int class;
> > -       int err;
> > -
> > -       if (intel_uc_uses_guc_submission(&gt->uc))
> > -               return 0;
> > -
> > -       for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> > -               const struct phase *p;
> > -               int nsibling;
> > -
> > -               nsibling = select_siblings(gt, class, siblings);
> > -               if (nsibling < 2)
> > -                       continue;
> > -
> > -               for (p = phases; p->name; p++) {
> > -                       err = bond_virtual_engine(gt,
> > -                                                 class, siblings, nsibling,
> > -                                                 p->flags);
> > -                       if (err) {
> > -                               pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
> > -                                      __func__, p->name, class, nsibling, err);
> > -                               return err;
> > -                       }
> > -               }
> > -       }
> > -
> > -       return 0;
> > -}
> > -
> >  static int reset_virtual_engine(struct intel_gt *gt,
> >                                 struct intel_engine_cs **siblings,
> >                                 unsigned int nsibling)
> > @@ -4712,7 +4484,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
> >                 SUBTEST(live_virtual_mask),
> >                 SUBTEST(live_virtual_preserved),
> >                 SUBTEST(live_virtual_slice),
> > -               SUBTEST(live_virtual_bond),
> >                 SUBTEST(live_virtual_reset),
> >         };
> >
> > --
> > 2.31.1
> >
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch