[Intel-gfx] [PATCH 12/13] drm/i915/execlists: Virtual engine bonding

Chris Wilson chris at chris-wilson.co.uk
Mon Mar 11 14:30:17 UTC 2019


Quoting Tvrtko Ursulin (2019-03-11 13:38:52)
> 
> On 08/03/2019 14:12, Chris Wilson wrote:
> > @@ -3191,12 +3198,30 @@ static void virtual_submission_tasklet(unsigned long data)
> >               return;
> >   
> >       local_irq_disable();
> > +
> > +     mask = 0;
> > +     spin_lock(&ve->base.timeline.lock);
> > +     if (ve->request)
> > +             mask = ve->request->execution_mask;
> > +     spin_unlock(&ve->base.timeline.lock);
> > +
> >       for (n = 0; READ_ONCE(ve->request) && n < ve->count; n++) {
> >               struct intel_engine_cs *sibling = ve->siblings[n];
> >               struct ve_node * const node = &ve->nodes[sibling->id];
> >               struct rb_node **parent, *rb;
> >               bool first;
> > 
> 
> /* Check if the request can be executed on this sibling, because it cannot
> be known in advance (at insert-to-queue time)...? */

This tasklet is the insert-to-queue point. You mean i915_request_add()? You
definitely don't know which engines all of your partners will be running on
at that point.

> > +int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
> > +                                  struct intel_engine_cs *master,
> > +                                  unsigned long mask)
> > +{
> > +     struct virtual_engine *ve = to_virtual_engine(engine);
> > +     struct ve_bond *bond;
> > +
> > +     if (mask >> ve->count)
> > +             return -EINVAL;
> > +
> > +     mask = virtual_execution_mask(ve, mask);
> > +     if (!mask)
> > +             return -EINVAL;
> > +
> > +     bond = virtual_find_bond(ve, master);
> > +     if (bond) {
> > +             bond->sibling_mask |= mask;
> > +             return 0;
> > +     }
> > +
> > +     bond = krealloc(ve->bonds,
> > +                     sizeof(*bond) * (ve->nbond + 1),
> > +                     GFP_KERNEL);
> > +     if (!bond)
> > +             return -ENOMEM;
> > +
> > +     bond[ve->nbond].master = master;
> > +     bond[ve->nbond].sibling_mask = mask;
> > +
> > +     ve->bonds = bond;
> > +     ve->nbond++;
> > +
> > +     return 0;
> > +}
> > +
> >   void intel_virtual_engine_destroy(struct intel_engine_cs *engine)
> >   {
> 
> I think there should be a hunk in here to free ve->bonds.

Might be sensible indeed.
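Something along these lines in the destructor should do (sketch only; the
existing teardown of the sibling nodes and queue is elided, and I'm assuming
the function already ends by freeing ve):

void intel_virtual_engine_destroy(struct intel_engine_cs *engine)
{
	struct virtual_engine *ve = to_virtual_engine(engine);

	/* ... existing teardown of the sibling nodes and queue ... */

	kfree(ve->bonds); /* release the krealloc'ed bond array */
	kfree(ve);
}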

> > +     err = 0;
> > +     rq[0] = ERR_PTR(-ENOMEM);
> > +     for_each_engine(master, i915, id) {
> > +             struct i915_sw_fence fence;
> > +
> > +             if (master->class == class)
> > +                     continue;
> > +
> > +             rq[0] = i915_request_alloc(master, ctx);
> > +             if (IS_ERR(rq[0])) {
> > +                     err = PTR_ERR(rq[0]);
> > +                     goto out;
> > +             }
> > +
> > +             if (flags & BOND_SCHEDULE)
> > +                     onstack_fence_init(&fence);
> > +
> > +             i915_request_get(rq[0]);
> > +             i915_request_add(rq[0]);
> > +
> > +             for (n = 0; n < nsibling; n++) {
> > +                     struct intel_engine_cs *engine;
> > +
> > +                     engine = intel_execlists_create_virtual(ctx,
> > +                                                             siblings,
> > +                                                             nsibling);
> > +                     if (IS_ERR(engine)) {
> > +                             err = PTR_ERR(engine);
> > +                             goto out;
> > +                     }
> > +
> > +                     err = intel_virtual_engine_attach_bond(engine,
> > +                                                            master,
> > +                                                            BIT(n));
> > +                     if (err) {
> > +                             intel_virtual_engine_destroy(engine);
> > +                             goto out;
> > +                     }
> > +
> > +                     rq[n + 1] = i915_request_alloc(engine, ctx);
> > +                     if (IS_ERR(rq[n + 1])) {
> > +                             err = PTR_ERR(rq[n + 1]);
> > +                             intel_virtual_engine_destroy(engine);
> > +                             goto out;
> > +                     }
> > +                     i915_request_get(rq[n + 1]);
> > +
> > +                     err = i915_request_await_execution(rq[n + 1],
> > +                                                        &rq[0]->fence,
> > +                                                        engine->bond_execute);
> > +                     i915_request_add(rq[n + 1]);
> > +                     intel_virtual_engine_destroy(engine);
> > +                     if (err < 0)
> > +                             goto out;
> > +             }
> > +             rq[n + 1] = ERR_PTR(-EINVAL);
> > +
> > +             if (flags & BOND_SCHEDULE)
> > +                     onstack_fence_fini(&fence);
> 
> The idea of this fence is to delay rq[0]? But I don't see it passed in
> anywhere? I wanted to suggest checking, before onstack_fence_fini(), that
> rq[0], or any other request, hasn't been submitted/completed yet.

Aye. The await on that fence is meant to be added just after onstack_fence_init().
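Something like this is what I intended (a sketch; it assumes hooking the
onstack fence onto rq[0]'s submit fence via i915_sw_fence_await_sw_fence_gfp(),
with the error unwinding elided):

		if (flags & BOND_SCHEDULE) {
			onstack_fence_init(&fence);
			/*
			 * Keep rq[0] from being submitted until all the
			 * bonded children have been queued, so that the
			 * children genuinely wait for the master to be
			 * assigned a physical engine.
			 */
			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
							       &fence,
							       GFP_KERNEL);
			if (err < 0)
				goto out; /* unwind elided in this sketch */
		}

		i915_request_get(rq[0]);
		i915_request_add(rq[0]);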

> > +/*
> > + * i915_context_engines_bond:
> > + *
> > + */
> > +struct i915_context_engines_bond {
> > +     struct i915_user_extension base;
> > +
> > +     __u16 engine_index;
> > +     __u16 mbz;
> > +
> > +     __u16 master_class;
> > +     __u16 master_instance;
> > +
> > +     __u64 sibling_mask;
> > +     __u64 flags; /* all undefined flags must be zero */
> > +};
> 
> Need some api doc for it all.

I knew there was a patch that I kept skipping.
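Roughly, the kerneldoc should say something like this (a draft only, wording
to be polished in that patch):

/**
 * struct i915_context_engines_bond - bond a virtual engine to a master
 *
 * When requests are submitted as a bonded pair (the slave awaiting the
 * master's execution via i915_request_await_execution()), this extension
 * restricts the virtual engine at @engine_index so that, for a master
 * submitted to (@master_class, @master_instance), the slave may only be
 * placed on the siblings selected by @sibling_mask.
 */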

> We are not in danger of any compiler-added padding due to not leaving any
> holes, right? So no need to define this as packed?

This is naturally aligned to u64, and that's the largest type used, so
no holes and no padding, and no silly games with variable length arrays.
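
If we want a belt-and-braces guarantee, a compile-time check is cheap (a
sketch, assuming the usual 16-byte i915_user_extension header, giving 40
bytes in total):

	BUILD_BUG_ON(sizeof(struct i915_context_engines_bond) != 40);
	BUILD_BUG_ON(offsetof(struct i915_context_engines_bond, sibling_mask) & 7);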
-Chris

