[Intel-gfx] [PATCH v2 2/4] drm/i915: Verify the engine workarounds stick on application

Tue Apr 16 09:57:45 UTC 2019

Quoting Tvrtko Ursulin (2019-04-16 10:43:43)
> 
> On 16/04/2019 09:10, Chris Wilson wrote:
> > Read the engine workarounds back using the GPU after loading the initial
> > context state to verify that we are setting them correctly, and bail if
> > it fails.
> > 
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem.c               |   6 +
> >   drivers/gpu/drm/i915/intel_workarounds.c      | 120 ++++++++++++++++++
> >   drivers/gpu/drm/i915/intel_workarounds.h      |   2 +
> >   .../drm/i915/selftests/intel_workarounds.c    |  53 +-------
> >   4 files changed, 134 insertions(+), 47 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 0a818a60ad31..95ae69753e91 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -4717,6 +4717,12 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
> >               i915_request_add(rq);
> >               if (err)
> >                       goto err_active;
> > +
> > +             if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) &&
> > +                 intel_engine_verify_workarounds(engine, "load")) {
> > +                     err = -EIO;
> > +                     goto err_active;
> 
> The two submission use different contexts timelines so strictly speaking 
> should be somehow explicitly serialized.

Yes, we are reading from the kernel context. Neither of these contexts
will overwrite the existing register state on first load, and there is
nothing to prevent fifo here. But it doesn't matter which order the read
is done, as the w/a are set by mmio, not engine->init_context().
Although I've been contemplating using init_context in case LRI work
better.

It could be in a separate loop beforehand, the only catch is that we
expect to leave this function with the GT idle, so it's best done before
we park.

> > diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> > index 1c54b5030807..db99f2e676bb 100644
> > --- a/drivers/gpu/drm/i915/intel_workarounds.c
> > +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> > @@ -1259,6 +1259,126 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
> >       wa_list_apply(engine->uncore, &engine->wa_list);
> >   }
> >   
> > +static struct i915_vma *
> > +create_scratch(struct i915_address_space *vm, int count)
> > +{
> > +     struct drm_i915_gem_object *obj;
> > +     struct i915_vma *vma;
> > +     unsigned int size;
> > +     int err;
> > +
> > +     size = round_up(count * 4, PAGE_SIZE);
> 
> sizeof(u32) if you want.
> 
> > +     obj = i915_gem_object_create_internal(vm->i915, size);
> > +     if (IS_ERR(obj))
> > +             return ERR_CAST(obj);
> > +
> > +     i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
> > +
> > +     vma = i915_vma_instance(obj, vm, NULL);
> > +     if (IS_ERR(vma)) {
> > +             err = PTR_ERR(vma);
> > +             goto err_obj;
> > +     }
> > +
> > +     err = i915_vma_pin(vma, 0, 0,
> > +                        i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
> > +     if (err)
> > +             goto err_obj;
> > +
> > +     return vma;
> > +
> > +err_obj:
> > +     i915_gem_object_put(obj);
> > +     return ERR_PTR(err);
> > +}
> > +
> > +static int
> > +wa_list_srm(struct i915_request *rq,
> > +         const struct i915_wa_list *wal,
> > +         struct i915_vma *vma)
> > +{
> > +     const struct i915_wa *wa;
> > +     u32 srm, *cs;
> > +     int i;
> 
> A little bit of inconsistency between type of i here and in 
> engine_wa_list_verify. Up to you.
> 
> > +
> > +     srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
> > +     if (INTEL_GEN(rq->i915) >= 8)
> > +             srm++;
> > +
> > +     cs = intel_ring_begin(rq, 4 * wal->count);
> > +     if (IS_ERR(cs))
> > +             return PTR_ERR(cs);
> > +
> > +     for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
> > +             *cs++ = srm;
> > +             *cs++ = i915_mmio_reg_offset(wa->reg);
> > +             *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
> > +             *cs++ = 0;
> > +     }
> > +     intel_ring_advance(rq, cs);
> > +
> > +     return 0;
> > +}
> > +
> > +static int engine_wa_list_verify(struct intel_engine_cs *engine,
> > +                              const struct i915_wa_list * const wal,
> > +                              const char *from)
> > +{
> > +     const struct i915_wa *wa;
> > +     struct i915_request *rq;
> > +     struct i915_vma *vma;
> > +     unsigned int i;
> > +     u32 *results;
> > +     int err;
> > +
> > +     if (!wal->count)
> > +             return 0;
> > +
> > +     vma = create_scratch(&engine->i915->ggtt.vm, wal->count);
> > +     if (IS_ERR(vma))
> > +             return PTR_ERR(vma);
> > +
> > +     rq = i915_request_alloc(engine, engine->kernel_context->gem_context);
> > +     if (IS_ERR(rq)) {
> > +             err = PTR_ERR(rq);
> > +             goto err_vma;
> > +     }
> > +
> > +     err = wa_list_srm(rq, wal, vma);
> > +     if (err)
> > +             goto err_vma;
> > +
> > +     i915_request_add(rq);
> > +     if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
> 
> Wouldn't:
> 
> err = i915_request_wait(...
> if (err < 0 || !i915_request_completed())
> 
> be more correct?

It can't return for any other reason as it's an uninterruptible wait, so
it times out, or the request is completed.

If you want to consider that hangcheck could have kicked in the 0.2ms
and cancelled the request, then we could need to check
request->fence.error.

> 
> Okay so MMIO verification happens immediately after application and with 
> SRM from selftests.
> 
> What about the context workarounds? With the SRM infrastructure those 
> could be verified easily as well I think.

They can, one problem at at time :) I thought the context workarounds
were listed and checked by gem_workarounds. Aye, but only ctx_wa:

static int i915_wa_registers(struct seq_file *m, void *unused)
{
        struct drm_i915_private *i915 = node_to_i915(m->private);
        const struct i915_wa_list *wal = &i915->engine[RCS0]->ctx_wa_list;
        struct i915_wa *wa;
        unsigned int i;

        seq_printf(m, "Workarounds applied: %u\n", wal->count);
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
                           i915_mmio_reg_offset(wa->reg), wa->val, wa->mask);

        return 0;
}