[Intel-gfx] [PATCH v2 2/4] drm/i915: Verify the engine workarounds stick on application

Tue Apr 16 10:36:12 UTC 2019

On 16/04/2019 10:57, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-04-16 10:43:43)
>>
>> On 16/04/2019 09:10, Chris Wilson wrote:
>>> Read the engine workarounds back using the GPU after loading the initial
>>> context state to verify that we are setting them correctly, and bail if
>>> it fails.
>>>
>>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem.c               |   6 +
>>>    drivers/gpu/drm/i915/intel_workarounds.c      | 120 ++++++++++++++++++
>>>    drivers/gpu/drm/i915/intel_workarounds.h      |   2 +
>>>    .../drm/i915/selftests/intel_workarounds.c    |  53 +-------
>>>    4 files changed, 134 insertions(+), 47 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index 0a818a60ad31..95ae69753e91 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -4717,6 +4717,12 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
>>>                i915_request_add(rq);
>>>                if (err)
>>>                        goto err_active;
>>> +
>>> +             if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) &&
>>> +                 intel_engine_verify_workarounds(engine, "load")) {
>>> +                     err = -EIO;
>>> +                     goto err_active;
>>
>> The two submission use different contexts timelines so strictly speaking
>> should be somehow explicitly serialized.
> 
> Yes, we are reading from the kernel context. Neither of these contexts
> will overwrite the existing register state on first load, and there is
> nothing to prevent fifo here. But it doesn't matter which order the read
> is done, as the w/a are set by mmio, not engine->init_context().
> Although I've been contemplating using init_context in case LRI work
> better.

Using init_context for what, engine wa/?

> 
> It could be in a separate loop beforehand, the only catch is that we
> expect to leave this function with the GT idle, so it's best done before
> we park.

Yep since the placement in the ->init_context loop confused me I think a 
separate loop would indeed be better.

__i915_gem_restart_engines looks like the place, to follow 
engine->init_hw which applies the engine workarounds.

>>> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
>>> index 1c54b5030807..db99f2e676bb 100644
>>> --- a/drivers/gpu/drm/i915/intel_workarounds.c
>>> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
>>> @@ -1259,6 +1259,126 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
>>>        wa_list_apply(engine->uncore, &engine->wa_list);
>>>    }
>>>    
>>> +static struct i915_vma *
>>> +create_scratch(struct i915_address_space *vm, int count)
>>> +{
>>> +     struct drm_i915_gem_object *obj;
>>> +     struct i915_vma *vma;
>>> +     unsigned int size;
>>> +     int err;
>>> +
>>> +     size = round_up(count * 4, PAGE_SIZE);
>>
>> sizeof(u32) if you want.
>>
>>> +     obj = i915_gem_object_create_internal(vm->i915, size);
>>> +     if (IS_ERR(obj))
>>> +             return ERR_CAST(obj);
>>> +
>>> +     i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
>>> +
>>> +     vma = i915_vma_instance(obj, vm, NULL);
>>> +     if (IS_ERR(vma)) {
>>> +             err = PTR_ERR(vma);
>>> +             goto err_obj;
>>> +     }
>>> +
>>> +     err = i915_vma_pin(vma, 0, 0,
>>> +                        i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
>>> +     if (err)
>>> +             goto err_obj;
>>> +
>>> +     return vma;
>>> +
>>> +err_obj:
>>> +     i915_gem_object_put(obj);
>>> +     return ERR_PTR(err);
>>> +}
>>> +
>>> +static int
>>> +wa_list_srm(struct i915_request *rq,
>>> +         const struct i915_wa_list *wal,
>>> +         struct i915_vma *vma)
>>> +{
>>> +     const struct i915_wa *wa;
>>> +     u32 srm, *cs;
>>> +     int i;
>>
>> A little bit of inconsistency between type of i here and in
>> engine_wa_list_verify. Up to you.
>>
>>> +
>>> +     srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
>>> +     if (INTEL_GEN(rq->i915) >= 8)
>>> +             srm++;
>>> +
>>> +     cs = intel_ring_begin(rq, 4 * wal->count);
>>> +     if (IS_ERR(cs))
>>> +             return PTR_ERR(cs);
>>> +
>>> +     for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
>>> +             *cs++ = srm;
>>> +             *cs++ = i915_mmio_reg_offset(wa->reg);
>>> +             *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
>>> +             *cs++ = 0;
>>> +     }
>>> +     intel_ring_advance(rq, cs);
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +static int engine_wa_list_verify(struct intel_engine_cs *engine,
>>> +                              const struct i915_wa_list * const wal,
>>> +                              const char *from)
>>> +{
>>> +     const struct i915_wa *wa;
>>> +     struct i915_request *rq;
>>> +     struct i915_vma *vma;
>>> +     unsigned int i;
>>> +     u32 *results;
>>> +     int err;
>>> +
>>> +     if (!wal->count)
>>> +             return 0;
>>> +
>>> +     vma = create_scratch(&engine->i915->ggtt.vm, wal->count);
>>> +     if (IS_ERR(vma))
>>> +             return PTR_ERR(vma);
>>> +
>>> +     rq = i915_request_alloc(engine, engine->kernel_context->gem_context);
>>> +     if (IS_ERR(rq)) {
>>> +             err = PTR_ERR(rq);
>>> +             goto err_vma;
>>> +     }
>>> +
>>> +     err = wa_list_srm(rq, wal, vma);
>>> +     if (err)
>>> +             goto err_vma;
>>> +
>>> +     i915_request_add(rq);
>>> +     if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
>>
>> Wouldn't:
>>
>> err = i915_request_wait(...
>> if (err < 0 || !i915_request_completed())
>>
>> be more correct?
> 
> It can't return for any other reason as it's an uninterruptible wait, so
> it times out, or the request is completed.
> 
> If you want to consider that hangcheck could have kicked in the 0.2ms
> and cancelled the request, then we could need to check
> request->fence.error.

I was going by the comment for i915_request_wait which says "Returns the 
remaining time (in jiffies) if the request completed, which may be zero 
or -ETIME if the request is unfinished after the timeout expires.". So 
two timeout conditions. Anyways, I know it is highly hypothetical if that.

> 
>>
>> Okay so MMIO verification happens immediately after application and with
>> SRM from selftests.
>>
>> What about the context workarounds? With the SRM infrastructure those
>> could be verified easily as well I think.
> 
> They can, one problem at at time :) I thought the context workarounds
> were listed and checked by gem_workarounds. Aye, but only ctx_wa:
> 
> static int i915_wa_registers(struct seq_file *m, void *unused)
> {
>          struct drm_i915_private *i915 = node_to_i915(m->private);
>          const struct i915_wa_list *wal = &i915->engine[RCS0]->ctx_wa_list;
>          struct i915_wa *wa;
>          unsigned int i;
> 
>          seq_printf(m, "Workarounds applied: %u\n", wal->count);
>          for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
>                  seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
>                             i915_mmio_reg_offset(wa->reg), wa->val, wa->mask);
> 
>          return 0;
> }
> 
> 
> 

Yes one thing at a time is fine. :)

Regards,

Tvrtko