[Intel-gfx] [PATCH 2/2] drm/i915: Unshare the idle-barrier from other kernel requests
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jul 29 09:43:31 UTC 2019
On 29/07/2019 09:54, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-07-29 08:46:59)
>>
>> On 25/07/2019 14:14, Chris Wilson wrote:
>>> +static int __live_remote_context(struct intel_engine_cs *engine,
>>> + struct i915_gem_context *fixme)
>>> +{
>>> + struct intel_context *local, *remote;
>>> + struct i915_request *rq;
>>> + int pass;
>>> + int err;
>>> +
>>> + /*
>>> + * Check that our idle barriers do not interfere with normal
>>> + * activity tracking. In particular, check that operating
>>> + * on the context image remotely (intel_context_prepare_remote_request)
>>> + * which inserts foriegn fences into intel_context.active are not
>>
>> typo in foreign
>>
>> "operating ... are not .." ? Foreign fences are not clobbered?
>
> , does not clobber the active tracking.
>
>>
>>> + * clobbered.
>>> + */
>>> +
>>> + remote = intel_context_create(fixme, engine);
>>> + if (!remote)
>>> + return -ENOMEM;
>>> +
>>> + local = intel_context_create(fixme, engine);
>>> + if (!local) {
>>> + err = -ENOMEM;
>>> + goto err_remote;
>>> + }
>>> +
>>> + for (pass = 0; pass <= 2; pass++) {
>>> + err = intel_context_pin(remote);
>>> + if (err)
>>> + goto err_local;
>>> +
>>> + rq = intel_context_create_request(local);
>>> + if (IS_ERR(rq)) {
>>> + err = PTR_ERR(rq);
>>> + goto err_unpin;
>>> + }
>>> +
>>> + err = intel_context_prepare_remote_request(remote, rq);
>>> + if (err) {
>>> + i915_request_add(rq);
>>> + goto err_unpin;
>>> + }
>>> +
>>> + err = request_sync(rq);
>>> + if (err)
>>> + goto err_unpin;
>>> +
>>> + intel_context_unpin(remote);
>>> + err = intel_context_pin(remote);
>>
>> unpin-pin is to trigger transfer of idle barriers and maybe trigger some
>> asserts?
>
> unpin is to trigger the idle-barrier. The pin is just the start of the
> next phase with another context. At first I tried doing two remote
> requests within one pin-phase, but that doesn't hit the bug. It needed
> the idle barrier in the middle of the test, not between passes.
>
> v2 wrapped it with another subroutine so the unpin-pin is not so
> glaringly obvious.
v*2*? :D Hard to know without revisioning and multiple sends... I've
found it now, I've been looking at a very different version here.
>
>>> + if (err)
>>> + goto err_local;
>>> +
>>> + rq = i915_request_create(engine->kernel_context);
>>
>> Why a request on kernel context here, a third context?
>
> The kernel_context is most important since that's the one used by the
> idle barrier. I included the normal context as well for completeness as
> the intel_context_prepare_remote_request() interface should not assume
> it is working from the kernel context.
Most important = worthy of a comment? ;)
>
>>> + if (IS_ERR(rq)) {
>>> + err = PTR_ERR(rq);
>>> + goto err_unpin;
>>> + }
>>> +
>>> + err = intel_context_prepare_remote_request(remote, rq);
>>> + if (err) {
>>> + i915_request_add(rq);
>>> + goto err_unpin;
>>> + }
>>> +
>>> + err = request_sync(rq);
>>> + if (err)
>>> + goto err_unpin;
>>> +
>>> + intel_context_unpin(remote);
>>> +
>>> + if (i915_active_is_idle(&remote->active)) {
>>> + pr_err("remote context is not active; expected idle-barrier (pass %d)\n", pass);
>>> + err = -EINVAL;
>>> + goto err_local;
>>> + }
>>> + }
>>> +
>>> + goto err_local;
>>> +
>>> +err_unpin:
>>> + intel_context_unpin(remote);
>>> +err_local:
>>> + intel_context_put(local);
>>> +err_remote:
>>> + intel_context_put(remote);
>>> + return err;
>>> +}
>>> +
>>> +static int live_remote_context(void *arg)
>>> +{
>>> + struct intel_gt *gt = arg;
>>> + struct intel_engine_cs *engine;
>>> + struct i915_gem_context *fixme;
>>> + enum intel_engine_id id;
>>> + struct drm_file *file;
>>> + int err = 0;
>>> +
>>> + file = mock_file(gt->i915);
>>> + if (IS_ERR(file))
>>> + return PTR_ERR(file);
>>> +
>>> + mutex_lock(>->i915->drm.struct_mutex);
>>> +
>>> + fixme = live_context(gt->i915, file);
>>> + if (!fixme) {
>>> + err = -ENOMEM;
>>> + goto unlock;
>>> + }
>>> +
>>> + for_each_engine(engine, gt->i915, id) {
>>> + err = __live_remote_context(engine, fixme);
>>> + if (err)
>>> + break;
>>> +
>>> + err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
>>> + if (err)
>>> + break;
>>> + }
>>> +
>>> +unlock:
>>> + mutex_unlock(>->i915->drm.struct_mutex);
>>> + mock_file_free(gt->i915, file);
>>> + return err;
>>> +}
>>> +
>>> +int intel_context_live_selftests(struct drm_i915_private *i915)
>>> +{
>>> + static const struct i915_subtest tests[] = {
>>> + SUBTEST(live_active_context),
>>> + SUBTEST(live_remote_context),
>>> + };
>>> + struct intel_gt *gt = &i915->gt;
>>> +
>>> + if (intel_gt_is_wedged(gt))
>>> + return 0;
>>> +
>>> + return intel_gt_live_subtests(tests, gt);
>>> +}
>>> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
>>> index 13f304a29fc8..e04afb519264 100644
>>> --- a/drivers/gpu/drm/i915/i915_active.c
>>> +++ b/drivers/gpu/drm/i915/i915_active.c
>>> @@ -184,6 +184,7 @@ active_instance(struct i915_active *ref, u64 idx)
>>> ref->cache = node;
>>> mutex_unlock(&ref->mutex);
>>>
>>> + BUILD_BUG_ON(offsetof(typeof(*node), base));
>>> return &node->base;
>>> }
>>>
>>> @@ -212,6 +213,8 @@ int i915_active_ref(struct i915_active *ref,
>>> struct i915_active_request *active;
>>> int err;
>>>
>>> + GEM_BUG_ON(!timeline); /* reserved for idle-barrier */
>>> +
>>> /* Prevent reaping in case we malloc/wait while building the tree */
>>> err = i915_active_acquire(ref);
>>> if (err)
>>> @@ -222,6 +225,7 @@ int i915_active_ref(struct i915_active *ref,
>>> err = -ENOMEM;
>>> goto out;
>>> }
>>> + GEM_BUG_ON(IS_ERR(active->request));
>>>
>>> if (!i915_active_request_isset(active))
>>> atomic_inc(&ref->count);
>>> @@ -342,6 +346,34 @@ void i915_active_fini(struct i915_active *ref)
>>> }
>>> #endif
>>>
>>> +static struct active_node *idle_barrier(struct i915_active *ref)
>>> +{
>>> + struct active_node *idle = NULL;
>>> + struct rb_node *rb;
>>> +
>>> + if (RB_EMPTY_ROOT(&ref->tree))
>>> + return NULL;
>>> +
>>> + mutex_lock(&ref->mutex);
>>> + for (rb = rb_first(&ref->tree); rb; rb = rb_next(rb)) {
>>> + struct active_node *node;
>>> +
>>> + node = rb_entry(rb, typeof(*node), node);
>>> + if (node->timeline)
>>> + break;
>>> +
>>> + if (!i915_active_request_isset(&node->base)) {
>>> + GEM_BUG_ON(!list_empty(&node->base.link));
>>> + rb_erase(rb, &ref->tree);
>>> + idle = node;
>>> + break;
>>> + }
>>
>> Under what circumstances does the walk continue? There can be two idle
>> barriers (timeline == 0) in the tree?
>
> Yes, there can be more than one (virtual engines). It should be the case
> that when i915_active becomes idle (all idle barriers are idle) that the
> tree is reaped. But... if we overlap active phases, we will get multiple
> idle barriers, some idle, some active, which we want to reuse to avoid
> having a potentially unbounded allocation.
This feels comment-worthy as well.
Regards,
Tvrtko
More information about the Intel-gfx
mailing list