[Intel-gfx] [PATCH 1/2] drm/i915: Remove i915_request.lock requirement for execution callbacks
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jul 15 13:36:58 UTC 2020
On 15/07/2020 13:48, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-07-15 13:39:56)
>>
>> On 14/07/2020 10:47, Chris Wilson wrote:
>>> We are using the i915_request.lock to serialise adding an execution
>>> callback with __i915_request_submit. However, if we use an atomic
>>> llist_add to serialise multiple waiters and then check to see if the
>>> request is already executing, we can remove the irq-spinlock.
>>>
>>> Fixes: 1d9221e9d395 ("drm/i915: Skip signaling a signaled request")
>>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_request.c | 38 +++++++----------------------
>>> 1 file changed, 9 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>>> index 0b2fe55e6194..c59315def07d 100644
>>> --- a/drivers/gpu/drm/i915/i915_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_request.c
>>> @@ -190,13 +190,11 @@ static void __notify_execute_cb(struct i915_request *rq)
>>> {
>>> struct execute_cb *cb, *cn;
>>>
>>> - lockdep_assert_held(&rq->lock);
>>> -
>>> - GEM_BUG_ON(!i915_request_is_active(rq));
>>> if (llist_empty(&rq->execute_cb))
>>> return;
>>>
>>> - llist_for_each_entry_safe(cb, cn, rq->execute_cb.first, work.llnode)
>>> + llist_for_each_entry_safe(cb, cn,
>>> + llist_del_all(&rq->execute_cb), work.llnode)
>>> irq_work_queue(&cb->work);
>>>
>>> /*
>>> @@ -209,7 +207,6 @@ static void __notify_execute_cb(struct i915_request *rq)
>>> * preempt-to-idle cycle on the target engine, all the while the
>>> * master execute_cb may refire.
>>> */
>>> - init_llist_head(&rq->execute_cb);
>>> }
>>>
>>> static inline void
>>> @@ -276,6 +273,7 @@ static void remove_from_engine(struct i915_request *rq)
>>> list_del_init(&rq->sched.link);
>>> clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
>>> clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
>>> + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
>>> spin_unlock_irq(&locked->active.lock);
>>> }
>>>
>>> @@ -323,12 +321,8 @@ bool i915_request_retire(struct i915_request *rq)
>>> GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
>>> atomic_dec(&rq->engine->gt->rps.num_waiters);
>>> }
>>> - if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
>>> - set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
>>> - __notify_execute_cb(rq);
>>> - }
>>> - GEM_BUG_ON(!llist_empty(&rq->execute_cb));
>>> spin_unlock_irq(&rq->lock);
>>> + __notify_execute_cb(rq);
>>>
>>> remove_from_client(rq);
>>> __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
>>> @@ -357,12 +351,6 @@ void i915_request_retire_upto(struct i915_request *rq)
>>> } while (i915_request_retire(tmp) && tmp != rq);
>>> }
>>>
>>> -static void __llist_add(struct llist_node *node, struct llist_head *head)
>>> -{
>>> - node->next = head->first;
>>> - head->first = node;
>>> -}
>>> -
>>> static struct i915_request * const *
>>> __engine_active(struct intel_engine_cs *engine)
>>> {
>>> @@ -439,18 +427,11 @@ __await_execution(struct i915_request *rq,
>>> cb->work.func = irq_execute_cb_hook;
>>> }
>>>
>>> - spin_lock_irq(&signal->lock);
>>> - if (i915_request_is_active(signal) || __request_in_flight(signal)) {
>>> - if (hook) {
>>> - hook(rq, &signal->fence);
>>> - i915_request_put(signal);
>>> - }
>>> - i915_sw_fence_complete(cb->fence);
>>> - kmem_cache_free(global.slab_execute_cbs, cb);
>>> - } else {
>>> - __llist_add(&cb->work.llnode, &signal->execute_cb);
>>> + if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
>>> + if (i915_request_is_active(signal) ||
>>> + __request_in_flight(signal))
>>> + __notify_execute_cb(signal);
>>
>> Any reason why the hook couldn't be called straight away but needs to
>> always go through the worker now?
>>
>> Maybe it would be easier to figure out if it is race free that way..
>>
>> if (llist_add(..)) {
>> llist_for_each_entry_safe(.., llist_del_all(..), .)
>
> Then you would tell me off for open coding __notify_execute_cb for the
> benefit of not going through the irq_work. Something like
>
> @@ -186,7 +186,7 @@ static void irq_execute_cb_hook(struct irq_work *wrk)
> irq_execute_cb(wrk);
> }
>
> -static void __notify_execute_cb(struct i915_request *rq)
> +static void __execute_cb_irq(struct i915_request *rq)
> {
> struct execute_cb *cb, *cn;
>
> @@ -209,6 +209,15 @@ static void __notify_execute_cb(struct i915_request *rq)
> */
> }
>
> +static void __execute_cb_imm(struct i915_request *rq)
> +{
> + struct execute_cb *cb, *cn;
> +
> + llist_for_each_entry_safe(cb, cn,
> + llist_del_all(&rq->execute_cb), work.llnode)
> + cb->work.func(&cb->work);
> +}
> +
> static inline void
> remove_from_client(struct i915_request *request)
> {
> @@ -323,7 +332,7 @@ bool i915_request_retire(struct i915_request *rq)
> atomic_dec(&rq->engine->gt->rps.num_waiters);
> }
> spin_unlock_irq(&rq->lock);
> - __notify_execute_cb(rq);
> + __execute_cb_imm(rq);
>
> remove_from_client(rq);
> __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
> @@ -431,7 +440,7 @@ __await_execution(struct i915_request *rq,
> if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
> if (i915_request_is_active(signal) ||
> __request_in_flight(signal))
> - __notify_execute_cb(signal);
> + __execute_cb_imm(signal);
> }
>
> return 0;
> @@ -547,7 +556,7 @@ bool __i915_request_submit(struct i915_request *request)
> list_move_tail(&request->sched.link, &engine->active.requests);
> clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
> }
> - __notify_execute_cb(request);
> + __execute_cb_irq(request);
>
> /* We may be recursing from the signal callback of another i915 fence */
> if (!i915_request_signaled(request)) {
I think so. The IRQ callback can call the imm version, and the
if (..func) check needs to go somewhere.
Regards,
Tvrtko
More information about the Intel-gfx
mailing list