[Intel-gfx] [PATCH v2] drm/i915/gt: Eliminate the trylock for reading a timeline's hwsp
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Dec 17 16:56:19 UTC 2019
On 16/12/2019 17:52, Chris Wilson wrote:
> As we stash a pointer to the HWSP cacheline on the request, when reading
> it we only need confirm that the cacheline is still valid by checking
> that the request and timeline are still intact.
>
> v2: Protect hwsp_cachline with RCU
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_timeline.c | 59 +++++++++++-------------
> drivers/gpu/drm/i915/i915_request.c | 4 +-
> drivers/gpu/drm/i915/i915_request.h | 5 +-
> 3 files changed, 30 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index d71aafb66d6e..6da3f4af9614 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -25,10 +25,14 @@ struct intel_timeline_hwsp {
>
> struct intel_timeline_cacheline {
> struct i915_active active;
> +
> struct intel_timeline_hwsp *hwsp;
> void *vaddr;
> #define CACHELINE_BITS 6
> #define CACHELINE_FREE CACHELINE_BITS
> + u32 offset;
> +
> + struct rcu_head rcu;
> };
>
> static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
> @@ -133,7 +137,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
> __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
>
> i915_active_fini(&cl->active);
> - kfree(cl);
> + kfree_rcu(cl, rcu);
> }
>
> __i915_active_call
> @@ -177,6 +181,8 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
> i915_vma_get(hwsp->vma);
> cl->hwsp = hwsp;
> cl->vaddr = page_pack_bits(vaddr, cacheline);
> + cl->offset =
> + i915_ggtt_offset(cl->hwsp->vma) + cacheline * CACHELINE_BYTES;
>
> i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);
>
> @@ -514,46 +520,33 @@ int intel_timeline_read_hwsp(struct i915_request *from,
> struct i915_request *to,
> u32 *hwsp)
> {
> - struct intel_timeline *tl;
> + struct intel_timeline_cacheline *cl;
> int err;
>
> + GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));
> +
> rcu_read_lock();
> - tl = rcu_dereference(from->timeline);
> - if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref))
> - tl = NULL;
> + cl = rcu_dereference(from->hwsp_cacheline);
> + if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
> + goto unlock; /* seqno wrapped and completed! */
> + if (unlikely(i915_request_completed(from)))
> + goto release;
> rcu_read_unlock();
> - if (!tl) /* already completed */
> - return 1;
> -
> - GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl);
> -
> - err = -EAGAIN;
> - if (mutex_trylock(&tl->mutex)) {
> - struct intel_timeline_cacheline *cl = from->hwsp_cacheline;
> -
> - if (i915_request_completed(from)) {
> - err = 1;
> - goto unlock;
> - }
>
> - err = cacheline_ref(cl, to);
> - if (err)
> - goto unlock;
> + err = cacheline_ref(cl, to);
> + if (err)
> + goto out;
>
> - if (likely(cl == tl->hwsp_cacheline)) {
> - *hwsp = tl->hwsp_offset;
> - } else { /* across a seqno wrap, recover the original offset */
> - *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
> - ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
> - CACHELINE_BYTES;
> - }
> + *hwsp = cl->offset;
> +out:
> + i915_active_release(&cl->active);
> + return err;
>
> +release:
> + i915_active_release(&cl->active);
> unlock:
> - mutex_unlock(&tl->mutex);
> - }
> - intel_timeline_put(tl);
> -
> - return err;
> + rcu_read_unlock();
> + return 1;
> }
>
> void intel_timeline_unpin(struct intel_timeline *tl)
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index a59b803aef92..269470d3527a 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -655,9 +655,9 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
> rq->execution_mask = ce->engine->mask;
> rq->flags = 0;
>
> - rcu_assign_pointer(rq->timeline, tl);
> + RCU_INIT_POINTER(rq->timeline, tl);
> + RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
> rq->hwsp_seqno = tl->hwsp_seqno;
> - rq->hwsp_cacheline = tl->hwsp_cacheline;
>
> rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
>
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index a561b8efe869..aa38290eea3d 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -30,6 +30,7 @@
>
> #include "gt/intel_context_types.h"
> #include "gt/intel_engine_types.h"
> +#include "gt/intel_timeline_types.h"
>
> #include "i915_gem.h"
> #include "i915_scheduler.h"
> @@ -41,8 +42,6 @@
> struct drm_file;
> struct drm_i915_gem_object;
> struct i915_request;
> -struct intel_timeline;
> -struct intel_timeline_cacheline;
>
> struct i915_capture_list {
> struct i915_capture_list *next;
> @@ -183,7 +182,7 @@ struct i915_request {
> * inside the timeline's HWSP vma, but it is only valid while this
> * request has not completed and guarded by the timeline mutex.
> */
> - struct intel_timeline_cacheline *hwsp_cacheline;
> + struct intel_timeline_cacheline __rcu *hwsp_cacheline;
>
> /** Position in the ring of the start of the request */
> u32 head;
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list