[Intel-gfx] [PATCH] drm/i915: Keep timeline HWSP allocated until idle across the system
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Feb 27 14:20:25 UTC 2019
On 27/02/2019 11:15, Chris Wilson wrote:
> In preparation for enabling HW semaphores, we need to keep in flight
> timeline HWSP alive until its use across entire system has completed,
> as any other timeline active on the GPU may still refer back to the
> already retired timeline. We both have to delay recycling available
> cachelines and unpinning old HWSP until the next idle point.
>
> An easy option would be to simply keep all used HWSP until the system as
> a whole was idle, i.e. we could release them all at once on parking.
> However, on a busy system, we may never see a global idle point,
> essentially meaning the resource will be leaked until we are forced to
> do a GC pass. We already employ a fine-grained idle detection mechanism
> for vma, which we can reuse here so that each cacheline can be freed
> immediately after the last request using it is retired.
>
> v3: Keep track of the activity of each cacheline.
> v4: cacheline_free() on canceling the seqno tracking
> v5: Finally with a testcase to exercise wraparound
> v6: Pack cacheline into empty bits of page-aligned vaddr
> v7: Use i915_utils to hide the pointer casting around bit manipulation
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com> #v5
> ---
> drivers/gpu/drm/i915/i915_request.c | 31 +-
> drivers/gpu/drm/i915/i915_request.h | 11 +
> drivers/gpu/drm/i915/i915_timeline.c | 290 ++++++++++++++++--
> drivers/gpu/drm/i915/i915_timeline.h | 11 +-
> drivers/gpu/drm/i915/i915_utils.h | 3 +
> .../gpu/drm/i915/selftests/i915_timeline.c | 113 +++++++
> 6 files changed, 420 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 719d1a5ab082..d354967d6ae8 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -325,11 +325,6 @@ void i915_request_retire_upto(struct i915_request *rq)
> } while (tmp != rq);
> }
>
> -static u32 timeline_get_seqno(struct i915_timeline *tl)
> -{
> - return tl->seqno += 1 + tl->has_initial_breadcrumb;
> -}
> -
> static void move_to_timeline(struct i915_request *request,
> struct i915_timeline *timeline)
> {
> @@ -532,8 +527,10 @@ struct i915_request *
> i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> {
> struct drm_i915_private *i915 = engine->i915;
> - struct i915_request *rq;
> struct intel_context *ce;
> + struct i915_timeline *tl;
> + struct i915_request *rq;
> + u32 seqno;
> int ret;
>
> lockdep_assert_held(&i915->drm.struct_mutex);
> @@ -610,24 +607,27 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> }
> }
>
> - rq->rcustate = get_state_synchronize_rcu();
> -
> INIT_LIST_HEAD(&rq->active_list);
> +
> + tl = ce->ring->timeline;
> + ret = i915_timeline_get_seqno(tl, rq, &seqno);
> + if (ret)
> + goto err_free;
> +
> rq->i915 = i915;
> rq->engine = engine;
> rq->gem_context = ctx;
> rq->hw_context = ce;
> rq->ring = ce->ring;
> - rq->timeline = ce->ring->timeline;
> + rq->timeline = tl;
> GEM_BUG_ON(rq->timeline == &engine->timeline);
> - rq->hwsp_seqno = rq->timeline->hwsp_seqno;
> + rq->hwsp_seqno = tl->hwsp_seqno;
> + rq->hwsp_cacheline = tl->hwsp_cacheline;
> + rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
>
> spin_lock_init(&rq->lock);
> - dma_fence_init(&rq->fence,
> - &i915_fence_ops,
> - &rq->lock,
> - rq->timeline->fence_context,
> - timeline_get_seqno(rq->timeline));
> + dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
> + tl->fence_context, seqno);
>
> /* We bump the ref for the fence chain */
> i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
> @@ -687,6 +687,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
> GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
>
> +err_free:
> kmem_cache_free(global.slab_requests, rq);
> err_unreserve:
> mutex_unlock(&ce->ring->timeline->mutex);
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index be3ded6bcf56..ea1e6f0ade53 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -38,6 +38,7 @@ struct drm_file;
> struct drm_i915_gem_object;
> struct i915_request;
> struct i915_timeline;
> +struct i915_timeline_cacheline;
>
> struct i915_capture_list {
> struct i915_capture_list *next;
> @@ -148,6 +149,16 @@ struct i915_request {
> */
> const u32 *hwsp_seqno;
>
> + /*
> + * If we need to access the timeline's seqno for this request in
> + * another request, we need to keep a read reference to this associated
> + * cacheline, so that we do not free and recycle it before the foriegn
> + * observers have completed. Hence, we keep a pointer to the cacheline
> + * inside the timeline's HWSP vma, but it is only valid while this
> + * request has not completed and guarded by the timeline mutex.
> + */
> + struct i915_timeline_cacheline *hwsp_cacheline;
> +
> /** Position in the ring of the start of the request */
> u32 head;
>
> diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
> index 87a80558da28..e45e81b4cab6 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_timeline.c
> @@ -6,19 +6,29 @@
>
> #include "i915_drv.h"
>
> -#include "i915_timeline.h"
> +#include "i915_active.h"
> #include "i915_syncmap.h"
> +#include "i915_timeline.h"
>
> struct i915_timeline_hwsp {
> - struct i915_vma *vma;
> + struct i915_gt_timelines *gt;
> struct list_head free_link;
> + struct i915_vma *vma;
> u64 free_bitmap;
> };
>
> -static inline struct i915_timeline_hwsp *
> -i915_timeline_hwsp(const struct i915_timeline *tl)
> +struct i915_timeline_cacheline {
> + struct i915_active active;
> + struct i915_timeline_hwsp *hwsp;
> + void *vaddr;
> +#define CACHELINE_BITS 6
> +#define CACHELINE_FREE CACHELINE_BITS
> +};
> +
> +static inline struct drm_i915_private *
> +hwsp_to_i915(struct i915_timeline_hwsp *hwsp)
> {
> - return tl->hwsp_ggtt->private;
> + return container_of(hwsp->gt, struct drm_i915_private, gt.timelines);
> }
>
> static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915)
> @@ -71,6 +81,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
> vma->private = hwsp;
> hwsp->vma = vma;
> hwsp->free_bitmap = ~0ull;
> + hwsp->gt = gt;
>
> spin_lock(>->hwsp_lock);
> list_add(&hwsp->free_link, >->hwsp_free_list);
> @@ -88,14 +99,9 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
> return hwsp->vma;
> }
>
> -static void hwsp_free(struct i915_timeline *timeline)
> +static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
> {
> - struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
> - struct i915_timeline_hwsp *hwsp;
> -
> - hwsp = i915_timeline_hwsp(timeline);
> - if (!hwsp) /* leave global HWSP alone! */
> - return;
> + struct i915_gt_timelines *gt = hwsp->gt;
>
> spin_lock(>->hwsp_lock);
>
> @@ -103,7 +109,8 @@ static void hwsp_free(struct i915_timeline *timeline)
> if (!hwsp->free_bitmap)
> list_add_tail(&hwsp->free_link, >->hwsp_free_list);
>
> - hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
> + GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
> + hwsp->free_bitmap |= BIT_ULL(cacheline);
>
> /* And if no one is left using it, give the page back to the system */
> if (hwsp->free_bitmap == ~0ull) {
> @@ -115,6 +122,76 @@ static void hwsp_free(struct i915_timeline *timeline)
> spin_unlock(>->hwsp_lock);
> }
>
> +static void __idle_cacheline_free(struct i915_timeline_cacheline *cl)
> +{
> + GEM_BUG_ON(!i915_active_is_idle(&cl->active));
> +
> + i915_gem_object_unpin_map(cl->hwsp->vma->obj);
> + i915_vma_put(cl->hwsp->vma);
> + __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
> +
> + i915_active_fini(&cl->active);
> + kfree(cl);
> +}
> +
> +static void __cacheline_retire(struct i915_active *active)
> +{
> + struct i915_timeline_cacheline *cl =
> + container_of(active, typeof(*cl), active);
> +
> + i915_vma_unpin(cl->hwsp->vma);
> + if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
> + __idle_cacheline_free(cl);
> +}
> +
> +static struct i915_timeline_cacheline *
> +cacheline_alloc(struct i915_timeline_hwsp *hwsp, unsigned int cacheline)
> +{
> + struct i915_timeline_cacheline *cl;
> + void *vaddr;
> +
> + GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));
> +
> + cl = kmalloc(sizeof(*cl), GFP_KERNEL);
> + if (!cl)
> + return ERR_PTR(-ENOMEM);
> +
> + vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
> + if (IS_ERR(vaddr)) {
> + kfree(cl);
> + return ERR_CAST(vaddr);
> + }
> +
> + i915_vma_get(hwsp->vma);
> + cl->hwsp = hwsp;
> + cl->vaddr = page_pack_bits(vaddr, cacheline);
> +
> + i915_active_init(hwsp_to_i915(hwsp), &cl->active, __cacheline_retire);
> +
> + return cl;
> +}
> +
> +static void cacheline_acquire(struct i915_timeline_cacheline *cl)
> +{
> + if (cl && i915_active_acquire(&cl->active))
> + __i915_vma_pin(cl->hwsp->vma);
> +}
> +
> +static void cacheline_release(struct i915_timeline_cacheline *cl)
> +{
> + if (cl)
> + i915_active_release(&cl->active);
> +}
> +
> +static void cacheline_free(struct i915_timeline_cacheline *cl)
> +{
> + GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
> + cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);
> +
> + if (i915_active_is_idle(&cl->active))
> + __idle_cacheline_free(cl);
> +}
> +
> int i915_timeline_init(struct drm_i915_private *i915,
> struct i915_timeline *timeline,
> const char *name,
> @@ -136,29 +213,40 @@ int i915_timeline_init(struct drm_i915_private *i915,
> timeline->name = name;
> timeline->pin_count = 0;
> timeline->has_initial_breadcrumb = !hwsp;
> + timeline->hwsp_cacheline = NULL;
>
> - timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
> if (!hwsp) {
> + struct i915_timeline_cacheline *cl;
> unsigned int cacheline;
>
> hwsp = hwsp_alloc(timeline, &cacheline);
> if (IS_ERR(hwsp))
> return PTR_ERR(hwsp);
>
> + cl = cacheline_alloc(hwsp->private, cacheline);
> + if (IS_ERR(cl)) {
> + __idle_hwsp_free(hwsp->private, cacheline);
> + return PTR_ERR(cl);
> + }
> +
> + timeline->hwsp_cacheline = cl;
> timeline->hwsp_offset = cacheline * CACHELINE_BYTES;
> - }
> - timeline->hwsp_ggtt = i915_vma_get(hwsp);
>
> - vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
> - if (IS_ERR(vaddr)) {
> - hwsp_free(timeline);
> - i915_vma_put(hwsp);
> - return PTR_ERR(vaddr);
> + vaddr = page_mask_bits(cl->vaddr);
> + } else {
> + timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
> +
> + vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
> + if (IS_ERR(vaddr))
> + return PTR_ERR(vaddr);
> }
>
> timeline->hwsp_seqno =
> memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);
>
> + timeline->hwsp_ggtt = i915_vma_get(hwsp);
> + GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
> +
> timeline->fence_context = dma_fence_context_alloc(1);
>
> spin_lock_init(&timeline->lock);
> @@ -240,9 +328,12 @@ void i915_timeline_fini(struct i915_timeline *timeline)
> GEM_BUG_ON(i915_active_request_isset(&timeline->barrier));
>
> i915_syncmap_free(&timeline->sync);
> - hwsp_free(timeline);
>
> - i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
> + if (timeline->hwsp_cacheline)
> + cacheline_free(timeline->hwsp_cacheline);
> + else
> + i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
> +
> i915_vma_put(timeline->hwsp_ggtt);
> }
>
> @@ -285,6 +376,7 @@ int i915_timeline_pin(struct i915_timeline *tl)
> i915_ggtt_offset(tl->hwsp_ggtt) +
> offset_in_page(tl->hwsp_offset);
>
> + cacheline_acquire(tl->hwsp_cacheline);
> timeline_add_to_active(tl);
>
> return 0;
> @@ -294,6 +386,157 @@ int i915_timeline_pin(struct i915_timeline *tl)
> return err;
> }
>
> +static u32 timeline_advance(struct i915_timeline *tl)
> +{
> + GEM_BUG_ON(!tl->pin_count);
> + GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
> +
> + return tl->seqno += 1 + tl->has_initial_breadcrumb;
> +}
> +
> +static void timeline_rollback(struct i915_timeline *tl)
> +{
> + tl->seqno -= 1 + tl->has_initial_breadcrumb;
> +}
> +
> +static noinline int
> +__i915_timeline_get_seqno(struct i915_timeline *tl,
> + struct i915_request *rq,
> + u32 *seqno)
> +{
> + struct i915_timeline_cacheline *cl;
> + unsigned int cacheline;
> + struct i915_vma *vma;
> + void *vaddr;
> + int err;
> +
> + /*
> + * If there is an outstanding GPU reference to this cacheline,
> + * such as it being sampled by a HW semaphore on another timeline,
> + * we cannot wraparound our seqno value (the HW semaphore does
> + * a strict greater-than-or-equals compare, not i915_seqno_passed).
> + * So if the cacheline is still busy, we must detach ourselves
> + * from it and leave it inflight alongside its users.
> + *
> + * However, if nobody is watching and we can guarantee that nobody
> + * will, we could simply reuse the same cacheline.
> + *
> + * if (i915_active_request_is_signaled(&tl->last_request) &&
> + * i915_active_is_signaled(&tl->hwsp_cacheline->active))
> + * return 0;
> + *
> + * That seems unlikely for a busy timeline that needed to wrap in
> + * the first place, so just replace the cacheline.
> + */
> +
> + vma = hwsp_alloc(tl, &cacheline);
> + if (IS_ERR(vma)) {
> + err = PTR_ERR(vma);
> + goto err_rollback;
> + }
> +
> + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> + if (err) {
> + __idle_hwsp_free(vma->private, cacheline);
> + goto err_rollback;
> + }
> +
> + cl = cacheline_alloc(vma->private, cacheline);
> + if (IS_ERR(cl)) {
> + err = PTR_ERR(cl);
> + __idle_hwsp_free(vma->private, cacheline);
> + goto err_unpin;
> + }
> + GEM_BUG_ON(cl->hwsp->vma != vma);
> +
> + /*
> + * Attach the old cacheline to the current request, so that we only
> + * free it after the current request is retired, which ensures that
> + * all writes into the cacheline from previous requests are complete.
> + */
> + err = i915_active_ref(&tl->hwsp_cacheline->active,
> + tl->fence_context, rq);
> + if (err)
> + goto err_cacheline;
> +
> + cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
> + cacheline_free(tl->hwsp_cacheline);
> +
> + i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
> + i915_vma_put(tl->hwsp_ggtt);
> +
> + tl->hwsp_ggtt = i915_vma_get(vma);
> +
> + vaddr = page_mask_bits(cl->vaddr);
> + tl->hwsp_offset = cacheline * CACHELINE_BYTES;
> + tl->hwsp_seqno =
> + memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);
> +
> + tl->hwsp_offset += i915_ggtt_offset(vma);
> +
> + cacheline_acquire(cl);
> + tl->hwsp_cacheline = cl;
> +
> + *seqno = timeline_advance(tl);
> + GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
> + return 0;
> +
> +err_cacheline:
> + cacheline_free(cl);
> +err_unpin:
> + i915_vma_unpin(vma);
> +err_rollback:
> + timeline_rollback(tl);
> + return err;
> +}
> +
> +int i915_timeline_get_seqno(struct i915_timeline *tl,
> + struct i915_request *rq,
> + u32 *seqno)
> +{
> + *seqno = timeline_advance(tl);
> +
> + /* Replace the HWSP on wraparound for HW semaphores */
> + if (unlikely(!*seqno && tl->hwsp_cacheline))
> + return __i915_timeline_get_seqno(tl, rq, seqno);
> +
> + return 0;
> +}
> +
> +static int cacheline_ref(struct i915_timeline_cacheline *cl,
> + struct i915_request *rq)
> +{
> + return i915_active_ref(&cl->active, rq->fence.context, rq);
> +}
> +
> +int i915_timeline_read_hwsp(struct i915_request *from,
> + struct i915_request *to,
> + u32 *hwsp)
> +{
> + struct i915_timeline_cacheline *cl = from->hwsp_cacheline;
> + struct i915_timeline *tl = from->timeline;
> + int err;
> +
> + GEM_BUG_ON(to->timeline == tl);
> +
> + mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
> + err = i915_request_completed(from);
> + if (!err)
> + err = cacheline_ref(cl, to);
> + if (!err) {
> + if (likely(cl == tl->hwsp_cacheline)) {
> + *hwsp = tl->hwsp_offset;
> + } else { /* across a seqno wrap, recover the original offset */
> + *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
> + ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
> + CACHELINE_BYTES;
> + }
> + }
> + mutex_unlock(&tl->mutex);
> +
> + return err;
> +}
> +
> void i915_timeline_unpin(struct i915_timeline *tl)
> {
> GEM_BUG_ON(!tl->pin_count);
> @@ -301,6 +544,7 @@ void i915_timeline_unpin(struct i915_timeline *tl)
> return;
>
> timeline_remove_from_active(tl);
> + cacheline_release(tl->hwsp_cacheline);
>
> /*
> * Since this timeline is idle, all bariers upon which we were waiting
> diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
> index 36c3849f7108..60b1dfad93ed 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_timeline.h
> @@ -34,7 +34,7 @@
> #include "i915_utils.h"
>
> struct i915_vma;
> -struct i915_timeline_hwsp;
> +struct i915_timeline_cacheline;
>
> struct i915_timeline {
> u64 fence_context;
> @@ -51,6 +51,8 @@ struct i915_timeline {
> struct i915_vma *hwsp_ggtt;
> u32 hwsp_offset;
>
> + struct i915_timeline_cacheline *hwsp_cacheline;
> +
> bool has_initial_breadcrumb;
>
> /**
> @@ -162,8 +164,15 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
> }
>
> int i915_timeline_pin(struct i915_timeline *tl);
> +int i915_timeline_get_seqno(struct i915_timeline *tl,
> + struct i915_request *rq,
> + u32 *seqno);
> void i915_timeline_unpin(struct i915_timeline *tl);
>
> +int i915_timeline_read_hwsp(struct i915_request *from,
> + struct i915_request *until,
> + u32 *hwsp_offset);
> +
> void i915_timelines_init(struct drm_i915_private *i915);
> void i915_timelines_park(struct drm_i915_private *i915);
> void i915_timelines_fini(struct drm_i915_private *i915);
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index 9726df37c4c4..1fb9c8b2f5b3 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -92,6 +92,9 @@
> ((typeof(ptr))((unsigned long)(ptr) | __bits)); \
> })
>
> +#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
> +#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
> +
> #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
> #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
> #define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c
> index 12ea69b1a1e5..844701759ffc 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c
> @@ -641,6 +641,118 @@ static int live_hwsp_alternate(void *arg)
> #undef NUM_TIMELINES
> }
>
> +static int live_hwsp_wrap(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct intel_engine_cs *engine;
> + struct i915_timeline *tl;
> + enum intel_engine_id id;
> + intel_wakeref_t wakeref;
> + int err = 0;
> +
> + /*
> + * Across a seqno wrap, we need to keep the old cacheline alive for
> + * foreign GPU references.
> + */
> +
> + mutex_lock(&i915->drm.struct_mutex);
> + wakeref = intel_runtime_pm_get(i915);
> +
> + tl = i915_timeline_create(i915, __func__, NULL);
> + if (IS_ERR(tl)) {
> + err = PTR_ERR(tl);
> + goto out_rpm;
> + }
> + if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
> + goto out_free;
> +
> + err = i915_timeline_pin(tl);
> + if (err)
> + goto out_free;
> +
> + for_each_engine(engine, i915, id) {
> + const u32 *hwsp_seqno[2];
> + struct i915_request *rq;
> + u32 seqno[2];
> +
> + if (!intel_engine_can_store_dword(engine))
> + continue;
> +
> + rq = i915_request_alloc(engine, i915->kernel_context);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + goto out;
> + }
> +
> + tl->seqno = -4u;
> +
> + err = i915_timeline_get_seqno(tl, rq, &seqno[0]);
> + if (err) {
> + i915_request_add(rq);
> + goto out;
> + }
> + pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
> + seqno[0], tl->hwsp_offset);
> +
> + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
> + if (err) {
> + i915_request_add(rq);
> + goto out;
> + }
> + hwsp_seqno[0] = tl->hwsp_seqno;
> +
> + err = i915_timeline_get_seqno(tl, rq, &seqno[1]);
> + if (err) {
> + i915_request_add(rq);
> + goto out;
> + }
> + pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
> + seqno[1], tl->hwsp_offset);
> +
> + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
> + if (err) {
> + i915_request_add(rq);
> + goto out;
> + }
> + hwsp_seqno[1] = tl->hwsp_seqno;
> +
> + /* With wrap should come a new hwsp */
> + GEM_BUG_ON(seqno[1] >= seqno[0]);
> + GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);
> +
> + i915_request_add(rq);
> +
> + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
> + pr_err("Wait for timeline writes timed out!\n");
> + err = -EIO;
> + goto out;
> + }
> +
> + if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
> + pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
> + *hwsp_seqno[0], *hwsp_seqno[1],
> + seqno[0], seqno[1]);
> + err = -EINVAL;
> + goto out;
> + }
> +
> + i915_retire_requests(i915); /* recycle HWSP */
> + }
> +
> +out:
> + if (igt_flush_test(i915, I915_WAIT_LOCKED))
> + err = -EIO;
> +
> + i915_timeline_unpin(tl);
> +out_free:
> + i915_timeline_put(tl);
> +out_rpm:
> + intel_runtime_pm_put(i915, wakeref);
> + mutex_unlock(&i915->drm.struct_mutex);
> +
> + return err;
> +}
> +
> static int live_hwsp_recycle(void *arg)
> {
> struct drm_i915_private *i915 = arg;
> @@ -723,6 +835,7 @@ int i915_timeline_live_selftests(struct drm_i915_private *i915)
> SUBTEST(live_hwsp_recycle),
> SUBTEST(live_hwsp_engine),
> SUBTEST(live_hwsp_alternate),
> + SUBTEST(live_hwsp_wrap),
> };
>
> return i915_subtests(tests, i915);
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list