[Intel-gfx] [PATCH] drm/i915/gt: Use the local HWSP offset during submission
Mika Kuoppala
mika.kuoppala at linux.intel.com
Fri Oct 23 11:25:08 UTC 2020
Chris Wilson <chris at chris-wilson.co.uk> writes:
> We wrap the timeline on construction of the next request, but there may
> still be requests in flight that have not yet finalized the breadcrumb.
> (The breadcrumb is delayed as we need engine-local offsets, and for the
> virtual engine that is not known until execution.) As such, by the time
> we write to the timeline's HWSP offset it may have changed, and we
> should use the value we preserved in the request instead.
>
> Though the window is small and infrequent (at full flow we can expect a
> timeline's seqno to wrap once every 30 minutes), the impact of writing
> the old seqno into the new HWSP is severe: the old requests are never
> completed, and the new requests are completed before they are even
> submitted.
>
> Fixes: ebece7539242 ("drm/i915: Keep timeline HWSP allocated until idle across the system")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: <stable at vger.kernel.org> # v5.2+
> ---
> drivers/gpu/drm/i915/gt/intel_lrc.c | 27 +++++++++++++------
> drivers/gpu/drm/i915/gt/intel_timeline.c | 18 +++++++------
> .../gpu/drm/i915/gt/intel_timeline_types.h | 2 ++
> 3 files changed, 31 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index c22d47cc6701..d0be98b67138 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3597,6 +3597,19 @@ static const struct intel_context_ops execlists_context_ops = {
> .destroy = execlists_context_destroy,
> };
>
> +static u32 hwsp_offset(const struct i915_request *rq)
> +{
> + const struct intel_timeline_cacheline *cl;
> +
> + /* Before the request is executed, the timeline/cachline is fixed */
s/cachline/cacheline
Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> +
> + cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
> + if (cl)
> + return cl->ggtt_offset;
> +
> + return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
> +}
> +
> static int gen8_emit_init_breadcrumb(struct i915_request *rq)
> {
> u32 *cs;
> @@ -3619,7 +3632,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
> *cs++ = MI_NOOP;
>
> *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> - *cs++ = i915_request_timeline(rq)->hwsp_offset;
> + *cs++ = hwsp_offset(rq);
> *cs++ = 0;
> *cs++ = rq->fence.seqno - 1;
>
> @@ -4939,11 +4952,9 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
> return gen8_emit_wa_tail(request, cs);
> }
>
> -static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
> +static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
> {
> - u32 addr = i915_request_active_timeline(request)->hwsp_offset;
> -
> - return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
> + return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
> }
>
> static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
> @@ -4962,7 +4973,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
> cs = gen8_emit_ggtt_write_rcs(cs,
> request->fence.seqno,
> - i915_request_active_timeline(request)->hwsp_offset,
> + hwsp_offset(request),
> PIPE_CONTROL_FLUSH_ENABLE |
> PIPE_CONTROL_CS_STALL);
>
> @@ -4974,7 +4985,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> {
> cs = gen8_emit_ggtt_write_rcs(cs,
> request->fence.seqno,
> - i915_request_active_timeline(request)->hwsp_offset,
> + hwsp_offset(request),
> PIPE_CONTROL_CS_STALL |
> PIPE_CONTROL_TILE_CACHE_FLUSH |
> PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> @@ -5044,7 +5055,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> {
> cs = gen12_emit_ggtt_write_rcs(cs,
> request->fence.seqno,
> - i915_request_active_timeline(request)->hwsp_offset,
> + hwsp_offset(request),
> PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
> PIPE_CONTROL_CS_STALL |
> PIPE_CONTROL_TILE_CACHE_FLUSH |
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index a2f74cefe4c3..7ea94d201fe6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -188,10 +188,14 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
> return cl;
> }
>
> -static void cacheline_acquire(struct intel_timeline_cacheline *cl)
> +static void cacheline_acquire(struct intel_timeline_cacheline *cl,
> + u32 ggtt_offset)
> {
> - if (cl)
> - i915_active_acquire(&cl->active);
> + if (!cl)
> + return;
> +
> + cl->ggtt_offset = ggtt_offset;
> + i915_active_acquire(&cl->active);
> }
>
> static void cacheline_release(struct intel_timeline_cacheline *cl)
> @@ -340,7 +344,7 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
> GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
> tl->fence_context, tl->hwsp_offset);
>
> - cacheline_acquire(tl->hwsp_cacheline);
> + cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
> if (atomic_fetch_inc(&tl->pin_count)) {
> cacheline_release(tl->hwsp_cacheline);
> __i915_vma_unpin(tl->hwsp_ggtt);
> @@ -515,7 +519,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
> GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
> tl->fence_context, tl->hwsp_offset);
>
> - cacheline_acquire(cl);
> + cacheline_acquire(cl, tl->hwsp_offset);
> tl->hwsp_cacheline = cl;
>
> *seqno = timeline_advance(tl);
> @@ -573,9 +577,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
> if (err)
> goto out;
>
> - *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
> - ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
> -
> + *hwsp = cl->ggtt_offset;
> out:
> i915_active_release(&cl->active);
> return err;
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> index 02181c5020db..4474f487f589 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> @@ -94,6 +94,8 @@ struct intel_timeline_cacheline {
> struct intel_timeline_hwsp *hwsp;
> void *vaddr;
>
> + u32 ggtt_offset;
> +
> struct rcu_head rcu;
> };
>
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list