[Intel-gfx] [PATCH v2 1/3] drm/i915: Mark i915_request.timeline as a volatile, rcu pointer

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Sep 19 13:02:19 UTC 2019


On 19/09/2019 12:19, Chris Wilson wrote:
> The request->timeline is only valid until the request is retired (i.e.
> before it is completed). Upon retiring the request, the context may be
> unpinned and freed, and along with it the timeline may be freed. We
> therefore need to be very careful when chasing rq->timeline that the
> pointer does not disappear beneath us. The vast majority of users are in
> a protected context, either during request construction or retirement,
> where the timeline->mutex is held and the timeline cannot disappear. It
> is those few off the beaten path (where we access a second timeline) that
> need extra scrutiny -- to be added in the next patch after first adding
> the warnings about dangerous access.
> 
> One complication, where we cannot use the timeline->mutex itself, is
> during request submission onto hardware (under spinlocks). Here, we want
> to check on the timeline to finalize the breadcrumb, and so we need to
> impose a second rule to ensure that the request->timeline is indeed
> valid. As we are submitting the request, its context and timeline must
> be pinned, as it will be used by the hardware. Since it is pinned, we
> know the request->timeline must still be valid, and we cannot submit the
> idle barrier until after we release the engine->active.lock, ergo while
> submitting and holding that spinlock, a second thread cannot release the
> timeline.
> 
> v2: Don't be lazy inside selftests; hold the timeline->mutex for as long
> as we need it, and tidy up acquiring the timeline with a bit of
> refactoring (i915_active_add_request)
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/display/intel_overlay.c  |  2 +-
>   .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  2 +-
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
>   drivers/gpu/drm/i915/gt/intel_context.c       |  4 +-
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 57 ++++++++++++++++---
>   drivers/gpu/drm/i915/gt/intel_engine_pm.c     |  2 +-
>   drivers/gpu/drm/i915/gt/intel_engine_pool.h   |  2 +-
>   drivers/gpu/drm/i915/gt/intel_lrc.c           | 17 +++---
>   drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 27 +++++----
>   drivers/gpu/drm/i915/gt/intel_timeline.c      |  6 +-
>   .../gpu/drm/i915/gt/intel_timeline_types.h    |  1 +
>   drivers/gpu/drm/i915/gt/selftest_context.c    | 18 ++++--
>   drivers/gpu/drm/i915/gt/selftest_lrc.c        |  2 +-
>   drivers/gpu/drm/i915/i915_active.c            |  2 +-
>   drivers/gpu/drm/i915/i915_active.h            |  6 ++
>   drivers/gpu/drm/i915/i915_request.c           | 28 +++++----
>   drivers/gpu/drm/i915/i915_request.h           | 22 ++++++-
>   drivers/gpu/drm/i915/i915_vma.c               |  6 +-
>   drivers/gpu/drm/i915/selftests/i915_active.c  |  2 +-
>   drivers/gpu/drm/i915/selftests/igt_spinner.c  |  2 +-
>   20 files changed, 147 insertions(+), 63 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
> index 29edfc343716..5efef9babadb 100644
> --- a/drivers/gpu/drm/i915/display/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/display/intel_overlay.c
> @@ -230,7 +230,7 @@ alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *))
>   	if (IS_ERR(rq))
>   		return rq;
>   
> -	err = i915_active_ref(&overlay->last_flip, rq->timeline, rq);
> +	err = i915_active_add_request(&overlay->last_flip, rq);
>   	if (err) {
>   		i915_request_add(rq);
>   		return ERR_PTR(err);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> index f99920652751..7f61a8024133 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> @@ -211,7 +211,7 @@ static void clear_pages_worker(struct work_struct *work)
>   	 * keep track of the GPU activity within this vma/request, and
>   	 * propagate the signal from the request to w->dma.
>   	 */
> -	err = i915_active_ref(&vma->active, rq->timeline, rq);
> +	err = i915_active_add_request(&vma->active, rq);
>   	if (err)
>   		goto out_request;
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index f1c0e5d958f3..4a34c4f62065 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -910,7 +910,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>   		if (emit)
>   			err = emit(rq, data);
>   		if (err == 0)
> -			err = i915_active_ref(&cb->base, rq->timeline, rq);
> +			err = i915_active_add_request(&cb->base, rq);
>   
>   		i915_request_add(rq);
>   		if (err)
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index c0495811f493..26cb838c272c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -298,7 +298,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
>   	/* Only suitable for use in remotely modifying this context */
>   	GEM_BUG_ON(rq->hw_context == ce);
>   
> -	if (rq->timeline != tl) { /* beware timeline sharing */
> +	if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
>   		err = mutex_lock_interruptible_nested(&tl->mutex,
>   						      SINGLE_DEPTH_NESTING);
>   		if (err)
> @@ -319,7 +319,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
>   	 * words transfer the pinned ce object to tracked active request.
>   	 */
>   	GEM_BUG_ON(i915_active_is_idle(&ce->active));
> -	return i915_active_ref(&ce->active, rq->timeline, rq);
> +	return i915_active_add_request(&ce->active, rq);
>   }
>   
>   struct i915_request *intel_context_create_request(struct intel_context *ce)
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 3c176b0f4b45..f451d5076bde 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -680,6 +680,8 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
>   				engine->status_page.vma))
>   		goto out_frame;
>   
> +	mutex_lock(&frame->timeline.mutex);
> +
>   	frame->ring.vaddr = frame->cs;
>   	frame->ring.size = sizeof(frame->cs);
>   	frame->ring.effective_size = frame->ring.size;
> @@ -688,18 +690,22 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
>   	frame->rq.i915 = engine->i915;
>   	frame->rq.engine = engine;
>   	frame->rq.ring = &frame->ring;
> -	frame->rq.timeline = &frame->timeline;
> +	rcu_assign_pointer(frame->rq.timeline, &frame->timeline);
>   
>   	dw = intel_timeline_pin(&frame->timeline);
>   	if (dw < 0)
>   		goto out_timeline;
>   
> +	spin_lock_irq(&engine->active.lock);
>   	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
> +	spin_unlock_irq(&engine->active.lock);
> +
>   	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
>   
>   	intel_timeline_unpin(&frame->timeline);
>   
>   out_timeline:
> +	mutex_unlock(&frame->timeline.mutex);
>   	intel_timeline_fini(&frame->timeline);
>   out_frame:
>   	kfree(frame);
> @@ -1196,6 +1202,27 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
>   	}
>   }
>   
> +static struct intel_timeline *get_timeline(struct i915_request *rq)
> +{
> +	struct intel_timeline *tl;
> +
> +	/*
> +	 * Even though we are holding the engine->active.lock here, there
> +	 * is no control over the submission queue per-se and we are
> +	 * inspecting the active state at a random point in time, with an
> +	 * unknown queue. Play safe and make sure the timeline remains valid.
> +	 * (Only being used for pretty printing, one extra kref shouldn't
> +	 * cause a camel stampede!)
> +	 */
> +	rcu_read_lock();
> +	tl = rcu_dereference(rq->timeline);
> +	if (!kref_get_unless_zero(&tl->kref))
> +		tl = NULL;
> +	rcu_read_unlock();

How can it be NULL under the active lock? Isn't that the same assertion 
as in i915_timeline_get_active?

> +
> +	return tl;
> +}
> +
>   static void intel_engine_print_registers(struct intel_engine_cs *engine,
>   					 struct drm_printer *m)
>   {
> @@ -1290,27 +1317,37 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
>   			int len;
>   
>   			len = snprintf(hdr, sizeof(hdr),
> -				       "\t\tActive[%d: ",
> +				       "\t\tActive[%d]: ",
>   				       (int)(port - execlists->active));
> -			if (!i915_request_signaled(rq))
> +			if (!i915_request_signaled(rq)) {
> +				struct intel_timeline *tl = get_timeline(rq);
> +
>   				len += snprintf(hdr + len, sizeof(hdr) - len,
>   						"ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
>   						i915_ggtt_offset(rq->ring->vma),
> -						rq->timeline->hwsp_offset,
> +						tl ? tl->hwsp_offset : 0,
>   						hwsp_seqno(rq));
> +
> +				if (tl)
> +					intel_timeline_put(tl);
> +			}
>   			snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
>   			print_request(m, rq, hdr);
>   		}
>   		for (port = execlists->pending; (rq = *port); port++) {
> +			struct intel_timeline *tl = get_timeline(rq);
>   			char hdr[80];
>   
>   			snprintf(hdr, sizeof(hdr),
>   				 "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
>   				 (int)(port - execlists->pending),
>   				 i915_ggtt_offset(rq->ring->vma),
> -				 rq->timeline->hwsp_offset,
> +				 tl ? tl->hwsp_offset : 0,
>   				 hwsp_seqno(rq));
>   			print_request(m, rq, hdr);
> +
> +			if (tl)
> +				intel_timeline_put(tl);
>   		}
>   		spin_unlock_irqrestore(&engine->active.lock, flags);
>   	} else if (INTEL_GEN(dev_priv) > 6) {
> @@ -1388,6 +1425,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	spin_lock_irqsave(&engine->active.lock, flags);
>   	rq = intel_engine_find_active_request(engine);
>   	if (rq) {
> +		struct intel_timeline *tl = get_timeline(rq);
> +
>   		print_request(m, rq, "\t\tactive ");
>   
>   		drm_printf(m, "\t\tring->start:  0x%08x\n",
> @@ -1400,8 +1439,12 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   			   rq->ring->emit);
>   		drm_printf(m, "\t\tring->space:  0x%08x\n",
>   			   rq->ring->space);
> -		drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
> -			   rq->timeline->hwsp_offset);
> +
> +		if (tl) {
> +			drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
> +				   tl->hwsp_offset);
> +			intel_timeline_put(tl);
> +		}
>   
>   		print_request_ring(m, rq);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 65b5ca74b394..ce61c83d68e9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -103,7 +103,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
>   		/* Context switch failed, hope for the best! Maybe reset? */
>   		goto out_unlock;
>   
> -	intel_timeline_enter(rq->timeline);
> +	intel_timeline_enter(i915_request_timeline(rq));
>   
>   	/* Check again on the next retirement. */
>   	engine->wakeref_serial = engine->serial + 1;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
> index 7e2123b33594..1bd89cadc3b7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
> @@ -18,7 +18,7 @@ static inline int
>   intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
>   			      struct i915_request *rq)
>   {
> -	return i915_active_ref(&node->active, rq->timeline, rq);
> +	return i915_active_add_request(&node->active, rq);
>   }
>   
>   static inline void
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index a99166a2d2eb..ac21587b75f6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1825,7 +1825,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
>   {
>   	u32 *cs;
>   
> -	GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
> +	GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
>   
>   	cs = intel_ring_begin(rq, 6);
>   	if (IS_ERR(cs))
> @@ -1841,7 +1841,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
>   	*cs++ = MI_NOOP;
>   
>   	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> -	*cs++ = rq->timeline->hwsp_offset;
> +	*cs++ = i915_request_timeline(rq)->hwsp_offset;
>   	*cs++ = 0;
>   	*cs++ = rq->fence.seqno - 1;
>   
> @@ -2324,7 +2324,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
>   
>   static struct i915_request *active_request(struct i915_request *rq)
>   {
> -	const struct list_head * const list = &rq->timeline->requests;
> +	const struct list_head * const list =
> +		&i915_request_active_timeline(rq)->requests;
>   	const struct intel_context * const ce = rq->hw_context;
>   	struct i915_request *active = NULL;
>   
> @@ -2855,7 +2856,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
>   {
>   	cs = gen8_emit_ggtt_write(cs,
>   				  request->fence.seqno,
> -				  request->timeline->hwsp_offset,
> +				  i915_request_active_timeline(request)->hwsp_offset,
>   				  0);
>   
>   	return gen8_emit_fini_breadcrumb_footer(request, cs);
> @@ -2872,7 +2873,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>   	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
>   	cs = gen8_emit_ggtt_write_rcs(cs,
>   				      request->fence.seqno,
> -				      request->timeline->hwsp_offset,
> +				      i915_request_active_timeline(request)->hwsp_offset,
>   				      PIPE_CONTROL_FLUSH_ENABLE |
>   				      PIPE_CONTROL_CS_STALL);
>   
> @@ -2884,7 +2885,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>   {
>   	cs = gen8_emit_ggtt_write_rcs(cs,
>   				      request->fence.seqno,
> -				      request->timeline->hwsp_offset,
> +				      i915_request_active_timeline(request)->hwsp_offset,
>   				      PIPE_CONTROL_CS_STALL |
>   				      PIPE_CONTROL_TILE_CACHE_FLUSH |
>   				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> @@ -2948,7 +2949,7 @@ static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
>   {
>   	cs = gen8_emit_ggtt_write(cs,
>   				  request->fence.seqno,
> -				  request->timeline->hwsp_offset,
> +				  i915_request_active_timeline(request)->hwsp_offset,
>   				  0);
>   
>   	return gen12_emit_fini_breadcrumb_footer(request, cs);
> @@ -2959,7 +2960,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>   {
>   	cs = gen8_emit_ggtt_write_rcs(cs,
>   				      request->fence.seqno,
> -				      request->timeline->hwsp_offset,
> +				      i915_request_active_timeline(request)->hwsp_offset,
>   				      PIPE_CONTROL_CS_STALL |
>   				      PIPE_CONTROL_TILE_CACHE_FLUSH |
>   				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index a25b84b12ef1..0747b8c9f768 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -322,7 +322,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   		 PIPE_CONTROL_DC_FLUSH_ENABLE |
>   		 PIPE_CONTROL_QW_WRITE |
>   		 PIPE_CONTROL_CS_STALL);
> -	*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
> +	*cs++ = i915_request_active_timeline(rq)->hwsp_offset |
> +		PIPE_CONTROL_GLOBAL_GTT;
>   	*cs++ = rq->fence.seqno;
>   
>   	*cs++ = MI_USER_INTERRUPT;
> @@ -425,7 +426,7 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   		 PIPE_CONTROL_QW_WRITE |
>   		 PIPE_CONTROL_GLOBAL_GTT_IVB |
>   		 PIPE_CONTROL_CS_STALL);
> -	*cs++ = rq->timeline->hwsp_offset;
> +	*cs++ = i915_request_active_timeline(rq)->hwsp_offset;
>   	*cs++ = rq->fence.seqno;
>   
>   	*cs++ = MI_USER_INTERRUPT;
> @@ -439,8 +440,8 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   
>   static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   {
> -	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
> -	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
> +	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
> +	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
>   
>   	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
>   	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
> @@ -459,8 +460,8 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   {
>   	int i;
>   
> -	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
> -	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
> +	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
> +	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
>   
>   	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
>   	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
> @@ -938,8 +939,8 @@ static void i9xx_submit_request(struct i915_request *request)
>   
>   static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   {
> -	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
> -	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
> +	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
> +	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
>   
>   	*cs++ = MI_FLUSH;
>   
> @@ -961,8 +962,8 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   {
>   	int i;
>   
> -	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
> -	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
> +	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
> +	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
>   
>   	*cs++ = MI_FLUSH;
>   
> @@ -1815,7 +1816,7 @@ static int ring_request_alloc(struct i915_request *request)
>   	int ret;
>   
>   	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
> -	GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
> +	GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);
>   
>   	/*
>   	 * Flush enough space to reduce the likelihood of waiting after
> @@ -1926,7 +1927,9 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
>   		 */
>   		GEM_BUG_ON(!rq->reserved_space);
>   
> -		ret = wait_for_space(ring, rq->timeline, total_bytes);
> +		ret = wait_for_space(ring,
> +				     i915_request_timeline(rq),
> +				     total_bytes);
>   		if (unlikely(ret))
>   			return ERR_PTR(ret);
>   	}
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index 9cb01d9828f1..115a24d4a20a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -493,7 +493,7 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
>   static int cacheline_ref(struct intel_timeline_cacheline *cl,
>   			 struct i915_request *rq)
>   {
> -	return i915_active_ref(&cl->active, rq->timeline, rq);
> +	return i915_active_add_request(&cl->active, rq);
>   }
>   
>   int intel_timeline_read_hwsp(struct i915_request *from,
> @@ -504,7 +504,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
>   	struct intel_timeline *tl = from->timeline;
>   	int err;
>   
> -	GEM_BUG_ON(to->timeline == tl);
> +	GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl);
>   
>   	mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
>   	err = i915_request_completed(from);
> @@ -541,7 +541,7 @@ void __intel_timeline_free(struct kref *kref)
>   		container_of(kref, typeof(*timeline), kref);
>   
>   	intel_timeline_fini(timeline);
> -	kfree(timeline);
> +	kfree_rcu(timeline, rcu);
>   }
>   
>   static void timelines_fini(struct intel_gt *gt)
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> index 2b1baf2fcc8e..c668c4c50e75 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> @@ -80,6 +80,7 @@ struct intel_timeline {
>   	struct intel_gt *gt;
>   
>   	struct kref kref;
> +	struct rcu_head rcu;
>   };
>   
>   #endif /* __I915_TIMELINE_TYPES_H__ */
> diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
> index 9d1ea26c7a2d..4ce1e25433d2 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_context.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_context.c
> @@ -14,22 +14,28 @@
>   
>   static int request_sync(struct i915_request *rq)
>   {
> +	struct intel_timeline *tl = i915_request_timeline(rq);
>   	long timeout;
>   	int err = 0;
>   
> +	intel_timeline_get(tl);
>   	i915_request_get(rq);
>   
> -	i915_request_add(rq);
> +	/* Opencode i915_request_add() so we can keep the timeline locked. */
> +	__i915_request_commit(rq);
> +	__i915_request_queue(rq, NULL);

Looking at i915_request_add, here we also have tasklet kicking, but I 
guess for selftests it's not important.

> +
>   	timeout = i915_request_wait(rq, 0, HZ / 10);
> -	if (timeout < 0) {
> +	if (timeout < 0)
>   		err = timeout;
> -	} else {
> -		mutex_lock(&rq->timeline->mutex);
> +	else
>   		i915_request_retire_upto(rq);
> -		mutex_unlock(&rq->timeline->mutex);
> -	}
> +
> +	lockdep_unpin_lock(&tl->mutex, rq->cookie);
> +	mutex_unlock(&tl->mutex);
>   
>   	i915_request_put(rq);
> +	intel_timeline_put(tl);
>   
>   	return err;
>   }
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 26d05bd1bdc8..93a871bfd95d 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1089,7 +1089,7 @@ static int live_suppress_wait_preempt(void *arg)
>   				}
>   
>   				/* Disable NEWCLIENT promotion */
> -				__i915_active_request_set(&rq[i]->timeline->last_request,
> +				__i915_active_request_set(&i915_request_timeline(rq[i])->last_request,
>   							  dummy);
>   				i915_request_add(rq[i]);
>   			}
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 6a447f1d0110..d5aac6ff803a 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -695,7 +695,7 @@ void i915_request_add_active_barriers(struct i915_request *rq)
>   	struct llist_node *node, *next;
>   
>   	GEM_BUG_ON(intel_engine_is_virtual(engine));
> -	GEM_BUG_ON(rq->timeline != engine->kernel_context->timeline);
> +	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
>   
>   	/*
>   	 * Attach the list of proto-fences to the in-flight request such
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index f95058f99057..949c6835335b 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -373,6 +373,12 @@ int i915_active_ref(struct i915_active *ref,
>   		    struct intel_timeline *tl,
>   		    struct i915_request *rq);
>   
> +static inline int
> +i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
> +{
> +	return i915_active_ref(ref, i915_request_timeline(rq), rq);
> +}
> +
>   int i915_active_wait(struct i915_active *ref);
>   
>   int i915_request_await_active(struct i915_request *rq,
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 606acde47fa0..fb6f21c41934 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -220,7 +220,6 @@ static bool i915_request_retire(struct i915_request *rq)
>   {
>   	struct i915_active_request *active, *next;
>   
> -	lockdep_assert_held(&rq->timeline->mutex);
>   	if (!i915_request_completed(rq))
>   		return false;
>   
> @@ -241,7 +240,8 @@ static bool i915_request_retire(struct i915_request *rq)
>   	 * Note this requires that we are always called in request
>   	 * completion order.
>   	 */
> -	GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
> +	GEM_BUG_ON(!list_is_first(&rq->link,
> +				  &i915_request_timeline(rq)->requests));
>   	rq->ring->head = rq->postfix;
>   
>   	/*
> @@ -317,7 +317,7 @@ static bool i915_request_retire(struct i915_request *rq)
>   
>   void i915_request_retire_upto(struct i915_request *rq)
>   {
> -	struct intel_timeline * const tl = rq->timeline;
> +	struct intel_timeline * const tl = i915_request_timeline(rq);
>   	struct i915_request *tmp;
>   
>   	GEM_TRACE("%s fence %llx:%lld, current %d\n",
> @@ -325,7 +325,6 @@ void i915_request_retire_upto(struct i915_request *rq)
>   		  rq->fence.context, rq->fence.seqno,
>   		  hwsp_seqno(rq));
>   
> -	lockdep_assert_held(&tl->mutex);
>   	GEM_BUG_ON(!i915_request_completed(rq));
>   
>   	do {
> @@ -661,9 +660,11 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>   	rq->gem_context = ce->gem_context;
>   	rq->engine = ce->engine;
>   	rq->ring = ce->ring;
> -	rq->timeline = tl;
> +
> +	rcu_assign_pointer(rq->timeline, tl);
>   	rq->hwsp_seqno = tl->hwsp_seqno;
>   	rq->hwsp_cacheline = tl->hwsp_cacheline;
> +
>   	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
>   
>   	spin_lock_init(&rq->lock);
> @@ -771,7 +772,8 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
>   		return 0;
>   
>   	signal = list_prev_entry(signal, link);
> -	if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
> +	if (intel_timeline_sync_is_later(i915_request_timeline(rq),
> +					 &signal->fence))
>   		return 0;
>   
>   	return i915_sw_fence_await_dma_fence(&rq->submit,
> @@ -947,7 +949,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
>   
>   		/* Squash repeated waits to the same timelines */
>   		if (fence->context &&
> -		    intel_timeline_sync_is_later(rq->timeline, fence))
> +		    intel_timeline_sync_is_later(i915_request_timeline(rq),
> +						 fence))
>   			continue;
>   
>   		if (dma_fence_is_i915(fence))
> @@ -961,7 +964,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
>   
>   		/* Record the latest fence used against each timeline */
>   		if (fence->context)
> -			intel_timeline_sync_set(rq->timeline, fence);
> +			intel_timeline_sync_set(i915_request_timeline(rq),
> +						fence);
>   	} while (--nchild);
>   
>   	return 0;
> @@ -1103,7 +1107,7 @@ void i915_request_skip(struct i915_request *rq, int error)
>   static struct i915_request *
>   __i915_request_add_to_timeline(struct i915_request *rq)
>   {
> -	struct intel_timeline *timeline = rq->timeline;
> +	struct intel_timeline *timeline = i915_request_timeline(rq);
>   	struct i915_request *prev;
>   
>   	/*
> @@ -1216,7 +1220,7 @@ void __i915_request_queue(struct i915_request *rq,
>   void i915_request_add(struct i915_request *rq)
>   {
>   	struct i915_sched_attr attr = rq->gem_context->sched;
> -	struct intel_timeline * const tl = rq->timeline;
> +	struct intel_timeline * const tl = i915_request_timeline(rq);
>   	struct i915_request *prev;
>   
>   	lockdep_assert_held(&tl->mutex);
> @@ -1271,7 +1275,9 @@ void i915_request_add(struct i915_request *rq)
>   	 * work on behalf of others -- but instead we should benefit from
>   	 * improved resource management. (Well, that's the theory at least.)
>   	 */
> -	if (prev && i915_request_completed(prev) && prev->timeline == tl)
> +	if (prev &&
> +	    i915_request_completed(prev) &&
> +	    rcu_access_pointer(prev->timeline) == tl)
>   		i915_request_retire_upto(prev);
>   
>   	mutex_unlock(&tl->mutex);
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 8ac6e1226a56..b18f49528ded 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -113,7 +113,7 @@ struct i915_request {
>   	struct intel_engine_cs *engine;
>   	struct intel_context *hw_context;
>   	struct intel_ring *ring;
> -	struct intel_timeline *timeline;
> +	struct intel_timeline __rcu *timeline;
>   	struct list_head signal_link;
>   
>   	/*
> @@ -442,6 +442,26 @@ static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
>   	return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
>   }
>   
> +static inline struct intel_timeline *
> +i915_request_timeline(struct i915_request *rq)
> +{
> +	/* Valid only while the request is being constructed (or retired). */
> +	return rcu_dereference_protected(rq->timeline,
> +					 lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
> +}
> +
> +static inline struct intel_timeline *
> +i915_request_active_timeline(struct i915_request *rq)
> +{
> +	/*
> +	 * When in use during submission, we are protected by a guarantee that
> +	 * the context/timeline is pinned and must remain pinned until after
> +	 * this submission.
> +	 */
> +	return rcu_dereference_protected(rq->timeline,
> +					 lockdep_is_held(&rq->engine->active.lock));
> +}
> +
>   bool i915_retire_requests(struct drm_i915_private *i915);
>   
>   #endif /* I915_REQUEST_H */
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 411047d6a909..9d5b0f87c210 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -900,15 +900,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>   	 * add the active reference first and queue for it to be dropped
>   	 * *last*.
>   	 */
> -	err = i915_active_ref(&vma->active, rq->timeline, rq);
> +	err = i915_active_add_request(&vma->active, rq);
>   	if (unlikely(err))
>   		return err;
>   
>   	if (flags & EXEC_OBJECT_WRITE) {
>   		if (intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CS))
> -			i915_active_ref(&obj->frontbuffer->write,
> -					rq->timeline,
> -					rq);
> +			i915_active_add_request(&obj->frontbuffer->write, rq);
>   
>   		dma_resv_add_excl_fence(vma->resv, &rq->fence);
>   		obj->write_domain = I915_GEM_DOMAIN_RENDER;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
> index 77d844ac8b71..afecfa081ff4 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_active.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_active.c
> @@ -110,7 +110,7 @@ __live_active_setup(struct drm_i915_private *i915)
>   						       submit,
>   						       GFP_KERNEL);
>   		if (err >= 0)
> -			err = i915_active_ref(&active->base, rq->timeline, rq);
> +			err = i915_active_add_request(&active->base, rq);
>   		i915_request_add(rq);
>   		if (err) {
>   			pr_err("Failed to track active ref!\n");
> diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
> index 11f04ad48e68..ee8450b871da 100644
> --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
> +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
> @@ -147,7 +147,7 @@ igt_spinner_create_request(struct igt_spinner *spin,
>   	intel_gt_chipset_flush(engine->gt);
>   
>   	if (engine->emit_init_breadcrumb &&
> -	    rq->timeline->has_initial_breadcrumb) {
> +	    i915_request_timeline(rq)->has_initial_breadcrumb) {
>   		err = engine->emit_init_breadcrumb(rq);
>   		if (err)
>   			goto cancel_rq;
> 

Regards,

Tvrtko


More information about the Intel-gfx mailing list