[Intel-gfx] [PATCH 2/6] drm/i915: Avoid accessing request->timeline outside of its lifetime
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Oct 31 17:35:50 UTC 2016
On 31/10/2016 10:26, Chris Wilson wrote:
> Whilst waiting on a request, we may do so without holding any locks or
> any guards beyond a reference to the request. In order to avoid taking
> locks within request deallocation, we drop references to its timeline
> (via the context and ppgtt) upon retirement. We should avoid chasing
I couldn't find a reference being taken (or dropped) on the timeline when
it is stored in a request. It looks like a borrowed pointer to me?
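
To spell out the lifetime hazard as I understand it, here is a minimal
standalone sketch (made-up names and userspace C, not the actual driver
structs): the request only stores the pointer, the owner frees the timeline
on retirement, and a waiter that still chases rq->timeline afterwards reads
freed memory.

#include <stdlib.h>

struct timeline { unsigned int last_submitted_seqno; };

struct request {
        struct timeline *timeline;      /* borrowed, no reference taken */
        unsigned int global_seqno;
};

static void retire(struct request *rq)
{
        free(rq->timeline);     /* context/ppgtt release drops the timeline */
}

static unsigned int waiter_peek(const struct request *rq)
{
        return rq->timeline->last_submitted_seqno;      /* dangling once retired */
}

int main(void)
{
        struct request rq = { .timeline = calloc(1, sizeof(struct timeline)) };

        retire(&rq);
        return waiter_peek(&rq);        /* the use-after-free KASAN reports below */
}
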
> such pointers outside of their control, in particular we inspect the
> request->timeline to see if we may restore the RPS waitboost for a
> client. If we instead look at the engine->timeline, we will have similar
> behaviour on both full-ppgtt and !full-ppgtt systems and reduce the
> amount of reward we give towards stalling clients (i.e. only if the
> client stalls and the GPU is uncontended does it reclaim its boost).
> This restores behaviour back to pre-timelines, whilst fixing:
>
> [ 645.078485] BUG: KASAN: use-after-free in i915_gem_object_wait_fence+0x1ee/0x2e0 at addr ffff8802335643a0
> [ 645.078577] Read of size 4 by task gem_exec_schedu/28408
> [ 645.078638] CPU: 1 PID: 28408 Comm: gem_exec_schedu Not tainted 4.9.0-rc2+ #64
> [ 645.078724] Hardware name: / , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015
> [ 645.078816] ffff88022daef9a0 ffffffff8143d059 ffff880235402a80 ffff880233564200
> [ 645.078998] ffff88022daef9c8 ffffffff81229c5c ffff88022daefa48 ffff880233564200
> [ 645.079172] ffff880235402a80 ffff88022daefa38 ffffffff81229ef0 000000008110a796
> [ 645.079345] Call Trace:
> [ 645.079404] [<ffffffff8143d059>] dump_stack+0x68/0x9f
> [ 645.079467] [<ffffffff81229c5c>] kasan_object_err+0x1c/0x70
> [ 645.079534] [<ffffffff81229ef0>] kasan_report_error+0x1f0/0x4b0
> [ 645.079601] [<ffffffff8122a244>] kasan_report+0x34/0x40
> [ 645.079676] [<ffffffff81634f5e>] ? i915_gem_object_wait_fence+0x1ee/0x2e0
> [ 645.079741] [<ffffffff81229951>] __asan_load4+0x61/0x80
> [ 645.079807] [<ffffffff81634f5e>] i915_gem_object_wait_fence+0x1ee/0x2e0
> [ 645.079876] [<ffffffff816364bf>] i915_gem_object_wait+0x19f/0x590
> [ 645.079944] [<ffffffff81636320>] ? i915_gem_object_wait_priority+0x500/0x500
> [ 645.080016] [<ffffffff8110fb30>] ? debug_show_all_locks+0x1e0/0x1e0
> [ 645.080084] [<ffffffff8110abdc>] ? check_chain_key+0x14c/0x210
> [ 645.080157] [<ffffffff8110a796>] ? __lock_is_held+0x46/0xc0
> [ 645.080226] [<ffffffff8163bc61>] ? i915_gem_set_domain_ioctl+0x141/0x690
> [ 645.080296] [<ffffffff8163bcc2>] i915_gem_set_domain_ioctl+0x1a2/0x690
> [ 645.080366] [<ffffffff811f8f85>] ? __might_fault+0x75/0xe0
> [ 645.080433] [<ffffffff815a55f7>] drm_ioctl+0x327/0x640
> [ 645.080508] [<ffffffff8163bb20>] ? i915_gem_obj_prepare_shmem_write+0x3a0/0x3a0
> [ 645.080603] [<ffffffff815a52d0>] ? drm_ioctl_permit+0x120/0x120
> [ 645.080670] [<ffffffff8110abdc>] ? check_chain_key+0x14c/0x210
> [ 645.080738] [<ffffffff81275717>] do_vfs_ioctl+0x127/0xa20
> [ 645.080804] [<ffffffff8120268c>] ? do_mmap+0x47c/0x580
> [ 645.080871] [<ffffffff811da567>] ? vm_mmap_pgoff+0x117/0x140
> [ 645.080938] [<ffffffff812755f0>] ? ioctl_preallocate+0x150/0x150
> [ 645.081011] [<ffffffff81108c53>] ? up_write+0x23/0x50
> [ 645.081078] [<ffffffff811da567>] ? vm_mmap_pgoff+0x117/0x140
> [ 645.081145] [<ffffffff811da450>] ? vma_is_stack_for_current+0x90/0x90
> [ 645.081214] [<ffffffff8110d853>] ? mark_held_locks+0x23/0xc0
> [ 645.082030] [<ffffffff81288408>] ? __fget+0x168/0x250
> [ 645.082106] [<ffffffff819ad517>] ? entry_SYSCALL_64_fastpath+0x5/0xb1
> [ 645.082176] [<ffffffff81288592>] ? __fget_light+0xa2/0xc0
> [ 645.082242] [<ffffffff8127604c>] SyS_ioctl+0x3c/0x70
> [ 645.082309] [<ffffffff819ad52e>] entry_SYSCALL_64_fastpath+0x1c/0xb1
> [ 645.082374] Object at ffff880233564200, in cache kmalloc-8192 size: 8192
> [ 645.082431] Allocated:
> [ 645.082480] PID = 28408
> [ 645.082535] [ 645.082566] [<ffffffff8103ae66>] save_stack_trace+0x16/0x20
> [ 645.082623] [ 645.082656] [<ffffffff81228b06>] save_stack+0x46/0xd0
> [ 645.082716] [ 645.082756] [<ffffffff812292fd>] kasan_kmalloc+0xad/0xe0
> [ 645.082817] [ 645.082848] [<ffffffff81631752>] i915_ppgtt_create+0x52/0x220
> [ 645.082908] [ 645.082941] [<ffffffff8161db96>] i915_gem_create_context+0x396/0x560
> [ 645.083027] [ 645.083059] [<ffffffff8161f857>] i915_gem_context_create_ioctl+0x97/0xf0
> [ 645.083152] [ 645.083183] [<ffffffff815a55f7>] drm_ioctl+0x327/0x640
> [ 645.083243] [ 645.083274] [<ffffffff81275717>] do_vfs_ioctl+0x127/0xa20
> [ 645.083334] [ 645.083372] [<ffffffff8127604c>] SyS_ioctl+0x3c/0x70
> [ 645.083432] [ 645.083464] [<ffffffff819ad52e>] entry_SYSCALL_64_fastpath+0x1c/0xb1
> [ 645.083551] Freed:
> [ 645.083599] PID = 27629
> [ 645.083648] [ 645.083676] [<ffffffff8103ae66>] save_stack_trace+0x16/0x20
> [ 645.083738] [ 645.083770] [<ffffffff81228b06>] save_stack+0x46/0xd0
> [ 645.083830] [ 645.083862] [<ffffffff81229203>] kasan_slab_free+0x73/0xc0
> [ 645.083922] [ 645.083961] [<ffffffff812279c9>] kfree+0xa9/0x170
> [ 645.084021] [ 645.084053] [<ffffffff81629f60>] i915_ppgtt_release+0x100/0x180
> [ 645.084139] [ 645.084171] [<ffffffff8161d414>] i915_gem_context_free+0x1b4/0x230
> [ 645.084257] [ 645.084288] [<ffffffff816537b2>] intel_lr_context_unpin+0x192/0x230
> [ 645.084380] [ 645.084413] [<ffffffff81645250>] i915_gem_request_retire+0x620/0x630
> [ 645.084500] [ 645.085226] [<ffffffff816473d1>] i915_gem_retire_requests+0x181/0x280
> [ 645.085313] [ 645.085352] [<ffffffff816352ba>] i915_gem_retire_work_handler+0xca/0xe0
> [ 645.085440] [ 645.085471] [<ffffffff810c725b>] process_one_work+0x4fb/0x920
> [ 645.085532] [ 645.085562] [<ffffffff810c770d>] worker_thread+0x8d/0x840
> [ 645.085622] [ 645.085653] [<ffffffff810d21e5>] kthread+0x185/0x1b0
> [ 645.085718] [ 645.085750] [<ffffffff819ad7a7>] ret_from_fork+0x27/0x40
> [ 645.085811] Memory state around the buggy address:
> [ 645.085869] ffff880233564280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> [ 645.085956] ffff880233564300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> [ 645.086053] >ffff880233564380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> [ 645.086138] ^
> [ 645.086193] ffff880233564400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> [ 645.086283] ffff880233564480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>
> Fixes: 73cb97010d4f ("drm/i915: Combine seqno + tracking into a global timeline struct")
> Fixes: 80b204bce8f2 ("drm/i915: Enable multiple timelines")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 7 +++----
> drivers/gpu/drm/i915/i915_gem.c | 4 ++--
> drivers/gpu/drm/i915/i915_gpu_error.c | 2 +-
> drivers/gpu/drm/i915/i915_irq.c | 2 +-
> drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +++++
> 5 files changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 9bef6f55f99d..bf19192dcc3b 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1348,9 +1348,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>
> seq_printf(m, "%s:\n", engine->name);
> seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
> - engine->hangcheck.seqno,
> - seqno[id],
> - engine->timeline->last_submitted_seqno);
> + engine->hangcheck.seqno, seqno[id],
> + intel_engine_last_submit(engine));
> seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
> yesno(intel_engine_has_waiter(engine)),
> yesno(test_bit(engine->id,
> @@ -3167,7 +3166,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
> seq_printf(m, "%s\n", engine->name);
> seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [score %d]\n",
> intel_engine_get_seqno(engine),
> - engine->timeline->last_submitted_seqno,
> + intel_engine_last_submit(engine),
> engine->hangcheck.seqno,
> engine->hangcheck.score);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1e5d2bf777e4..b9f540b16a45 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -371,7 +371,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
> if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
> i915_gem_request_retire_upto(rq);
You could have stuck with req or request when adding new code. Now we
have three names for requests in the code base. :(
>
> - if (rps && rq->fence.seqno == rq->timeline->last_submitted_seqno) {
> + if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
Okay, so we are not touching rq->timeline here, and I assume rq->global_seqno
is from the same seqno space as the engine timeline seqnos.
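
Just to check my reading of the new condition, a standalone sketch of what I
think it means (made-up types, not the driver code): the boost is only
reclaimed when the request the client stalled on is also the last request
submitted to its engine, i.e. the engine is otherwise uncontended.

#include <stdbool.h>
#include <stdio.h>

struct engine { unsigned int last_submitted_seqno; };
struct request { struct engine *engine; unsigned int global_seqno; };

/* Both values must come from the same (engine/global) seqno space for this
 * equality test to mean "nothing was submitted after this request". */
static bool may_reclaim_boost(const struct request *rq)
{
        return rq->global_seqno == rq->engine->last_submitted_seqno;
}

int main(void)
{
        struct engine e = { .last_submitted_seqno = 7 };
        struct request rq = { .engine = &e, .global_seqno = 7 };

        printf("reclaim boost: %d\n", may_reclaim_boost(&rq));  /* 1: engine idle behind us */
        return 0;
}
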
> /* The GPU is now idle and this client has stalled.
> * Since no other client has submitted a request in the
> * meantime, assume that this client is the only one
> @@ -2674,7 +2674,7 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
> * reset it.
> */
> intel_engine_init_global_seqno(engine,
> - engine->timeline->last_submitted_seqno);
> + intel_engine_last_submit(engine));
>
> /*
> * Clear the execlists queue up before freeing the requests, as those
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 7ba40487e345..204093f3eaa5 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1120,7 +1120,7 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
> ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
> ee->acthd = intel_engine_get_active_head(engine);
> ee->seqno = intel_engine_get_seqno(engine);
> - ee->last_seqno = engine->timeline->last_submitted_seqno;
> + ee->last_seqno = intel_engine_last_submit(engine);
> ee->start = I915_READ_START(engine);
> ee->head = I915_READ_HEAD(engine);
> ee->tail = I915_READ_TAIL(engine);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 90d0905592f2..4dda2b1eefdb 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -3168,7 +3168,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>
> acthd = intel_engine_get_active_head(engine);
> seqno = intel_engine_get_seqno(engine);
> - submit = READ_ONCE(engine->timeline->last_submitted_seqno);
> + submit = intel_engine_last_submit(engine);
>
> if (engine->hangcheck.seqno == seqno) {
> if (i915_seqno_passed(seqno, submit)) {
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 43f61aa24ed7..a089e11ee575 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -496,6 +496,11 @@ static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
> return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
> }
>
> +static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
> +{
> + return READ_ONCE(engine->timeline->last_submitted_seqno);
> +}
> +
I don't like that READ_ONCE effectively gets sprinkled all over the place via
the call sites. It should be well defined and controlled where it is actually
needed; otherwise it suggests READ_ONCE is not really appropriate.
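
To illustrate the distinction I have in mind, a generic standalone sketch
(plain C with a simplified stand-in for the kernel READ_ONCE macro, not the
driver code): with the annotation hidden in the helper, every caller gets the
lockless read whether or not it already holds the relevant lock, instead of
spelling it out only at the call site that really runs unlocked.

#include <stdio.h>

/* simplified stand-in for the kernel macro: a volatile load */
#define READ_ONCE(x)    (*(volatile typeof(x) *)&(x))

struct engine { unsigned int last_submitted_seqno; };

/* Style in the patch: the lockless read is baked into the helper. */
static unsigned int last_submit_helper(struct engine *e)
{
        return READ_ONCE(e->last_submitted_seqno);
}

/* Alternative: plain accessor, with READ_ONCE written out only where the
 * unlocked access actually happens. */
static unsigned int last_submit_plain(struct engine *e)
{
        return e->last_submitted_seqno;
}

int main(void)
{
        struct engine e = { .last_submitted_seqno = 42 };

        printf("%u %u\n", last_submit_helper(&e), last_submit_plain(&e));
        return 0;
}
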
> int init_workarounds_ring(struct intel_engine_cs *engine);
>
> void intel_engine_get_instdone(struct intel_engine_cs *engine,
>
Regards,
Tvrtko