[PATCH v2] drm: Get ref on CRTC commit object when waiting for flip_done

Daniel Vetter daniel at ffwll.ch
Tue Oct 16 12:33:17 UTC 2018


On Mon, Oct 15, 2018 at 09:46:40AM -0400, sunpeng.li at amd.com wrote:
> From: Leo Li <sunpeng.li at amd.com>
> 
> This fixes a general protection fault, caused by accessing the contents
> of a flip_done completion object that has already been freed. It occurs
> due to the preemption of a non-blocking commit worker thread W by
> another commit thread X. X continues to clear its atomic state at the
> end, destroying the CRTC commit object that W still needs. Switching
> back to W and accessing the commit objects then leads to bad results.
> 
> Worker W becomes preemptable when waiting for flip_done to complete. At
> this point, a frequently occurring commit thread X can take over. Here's
> an example where W is a worker thread that flips on both CRTCs, and X
> does a legacy cursor update on both CRTCs:
> 
>         ...
>      1. W does flip work
>      2. W runs commit_hw_done()
>      3. W waits for flip_done on CRTC 1
>      4. > flip_done for CRTC 1 completes
>      5. W finishes waiting for CRTC 1
>      6. W waits for flip_done on CRTC 2
> 
>      7. > Preempted by X
>      8. > flip_done for CRTC 2 completes
>      9. X atomic_check: hw_done and flip_done are complete on all CRTCs
>     10. X updates cursor on both CRTCs
>     11. X destroys atomic state
>     12. X done
> 
>     13. > Switch back to W
>     14. W waits for flip_done on CRTC 2
>     15. W raises general protection fault
> 
> The error looks like so:
> 
>     general protection fault: 0000 [#1] PREEMPT SMP PTI
>     **snip**
>     Call Trace:
>      lock_acquire+0xa2/0x1b0
>      _raw_spin_lock_irq+0x39/0x70
>      wait_for_completion_timeout+0x31/0x130
>      drm_atomic_helper_wait_for_flip_done+0x64/0x90 [drm_kms_helper]
>      amdgpu_dm_atomic_commit_tail+0xcae/0xdd0 [amdgpu]
>      commit_tail+0x3d/0x70 [drm_kms_helper]
>      process_one_work+0x212/0x650
>      worker_thread+0x49/0x420
>      kthread+0xfb/0x130
>      ret_from_fork+0x3a/0x50
>     Modules linked in: x86_pkg_temp_thermal amdgpu(O) chash(O)
>     gpu_sched(O) drm_kms_helper(O) syscopyarea sysfillrect sysimgblt
>     fb_sys_fops ttm(O) drm(O)
> 
> Note that i915 has this issue masked, since hw_done is signaled after
> waiting for flip_done. Doing so will block the cursor update from
> happening until hw_done is signaled, preventing the cursor commit from
> destroying the state.
> 
> v2: The reference on the commit object needs to be obtained before
>     hw_done() is signaled, since that's the point where another commit
>     is allowed to modify the state. Assuming that the
>     new_crtc_state->commit object still exists within flip_done() is
>     incorrect.
> 
>     Fix by getting a reference in setup_commit(), and releasing it
>     during default_clear().
> 
> Signed-off-by: Leo Li <sunpeng.li at amd.com>
> ---
> 
> Sending again, this time to the correct mailing list :)
> 
> Leo

Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>
Cc: stable at vger.kernel.org

I think it'd be really good if you could get intel-gfx-ci to test this
patch. Simply submit it to intel-gfx at lists.freedesktop.org. I'll leave
applying to one of the amd drm-misc committers, once it's passed CI.

Cheers, Daniel

> 
>  drivers/gpu/drm/drm_atomic.c        |  5 +++++
>  drivers/gpu/drm/drm_atomic_helper.c | 12 ++++++++----
>  include/drm/drm_atomic.h            | 11 +++++++++++
>  3 files changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
> index 3eb061e..12ae917 100644
> --- a/drivers/gpu/drm/drm_atomic.c
> +++ b/drivers/gpu/drm/drm_atomic.c
> @@ -174,6 +174,11 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state)
>  		state->crtcs[i].state = NULL;
>  		state->crtcs[i].old_state = NULL;
>  		state->crtcs[i].new_state = NULL;
> +
> +		if (state->crtcs[i].commit) {
> +			drm_crtc_commit_put(state->crtcs[i].commit);
> +			state->crtcs[i].commit = NULL;
> +		}
>  	}
>  
>  	for (i = 0; i < config->num_total_plane; i++) {
> diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
> index 80be74d..1bb4c31 100644
> --- a/drivers/gpu/drm/drm_atomic_helper.c
> +++ b/drivers/gpu/drm/drm_atomic_helper.c
> @@ -1408,15 +1408,16 @@ EXPORT_SYMBOL(drm_atomic_helper_wait_for_vblanks);
>  void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev,
>  					  struct drm_atomic_state *old_state)
>  {
> -	struct drm_crtc_state *new_crtc_state;
>  	struct drm_crtc *crtc;
>  	int i;
>  
> -	for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i) {
> -		struct drm_crtc_commit *commit = new_crtc_state->commit;
> +	for (i = 0; i < dev->mode_config.num_crtc; i++) {
> +		struct drm_crtc_commit *commit = old_state->crtcs[i].commit;
>  		int ret;
>  
> -		if (!commit)
> +		crtc = old_state->crtcs[i].ptr;
> +
> +		if (!crtc || !commit)
>  			continue;
>  
>  		ret = wait_for_completion_timeout(&commit->flip_done, 10 * HZ);
> @@ -1934,6 +1935,9 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state,
>  		drm_crtc_commit_get(commit);
>  
>  		commit->abort_completion = true;
> +
> +		state->crtcs[i].commit = commit;
> +		drm_crtc_commit_get(commit);
>  	}
>  
>  	for_each_oldnew_connector_in_state(state, conn, old_conn_state, new_conn_state, i) {
> diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
> index da9d95a..1e71315 100644
> --- a/include/drm/drm_atomic.h
> +++ b/include/drm/drm_atomic.h
> @@ -153,6 +153,17 @@ struct __drm_planes_state {
>  struct __drm_crtcs_state {
>  	struct drm_crtc *ptr;
>  	struct drm_crtc_state *state, *old_state, *new_state;
> +
> +	/**
> +	 * @commit:
> +	 *
> +	 * A reference to the CRTC commit object that is kept for use by
> +	 * drm_atomic_helper_wait_for_flip_done() after
> +	 * drm_atomic_helper_commit_hw_done() is called. This ensures that a
> +	 * concurrent commit won't free a commit object that is still in use.
> +	 */
> +	struct drm_crtc_commit *commit;
> +
>  	s32 __user *out_fence_ptr;
>  	u64 last_vblank_count;
>  };
> -- 
> 2.7.4
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the amd-gfx mailing list