[RFC 1/2] drm/vc4: Handle async page flips in the atomic_commit() path

Boris Brezillon boris.brezillon at bootlin.com
Fri Mar 30 15:20:13 UTC 2018


On Fri, 30 Mar 2018 17:09:03 +0200
Boris Brezillon <boris.brezillon at bootlin.com> wrote:

> Rework the atomic commit logic to handle async page flip requests in
> the atomic update path.
> 
> Async page flips are a bit more complicated than async cursor updates
> (already handled in the vc4_atomic_commit() function) in that you need
> to wait for fences on the new framebuffers to be signaled before doing
> the flip. To ensure that, we schedule the commit_work work item to do
> the async update (note that the existing implementation already uses a
> work item to wait for fences to be signaled, so the latency shouldn't
> change much).
> 
> Of course, we have to modify the atomic_complete_commit()
> implementation a bit to avoid waiting for things we don't care about
> when doing an async page flip:
> 
> * drm_atomic_helper_wait_for_dependencies() waits for flip_done, which
>   we don't care about when doing async page flips. Instead, we replace
>   that call with a loop that waits for hw_done only
> * drm_atomic_helper_commit_modeset_disables() and
>   drm_atomic_helper_commit_modeset_enables() are not needed because
>   nothing except the planes' FBs is updated in async page flips
> * drm_atomic_helper_commit_planes() is replaced by
>   vc4_plane_async_set_fb(), which only updates the FB and is thus much
>   simpler
> * drm_atomic_helper_wait_for_vblanks() is not needed because we don't
>   wait for the next VBLANK period to apply the new state, and flip
>   events are signaled manually just after the HW has been updated
> 
> Thanks to this rework, we can get rid of the custom vc4_page_flip()
> implementation and its dependencies and use
> drm_atomic_helper_page_flip() instead.

Another good reason for moving async page flips to the atomic commit
path is that it fixes a bug we had:
drm_atomic_helper_prepare/cleanup_planes() were not called in the async
page flip path, which can lead to unbalanced vc4_bo_inc/dec_usecnt()
calls in some situations (when the plane is updated once with an async
page flip and then with a regular update), or trigger a use-after-free
if the BO passed to the plane is marked purgeable and the kernel
decides to purge its cache.
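
To illustrate the pairing that the legacy async path was missing, here
is a simplified sketch of what the plane ->prepare_fb()/->cleanup_fb()
hooks do with the usecnt when the commit goes through the helpers (not
the exact vc4_plane.c code; the sketch_* names are made up for this
example):

#include <drm/drm_fb_cma_helper.h>

#include "vc4_drv.h"

/* drm_atomic_helper_prepare_planes() ends up calling ->prepare_fb() and
 * drm_atomic_helper_cleanup_planes() calls ->cleanup_fb(), so every
 * vc4_bo_inc_usecnt() is paired with a vc4_bo_dec_usecnt(). The legacy
 * async page flip path skipped both helpers, breaking that pairing.
 */
static int sketch_prepare_fb(struct drm_plane *plane,
			     struct drm_plane_state *state)
{
	struct vc4_bo *bo;

	if (!state->fb)
		return 0;

	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);

	/* Pin the BO so it can't be purged while it is being scanned out. */
	return vc4_bo_inc_usecnt(bo);
}

static void sketch_cleanup_fb(struct drm_plane *plane,
			      struct drm_plane_state *state)
{
	struct vc4_bo *bo;

	if (!state->fb)
		return;

	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);

	/* Drop the pin taken in ->prepare_fb(). If this is never reached,
	 * the usecnt stays unbalanced and a purgeable BO can be purged
	 * while it is still being scanned out.
	 */
	vc4_bo_dec_usecnt(bo);
}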

> 
> Signed-off-by: Boris Brezillon <boris.brezillon at bootlin.com>
> ---
>  drivers/gpu/drm/vc4/vc4_crtc.c | 103 +----------------------------------------
>  drivers/gpu/drm/vc4/vc4_kms.c  | 101 +++++++++++++++++++++++++++++++++-------
>  2 files changed, 86 insertions(+), 118 deletions(-)
> 
> diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
> index bf4667481935..3843c39dce61 100644
> --- a/drivers/gpu/drm/vc4/vc4_crtc.c
> +++ b/drivers/gpu/drm/vc4/vc4_crtc.c
> @@ -757,107 +757,6 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
>  	return ret;
>  }
>  
> -struct vc4_async_flip_state {
> -	struct drm_crtc *crtc;
> -	struct drm_framebuffer *fb;
> -	struct drm_pending_vblank_event *event;
> -
> -	struct vc4_seqno_cb cb;
> -};
> -
> -/* Called when the V3D execution for the BO being flipped to is done, so that
> - * we can actually update the plane's address to point to it.
> - */
> -static void
> -vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
> -{
> -	struct vc4_async_flip_state *flip_state =
> -		container_of(cb, struct vc4_async_flip_state, cb);
> -	struct drm_crtc *crtc = flip_state->crtc;
> -	struct drm_device *dev = crtc->dev;
> -	struct vc4_dev *vc4 = to_vc4_dev(dev);
> -	struct drm_plane *plane = crtc->primary;
> -
> -	vc4_plane_async_set_fb(plane, flip_state->fb);
> -	if (flip_state->event) {
> -		unsigned long flags;
> -
> -		spin_lock_irqsave(&dev->event_lock, flags);
> -		drm_crtc_send_vblank_event(crtc, flip_state->event);
> -		spin_unlock_irqrestore(&dev->event_lock, flags);
> -	}
> -
> -	drm_crtc_vblank_put(crtc);
> -	drm_framebuffer_put(flip_state->fb);
> -	kfree(flip_state);
> -
> -	up(&vc4->async_modeset);
> -}
> -
> -/* Implements async (non-vblank-synced) page flips.
> - *
> - * The page flip ioctl needs to return immediately, so we grab the
> - * modeset semaphore on the pipe, and queue the address update for
> - * when V3D is done with the BO being flipped to.
> - */
> -static int vc4_async_page_flip(struct drm_crtc *crtc,
> -			       struct drm_framebuffer *fb,
> -			       struct drm_pending_vblank_event *event,
> -			       uint32_t flags)
> -{
> -	struct drm_device *dev = crtc->dev;
> -	struct vc4_dev *vc4 = to_vc4_dev(dev);
> -	struct drm_plane *plane = crtc->primary;
> -	int ret = 0;
> -	struct vc4_async_flip_state *flip_state;
> -	struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
> -	struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
> -
> -	flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
> -	if (!flip_state)
> -		return -ENOMEM;
> -
> -	drm_framebuffer_get(fb);
> -	flip_state->fb = fb;
> -	flip_state->crtc = crtc;
> -	flip_state->event = event;
> -
> -	/* Make sure all other async modesetes have landed. */
> -	ret = down_interruptible(&vc4->async_modeset);
> -	if (ret) {
> -		drm_framebuffer_put(fb);
> -		kfree(flip_state);
> -		return ret;
> -	}
> -
> -	WARN_ON(drm_crtc_vblank_get(crtc) != 0);
> -
> -	/* Immediately update the plane's legacy fb pointer, so that later
> -	 * modeset prep sees the state that will be present when the semaphore
> -	 * is released.
> -	 */
> -	drm_atomic_set_fb_for_plane(plane->state, fb);
> -	plane->fb = fb;
> -
> -	vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno,
> -			   vc4_async_page_flip_complete);
> -
> -	/* Driver takes ownership of state on successful async commit. */
> -	return 0;
> -}
> -
> -static int vc4_page_flip(struct drm_crtc *crtc,
> -			 struct drm_framebuffer *fb,
> -			 struct drm_pending_vblank_event *event,
> -			 uint32_t flags,
> -			 struct drm_modeset_acquire_ctx *ctx)
> -{
> -	if (flags & DRM_MODE_PAGE_FLIP_ASYNC)
> -		return vc4_async_page_flip(crtc, fb, event, flags);
> -	else
> -		return drm_atomic_helper_page_flip(crtc, fb, event, flags, ctx);
> -}
> -
>  static struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc)
>  {
>  	struct vc4_crtc_state *vc4_state;
> @@ -902,7 +801,7 @@ vc4_crtc_reset(struct drm_crtc *crtc)
>  static const struct drm_crtc_funcs vc4_crtc_funcs = {
>  	.set_config = drm_atomic_helper_set_config,
>  	.destroy = vc4_crtc_destroy,
> -	.page_flip = vc4_page_flip,
> +	.page_flip = drm_atomic_helper_page_flip,
>  	.set_property = NULL,
>  	.cursor_set = NULL, /* handled by drm_mode_cursor_universal */
>  	.cursor_move = NULL, /* handled by drm_mode_cursor_universal */
> diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
> index e791e498a3dd..faa2c26f1235 100644
> --- a/drivers/gpu/drm/vc4/vc4_kms.c
> +++ b/drivers/gpu/drm/vc4/vc4_kms.c
> @@ -25,33 +25,89 @@
>  #include "vc4_drv.h"
>  
>  static void
> -vc4_atomic_complete_commit(struct drm_atomic_state *state)
> +vc4_atomic_complete_commit(struct drm_atomic_state *state, bool async)
>  {
>  	struct drm_device *dev = state->dev;
>  	struct vc4_dev *vc4 = to_vc4_dev(dev);
>  
>  	drm_atomic_helper_wait_for_fences(dev, state, false);
>  
> -	drm_atomic_helper_wait_for_dependencies(state);
> +	if (async) {
> +		struct drm_plane_state *plane_state;
> +		struct drm_crtc_state *new_crtc_state;
> +		struct drm_plane *plane;
> +		struct drm_crtc *crtc;
> +		int i;
> +
> +		/* We need to wait for HW done before applying the new FBs
> +		 * otherwise our update might be overwritten by the previous
> +		 * commit.
> +		 */
> +		for_each_old_plane_in_state(state, plane, plane_state, i) {
> +			struct drm_crtc_commit *commit = plane_state->commit;
> +			int ret;
> +
> +			if (!commit)
> +				continue;
> +
> +			ret = wait_for_completion_timeout(&commit->hw_done,
> +							  10 * HZ);
> +			if (!ret)
> +				DRM_ERROR("[PLANE:%d:%s] hw_done timed out\n",
> +					  plane->base.id, plane->name);
> +		}
>  
> -	drm_atomic_helper_commit_modeset_disables(dev, state);
> +		/* FIXME:
> +		 * We could use drm_atomic_helper_async_commit() here, but
> +		 * VC4's ->atomic_async_update() implementation expects
> +		 * plane->state to point to the old_state and old/new states
> +		 * have already been swapped.
> +		 * Let's just call our custom function for now and see how we
> +		 * can make that more generic afterwards.
> +		 */
> +		for_each_new_plane_in_state(state, plane, plane_state, i)
> +			vc4_plane_async_set_fb(plane, plane_state->fb);
> +
> +		/* Now, send 'fake' VBLANK events to let the user know its
> +		 * pageflip is done. By fake I mean they are not really
> +		 * VBLANK synchronized, but it seems to be expected by the core.
> +		 */
> +		for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
> +			unsigned long flags;
> +
> +			if (!new_crtc_state->event)
> +				continue;
> +
> +			WARN_ON(drm_crtc_vblank_get(crtc));
> +			spin_lock_irqsave(&dev->event_lock, flags);
> +			drm_crtc_send_vblank_event(crtc, new_crtc_state->event);
> +			spin_unlock_irqrestore(&dev->event_lock, flags);
> +			drm_crtc_vblank_put(crtc);
> +		}
> +	} else {
> +		drm_atomic_helper_wait_for_dependencies(state);
>  
> -	drm_atomic_helper_commit_planes(dev, state, 0);
> +		drm_atomic_helper_commit_modeset_disables(dev, state);
>  
> -	drm_atomic_helper_commit_modeset_enables(dev, state);
> +		drm_atomic_helper_commit_planes(dev, state, 0);
>  
> -	/* Make sure that drm_atomic_helper_wait_for_vblanks()
> -	 * actually waits for vblank.  If we're doing a full atomic
> -	 * modeset (as opposed to a vc4_update_plane() short circuit),
> -	 * then we need to wait for scanout to be done with our
> -	 * display lists before we free it and potentially reallocate
> -	 * and overwrite the dlist memory with a new modeset.
> -	 */
> -	state->legacy_cursor_update = false;
> +		drm_atomic_helper_commit_modeset_enables(dev, state);
> +	}
>  
>  	drm_atomic_helper_commit_hw_done(state);
>  
> -	drm_atomic_helper_wait_for_vblanks(dev, state);
> +	if (!async) {
> +		/* Make sure that drm_atomic_helper_wait_for_vblanks()
> +		 * actually waits for vblank.  If we're doing a full atomic
> +		 * modeset (as opposed to a vc4_update_plane() short circuit),
> +		 * then we need to wait for scanout to be done with our
> +		 * display lists before we free it and potentially reallocate
> +		 * and overwrite the dlist memory with a new modeset.
> +		 */
> +		state->legacy_cursor_update = false;
> +
> +		drm_atomic_helper_wait_for_vblanks(dev, state);
> +	}
>  
>  	drm_atomic_helper_cleanup_planes(dev, state);
>  
> @@ -67,7 +123,20 @@ static void commit_work(struct work_struct *work)
>  	struct drm_atomic_state *state = container_of(work,
>  						      struct drm_atomic_state,
>  						      commit_work);
> -	vc4_atomic_complete_commit(state);
> +	struct drm_crtc_state *new_crtc_state;
> +	bool async_commit = true;
> +	struct drm_crtc *crtc;
> +	int i;
> +
> +	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
> +		if (new_crtc_state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC)
> +			continue;
> +
> +		async_commit = false;
> +		break;
> +	}
> +
> +	vc4_atomic_complete_commit(state, async_commit);
>  }
>  
>  /**
> @@ -163,7 +232,7 @@ static int vc4_atomic_commit(struct drm_device *dev,
>  	if (nonblock)
>  		queue_work(system_unbound_wq, &state->commit_work);
>  	else
> -		vc4_atomic_complete_commit(state);
> +		vc4_atomic_complete_commit(state, false);
>  
>  	return 0;
>  }
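
As a side note, the userspace-visible behaviour doesn't change: an
async flip is still requested through the legacy page flip ioctl with
the ASYNC flag set, it just lands in drm_atomic_helper_page_flip() and,
being a nonblocking commit, in the commit_work path above. A minimal
libdrm sketch (crtc_id/fb_id are placeholders, error handling and event
collection on the DRM fd are omitted):

#include <stdint.h>

#include <xf86drm.h>
#include <xf86drmMode.h>

/* crtc_id and fb_id are placeholders; they would normally come from
 * drmModeGetResources() and drmModeAddFB2() on the vc4 device.
 */
static int request_async_flip(int fd, uint32_t crtc_id, uint32_t fb_id,
			      void *user_data)
{
	/* ASYNC: don't wait for the next vblank; EVENT: deliver a page
	 * flip event on the DRM fd once the flip has been applied.
	 */
	return drmModePageFlip(fd, crtc_id, fb_id,
			       DRM_MODE_PAGE_FLIP_ASYNC |
			       DRM_MODE_PAGE_FLIP_EVENT,
			       user_data);
}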



-- 
Boris Brezillon, Bootlin (formerly Free Electrons)
Embedded Linux and Kernel engineering
https://bootlin.com

