[Intel-gfx] [PATCH 3/4] drm/i915: Enable asynchronous nuclear flips

Daniel Vetter daniel at ffwll.ch
Mon Feb 2 01:36:05 PST 2015


On Sat, Jan 31, 2015 at 12:07:56PM -0800, Matt Roper wrote:
> On Sat, Jan 31, 2015 at 10:30:29AM +0100, Daniel Vetter wrote:
> > On Fri, Jan 30, 2015 at 04:22:38PM -0800, Matt Roper wrote:
> > > The initial i915 nuclear pageflip support rejected asynchronous updates.
> > > Allow all work after we swap in the new state structures to run
> > > asynchronously.  We also need to start sending completion events to
> > > userspace if they were requested.
> > > 
> > > Signed-off-by: Matt Roper <matthew.d.roper at intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_drv.h     |   3 +
> > >  drivers/gpu/drm/i915/intel_atomic.c | 162 +++++++++++++++++++++++++++++++-----
> > >  drivers/gpu/drm/i915/intel_drv.h    |   8 ++
> > >  3 files changed, 150 insertions(+), 23 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index 8fad702..c7a520a 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -1777,6 +1777,9 @@ struct drm_i915_private {
> > >  	struct drm_crtc *pipe_to_crtc_mapping[I915_MAX_PIPES];
> > >  	wait_queue_head_t pending_flip_queue;
> > >  
> > > +	/* CRTC mask of pending atomic flips */
> > > +	uint32_t pending_atomic;
> > > +
> > >  #ifdef CONFIG_DEBUG_FS
> > >  	struct intel_pipe_crc pipe_crc[I915_MAX_PIPES];
> > >  #endif
> > > diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c
> > > index 19a9dd5..5dd7897 100644
> > > --- a/drivers/gpu/drm/i915/intel_atomic.c
> > > +++ b/drivers/gpu/drm/i915/intel_atomic.c
> > > @@ -76,6 +76,8 @@ int intel_atomic_check(struct drm_device *dev,
> > >  	state->allow_modeset = false;
> > >  	for (i = 0; i < ncrtcs; i++) {
> > >  		struct intel_crtc *crtc = to_intel_crtc(state->crtcs[i]);
> > > +		if (crtc)
> > > +			state->crtc_states[i]->enable = crtc->active;
> > >  		if (crtc && crtc->pipe != nuclear_pipe)
> > >  			not_nuclear = true;
> > >  	}
> > > @@ -96,6 +98,87 @@ int intel_atomic_check(struct drm_device *dev,
> > >  }
> > >  
> > >  
> > > +/*
> > > + * Wait until CRTC's have no pending flip, then atomically mark those CRTC's
> > > + * as busy.
> > > + */
> > > +static int wait_for_pending_flip(uint32_t crtc_mask,
> > > +				 struct intel_pending_atomic *commit)
> > > +{
> > > +	struct drm_i915_private *dev_priv = commit->dev->dev_private;
> > > +	int ret;
> > > +
> > > +	spin_lock_irq(&dev_priv->pending_flip_queue.lock);
> > > +	ret = wait_event_interruptible_locked(dev_priv->pending_flip_queue,
> > > +					      !(dev_priv->pending_atomic & crtc_mask));
> > > +	if (ret == 0)
> > > +		dev_priv->pending_atomic |= crtc_mask;
> > > +	spin_unlock_irq(&dev_priv->pending_flip_queue.lock);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +/* Finish pending flip operation on specified CRTC's */
> > > +static void flip_completion(struct intel_pending_atomic *commit)
> > > +{
> > > +	struct drm_i915_private *dev_priv = commit->dev->dev_private;
> > > +
> > > +	spin_lock_irq(&dev_priv->pending_flip_queue.lock);
> > > +	dev_priv->pending_atomic &= ~commit->crtc_mask;
> > > +	wake_up_all_locked(&dev_priv->pending_flip_queue);
> > > +	spin_unlock_irq(&dev_priv->pending_flip_queue.lock);
> > > +}
> > > +
> > > +/*
> > > + * Finish an atomic commit.  The work here can be performed asynchronously
> > > + * if desired.  The new state has already been applied to the DRM objects
> > > + * and no modeset locks are needed.
> > > + */
> > > +static void finish_atomic_commit(struct work_struct *work)
> > > +{
> > > +	struct intel_pending_atomic *commit =
> > > +		container_of(work, struct intel_pending_atomic, work);
> > > +	struct drm_device *dev = commit->dev;
> > > +	struct drm_crtc *crtc;
> > > +	struct drm_atomic_state *state = commit->state;
> > > +	int i;
> > > +
> > > +	/*
> > > +	 * FIXME:  The proper sequence here will eventually be:
> > > +	 *
> > > +	 * drm_atomic_helper_commit_pre_planes(dev, state);
> > > +	 * drm_atomic_helper_commit_planes(dev, state);
> > > +	 * drm_atomic_helper_commit_post_planes(dev, state);
> > > +	 * drm_atomic_helper_wait_for_vblanks(dev, state);
> > > +	 * drm_atomic_helper_cleanup_planes(dev, state);
> > > +	 * drm_atomic_state_free(state);
> > > +	 *
> > > +	 * once we have full atomic modeset.  For now, just manually update
> > > +	 * plane states to avoid clobbering good states with dummy states
> > > +	 * while nuclear pageflipping.
> > > +	 */
> > > +	drm_atomic_helper_commit_planes(dev, state);
> > > +	drm_atomic_helper_wait_for_vblanks(dev, state);
> > > +
> > > +	/* Send CRTC completion events. */
> > > +	for (i = 0; i < dev->mode_config.num_crtc; i++) {
> > > +		crtc = state->crtcs[i];
> > > +		if (crtc && crtc->state->event) {
> > > +			spin_lock_irq(&dev->event_lock);
> > > +			drm_send_vblank_event(dev, to_intel_crtc(crtc)->pipe,
> > > +					      crtc->state->event);
> > > +			spin_unlock_irq(&dev->event_lock);
> > > +			crtc->state->event = NULL;
> > > +		}
> > > +	}
> > > +
> > > +	drm_atomic_helper_cleanup_planes(dev, state);
> > > +	drm_atomic_state_free(state);
> > > +
> > > +	flip_completion(commit);
> > > +	kfree(commit);
> > > +}
> > > +
> > >  /**
> > >   * intel_atomic_commit - commit validated state object
> > >   * @dev: DRM device
> > > @@ -116,34 +199,48 @@ int intel_atomic_commit(struct drm_device *dev,
> > >  			struct drm_atomic_state *state,
> > >  			bool async)
> > >  {
> > > +	struct intel_pending_atomic *commit;
> > >  	int ret;
> > >  	int i;
> > >  
> > > -	if (async) {
> > > -		DRM_DEBUG_KMS("i915 does not yet support async commit\n");
> > > -		return -EINVAL;
> > > -	}
> > > -
> > >  	ret = drm_atomic_helper_prepare_planes(dev, state);
> > >  	if (ret)
> > >  		return ret;
> > >  
> > > -	/* Point of no return */
> > > +	commit = kzalloc(sizeof(*commit), GFP_KERNEL);
> > > +	if (!commit)
> > > +		return -ENOMEM;
> > > +
> > > +	commit->dev = dev;
> > > +	commit->state = state;
> > > +
> > > +	for (i = 0; i < dev->mode_config.num_crtc; i++)
> > > +		if (state->crtcs[i])
> > > +			commit->crtc_mask |=
> > > +				(1 << drm_crtc_index(state->crtcs[i]));
> > >  
> > >  	/*
> > > -	 * FIXME:  The proper sequence here will eventually be:
> > > -	 *
> > > -	 * drm_atomic_helper_swap_state(dev, state)
> > > -	 * drm_atomic_helper_commit_pre_planes(dev, state);
> > > -	 * drm_atomic_helper_commit_planes(dev, state);
> > > -	 * drm_atomic_helper_commit_post_planes(dev, state);
> > > -	 * drm_atomic_helper_wait_for_vblanks(dev, state);
> > > -	 * drm_atomic_helper_cleanup_planes(dev, state);
> > > -	 * drm_atomic_state_free(state);
> > > -	 *
> > > -	 * once we have full atomic modeset.  For now, just manually update
> > > -	 * plane states to avoid clobbering good states with dummy states
> > > -	 * while nuclear pageflipping.
> > > +	 * If there's already a flip pending, we won't schedule another one
> > > +	 * until it completes.  We may relax this in the future, but for now
> > > +	 * this matches the legacy pageflip behavior.
> > > +	 */
> > > +	ret = wait_for_pending_flip(commit->crtc_mask, commit);
> > > +	if (ret) {
> > > +		kfree(commit);
> > > +		return ret;
> > > +	}
> > > +
> > > +	/*
> > > +	 * Point of no return; everything from here on can't fail, so swap
> > > +	 * the new state into the DRM objects.
> > > +	 */
> > > +
> > > +	/*
> > > +	 * FIXME:  Should eventually use drm_atomic_helper_swap_state().
> > > +	 * However since we only handle planes at the moment, we don't
> > > +	 * want to clobber our good crtc state with something bogus,
> > > +	 * so just manually swap the plane states and copy over any completion
> > > +	 * events for CRTC's.
> > >  	 */
> > >  	for (i = 0; i < dev->mode_config.num_total_plane; i++) {
> > >  		struct drm_plane *plane = state->planes[i];
> > > @@ -155,10 +252,29 @@ int intel_atomic_commit(struct drm_device *dev,
> > >  		swap(state->plane_states[i], plane->state);
> > >  		plane->state->state = NULL;
> > >  	}
> > > -	drm_atomic_helper_commit_planes(dev, state);
> > > -	drm_atomic_helper_wait_for_vblanks(dev, state);
> > > -	drm_atomic_helper_cleanup_planes(dev, state);
> > > -	drm_atomic_state_free(state);
> > > +
> > > +	for (i = 0; i < dev->mode_config.num_crtc; i++) {
> > > +		struct drm_crtc *crtc = state->crtcs[i];
> > > +		struct drm_crtc_state *crtc_state = state->crtc_states[i];
> > > +
> > > +		if (!crtc || !crtc_state)
> > > +			continue;
> > > +
> > > +		/* n.b., we only copy the event and enabled flag for now */
> > > +		swap(crtc->state->event, crtc_state->event);
> > > +		swap(crtc->state->enable, crtc_state->enable);
> > > +	}
> > 
> > Before we can swap in the new state we need to stall for any outstanding
> > async flip to complete. At least until we have a full-blown flip queue.
> 
> I think the wait_for_pending_flip() call above the two comment blocks
> before the loop should take care of that -- it waits until all the
> CRTCs in commit->crtc_mask have no pending flip, then atomically marks
> them as busy before proceeding.

Right, completely missed that one, must have been blind. Looks sensible.

Another lesson to keep in mind from the old pageflip code: free-standing
flip/unpin work items that communicate their completion status to the core
of the driver only through some bitmasks are a pain to integrate into the
shrinker. This matters even more with atomic flips, since we must be able
to stall for the unpin to complete on any outstanding flips if we run out
of ggtt space.

Currently that's handled by restarting the entire ioctl from the eviction
code (see the EAGAIN in i915_gem_evict.c), which isn't terribly pretty.
Imo an explicit queue which the shrinker could stall on directly (and
maybe even process synchronously if needed) would be much better. With
atomic that should be possible, since the async worker doesn't need any
modeset locks, only dev->struct_mutex to unpin objects.
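
Roughly what I have in mind, reusing the pending_flip_queue/pending_atomic
machinery from this patch (the helper name is made up, and this only
sketches the stall side, without the explicit queue yet):

	/*
	 * Hypothetical helper, not part of this patch: lets the shrinker
	 * stall directly until every pending atomic flip has completed,
	 * and hence unpinned its old framebuffers, instead of bouncing
	 * the whole ioctl back out with EAGAIN.
	 */
	static int i915_stall_pending_flips(struct drm_i915_private *dev_priv)
	{
		int ret;

		spin_lock_irq(&dev_priv->pending_flip_queue.lock);
		ret = wait_event_interruptible_locked_irq(dev_priv->pending_flip_queue,
							  !dev_priv->pending_atomic);
		spin_unlock_irq(&dev_priv->pending_flip_queue.lock);

		return ret;
	}

With a real commit queue instead of just the bitmask the shrinker could
even walk the entries and process them synchronously, but the stall alone
would already avoid the EAGAIN restart dance.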

Anyway, something to keep in mind, but maybe only once we have the full
flip queue in place. For now the important part is not to break things,
but I think we should have full igt coverage for everything (though maybe
not in the limited prts subset).

Another thing I might or might not have missed is the async waiting.
Currently prepare_plane_fb does a synchronous stall for rendering; that
needs to be split up like in the mmio_flip code, which does an explicit
wait_request in the work function. Long-term I'd like to use the
plane_state->fence pointer, but unfortunately i915 requests haven't been
subclassed properly yet. So we'd need our own request pointer for now - at
least those are refcounted already.
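
Roughly like this, with the current request API (flip_queued_req on
intel_pending_atomic is a made-up field here, and obj stands for the new
framebuffer's bo, just to sketch the idea):

	/* In intel_atomic_commit(), instead of stalling synchronously in
	 * prepare_plane_fb, just grab a reference to any outstanding
	 * rendering: */
	commit->flip_queued_req =
		i915_gem_request_reference(obj->last_write_req);

	/* At the top of finish_atomic_commit(), do the actual wait; the
	 * worker can take struct_mutex since it holds no modeset locks: */
	if (commit->flip_queued_req) {
		mutex_lock(&dev->struct_mutex);
		WARN_ON(i915_wait_request(commit->flip_queued_req));
		i915_gem_request_unreference(commit->flip_queued_req);
		commit->flip_queued_req = NULL;
		mutex_unlock(&dev->struct_mutex);
	}

Which is more or less what the mmio_flip work function does already,
modulo the reset counter handling.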

And one more wishlist item: would it be feasible to build up the atomic
async stuff from the mmio_flip code? Just for additional continuity and
testing ...
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

