[PATCH v5 22/22] drm/i915: Solve the GPU reset vs. modeset deadlocks with an rw_semaphore

Daniel Vetter daniel at ffwll.ch
Mon Jul 10 06:48:24 UTC 2017


On Thu, Jul 06, 2017 at 11:24:42PM +0300, ville.syrjala at linux.intel.com wrote:
> From: Ville Syrjälä <ville.syrjala at linux.intel.com>
> 
> Introduce an rw_semaphore to protect the display commits. All normal
> commits use down_read() and hence can proceed in parallel, but GPU reset
> will use down_write() making sure no other commits are in progress when
> we have to pull the plug on the display engine on pre-g4x platforms.
> There are no modeset/gem locks taken inside __intel_atomic_commit_tail()
> itself, and we wait for all dependencies before the down_read(), and
> thus there is no chance of deadlocks with this scheme.

After all these discussions I get the feeling the deadlocks is a lot more
involved than what I thought it is. But your patch doesn't really describe
it. Can you pls fix that?

> During reset we will recommit the state that was commited last. Hence we
> require tracking which state that was, and we need a way to duplicate
> that state. That is now handled by the atomic core and helpers. We also
> have to be sure that none of the commit codepaths look up
> plane->state, crtc->state, etc. as that would lead us to pontetially
> commit some future state prematurely. crtc->config is actually fine
> since that gets update during the commit as well (although eventually
> we do want to rid ourselves of crtc->config).
> 
> I left out the state readout from the post-reset display
> reinitialization as that still likes to clobber crtc->state etc.
> If we make it use a free standing atomic state and mke sure it doesn't
> need any locks we could reintroduce it. For now I just mark the
> post-reset display state as dirty as possible to make sure we
> reinitialize everything.
> 
> One potential issue remains in the form of display detection. Either
> we need to protect that with the same rw_semaphore as well, or perhaps
> it would be enough to force gmbus into bitbanging mode while the reset
> is happening and we don't have interrupts, and just across the actual
> hardware GPU reset we could hold the gmbus mutex.

Can you pls also elaborate on what's the problem here?

The other problem we've never fixed is that by shutting down the pipe we
also kill vblank interrupts, and userspace can notice that, just as an
aside.

Cheers, Daniel

> v2: Keep intel_prepare/finish_reset() outside struct_mutex (Chris)
> v3: Drop all the committed_state refactoring to make this less
>     obnoxious to backport (Daniel)
> v4: Preserve the wedge timeout mechanism (Chris)
> v5: Go back to the full committed state tracking since it actually
>     is needed
> 
> Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_drv.h      |   2 +
>  drivers/gpu/drm/i915/intel_display.c | 220 ++++++++++++++++++++++++-----------
>  drivers/gpu/drm/i915/intel_sprite.c  |   1 +
>  3 files changed, 157 insertions(+), 66 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index baec61b078f5..432b356017ba 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2247,6 +2247,8 @@ struct drm_i915_private {
>  	struct drm_atomic_state *modeset_restore_state;
>  	struct drm_modeset_acquire_ctx reset_ctx;
>  
> +	struct rw_semaphore commit_sem;
> +
>  	struct list_head vm_list; /* Global list of all address spaces */
>  	struct i915_ggtt ggtt; /* VM representing the global address space */
>  
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index e9c85d7cbb3e..e8ff5095ede3 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -123,6 +123,7 @@ static void ironlake_pfit_enable(struct intel_crtc *crtc);
>  static void intel_modeset_setup_hw_state(struct drm_device *dev,
>  					 struct drm_modeset_acquire_ctx *ctx);
>  static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
> +static void __intel_atomic_commit_tail(struct drm_atomic_state *state, bool is_reset);
>  
>  struct intel_limit {
>  	struct {
> @@ -3432,15 +3433,15 @@ static void intel_update_primary_planes(struct drm_device *dev)
>  
>  	for_each_crtc(dev, crtc) {
>  		struct intel_plane *plane = to_intel_plane(crtc->primary);
> -		struct intel_plane_state *plane_state =
> -			to_intel_plane_state(plane->base.state);
> +		const struct intel_plane_state *plane_state =
> +			to_intel_plane_state(plane->base.committed_state);
>  
>  		if (plane_state->base.visible) {
>  			trace_intel_update_plane(&plane->base,
>  						 to_intel_crtc(crtc));
>  
>  			plane->update_plane(plane,
> -					    to_intel_crtc_state(crtc->state),
> +					    to_intel_crtc_state(crtc->committed_state),
>  					    plane_state);
>  		}
>  	}
> @@ -3491,27 +3492,97 @@ static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
>  		INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv);
>  }
>  
> -void intel_prepare_reset(struct drm_i915_private *dev_priv)
> +static void init_intel_state(struct intel_atomic_state *state)
> +{
> +	struct drm_crtc *crtc;
> +	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
> +	int i;
> +
> +	state->modeset = true;
> +
> +	for_each_oldnew_crtc_in_state(&state->base, crtc, old_crtc_state, new_crtc_state, i) {
> +		if (new_crtc_state->active)
> +			state->active_crtcs |= 1 << i;
> +		else
> +			state->active_crtcs &= ~(1 << i);
> +
> +		if (old_crtc_state->active != new_crtc_state->active)
> +			state->active_pipe_changes |= drm_crtc_mask(crtc);
> +	}
> +}
> +
> +static struct drm_atomic_state *
> +intel_duplicate_committed_state(struct drm_i915_private *dev_priv)
>  {
> -	struct drm_device *dev = &dev_priv->drm;
> -	struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
>  	struct drm_atomic_state *state;
> -	int ret;
> +	struct drm_crtc *crtc;
> +	struct drm_crtc_state *crtc_state;
> +	int i;
>  
> -	/*
> -	 * Need mode_config.mutex so that we don't
> -	 * trample ongoing ->detect() and whatnot.
> -	 */
> -	mutex_lock(&dev->mode_config.mutex);
> -	drm_modeset_acquire_init(ctx, 0);
> -	while (1) {
> -		ret = drm_modeset_lock_all_ctx(dev, ctx);
> -		if (ret != -EDEADLK)
> -			break;
> +	state = drm_atomic_helper_duplicate_committed_state(&dev_priv->drm);
> +	if (IS_ERR(state)) {
> +		DRM_ERROR("Duplicating state failed with %ld\n",
> +			  PTR_ERR(state));
> +		return NULL;
> +	}
> +
> +	/* handle private obj states */
> +	/* FIXME could have the core track private objs too */
> +	for (i = 0; i < I915_MAX_PORTS; i++) {
> +		struct intel_digital_port *intel_dig_port =
> +			dev_priv->hotplug.irq_port[i];
> +
> +		if (!intel_dig_port || !intel_dig_port->dp.can_mst)
> +			continue;
> +
> +		drm_atomic_helper_duplicate_private_obj_committed_state(state,
> +									&intel_dig_port->dp.mst_mgr.base);
> +	}
> +
> +	to_intel_atomic_state(state)->active_crtcs = 0;
> +	to_intel_atomic_state(state)->cdclk.logical = dev_priv->cdclk.hw;
> +	to_intel_atomic_state(state)->cdclk.actual = dev_priv->cdclk.hw;
> +
> +	init_intel_state(to_intel_atomic_state(state));
> +
> +	/* force a full update */
> +	for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
> +		struct intel_crtc_state *intel_crtc_state =
> +			to_intel_crtc_state(crtc_state);
> +
> +		if (!crtc_state->active)
> +			continue;
> +
> +		crtc_state->mode_changed = true;
> +		crtc_state->active_changed = true;
> +		crtc_state->planes_changed = true;
> +		crtc_state->connectors_changed = true;
> +		crtc_state->color_mgmt_changed = true;
> +		crtc_state->zpos_changed = true;
>  
> -		drm_modeset_backoff(ctx);
> +		intel_crtc_state->update_pipe = true;
> +		intel_crtc_state->disable_lp_wm = true;
> +		intel_crtc_state->disable_cxsr = true;
> +		intel_crtc_state->update_wm_post = true;
> +		intel_crtc_state->fb_changed = true;
> +		intel_crtc_state->fifo_changed = true;
> +		intel_crtc_state->wm.need_postvbl_update = true;
>  	}
>  
> +	return state;
> +}
> +
> +void intel_prepare_reset(struct drm_i915_private *dev_priv)
> +{
> +	struct drm_atomic_state *disable_state, *restore_state = NULL;
> +	struct drm_crtc *crtc;
> +	struct drm_crtc_state *crtc_state;
> +	struct drm_plane *plane;
> +	struct drm_plane_state *plane_state;
> +	int i;
> +
> +	down_write(&dev_priv->commit_sem);
> +
>  	/* reset doesn't touch the display, but flips might get nuked anyway, */
>  	if (!i915.force_reset_modeset_test &&
>  	    !gpu_reset_clobbers_display(dev_priv))
> @@ -3521,30 +3592,40 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv)
>  	 * Disabling the crtcs gracefully seems nicer. Also the
>  	 * g33 docs say we should at least disable all the planes.
>  	 */
> -	state = drm_atomic_helper_duplicate_state(dev, ctx);
> -	if (IS_ERR(state)) {
> -		ret = PTR_ERR(state);
> -		DRM_ERROR("Duplicating state failed with %i\n", ret);
> -		return;
> -	}
> +	disable_state = intel_duplicate_committed_state(dev_priv);
> +	if (IS_ERR(disable_state))
> +		goto out;
>  
> -	ret = drm_atomic_helper_disable_all(dev, ctx);
> -	if (ret) {
> -		DRM_ERROR("Suspending crtc's failed with %i\n", ret);
> -		drm_atomic_state_put(state);
> -		return;
> -	}
> +	to_intel_atomic_state(disable_state)->active_crtcs = 0;
>  
> -	dev_priv->modeset_restore_state = state;
> -	state->acquire_ctx = ctx;
> +	for_each_new_crtc_in_state(disable_state, crtc, crtc_state, i)
> +		crtc_state->active = false;
> +	for_each_new_plane_in_state(disable_state, plane, plane_state, i)
> +		plane_state->visible = false;
> +
> +	__intel_atomic_commit_tail(disable_state, true);
> +
> +	drm_atomic_helper_clean_committed_state(disable_state);
> +	drm_atomic_state_put(disable_state);
> +
> +	restore_state = intel_duplicate_committed_state(dev_priv);
> +	if (IS_ERR(restore_state))
> +		restore_state = NULL;
> +
> +	for_each_old_crtc_in_state(restore_state, crtc, crtc_state, i)
> +		crtc_state->active = false;
> +	for_each_old_plane_in_state(restore_state, plane, plane_state, i)
> +		plane_state->visible = false;
> +
> +out:
> +	dev_priv->modeset_restore_state = restore_state;
>  }
>  
>  void intel_finish_reset(struct drm_i915_private *dev_priv)
>  {
>  	struct drm_device *dev = &dev_priv->drm;
> -	struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
> -	struct drm_atomic_state *state = dev_priv->modeset_restore_state;
> -	int ret;
> +	struct drm_atomic_state *restore_state =
> +		dev_priv->modeset_restore_state;
>  
>  	/*
>  	 * Flips in the rings will be nuked by the reset,
> @@ -3557,7 +3638,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
>  
>  	/* reset doesn't touch the display */
>  	if (!gpu_reset_clobbers_display(dev_priv)) {
> -		if (!state) {
> +		if (!restore_state) {
>  			/*
>  			 * Flips in the rings have been nuked by the reset,
>  			 * so update the base address of all primary
> @@ -3569,11 +3650,11 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
>  			 */
>  			intel_update_primary_planes(dev);
>  		} else {
> -			ret = __intel_display_resume(dev, state, ctx);
> -			if (ret)
> -				DRM_ERROR("Restoring old state failed with %i\n", ret);
> +			__intel_atomic_commit_tail(restore_state, true);
>  		}
>  	} else {
> +		i915_redisable_vga(dev_priv);
> +
>  		/*
>  		 * The display has been reset as well,
>  		 * so need a full re-initialization.
> @@ -3589,18 +3670,17 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
>  			dev_priv->display.hpd_irq_setup(dev_priv);
>  		spin_unlock_irq(&dev_priv->irq_lock);
>  
> -		ret = __intel_display_resume(dev, state, ctx);
> -		if (ret)
> -			DRM_ERROR("Restoring old state failed with %i\n", ret);
> +		__intel_atomic_commit_tail(restore_state, true);
>  
>  		intel_hpd_init(dev_priv);
>  	}
>  
> -	if (state)
> -		drm_atomic_state_put(state);
> -	drm_modeset_drop_locks(ctx);
> -	drm_modeset_acquire_fini(ctx);
> -	mutex_unlock(&dev->mode_config.mutex);
> +	if (restore_state) {
> +		drm_atomic_helper_clean_committed_state(restore_state);
> +		drm_atomic_state_put(restore_state);
> +	}
> +
> +	up_write(&dev_priv->commit_sem);
>  }
>  
>  static bool abort_flip_on_reset(struct intel_crtc *crtc)
> @@ -12609,29 +12689,18 @@ static int intel_modeset_checks(struct drm_atomic_state *state)
>  {
>  	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
>  	struct drm_i915_private *dev_priv = to_i915(state->dev);
> -	struct drm_crtc *crtc;
> -	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
> -	int ret = 0, i;
> +	int ret = 0;
>  
>  	if (!check_digital_port_conflicts(state)) {
>  		DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n");
>  		return -EINVAL;
>  	}
>  
> -	intel_state->modeset = true;
>  	intel_state->active_crtcs = dev_priv->active_crtcs;
>  	intel_state->cdclk.logical = dev_priv->cdclk.logical;
>  	intel_state->cdclk.actual = dev_priv->cdclk.actual;
>  
> -	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
> -		if (new_crtc_state->active)
> -			intel_state->active_crtcs |= 1 << i;
> -		else
> -			intel_state->active_crtcs &= ~(1 << i);
> -
> -		if (old_crtc_state->active != new_crtc_state->active)
> -			intel_state->active_pipe_changes |= drm_crtc_mask(crtc);
> -	}
> +	init_intel_state(intel_state);
>  
>  	/*
>  	 * See if the config requires any additional preparation, e.g.
> @@ -13021,7 +13090,7 @@ static void intel_atomic_helper_free_state_worker(struct work_struct *work)
>  	intel_atomic_helper_free_state(dev_priv);
>  }
>  
> -static void __intel_atomic_commit_tail(struct drm_atomic_state *state)
> +static void __intel_atomic_commit_tail(struct drm_atomic_state *state, bool is_reset)
>  {
>  	struct drm_device *dev = state->dev;
>  	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
> @@ -13085,10 +13154,18 @@ static void __intel_atomic_commit_tail(struct drm_atomic_state *state)
>  
>  	/* Only after disabling all output pipelines that will be changed can we
>  	 * update the the output configuration. */
> -	intel_modeset_update_crtc_state(state);
> +	if (!is_reset)
> +		intel_modeset_update_crtc_state(state);
>  
>  	if (intel_state->modeset) {
> -		drm_atomic_helper_update_legacy_modeset_state(state->dev, state);
> +		if (!is_reset) {
> +			drm_atomic_helper_update_legacy_modeset_state(state->dev, state);
> +		} else {
> +			for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
> +				if (new_crtc_state->enable)
> +					drm_calc_timestamping_constants(crtc, &new_crtc_state->adjusted_mode);
> +			}
> +		}
>  
>  		intel_set_cdclk(dev_priv, &dev_priv->cdclk.actual);
>  
> @@ -13099,7 +13176,8 @@ static void __intel_atomic_commit_tail(struct drm_atomic_state *state)
>  		if (!intel_can_enable_sagv(state))
>  			intel_disable_sagv(dev_priv);
>  
> -		intel_modeset_verify_disabled(dev, state);
> +		if (!is_reset)
> +			intel_modeset_verify_disabled(dev, state);
>  	}
>  
>  	/* Complete the events for pipes that have now been disabled */
> @@ -13152,7 +13230,8 @@ static void __intel_atomic_commit_tail(struct drm_atomic_state *state)
>  		if (put_domains[i])
>  			modeset_put_power_domains(dev_priv, put_domains[i]);
>  
> -		intel_modeset_verify_crtc(crtc, state, old_crtc_state, new_crtc_state);
> +		if (!is_reset)
> +			intel_modeset_verify_crtc(crtc, state, old_crtc_state, new_crtc_state);
>  	}
>  
>  	if (intel_state->modeset && intel_can_enable_sagv(state))
> @@ -13177,10 +13256,14 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
>  
>  	drm_atomic_helper_wait_for_dependencies(state);
>  
> -	__intel_atomic_commit_tail(state);
> +	down_read(&dev_priv->commit_sem);
> +
> +	__intel_atomic_commit_tail(state, false);
>  
>  	drm_atomic_helper_commit_hw_done(state);
>  
> +	up_read(&dev_priv->commit_sem);
> +
>  	mutex_lock(&dev->struct_mutex);
>  	drm_atomic_helper_cleanup_planes(dev, state);
>  	mutex_unlock(&dev->struct_mutex);
> @@ -13779,6 +13862,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
>  	}
>  
>  	primary->base.state = &state->base;
> +	primary->base.committed_state = &state->base;
>  
>  	primary->can_scale = false;
>  	primary->max_downscale = 1;
> @@ -13892,6 +13976,7 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv,
>  	}
>  
>  	cursor->base.state = &state->base;
> +	cursor->base.committed_state = &state->base;
>  
>  	cursor->can_scale = false;
>  	cursor->max_downscale = 1;
> @@ -13986,6 +14071,7 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
>  	}
>  	intel_crtc->config = crtc_state;
>  	intel_crtc->base.state = &crtc_state->base;
> +	intel_crtc->base.committed_state = &crtc_state->base;
>  	crtc_state->base.crtc = &intel_crtc->base;
>  
>  	primary = intel_primary_plane_create(dev_priv, pipe);
> @@ -15059,6 +15145,8 @@ int intel_modeset_init(struct drm_device *dev)
>  	INIT_WORK(&dev_priv->atomic_helper.free_work,
>  		  intel_atomic_helper_free_state_worker);
>  
> +	init_rwsem(&dev_priv->commit_sem);
> +
>  	intel_init_quirks(dev);
>  
>  	intel_init_pm(dev_priv);
> diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
> index aed4fa833b48..9d68db31a0f1 100644
> --- a/drivers/gpu/drm/i915/intel_sprite.c
> +++ b/drivers/gpu/drm/i915/intel_sprite.c
> @@ -1097,6 +1097,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
>  		goto fail;
>  	}
>  	intel_plane->base.state = &state->base;
> +	intel_plane->base.committed_state = &state->base;
>  
>  	if (INTEL_GEN(dev_priv) >= 9) {
>  		intel_plane->can_scale = true;
> -- 
> 2.13.0
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the dri-devel mailing list