[PATCH v5 22/22] drm/i915: Solve the GPU reset vs. modeset deadlocks with an rw_semaphore

ville.syrjala at linux.intel.com ville.syrjala at linux.intel.com
Thu Jul 6 20:24:42 UTC 2017


From: Ville Syrjälä <ville.syrjala at linux.intel.com>

Introduce an rw_semaphore to protect the display commits. All normal
commits use down_read() and hence can proceed in parallel, but GPU reset
will use down_write() making sure no other commits are in progress when
we have to pull the plug on the display engine on pre-g4x platforms.
There are no modeset/gem locks taken inside __intel_atomic_commit_tail()
itself, and we wait for all dependencies before the down_read(), and
thus there is no chance of deadlocks with this scheme.

During reset we will recommit the state that was commited last. Hence we
require tracking which state that was, and we need a way to duplicate
that state. That is now handled by the atomic core and helpers. We also
have to be sure that none of the commit codepaths look up
plane->state, crtc->state, etc. as that would lead us to pontetially
commit some future state prematurely. crtc->config is actually fine
since that gets update during the commit as well (although eventually
we do want to rid ourselves of crtc->config).

I left out the state readout from the post-reset display
reinitialization as that still likes to clobber crtc->state etc.
If we make it use a free standing atomic state and mke sure it doesn't
need any locks we could reintroduce it. For now I just mark the
post-reset display state as dirty as possible to make sure we
reinitialize everything.

One potential issue remains in the form of display detection. Either
we need to protect that with the same rw_semaphore as well, or perhaps
it would be enough to force gmbus into bitbanging mode while the reset
is happening and we don't have interrupts, and just across the actual
hardware GPU reset we could hold the gmbus mutex.

v2: Keep intel_prepare/finish_reset() outside struct_mutex (Chris)
v3: Drop all the committed_state refactoring to make this less
    obnoxious to backport (Daniel)
v4: Preserve the wedge timeout mechanism (Chris)
v5: Go back to the full committed state tracking since it actually
    is needed

Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h      |   2 +
 drivers/gpu/drm/i915/intel_display.c | 220 ++++++++++++++++++++++++-----------
 drivers/gpu/drm/i915/intel_sprite.c  |   1 +
 3 files changed, 157 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index baec61b078f5..432b356017ba 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2247,6 +2247,8 @@ struct drm_i915_private {
 	struct drm_atomic_state *modeset_restore_state;
 	struct drm_modeset_acquire_ctx reset_ctx;
 
+	struct rw_semaphore commit_sem;
+
 	struct list_head vm_list; /* Global list of all address spaces */
 	struct i915_ggtt ggtt; /* VM representing the global address space */
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e9c85d7cbb3e..e8ff5095ede3 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -123,6 +123,7 @@ static void ironlake_pfit_enable(struct intel_crtc *crtc);
 static void intel_modeset_setup_hw_state(struct drm_device *dev,
 					 struct drm_modeset_acquire_ctx *ctx);
 static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
+static void __intel_atomic_commit_tail(struct drm_atomic_state *state, bool is_reset);
 
 struct intel_limit {
 	struct {
@@ -3432,15 +3433,15 @@ static void intel_update_primary_planes(struct drm_device *dev)
 
 	for_each_crtc(dev, crtc) {
 		struct intel_plane *plane = to_intel_plane(crtc->primary);
-		struct intel_plane_state *plane_state =
-			to_intel_plane_state(plane->base.state);
+		const struct intel_plane_state *plane_state =
+			to_intel_plane_state(plane->base.committed_state);
 
 		if (plane_state->base.visible) {
 			trace_intel_update_plane(&plane->base,
 						 to_intel_crtc(crtc));
 
 			plane->update_plane(plane,
-					    to_intel_crtc_state(crtc->state),
+					    to_intel_crtc_state(crtc->committed_state),
 					    plane_state);
 		}
 	}
@@ -3491,27 +3492,97 @@ static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
 		INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv);
 }
 
-void intel_prepare_reset(struct drm_i915_private *dev_priv)
+static void init_intel_state(struct intel_atomic_state *state)
+{
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+	int i;
+
+	state->modeset = true;
+
+	for_each_oldnew_crtc_in_state(&state->base, crtc, old_crtc_state, new_crtc_state, i) {
+		if (new_crtc_state->active)
+			state->active_crtcs |= 1 << i;
+		else
+			state->active_crtcs &= ~(1 << i);
+
+		if (old_crtc_state->active != new_crtc_state->active)
+			state->active_pipe_changes |= drm_crtc_mask(crtc);
+	}
+}
+
+static struct drm_atomic_state *
+intel_duplicate_committed_state(struct drm_i915_private *dev_priv)
 {
-	struct drm_device *dev = &dev_priv->drm;
-	struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
 	struct drm_atomic_state *state;
-	int ret;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	int i;
 
-	/*
-	 * Need mode_config.mutex so that we don't
-	 * trample ongoing ->detect() and whatnot.
-	 */
-	mutex_lock(&dev->mode_config.mutex);
-	drm_modeset_acquire_init(ctx, 0);
-	while (1) {
-		ret = drm_modeset_lock_all_ctx(dev, ctx);
-		if (ret != -EDEADLK)
-			break;
+	state = drm_atomic_helper_duplicate_committed_state(&dev_priv->drm);
+	if (IS_ERR(state)) {
+		DRM_ERROR("Duplicating state failed with %ld\n",
+			  PTR_ERR(state));
+		return NULL;
+	}
+
+	/* handle private obj states */
+	/* FIXME could have the core track private objs too */
+	for (i = 0; i < I915_MAX_PORTS; i++) {
+		struct intel_digital_port *intel_dig_port =
+			dev_priv->hotplug.irq_port[i];
+
+		if (!intel_dig_port || !intel_dig_port->dp.can_mst)
+			continue;
+
+		drm_atomic_helper_duplicate_private_obj_committed_state(state,
+									&intel_dig_port->dp.mst_mgr.base);
+	}
+
+	to_intel_atomic_state(state)->active_crtcs = 0;
+	to_intel_atomic_state(state)->cdclk.logical = dev_priv->cdclk.hw;
+	to_intel_atomic_state(state)->cdclk.actual = dev_priv->cdclk.hw;
+
+	init_intel_state(to_intel_atomic_state(state));
+
+	/* force a full update */
+	for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
+		struct intel_crtc_state *intel_crtc_state =
+			to_intel_crtc_state(crtc_state);
+
+		if (!crtc_state->active)
+			continue;
+
+		crtc_state->mode_changed = true;
+		crtc_state->active_changed = true;
+		crtc_state->planes_changed = true;
+		crtc_state->connectors_changed = true;
+		crtc_state->color_mgmt_changed = true;
+		crtc_state->zpos_changed = true;
 
-		drm_modeset_backoff(ctx);
+		intel_crtc_state->update_pipe = true;
+		intel_crtc_state->disable_lp_wm = true;
+		intel_crtc_state->disable_cxsr = true;
+		intel_crtc_state->update_wm_post = true;
+		intel_crtc_state->fb_changed = true;
+		intel_crtc_state->fifo_changed = true;
+		intel_crtc_state->wm.need_postvbl_update = true;
 	}
 
+	return state;
+}
+
+void intel_prepare_reset(struct drm_i915_private *dev_priv)
+{
+	struct drm_atomic_state *disable_state, *restore_state = NULL;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	struct drm_plane *plane;
+	struct drm_plane_state *plane_state;
+	int i;
+
+	down_write(&dev_priv->commit_sem);
+
 	/* reset doesn't touch the display, but flips might get nuked anyway, */
 	if (!i915.force_reset_modeset_test &&
 	    !gpu_reset_clobbers_display(dev_priv))
@@ -3521,30 +3592,40 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv)
 	 * Disabling the crtcs gracefully seems nicer. Also the
 	 * g33 docs say we should at least disable all the planes.
 	 */
-	state = drm_atomic_helper_duplicate_state(dev, ctx);
-	if (IS_ERR(state)) {
-		ret = PTR_ERR(state);
-		DRM_ERROR("Duplicating state failed with %i\n", ret);
-		return;
-	}
+	disable_state = intel_duplicate_committed_state(dev_priv);
+	if (IS_ERR(disable_state))
+		goto out;
 
-	ret = drm_atomic_helper_disable_all(dev, ctx);
-	if (ret) {
-		DRM_ERROR("Suspending crtc's failed with %i\n", ret);
-		drm_atomic_state_put(state);
-		return;
-	}
+	to_intel_atomic_state(disable_state)->active_crtcs = 0;
 
-	dev_priv->modeset_restore_state = state;
-	state->acquire_ctx = ctx;
+	for_each_new_crtc_in_state(disable_state, crtc, crtc_state, i)
+		crtc_state->active = false;
+	for_each_new_plane_in_state(disable_state, plane, plane_state, i)
+		plane_state->visible = false;
+
+	__intel_atomic_commit_tail(disable_state, true);
+
+	drm_atomic_helper_clean_committed_state(disable_state);
+	drm_atomic_state_put(disable_state);
+
+	restore_state = intel_duplicate_committed_state(dev_priv);
+	if (IS_ERR(restore_state))
+		restore_state = NULL;
+
+	for_each_old_crtc_in_state(restore_state, crtc, crtc_state, i)
+		crtc_state->active = false;
+	for_each_old_plane_in_state(restore_state, plane, plane_state, i)
+		plane_state->visible = false;
+
+out:
+	dev_priv->modeset_restore_state = restore_state;
 }
 
 void intel_finish_reset(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
-	struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
-	struct drm_atomic_state *state = dev_priv->modeset_restore_state;
-	int ret;
+	struct drm_atomic_state *restore_state =
+		dev_priv->modeset_restore_state;
 
 	/*
 	 * Flips in the rings will be nuked by the reset,
@@ -3557,7 +3638,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 
 	/* reset doesn't touch the display */
 	if (!gpu_reset_clobbers_display(dev_priv)) {
-		if (!state) {
+		if (!restore_state) {
 			/*
 			 * Flips in the rings have been nuked by the reset,
 			 * so update the base address of all primary
@@ -3569,11 +3650,11 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 			 */
 			intel_update_primary_planes(dev);
 		} else {
-			ret = __intel_display_resume(dev, state, ctx);
-			if (ret)
-				DRM_ERROR("Restoring old state failed with %i\n", ret);
+			__intel_atomic_commit_tail(restore_state, true);
 		}
 	} else {
+		i915_redisable_vga(dev_priv);
+
 		/*
 		 * The display has been reset as well,
 		 * so need a full re-initialization.
@@ -3589,18 +3670,17 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 			dev_priv->display.hpd_irq_setup(dev_priv);
 		spin_unlock_irq(&dev_priv->irq_lock);
 
-		ret = __intel_display_resume(dev, state, ctx);
-		if (ret)
-			DRM_ERROR("Restoring old state failed with %i\n", ret);
+		__intel_atomic_commit_tail(restore_state, true);
 
 		intel_hpd_init(dev_priv);
 	}
 
-	if (state)
-		drm_atomic_state_put(state);
-	drm_modeset_drop_locks(ctx);
-	drm_modeset_acquire_fini(ctx);
-	mutex_unlock(&dev->mode_config.mutex);
+	if (restore_state) {
+		drm_atomic_helper_clean_committed_state(restore_state);
+		drm_atomic_state_put(restore_state);
+	}
+
+	up_write(&dev_priv->commit_sem);
 }
 
 static bool abort_flip_on_reset(struct intel_crtc *crtc)
@@ -12609,29 +12689,18 @@ static int intel_modeset_checks(struct drm_atomic_state *state)
 {
 	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	struct drm_i915_private *dev_priv = to_i915(state->dev);
-	struct drm_crtc *crtc;
-	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
-	int ret = 0, i;
+	int ret = 0;
 
 	if (!check_digital_port_conflicts(state)) {
 		DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n");
 		return -EINVAL;
 	}
 
-	intel_state->modeset = true;
 	intel_state->active_crtcs = dev_priv->active_crtcs;
 	intel_state->cdclk.logical = dev_priv->cdclk.logical;
 	intel_state->cdclk.actual = dev_priv->cdclk.actual;
 
-	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
-		if (new_crtc_state->active)
-			intel_state->active_crtcs |= 1 << i;
-		else
-			intel_state->active_crtcs &= ~(1 << i);
-
-		if (old_crtc_state->active != new_crtc_state->active)
-			intel_state->active_pipe_changes |= drm_crtc_mask(crtc);
-	}
+	init_intel_state(intel_state);
 
 	/*
 	 * See if the config requires any additional preparation, e.g.
@@ -13021,7 +13090,7 @@ static void intel_atomic_helper_free_state_worker(struct work_struct *work)
 	intel_atomic_helper_free_state(dev_priv);
 }
 
-static void __intel_atomic_commit_tail(struct drm_atomic_state *state)
+static void __intel_atomic_commit_tail(struct drm_atomic_state *state, bool is_reset)
 {
 	struct drm_device *dev = state->dev;
 	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
@@ -13085,10 +13154,18 @@ static void __intel_atomic_commit_tail(struct drm_atomic_state *state)
 
 	/* Only after disabling all output pipelines that will be changed can we
 	 * update the the output configuration. */
-	intel_modeset_update_crtc_state(state);
+	if (!is_reset)
+		intel_modeset_update_crtc_state(state);
 
 	if (intel_state->modeset) {
-		drm_atomic_helper_update_legacy_modeset_state(state->dev, state);
+		if (!is_reset) {
+			drm_atomic_helper_update_legacy_modeset_state(state->dev, state);
+		} else {
+			for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+				if (new_crtc_state->enable)
+					drm_calc_timestamping_constants(crtc, &new_crtc_state->adjusted_mode);
+			}
+		}
 
 		intel_set_cdclk(dev_priv, &dev_priv->cdclk.actual);
 
@@ -13099,7 +13176,8 @@ static void __intel_atomic_commit_tail(struct drm_atomic_state *state)
 		if (!intel_can_enable_sagv(state))
 			intel_disable_sagv(dev_priv);
 
-		intel_modeset_verify_disabled(dev, state);
+		if (!is_reset)
+			intel_modeset_verify_disabled(dev, state);
 	}
 
 	/* Complete the events for pipes that have now been disabled */
@@ -13152,7 +13230,8 @@ static void __intel_atomic_commit_tail(struct drm_atomic_state *state)
 		if (put_domains[i])
 			modeset_put_power_domains(dev_priv, put_domains[i]);
 
-		intel_modeset_verify_crtc(crtc, state, old_crtc_state, new_crtc_state);
+		if (!is_reset)
+			intel_modeset_verify_crtc(crtc, state, old_crtc_state, new_crtc_state);
 	}
 
 	if (intel_state->modeset && intel_can_enable_sagv(state))
@@ -13177,10 +13256,14 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 
 	drm_atomic_helper_wait_for_dependencies(state);
 
-	__intel_atomic_commit_tail(state);
+	down_read(&dev_priv->commit_sem);
+
+	__intel_atomic_commit_tail(state, false);
 
 	drm_atomic_helper_commit_hw_done(state);
 
+	up_read(&dev_priv->commit_sem);
+
 	mutex_lock(&dev->struct_mutex);
 	drm_atomic_helper_cleanup_planes(dev, state);
 	mutex_unlock(&dev->struct_mutex);
@@ -13779,6 +13862,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 	}
 
 	primary->base.state = &state->base;
+	primary->base.committed_state = &state->base;
 
 	primary->can_scale = false;
 	primary->max_downscale = 1;
@@ -13892,6 +13976,7 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv,
 	}
 
 	cursor->base.state = &state->base;
+	cursor->base.committed_state = &state->base;
 
 	cursor->can_scale = false;
 	cursor->max_downscale = 1;
@@ -13986,6 +14071,7 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
 	}
 	intel_crtc->config = crtc_state;
 	intel_crtc->base.state = &crtc_state->base;
+	intel_crtc->base.committed_state = &crtc_state->base;
 	crtc_state->base.crtc = &intel_crtc->base;
 
 	primary = intel_primary_plane_create(dev_priv, pipe);
@@ -15059,6 +15145,8 @@ int intel_modeset_init(struct drm_device *dev)
 	INIT_WORK(&dev_priv->atomic_helper.free_work,
 		  intel_atomic_helper_free_state_worker);
 
+	init_rwsem(&dev_priv->commit_sem);
+
 	intel_init_quirks(dev);
 
 	intel_init_pm(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index aed4fa833b48..9d68db31a0f1 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -1097,6 +1097,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 		goto fail;
 	}
 	intel_plane->base.state = &state->base;
+	intel_plane->base.committed_state = &state->base;
 
 	if (INTEL_GEN(dev_priv) >= 9) {
 		intel_plane->can_scale = true;
-- 
2.13.0



More information about the dri-devel mailing list