[Intel-gfx] [PATCH v2 3/5] drm/i915: Move SST DP link retraining into the ->post_hotplug() hook

Lyude Paul lyude at redhat.com
Wed Feb 28 19:07:34 UTC 2018


On Tue, 2018-02-27 at 23:17 -0800, Manasi Navare wrote:
> Ville,  thanks for the patch and
> Sorry for not being able to review this earlier.
> Please find some comments below:
> 
> On Wed, Jan 31, 2018 at 03:27:10PM +0200, Ville Syrjälä wrote:
> > On Tue, Jan 30, 2018 at 06:16:59PM -0500, Lyude Paul wrote:
> > > On Wed, 2018-01-17 at 21:21 +0200, Ville Syrjala wrote:
> > > > From: Ville Syrjälä <ville.syrjala at linux.intel.com>
> > > > 
> > > > Doing link retraining from the short pulse handler is problematic
> > > > since
> > > > that might introduce deadlocks with MST sideband processing. Currently
> > > > we don't retrain MST links from this code, but we want to change that.
> > > > So better to move the entire thing to the hotplug work. We can utilize
> > > > the new encoder->hotplug() hook for this.
> > > > 
> > > > The only thing we leave in the short pulse handler is the link status
> > > > check. That one still depends on the link parameters stored under
> > > > intel_dp, so no locking around that but races should be mostly
> > > > harmless
> > > > as the actual retraining code will recheck the link state if we
> > > > end up there by mistake.
> > > > 
> > > > v2: Rebase due to ->post_hotplug() now being just ->hotplug()
> > > >     Check the connector type to figure out if we should do
> > > >     the HDMI thing or the DP think for DDI
> > > > 
> > > > Cc: Manasi Navare <manasi.d.navare at intel.com>
> > > > Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> > > > Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> > > > ---
> > > >  drivers/gpu/drm/i915/intel_ddi.c |  10 +-
> > > >  drivers/gpu/drm/i915/intel_dp.c  | 196 ++++++++++++++++++++++------
> > > > --------
> > > > ---
> > > >  drivers/gpu/drm/i915/intel_drv.h |   2 +
> > > >  3 files changed, 120 insertions(+), 88 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/intel_ddi.c
> > > > b/drivers/gpu/drm/i915/intel_ddi.c
> > > > index 25793bdc692f..5f3d58f1ae6e 100644
> > > > --- a/drivers/gpu/drm/i915/intel_ddi.c
> > > > +++ b/drivers/gpu/drm/i915/intel_ddi.c
> > > > @@ -2880,7 +2880,10 @@ static bool intel_ddi_hotplug(struct
> > > > intel_encoder
> > > > *encoder,
> > > >  	drm_modeset_acquire_init(&ctx, 0);
> > > >  
> > > >  	for (;;) {
> > > > -		ret = intel_hdmi_reset_link(encoder, &ctx);
> > > > +		if (connector->base.connector_type ==
> > > > DRM_MODE_CONNECTOR_HDMIA)
> > > > +			ret = intel_hdmi_reset_link(encoder, &ctx);
> > > > +		else
> > > > +			ret = intel_dp_retrain_link(encoder, &ctx);
> > > >  
> > > >  		if (ret == -EDEADLK) {
> > > >  			drm_modeset_backoff(&ctx);
> > > > @@ -3007,10 +3010,7 @@ void intel_ddi_init(struct drm_i915_private
> > > > *dev_priv,
> > > > enum port port)
> > > >  	drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs,
> > > >  			 DRM_MODE_ENCODER_TMDS, "DDI %c",
> > > > port_name(port));
> > > >  
> > > > -	if (init_hdmi)
> > > > -		intel_encoder->hotplug = intel_ddi_hotplug;
> > > > -	else
> > > > -		intel_encoder->hotplug = intel_encoder_hotplug;
> > > > +	intel_encoder->hotplug = intel_ddi_hotplug;
> > > >  	intel_encoder->compute_output_type =
> > > > intel_ddi_compute_output_type;
> > > >  	intel_encoder->compute_config = intel_ddi_compute_config;
> > > >  	intel_encoder->enable = intel_enable_ddi;
> > > > diff --git a/drivers/gpu/drm/i915/intel_dp.c
> > > > b/drivers/gpu/drm/i915/intel_dp.c
> > > > index 6bbf14410c2a..152016e09a11 100644
> > > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > > @@ -4275,12 +4275,83 @@ intel_dp_check_mst_status(struct intel_dp
> > > > *intel_dp)
> > > >  	return -EINVAL;
> > > >  }
> > > >  
> > > > -static void
> > > > -intel_dp_retrain_link(struct intel_dp *intel_dp)
> > > > +static bool
> > > > +intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
> > > > +{
> > > > +	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > +
> > > > +	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > > > +		DRM_ERROR("Failed to get link status\n");
> > > > +		return false;
> > > > +	}
> > > > +
> > > > +	/*
> > > > +	 * Validate the cached values of intel_dp->link_rate and
> > > > +	 * intel_dp->lane_count before attempting to retrain.
> > > > +	 */
> > > > +	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > >link_rate,
> > > > +					intel_dp->lane_count))
> > > > +		return false;
> > > > +
> > > > +	/* Retrain if Channel EQ or CR not ok */
> > > > +	return !drm_dp_channel_eq_ok(link_status, intel_dp-
> > > > >lane_count);
> > > > +}
> > > > +
> > > > +/*
> > > > + * If display is now connected check links status,
> > > > + * there has been known issues of link loss triggering
> > > > + * long pulse.
> > > > + *
> > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > + * end up with HPD going low during a modeset, and then
> > > > + * going back up soon after. And once that happens we must
> > > > + * retrain the link to get a picture. That's in case no
> > > > + * userspace component reacted to intermittent HPD dip.
> > > > + */
> > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > +			  struct drm_modeset_acquire_ctx *ctx)
> > > >  {
> > > > -	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)-
> > > > >base;
> > > >  	struct drm_i915_private *dev_priv = to_i915(encoder-
> > > > >base.dev);
> > > > -	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
> > > > +	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
> > > > +	struct intel_connector *connector = intel_dp-
> > > > >attached_connector;
> > > > +	struct drm_connector_state *conn_state;
> > > > +	struct intel_crtc_state *crtc_state;
> > > > +	struct intel_crtc *crtc;
> > > > +	int ret;
> > > > +
> > > > +	/* FIXME handle the MST connectors as well */
> > > > +
> > > > +	if (!connector || connector->base.status !=
> > > > connector_status_connected)
> > > > +		return 0;
> > > > +
> > > > +	ret = drm_modeset_lock(&dev_priv-
> > > > >drm.mode_config.connection_mutex,
> > > > ctx);
> > > > +	if (ret)
> > > > +		return ret;
> > > > +
> > > > +	conn_state = connector->base.state;
> > > > +
> > > > +	crtc = to_intel_crtc(conn_state->crtc);
> > > > +	if (!crtc)
> > > > +		return 0;
> > > > +
> > > > +	ret = drm_modeset_lock(&crtc->base.mutex, ctx);
> > > > +	if (ret)
> > > > +		return ret;
> > > > +
> > > > +	crtc_state = to_intel_crtc_state(crtc->base.state);
> > > > +
> > > > +	WARN_ON(!intel_crtc_has_dp_encoder(crtc_state));
> > > > +
> > > > +	if (!crtc_state->base.active)
> > > > +		return 0;
> > > > +
> > > > +	if (conn_state->commit &&
> > > > +	    !try_wait_for_completion(&conn_state->commit->hw_done))
> > > > +		return 0;
> > > > +
> > > > +	if (!intel_dp_needs_link_retrain(intel_dp))
> > > > +		return 0;
> > > 
> > > NAK, this definitely won't work for implementing MST retraining. There's
> > > some
> > > pretty huge differences with how retraining needs to be handled on SST
> > > vs. MST.
> > > An example with some normal SST sink vs. what happens on my caldigit TS3
> > > 
> > > SST:
> > >     1. commit modeset, everything is OK
> > >     2. something happens, sink sends shortpulse and changes link status
> > > registers
> > >     in dpcd
> > >     3. Source receives short pulse, tries retraining five times
> > >     4. if this succeeds:
> > >         5. we're done here
> > >     6. if this fails:
> > >         7. mark link status as bad
> > >         8. get fallback parameters
> > >         9. hotplug event
> > > 
> > > MST (i915 doesn't do this yet, but this is generally how it needs to be
> > > handled):
> > >     1. commit modeset, everything is OK
> > >     2. something happens (in my case, the MST hub discovers it had the
> > > wrong max
> > >     link rate/lane count), sink sends ESI indicating channel EQ has
> > > failed
> > >     3. retraining commences with five retries.
> > >     4. if this succeeds:
> > >        5. continue
> > >     6. if this fails (I actually haven't seen this once yet)
> > >         7. mark link status as bad on all downstream connectors
> > >         8. get fallback parameters
> > >         9. hotplug event
> > >     10. the retrain didn't actually work (despite what the SST link
> > > status
> > >     registers told us). go back to step 3 five more times
> > >     11. if this fails:
> > >         12. mark link status as bad on all downstream connectors
> > >         13. get fallback parameters
> > >         14. hotplug event
> > > 
> > > simply put: we really should keep the "do we need to retrain?" logic out
> > > of the
> > > actual retraining helpers so that SST/MST codepaths can do their own
> > > checks to
> > > figure this out.
> > 
> > No, we need it since we want to check it *after* any modeset has
> > finished. With MST I think what we'll want to do is find all the pipes
> > affected by the link failure, lock them, and wait until they're done
> > with their modesets, then we check the link state. If it's bad we
> > proceed to retrain the link.
> > 
> > So basically just walking over the MST encoders in addition to the
> > SST encoder, and repeating most of the steps in this code for each.
> > Hence the the MST FIXME I left in there ;)
> > 
> 
> Lyude,
> 
> I agree with Ville here, can we add the MST retrain required check from
> within the intel_dp_retrain_link()? So for now the FIXME should be left
> there
> and MST retraining check can be added from your patches.
> 
> Manasi

Yep! Sorry I probably should have reiterated this part over email since I only
mentioned it to ville directly: this part should definitely work fine, and
intel_dp_retrain_link() is actually where we handle MST retraining in the
latest version of the retraining series:

jfyi, you can find that here:
https://github.com/Lyude/linux/tree/wip/i915-mst-fallback-a4
It's almost ready for the ML, just want to get the other patches this is going
to rely on in first and fix some small concerns I've got about that series'
current implementation of handling modesets after a failed retrain sequence
> 
> > > 
> > > >  
> > > >  	/* Suppress underruns caused by re-training */
> > > >  	intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe,
> > > > false);
> > > > @@ -4298,51 +4369,49 @@ intel_dp_retrain_link(struct intel_dp
> > > > *intel_dp)
> > > >  	if (crtc->config->has_pch_encoder)
> > > >  		intel_set_pch_fifo_underrun_reporting(dev_priv,
> > > >  						      intel_crtc_pch_
> > > > transcod
> > > > er(crtc), true);
> > > > +
> > > > +	return 0;
> > > >  }
> > > >  
> > > > -static void
> > > > -intel_dp_check_link_status(struct intel_dp *intel_dp)
> > > > +/*
> > > > + * If display is now connected check links status,
> > > > + * there has been known issues of link loss triggering
> > > > + * long pulse.
> > > > + *
> > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > + * end up with HPD going low during a modeset, and then
> > > > + * going back up soon after. And once that happens we must
> > > > + * retrain the link to get a picture. That's in case no
> > > > + * userspace component reacted to intermittent HPD dip.
> > > > + */
> > > > +static bool intel_dp_hotplug(struct intel_encoder *encoder,
> > > > +			     struct intel_connector *connector)
> > > >  {
> > > > -	struct drm_i915_private *dev_priv =
> > > > to_i915(intel_dp_to_dev(intel_dp));
> > > > -	struct intel_encoder *intel_encoder =
> > > > &dp_to_dig_port(intel_dp)-
> > > > > base;
> > > > 
> > > > -	struct drm_connector_state *conn_state =
> > > > -		intel_dp->attached_connector->base.state;
> > > > -	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > -
> > > > -	WARN_ON(!drm_modeset_is_locked(&dev_priv-
> > > > > drm.mode_config.connection_mutex));
> > > > 
> > > > -
> > > > -	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > > > -		DRM_ERROR("Failed to get link status\n");
> > > > -		return;
> > > > -	}
> > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > +	bool changed;
> > > > +	int ret;
> > > >  
> > > > -	if (!conn_state->crtc)
> > > > -		return;
> > > > +	changed = intel_encoder_hotplug(encoder, connector);
> > > >  
> > > > -	WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex));
> > > > +	drm_modeset_acquire_init(&ctx, 0);
> > > >  
> > > > -	if (!conn_state->crtc->state->active)
> > > > -		return;
> > > > +	for (;;) {
> 
> Here if this is getting executed due to hpd ping pong during the modeset
> and that modeset is already happening at a link fallback parameter then
> while we call retrain link, we should also validate the link parameters
> so that it doesnt try to retrain with stale values.
> 
> I think we need to call intel_dp_link_params_valid() before retrain.
> 
> > > > +		ret = intel_dp_retrain_link(encoder, &ctx);
> > > >  
> > > > -	if (conn_state->commit &&
> > > > -	    !try_wait_for_completion(&conn_state->commit->hw_done))
> > > > -		return;
> > > > +		if (ret == -EDEADLK) {
> > > > +			drm_modeset_backoff(&ctx);
> > > > +			continue;
> > > > +		}
> > > >  
> > > > -	/*
> > > > -	 * Validate the cached values of intel_dp->link_rate and
> > > > -	 * intel_dp->lane_count before attempting to retrain.
> > > > -	 */
> > > > -	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > >link_rate,
> > > > -					intel_dp->lane_count))
> > > > -		return;
> > > > +		break;
> > > > +	}
> > > >  
> > > > -	/* Retrain if Channel EQ or CR not ok */
> > > > -	if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) 
> > > > {
> > > > -		DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
> > > > -			      intel_encoder->base.name);
> > > > +	drm_modeset_drop_locks(&ctx);
> > > > +	drm_modeset_acquire_fini(&ctx);
> > > > +	WARN(ret, "Acquiring modeset locks failed with %i\n", ret);
> > > >  
> > > > -		intel_dp_retrain_link(intel_dp);
> > > > -	}
> > > > +	return changed;
> > > >  }
> > > >  
> > > >  /*
> > > > @@ -4400,7 +4469,9 @@ intel_dp_short_pulse(struct intel_dp *intel_dp)
> > > >  			DRM_DEBUG_DRIVER("CP or sink specific irq
> > > > unhandled\n");
> > > >  	}
> > > >  
> > > > -	intel_dp_check_link_status(intel_dp);
> > > > +	/* defer to the hotplug work for link retraining if needed */
> > > > +	if (intel_dp_needs_link_retrain(intel_dp))
> > > > +		return false;
> > > >  
> > > >  	if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING)
> > > > {
> > > >  		DRM_DEBUG_KMS("Link Training Compliance Test
> > > > requested\n");
> > > > @@ -4785,20 +4856,6 @@ intel_dp_long_pulse(struct intel_connector
> > > > *connector)
> > > >  		 */
> > > >  		status = connector_status_disconnected;
> > > >  		goto out;
> > > > -	} else {
> > > > -		/*
> > > > -		 * If display is now connected check links status,
> > > > -		 * there has been known issues of link loss
> > > > triggerring
> > > > -		 * long pulse.
> > > > -		 *
> > > > -		 * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > -		 * weird HPD ping pong during modesets. So we can
> > > > apparently
> > > > -		 * end up with HPD going low during a modeset, and
> > > > then
> > > > -		 * going back up soon after. And once that happens we
> > > > must
> > > > -		 * retrain the link to get a picture. That's in case
> > > > no
> > > > -		 * userspace component reacted to intermittent HPD
> > > > dip.
> > > > -		 */
> > > > -		intel_dp_check_link_status(intel_dp);
> > > >  	}
> > > >  
> > > >  	/*
> > > > @@ -5340,37 +5397,10 @@ intel_dp_hpd_pulse(struct intel_digital_port
> > > > *intel_dig_port, bool long_hpd)
> > > >  	}
> > > >  
> > > >  	if (!intel_dp->is_mst) {
> > > > -		struct drm_modeset_acquire_ctx ctx;
> > > > -		struct drm_connector *connector = &intel_dp-
> > > > > attached_connector->base;
> > > > 
> > > > -		struct drm_crtc *crtc;
> > > > -		int iret;
> > > > -		bool handled = false;
> > > > -
> > > > -		drm_modeset_acquire_init(&ctx, 0);
> > > > -retry:
> > > > -		iret = drm_modeset_lock(&dev_priv-
> > > > > drm.mode_config.connection_mutex, &ctx);
> > > > 
> > > > -		if (iret)
> > > > -			goto err;
> > > > -
> > > > -		crtc = connector->state->crtc;
> > > > -		if (crtc) {
> > > > -			iret = drm_modeset_lock(&crtc->mutex, &ctx);
> > > > -			if (iret)
> > > > -				goto err;
> > > > -		}
> > > > +		bool handled;
> > > >  
> > > >  		handled = intel_dp_short_pulse(intel_dp);
> > > >  
> > > > -err:
> > > > -		if (iret == -EDEADLK) {
> > > > -			drm_modeset_backoff(&ctx);
> > > > -			goto retry;
> > > > -		}
> > > > -
> > > > -		drm_modeset_drop_locks(&ctx);
> > > > -		drm_modeset_acquire_fini(&ctx);
> > > > -		WARN(iret, "Acquiring modeset locks failed with
> > > > %i\n", iret);
> > > > -
> > > >  		/* Short pulse can signify loss of hdcp
> > > > authentication */
> > > >  		intel_hdcp_check_link(intel_dp->attached_connector);
> > > >  
> > > > @@ -6400,7 +6430,7 @@ bool intel_dp_init(struct drm_i915_private
> > > > *dev_priv,
> > > >  			     "DP %c", port_name(port)))
> > > >  		goto err_encoder_init;
> > > >  
> > > > -	intel_encoder->hotplug = intel_encoder_hotplug;
> > > > +	intel_encoder->hotplug = intel_dp_hotplug;
> > > >  	intel_encoder->compute_config = intel_dp_compute_config;
> > > >  	intel_encoder->get_hw_state = intel_dp_get_hw_state;
> > > >  	intel_encoder->get_config = intel_dp_get_config;
> > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > > > b/drivers/gpu/drm/i915/intel_drv.h
> > > > index 5ea1dc3f63bf..ddf28a442cd7 100644
> > > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > > @@ -1611,6 +1611,8 @@ int
> > > > intel_dp_get_link_train_fallback_values(struct
> > > > intel_dp *intel_dp,
> > > >  					    int link_rate, uint8_t
> > > > lane_count);
> > > >  void intel_dp_start_link_train(struct intel_dp *intel_dp);
> > > >  void intel_dp_stop_link_train(struct intel_dp *intel_dp);
> > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > +			  struct drm_modeset_acquire_ctx *ctx);
> > > >  void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
> > > >  void intel_dp_encoder_reset(struct drm_encoder *encoder);
> > > >  void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder);
> > 
> > -- 
> > Ville Syrjälä
> > Intel OTC
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
-- 
Cheers,
	Lyude Paul


More information about the Intel-gfx mailing list