[Intel-gfx] [PATCH v2 3/5] drm/i915: Move SST DP link retraining into the ->post_hotplug() hook

Tue Jan 30 23:16:59 UTC 2018

On Wed, 2018-01-17 at 21:21 +0200, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala at linux.intel.com>
> 
> Doing link retraining from the short pulse handler is problematic since
> that might introduce deadlocks with MST sideband processing. Currently
> we don't retrain MST links from this code, but we want to change that.
> So better to move the entire thing to the hotplug work. We can utilize
> the new encoder->hotplug() hook for this.
> 
> The only thing we leave in the short pulse handler is the link status
> check. That one still depends on the link parameters stored under
> intel_dp, so no locking around that but races should be mostly harmless
> as the actual retraining code will recheck the link state if we
> end up there by mistake.
> 
> v2: Rebase due to ->post_hotplug() now being just ->hotplug()
>     Check the connector type to figure out if we should do
>     the HDMI thing or the DP think for DDI
> 
> Cc: Manasi Navare <manasi.d.navare at intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_ddi.c |  10 +-
>  drivers/gpu/drm/i915/intel_dp.c  | 196 ++++++++++++++++++++++--------------
> ---
>  drivers/gpu/drm/i915/intel_drv.h |   2 +
>  3 files changed, 120 insertions(+), 88 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ddi.c
> b/drivers/gpu/drm/i915/intel_ddi.c
> index 25793bdc692f..5f3d58f1ae6e 100644
> --- a/drivers/gpu/drm/i915/intel_ddi.c
> +++ b/drivers/gpu/drm/i915/intel_ddi.c
> @@ -2880,7 +2880,10 @@ static bool intel_ddi_hotplug(struct intel_encoder
> *encoder,
>  	drm_modeset_acquire_init(&ctx, 0);
>  
>  	for (;;) {
> -		ret = intel_hdmi_reset_link(encoder, &ctx);
> +		if (connector->base.connector_type ==
> DRM_MODE_CONNECTOR_HDMIA)
> +			ret = intel_hdmi_reset_link(encoder, &ctx);
> +		else
> +			ret = intel_dp_retrain_link(encoder, &ctx);
>  
>  		if (ret == -EDEADLK) {
>  			drm_modeset_backoff(&ctx);
> @@ -3007,10 +3010,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv,
> enum port port)
>  	drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs,
>  			 DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port));
>  
> -	if (init_hdmi)
> -		intel_encoder->hotplug = intel_ddi_hotplug;
> -	else
> -		intel_encoder->hotplug = intel_encoder_hotplug;
> +	intel_encoder->hotplug = intel_ddi_hotplug;
>  	intel_encoder->compute_output_type = intel_ddi_compute_output_type;
>  	intel_encoder->compute_config = intel_ddi_compute_config;
>  	intel_encoder->enable = intel_enable_ddi;
> diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> index 6bbf14410c2a..152016e09a11 100644
> --- a/drivers/gpu/drm/i915/intel_dp.c
> +++ b/drivers/gpu/drm/i915/intel_dp.c
> @@ -4275,12 +4275,83 @@ intel_dp_check_mst_status(struct intel_dp *intel_dp)
>  	return -EINVAL;
>  }
>  
> -static void
> -intel_dp_retrain_link(struct intel_dp *intel_dp)
> +static bool
> +intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
> +{
> +	u8 link_status[DP_LINK_STATUS_SIZE];
> +
> +	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> +		DRM_ERROR("Failed to get link status\n");
> +		return false;
> +	}
> +
> +	/*
> +	 * Validate the cached values of intel_dp->link_rate and
> +	 * intel_dp->lane_count before attempting to retrain.
> +	 */
> +	if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate,
> +					intel_dp->lane_count))
> +		return false;
> +
> +	/* Retrain if Channel EQ or CR not ok */
> +	return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count);
> +}
> +
> +/*
> + * If display is now connected check links status,
> + * there has been known issues of link loss triggering
> + * long pulse.
> + *
> + * Some sinks (eg. ASUS PB287Q) seem to perform some
> + * weird HPD ping pong during modesets. So we can apparently
> + * end up with HPD going low during a modeset, and then
> + * going back up soon after. And once that happens we must
> + * retrain the link to get a picture. That's in case no
> + * userspace component reacted to intermittent HPD dip.
> + */
> +int intel_dp_retrain_link(struct intel_encoder *encoder,
> +			  struct drm_modeset_acquire_ctx *ctx)
>  {
> -	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
>  	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
> -	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
> +	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
> +	struct intel_connector *connector = intel_dp->attached_connector;
> +	struct drm_connector_state *conn_state;
> +	struct intel_crtc_state *crtc_state;
> +	struct intel_crtc *crtc;
> +	int ret;
> +
> +	/* FIXME handle the MST connectors as well */
> +
> +	if (!connector || connector->base.status !=
> connector_status_connected)
> +		return 0;
> +
> +	ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
> ctx);
> +	if (ret)
> +		return ret;
> +
> +	conn_state = connector->base.state;
> +
> +	crtc = to_intel_crtc(conn_state->crtc);
> +	if (!crtc)
> +		return 0;
> +
> +	ret = drm_modeset_lock(&crtc->base.mutex, ctx);
> +	if (ret)
> +		return ret;
> +
> +	crtc_state = to_intel_crtc_state(crtc->base.state);
> +
> +	WARN_ON(!intel_crtc_has_dp_encoder(crtc_state));
> +
> +	if (!crtc_state->base.active)
> +		return 0;
> +
> +	if (conn_state->commit &&
> +	    !try_wait_for_completion(&conn_state->commit->hw_done))
> +		return 0;
> +
> +	if (!intel_dp_needs_link_retrain(intel_dp))
> +		return 0;
NAK, this definitely won't work for implementing MST retraining. There's some
pretty huge differences with how retraining needs to be handled on SST vs. MST.
An example with some normal SST sink vs. what happens on my caldigit TS3

SST:
    1. commit modeset, everything is OK
    2. something happens, sink sends shortpulse and changes link status registers
    in dpcd
    3. Source receives short pulse, tries retraining five times
    4. if this succeeds:
        5. we're done here
    6. if this fails:
        7. mark link status as bad
        8. get fallback parameters
        9. hotplug event

MST (i915 doesn't do this yet, but this is generally how it needs to be
handled):
    1. commit modeset, everything is OK
    2. something happens (in my case, the MST hub discovers it had the wrong max
    link rate/lane count), sink sends ESI indicating channel EQ has failed
    3. retraining commences with five retries.
    4. if this succeeds:
       5. continue
    6. if this fails (I actually haven't seen this once yet)
        7. mark link status as bad on all downstream connectors
        8. get fallback parameters
        9. hotplug event
    10. the retrain didn't actually work (despite what the SST link status
    registers told us). go back to step 3 five more times
    11. if this fails:
        12. mark link status as bad on all downstream connectors
        13. get fallback parameters
        14. hotplug event

simply put: we really should keep the "do we need to retrain?" logic out of the
actual retraining helpers so that SST/MST codepaths can do their own checks to
figure this out.

>  
>  	/* Suppress underruns caused by re-training */
>  	intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false);
> @@ -4298,51 +4369,49 @@ intel_dp_retrain_link(struct intel_dp *intel_dp)
>  	if (crtc->config->has_pch_encoder)
>  		intel_set_pch_fifo_underrun_reporting(dev_priv,
>  						      intel_crtc_pch_transcod
> er(crtc), true);
> +
> +	return 0;
>  }
>  
> -static void
> -intel_dp_check_link_status(struct intel_dp *intel_dp)
> +/*
> + * If display is now connected check links status,
> + * there has been known issues of link loss triggering
> + * long pulse.
> + *
> + * Some sinks (eg. ASUS PB287Q) seem to perform some
> + * weird HPD ping pong during modesets. So we can apparently
> + * end up with HPD going low during a modeset, and then
> + * going back up soon after. And once that happens we must
> + * retrain the link to get a picture. That's in case no
> + * userspace component reacted to intermittent HPD dip.
> + */
> +static bool intel_dp_hotplug(struct intel_encoder *encoder,
> +			     struct intel_connector *connector)
>  {
> -	struct drm_i915_private *dev_priv =
> to_i915(intel_dp_to_dev(intel_dp));
> -	struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)-
> >base;
> -	struct drm_connector_state *conn_state =
> -		intel_dp->attached_connector->base.state;
> -	u8 link_status[DP_LINK_STATUS_SIZE];
> -
> -	WARN_ON(!drm_modeset_is_locked(&dev_priv-
> >drm.mode_config.connection_mutex));
> -
> -	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> -		DRM_ERROR("Failed to get link status\n");
> -		return;
> -	}
> +	struct drm_modeset_acquire_ctx ctx;
> +	bool changed;
> +	int ret;
>  
> -	if (!conn_state->crtc)
> -		return;
> +	changed = intel_encoder_hotplug(encoder, connector);
>  
> -	WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex));
> +	drm_modeset_acquire_init(&ctx, 0);
>  
> -	if (!conn_state->crtc->state->active)
> -		return;
> +	for (;;) {
> +		ret = intel_dp_retrain_link(encoder, &ctx);
>  
> -	if (conn_state->commit &&
> -	    !try_wait_for_completion(&conn_state->commit->hw_done))
> -		return;
> +		if (ret == -EDEADLK) {
> +			drm_modeset_backoff(&ctx);
> +			continue;
> +		}
>  
> -	/*
> -	 * Validate the cached values of intel_dp->link_rate and
> -	 * intel_dp->lane_count before attempting to retrain.
> -	 */
> -	if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate,
> -					intel_dp->lane_count))
> -		return;
> +		break;
> +	}
>  
> -	/* Retrain if Channel EQ or CR not ok */
> -	if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) {
> -		DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
> -			      intel_encoder->base.name);
> +	drm_modeset_drop_locks(&ctx);
> +	drm_modeset_acquire_fini(&ctx);
> +	WARN(ret, "Acquiring modeset locks failed with %i\n", ret);
>  
> -		intel_dp_retrain_link(intel_dp);
> -	}
> +	return changed;
>  }
>  
>  /*
> @@ -4400,7 +4469,9 @@ intel_dp_short_pulse(struct intel_dp *intel_dp)
>  			DRM_DEBUG_DRIVER("CP or sink specific irq
> unhandled\n");
>  	}
>  
> -	intel_dp_check_link_status(intel_dp);
> +	/* defer to the hotplug work for link retraining if needed */
> +	if (intel_dp_needs_link_retrain(intel_dp))
> +		return false;
>  
>  	if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) {
>  		DRM_DEBUG_KMS("Link Training Compliance Test requested\n");
> @@ -4785,20 +4856,6 @@ intel_dp_long_pulse(struct intel_connector *connector)
>  		 */
>  		status = connector_status_disconnected;
>  		goto out;
> -	} else {
> -		/*
> -		 * If display is now connected check links status,
> -		 * there has been known issues of link loss triggerring
> -		 * long pulse.
> -		 *
> -		 * Some sinks (eg. ASUS PB287Q) seem to perform some
> -		 * weird HPD ping pong during modesets. So we can apparently
> -		 * end up with HPD going low during a modeset, and then
> -		 * going back up soon after. And once that happens we must
> -		 * retrain the link to get a picture. That's in case no
> -		 * userspace component reacted to intermittent HPD dip.
> -		 */
> -		intel_dp_check_link_status(intel_dp);
>  	}
>  
>  	/*
> @@ -5340,37 +5397,10 @@ intel_dp_hpd_pulse(struct intel_digital_port
> *intel_dig_port, bool long_hpd)
>  	}
>  
>  	if (!intel_dp->is_mst) {
> -		struct drm_modeset_acquire_ctx ctx;
> -		struct drm_connector *connector = &intel_dp-
> >attached_connector->base;
> -		struct drm_crtc *crtc;
> -		int iret;
> -		bool handled = false;
> -
> -		drm_modeset_acquire_init(&ctx, 0);
> -retry:
> -		iret = drm_modeset_lock(&dev_priv-
> >drm.mode_config.connection_mutex, &ctx);
> -		if (iret)
> -			goto err;
> -
> -		crtc = connector->state->crtc;
> -		if (crtc) {
> -			iret = drm_modeset_lock(&crtc->mutex, &ctx);
> -			if (iret)
> -				goto err;
> -		}
> +		bool handled;
>  
>  		handled = intel_dp_short_pulse(intel_dp);
>  
> -err:
> -		if (iret == -EDEADLK) {
> -			drm_modeset_backoff(&ctx);
> -			goto retry;
> -		}
> -
> -		drm_modeset_drop_locks(&ctx);
> -		drm_modeset_acquire_fini(&ctx);
> -		WARN(iret, "Acquiring modeset locks failed with %i\n", iret);
> -
>  		/* Short pulse can signify loss of hdcp authentication */
>  		intel_hdcp_check_link(intel_dp->attached_connector);
>  
> @@ -6400,7 +6430,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv,
>  			     "DP %c", port_name(port)))
>  		goto err_encoder_init;
>  
> -	intel_encoder->hotplug = intel_encoder_hotplug;
> +	intel_encoder->hotplug = intel_dp_hotplug;
>  	intel_encoder->compute_config = intel_dp_compute_config;
>  	intel_encoder->get_hw_state = intel_dp_get_hw_state;
>  	intel_encoder->get_config = intel_dp_get_config;
> diff --git a/drivers/gpu/drm/i915/intel_drv.h
> b/drivers/gpu/drm/i915/intel_drv.h
> index 5ea1dc3f63bf..ddf28a442cd7 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1611,6 +1611,8 @@ int intel_dp_get_link_train_fallback_values(struct
> intel_dp *intel_dp,
>  					    int link_rate, uint8_t
> lane_count);
>  void intel_dp_start_link_train(struct intel_dp *intel_dp);
>  void intel_dp_stop_link_train(struct intel_dp *intel_dp);
> +int intel_dp_retrain_link(struct intel_encoder *encoder,
> +			  struct drm_modeset_acquire_ctx *ctx);
>  void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
>  void intel_dp_encoder_reset(struct drm_encoder *encoder);
>  void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder);