[PATCH v2 25/27] drm/dp_mst: Add basic topology reprobing when resuming

Lyude Paul lyude at redhat.com
Wed Oct 9 19:06:34 UTC 2019


On Fri, 2019-09-27 at 09:52 -0400, Sean Paul wrote:
> On Tue, Sep 03, 2019 at 04:46:03PM -0400, Lyude Paul wrote:
> > Finally! For a very long time, our MST helpers have had one very
> > annoying issue: They don't know how to reprobe the topology state when
> > coming out of suspend. This means that if a user has a machine connected
> > to an MST topology and decides to suspend their machine, we lose all
> > topology changes that happened during that period. That can be a big
> > problem if the machine was connected to a different topology on the same
> > port before resuming, as we won't bother reprobing any of the ports and
> > likely cause the user's monitors not to come back up as expected.
> > 
> > So, we start fixing this by teaching our MST helpers how to reprobe the
> > link addresses of each connected topology when resuming. As it turns
> > out, the behavior that we want here is identical to the behavior we want
> > when initially probing a newly connected MST topology, with a couple of
> > important differences:
> > 
> > - We need to be more careful about handling the potential races between
> >   events from the MST hub that could change the topology state as we're
> >   performing the link address reprobe
> > - We need to be more careful about handling unlikely state changes on
> >   ports - such as an input port turning into an output port, something
> >   that would be far more likely to happen in situations like the MST hub
> >   we're connected to being changed while we're suspend
> > 
> > Both of which have been solved by previous commits. That leaves one
> > requirement:
> > 
> > - We need to prune any MST ports in our in-memory topology state that
> >   were present when suspending, but have not appeared in the post-resume
> >   link address response from their parent branch device
> > 
> > Which we can now handle in this commit by modifying
> > drm_dp_send_link_address(). We then introduce suspend/resume reprobing
> > by introducing drm_dp_mst_topology_mgr_invalidate_mstb(), which we call
> > in drm_dp_mst_topology_mgr_suspend() to traverse the in-memory topology
> > state to indicate that each mstb needs it's link address resent and PBN
> > resources reprobed.
> > 
> > On resume, we start back up &mgr->work and have it reprobe the topology
> > in the same way we would on a hotplug, removing any leftover ports that
> > no longer appear in the topology state.
> > 
> > Cc: Juston Li <juston.li at intel.com>
> > Cc: Imre Deak <imre.deak at intel.com>
> > Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
> > Cc: Harry Wentland <hwentlan at amd.com>
> > Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> > Signed-off-by: Lyude Paul <lyude at redhat.com>
> > ---
> >  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   2 +-
> >  drivers/gpu/drm/drm_dp_mst_topology.c         | 138 +++++++++++++-----
> >  drivers/gpu/drm/i915/display/intel_dp.c       |   3 +-
> >  drivers/gpu/drm/nouveau/dispnv50/disp.c       |   6 +-
> >  include/drm/drm_dp_mst_helper.h               |   3 +-
> >  5 files changed, 112 insertions(+), 40 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index 4d3c8bff77da..27ee3e045b86 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -973,7 +973,7 @@ static void s3_handle_mst(struct drm_device *dev, bool
> > suspend)
> >  		if (suspend) {
> >  			drm_dp_mst_topology_mgr_suspend(mgr);
> >  		} else {
> > -			ret = drm_dp_mst_topology_mgr_resume(mgr);
> > +			ret = drm_dp_mst_topology_mgr_resume(mgr, true);
> >  			if (ret < 0) {
> >  				drm_dp_mst_topology_mgr_set_mst(mgr, false);
> >  				need_hotplug = true;
> > diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c
> > b/drivers/gpu/drm/drm_dp_mst_topology.c
> > index e407aba1fbd2..2fe24e366925 100644
> > --- a/drivers/gpu/drm/drm_dp_mst_topology.c
> > +++ b/drivers/gpu/drm/drm_dp_mst_topology.c
> > @@ -2020,6 +2020,14 @@ drm_dp_mst_handle_link_address_port(struct
> > drm_dp_mst_branch *mstb,
> >  		goto fail_unlock;
> >  	}
> >  
> > +	/*
> > +	 * If this port wasn't just created, then we're reprobing because
> > +	 * we're coming out of suspend. In this case, always resend the link
> > +	 * address if there's an MSTB on this port
> > +	 */
> > +	if (!created && port->pdt == DP_PEER_DEVICE_MST_BRANCHING)
> > +		send_link_addr = true;
> > +
> >  	if (send_link_addr) {
> >  		mutex_lock(&mgr->lock);
> >  		if (port->mstb) {
> > @@ -2530,7 +2538,8 @@ static void drm_dp_send_link_address(struct
> > drm_dp_mst_topology_mgr *mgr,
> >  {
> >  	struct drm_dp_sideband_msg_tx *txmsg;
> >  	struct drm_dp_link_address_ack_reply *reply;
> > -	int i, len, ret;
> > +	struct drm_dp_mst_port *port, *tmp;
> > +	int i, len, ret, port_mask = 0;
> >  
> >  	txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL);
> >  	if (!txmsg)
> > @@ -2560,9 +2569,28 @@ static void drm_dp_send_link_address(struct
> > drm_dp_mst_topology_mgr *mgr,
> >  
> >  	drm_dp_check_mstb_guid(mstb, reply->guid);
> >  
> > -	for (i = 0; i < reply->nports; i++)
> > +	for (i = 0; i < reply->nports; i++) {
> > +		port_mask |= BIT(reply->ports[i].port_number);
> >  		drm_dp_mst_handle_link_address_port(mstb, mgr->dev,
> >  						    &reply->ports[i]);
> > +	}
> > +
> > +	/* Prune any ports that are currently a part of mstb in our in-memory
> > +	 * topology, but were not seen in this link address. Usually this
> > +	 * means that they were removed while the topology was out of sync,
> > +	 * e.g. during suspend/resume
> > +	 */
> > +	mutex_lock(&mgr->lock);
> > +	list_for_each_entry_safe(port, tmp, &mstb->ports, next) {
> > +		if (port_mask & BIT(port->port_num))
> > +			continue;
> > +
> > +		DRM_DEBUG_KMS("port %d was not in link address, removing\n",
> > +			      port->port_num);
> > +		list_del(&port->next);
> > +		drm_dp_mst_topology_put_port(port);
> > +	}
> > +	mutex_unlock(&mgr->lock);
> >  
> >  	drm_kms_helper_hotplug_event(mgr->dev);
> >  
> > @@ -3191,6 +3219,23 @@ int drm_dp_mst_topology_mgr_set_mst(struct
> > drm_dp_mst_topology_mgr *mgr, bool ms
> >  }
> >  EXPORT_SYMBOL(drm_dp_mst_topology_mgr_set_mst);
> >  
> > +static void
> > +drm_dp_mst_topology_mgr_invalidate_mstb(struct drm_dp_mst_branch *mstb)
> > +{
> > +	struct drm_dp_mst_port *port;
> > +
> > +	/* The link address will need to be re-sent on resume */
> > +	mstb->link_address_sent = false;
> > +
> > +	list_for_each_entry(port, &mstb->ports, next) {
> > +		/* The PBN for each port will also need to be re-probed */
> > +		port->available_pbn = 0;
> > +
> > +		if (port->mstb)
> > +			drm_dp_mst_topology_mgr_invalidate_mstb(port->mstb);
> > +	}
> > +}
> > +
> >  /**
> >   * drm_dp_mst_topology_mgr_suspend() - suspend the MST manager
> >   * @mgr: manager to suspend
> > @@ -3207,60 +3252,85 @@ void drm_dp_mst_topology_mgr_suspend(struct
> > drm_dp_mst_topology_mgr *mgr)
> >  	flush_work(&mgr->up_req_work);
> >  	flush_work(&mgr->work);
> >  	flush_work(&mgr->delayed_destroy_work);
> > +
> > +	mutex_lock(&mgr->lock);
> > +	if (mgr->mst_state && mgr->mst_primary)
> > +		drm_dp_mst_topology_mgr_invalidate_mstb(mgr->mst_primary);
> > +	mutex_unlock(&mgr->lock);
> >  }
> >  EXPORT_SYMBOL(drm_dp_mst_topology_mgr_suspend);
> >  
> >  /**
> >   * drm_dp_mst_topology_mgr_resume() - resume the MST manager
> >   * @mgr: manager to resume
> > + * @sync: whether or not to perform topology reprobing synchronously
> >   *
> >   * This will fetch DPCD and see if the device is still there,
> >   * if it is, it will rewrite the MSTM control bits, and return.
> >   *
> > - * if the device fails this returns -1, and the driver should do
> > + * If the device fails this returns -1, and the driver should do
> >   * a full MST reprobe, in case we were undocked.
> 
> nit: I don't think this sentence applies any longer since we're doing the
> reprobe.
Note that this does actually still apply, but I should be a bit more clear
about it: "reprobe" in this sense just means drm_dp_mst_topology_set_mst(mgr,
off)
> 
> > + *
> > + * During system resume (where it is assumed that the driver will be
> > calling
> > + * drm_atomic_helper_resume()) this function should be called beforehand
> > with
> > + * @sync set to true. In contexts like runtime resume where the driver is
> > not
> > + * expected to be calling drm_atomic_helper_resume(), this function
> > should be
> > + * called with @sync set to false in order to avoid deadlocking.
> > + *
> > + * Returns: -1 if the MST topology was removed while we were suspended, 0
> > + * otherwise.
> >   */
> > -int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr)
> > +int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr,
> > +				   bool sync)
> >  {
> > -	int ret = 0;
> > +	int ret;
> > +	u8 guid[16];
> >  
> >  	mutex_lock(&mgr->lock);
> > +	if (!mgr->mst_primary)
> > +		goto out_fail;
> >  
> > -	if (mgr->mst_primary) {
> > -		int sret;
> > -		u8 guid[16];
> > +	ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd,
> > +			       DP_RECEIVER_CAP_SIZE);
> > +	if (ret != DP_RECEIVER_CAP_SIZE) {
> > +		DRM_DEBUG_KMS("dpcd read failed - undocked during
> > suspend?\n");
> > +		goto out_fail;
> > +	}
> >  
> > -		sret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd,
> > DP_RECEIVER_CAP_SIZE);
> > -		if (sret != DP_RECEIVER_CAP_SIZE) {
> > -			DRM_DEBUG_KMS("dpcd read failed - undocked during
> > suspend?\n");
> > -			ret = -1;
> > -			goto out_unlock;
> > -		}
> > +	ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
> > +				 DP_MST_EN |
> > +				 DP_UP_REQ_EN |
> > +				 DP_UPSTREAM_IS_SRC);
> > +	if (ret < 0) {
> > +		DRM_DEBUG_KMS("mst write failed - undocked during
> > suspend?\n");
> > +		goto out_fail;
> > +	}
> >  
> > -		ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
> > -					 DP_MST_EN | DP_UP_REQ_EN |
> > DP_UPSTREAM_IS_SRC);
> > -		if (ret < 0) {
> > -			DRM_DEBUG_KMS("mst write failed - undocked during
> > suspend?\n");
> > -			ret = -1;
> > -			goto out_unlock;
> > -		}
> > +	/* Some hubs forget their guids after they resume */
> > +	ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16);
> > +	if (ret != 16) {
> > +		DRM_DEBUG_KMS("dpcd read failed - undocked during
> > suspend?\n");
> > +		goto out_fail;
> > +	}
> > +	drm_dp_check_mstb_guid(mgr->mst_primary, guid);
> >  
> > -		/* Some hubs forget their guids after they resume */
> > -		sret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16);
> > -		if (sret != 16) {
> > -			DRM_DEBUG_KMS("dpcd read failed - undocked during
> > suspend?\n");
> > -			ret = -1;
> > -			goto out_unlock;
> > -		}
> > -		drm_dp_check_mstb_guid(mgr->mst_primary, guid);
> > +	/* For the final step of resuming the topology, we need to bring the
> > +	 * state of our in-memory topology back into sync with reality. So,
> > +	 * restart the probing process as if we're probing a new hub
> > +	 */
> > +	queue_work(system_long_wq, &mgr->work);
> > +	mutex_unlock(&mgr->lock);
> >  
> > -		ret = 0;
> > -	} else
> > -		ret = -1;
> > +	if (sync) {
> > +		DRM_DEBUG_KMS("Waiting for link probe work to finish re-
> > syncing topology...\n");
> > +		flush_work(&mgr->work);
> > +	}
> 
> It took me longer than I'd like to admit to realize that most of the diff in
> this function is just removing the indent. Would you mind splitting that out
> into a separate patch so the reprobe change is more obvious?
> 
> With these nits fixed,
> 
> Reviewed-by: Sean Paul <sean at poorly.run>
> 
> 
> >  
> > -out_unlock:
> > +	return 0;
> > +
> > +out_fail:
> >  	mutex_unlock(&mgr->lock);
> > -	return ret;
> > +	return -1;
> >  }
> >  EXPORT_SYMBOL(drm_dp_mst_topology_mgr_resume);
> >  
> > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
> > b/drivers/gpu/drm/i915/display/intel_dp.c
> > index 5673ed75e428..b78364dcdef9 100644
> > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > @@ -7400,7 +7400,8 @@ void intel_dp_mst_resume(struct drm_i915_private
> > *dev_priv)
> >  		if (!intel_dp->can_mst)
> >  			continue;
> >  
> > -		ret = drm_dp_mst_topology_mgr_resume(&intel_dp->mst_mgr);
> > +		ret = drm_dp_mst_topology_mgr_resume(&intel_dp->mst_mgr,
> > +						     true);
> >  		if (ret) {
> >  			intel_dp->is_mst = false;
> >  			drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr,
> > diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c
> > b/drivers/gpu/drm/nouveau/dispnv50/disp.c
> > index 307584107d77..e459e2a79d78 100644
> > --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
> > +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
> > @@ -1309,14 +1309,14 @@ nv50_mstm_fini(struct nv50_mstm *mstm)
> >  }
> >  
> >  static void
> > -nv50_mstm_init(struct nv50_mstm *mstm)
> > +nv50_mstm_init(struct nv50_mstm *mstm, bool runtime)
> >  {
> >  	int ret;
> >  
> >  	if (!mstm || !mstm->mgr.mst_state)
> >  		return;
> >  
> > -	ret = drm_dp_mst_topology_mgr_resume(&mstm->mgr);
> > +	ret = drm_dp_mst_topology_mgr_resume(&mstm->mgr, !runtime);
> >  	if (ret == -1) {
> >  		drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false);
> >  		drm_kms_helper_hotplug_event(mstm->mgr.dev);
> > @@ -2262,7 +2262,7 @@ nv50_display_init(struct drm_device *dev, bool
> > resume, bool runtime)
> >  		if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
> >  			struct nouveau_encoder *nv_encoder =
> >  				nouveau_encoder(encoder);
> > -			nv50_mstm_init(nv_encoder->dp.mstm);
> > +			nv50_mstm_init(nv_encoder->dp.mstm, runtime);
> >  		}
> >  	}
> >  
> > diff --git a/include/drm/drm_dp_mst_helper.h
> > b/include/drm/drm_dp_mst_helper.h
> > index 1efbb086f7ac..1bdee5ee6dcd 100644
> > --- a/include/drm/drm_dp_mst_helper.h
> > +++ b/include/drm/drm_dp_mst_helper.h
> > @@ -685,7 +685,8 @@ void drm_dp_mst_dump_topology(struct seq_file *m,
> >  
> >  void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr
> > *mgr);
> >  int __must_check
> > -drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr);
> > +drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr,
> > +			       bool sync);
> >  
> >  ssize_t drm_dp_mst_dpcd_read(struct drm_dp_aux *aux,
> >  			     unsigned int offset, void *buffer, size_t size);
> > -- 
> > 2.21.0
> > 
-- 
Cheers,
	Lyude Paul



More information about the amd-gfx mailing list