[PATCH] drm/dp/mst: close deadlock in connector destruction.

Daniel Vetter daniel at ffwll.ch
Mon Jun 15 00:49:09 PDT 2015


On Mon, Jun 15, 2015 at 10:34:28AM +1000, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
> 
> I've only seen this once, and I failed to capture the
> lockdep backtrace, but I did some investigations.
> 
> If we are calling into the MST layer from EDID probing,
> we have the mode_config mutex held, if during that EDID
> probing, the MST hub goes away, then we can get a deadlock
> where the connector destruction function in the driver
> tries to retake the mode config mutex.
> 
> This offloads connector destruction to a workqueue,
> and avoid the subsequenct lock ordering issue.
> 
> Signed-off-by: Dave Airlie <airlied at redhat.com>

The trouble here is (afai understand it at least) that we must
synchronously destroy the connector since the drm core and mst helpers
really don't like zombies. I thought that mode_config->mutex is providing
all that race prevention, but obviously it's not (since we'd deadlock
otherwise all the time).

I think the proper fix for this is to
- refcount connectors (well no surprise there)
- in the dp mst destroy callback only unregister the connector (i.e. make
  it invisible to userspace), which would also drop one reference (from
  the kms idr). But for clarity maybe better if the port holds its own
  proper reference.
- send out hotplug event so that userspace/fbdev notice the change and can
  do a suitable modeset, which would drop all the connector references due
  to the connector still being in use in an active mode
- add checks to the fake encoder's compute_config to make sure no one can
  enable it again, only disable it (to avoid races with the destroy
  against mst unplug). Probably a call to mst_get_validate_ref plus
  immediate put (maybe wrapped up into a nice mst_validate_port helper)
  should be all we need for synchronization.
- Same synchronization needs to happen in the probe helper, but I think
  the mst helpers already take care of that.

Since the port->connector link is by pointer this design shouldn't result
in any trouble with 2 refcounts for the same structure.

Cheers, Daniel

> ---
>  drivers/gpu/drm/drm_dp_mst_topology.c | 40 +++++++++++++++++++++++++++++++++--
>  include/drm/drm_crtc.h                |  2 ++
>  include/drm/drm_dp_mst_helper.h       |  4 ++++
>  3 files changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
> index a88ae51..c98c96d 100644
> --- a/drivers/gpu/drm/drm_dp_mst_topology.c
> +++ b/drivers/gpu/drm/drm_dp_mst_topology.c
> @@ -871,8 +871,16 @@ static void drm_dp_destroy_port(struct kref *kref)
>  		port->vcpi.num_slots = 0;
>  
>  		kfree(port->cached_edid);
> -		if (port->connector)
> -			(*port->mgr->cbs->destroy_connector)(mgr, port->connector);
> +
> +		/* we can destroy the connector here, as
> +		   we might be holding the mode_config.mutex
> +		   from an EDID retrieval */
> +		if (port->connector) {
> +			mutex_lock(&mgr->destroy_connector_lock);
> +			list_add(&port->connector->destroy_list, &mgr->destroy_connector_list);
> +			mutex_unlock(&mgr->destroy_connector_lock);
> +			schedule_work(&mgr->destroy_connector_work);
> +		}
>  		drm_dp_port_teardown_pdt(port, port->pdt);
>  
>  		if (!port->input && port->vcpi.vcpi > 0)
> @@ -2652,6 +2660,30 @@ static void drm_dp_tx_work(struct work_struct *work)
>  	mutex_unlock(&mgr->qlock);
>  }
>  
> +static void drm_dp_destroy_connector_work(struct work_struct *work)
> +{
> +	struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work);
> +	struct drm_connector *connector;
> +
> +	/*
> +	 * Not a regular list traverse as we have to drop the destroy
> +	 * connector lock before destroying the connector, to avoid AB->BA
> +	 * ordering between this lock and the config mutex.
> +	 */
> +	for (;;) {
> +		mutex_lock(&mgr->destroy_connector_lock);
> +		connector = list_first_entry_or_null(&mgr->destroy_connector_list, struct drm_connector, destroy_list);
> +		if (!connector) {
> +			mutex_unlock(&mgr->destroy_connector_lock);
> +			break;
> +		}
> +		list_del(&connector->destroy_list);
> +		mutex_unlock(&mgr->destroy_connector_lock);
> +
> +		mgr->cbs->destroy_connector(mgr, connector);
> +	}
> +}
> +
>  /**
>   * drm_dp_mst_topology_mgr_init - initialise a topology manager
>   * @mgr: manager struct to initialise
> @@ -2671,10 +2703,13 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
>  	mutex_init(&mgr->lock);
>  	mutex_init(&mgr->qlock);
>  	mutex_init(&mgr->payload_lock);
> +	mutex_init(&mgr->destroy_connector_lock);
>  	INIT_LIST_HEAD(&mgr->tx_msg_upq);
>  	INIT_LIST_HEAD(&mgr->tx_msg_downq);
> +	INIT_LIST_HEAD(&mgr->destroy_connector_list);
>  	INIT_WORK(&mgr->work, drm_dp_mst_link_probe_work);
>  	INIT_WORK(&mgr->tx_work, drm_dp_tx_work);
> +	INIT_WORK(&mgr->destroy_connector_work, drm_dp_destroy_connector_work);
>  	init_waitqueue_head(&mgr->tx_waitq);
>  	mgr->dev = dev;
>  	mgr->aux = aux;
> @@ -2699,6 +2734,7 @@ EXPORT_SYMBOL(drm_dp_mst_topology_mgr_init);
>   */
>  void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr)
>  {
> +	flush_work(&mgr->destroy_connector_work);
>  	mutex_lock(&mgr->payload_lock);
>  	kfree(mgr->payloads);
>  	mgr->payloads = NULL;
> diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
> index ca71c03..5423358 100644
> --- a/include/drm/drm_crtc.h
> +++ b/include/drm/drm_crtc.h
> @@ -731,6 +731,8 @@ struct drm_connector {
>  	uint8_t num_h_tile, num_v_tile;
>  	uint8_t tile_h_loc, tile_v_loc;
>  	uint16_t tile_h_size, tile_v_size;
> +
> +	struct list_head destroy_list;
>  };
>  
>  /**
> diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
> index 04e668a..4786919 100644
> --- a/include/drm/drm_dp_mst_helper.h
> +++ b/include/drm/drm_dp_mst_helper.h
> @@ -464,6 +464,10 @@ struct drm_dp_mst_topology_mgr {
>  	struct work_struct work;
>  
>  	struct work_struct tx_work;
> +
> +	struct list_head destroy_connector_list;
> +	struct mutex destroy_connector_lock;
> +	struct work_struct destroy_connector_work;
>  };
>  
>  int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr, struct device *dev, struct drm_dp_aux *aux, int max_dpcd_transaction_bytes, int max_payloads, int conn_base_id);
> -- 
> 2.4.1
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the dri-devel mailing list