[PATCH] drm/dp/mst: close deadlock in connector destruction.

Dave Airlie airlied at gmail.com
Mon Jun 15 01:47:21 PDT 2015


On 15 June 2015 at 17:49, Daniel Vetter <daniel at ffwll.ch> wrote:
> On Mon, Jun 15, 2015 at 10:34:28AM +1000, Dave Airlie wrote:
>> From: Dave Airlie <airlied at redhat.com>
>>
>> I've only seen this once, and I failed to capture the
>> lockdep backtrace, but I did some investigations.
>>
>> If we are calling into the MST layer from EDID probing,
>> we have the mode_config mutex held, if during that EDID
>> probing, the MST hub goes away, then we can get a deadlock
>> where the connector destruction function in the driver
>> tries to retake the mode config mutex.
>>
>> This offloads connector destruction to a workqueue,
>> and avoid the subsequenct lock ordering issue.
>>
>> Signed-off-by: Dave Airlie <airlied at redhat.com>
>
> The trouble here is (afai understand it at least) that we must
> synchronously destroy the connector since the drm core and mst helpers
> really don't like zombies. I thought that mode_config->mutex is providing
> all that race prevention, but obviously it's not (since we'd deadlock
> otherwise all the time).

I don't think the mst core cares at all, not sure what problems the drm core
would have since all that happens is the callbacks into the mst layer will
fail to validate the port pointer. So the connectors aren't zombies as such.

Maybe you can illuminate that more before we go with the major rework,
to fix a corner case.

I'll take a look at the design below, but I've no real time to work on
it at the moment.

Dave.

>
> I think the proper fix for this is to
> - refcount connectors (well no surprise there)
> - in the dp mst destroy callback only unregister the connector (i.e. make
>   it invisible to userspace), which would also drop one reference (from
>   the kms idr). But for clarity maybe better if the port holds its own
>   proper reference.
> - send out hotplug event so that userspace/fbdev notice the change and can
>   do a suitable modeset, which would drop all the connector references due
>   to the connector still being in use in an active mode
> - add checks to the fake encoder's compute_config to make sure no one can
>   enable it again, only disable it (to avoid races with the destroy
>   against mst unplug). Probably a call to mst_get_validate_ref plus
>   immediate put (maybe wrapped up into a nice mst_validate_port helper)
>   should be all we need for synchronization.
> - Same synchronization needs to happen in the probe helper, but I think
>   the mst helpers already take care of that.
>
> Since the port->connector link is by pointer this design shouldn't result
> in any trouble with 2 refcounts for the same structure.
>
> Cheers, Daniel
>
>> ---
>>  drivers/gpu/drm/drm_dp_mst_topology.c | 40 +++++++++++++++++++++++++++++++++--
>>  include/drm/drm_crtc.h                |  2 ++
>>  include/drm/drm_dp_mst_helper.h       |  4 ++++
>>  3 files changed, 44 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
>> index a88ae51..c98c96d 100644
>> --- a/drivers/gpu/drm/drm_dp_mst_topology.c
>> +++ b/drivers/gpu/drm/drm_dp_mst_topology.c
>> @@ -871,8 +871,16 @@ static void drm_dp_destroy_port(struct kref *kref)
>>               port->vcpi.num_slots = 0;
>>
>>               kfree(port->cached_edid);
>> -             if (port->connector)
>> -                     (*port->mgr->cbs->destroy_connector)(mgr, port->connector);
>> +
>> +             /* we can destroy the connector here, as
>> +                we might be holding the mode_config.mutex
>> +                from an EDID retrieval */
>> +             if (port->connector) {
>> +                     mutex_lock(&mgr->destroy_connector_lock);
>> +                     list_add(&port->connector->destroy_list, &mgr->destroy_connector_list);
>> +                     mutex_unlock(&mgr->destroy_connector_lock);
>> +                     schedule_work(&mgr->destroy_connector_work);
>> +             }
>>               drm_dp_port_teardown_pdt(port, port->pdt);
>>
>>               if (!port->input && port->vcpi.vcpi > 0)
>> @@ -2652,6 +2660,30 @@ static void drm_dp_tx_work(struct work_struct *work)
>>       mutex_unlock(&mgr->qlock);
>>  }
>>
>> +static void drm_dp_destroy_connector_work(struct work_struct *work)
>> +{
>> +     struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work);
>> +     struct drm_connector *connector;
>> +
>> +     /*
>> +      * Not a regular list traverse as we have to drop the destroy
>> +      * connector lock before destroying the connector, to avoid AB->BA
>> +      * ordering between this lock and the config mutex.
>> +      */
>> +     for (;;) {
>> +             mutex_lock(&mgr->destroy_connector_lock);
>> +             connector = list_first_entry_or_null(&mgr->destroy_connector_list, struct drm_connector, destroy_list);
>> +             if (!connector) {
>> +                     mutex_unlock(&mgr->destroy_connector_lock);
>> +                     break;
>> +             }
>> +             list_del(&connector->destroy_list);
>> +             mutex_unlock(&mgr->destroy_connector_lock);
>> +
>> +             mgr->cbs->destroy_connector(mgr, connector);
>> +     }
>> +}
>> +
>>  /**
>>   * drm_dp_mst_topology_mgr_init - initialise a topology manager
>>   * @mgr: manager struct to initialise
>> @@ -2671,10 +2703,13 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
>>       mutex_init(&mgr->lock);
>>       mutex_init(&mgr->qlock);
>>       mutex_init(&mgr->payload_lock);
>> +     mutex_init(&mgr->destroy_connector_lock);
>>       INIT_LIST_HEAD(&mgr->tx_msg_upq);
>>       INIT_LIST_HEAD(&mgr->tx_msg_downq);
>> +     INIT_LIST_HEAD(&mgr->destroy_connector_list);
>>       INIT_WORK(&mgr->work, drm_dp_mst_link_probe_work);
>>       INIT_WORK(&mgr->tx_work, drm_dp_tx_work);
>> +     INIT_WORK(&mgr->destroy_connector_work, drm_dp_destroy_connector_work);
>>       init_waitqueue_head(&mgr->tx_waitq);
>>       mgr->dev = dev;
>>       mgr->aux = aux;
>> @@ -2699,6 +2734,7 @@ EXPORT_SYMBOL(drm_dp_mst_topology_mgr_init);
>>   */
>>  void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr)
>>  {
>> +     flush_work(&mgr->destroy_connector_work);
>>       mutex_lock(&mgr->payload_lock);
>>       kfree(mgr->payloads);
>>       mgr->payloads = NULL;
>> diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
>> index ca71c03..5423358 100644
>> --- a/include/drm/drm_crtc.h
>> +++ b/include/drm/drm_crtc.h
>> @@ -731,6 +731,8 @@ struct drm_connector {
>>       uint8_t num_h_tile, num_v_tile;
>>       uint8_t tile_h_loc, tile_v_loc;
>>       uint16_t tile_h_size, tile_v_size;
>> +
>> +     struct list_head destroy_list;
>>  };
>>
>>  /**
>> diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
>> index 04e668a..4786919 100644
>> --- a/include/drm/drm_dp_mst_helper.h
>> +++ b/include/drm/drm_dp_mst_helper.h
>> @@ -464,6 +464,10 @@ struct drm_dp_mst_topology_mgr {
>>       struct work_struct work;
>>
>>       struct work_struct tx_work;
>> +
>> +     struct list_head destroy_connector_list;
>> +     struct mutex destroy_connector_lock;
>> +     struct work_struct destroy_connector_work;
>>  };
>>
>>  int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr, struct device *dev, struct drm_dp_aux *aux, int max_dpcd_transaction_bytes, int max_payloads, int conn_base_id);
>> --
>> 2.4.1
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch


More information about the dri-devel mailing list