[PATCH] drm/amd/display: Add fast path for cursor plane updates

Wed Dec 5 20:44:55 UTC 2018

On 12/05/2018 03:42 PM, Kazlauskas, Nicholas wrote:
> On 2018-12-05 3:26 p.m., Grodzovsky, Andrey wrote:
>>
>> On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:
>>> [Why]
>>> Legacy cursor plane updates from drm helpers go through the full
>>> atomic codepath. A high volume of cursor updates through this slow
>>> code path can cause subsequent page-flips to skip vblank intervals
>>> since each individual update is slow.
>>>
>>> This problem is particularly noticeable for the compton compositor.
>>>
>>> [How]
>>> A fast path for cursor plane updates is added by using DRM asynchronous
>>> commit support provided by async_check and async_update. These don't do
>>> a full state/flip_done dependency stall and they don't block other
>>> commit work.
>>>
>>> However, DC still expects itself to be single-threaded for anything
>>> that can issue register writes. Screen corruption or hangs can occur
>>> if write sequences overlap. Every call that potentially perform
>>> register writes needs to be guarded for asynchronous updates to work.
>>> The dc_lock mutex was added for this.
>> As far as page flip related register writes concerned this I think this
>> was never an issue - we always
>> supported fully concurrent page flips on different CRTCs meaning writing
>> surface address concurrently
>> on different pipes. So Are you sure you  can't do cursor update
>> concurrently against at least page flips ?
>> I am not sure about full updates.
>>
>> Andrey
> I think this was true before we started locking the pipes around cursor
> updates (and probably only for dce). The problem that occur here is
> writing to the lock register from multiple threads at the same time.
>
> In general I think the "contract" between dm/dc now is that anything
> from dc that does register write sequences can't be called from multiple
> threads.
>
> Nicholas Kazlauskas

Ok, do note that you also serialize all the page flips now which looks 
unneeded - maybe consider r/w lock to at least avoid that.

Andrey

>
>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>>
>>> Cc: Leo Li <sunpeng.li at amd.com>
>>> Cc: Harry Wentland <harry.wentland at amd.com>
>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas at amd.com>
>>> ---
>>>     .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-
>>>     .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
>>>     2 files changed, 73 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> index 23d61570df17..4df14a50a8ac 100644
>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> @@ -57,6 +57,7 @@
>>>     
>>>     #include <drm/drmP.h>
>>>     #include <drm/drm_atomic.h>
>>> +#include <drm/drm_atomic_uapi.h>
>>>     #include <drm/drm_atomic_helper.h>
>>>     #include <drm/drm_dp_mst_helper.h>
>>>     #include <drm/drm_fb_helper.h>
>>> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
>>>     static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>>     				  struct drm_atomic_state *state);
>>>     
>>> +static void handle_cursor_update(struct drm_plane *plane,
>>> +				 struct drm_plane_state *old_plane_state);
>>>     
>>>     
>>>     
>>> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
>>>     	/* Zero all the fields */
>>>     	memset(&init_data, 0, sizeof(init_data));
>>>     
>>> +	mutex_init(&adev->dm.dc_lock);
>>> +
>>>     	if(amdgpu_dm_irq_init(adev)) {
>>>     		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
>>>     		goto error;
>>> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
>>>     	/* DC Destroy TODO: Replace destroy DAL */
>>>     	if (adev->dm.dc)
>>>     		dc_destroy(&adev->dm.dc);
>>> +
>>> +	mutex_destroy(&adev->dm.dc_lock);
>>> +
>>>     	return;
>>>     }
>>>     
>>> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
>>>     	return -EINVAL;
>>>     }
>>>     
>>> +static int dm_plane_atomic_async_check(struct drm_plane *plane,
>>> +				       struct drm_plane_state *new_plane_state)
>>> +{
>>> +	/* Only support async updates on cursor planes. */
>>> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)
>>> +		return -EINVAL;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static void dm_plane_atomic_async_update(struct drm_plane *plane,
>>> +					 struct drm_plane_state *new_state)
>>> +{
>>> +	struct drm_plane_state *old_state =
>>> +		drm_atomic_get_old_plane_state(new_state->state, plane);
>>> +
>>> +	if (plane->state->fb != new_state->fb)
>>> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
>>> +
>>> +	plane->state->src_x = new_state->src_x;
>>> +	plane->state->src_y = new_state->src_y;
>>> +	plane->state->src_w = new_state->src_w;
>>> +	plane->state->src_h = new_state->src_h;
>>> +	plane->state->crtc_x = new_state->crtc_x;
>>> +	plane->state->crtc_y = new_state->crtc_y;
>>> +	plane->state->crtc_w = new_state->crtc_w;
>>> +	plane->state->crtc_h = new_state->crtc_h;
>>> +
>>> +	handle_cursor_update(plane, old_state);
>>> +}
>>> +
>>>     static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
>>>     	.prepare_fb = dm_plane_helper_prepare_fb,
>>>     	.cleanup_fb = dm_plane_helper_cleanup_fb,
>>>     	.atomic_check = dm_plane_atomic_check,
>>> +	.atomic_async_check = dm_plane_atomic_async_check,
>>> +	.atomic_async_update = dm_plane_atomic_async_update
>>>     };
>>>     
>>>     /*
>>> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
>>>     static void handle_cursor_update(struct drm_plane *plane,
>>>     				 struct drm_plane_state *old_plane_state)
>>>     {
>>> +	struct amdgpu_device *adev = plane->dev->dev_private;
>>>     	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
>>>     	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
>>>     	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
>>> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>     
>>>     	if (!position.enable) {
>>>     		/* turn off cursor */
>>> -		if (crtc_state && crtc_state->stream)
>>> +		if (crtc_state && crtc_state->stream) {
>>> +			mutex_lock(&adev->dm.dc_lock);
>>>     			dc_stream_set_cursor_position(crtc_state->stream,
>>>     						      &position);
>>> +			mutex_unlock(&adev->dm.dc_lock);
>>> +		}
>>>     		return;
>>>     	}
>>>     
>>> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>     	attributes.pitch = attributes.width;
>>>     
>>>     	if (crtc_state->stream) {
>>> +		mutex_lock(&adev->dm.dc_lock);
>>>     		if (!dc_stream_set_cursor_attributes(crtc_state->stream,
>>>     							 &attributes))
>>>     			DRM_ERROR("DC failed to set cursor attributes\n");
>>> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>     		if (!dc_stream_set_cursor_position(crtc_state->stream,
>>>     						   &position))
>>>     			DRM_ERROR("DC failed to set cursor position\n");
>>> +		mutex_unlock(&adev->dm.dc_lock);
>>>     	}
>>>     }
>>>     
>>> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>     				&acrtc_state->stream->vrr_infopacket;
>>>     	}
>>>     
>>> +	mutex_lock(&adev->dm.dc_lock);
>>>     	dc_commit_updates_for_stream(adev->dm.dc,
>>>     					     surface_updates,
>>>     					     1,
>>> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>     					     &stream_update,
>>>     					     &surface_updates->surface,
>>>     					     state);
>>> +	mutex_unlock(&adev->dm.dc_lock);
>>>     
>>>     	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
>>>     			 __func__,
>>> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>      * with a dc_plane_state and follow the atomic model a bit more closely here.
>>>      */
>>>     static bool commit_planes_to_stream(
>>> +		struct amdgpu_display_manager *dm,
>>>     		struct dc *dc,
>>>     		struct dc_plane_state **plane_states,
>>>     		uint8_t new_plane_count,
>>> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(
>>>     		updates[i].scaling_info = &scaling_info[i];
>>>     	}
>>>     
>>> +	mutex_lock(&dm->dc_lock);
>>>     	dc_commit_updates_for_stream(
>>>     			dc,
>>>     			updates,
>>>     			new_plane_count,
>>>     			dc_stream, stream_update, plane_states, state);
>>> +	mutex_unlock(&dm->dc_lock);
>>>     
>>>     	kfree(flip_addr);
>>>     	kfree(plane_info);
>>> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>>>     
>>>     		dc_stream_attach->abm_level = acrtc_state->abm_level;
>>>     
>>> -		if (false == commit_planes_to_stream(dm->dc,
>>> +		if (false == commit_planes_to_stream(dm,
>>> +							dm->dc,
>>>     							plane_states_constructed,
>>>     							planes_count,
>>>     							acrtc_state,
>>> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>>     
>>>     	if (dc_state) {
>>>     		dm_enable_per_frame_crtc_master_sync(dc_state);
>>> +		mutex_lock(&dm->dc_lock);
>>>     		WARN_ON(!dc_commit_state(dm->dc, dc_state));
>>> +		mutex_unlock(&dm->dc_lock);
>>>     	}
>>>     
>>>     	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
>>> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>>     
>>>     		/*TODO How it works with MPO ?*/
>>>     		if (!commit_planes_to_stream(
>>> +				dm,
>>>     				dm->dc,
>>>     				status->plane_states,
>>>     				status->plane_count,
>>> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>>     			ret = -EINVAL;
>>>     			goto fail;
>>>     		}
>>> +	} else if (state->legacy_cursor_update) {
>>> +		/*
>>> +		 * This is a fast cursor update coming from the plane update
>>> +		 * helper, check if it can be done asynchronously for better
>>> +		 * performance.
>>> +		 */
>>> +		state->async_update = !drm_atomic_helper_async_check(dev, state);
>>>     	}
>>>     
>>>     	/* Must be success */
>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>> index 4326dc256491..25bb91ee80ba 100644
>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {
>>>     
>>>     	struct drm_modeset_lock atomic_obj_lock;
>>>     
>>> +	/**
>>> +	 * @dc_lock:
>>> +	 *
>>> +	 * Guards access to DC functions that can issue register write
>>> +	 * sequences.
>>> +	 */
>>> +	struct mutex dc_lock;
>>> +
>>>     	/**
>>>     	 * @irq_handler_list_low_tab:
>>>     	 *