[PATCH] drm/amd/display: Add fast path for cursor plane updates
Kazlauskas, Nicholas
Nicholas.Kazlauskas at amd.com
Thu Dec 6 13:42:28 UTC 2018
On 12/5/18 4:01 PM, Kazlauskas, Nicholas wrote:
> On 2018-12-05 3:44 p.m., Grodzovsky, Andrey wrote:
>>
>>
>> On 12/05/2018 03:42 PM, Kazlauskas, Nicholas wrote:
>>> On 2018-12-05 3:26 p.m., Grodzovsky, Andrey wrote:
>>>>
>>>> On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:
>>>>> [Why]
>>>>> Legacy cursor plane updates from drm helpers go through the full
>>>>> atomic codepath. A high volume of cursor updates through this slow
>>>>> code path can cause subsequent page-flips to skip vblank intervals
>>>>> since each individual update is slow.
>>>>>
>>>>> This problem is particularly noticeable for the compton compositor.
>>>>>
>>>>> [How]
>>>>> A fast path for cursor plane updates is added by using DRM asynchronous
>>>>> commit support provided by async_check and async_update. These don't do
>>>>> a full state/flip_done dependency stall and they don't block other
>>>>> commit work.
>>>>>
>>>>> However, DC still expects itself to be single-threaded for anything
>>>>> that can issue register writes. Screen corruption or hangs can occur
>>>>> if write sequences overlap. Every call that potentially performs
>>>>> register writes needs to be guarded for asynchronous updates to work.
>>>>> The dc_lock mutex was added for this.
>>>> As far as page flip related register writes are concerned, I think this
>>>> was never an issue - we have always supported fully concurrent page
>>>> flips on different CRTCs, meaning writing surface addresses concurrently
>>>> on different pipes. So are you sure you can't do cursor updates
>>>> concurrently with at least page flips?
>>>> I am not sure about full updates.
>>>>
>>>> Andrey
>>> I think this was true before we started locking the pipes around cursor
>>> updates (and probably only for dce). The problem that occurs here is
>>> writing to the lock register from multiple threads at the same time.
>>>
>>> In general I think the "contract" between dm/dc now is that anything
>>> from dc that does register write sequences can't be called from multiple
>>> threads.
>>>
>>> Nicholas Kazlauskas
>>
>> Ok, do note that you also serialize all the page flips now which looks
>> unneeded - maybe consider an r/w lock to at least avoid that.
>>
>> Andrey
>
> Yeah, they definitely shouldn't be happening at the same time as the
> cursor calls.
>
> I'd need to look into whether it's okay now to do concurrent writes from
> the stream update functions, but I'd imagine it's not something
> we'd want to be doing. An r/w lock would work if we could, though.
>
> Nicholas Kazlauskas
A reader/writer lock wouldn't help here, actually.
Fast updates (and page flips in particular) need to lock the pipe as
well, see: commit_planes_for_stream.
Nicholas Kazlauskas
>
>>
>>>
>>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>>>>
>>>>> Cc: Leo Li <sunpeng.li at amd.com>
>>>>> Cc: Harry Wentland <harry.wentland at amd.com>
>>>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas at amd.com>
>>>>> ---
>>>>> .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-
>>>>> .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 8 +++
>>>>> 2 files changed, 73 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> index 23d61570df17..4df14a50a8ac 100644
>>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> @@ -57,6 +57,7 @@
>>>>>
>>>>> #include <drm/drmP.h>
>>>>> #include <drm/drm_atomic.h>
>>>>> +#include <drm/drm_atomic_uapi.h>
>>>>> #include <drm/drm_atomic_helper.h>
>>>>> #include <drm/drm_dp_mst_helper.h>
>>>>> #include <drm/drm_fb_helper.h>
>>>>> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
>>>>> static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>>>> struct drm_atomic_state *state);
>>>>>
>>>>> +static void handle_cursor_update(struct drm_plane *plane,
>>>>> + struct drm_plane_state *old_plane_state);
>>>>>
>>>>>
>>>>>
>>>>> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
>>>>> /* Zero all the fields */
>>>>> memset(&init_data, 0, sizeof(init_data));
>>>>>
>>>>> + mutex_init(&adev->dm.dc_lock);
>>>>> +
>>>>> if(amdgpu_dm_irq_init(adev)) {
>>>>> DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
>>>>> goto error;
>>>>> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
>>>>> /* DC Destroy TODO: Replace destroy DAL */
>>>>> if (adev->dm.dc)
>>>>> dc_destroy(&adev->dm.dc);
>>>>> +
>>>>> + mutex_destroy(&adev->dm.dc_lock);
>>>>> +
>>>>> return;
>>>>> }
>>>>>
>>>>> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
>>>>> return -EINVAL;
>>>>> }
>>>>>
>>>>> +static int dm_plane_atomic_async_check(struct drm_plane *plane,
>>>>> + struct drm_plane_state *new_plane_state)
>>>>> +{
>>>>> + /* Only support async updates on cursor planes. */
>>>>> + if (plane->type != DRM_PLANE_TYPE_CURSOR)
>>>>> + return -EINVAL;
>>>>> +
>>>>> + return 0;
>>>>> +}
>>>>> +
>>>>> +static void dm_plane_atomic_async_update(struct drm_plane *plane,
>>>>> + struct drm_plane_state *new_state)
>>>>> +{
>>>>> + struct drm_plane_state *old_state =
>>>>> + drm_atomic_get_old_plane_state(new_state->state, plane);
>>>>> +
>>>>> + if (plane->state->fb != new_state->fb)
>>>>> + drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
>>>>> +
>>>>> + plane->state->src_x = new_state->src_x;
>>>>> + plane->state->src_y = new_state->src_y;
>>>>> + plane->state->src_w = new_state->src_w;
>>>>> + plane->state->src_h = new_state->src_h;
>>>>> + plane->state->crtc_x = new_state->crtc_x;
>>>>> + plane->state->crtc_y = new_state->crtc_y;
>>>>> + plane->state->crtc_w = new_state->crtc_w;
>>>>> + plane->state->crtc_h = new_state->crtc_h;
>>>>> +
>>>>> + handle_cursor_update(plane, old_state);
>>>>> +}
>>>>> +
>>>>> static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
>>>>> .prepare_fb = dm_plane_helper_prepare_fb,
>>>>> .cleanup_fb = dm_plane_helper_cleanup_fb,
>>>>> .atomic_check = dm_plane_atomic_check,
>>>>> + .atomic_async_check = dm_plane_atomic_async_check,
>>>>> + .atomic_async_update = dm_plane_atomic_async_update
>>>>> };
>>>>>
>>>>> /*
>>>>> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
>>>>> static void handle_cursor_update(struct drm_plane *plane,
>>>>> struct drm_plane_state *old_plane_state)
>>>>> {
>>>>> + struct amdgpu_device *adev = plane->dev->dev_private;
>>>>> struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
>>>>> struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
>>>>> struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
>>>>> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>>>
>>>>> if (!position.enable) {
>>>>> /* turn off cursor */
>>>>> - if (crtc_state && crtc_state->stream)
>>>>> + if (crtc_state && crtc_state->stream) {
>>>>> + mutex_lock(&adev->dm.dc_lock);
>>>>> dc_stream_set_cursor_position(crtc_state->stream,
>>>>> &position);
>>>>> + mutex_unlock(&adev->dm.dc_lock);
>>>>> + }
>>>>> return;
>>>>> }
>>>>>
>>>>> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>>> attributes.pitch = attributes.width;
>>>>>
>>>>> if (crtc_state->stream) {
>>>>> + mutex_lock(&adev->dm.dc_lock);
>>>>> if (!dc_stream_set_cursor_attributes(crtc_state->stream,
>>>>> &attributes))
>>>>> DRM_ERROR("DC failed to set cursor attributes\n");
>>>>> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>>> if (!dc_stream_set_cursor_position(crtc_state->stream,
>>>>> &position))
>>>>> DRM_ERROR("DC failed to set cursor position\n");
>>>>> + mutex_unlock(&adev->dm.dc_lock);
>>>>> }
>>>>> }
>>>>>
>>>>> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>> &acrtc_state->stream->vrr_infopacket;
>>>>> }
>>>>>
>>>>> + mutex_lock(&adev->dm.dc_lock);
>>>>> dc_commit_updates_for_stream(adev->dm.dc,
>>>>> surface_updates,
>>>>> 1,
>>>>> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>> &stream_update,
>>>>> &surface_updates->surface,
>>>>> state);
>>>>> + mutex_unlock(&adev->dm.dc_lock);
>>>>>
>>>>> DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
>>>>> __func__,
>>>>> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>> * with a dc_plane_state and follow the atomic model a bit more closely here.
>>>>> */
>>>>> static bool commit_planes_to_stream(
>>>>> + struct amdgpu_display_manager *dm,
>>>>> struct dc *dc,
>>>>> struct dc_plane_state **plane_states,
>>>>> uint8_t new_plane_count,
>>>>> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(
>>>>> updates[i].scaling_info = &scaling_info[i];
>>>>> }
>>>>>
>>>>> + mutex_lock(&dm->dc_lock);
>>>>> dc_commit_updates_for_stream(
>>>>> dc,
>>>>> updates,
>>>>> new_plane_count,
>>>>> dc_stream, stream_update, plane_states, state);
>>>>> + mutex_unlock(&dm->dc_lock);
>>>>>
>>>>> kfree(flip_addr);
>>>>> kfree(plane_info);
>>>>> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>>>>>
>>>>> dc_stream_attach->abm_level = acrtc_state->abm_level;
>>>>>
>>>>> - if (false == commit_planes_to_stream(dm->dc,
>>>>> + if (false == commit_planes_to_stream(dm,
>>>>> + dm->dc,
>>>>> plane_states_constructed,
>>>>> planes_count,
>>>>> acrtc_state,
>>>>> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>>>>
>>>>> if (dc_state) {
>>>>> dm_enable_per_frame_crtc_master_sync(dc_state);
>>>>> + mutex_lock(&dm->dc_lock);
>>>>> WARN_ON(!dc_commit_state(dm->dc, dc_state));
>>>>> + mutex_unlock(&dm->dc_lock);
>>>>> }
>>>>>
>>>>> for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
>>>>> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>>>>
>>>>> /*TODO How it works with MPO ?*/
>>>>> if (!commit_planes_to_stream(
>>>>> + dm,
>>>>> dm->dc,
>>>>> status->plane_states,
>>>>> status->plane_count,
>>>>> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>>>> ret = -EINVAL;
>>>>> goto fail;
>>>>> }
>>>>> + } else if (state->legacy_cursor_update) {
>>>>> + /*
>>>>> + * This is a fast cursor update coming from the plane update
>>>>> + * helper, check if it can be done asynchronously for better
>>>>> + * performance.
>>>>> + */
>>>>> + state->async_update = !drm_atomic_helper_async_check(dev, state);
>>>>> }
>>>>>
>>>>> /* Must be success */
>>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>>> index 4326dc256491..25bb91ee80ba 100644
>>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>>> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {
>>>>>
>>>>> struct drm_modeset_lock atomic_obj_lock;
>>>>>
>>>>> + /**
>>>>> + * @dc_lock:
>>>>> + *
>>>>> + * Guards access to DC functions that can issue register write
>>>>> + * sequences.
>>>>> + */
>>>>> + struct mutex dc_lock;
>>>>> +
>>>>> /**
>>>>> * @irq_handler_list_low_tab:
>>>>> *
>>
>