[PATCH] drm/amd/display: Add fast path for cursor plane updates

Nicholas Kazlauskas nkazlaus at amd.com
Wed Dec 5 21:01:31 UTC 2018


On 2018-12-05 3:44 p.m., Grodzovsky, Andrey wrote:
> 
> 
> On 12/05/2018 03:42 PM, Kazlauskas, Nicholas wrote:
>> On 2018-12-05 3:26 p.m., Grodzovsky, Andrey wrote:
>>>
>>> On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:
>>>> [Why]
>>>> Legacy cursor plane updates from drm helpers go through the full
>>>> atomic codepath. A high volume of cursor updates through this slow
>>>> code path can cause subsequent page-flips to skip vblank intervals
>>>> since each individual update is slow.
>>>>
>>>> This problem is particularly noticeable for the compton compositor.
>>>>
>>>> [How]
>>>> A fast path for cursor plane updates is added by using DRM asynchronous
>>>> commit support provided by async_check and async_update. These don't do
>>>> a full state/flip_done dependency stall and they don't block other
>>>> commit work.
>>>>
>>>> However, DC still expects itself to be single-threaded for anything
>>>> that can issue register writes. Screen corruption or hangs can occur
>>>> if write sequences overlap. Every call that potentially performs
>>>> register writes needs to be guarded for asynchronous updates to work.
>>>> The dc_lock mutex was added for this.
>>> As far as page-flip-related register writes are concerned, I think this
>>> was never an issue - we have always supported fully concurrent page
>>> flips on different CRTCs, meaning the surface address is written
>>> concurrently on different pipes. So are you sure you can't do cursor
>>> updates concurrently with at least page flips?
>>> I am not sure about full updates.
>>>
>>> Andrey
>> I think this was true before we started locking the pipes around cursor
>> updates (and probably only for dce). The problem that occurs here is
>> writing to the lock register from multiple threads at the same time.
>>
>> In general I think the "contract" between dm/dc now is that anything
>> from dc that does register write sequences can't be called from multiple
>> threads.
>>
>> Nicholas Kazlauskas
> 
> Ok, but note that you also serialize all the page flips now, which looks
> unneeded - maybe consider a r/w lock to at least avoid that.
> 
> Andrey

Yeah, they definitely shouldn't be happening at the same time as the
cursor calls.

I'd need to look into whether it's okay now to do concurrent writes from
the stream update functions, but I'd imagine it's not something we'd want
to be doing. A r/w lock would work if we could, though.
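
Just to sketch what that r/w variant might look like - purely illustrative,
assuming flips really are safe to issue concurrently on different pipes
while cursor and stream updates still need exclusive access. The dc_rwlock
field and the helper names below are made up for the example, they're not
part of this patch:

#include <linux/rwsem.h>

/* Hypothetical replacement for the dc_lock mutex. */
struct dm_dc_lock_sketch {
	struct rw_semaphore dc_rwlock;
};

static void dm_dc_lock_sketch_init(struct dm_dc_lock_sketch *dm)
{
	init_rwsem(&dm->dc_rwlock);
}

/* Flip path: shared access, so flips on different CRTCs stay concurrent. */
static void dm_dc_flip_lock(struct dm_dc_lock_sketch *dm)
{
	down_read(&dm->dc_rwlock);
}

static void dm_dc_flip_unlock(struct dm_dc_lock_sketch *dm)
{
	up_read(&dm->dc_rwlock);
}

/*
 * Cursor and stream update paths: exclusive access, since these touch the
 * pipe lock registers and must not overlap any other write sequence.
 */
static void dm_dc_update_lock(struct dm_dc_lock_sketch *dm)
{
	down_write(&dm->dc_rwlock);
}

static void dm_dc_update_unlock(struct dm_dc_lock_sketch *dm)
{
	up_write(&dm->dc_rwlock);
}

Whether that's actually safe hinges on the open question above, i.e. whether
the stream update functions can tolerate running concurrently with flips at
all.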

Nicholas Kazlauskas

> 
>>
>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>>>
>>>> Cc: Leo Li <sunpeng.li at amd.com>
>>>> Cc: Harry Wentland <harry.wentland at amd.com>
>>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas at amd.com>
>>>> ---
>>>>      .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-
>>>>      .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
>>>>      2 files changed, 73 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>> index 23d61570df17..4df14a50a8ac 100644
>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>> @@ -57,6 +57,7 @@
>>>>      
>>>>      #include <drm/drmP.h>
>>>>      #include <drm/drm_atomic.h>
>>>> +#include <drm/drm_atomic_uapi.h>
>>>>      #include <drm/drm_atomic_helper.h>
>>>>      #include <drm/drm_dp_mst_helper.h>
>>>>      #include <drm/drm_fb_helper.h>
>>>> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
>>>>      static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>>>      				  struct drm_atomic_state *state);
>>>>      
>>>> +static void handle_cursor_update(struct drm_plane *plane,
>>>> +				 struct drm_plane_state *old_plane_state);
>>>>      
>>>>      
>>>>      
>>>> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
>>>>      	/* Zero all the fields */
>>>>      	memset(&init_data, 0, sizeof(init_data));
>>>>      
>>>> +	mutex_init(&adev->dm.dc_lock);
>>>> +
>>>>      	if(amdgpu_dm_irq_init(adev)) {
>>>>      		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
>>>>      		goto error;
>>>> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
>>>>      	/* DC Destroy TODO: Replace destroy DAL */
>>>>      	if (adev->dm.dc)
>>>>      		dc_destroy(&adev->dm.dc);
>>>> +
>>>> +	mutex_destroy(&adev->dm.dc_lock);
>>>> +
>>>>      	return;
>>>>      }
>>>>      
>>>> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
>>>>      	return -EINVAL;
>>>>      }
>>>>      
>>>> +static int dm_plane_atomic_async_check(struct drm_plane *plane,
>>>> +				       struct drm_plane_state *new_plane_state)
>>>> +{
>>>> +	/* Only support async updates on cursor planes. */
>>>> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)
>>>> +		return -EINVAL;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static void dm_plane_atomic_async_update(struct drm_plane *plane,
>>>> +					 struct drm_plane_state *new_state)
>>>> +{
>>>> +	struct drm_plane_state *old_state =
>>>> +		drm_atomic_get_old_plane_state(new_state->state, plane);
>>>> +
>>>> +	if (plane->state->fb != new_state->fb)
>>>> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
>>>> +
>>>> +	plane->state->src_x = new_state->src_x;
>>>> +	plane->state->src_y = new_state->src_y;
>>>> +	plane->state->src_w = new_state->src_w;
>>>> +	plane->state->src_h = new_state->src_h;
>>>> +	plane->state->crtc_x = new_state->crtc_x;
>>>> +	plane->state->crtc_y = new_state->crtc_y;
>>>> +	plane->state->crtc_w = new_state->crtc_w;
>>>> +	plane->state->crtc_h = new_state->crtc_h;
>>>> +
>>>> +	handle_cursor_update(plane, old_state);
>>>> +}
>>>> +
>>>>      static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
>>>>      	.prepare_fb = dm_plane_helper_prepare_fb,
>>>>      	.cleanup_fb = dm_plane_helper_cleanup_fb,
>>>>      	.atomic_check = dm_plane_atomic_check,
>>>> +	.atomic_async_check = dm_plane_atomic_async_check,
>>>> +	.atomic_async_update = dm_plane_atomic_async_update
>>>>      };
>>>>      
>>>>      /*
>>>> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
>>>>      static void handle_cursor_update(struct drm_plane *plane,
>>>>      				 struct drm_plane_state *old_plane_state)
>>>>      {
>>>> +	struct amdgpu_device *adev = plane->dev->dev_private;
>>>>      	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
>>>>      	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
>>>>      	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
>>>> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>>      
>>>>      	if (!position.enable) {
>>>>      		/* turn off cursor */
>>>> -		if (crtc_state && crtc_state->stream)
>>>> +		if (crtc_state && crtc_state->stream) {
>>>> +			mutex_lock(&adev->dm.dc_lock);
>>>>      			dc_stream_set_cursor_position(crtc_state->stream,
>>>>      						      &position);
>>>> +			mutex_unlock(&adev->dm.dc_lock);
>>>> +		}
>>>>      		return;
>>>>      	}
>>>>      
>>>> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>>      	attributes.pitch = attributes.width;
>>>>      
>>>>      	if (crtc_state->stream) {
>>>> +		mutex_lock(&adev->dm.dc_lock);
>>>>      		if (!dc_stream_set_cursor_attributes(crtc_state->stream,
>>>>      							 &attributes))
>>>>      			DRM_ERROR("DC failed to set cursor attributes\n");
>>>> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>>      		if (!dc_stream_set_cursor_position(crtc_state->stream,
>>>>      						   &position))
>>>>      			DRM_ERROR("DC failed to set cursor position\n");
>>>> +		mutex_unlock(&adev->dm.dc_lock);
>>>>      	}
>>>>      }
>>>>      
>>>> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>      				&acrtc_state->stream->vrr_infopacket;
>>>>      	}
>>>>      
>>>> +	mutex_lock(&adev->dm.dc_lock);
>>>>      	dc_commit_updates_for_stream(adev->dm.dc,
>>>>      					     surface_updates,
>>>>      					     1,
>>>> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>      					     &stream_update,
>>>>      					     &surface_updates->surface,
>>>>      					     state);
>>>> +	mutex_unlock(&adev->dm.dc_lock);
>>>>      
>>>>      	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
>>>>      			 __func__,
>>>> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>       * with a dc_plane_state and follow the atomic model a bit more closely here.
>>>>       */
>>>>      static bool commit_planes_to_stream(
>>>> +		struct amdgpu_display_manager *dm,
>>>>      		struct dc *dc,
>>>>      		struct dc_plane_state **plane_states,
>>>>      		uint8_t new_plane_count,
>>>> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(
>>>>      		updates[i].scaling_info = &scaling_info[i];
>>>>      	}
>>>>      
>>>> +	mutex_lock(&dm->dc_lock);
>>>>      	dc_commit_updates_for_stream(
>>>>      			dc,
>>>>      			updates,
>>>>      			new_plane_count,
>>>>      			dc_stream, stream_update, plane_states, state);
>>>> +	mutex_unlock(&dm->dc_lock);
>>>>      
>>>>      	kfree(flip_addr);
>>>>      	kfree(plane_info);
>>>> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>>>>      
>>>>      		dc_stream_attach->abm_level = acrtc_state->abm_level;
>>>>      
>>>> -		if (false == commit_planes_to_stream(dm->dc,
>>>> +		if (false == commit_planes_to_stream(dm,
>>>> +							dm->dc,
>>>>      							plane_states_constructed,
>>>>      							planes_count,
>>>>      							acrtc_state,
>>>> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>>>      
>>>>      	if (dc_state) {
>>>>      		dm_enable_per_frame_crtc_master_sync(dc_state);
>>>> +		mutex_lock(&dm->dc_lock);
>>>>      		WARN_ON(!dc_commit_state(dm->dc, dc_state));
>>>> +		mutex_unlock(&dm->dc_lock);
>>>>      	}
>>>>      
>>>>      	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
>>>> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>>>      
>>>>      		/*TODO How it works with MPO ?*/
>>>>      		if (!commit_planes_to_stream(
>>>> +				dm,
>>>>      				dm->dc,
>>>>      				status->plane_states,
>>>>      				status->plane_count,
>>>> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>>>      			ret = -EINVAL;
>>>>      			goto fail;
>>>>      		}
>>>> +	} else if (state->legacy_cursor_update) {
>>>> +		/*
>>>> +		 * This is a fast cursor update coming from the plane update
>>>> +		 * helper, check if it can be done asynchronously for better
>>>> +		 * performance.
>>>> +		 */
>>>> +		state->async_update = !drm_atomic_helper_async_check(dev, state);
>>>>      	}
>>>>      
>>>>      	/* Must be success */
>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>> index 4326dc256491..25bb91ee80ba 100644
>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {
>>>>      
>>>>      	struct drm_modeset_lock atomic_obj_lock;
>>>>      
>>>> +	/**
>>>> +	 * @dc_lock:
>>>> +	 *
>>>> +	 * Guards access to DC functions that can issue register write
>>>> +	 * sequences.
>>>> +	 */
>>>> +	struct mutex dc_lock;
>>>> +
>>>>      	/**
>>>>      	 * @irq_handler_list_low_tab:
>>>>      	 *
> 


