[PATCH] drm/amd/display: Add fast path for cursor plane updates

Tue Dec 11 22:30:08 UTC 2018

On 2018-12-05 2:59 p.m., Nicholas Kazlauskas wrote:
> [Why]
> Legacy cursor plane updates from drm helpers go through the full
> atomic codepath. A high volume of cursor updates through this slow
> code path can cause subsequent page-flips to skip vblank intervals
> since each individual update is slow.
> 
> This problem is particularly noticeable for the compton compositor.
> 
> [How]
> A fast path for cursor plane updates is added by using DRM asynchronous
> commit support provided by async_check and async_update. These don't do
> a full state/flip_done dependency stall and they don't block other
> commit work.
> 
> However, DC still expects itself to be single-threaded for anything
> that can issue register writes. Screen corruption or hangs can occur
> if write sequences overlap. Every call that potentially perform
> register writes needs to be guarded for asynchronous updates to work.
> The dc_lock mutex was added for this.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
> 
> Cc: Leo Li <sunpeng.li at amd.com>
> Cc: Harry Wentland <harry.wentland at amd.com>
> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas at amd.com>

I just skimmed through the calls to DC, but I trust you've locked on
everything that can potentially modify registers.

Might be a good future task to document DC interfaces that are not
thread safe, since it's currently not clear. Although this change does
shine light on that :)

Thanks,

Reviewed-by Leo Li <sunpeng.li at amd.com>

> ---
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
>   2 files changed, 73 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 23d61570df17..4df14a50a8ac 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -57,6 +57,7 @@
>   
>   #include <drm/drmP.h>
>   #include <drm/drm_atomic.h>
> +#include <drm/drm_atomic_uapi.h>
>   #include <drm/drm_atomic_helper.h>
>   #include <drm/drm_dp_mst_helper.h>
>   #include <drm/drm_fb_helper.h>
> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
>   static int amdgpu_dm_atomic_check(struct drm_device *dev,
>   				  struct drm_atomic_state *state);
>   
> +static void handle_cursor_update(struct drm_plane *plane,
> +				 struct drm_plane_state *old_plane_state);
>   
>   
>   
> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
>   	/* Zero all the fields */
>   	memset(&init_data, 0, sizeof(init_data));
>   
> +	mutex_init(&adev->dm.dc_lock);
> +
>   	if(amdgpu_dm_irq_init(adev)) {
>   		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
>   		goto error;
> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
>   	/* DC Destroy TODO: Replace destroy DAL */
>   	if (adev->dm.dc)
>   		dc_destroy(&adev->dm.dc);
> +
> +	mutex_destroy(&adev->dm.dc_lock);
> +
>   	return;
>   }
>   
> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
>   	return -EINVAL;
>   }
>   
> +static int dm_plane_atomic_async_check(struct drm_plane *plane,
> +				       struct drm_plane_state *new_plane_state)
> +{
> +	/* Only support async updates on cursor planes. */
> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +static void dm_plane_atomic_async_update(struct drm_plane *plane,
> +					 struct drm_plane_state *new_state)
> +{
> +	struct drm_plane_state *old_state =
> +		drm_atomic_get_old_plane_state(new_state->state, plane);
> +
> +	if (plane->state->fb != new_state->fb)
> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
> +
> +	plane->state->src_x = new_state->src_x;
> +	plane->state->src_y = new_state->src_y;
> +	plane->state->src_w = new_state->src_w;
> +	plane->state->src_h = new_state->src_h;
> +	plane->state->crtc_x = new_state->crtc_x;
> +	plane->state->crtc_y = new_state->crtc_y;
> +	plane->state->crtc_w = new_state->crtc_w;
> +	plane->state->crtc_h = new_state->crtc_h;
> +
> +	handle_cursor_update(plane, old_state);
> +}
> +
>   static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
>   	.prepare_fb = dm_plane_helper_prepare_fb,
>   	.cleanup_fb = dm_plane_helper_cleanup_fb,
>   	.atomic_check = dm_plane_atomic_check,
> +	.atomic_async_check = dm_plane_atomic_async_check,
> +	.atomic_async_update = dm_plane_atomic_async_update
>   };
>   
>   /*
> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
>   static void handle_cursor_update(struct drm_plane *plane,
>   				 struct drm_plane_state *old_plane_state)
>   {
> +	struct amdgpu_device *adev = plane->dev->dev_private;
>   	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
>   	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
>   	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,
>   
>   	if (!position.enable) {
>   		/* turn off cursor */
> -		if (crtc_state && crtc_state->stream)
> +		if (crtc_state && crtc_state->stream) {
> +			mutex_lock(&adev->dm.dc_lock);
>   			dc_stream_set_cursor_position(crtc_state->stream,
>   						      &position);
> +			mutex_unlock(&adev->dm.dc_lock);
> +		}
>   		return;
>   	}
>   
> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>   	attributes.pitch = attributes.width;
>   
>   	if (crtc_state->stream) {
> +		mutex_lock(&adev->dm.dc_lock);
>   		if (!dc_stream_set_cursor_attributes(crtc_state->stream,
>   							 &attributes))
>   			DRM_ERROR("DC failed to set cursor attributes\n");
> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>   		if (!dc_stream_set_cursor_position(crtc_state->stream,
>   						   &position))
>   			DRM_ERROR("DC failed to set cursor position\n");
> +		mutex_unlock(&adev->dm.dc_lock);
>   	}
>   }
>   
> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>   				&acrtc_state->stream->vrr_infopacket;
>   	}
>   
> +	mutex_lock(&adev->dm.dc_lock);
>   	dc_commit_updates_for_stream(adev->dm.dc,
>   					     surface_updates,
>   					     1,
> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>   					     &stream_update,
>   					     &surface_updates->surface,
>   					     state);
> +	mutex_unlock(&adev->dm.dc_lock);
>   
>   	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
>   			 __func__,
> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>    * with a dc_plane_state and follow the atomic model a bit more closely here.
>    */
>   static bool commit_planes_to_stream(
> +		struct amdgpu_display_manager *dm,
>   		struct dc *dc,
>   		struct dc_plane_state **plane_states,
>   		uint8_t new_plane_count,
> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(
>   		updates[i].scaling_info = &scaling_info[i];
>   	}
>   
> +	mutex_lock(&dm->dc_lock);
>   	dc_commit_updates_for_stream(
>   			dc,
>   			updates,
>   			new_plane_count,
>   			dc_stream, stream_update, plane_states, state);
> +	mutex_unlock(&dm->dc_lock);
>   
>   	kfree(flip_addr);
>   	kfree(plane_info);
> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>   
>   		dc_stream_attach->abm_level = acrtc_state->abm_level;
>   
> -		if (false == commit_planes_to_stream(dm->dc,
> +		if (false == commit_planes_to_stream(dm,
> +							dm->dc,
>   							plane_states_constructed,
>   							planes_count,
>   							acrtc_state,
> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>   
>   	if (dc_state) {
>   		dm_enable_per_frame_crtc_master_sync(dc_state);
> +		mutex_lock(&dm->dc_lock);
>   		WARN_ON(!dc_commit_state(dm->dc, dc_state));
> +		mutex_unlock(&dm->dc_lock);
>   	}
>   
>   	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>   
>   		/*TODO How it works with MPO ?*/
>   		if (!commit_planes_to_stream(
> +				dm,
>   				dm->dc,
>   				status->plane_states,
>   				status->plane_count,
> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
>   			ret = -EINVAL;
>   			goto fail;
>   		}
> +	} else if (state->legacy_cursor_update) {
> +		/*
> +		 * This is a fast cursor update coming from the plane update
> +		 * helper, check if it can be done asynchronously for better
> +		 * performance.
> +		 */
> +		state->async_update = !drm_atomic_helper_async_check(dev, state);
>   	}
>   
>   	/* Must be success */
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> index 4326dc256491..25bb91ee80ba 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {
>   
>   	struct drm_modeset_lock atomic_obj_lock;
>   
> +	/**
> +	 * @dc_lock:
> +	 *
> +	 * Guards access to DC functions that can issue register write
> +	 * sequences.
> +	 */
> +	struct mutex dc_lock;
> +
>   	/**
>   	 * @irq_handler_list_low_tab:
>   	 *
>