[PATCH 2/2] drm: Measure Self Refresh Entry/Exit times to avoid thrashing

Wed Sep 18 08:17:40 UTC 2019

On Tue, Sep 17, 2019 at 04:04:33PM -0400, Sean Paul wrote:
> From: Sean Paul <seanpaul at chromium.org>
> 
> Currently the self refresh idle timer is a const set by the crtc. This
> is fine if the self refresh entry/exit times are well-known for all
> panels used on that crtc. However panels and workloads can vary quite a
> bit, and a timeout which works well for one doesn't work well for
> another.
> 
> In the extreme, if the timeout is too short we could get in a situation
> where the self refresh exits are taking so long we queue up a self refresh
> entry before the exit commit is even finished.
> 
> This patch changes the idle timeout to a moving average of the entry
> times + a moving average of exit times + the crtc constant.
> 
> This patch was tested on rockchip, with a "kevin" CrOS panel the idle
> delay averages out to about ~235ms (35 entry + 100 exit + 100 const). On
> the same board, the "bob" panel idle delay lands around ~340ms (90 entry
> + 150 exit + 100 const).
> 
> Signed-off-by: Sean Paul <seanpaul at chromium.org>
> ---
>  drivers/gpu/drm/drm_atomic_helper.c       | 20 +++++++
>  drivers/gpu/drm/drm_self_refresh_helper.c | 71 ++++++++++++++++++++++-
>  include/drm/drm_self_refresh_helper.h     |  2 +
>  3 files changed, 92 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
> index 9d7e4da6c292..3f13fa9a9e24 100644
> --- a/drivers/gpu/drm/drm_atomic_helper.c
> +++ b/drivers/gpu/drm/drm_atomic_helper.c
> @@ -26,6 +26,7 @@
>   */
>  
>  #include <linux/dma-fence.h>
> +#include <linux/ktime.h>
>  
>  #include <drm/drm_atomic.h>
>  #include <drm/drm_atomic_helper.h>
> @@ -1570,9 +1571,23 @@ static void commit_tail(struct drm_atomic_state *old_state)
>  {
>  	struct drm_device *dev = old_state->dev;
>  	const struct drm_mode_config_helper_funcs *funcs;
> +	ktime_t start;
> +	s64 commit_time_ms;
>  
>  	funcs = dev->mode_config.helper_private;
>  
> +	/*
> +	 * We're measuring the _entire_ commit, so the time will vary depending
> +	 * on how many fences and objects are involved. For the purposes of self
> +	 * refresh, this is desirable since it'll give us an idea of how
> +	 * congested things are. This will inform our decision on how often we
> +	 * should enter self refresh after idle.
> +	 *
> +	 * These times will be averaged out in the self refresh helpers to avoid
> +	 * overreacting over one outlier frame
> +	 */
> +	start = ktime_get();
> +
>  	drm_atomic_helper_wait_for_fences(dev, old_state, false);
>  
>  	drm_atomic_helper_wait_for_dependencies(old_state);
> @@ -1582,6 +1597,11 @@ static void commit_tail(struct drm_atomic_state *old_state)
>  	else
>  		drm_atomic_helper_commit_tail(old_state);
>  
> +	commit_time_ms = ktime_ms_delta(ktime_get(), start);
> +	if (commit_time_ms > 0)
> +		drm_self_refresh_helper_update_avg_times(old_state,
> +						 (unsigned long)commit_time_ms);
> +
>  	drm_atomic_helper_commit_cleanup_done(old_state);
>  
>  	drm_atomic_state_put(old_state);
> diff --git a/drivers/gpu/drm/drm_self_refresh_helper.c b/drivers/gpu/drm/drm_self_refresh_helper.c
> index 9095cebf2147..522430f8eef1 100644
> --- a/drivers/gpu/drm/drm_self_refresh_helper.c
> +++ b/drivers/gpu/drm/drm_self_refresh_helper.c
> @@ -5,6 +5,7 @@
>   * Authors:
>   * Sean Paul <seanpaul at chromium.org>
>   */
> +#include <linux/average.h>
>  #include <linux/bitops.h>
>  #include <linux/slab.h>
>  #include <linux/workqueue.h>
> @@ -50,10 +51,16 @@
>   * atomic_check when &drm_crtc_state.self_refresh_active is true.
>   */
>  
> +DECLARE_EWMA(psr_time, 4, 4)
> +
>  struct drm_self_refresh_data {
>  	struct drm_crtc *crtc;
>  	struct delayed_work entry_work;
>  	unsigned int entry_delay_ms;
> +
> +	struct mutex avg_mutex;
> +	struct ewma_psr_time entry_avg_ms;
> +	struct ewma_psr_time exit_avg_ms;
>  };
>  
>  static void drm_self_refresh_helper_entry_work(struct work_struct *work)
> @@ -121,6 +128,59 @@ static void drm_self_refresh_helper_entry_work(struct work_struct *work)
>  	drm_modeset_acquire_fini(&ctx);
>  }
>  
> +/**
> + * drm_self_refresh_helper_update_avg_times - Updates a crtc's SR time averages
> + * @state: the state which has just been applied to hardware
> + * @commit_time_ms: the amount of time in ms that this commit took to complete
> + *
> + * Called after &drm_mode_config_funcs.atomic_commit_tail, this function will
> + * update the average entry/exit self refresh times on self refresh transitions.
> + * These averages will be used when calculating how long to delay before
> + * entering self refresh mode after activity.
> + */
> +void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
> +					      unsigned int commit_time_ms)
> +{
> +	struct drm_crtc *crtc;
> +	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
> +	int i;
> +
> +	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
> +				      new_crtc_state, i) {
> +		struct drm_self_refresh_data *sr_data = crtc->self_refresh_data;
> +		struct ewma_psr_time *time;
> +
> +		if (old_crtc_state->self_refresh_active ==
> +		    new_crtc_state->self_refresh_active)
> +			continue;
> +
> +		if (new_crtc_state->self_refresh_active)
> +			time = &sr_data->entry_avg_ms;
> +		else
> +			time = &sr_data->exit_avg_ms;
> +
> +		/*
> +		 * It might be nice if we could rely on &drm_crtc.mutex to
> +		 * protect &drm_self_refresh_data.exit_avg_ms, as we do with
> +		 * &drm_self_refresh_data.entry_avg_ms, but there are a few
> +		 * reasons why a separate lock is a better choice:
> +		 * - We can't rely on &drm_crtc.mutex being held here if we're
> +		 *   doing a nonblocking commit
> +		 * - We can't grab &drm_crtc.mutex here since drm_modeset_lock()
> +		 *   doesn't tell us whether the lock was already held in the
> +		 *   acquire context (it eats -EALREADY), so we can't tell if we
> +		 *   should drop it or not
> +		 * - We don't need such a heavy-handed lock for what we're
> +		 *   trying to do here, commit ordering doesn't matter, so a
> +		 *   point-of-use lock will be less contentious
> +		 */

This comment here feels rather misplaced; I think it's better to put that
into the commit message. Elaborate locking analysis in code comments tends
to not age well, in my experience.

> +		mutex_lock(&sr_data->avg_mutex);
> +		ewma_psr_time_add(time, commit_time_ms);
> +		mutex_unlock(&sr_data->avg_mutex);
> +	}
> +}
> +EXPORT_SYMBOL(drm_self_refresh_helper_update_avg_times);
> +
>  /**
>   * drm_self_refresh_helper_alter_state - Alters the atomic state for SR exit
>   * @state: the state currently being checked
> @@ -152,6 +212,7 @@ void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state)
>  
>  	for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
>  		struct drm_self_refresh_data *sr_data;
> +		unsigned int delay;
>  
>  		/* Don't trigger the entry timer when we're already in SR */
>  		if (crtc_state->self_refresh_active)
> @@ -161,8 +222,13 @@ void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state)
>  		if (!sr_data)
>  			continue;
>  
> +		mutex_lock(&sr_data->avg_mutex);
> +		delay = ewma_psr_time_read(&sr_data->entry_avg_ms) +
> +			ewma_psr_time_read(&sr_data->exit_avg_ms) +
> +			sr_data->entry_delay_ms;

Since you auto-tune now, I'd remove the entry_delay_ms thing outright, and
just use 2x the entry+exit times for this. That would scale a lot better,
from really quick panels that take only 1 frame to enter/exit (i.e. no real
delay) to real horrors that might take even longer than the panels you
have. Adding a constant 100ms still assumes that entry+exit aren't too far
away from that 100ms value you hardcoded in drivers.

Should we have a debug print somewhere that tells us the self-refresh
delay? I'd expect a "why am I not entering sr?" moment with this
otherwise.

> +		mutex_unlock(&sr_data->avg_mutex);
>  		mod_delayed_work(system_wq, &sr_data->entry_work,
> -				 msecs_to_jiffies(sr_data->entry_delay_ms));
> +				 msecs_to_jiffies(delay));
>  	}
>  }
>  EXPORT_SYMBOL(drm_self_refresh_helper_alter_state);
> @@ -191,6 +257,9 @@ int drm_self_refresh_helper_init(struct drm_crtc *crtc,
>  			  drm_self_refresh_helper_entry_work);
>  	sr_data->entry_delay_ms = entry_delay_ms;
>  	sr_data->crtc = crtc;
> +	mutex_init(&sr_data->avg_mutex);
> +	ewma_psr_time_init(&sr_data->entry_avg_ms);
> +	ewma_psr_time_init(&sr_data->exit_avg_ms);
>  
>  	crtc->self_refresh_data = sr_data;
>  	return 0;
> diff --git a/include/drm/drm_self_refresh_helper.h b/include/drm/drm_self_refresh_helper.h
> index 397a583ccca7..ff777690c564 100644
> --- a/include/drm/drm_self_refresh_helper.h
> +++ b/include/drm/drm_self_refresh_helper.h
> @@ -12,6 +12,8 @@ struct drm_atomic_state;
>  struct drm_crtc;
>  
>  void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state);
> +void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
> +					      unsigned int commit_time_ms);
>  
>  int drm_self_refresh_helper_init(struct drm_crtc *crtc,
>  				 unsigned int entry_delay_ms);

With the bikesheds addressed somehow:

Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>

> -- 
> Sean Paul, Software Engineer, Google / Chromium OS
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch