[Intel-gfx] [PATCH v8 1/2] drm/i915: Refactor intel_can_enable_sagv
Ville Syrjälä
ville.syrjala at linux.intel.com
Fri Oct 25 10:24:14 UTC 2019
On Fri, Oct 25, 2019 at 12:53:51PM +0300, Stanislav Lisovskiy wrote:
> Currently intel_can_enable_sagv function contains
> a mix of workarounds for different platforms
> some of them are not valid for gens >= 11 already,
> so let's split it into separate functions.
>
> v2:
> - Rework watermark calculation algorithm to
> attempt to calculate Level 0 watermark
> with added sagv block time latency and
> check if it fits in DBuf in order to
> determine if SAGV can be enabled already
> at this stage, just as BSpec 49325 states.
> if that fails, roll back to the usual Level 0
> latency and disable SAGV.
> - Remove unneeded tabs(James Ausmus)
>
> v3: Rebased the patch
>
> v4: - Added back interlaced check for Gen12 and
> added separate function for TGL SAGV check
> (thanks to James Ausmus for spotting)
> - Removed unneeded gen check
> - Extracted Gen12 SAGV decision making code
> to a separate function from skl_compute_wm
>
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy at intel.com>
> Cc: Ville Syrjälä <ville.syrjala at intel.com>
> Cc: James Ausmus <james.ausmus at intel.com>
> ---
> .../drm/i915/display/intel_display_types.h | 8 +
> drivers/gpu/drm/i915/intel_pm.c | 254 +++++++++++++++++-
> 2 files changed, 254 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
> index 8358152e403e..f09c80c96470 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_types.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_types.h
> @@ -490,6 +490,13 @@ struct intel_atomic_state {
> */
> u8 active_pipe_changes;
>
> + /*
> + * For Gen12 only after calculating watermarks with
> + * additional latency, we can determine if SAGV can be enabled
> + * or not for that particular configuration.
> + */
> + bool gen12_can_sagv;
> +
> u8 active_pipes;
> /* minimum acceptable cdclk for each pipe */
> int min_cdclk[I915_MAX_PIPES];
> @@ -642,6 +649,7 @@ struct skl_plane_wm {
> struct skl_wm_level wm[8];
> struct skl_wm_level uv_wm[8];
> struct skl_wm_level trans_wm;
> + struct skl_wm_level sagv_wm_l0;
sagv_wm0 (or maybe even just sagv_wm) would be a bit less ugly
name I think.
> bool is_planar;
> };
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 362234449087..b61eb6aaa89b 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3751,7 +3751,7 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
> return 0;
> }
>
> -bool intel_can_enable_sagv(struct intel_atomic_state *state)
> +bool skl_can_enable_sagv(struct intel_atomic_state *state)
> {
> struct drm_device *dev = state->base.dev;
> struct drm_i915_private *dev_priv = to_i915(dev);
> @@ -3817,6 +3817,95 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state)
> return true;
> }
>
> +bool icl_can_enable_sagv(struct intel_atomic_state *state)
> +{
> + struct drm_device *dev = state->base.dev;
> + struct drm_i915_private *dev_priv = to_i915(dev);
> + struct intel_crtc *crtc;
> + struct intel_crtc_state *new_crtc_state;
> + int level, latency;
> + int i;
> + int plane_id;
> +
> + if (!intel_has_sagv(dev_priv))
> + return false;
> +
> + /*
> + * If there are no active CRTCs, no additional checks need be performed
> + */
> + if (hweight8(state->active_pipes) == 0)
> + return true;
> +
> + for_each_new_intel_crtc_in_state(state, crtc,
> + new_crtc_state, i) {
> + unsigned int flags = crtc->base.state->adjusted_mode.flags;
> +
> + if (flags & DRM_MODE_FLAG_INTERLACE)
> + return false;
> +
> + if (!new_crtc_state->base.enable)
> + continue;
Do we use active or enable elsewhere to decide whether to compute wms
for a pipe? Should be consistent here so we don't get into some wonky
state where we didn't compute normal wms but are computing the sagv wm.
> +
> + for_each_plane_id_on_crtc(crtc, plane_id) {
> + struct skl_plane_wm *wm =
> + &new_crtc_state->wm.skl.optimal.planes[plane_id];
> +
> + /* Skip this plane if it's not enabled */
> + if (!wm->wm[0].plane_en)
> + continue;
> +
> + /* Find the highest enabled wm level for this plane */
> + for (level = ilk_wm_max_level(dev_priv);
> + !wm->wm[level].plane_en; --level) {
> + }
> +
> + latency = dev_priv->wm.skl_latency[level];
> +
> + /*
> + * If any of the planes on this pipe don't enable
> + * wm levels that incur memory latencies higher than
> + * sagv_block_time_us we can't enable SAGV.
> + */
> + if (latency < dev_priv->sagv_block_time_us)
> + return false;
> + }
> + }
> +
> + return true;
> +}
> +
> +bool tgl_can_enable_sagv(struct intel_atomic_state *state)
> +{
> + struct intel_crtc *crtc;
> + struct intel_crtc_state *new_crtc_state;
> + int i;
> +
> + if (!state->gen12_can_sagv)
> + return false;
> +
> + for_each_new_intel_crtc_in_state(state, crtc,
> + new_crtc_state, i) {
> + unsigned int flags = crtc->base.state->adjusted_mode.flags;
> +
> + if (flags & DRM_MODE_FLAG_INTERLACE)
> + return false;
> + }
> + return true;
> +}
> +
> +bool intel_can_enable_sagv(struct intel_atomic_state *state)
> +{
> + struct drm_device *dev = state->base.dev;
> + struct drm_i915_private *dev_priv = to_i915(dev);
> +
> + if (INTEL_GEN(dev_priv) >= 12)
> + return tgl_can_enable_sagv(state);
> + else if (INTEL_GEN(dev_priv) == 11)
> + return icl_can_enable_sagv(state);
> +
> + return skl_can_enable_sagv(state);
Why do we have three separate code paths now? I believe there should be
just two.
Also, if you go to the trouble of adding dev_priv->..can_sagv, just make
it work for all platforms.
> +}
> +
> static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
> const struct intel_crtc_state *crtc_state,
> const u64 total_data_rate,
> @@ -3936,6 +4025,7 @@ static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
> int color_plane);
> static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
> int level,
> + u32 latency,
> const struct skl_wm_params *wp,
> const struct skl_wm_level *result_prev,
> struct skl_wm_level *result /* out */);
> @@ -3958,7 +4048,9 @@ skl_cursor_allocation(const struct intel_crtc_state *crtc_state,
> WARN_ON(ret);
>
> for (level = 0; level <= max_level; level++) {
> - skl_compute_plane_wm(crtc_state, level, &wp, &wm, &wm);
> + u32 latency = dev_priv->wm.skl_latency[level];
> + skl_compute_plane_wm(crtc_state, level, latency, &wp, &wm, &wm);
> +
> if (wm.min_ddb_alloc == U16_MAX)
> break;
>
> @@ -4310,6 +4402,68 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
> return total_data_rate;
> }
>
> +static int
> +tgl_check_pipe_fits_sagv_wm(struct intel_crtc_state *crtc_state,
> + struct skl_ddb_allocation *ddb /* out */)
> +{
> + struct drm_crtc *crtc = crtc_state->base.crtc;
> + struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> + struct skl_ddb_entry *alloc = &crtc_state->wm.skl.ddb;
> + u16 alloc_size;
> + u16 total[I915_MAX_PLANES] = {};
> + u64 total_data_rate;
> + enum plane_id plane_id;
> + int num_active;
> + u64 plane_data_rate[I915_MAX_PLANES] = {};
> + u32 blocks;
> +
> + /*
> + * No need to check gen here, we call this only for gen12
> + */
> + total_data_rate =
> + icl_get_total_relative_data_rate(crtc_state,
> + plane_data_rate);
> +
> + skl_ddb_get_pipe_allocation_limits(dev_priv, crtc_state,
> + total_data_rate,
> + ddb, alloc, &num_active);
> + alloc_size = skl_ddb_entry_size(alloc);
> + if (alloc_size == 0)
> + return -ENOSPC;
> +
> + /* Allocate fixed number of blocks for cursor. */
> + total[PLANE_CURSOR] = skl_cursor_allocation(crtc_state, num_active);
> + alloc_size -= total[PLANE_CURSOR];
> + crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR].start =
> + alloc->end - total[PLANE_CURSOR];
> + crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end;
> +
> + /*
> + * Do check if we can fit L0 + sagv_block_time and
> + * disable SAGV if we can't.
> + */
> + blocks = 0;
> + for_each_plane_id_on_crtc(intel_crtc, plane_id) {
> + const struct skl_plane_wm *wm =
> + &crtc_state->wm.skl.optimal.planes[plane_id];
> +
> + if (plane_id == PLANE_CURSOR) {
> + if (WARN_ON(wm->sagv_wm_l0.min_ddb_alloc >
> + total[PLANE_CURSOR])) {
> + blocks = U32_MAX;
> + break;
> + }
> + continue;
> + }
> +
> + blocks += wm->sagv_wm_l0.min_ddb_alloc;
> + if (blocks > alloc_size)
> + return -ENOSPC;
> + }
> + return 0;
> +}
> +
> static int
> skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state,
> struct skl_ddb_allocation *ddb /* out */)
> @@ -4739,12 +4893,12 @@ static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level)
>
> static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
> int level,
> + u32 latency,
> const struct skl_wm_params *wp,
> const struct skl_wm_level *result_prev,
> struct skl_wm_level *result /* out */)
> {
> struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
> - u32 latency = dev_priv->wm.skl_latency[level];
> uint_fixed_16_16_t method1, method2;
> uint_fixed_16_16_t selected_result;
> u32 res_blocks, res_lines, min_ddb_alloc = 0;
> @@ -4865,19 +5019,44 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
> static void
> skl_compute_wm_levels(const struct intel_crtc_state *crtc_state,
> const struct skl_wm_params *wm_params,
> - struct skl_wm_level *levels)
> + struct skl_plane_wm *plane_wm,
> + bool yuv)
> {
> struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
> int level, max_level = ilk_wm_max_level(dev_priv);
> + /*
> + * Check which kind of plane is it and based on that calculate
> + * correspondent WM levels.
> + */
> + struct skl_wm_level *levels = yuv ? plane_wm->uv_wm : plane_wm->wm;
> struct skl_wm_level *result_prev = &levels[0];
>
> for (level = 0; level <= max_level; level++) {
> struct skl_wm_level *result = &levels[level];
> + u32 latency = dev_priv->wm.skl_latency[level];
>
> - skl_compute_plane_wm(crtc_state, level, wm_params,
> - result_prev, result);
> + skl_compute_plane_wm(crtc_state, level, latency,
> + wm_params, result_prev, result);
>
> result_prev = result;
> + if (level == 0) {
> + /*
> + * For Gen12 if it is an L0 we need to also
> + * consider sagv_block_time when calculating
> + * L0 watermark - we will need that when making
> + * a decision whether enable SAGV or not.
> + * For older gens we agreed to copy L0 value for
> + * compatibility.
> + */
> + if ((INTEL_GEN(dev_priv) >= 12)) {
> + latency += dev_priv->sagv_block_time_us;
> + skl_compute_plane_wm(crtc_state, level, latency,
> + wm_params, result_prev,
> + &plane_wm->sagv_wm_l0);
> + } else
> + memcpy(&plane_wm->sagv_wm_l0, &levels[0],
> + sizeof(struct skl_wm_level));
Putting this stuff inside the loop seems a bit silly.
> + }
> }
> }
>
> @@ -4971,7 +5150,7 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state,
> if (ret)
> return ret;
>
> - skl_compute_wm_levels(crtc_state, &wm_params, wm->wm);
> + skl_compute_wm_levels(crtc_state, &wm_params, wm, false);
> skl_compute_transition_wm(crtc_state, &wm_params, wm);
>
> return 0;
> @@ -4993,7 +5172,7 @@ static int skl_build_plane_wm_uv(struct intel_crtc_state *crtc_state,
> if (ret)
> return ret;
>
> - skl_compute_wm_levels(crtc_state, &wm_params, wm->uv_wm);
> + skl_compute_wm_levels(crtc_state, &wm_params, wm, true);
>
> return 0;
> }
> @@ -5541,9 +5720,62 @@ static int skl_wm_add_affected_planes(struct intel_atomic_state *state,
> return 0;
> }
>
> +static void tgl_check_and_set_sagv(struct intel_atomic_state *state)
> +{
> + struct drm_device *dev = state->base.dev;
> + const struct drm_i915_private *dev_priv = to_i915(dev);
> + struct intel_crtc *crtc;
> + struct intel_crtc_state *new_crtc_state;
> + struct intel_crtc_state *old_crtc_state;
> + struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
> + int ret, i;
> +
> + /*
> + * Lets assume we can tolerate SAGV for now,
> + * until watermark calculations prove the opposite
> + * if any of the pipe planes in the state will
> + * fail the requirements it will be assigned to false
> + * in skl_compute_ddb.
> + */
> + state->gen12_can_sagv = true;
> +
> + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> + new_crtc_state, i) {
> + ret = tgl_check_pipe_fits_sagv_wm(new_crtc_state, ddb);
> + if (ret) {
> + state->gen12_can_sagv = false;
> + break;
> + }
This is not going to work. We need the information from _all_ pipes, not
just the ones in the state. We probably want to make that can_sagv thing
a bitmask of pipes so that we don't have to have all pipes in the state.
> + }
> +
> + if (state->gen12_can_sagv) {
> + /*
> + * If we determined that we can actually enable SAGV, then
> + * actually use those levels tgl_check_pipe_fits_sagv_wm
> + * has already taken care of checking if L0 + sagv block time
> + * fits into ddb.
> + */
> + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> + new_crtc_state, i) {
> + struct intel_plane *plane;
> + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
> + enum plane_id plane_id = plane->id;
> + struct skl_plane_wm *plane_wm = \
> + &new_crtc_state->wm.skl.optimal.planes[plane_id];
> + struct skl_wm_level *sagv_wm0 = &plane_wm->sagv_wm_l0;
> + struct skl_wm_level *l0_wm0 = &plane_wm->wm[0];
> +
> + memcpy(l0_wm0, sagv_wm0, sizeof(struct skl_wm_level));
> + }
> + }
> + }
> +}
> +
> static int
> skl_compute_wm(struct intel_atomic_state *state)
> {
> + struct drm_device *dev = state->base.dev;
> + const struct drm_i915_private *dev_priv = to_i915(dev);
> struct intel_crtc *crtc;
> struct intel_crtc_state *new_crtc_state;
> struct intel_crtc_state *old_crtc_state;
> @@ -5553,6 +5785,9 @@ skl_compute_wm(struct intel_atomic_state *state)
> /* Clear all dirty flags */
> results->dirty_pipes = 0;
>
> + /* If we exit before check is done */
> + state->gen12_can_sagv = false;
> +
> ret = skl_ddb_add_affected_pipes(state);
> if (ret)
> return ret;
> @@ -5579,6 +5814,9 @@ skl_compute_wm(struct intel_atomic_state *state)
> results->dirty_pipes |= BIT(crtc->pipe);
> }
>
> + if (INTEL_GEN(dev_priv) >= 12)
> + tgl_check_and_set_sagv(state);
> +
> ret = skl_compute_ddb(state);
> if (ret)
> return ret;
> --
> 2.17.1
--
Ville Syrjälä
Intel
More information about the Intel-gfx
mailing list