[Intel-gfx] [PATCH v3 3/9] drm/i915/skl: New ddb allocation algorithm

Mon Sep 12 10:50:20 UTC 2016

Op 09-09-16 om 10:01 schreef Kumar, Mahesh:
> From: Mahesh Kumar <mahesh1.kumar at intel.com>
>
> This patch implements a new DDB allocation algorithm as per the HW team's
> suggestion. This algorithm takes care of the scenario where we allocate
> less DDB for planes with a lower relative pixel rate, even though they
> require more DDB to work.
> It also takes care of enabling the same watermark level for each
> plane, for efficient power saving.
>
> Changes since v1:
>  - Rebase on top of Paulo's patch series
>
> Signed-off-by: Mahesh Kumar <mahesh1.kumar at intel.com>
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 129 +++++++++++++++++++++-------------------
>  1 file changed, 67 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index cfd9b7d1..7c70e07 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3348,6 +3348,7 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate,
>  
>  static int
>  skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
> +		      struct skl_pipe_wm *pipe_wm,
>  		      struct skl_ddb_allocation *ddb /* out */)
>  {
>  	struct drm_atomic_state *state = cstate->base.state;
> @@ -3363,8 +3364,11 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>  	uint16_t *minimum = cstate->wm.skl.minimum_blocks;
>  	uint16_t *y_minimum = cstate->wm.skl.minimum_y_blocks;
>  	unsigned int total_data_rate;
> +	uint16_t total_min_blocks = 0;
> +	uint16_t total_level_ddb = 0;
>  	int num_active;
> -	int id, i;
> +	int max_level, level;
> +	int id, i, ret = 0;
>  
>  	if (WARN_ON(!state))
>  		return 0;
> @@ -3380,6 +3384,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>  	alloc_size = skl_ddb_entry_size(alloc);
>  	if (alloc_size == 0) {
>  		memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
> +		memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe]));
>  		return 0;
>  	}
This hunk looks like a bugfix; shouldn't it be a separate commit?
> @@ -3413,19 +3418,42 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>  	}
>  
>  	for (i = 0; i < PLANE_CURSOR; i++) {
> -		alloc_size -= minimum[i];
> -		alloc_size -= y_minimum[i];
> +		total_min_blocks += minimum[i];
> +		total_min_blocks += y_minimum[i];
>  	}
>  
> -	/*
> -	 * 2. Distribute the remaining space in proportion to the amount of
> -	 * data each plane needs to fetch from memory.
> -	 *
> -	 * FIXME: we may not allocate every single block here.
> -	 */
> +	for (level = ilk_wm_max_level(dev); level >= 0; level--) {
> +		total_level_ddb = 0;
> +		for (i = 0; i < PLANE_CURSOR; i++) {
> +			/*
> +			 * TODO: For NV12 we should calculate watermark values
> +			 * for both the Y and UV planes and use both values in
> +			 * the DDB calculation. As NV12 is currently disabled,
> +			 * only a single plane value is used here.
> +			 */
> +			uint16_t min = minimum[i] + y_minimum[i];
> +			uint16_t plane_level_ddb_wm =
> +				max(pipe_wm->wm[level].plane_res_b[i], min);
> +			total_level_ddb += plane_level_ddb_wm;
> +		}
> +
> +		if (total_level_ddb <= alloc_size)
> +			break;
> +	}
> +
> +	if ((level < 0) || (total_min_blocks > alloc_size)) {
> +		DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations");
> +		DRM_DEBUG_KMS("minimum required %d/%d\n", (level < 0) ?
> +				total_level_ddb : total_min_blocks, alloc_size);
> +		ret = -EINVAL;
> +		goto exit;
> +	}
> +	max_level = level;
> +	alloc_size -= total_level_ddb;
> +
>  	total_data_rate = skl_get_total_relative_data_rate(cstate);
>  	if (total_data_rate == 0)
> -		return 0;
> +		goto exit;
>  
>  	start = alloc->start;
>  	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
> @@ -3440,7 +3468,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>  		 * promote the expression to 64 bits to avoid overflowing, the
>  		 * result is < available as data_rate / total_data_rate < 1
>  		 */
> -		plane_blocks = minimum[id];
> +		plane_blocks = max(pipe_wm->wm[max_level].plane_res_b[id],
> +					minimum[id]);
>  		plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
>  					total_data_rate);
>  
> @@ -3454,6 +3483,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>  
>  		/*
>  		 * allocation for y_plane part of planar format:
> +		 * TODO: Once watermark values are calculated for both Y and UV
> +		 * planes, consider both for the initial allowed wm blocks.
>  		 */
>  		y_data_rate = cstate->wm.skl.plane_y_data_rate[id];
>  
> @@ -3467,9 +3498,22 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>  		}
>  
>  		start += y_plane_blocks;
> +
> +		/*
> +		 * Now enable all levels in WM structure which can be enabled
> +		 * using current DDB allocation
> +		 */
> +		for (i = ilk_wm_max_level(dev); i > 0; i--) {
> +			if (i > max_level || pipe_wm->wm[i].plane_res_l[id] > 31
> +					|| pipe_wm->wm[i].plane_res_b[id] == 0)
> +				pipe_wm->wm[i].plane_en[id] = false;
> +			else
> +				pipe_wm->wm[i].plane_en[id] = true;
> +		}
>  	}
>  
> -	return 0;
> +exit:
> +	return ret;
>  }
>  
>  static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config)
> @@ -3540,7 +3584,6 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst
>  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>  				struct intel_crtc_state *cstate,
>  				struct intel_plane_state *intel_pstate,
> -				uint16_t ddb_allocation,
>  				int level,
>  				struct skl_pipe_wm *pipe_wm)
>  {
> @@ -3559,12 +3602,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>  	struct skl_wm_level *result = &pipe_wm->wm[level];
>  	uint16_t *out_blocks = &result->plane_res_b[id];
>  	uint8_t *out_lines = &result->plane_res_l[id];
> -	bool *enabled = &result->plane_en[id];
>  
> -	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) {
> -		*enabled = false;
> +	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible)
>  		return 0;
> -	}
>  
>  	width = drm_rect_width(&intel_pstate->base.src) >> 16;
>  	height = drm_rect_height(&intel_pstate->base.src) >> 16;
> @@ -3649,54 +3689,27 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>  		}
>  	}
>  
> -	if (res_blocks >= ddb_allocation || res_lines > 31) {
> -		*enabled = false;
> -
> -		/*
> -		 * If there are no valid level 0 watermarks, then we can't
> -		 * support this display configuration.
> -		 */
> -		if (level) {
> -			return 0;
> -		} else {
> -			DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n");
> -			DRM_DEBUG_KMS("Plane %d.%d: blocks required = %u/%u, lines required = %u/31\n",
> -				      to_intel_crtc(cstate->base.crtc)->pipe,
> -				      skl_wm_plane_id(to_intel_plane(pstate->plane)),
> -				      res_blocks, ddb_allocation, res_lines);
> -
> -			return -EINVAL;
> -		}
> -	}
> -
>  	*out_blocks = res_blocks;
>  	*out_lines = res_lines;
> -	*enabled = true;
>  
>  	return 0;
>  }
>  
>  static int
>  skl_compute_wm_level(const struct drm_i915_private *dev_priv,
> -		     struct skl_ddb_allocation *ddb,
>  		     struct intel_crtc_state *cstate,
>  		     int level,
>  		     struct skl_pipe_wm *pipe_wm)
>  {
>  	struct drm_atomic_state *state = cstate->base.state;
> -	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
>  	struct drm_plane *plane;
>  	struct intel_plane *intel_plane;
>  	struct intel_plane_state *intel_pstate;
> -	uint16_t ddb_blocks;
> -	enum pipe pipe = intel_crtc->pipe;
>  	int ret;
>  
>  	for_each_intel_plane_mask(&dev_priv->drm,
>  				  intel_plane,
>  				  cstate->base.plane_mask) {
> -		int i = skl_wm_plane_id(intel_plane);
> -
>  		plane = &intel_plane->base;
>  		intel_pstate = NULL;
>  		if (state)
> @@ -3722,12 +3735,9 @@ skl_compute_wm_level(const struct drm_i915_private *dev_priv,
>  
>  		WARN_ON(!intel_pstate->base.fb);
>  
> -		ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
> -
>  		ret = skl_compute_plane_wm(dev_priv,
>  					   cstate,
>  					   intel_pstate,
> -					   ddb_blocks,
>  					   level,
>  					   pipe_wm);
>  		if (ret)
> @@ -3784,11 +3794,15 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
>  	memset(pipe_wm, 0, sizeof(*pipe_wm));
>  
>  	for (level = 0; level <= max_level; level++) {
> -		ret = skl_compute_wm_level(dev_priv, ddb, cstate,
> +		ret = skl_compute_wm_level(dev_priv, cstate,
>  					   level, pipe_wm);
>  		if (ret)
>  			return ret;
>  	}
> +	ret = skl_allocate_pipe_ddb(cstate, pipe_wm, ddb);
> +	if (ret)
> +		return ret;
> +
>  	pipe_wm->linetime = skl_compute_linetime_wm(cstate);
>  
>  	skl_compute_transition_wm(cstate, &pipe_wm->trans_wm);
> @@ -3976,13 +3990,12 @@ pipes_modified(struct drm_atomic_state *state)
>  }
>  
>  static int
> -skl_compute_ddb(struct drm_atomic_state *state)
> +skl_include_affected_pipes(struct drm_atomic_state *state)
>  {
>  	struct drm_device *dev = state->dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
>  	struct intel_crtc *intel_crtc;
> -	struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb;
>  	uint32_t realloc_pipes = pipes_modified(state);
>  	int ret;
>  
> @@ -4035,10 +4048,6 @@ skl_compute_ddb(struct drm_atomic_state *state)
>  		if (IS_ERR(cstate))
>  			return PTR_ERR(cstate);
>  
> -		ret = skl_allocate_pipe_ddb(cstate, ddb);
> -		if (ret)
> -			return ret;
> -
>  		ret = drm_atomic_add_affected_planes(state, &intel_crtc->base);
>  		if (ret)
>  			return ret;
> @@ -4092,19 +4101,15 @@ skl_compute_wm(struct drm_atomic_state *state)
>  	/* Clear all dirty flags */
>  	results->dirty_pipes = 0;
>  
> -	ret = skl_compute_ddb(state);
> +	ret = skl_include_affected_pipes(state);
>  	if (ret)
>  		return ret;
>  
>  	/*
>  	 * Calculate WM's for all pipes that are part of this transaction.
> -	 * Note that the DDB allocation above may have added more CRTC's that
> -	 * weren't otherwise being modified (and set bits in dirty_pipes) if
> -	 * pipe allocations had to change.
> -	 *
> -	 * FIXME:  Now that we're doing this in the atomic check phase, we
> -	 * should allow skl_update_pipe_wm() to return failure in cases where
> -	 * no suitable watermark values can be found.
> +	 * Note that affected pipe calculation above may have added more
> +	 * CRTC's that weren't otherwise being modified (and set bits in
> +	 * dirty_pipes) if pipe allocations had to change.
>  	 */
>  	for_each_crtc_in_state(state, crtc, cstate, i) {
>  		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);

The rest looks OK at first glance. I haven't checked yet whether compute_pipe_ddb is called for the included pipes too, but that's probably the case.
I'll probably run the whole series through some tests to be sure it works right. :)