[Intel-gfx] [PATCH v16 5/7] drm/i915: Correctly map DBUF slices to pipes

Matt Roper matthew.d.roper at intel.com
Tue Jan 28 23:15:30 UTC 2020


On Fri, Jan 24, 2020 at 10:44:54AM +0200, Stanislav Lisovskiy wrote:
> Added proper DBuf slice mapping to correspondent
> pipes, depending on pipe configuration as stated
> in BSpec.
> 
> v2:
>     - Remove unneeded braces
>     - Stop using macro for DBuf assignments as
>       it seems to reduce readability.
> 
> v3: Start using enabled slices mask in dev_priv
> 
> v4: Renamed "enabled_slices" used in dev_priv
>     to "enabled_dbuf_slices_mask"(Matt Roper)
> 
> v5: - Removed redundant parameters from
>       intel_get_ddb_size function.(Matt Roper)
>     - Made i915_possible_dbuf_slices static(Matt Roper)
>     - Renamed total_width into total_width_in_range
>       so that it now reflects that this is not
>       a total pipe width but the one in current
>       dbuf slice allowed range for pipe.(Matt Roper)
>     - Removed 4th pipe for ICL in DBuf assignment
>       table(Matt Roper)
>     - Fixed wrong DBuf slice in DBuf table for TGL
>       (Matt Roper)
>     - Added comment regarding why we currently not
>       using pipe ratio for DBuf assignment for ICL
> 
> v6: - Changed u32 to unsigned int in
>       icl_get_first_dbuf_slice_offset function signature
>       (Ville Syrjälä)
>     - Changed also u32 to u8 in dbuf slice mask structure
>       (Ville Syrjälä)
>     - Switched from DBUF_S1_BIT to enum + explicit
>       BIT(DBUF_S1) access(Ville Syrjälä)
>     - Switched to named initializers in DBuf assignment
>       arrays(Ville Syrjälä)
>     - DBuf assignment arrays now use autogeneration tool
>       from
>       https://patchwork.freedesktop.org/series/70493/
>       to avoid typos.
>     - Renamed i915_find_pipe_conf to *_compute_dbuf_slices
>       (Ville Syrjälä)
>     - Changed platforms ordering in skl_compute_dbuf_slices
>       to be from newest to oldest(Ville Syrjälä)
> 
> v7: - Now ORing assigned DBuf slice config always with DBUF_S1
>       because slice 1 has to be constantly powered on.
>       (Ville Syrjälä)
> 
> v8: - Added pipe_name for neater printing(Ville Syrjälä)
>     - Renamed width_before_pipe to width_before_pipe_in_range,
>       to better reflect that now all the calculations are happening
>       inside DBuf range allowed by current pipe configuration mask
>       (Ville Syrjälä)
>     - Shortened FIXME comment message, regarding constant ORing with
>       DBUF_S1(Ville Syrjälä)
>     - Added .dbuf_mask named initializer to pipe assignment array
>       (Ville Syrjälä)
>     - Edited pipe assignment array to use only single DBuf slice
>       for gen11 single pipe configurations, until "pipe ratio"
>       thing is finally sorted out(Ville Syrjälä)
>     - Removed unused parameter crtc_state for now(Ville Syrjälä)
>       from icl/tgl_compute_dbuf_slices function
> 
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy at intel.com>
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 385 ++++++++++++++++++++++++++++++--
>  1 file changed, 366 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index ca5b34d297d9..92c4d4624092 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3856,13 +3856,29 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state)
>  	return true;
>  }
>  
> -static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
> -			      const struct intel_crtc_state *crtc_state,
> -			      const u64 total_data_rate,
> -			      const int num_active)
> +/*
> + * Calculate initial DBuf slice offset, based on slice size
> + * and mask(i.e if slice size is 1024 and second slice is enabled
> + * offset would be 1024)
> + */
> +static unsigned int
> +icl_get_first_dbuf_slice_offset(u32 dbuf_slice_mask,
> +				u32 slice_size,
> +				u32 ddb_size)
> +{
> +	unsigned int offset = 0;
> +
> +	if (!dbuf_slice_mask)
> +		return 0;
> +
> +	offset = (ffs(dbuf_slice_mask) - 1) * slice_size;
> +
> +	WARN_ON(offset >= ddb_size);
> +	return offset;
> +}
> +
> +static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv)
>  {
> -	struct drm_atomic_state *state = crtc_state->uapi.state;
> -	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
>  	u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
>  
>  	drm_WARN_ON(&dev_priv->drm, ddb_size == 0);
> @@ -3870,12 +3886,12 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
>  	if (INTEL_GEN(dev_priv) < 11)
>  		return ddb_size - 4; /* 4 blocks for bypass path allocation */
>  
> -	intel_state->enabled_dbuf_slices_mask = BIT(DBUF_S1);
> -	ddb_size /= 2;
> -
>  	return ddb_size;
>  }
>  
> +static u8 skl_compute_dbuf_slices(const struct intel_crtc_state *crtc_state,
> +				  u32 active_pipes);
> +
>  static void
>  skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  				   const struct intel_crtc_state *crtc_state,
> @@ -3887,10 +3903,17 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
>  	struct drm_crtc *for_crtc = crtc_state->uapi.crtc;
>  	const struct intel_crtc *crtc;
> -	u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
> +	u32 pipe_width = 0, total_width_in_range = 0, width_before_pipe_in_range = 0;
>  	enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
>  	u16 ddb_size;
> +	u32 ddb_range_size;
>  	u32 i;
> +	u32 dbuf_slice_mask;
> +	u32 active_pipes;
> +	u32 offset;
> +	u32 slice_size;
> +	u32 total_slice_mask;
> +	u32 start, end;
>  
>  	if (drm_WARN_ON(&dev_priv->drm, !state) || !crtc_state->hw.active) {
>  		alloc->start = 0;
> @@ -3900,12 +3923,15 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  	}
>  
>  	if (intel_state->active_pipe_changes)
> -		*num_active = hweight8(intel_state->active_pipes);
> +		active_pipes = intel_state->active_pipes;
>  	else
> -		*num_active = hweight8(dev_priv->active_pipes);
> +		active_pipes = dev_priv->active_pipes;
> +
> +	*num_active = hweight8(active_pipes);
> +
> +	ddb_size = intel_get_ddb_size(dev_priv);
>  
> -	ddb_size = intel_get_ddb_size(dev_priv, crtc_state, total_data_rate,
> -				      *num_active);
> +	slice_size = ddb_size / INTEL_INFO(dev_priv)->num_supported_dbuf_slices;
>  
>  	/*
>  	 * If the state doesn't change the active CRTC's or there is no
> @@ -3924,31 +3950,96 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  		return;
>  	}
>  
> +	/*
> +	 * Get allowed DBuf slices for correspondent pipe and platform.
> +	 */
> +	dbuf_slice_mask = skl_compute_dbuf_slices(crtc_state, active_pipes);
> +
> +	DRM_DEBUG_KMS("DBuf slice mask %x pipe %c active pipes %x\n",
> +		      dbuf_slice_mask,
> +		      pipe_name(for_pipe), active_pipes);
> +
> +	/*
> +	 * Figure out at which DBuf slice we start, i.e if we start at Dbuf S2
> +	 * and slice size is 1024, the offset would be 1024
> +	 */
> +	offset = icl_get_first_dbuf_slice_offset(dbuf_slice_mask,
> +						 slice_size, ddb_size);
> +
> +	/*
> +	 * Figure out total size of allowed DBuf slices, which is basically
> +	 * a number of allowed slices for that pipe multiplied by slice size.
> +	 * Inside of this
> +	 * range ddb entries are still allocated in proportion to display width.
> +	 */
> +	ddb_range_size = hweight8(dbuf_slice_mask) * slice_size;
> +
>  	/*
>  	 * Watermark/ddb requirement highly depends upon width of the
>  	 * framebuffer, So instead of allocating DDB equally among pipes
>  	 * distribute DDB based on resolution/width of the display.
>  	 */
> +	total_slice_mask = dbuf_slice_mask;
>  	for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
>  		const struct drm_display_mode *adjusted_mode =
>  			&crtc_state->hw.adjusted_mode;
>  		enum pipe pipe = crtc->pipe;
>  		int hdisplay, vdisplay;
> +		u32 pipe_dbuf_slice_mask;
>  
> -		if (!crtc_state->hw.enable)
> +		if (!crtc_state->hw.active)
> +			continue;
> +
> +		pipe_dbuf_slice_mask = skl_compute_dbuf_slices(crtc_state,
> +							       active_pipes);
> +
> +		/*
> +		 * According to BSpec pipe can share one dbuf slice with another
> +		 * pipes or pipe can use multiple dbufs, in both cases we
> +		 * account for other pipes only if they have exactly same mask.
> +		 * However we need to account how many slices we should enable
> +		 * in total.
> +		 */
> +		total_slice_mask |= pipe_dbuf_slice_mask;
> +
> +		/*
> +		 * Do not account pipes using other slice sets
> +		 * luckily as of current BSpec slice sets do not partially
> +		 * intersect(pipes share either same one slice or same slice set
> +		 * i.e no partial intersection), so it is enough to check for
> +		 * equality for now.
> +		 */
> +		if (dbuf_slice_mask != pipe_dbuf_slice_mask)
>  			continue;
>  
>  		drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
> -		total_width += hdisplay;
> +
> +		total_width_in_range += hdisplay;
>  
>  		if (pipe < for_pipe)
> -			width_before_pipe += hdisplay;
> +			width_before_pipe_in_range += hdisplay;
>  		else if (pipe == for_pipe)
>  			pipe_width = hdisplay;
>  	}
>  
> -	alloc->start = ddb_size * width_before_pipe / total_width;
> -	alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
> +	/*
> +	 * FIXME: For now we always enable slice S1 as per
> +	 * the Bspec display initialization sequence.
> +	 */
> +	intel_state->enabled_dbuf_slices_mask = total_slice_mask | BIT(DBUF_S1);
> +
> +	start = ddb_range_size * width_before_pipe_in_range / total_width_in_range;
> +	end = ddb_range_size *
> +		(width_before_pipe_in_range + pipe_width) / total_width_in_range;
> +
> +	alloc->start = offset + start;
> +	alloc->end = offset + end;
> +
> +	DRM_DEBUG_KMS("Pipe %d ddb %d-%d\n", for_pipe,
> +		      alloc->start, alloc->end);
> +	DRM_DEBUG_KMS("Enabled ddb slices mask %x num supported %d\n",
> +		      intel_state->enabled_dbuf_slices_mask,
> +		      INTEL_INFO(dev_priv)->num_supported_dbuf_slices);
>  }
>  
>  static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
> @@ -4119,6 +4210,262 @@ skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state,
>  	return mul_fixed16(downscale_w, downscale_h);
>  }
>  
> +struct dbuf_slice_conf_entry {
> +	u8 active_pipes;
> +	u8 dbuf_mask[I915_MAX_PIPES];
> +};
> +
> +/*
> + * Table taken from Bspec 12716
> + * Pipes do have some preferred DBuf slice affinity,
> + * plus there are some hardcoded requirements on how
> + * those should be distributed for multipipe scenarios.
> + * For more DBuf slices algorithm can get even more messy
> + * and less readable, so decided to use a table almost
> + * as is from BSpec itself - that way it is at least easier
> + * to compare, change and check.
> + */
> +static struct dbuf_slice_conf_entry icl_allowed_dbufs[] =
> +/* Autogenerated with igt/tools/intel_dbuf_map tool: */
> +{
> +	{
> +		.active_pipes = BIT(PIPE_A),
> +		.dbuf_mask = {
> +			[PIPE_A] = BIT(DBUF_S1)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_B),
> +		.dbuf_mask = {
> +			[PIPE_B] = BIT(DBUF_S1)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_A) | BIT(PIPE_B),
> +		.dbuf_mask = {
> +			[PIPE_A] = BIT(DBUF_S1),
> +			[PIPE_B] = BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_C),
> +		.dbuf_mask = {
> +			[PIPE_C] = BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_A) | BIT(PIPE_C),
> +		.dbuf_mask = {
> +			[PIPE_A] = BIT(DBUF_S1),
> +			[PIPE_C] = BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_B) | BIT(PIPE_C),
> +		.dbuf_mask = {
> +			[PIPE_B] = BIT(DBUF_S1),
> +			[PIPE_C] = BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C),
> +		.dbuf_mask = {
> +			[PIPE_A] = BIT(DBUF_S1),
> +			[PIPE_B] = BIT(DBUF_S1),
> +			[PIPE_C] = BIT(DBUF_S2)
> +		}
> +	},
> +};
> +
> +/*
> + * Table taken from Bspec 49255
> + * Pipes do have some preferred DBuf slice affinity,
> + * plus there are some hardcoded requirements on how
> + * those should be distributed for multipipe scenarios.
> + * For more DBuf slices algorithm can get even more messy
> + * and less readable, so decided to use a table almost
> + * as is from BSpec itself - that way it is at least easier
> + * to compare, change and check.
> + */
> +static struct dbuf_slice_conf_entry tgl_allowed_dbufs[] =
> +/* Autogenerated with igt/tools/intel_dbuf_map tool: */
> +{
> +	{
> +		.active_pipes = BIT(PIPE_A),
> +		.dbuf_mask = {
> +			[PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_B),
> +		.dbuf_mask = {
> +			[PIPE_B] = BIT(DBUF_S1) | BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_A) | BIT(PIPE_B),
> +		.dbuf_mask = {
> +			[PIPE_A] = BIT(DBUF_S2),
> +			[PIPE_B] = BIT(DBUF_S1)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_C),
> +		.dbuf_mask = {
> +			[PIPE_C] = BIT(DBUF_S2) | BIT(DBUF_S1)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_A) | BIT(PIPE_C),
> +		.dbuf_mask = {
> +			[PIPE_A] = BIT(DBUF_S1),
> +			[PIPE_C] = BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_B) | BIT(PIPE_C),
> +		.dbuf_mask = {
> +			[PIPE_B] = BIT(DBUF_S1),
> +			[PIPE_C] = BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C),
> +		.dbuf_mask = {
> +			[PIPE_A] = BIT(DBUF_S1),
> +			[PIPE_B] = BIT(DBUF_S1),
> +			[PIPE_C] = BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_D),
> +		.dbuf_mask = {
> +			[PIPE_D] = BIT(DBUF_S2) | BIT(DBUF_S1)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_A) | BIT(PIPE_D),
> +		.dbuf_mask = {
> +			[PIPE_A] = BIT(DBUF_S1),
> +			[PIPE_D] = BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_B) | BIT(PIPE_D),
> +		.dbuf_mask = {
> +			[PIPE_B] = BIT(DBUF_S1),
> +			[PIPE_D] = BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_D),
> +		.dbuf_mask = {
> +			[PIPE_A] = BIT(DBUF_S1),
> +			[PIPE_B] = BIT(DBUF_S1),
> +			[PIPE_D] = BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_C) | BIT(PIPE_D),
> +		.dbuf_mask = {
> +			[PIPE_C] = BIT(DBUF_S1),
> +			[PIPE_D] = BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_A) | BIT(PIPE_C) | BIT(PIPE_D),
> +		.dbuf_mask = {
> +			[PIPE_A] = BIT(DBUF_S1),
> +			[PIPE_C] = BIT(DBUF_S2),
> +			[PIPE_D] = BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D),
> +		.dbuf_mask = {
> +			[PIPE_B] = BIT(DBUF_S1),
> +			[PIPE_C] = BIT(DBUF_S2),
> +			[PIPE_D] = BIT(DBUF_S2)
> +		}
> +	},
> +	{
> +		.active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D),
> +		.dbuf_mask = {
> +			[PIPE_A] = BIT(DBUF_S1),
> +			[PIPE_B] = BIT(DBUF_S1),
> +			[PIPE_C] = BIT(DBUF_S2),
> +			[PIPE_D] = BIT(DBUF_S2)
> +		}
> +	},
> +};
> +
> +static u8 compute_dbuf_slices(enum pipe pipe,
> +			      u32 active_pipes,
> +			      const struct dbuf_slice_conf_entry *dbuf_slices,
> +			      int size)
> +{
> +	int i;
> +
> +	for (i = 0; i < size; i++) {
> +		if (dbuf_slices[i].active_pipes == active_pipes)
> +			return dbuf_slices[i].dbuf_mask[pipe];
> +	}
> +	return 0;
> +}
> +
> +/*
> + * This function finds an entry with same enabled pipe configuration and
> + * returns correspondent DBuf slice mask as stated in BSpec for particular
> + * platform.
> + */
> +static u32 icl_compute_dbuf_slices(enum pipe pipe,
> +				   u32 active_pipes)
> +{
> +	/*
> +	 * FIXME: For ICL this is still a bit unclear as prev BSpec revision
> +	 * required calculating "pipe ratio" in order to determine
> +	 * if one or two slices can be used for single pipe configurations
> +	 * as additional constraint to the existing table.
> +	 * However based on recent info, it should be not "pipe ratio"
> +	 * but rather ratio between pixel_rate and cdclk with additional
> +	 * constants, so for now we are using only table until this is
> +	 * clarified. Also this is the reason why crtc_state param is
> +	 * still here - we will need it once those additional constraints
> +	 * pop up.

The last part of this comment no longer applies --- crtc_state isn't
still here.

I haven't heard any recent discussion with the hardware folks --- if the
bspec is still unclear in this area, is it safe to try to enable the
second dbuf slice at this time?  I'm worried that we might add
regressions due to the incomplete hardware documentation.  Should we
initially only enable it on TGL until the bspec gets clarified?  Or at
least only enable it on ICL/EHL as a completely separate patch that's
really easy to revert?  AFAIK, we don't yet have EHL machines in CI, so
even if CI results come back clean on ICL, I'd still be a little bit
nervous about regressing EHL/JSL.

> +	 */
> +	return compute_dbuf_slices(pipe, active_pipes,
> +				   icl_allowed_dbufs,
> +				   ARRAY_SIZE(icl_allowed_dbufs));
> +}
> +
> +static u32 tgl_compute_dbuf_slices(enum pipe pipe,
> +				   u32 active_pipes)
> +{
> +	return compute_dbuf_slices(pipe, active_pipes,
> +				   tgl_allowed_dbufs,
> +				   ARRAY_SIZE(tgl_allowed_dbufs));
> +}
> +
> +static u8 skl_compute_dbuf_slices(const struct intel_crtc_state *crtc_state,
> +				  u32 active_pipes)

Given that this is basically a common frontend function that just
dispatches to an appropriate per-platform handler, maybe we should
rename this to to an intel_ prefix rather than skl_?  Up to you.

Aside from this and the comments above,

Reviewed-by: Matt Roper <matthew.d.roper at intel.com>

> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> +	enum pipe pipe = crtc->pipe;
> +
> +	if (IS_GEN(dev_priv, 12))
> +		return tgl_compute_dbuf_slices(pipe,
> +					       active_pipes);
> +	else if (IS_GEN(dev_priv, 11))
> +		return icl_compute_dbuf_slices(pipe,
> +					       active_pipes);
> +	/*
> +	 * For anything else just return one slice yet.
> +	 * Should be extended for other platforms.
> +	 */
> +	return BIT(DBUF_S1);
> +}
> +
>  static u64
>  skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state,
>  			     const struct intel_plane_state *plane_state,
> -- 
> 2.24.1.485.gad05a3d8e5
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation
(916) 356-2795


More information about the Intel-gfx mailing list