[Intel-gfx] [PATCH v2] drm/i915: Enable second dbuf slice for ICL
Matt Roper
matthew.d.roper at intel.com
Wed Oct 16 23:41:14 UTC 2019
On Wed, Oct 09, 2019 at 10:39:08AM +0300, Stanislav Lisovskiy wrote:
> Also implemented algorithm for choosing DBuf slice configuration
> based on active pipes, pipe ratio as stated in BSpec 12716.
>
> Now pipe allocation still stays proportional to pipe width as before,
> however within allowed DBuf slice for this particular configuration.
>
> v2: Remove unneeded check from commit as ddb enabled slices might
> now differ from hw state.
I can't seem to find v1 of this patch in my archives; can you elaborate
on this? It looks like there's a mismatch in how different parts of the
driver interpret 'ddb.enabled_slices'. During hw readout we treat the
value as the number of powered-up slices (which will always be 2, since
we always power both up in icl_display_core_init -> icl_dbuf_enable
even when we're not going to use both), whereas the atomic check code
treats it as the number of slices we want to distribute our plane
allocations over. We may want to split this into two separately tracked
fields (powered slices vs. utilized slices).
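E.g. something roughly along these lines (field names purely
illustrative, untested):

	struct skl_ddb_allocation {
		u8 powered_slices;	/* slices powered up in hardware */
		u8 used_slices;		/* slices the DDB allocation spans */
	};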
>
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy at intel.com>
> ---
> drivers/gpu/drm/i915/display/intel_display.c | 6 -
> drivers/gpu/drm/i915/intel_pm.c | 208 ++++++++++++++++++-
> 2 files changed, 202 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> index 1a533ccdb54f..4683731ac1ca 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -12989,12 +12989,6 @@ static void verify_wm_state(struct intel_crtc *crtc,
> skl_ddb_get_hw_state(dev_priv, &hw->ddb);
> sw_ddb = &dev_priv->wm.skl_hw.ddb;
>
> - if (INTEL_GEN(dev_priv) >= 11 &&
> - hw->ddb.enabled_slices != sw_ddb->enabled_slices)
> - DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n",
> - sw_ddb->enabled_slices,
> - hw->ddb.enabled_slices);
> -
Related to the comment above, we probably do still want to verify that
the number of powered-up dbuf slices matches what we expect (today
that's always 2, but that might change in the future if we get more
intelligent and power down the second slice when it isn't needed).
Since we already confirm farther down this function that the planes'
hw/sw DDB allocations themselves match, we're effectively already
checking how planes are distributed between the two slices.
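E.g. keep just a sanity check on the powered slice count; a totally
untested sketch, assuming we keep powering up both slices
unconditionally:

	/* Both DBuf slices are always powered up on gen11+ today */
	if (INTEL_GEN(dev_priv) >= 11 && hw->ddb.enabled_slices != 2)
		DRM_ERROR("unexpected number of powered DBUF slices (got %u)\n",
			  hw->ddb.enabled_slices);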
> /* planes */
> for_each_universal_plane(dev_priv, pipe, plane) {
> struct skl_plane_wm *hw_plane_wm, *sw_plane_wm;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index bfcf03ab5245..0fbeea61299f 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3616,7 +3616,7 @@ static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
> * only that 1 slice enabled until we have a proper way for on-demand
> * toggling of the second slice.
> */
The comment here is no longer correct now that we're actually using the
second slice.
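Maybe trim it down to something along these lines:

	/*
	 * FIXME: for now we always power up both slices in
	 * icl_dbuf_enable(), even when only one of them ends up being
	 * used; eventually we may want to power up the second slice
	 * on demand instead.
	 */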
> - if (0 && I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
> + if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
> enabled_slices++;
>
> return enabled_slices;
> @@ -3821,7 +3821,7 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
> * - plane straddling both slices is illegal in multi-pipe scenarios
> * - should validate we stay within the hw bandwidth limits
> */
This comment is/was also outdated. Also note that this change is going
to impact TGL as well even though you haven't added the TGL handling for
slice assignment yet.
> - if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {
> + if ((num_active > 1 || total_data_bw >= GBps(12))) {
Where does the 12 GBps number come from? I know the comment above this
says that's the maximum BW supported by a single DBuf slice, but I can't
find where this is mentioned in the bspec (and I'm not sure if that
would apply to all gen11+ platforms or whether that was an ICL-specific
number from when the comment/code was first written).
> ddb->enabled_slices = 2;
> } else {
> ddb->enabled_slices = 1;
> @@ -3831,6 +3831,35 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
> return ddb_size;
> }
>
> +/*
> + * Calculate initial DBuf slice offset, based on slice size
> + * and mask(i.e if slice size is 1024 and second slice is enabled
> + * offset would be 1024)
> + */
> +static u32 skl_get_first_dbuf_slice_offset(u32 dbuf_slice_mask,
> + u32 slice_size, u32 ddb_size)
> +{
> + u32 offset = 0;
> +
> + if (!dbuf_slice_mask)
> + return 0;
> +
> + while (!(dbuf_slice_mask & 1)) {
> + dbuf_slice_mask >>= 1;
> + offset += slice_size;
> + if (offset >= ddb_size)
This should probably include a WARN_ON(); if there's any way we can
return an offset outside our DDB, we're going to have problems.
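E.g.:

		if (WARN_ON(offset >= ddb_size))
			break;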
> + break;
> + }
> + return offset;
> +}
> +
> +static u32 i915_get_allowed_dbuf_mask(struct drm_i915_private *dev_priv,
> + int pipe, u32 active_pipes,
> + uint_fixed_16_16_t pipe_ratio);
> +
> +static uint_fixed_16_16_t
> +skl_get_pipe_ratio(const struct intel_crtc_state *crtc_state);
> +
> static void
> skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
> const struct intel_crtc_state *crtc_state,
> @@ -3846,7 +3875,13 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
> u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
> enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
> u16 ddb_size;
> + u16 ddb_range_size;
> u32 i;
> + u32 dbuf_slice_mask;
> + u32 active_pipes;
> + u32 offset;
> + u32 slice_size;
> + uint_fixed_16_16_t pipe_ratio;
>
> if (WARN_ON(!state) || !crtc_state->base.active) {
> alloc->start = 0;
> @@ -3855,14 +3890,23 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
> return;
> }
>
> - if (intel_state->active_pipe_changes)
> + if (intel_state->active_pipe_changes) {
> *num_active = hweight8(intel_state->active_pipes);
> - else
> + active_pipes = intel_state->active_pipes;
> + } else {
> *num_active = hweight8(dev_priv->active_pipes);
> + active_pipes = dev_priv->active_pipes;
> + }
>
> ddb_size = intel_get_ddb_size(dev_priv, crtc_state, total_data_rate,
> *num_active, ddb);
>
> + DRM_DEBUG_KMS("Got total ddb size %d\n", ddb_size);
> +
> + slice_size = ddb_size / ddb->enabled_slices;
> +
> + DRM_DEBUG_KMS("Got DBuf slice size %d\n", slice_size);
> +
> /*
> * If the state doesn't change the active CRTC's or there is no
> * modeset request, then there's no need to recalculate;
> @@ -3880,6 +3924,39 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
> return;
> }
>
> + /*
> + * Calculate pipe ratio as stated in BSpec 28692
> + */
> + pipe_ratio = skl_get_pipe_ratio(crtc_state);
We can probably move this call down into the function below. AFAICS,
pipe ratio doesn't matter anymore once we get to TGL and doesn't matter
on earlier platforms that only have one slice, so there's no need
to calculate it if we aren't going to use it.
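Rough, untested sketch of what I have in mind (using the icl_ naming I
suggest below), i.e. pass the crtc_state down and only compute the
ratio in the ICL path:

	u32 i915_get_allowed_dbuf_mask(struct drm_i915_private *dev_priv,
				       int pipe, u32 active_pipes,
				       const struct intel_crtc_state *crtc_state)
	{
		if (IS_GEN(dev_priv, 11))
			return icl_get_allowed_dbuf_mask(pipe, active_pipes,
							 icl_get_pipe_ratio(crtc_state));

		/* Just one slice for everything else, for now */
		return DBUF_S1_BIT;
	}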
> +
> + /*
> + * Get allowed DBuf slices for correspondent pipe and platform.
> + */
> + dbuf_slice_mask = i915_get_allowed_dbuf_mask(dev_priv, for_pipe,
> + active_pipes, pipe_ratio);
> +
> + DRM_DEBUG_KMS("DBuf slice mask %x pipe %d active pipes %x\n",
> + dbuf_slice_mask,
> + for_pipe, active_pipes);
> +
> + /*
> + * Figure out at which DBuf slice we start, i.e if we start at Dbuf S2
> + * and slice size is 1024, the offset would be 1024
> + */
> + offset = skl_get_first_dbuf_slice_offset(dbuf_slice_mask,
> + slice_size, ddb_size);
> +
> + /*
> + * Figure out total size of allowed DBuf slices, which is basically
> + * a number of allowed slices for that pipe multiplied by slice size.
> + * We might still have some dbuf slices disabled in case if those
> + * are not needed based on bandwidth requirements and num_active pipes,
> + * so stick to real ddb size if it happens to be less. Inside of this
> + * range ddb entries are still allocated in proportion to display width.
> + */
> + ddb_range_size = min(hweight8(dbuf_slice_mask) * slice_size,
> + (unsigned int)ddb_size);
> +
> /*
> * Watermark/ddb requirement highly depends upon width of the
> * framebuffer, So instead of allocating DDB equally among pipes
> @@ -3890,10 +3967,23 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
> &crtc_state->base.adjusted_mode;
> enum pipe pipe = crtc->pipe;
> int hdisplay, vdisplay;
> + uint_fixed_16_16_t ratio = skl_get_pipe_ratio(crtc_state);
> + u32 pipe_dbuf_slice_mask = i915_get_allowed_dbuf_mask(dev_priv,
> + pipe,
> + active_pipes,
> + ratio);
Minor nitpick, but the lines here are a bit over 80 characters. You may
want to break the line after the equals sign.
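I.e. something like:

		u32 pipe_dbuf_slice_mask =
			i915_get_allowed_dbuf_mask(dev_priv, pipe,
						   active_pipes, ratio);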
>
> if (!crtc_state->base.enable)
> continue;
>
> + /*
> + * According to BSpec pipe can share one dbuf slice with another pipes or pipe can use
> + * multiple dbufs, in both cases we account for other pipes only if they have
> + * exactly same mask.
> + */
Some more long lines that need to be wrapped.
> + if (dbuf_slice_mask != pipe_dbuf_slice_mask)
> + continue;
> +
> drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
> total_width += hdisplay;
>
> @@ -3903,8 +3993,11 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
> pipe_width = hdisplay;
> }
>
> - alloc->start = ddb_size * width_before_pipe / total_width;
> - alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
> + alloc->start = offset + ddb_range_size * width_before_pipe / total_width;
> + alloc->end = offset + ddb_range_size * (width_before_pipe + pipe_width) / total_width;
> +
> + DRM_DEBUG_KMS("Pipe %d ddb %d-%d\n", for_pipe,
> + alloc->start, alloc->end);
> }
>
> static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
> @@ -4255,6 +4348,109 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
> return total_data_rate;
> }
>
> +uint_fixed_16_16_t
> +skl_get_pipe_ratio(const struct intel_crtc_state *crtc_state)
I think this should be named icl_get_pipe_ratio() given that it comes
from a gen11-specific page of the bspec?
> +{
> + struct drm_plane *plane;
> + const struct drm_plane_state *drm_plane_state;
> + uint_fixed_16_16_t pipe_downscale;
> + uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
> +
> + if (!crtc_state->base.enable)
This function only gets called from skl_ddb_get_pipe_allocation_limits(),
which tests crtc_state->base.active at the beginning and bails out if
the CRTC isn't active. active && !enable shouldn't be possible, so I'd
add a WARN_ON() here to make that assumption explicit.
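E.g.:

	if (WARN_ON(!crtc_state->base.enable))
		return max_downscale;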
> + return max_downscale;
> +
> + drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) {
> + uint_fixed_16_16_t plane_downscale;
> + const struct intel_plane_state *plane_state =
> + to_intel_plane_state(drm_plane_state);
> +
> + if (!intel_wm_plane_visible(crtc_state, plane_state))
> + continue;
> +
> + plane_downscale = skl_plane_downscale_amount(crtc_state, plane_state);
> +
> + max_downscale = max_fixed16(plane_downscale, max_downscale);
> + }
> +
> + pipe_downscale = skl_pipe_downscale_amount(crtc_state);
> +
> + pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
> +
> + return pipe_downscale;
Wouldn't the pipe ratio be 1/pipe_downscale?
> +}
> +
> +#define DBUF_S1_BIT 1
> +#define DBUF_S2_BIT 2
> +
> +struct dbuf_slice_conf_entry {
> + u32 dbuf_mask[I915_MAX_PIPES];
> + u32 active_pipes;
> +};
> +
> +
> +/*
> + * Table taken from Bspec 12716
> + * Pipes do have some preferred DBuf slice affinity,
> + * plus there are some hardcoded requirements on how
> + * those should be distributed for multipipe scenarios.
> + * For more DBuf slices algorithm can get even more messy
> + * and less readable, so decided to use a table almost
> + * as is from BSpec itself - that way it is at least easier
> + * to compare, change and check.
> + */
> +struct dbuf_slice_conf_entry icl_allowed_dbufs[] = {
> +{ { 0, 0, 0, 0 }, 0 },
> +{ { DBUF_S1_BIT | DBUF_S2_BIT, 0, 0, 0 }, BIT(PIPE_A) },
> +{ { DBUF_S1_BIT, 0, 0, 0 }, BIT(PIPE_A) },
> +{ { 0, DBUF_S1_BIT | DBUF_S2_BIT, 0, 0 }, BIT(PIPE_B) },
> +{ { 0, DBUF_S1_BIT, 0, 0 }, BIT(PIPE_B) },
> +{ { 0, 0, DBUF_S1_BIT | DBUF_S2_BIT }, BIT(PIPE_C) },
> +{ { 0, 0, DBUF_S2_BIT, 0 }, BIT(PIPE_C) },
> +{ { DBUF_S1_BIT, DBUF_S2_BIT, 0, 0 }, BIT(PIPE_A) | BIT(PIPE_B) },
> +{ { DBUF_S1_BIT, 0, DBUF_S2_BIT, 0 }, BIT(PIPE_A) | BIT(PIPE_C) },
> +{ { 0, DBUF_S1_BIT, DBUF_S2_BIT, 0 }, BIT(PIPE_B) | BIT(PIPE_C) },
> +{ { DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT, 0 }, BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) },
You might want to align some of the columns here to make it slightly
easier to read. And even though the bspec puts the pipes in the final
column, I think it would be more readable to move them first and/or put
them on a line by themselves. You've already got a line here that
exceeds 80 characters by a bit, and once you add a TGL table with four
pipes you're going to have even longer lines.
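E.g. something along these lines (just a sketch, reusing one of the
rows above):

	struct dbuf_slice_conf_entry icl_allowed_dbufs[] = {
		...
		{
			.active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C),
			.dbuf_mask = { DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT, 0 },
		},
		...
	};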
> +};
> +
> +/*
> + * This function finds an entry with same enabled pipe configuration and
> + * returns correspondent DBuf slice mask as stated in BSpec for particular
> + * platform.
> + */
> +static u32 icl_get_allowed_dbuf_mask(int pipe,
> + u32 active_pipes,
> + uint_fixed_16_16_t pipe_ratio)
> +{
> + int i;
> + for (i = 0; i < ARRAY_SIZE(icl_allowed_dbufs); i++) {
> + if (icl_allowed_dbufs[i].active_pipes == active_pipes) {
> + /*
> + * According to BSpec 12716: if pipe_ratio >= 88.8
> + * use single pipe, even if two are accessible.
> + */
> + if (pipe_ratio.val >= div_fixed16(888, 10).val)
> + ++i;
> + return icl_allowed_dbufs[i].dbuf_mask[pipe];
> + }
> + }
> + return 0;
> +}
> +
> +u32 i915_get_allowed_dbuf_mask(struct drm_i915_private *dev_priv,
> + int pipe, u32 active_pipes,
> + uint_fixed_16_16_t pipe_ratio)
> +{
> + if (IS_GEN(dev_priv, 11))
> + return icl_get_allowed_dbuf_mask(pipe,
> + active_pipes,
> + pipe_ratio);
> + /*
> + * For anything else just return one slice yet.
> + * Should be extended for other platforms.
> + */
Note that you've already adjusted the DDB size for TGL in
intel_get_ddb_size(), so we probably want to add TGL's table at the same
time as the gen11 table.
Matt
> + return DBUF_S1_BIT;
> +}
> +
> static u64
> icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
> u64 *plane_data_rate)
> --
> 2.17.1
>
--
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation
(916) 356-2795