[Intel-gfx] [PATCH v6] drm/i915: Enable second dbuf slice for ICL and TGL

Wed Nov 13 21:15:06 UTC 2019

On Fri, Nov 08, 2019 at 03:45:00PM +0200, Stanislav Lisovskiy wrote:
> Also implemented algorithm for choosing DBuf slice configuration
> based on active pipes, pipe ratio as stated in BSpec 12716.
> 
> Now pipe allocation still stays proportional to pipe width as before,
> however within allowed DBuf slice for this particular configuration.
> 
> v2: Remove unneeded check from commit as ddb enabled slices might
>     now differ from hw state.
> 
> v3: - Added new field "supported_slices" to ddb to separate max
>       supported slices vs currently enabled, to avoid confusion.
>     - Removed obsolete comments and code related to DBuf(Matthew Roper).
>     - Some code style and long line removal(Matthew Roper).
>     - Added WARN_ON to new ddb range offset calc function(Matthew Roper).
>     - Removed platform specific call to calc pipe ratio from ddb
>       allocation function and fixed the return value(Matthew Roper)
>     - Refactored DBUF slice allocation table to improve readability
>     - Added DBUF slice allocation for TGL as well.
>     - ICL(however not TGL) seems to voluntarily enable second DBuf slice
>       after pm suspend/resume causing a mismatch failure, because we
>       update DBuf slices only if we do a modeset, however this check
>       is done always. Fixed it to be done only when modeset for ICL.
> 
> v4: - Now enabled slices is not actually a number, but a bitmask,
>       because we might need to enable second slice only and number
>       of slices would still 1 and that current code doesn't allow.
>     - Remove redundant duplicate code to have some unified way of
>       enabling dbuf slices instead of hardcoding.
> 
> v5: - Fix failing gen9_assert_dbuf_enabled as it was naively thinking
>       that we have only one DBUF_CTL slice. Now another version for
>       gen11 will check both slices as only second one can be enabled,
>       to keep CI happy.
> 
> v6: - Removed enabled dbuf assertion completely. Previous code
>       was keeping dbuf enabled, even(!) when _dbuf_disable is called.
>       Currently we enable/disable dbuf slices based on requrement
>       so no point in asserting this here.
>     - Removed unnecessary modeset check from verify_wm_state(Matthew Roper)
>     - Slices intersection after union is same as final result(Matthew Roper)
>     - Moved DBuf slices to intel_device_info(Matthew Roper)
>     - Some macros added(Matthew Roper)
>     - ddb range is now always less or equal to ddb size - no need for additional
>       checks(previously needed as we had some bandwidth checks in there which
>       could "suddenly" return ddb_size smaller than it is.
> 
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy at intel.com>
> Cc: Matthew Roper <matthew.d.roper at intel.com>
> Cc: Ville Syrjälä <ville.syrjala at intel.com>
> Cc: James Ausmus <james.ausmus at intel.com>

A handful of comments inline below but most of them are cosmetic, so
I'll leave it up to you whether you agree or not.  The only one I see
that doesn't appear to match the spec is on your TGL pipe A+B dbuf
assignment.

Otherwise,

Reviewed-by: Matt Roper <matthew.d.roper at intel.com>

> ---
>  drivers/gpu/drm/i915/display/intel_display.c  |  26 +-
>  .../drm/i915/display/intel_display_power.c    |  98 ++---
>  .../drm/i915/display/intel_display_power.h    |   2 +
>  drivers/gpu/drm/i915/i915_drv.c               |   5 +
>  drivers/gpu/drm/i915/i915_drv.h               |   2 +-
>  drivers/gpu/drm/i915/i915_pci.c               |   7 +-
>  drivers/gpu/drm/i915/i915_reg.h               |   8 +-
>  drivers/gpu/drm/i915/intel_device_info.h      |   1 +
>  drivers/gpu/drm/i915/intel_pm.c               | 384 ++++++++++++++++--
>  9 files changed, 429 insertions(+), 104 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> index 876fc25968bf..bd7aff675198 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -13387,7 +13387,7 @@ static void verify_wm_state(struct intel_crtc *crtc,
>  
>  	if (INTEL_GEN(dev_priv) >= 11 &&
>  	    hw->ddb.enabled_slices != sw_ddb->enabled_slices)
> -		DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n",
> +		DRM_ERROR("mismatch in DBUF Slices (expected %x, got %x)\n",
>  			  sw_ddb->enabled_slices,
>  			  hw->ddb.enabled_slices);
>  
> @@ -14614,15 +14614,24 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
>  	u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
>  	u8 required_slices = state->wm_results.ddb.enabled_slices;
>  	struct skl_ddb_entry entries[I915_MAX_PIPES] = {};
> +	u8 slices_union = hw_enabled_slices | required_slices;
> +	u8 slices_intersection = required_slices;

We can probably just drop this variable and use required_slices directly
in the one place below.

>  
>  	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i)
>  		/* ignore allocations for crtc's that have been turned off. */
>  		if (new_crtc_state->hw.active)
>  			entries[i] = old_crtc_state->wm.skl.ddb;
>  
> -	/* If 2nd DBuf slice required, enable it here */
> -	if (INTEL_GEN(dev_priv) >= 11 && required_slices > hw_enabled_slices)
> -		icl_dbuf_slices_update(dev_priv, required_slices);
> +	DRM_DEBUG_KMS("DBuf req slices %d hw slices %d\n",
> +		      required_slices, hw_enabled_slices);
> +
> +	/*
> +	 * If some other DBuf slice required, enable it here,
> +	 * as here we only add more slices, but not remove to prevent
> +	 * issues if somebody still uses those.
> +	 */
> +	if (INTEL_GEN(dev_priv) >= 11 && required_slices != hw_enabled_slices)
> +		icl_dbuf_slices_update(dev_priv, slices_union);
>  
>  	/*
>  	 * Whenever the number of active pipes changes, we need to make sure we
> @@ -14681,9 +14690,12 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
>  		}
>  	} while (progress);
>  
> -	/* If 2nd DBuf slice is no more required disable it */
> -	if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices)
> -		icl_dbuf_slices_update(dev_priv, required_slices);
> +	/*
> +	 * If some other DBuf slice is not needed, disable it here,
> +	 * as here we only remove slices, which are not needed anymore.
> +	 */
> +	if (INTEL_GEN(dev_priv) >= 11 && required_slices != hw_enabled_slices)
> +		icl_dbuf_slices_update(dev_priv, slices_intersection);
>  }
>  
>  static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
> index ce1b64f4dd44..f406ee5329b0 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_power.c
> +++ b/drivers/gpu/drm/i915/display/intel_display_power.c
> @@ -1031,15 +1031,6 @@ static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv,
>  		(I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0);
>  }
>  
> -static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv)
> -{
> -	u32 tmp = I915_READ(DBUF_CTL);
> -
> -	WARN((tmp & (DBUF_POWER_STATE | DBUF_POWER_REQUEST)) !=
> -	     (DBUF_POWER_STATE | DBUF_POWER_REQUEST),
> -	     "Unexpected DBuf power power state (0x%08x)\n", tmp);
> -}
> -
>  static void gen9_disable_dc_states(struct drm_i915_private *dev_priv)
>  {
>  	struct intel_cdclk_state cdclk_state = {};
> @@ -1055,8 +1046,6 @@ static void gen9_disable_dc_states(struct drm_i915_private *dev_priv)
>  	/* Can't read out voltage_level so can't use intel_cdclk_changed() */
>  	WARN_ON(intel_cdclk_needs_modeset(&dev_priv->cdclk.hw, &cdclk_state));
>  
> -	gen9_assert_dbuf_enabled(dev_priv);
> -
>  	if (IS_GEN9_LP(dev_priv))
>  		bxt_verify_ddi_phy_power_wells(dev_priv);
>  
> @@ -4254,31 +4243,51 @@ static void gen9_dbuf_disable(struct drm_i915_private *dev_priv)
>  	intel_dbuf_slice_set(dev_priv, DBUF_CTL, false);
>  }
>  
> -static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
> +int intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
>  {
> -	if (INTEL_GEN(dev_priv) < 11)
> -		return 1;
> -	return 2;
> +	return INTEL_INFO(dev_priv)->supported_slices;
> +}

I'd be inclined to just drop this function and directly access the
device info at the callsites of this function.

> +
> +void icl_dbuf_slices_restore(struct drm_i915_private *dev_priv)
> +{
> +	const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
> +
> +	icl_dbuf_slices_update(dev_priv, hw_enabled_slices);
>  }

Minor nitpick:  you're using this for a lot more than just "restoring"
now.  It's more a matter of you committing software state into the
hardware.  A name like icl_dbuf_slices_commit or icl_program_dbuf_slices
might be slightly more intuitive?

>  
>  void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
>  			    u8 req_slices)
>  {
> -	const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
> -	bool ret;
> +	bool ret = true;
> +	int i;
> +	int max_slices = intel_dbuf_max_slices(dev_priv);
>  
> -	if (req_slices > intel_dbuf_max_slices(dev_priv)) {
> +	if (hweight8(req_slices) > intel_dbuf_max_slices(dev_priv)) {
>  		DRM_ERROR("Invalid number of dbuf slices requested\n");
>  		return;
>  	}
>  
> -	if (req_slices == hw_enabled_slices || req_slices == 0)
> -		return;
> +	DRM_DEBUG_KMS("Updating dbuf slices to %x\n", req_slices);
>  
> -	if (req_slices > hw_enabled_slices)
> -		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true);
> -	else
> -		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, false);
> +	for (i = 0; i < max_slices; i++) {
> +		int slice_bit = BIT(i);
> +		bool slice_set = (slice_bit & req_slices) != 0;
> +
> +		switch (slice_bit) {
> +		case DBUF_S1_BIT:
> +			ret = ret && intel_dbuf_slice_set(dev_priv,
> +							  DBUF_CTL_S1,
> +							  slice_set);
> +			break;
> +		case DBUF_S2_BIT:
> +			ret = ret && intel_dbuf_slice_set(dev_priv,
> +							  DBUF_CTL_S2,
> +							  slice_set);
> +			break;
> +		default:
> +			MISSING_CASE(slice_bit);
> +		}
> +	}
>  
>  	if (ret)
>  		dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices;
> @@ -4286,40 +4295,21 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
>  
>  static void icl_dbuf_enable(struct drm_i915_private *dev_priv)
>  {
> -	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST);
> -	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) | DBUF_POWER_REQUEST);
> -	POSTING_READ(DBUF_CTL_S2);
> -
> -	udelay(10);
> -
> -	if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
> -	    !(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
> -		DRM_ERROR("DBuf power enable timeout\n");
> -	else
> -		/*
> -		 * FIXME: for now pretend that we only have 1 slice, see
> -		 * intel_enabled_dbuf_slices_num().
> -		 */
> -		dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
> +	/*
> +	 * Just power up 1 slice, we will
> +	 * figure out later which slices we have and what we need.
> +	 */
> +	dev_priv->wm.skl_hw.ddb.enabled_slices = DBUF_S1_BIT;
> +	icl_dbuf_slices_restore(dev_priv);
>  }
>  
>  static void icl_dbuf_disable(struct drm_i915_private *dev_priv)
>  {
> -	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) & ~DBUF_POWER_REQUEST);
> -	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) & ~DBUF_POWER_REQUEST);
> -	POSTING_READ(DBUF_CTL_S2);
> -
> -	udelay(10);
> -
> -	if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
> -	    (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
> -		DRM_ERROR("DBuf power disable timeout!\n");
> -	else
> -		/*
> -		 * FIXME: for now pretend that the first slice is always
> -		 * enabled, see intel_enabled_dbuf_slices_num().
> -		 */
> -		dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
> +	/*
> +	 * Disable all slices
> +	 */
> +	dev_priv->wm.skl_hw.ddb.enabled_slices = 0;
> +	icl_dbuf_slices_restore(dev_priv);
>  }
>  
>  static void icl_mbus_init(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
> index 1da04f3e0fb3..c5afb3129571 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_power.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_power.h
> @@ -311,8 +311,10 @@ intel_display_power_put_async(struct drm_i915_private *i915,
>  	for ((wf) = intel_display_power_get((i915), (domain)); (wf); \
>  	     intel_display_power_put_async((i915), (domain), (wf)), (wf) = 0)
>  
> +int intel_dbuf_max_slices(struct drm_i915_private *dev_priv);
>  void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
>  			    u8 req_slices);
> +void icl_dbuf_slices_restore(struct drm_i915_private *dev_priv);
>  
>  void chv_phy_powergate_lanes(struct intel_encoder *encoder,
>  			     bool override, unsigned int mask);
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 82e4e6bf08c3..0307aec2c928 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -51,6 +51,7 @@
>  #include "display/intel_bw.h"
>  #include "display/intel_cdclk.h"
>  #include "display/intel_display_types.h"
> +#include "display/intel_display_power.h"
>  #include "display/intel_dp.h"
>  #include "display/intel_fbdev.h"
>  #include "display/intel_hotplug.h"
> @@ -2588,6 +2589,10 @@ static int intel_runtime_resume(struct device *kdev)
>  	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
>  		ret = vlv_resume_prepare(dev_priv, true);
>  
> +	/* Weird hack to fix ICL hardware bug, as it resets DBUF slices reg */
> +	if (INTEL_GEN(dev_priv) == 11)
> +		icl_dbuf_slices_restore(dev_priv);

Has anyone tried this on EHL yet?  I'm wondering if this should be
IS_ICELAKE().

> +
>  	intel_uncore_runtime_resume(&dev_priv->uncore);
>  
>  	intel_runtime_pm_enable_interrupts(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7e0f67babe20..a396977c9c2d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -804,7 +804,7 @@ static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1,
>  }
>  
>  struct skl_ddb_allocation {
> -	u8 enabled_slices; /* GEN11 has configurable 2 slices */
> +	u8 enabled_slices;   /* Bitmask of currently enabled DBuf slices */
>  };
>  
>  struct skl_ddb_values {
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 1bb701d32a5d..39c3db4ef7da 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -614,7 +614,8 @@ static const struct intel_device_info intel_cherryview_info = {
>  	.has_gt_uc = 1, \
>  	.display.has_hdcp = 1, \
>  	.display.has_ipc = 1, \
> -	.ddb_size = 896
> +	.ddb_size = 896, \
> +	.supported_slices = 1
>  
>  #define SKL_PLATFORM \
>  	GEN9_FEATURES, \
> @@ -682,12 +683,14 @@ static const struct intel_device_info intel_broxton_info = {
>  	GEN9_LP_FEATURES,
>  	PLATFORM(INTEL_BROXTON),
>  	.ddb_size = 512,
> +	.supported_slices = 1,

If you put this in GEN9_LP_FEATURES it will take care of both BXT and
GLK.

>  };
>  
>  static const struct intel_device_info intel_geminilake_info = {
>  	GEN9_LP_FEATURES,
>  	PLATFORM(INTEL_GEMINILAKE),
>  	.ddb_size = 1024,
> +	.supported_slices = 1,
>  	GLK_COLORS,
>  };
>  
> @@ -737,6 +740,7 @@ static const struct intel_device_info intel_coffeelake_gt3_info = {
>  	GEN9_FEATURES, \
>  	GEN(10), \
>  	.ddb_size = 1024, \
> +	.supported_slices = 1, \
>  	.display.has_dsc = 1, \
>  	.has_coherent_ggtt = false, \
>  	GLK_COLORS
> @@ -773,6 +777,7 @@ static const struct intel_device_info intel_cannonlake_info = {
>  	}, \
>  	GEN(11), \
>  	.ddb_size = 2048, \
> +	.supported_slices = 2, \
>  	.has_logical_ring_elsq = 1, \
>  	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }
>  
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index a607ea520829..fba5731063d8 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7734,9 +7734,11 @@ enum {
>  #define DISP_ARB_CTL2	_MMIO(0x45004)
>  #define  DISP_DATA_PARTITION_5_6	(1 << 6)
>  #define  DISP_IPC_ENABLE		(1 << 3)
> -#define DBUF_CTL	_MMIO(0x45008)
> -#define DBUF_CTL_S1	_MMIO(0x45008)
> -#define DBUF_CTL_S2	_MMIO(0x44FE8)
> +#define DBUF_S1_BIT			BIT(0)
> +#define DBUF_S2_BIT			BIT(1)
> +#define DBUF_CTL			_MMIO(0x45008)
> +#define DBUF_CTL_S1			_MMIO(0x45008)
> +#define DBUF_CTL_S2			_MMIO(0x44FE8)
>  #define  DBUF_POWER_REQUEST		(1 << 31)
>  #define  DBUF_POWER_STATE		(1 << 30)
>  #define GEN7_MSG_CTL	_MMIO(0x45010)
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 4bdf8a6cfb47..ba34e1a5c591 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -180,6 +180,7 @@ struct intel_device_info {
>  	} display;
>  
>  	u16 ddb_size; /* in blocks */
> +	u8 supported_slices; /* number of DBuf slices */

Since slice/subslice terminology is used on the GT side, it might be
worth naming this "dbuf_slices" instead to avoid any confusion.

>  
>  	/* Register offsets for the various display pipes and transcoders */
>  	int pipe_offsets[I915_MAX_TRANSCODERS];
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 2d389e437e87..b6627c845f1c 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3586,24 +3586,20 @@ bool ilk_disable_lp_wm(struct drm_device *dev)
>  	return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
>  }
>  
> -static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
> +static u8 intel_enabled_dbuf_slices(struct drm_i915_private *dev_priv)
>  {
> -	u8 enabled_slices;
> -
> -	/* Slice 1 will always be enabled */
> -	enabled_slices = 1;
> +	u8 enabled_slices = 0;
>  
>  	/* Gen prior to GEN11 have only one DBuf slice */
>  	if (INTEL_GEN(dev_priv) < 11)
> -		return enabled_slices;
> +		return DBUF_S1_BIT;
>  
> -	/*
> -	 * FIXME: for now we'll only ever use 1 slice; pretend that we have
> -	 * only that 1 slice enabled until we have a proper way for on-demand
> -	 * toggling of the second slice.
> -	 */
> -	if (0 && I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
> -		enabled_slices++;
> +	/* Check if second DBuf slice is enabled */
> +	if (I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE)
> +		enabled_slices |= DBUF_S1_BIT;
> +
> +	if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
> +		enabled_slices |= DBUF_S2_BIT;
>  
>  	return enabled_slices;
>  }
> @@ -3812,36 +3808,38 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
>  			      const int num_active,
>  			      struct skl_ddb_allocation *ddb)
>  {
> -	const struct drm_display_mode *adjusted_mode;
> -	u64 total_data_bw;
>  	u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
> -
>  	WARN_ON(ddb_size == 0);
>  
>  	if (INTEL_GEN(dev_priv) < 11)
>  		return ddb_size - 4; /* 4 blocks for bypass path allocation */
>  
> -	adjusted_mode = &crtc_state->hw.adjusted_mode;
> -	total_data_bw = total_data_rate * drm_mode_vrefresh(adjusted_mode);
> +	return ddb_size;
> +}
>  
> -	/*
> -	 * 12GB/s is maximum BW supported by single DBuf slice.
> -	 *
> -	 * FIXME dbuf slice code is broken:
> -	 * - must wait for planes to stop using the slice before powering it off
> -	 * - plane straddling both slices is illegal in multi-pipe scenarios
> -	 * - should validate we stay within the hw bandwidth limits
> -	 */
> -	if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {
> -		ddb->enabled_slices = 2;
> -	} else {
> -		ddb->enabled_slices = 1;
> -		ddb_size /= 2;
> -	}
> +/*
> + * Calculate initial DBuf slice offset, based on slice size
> + * and mask(i.e if slice size is 1024 and second slice is enabled
> + * offset would be 1024)
> + */
> +static u32 skl_get_first_dbuf_slice_offset(u32 dbuf_slice_mask,
> +					   u32 slice_size, u32 ddb_size)
> +{
> +	u32 offset = 0;
>  
> -	return ddb_size;
> +	if (!dbuf_slice_mask)
> +		return 0;
> +
> +	offset = (ffs(dbuf_slice_mask) - 1) * slice_size;
> +
> +	WARN_ON(offset >= ddb_size);
> +	return offset;
>  }
>  
> +static u32 i915_get_allowed_dbuf_mask(struct drm_i915_private *dev_priv,
> +				      int pipe, u32 active_pipes,
> +				      const struct intel_crtc_state *crtc_state);
> +
>  static void
>  skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  				   const struct intel_crtc_state *crtc_state,
> @@ -3857,7 +3855,14 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  	u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
>  	enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
>  	u16 ddb_size;
> +	u32 ddb_range_size;
>  	u32 i;
> +	u32 dbuf_slice_mask;
> +	u32 active_pipes;
> +	u32 offset;
> +	u32 slice_size;
> +	u32 total_slice_mask;
> +	u32 start, end;
>  
>  	if (WARN_ON(!state) || !crtc_state->hw.active) {
>  		alloc->start = 0;
> @@ -3866,14 +3871,23 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  		return;
>  	}
>  
> -	if (intel_state->active_pipe_changes)
> +	if (intel_state->active_pipe_changes) {
>  		*num_active = hweight8(intel_state->active_pipes);
> -	else
> +		active_pipes = intel_state->active_pipes;
> +	} else {
>  		*num_active = hweight8(dev_priv->active_pipes);
> +		active_pipes = dev_priv->active_pipes;
> +	}
>  
>  	ddb_size = intel_get_ddb_size(dev_priv, crtc_state, total_data_rate,
>  				      *num_active, ddb);
>  
> +	DRM_DEBUG_KMS("Got total ddb size %d\n", ddb_size);
> +
> +	slice_size = ddb_size / INTEL_INFO(dev_priv)->supported_slices;
> +
> +	DRM_DEBUG_KMS("Got DBuf slice size %d\n", slice_size);
> +
>  	/*
>  	 * If the state doesn't change the active CRTC's or there is no
>  	 * modeset request, then there's no need to recalculate;
> @@ -3891,20 +3905,70 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  		return;
>  	}
>  
> +	/*
> +	 * Get allowed DBuf slices for correspondent pipe and platform.
> +	 */
> +	dbuf_slice_mask = i915_get_allowed_dbuf_mask(dev_priv, for_pipe,
> +						     active_pipes, crtc_state);
> +
> +	DRM_DEBUG_KMS("DBuf slice mask %x pipe %d active pipes %x\n",
> +		      dbuf_slice_mask,
> +		      for_pipe, active_pipes);
> +
> +	/*
> +	 * Figure out at which DBuf slice we start, i.e if we start at Dbuf S2
> +	 * and slice size is 1024, the offset would be 1024
> +	 */
> +	offset = skl_get_first_dbuf_slice_offset(dbuf_slice_mask,
> +						 slice_size, ddb_size);
> +
> +	/*
> +	 * Figure out total size of allowed DBuf slices, which is basically
> +	 * a number of allowed slices for that pipe multiplied by slice size.
> +	 * Inside of this
> +	 * range ddb entries are still allocated in proportion to display width.
> +	 */
> +	ddb_range_size = hweight8(dbuf_slice_mask) * slice_size;
> +
>  	/*
>  	 * Watermark/ddb requirement highly depends upon width of the
>  	 * framebuffer, So instead of allocating DDB equally among pipes
>  	 * distribute DDB based on resolution/width of the display.
>  	 */
> +	total_slice_mask = dbuf_slice_mask;
>  	for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
>  		const struct drm_display_mode *adjusted_mode =
>  			&crtc_state->hw.adjusted_mode;
>  		enum pipe pipe = crtc->pipe;
>  		int hdisplay, vdisplay;
> +		u32 pipe_dbuf_slice_mask = \
> +			i915_get_allowed_dbuf_mask(dev_priv,
> +						pipe,
> +						active_pipes,
> +						crtc_state);
>  
>  		if (!crtc_state->hw.enable)
>  			continue;
>  
> +		/*
> +		 * According to BSpec pipe can share one dbuf slice with another
> +		 * pipes or pipe can use multiple dbufs, in both cases we
> +		 * account for other pipes only if they have exactly same mask.
> +		 * However we need to account how many slices we should enable
> +		 * in total.
> +		 */
> +		total_slice_mask |= pipe_dbuf_slice_mask;
> +
> +		/*
> +		 * Do not account pipes using other slice sets
> +		 * luckily as of current BSpec slice sets do not partially
> +		 * intersect(pipes share either same one slice or same slice set
> +		 * i.e no partial intersection), so it is enough to check for
> +		 * equality for now.
> +		 */
> +		if (dbuf_slice_mask != pipe_dbuf_slice_mask)
> +			continue;
> +
>  		drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
>  		total_width += hdisplay;
>  
> @@ -3914,8 +3978,19 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  			pipe_width = hdisplay;
>  	}
>  
> -	alloc->start = ddb_size * width_before_pipe / total_width;
> -	alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
> +	ddb->enabled_slices = total_slice_mask;
> +
> +	start = ddb_range_size * width_before_pipe / total_width;
> +	end = ddb_range_size * (width_before_pipe + pipe_width) / total_width;
> +
> +	alloc->start = offset + start;
> +	alloc->end = offset + end;
> +
> +	DRM_DEBUG_KMS("Pipe %d ddb %d-%d\n", for_pipe,
> +		      alloc->start, alloc->end);
> +	DRM_DEBUG_KMS("Enabled ddb slices mask %x num supported %d\n",
> +		      ddb->enabled_slices,
> +		      INTEL_INFO(dev_priv)->supported_slices);
>  }
>  
>  static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
> @@ -4036,7 +4111,8 @@ void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
>  void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
>  			  struct skl_ddb_allocation *ddb /* out */)
>  {
> -	ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
> +	ddb->enabled_slices = intel_enabled_dbuf_slices(dev_priv);
> +	DRM_DEBUG_KMS("Got hw dbuf slices mask %x\n", ddb->enabled_slices);
>  }
>  
>  /*
> @@ -4164,6 +4240,238 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
>  	return total_data_rate;
>  }
>  
> +uint_fixed_16_16_t
> +skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)

Since this is only used on gen11+, should it be named
icl_pipe_downscale_amount?

> +{
> +	u32 src_w, src_h, dst_w, dst_h;
> +	uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
> +	uint_fixed_16_16_t downscale_h, downscale_w;
> +	const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
> +
> +	src_w = crtc_state->pipe_src_w;
> +	src_h = crtc_state->pipe_src_h;
> +	dst_w = adjusted_mode->crtc_hdisplay;
> +	dst_h = adjusted_mode->crtc_vdisplay;
> +
> +	fp_w_ratio = div_fixed16(src_w, dst_w);
> +	fp_h_ratio = div_fixed16(src_h, dst_h);
> +	downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
> +	downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
> +
> +	return mul_fixed16(downscale_w, downscale_h);
> +}
> +
> +uint_fixed_16_16_t
> +icl_get_pipe_ratio(const struct intel_crtc_state *crtc_state)
> +{
> +	struct drm_plane *plane;
> +	const struct drm_plane_state *drm_plane_state;
> +	uint_fixed_16_16_t pipe_downscale, pipe_ratio;
> +	uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
> +
> +	if (!crtc_state->uapi.enable)
> +		return max_downscale;
> +
> +	drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->uapi) {
> +		uint_fixed_16_16_t plane_downscale;
> +		const struct intel_plane_state *plane_state =
> +			to_intel_plane_state(drm_plane_state);
> +
> +		if (!intel_wm_plane_visible(crtc_state, plane_state))
> +			continue;
> +
> +		plane_downscale = skl_plane_downscale_amount(crtc_state, plane_state);
> +
> +		DRM_DEBUG_KMS("Plane %d downscale amount %d.%d\n",
> +			      to_intel_plane(plane)->id,
> +			      plane_downscale.val >> 16,
> +			      plane_downscale.val & 0xffff);
> +
> +		max_downscale = max_fixed16(plane_downscale, max_downscale);
> +	}
> +
> +
> +	pipe_downscale = skl_pipe_downscale_amount(crtc_state);
> +
> +	DRM_DEBUG_KMS("Pipe %d downscale amount %d.%d\n",
> +		       crtc->pipe, pipe_downscale.val >> 16,
> +		       pipe_downscale.val & 0xffff);
> +
> +	pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
> +
> +	/* Convert result to percentage */
> +	pipe_ratio = u32_to_fixed16(100 * div_round_up_u32_fixed16(1, pipe_downscale));

I think the math here is correct, but it gets a bit confusing since we
work with different terms than the bspec steps (e.g., using max plane
downscale rather than minimum plane ratio and such)...maybe a few extra
comments in this function would help reduce confusion?

> +
> +	DRM_DEBUG_KMS("Pipe %d ratio %d.%d\n", crtc->pipe,
> +		      pipe_ratio.val >> 16, pipe_ratio.val & 0xffff);
> +
> +	return pipe_ratio;
> +}
> +
> +struct dbuf_slice_conf_entry {
> +	u32 dbuf_mask[I915_MAX_PIPES];
> +	u32 active_pipes;
> +};
> +
> +
> +#define ICL_PIPE_A_DBUF_SLICES(DBuf1)  \
> +	{ { DBuf1, 0, 0, 0 }, BIT(PIPE_A) }
> +#define ICL_PIPE_B_DBUF_SLICES(DBuf1)  \
> +	{ { 0, DBuf1, 0, 0 }, BIT(PIPE_B) }
> +#define ICL_PIPE_C_DBUF_SLICES(DBuf1)  \
> +	{ { 0, 0, DBuf1, 0 }, BIT(PIPE_C) }
> +#define ICL_PIPE_D_DBUF_SLICES(DBuf1)  \
> +	{ { 0, 0, 0, DBuf1 }, BIT(PIPE_D) }

We'd probably want to call this (and the others that mention pipe D)
TGL_* since they aren't actually relevant until gen12.

> +#define ICL_PIPE_AB_DBUF_SLICES(DBuf1, DBuf2)   \
> +	{ { DBuf1, DBuf2, 0, 0 }, BIT(PIPE_A) | BIT(PIPE_B) }
> +#define ICL_PIPE_BC_DBUF_SLICES(DBuf1, DBuf2)   \
> +	{ { 0, DBuf1, DBuf2, 0 }, BIT(PIPE_B) | BIT(PIPE_C) }
> +#define ICL_PIPE_BD_DBUF_SLICES(DBuf1, DBuf2)   \
> +	{ { 0, DBuf1, 0, DBuf2 }, BIT(PIPE_B) | BIT(PIPE_D) }
> +#define ICL_PIPE_AC_DBUF_SLICES(DBuf1, DBuf2)   \
> +	{ { DBuf1, 0, DBuf2, 0 }, BIT(PIPE_A) | BIT(PIPE_C) }
> +#define ICL_PIPE_AD_DBUF_SLICES(DBuf1, DBuf2)   \
> +	{ { DBuf1, 0, 0, DBuf2 }, BIT(PIPE_A) | BIT(PIPE_D) }
> +#define ICL_PIPE_CD_DBUF_SLICES(DBuf1, DBuf2)   \
> +	{ { 0, 0, DBuf1, DBuf2 }, BIT(PIPE_C) | BIT(PIPE_D) }
> +#define ICL_PIPE_ABC_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> +	{ { DBuf1, DBuf2, DBuf3, 0 }, BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) }
> +#define ICL_PIPE_ACD_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> +	{ { DBuf1, 0, DBuf2, DBuf3 }, BIT(PIPE_A) | BIT(PIPE_C) | BIT(PIPE_D) }
> +#define ICL_PIPE_BCD_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> +	{ { 0, DBuf1, DBuf2, DBuf3 }, BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D) }
> +#define ICL_PIPE_ABD_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> +	{ { DBuf1, DBuf2, 0, DBuf3 }, BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_D) }
> +#define ICL_PIPE_ABC_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> +	{ { DBuf1, DBuf2, DBuf3, 0 }, BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) }
> +#define ICL_PIPE_ABCD_DBUF_SLICES(DBuf1, DBuf2, DBuf3, DBuf4)  \
> +	{ { DBuf1, DBuf2, DBuf3, DBuf4 }, BIT(PIPE_A) | BIT(PIPE_B) | \
> +					  BIT(PIPE_C) | BIT(PIPE_D) }
> +
> +/*
> + * Table taken from Bspec 12716
> + * Pipes do have some preferred DBuf slice affinity,
> + * plus there are some hardcoded requirements on how
> + * those should be distributed for multipipe scenarios.
> + * For more DBuf slices algorithm can get even more messy
> + * and less readable, so decided to use a table almost
> + * as is from BSpec itself - that way it is at least easier
> + * to compare, change and check.
> + */
> +struct dbuf_slice_conf_entry icl_allowed_dbufs[] = {
> +{ { 0, 0, 0, 0 }, 0 },
> +ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),  /* pipe ratio < 88.8 */
> +ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT),                /* for pipe ratio >= 88.8 */
> +ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),  /* pipe ratio < 88.8 */
> +ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT),                /* for pipe ratio >= 88.8 */
> +ICL_PIPE_C_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),  /* pipe ratio < 88.8 */
> +ICL_PIPE_C_DBUF_SLICES(DBUF_S2_BIT),                /* for pipe ratio >= 88.8 */
> +ICL_PIPE_AB_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_AC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_BC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ABC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
> +};
> +
> +/*
> + * Table taken from Bspec 49255
> + * Pipes do have some preferred DBuf slice affinity,
> + * plus there are some hardcoded requirements on how
> + * those should be distributed for multipipe scenarios.
> + * For more DBuf slices algorithm can get even more messy
> + * and less readable, so decided to use a table almost
> + * as is from BSpec itself - that way it is at least easier
> + * to compare, change and check.
> + */
> +struct dbuf_slice_conf_entry tgl_allowed_dbufs[] = {
> +{ { 0, 0, 0, 0 }, 0 },
> +ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> +ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> +ICL_PIPE_C_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> +ICL_PIPE_D_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> +ICL_PIPE_AB_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),

The column order in the bspec is bizarre (D, C, A, B), but isn't this
one backwards?  Pipe A gets S2, pipe B gets S1?  Or do we think this is
a bspec mistake (it does seem unusual)?

> +ICL_PIPE_AC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_BC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_AD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_BD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_CD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ABD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ABC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ACD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
> +ICL_PIPE_BCD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ABCD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
> +};
> +
> +/*
> + * This function finds an entry with same enabled pipe configuration and
> + * returns correspondent DBuf slice mask as stated in BSpec for particular
> + * platform.
> + */
> +static u32 icl_get_allowed_dbuf_mask(int pipe,
> +				     u32 active_pipes,
> +				     const struct intel_crtc_state *crtc_state)
> +{
> +	int i;
> +	uint_fixed_16_16_t pipe_ratio;
> +
> +	/*
> +	 * Calculate pipe ratio as stated in BSpec 28692
> +	 */
> +	pipe_ratio = icl_get_pipe_ratio(crtc_state);
> +
> +	for (i = 0; i < ARRAY_SIZE(icl_allowed_dbufs); i++) {
> +		if (icl_allowed_dbufs[i].active_pipes == active_pipes) {
> +			if (hweight32(active_pipes) == 1) {
> +				/*
> +				 * According to BSpec 12716: if ratio >= 88.8
> +				 * always use single dbuf slice.
> +				 */
> +				if (pipe_ratio.val >= div_fixed16(888, 10).val)
> +					++i;
> +			}
> +			return icl_allowed_dbufs[i].dbuf_mask[pipe];
> +		}
> +	}
> +	return 0;
> +}
> +
> +/*
> + * This function finds an entry with same enabled pipe configuration and
> + * returns correspondent DBuf slice mask as stated in BSpec for particular
> + * platform.
> + */
> +static u32 tgl_get_allowed_dbuf_mask(int pipe,
> +				     u32 active_pipes,
> +				     const struct intel_crtc_state *crtc_state)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(tgl_allowed_dbufs); i++) {
> +		if (tgl_allowed_dbufs[i].active_pipes == active_pipes)
> +			return tgl_allowed_dbufs[i].dbuf_mask[pipe];
> +	}
> +	return 0;
> +}
> +
> +u32 i915_get_allowed_dbuf_mask(struct drm_i915_private *dev_priv,
> +				      int pipe, u32 active_pipes,
> +				      const struct intel_crtc_state *crtc_state)
> +{
> +	if (IS_GEN(dev_priv, 11))
> +		return icl_get_allowed_dbuf_mask(pipe,
> +						 active_pipes,
> +						 crtc_state);
> +	else if (IS_GEN(dev_priv, 12))
> +		return tgl_get_allowed_dbuf_mask(pipe,
> +						 active_pipes,
> +						 crtc_state);
> +	/*
> +	 * For anything else just return one slice yet.
> +	 * Should be extended for other platforms.
> +	 */
> +	return DBUF_S1_BIT;
> +}
> +
>  static u64
>  icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
>  				 u64 *plane_data_rate)
> -- 
> 2.17.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation
(916) 356-2795