[Intel-gfx] [PATCH v6] drm/i915: Enable second dbuf slice for ICL and TGL
Matt Roper
matthew.d.roper at intel.com
Wed Nov 13 21:15:06 UTC 2019
On Fri, Nov 08, 2019 at 03:45:00PM +0200, Stanislav Lisovskiy wrote:
> Also implemented algorithm for choosing DBuf slice configuration
> based on active pipes, pipe ratio as stated in BSpec 12716.
>
> Now pipe allocation still stays proportional to pipe width as before,
> however within allowed DBuf slice for this particular configuration.
>
> v2: Remove unneeded check from commit as ddb enabled slices might
> now differ from hw state.
>
> v3: - Added new field "supported_slices" to ddb to separate max
> supported slices vs currently enabled, to avoid confusion.
> - Removed obsolete comments and code related to DBuf(Matthew Roper).
> - Some code style and long line removal(Matthew Roper).
> - Added WARN_ON to new ddb range offset calc function(Matthew Roper).
> - Removed platform specific call to calc pipe ratio from ddb
> allocation function and fixed the return value(Matthew Roper)
> - Refactored DBUF slice allocation table to improve readability
> - Added DBUF slice allocation for TGL as well.
> - ICL(however not TGL) seems to voluntarily enable second DBuf slice
> after pm suspend/resume causing a mismatch failure, because we
> update DBuf slices only if we do a modeset, however this check
> is done always. Fixed it to be done only when modeset for ICL.
>
> v4: - Now enabled slices is not actually a number, but a bitmask,
> because we might need to enable second slice only and number
> of slices would still 1 and that current code doesn't allow.
> - Remove redundant duplicate code to have some unified way of
> enabling dbuf slices instead of hardcoding.
>
> v5: - Fix failing gen9_assert_dbuf_enabled as it was naively thinking
> that we have only one DBUF_CTL slice. Now another version for
> gen11 will check both slices as only second one can be enabled,
> to keep CI happy.
>
> v6: - Removed enabled dbuf assertion completely. Previous code
> was keeping dbuf enabled, even(!) when _dbuf_disable is called.
> Currently we enable/disable dbuf slices based on requrement
> so no point in asserting this here.
> - Removed unnecessary modeset check from verify_wm_state(Matthew Roper)
> - Slices intersection after union is same as final result(Matthew Roper)
> - Moved DBuf slices to intel_device_info(Matthew Roper)
> - Some macros added(Matthew Roper)
> - ddb range is now always less or equal to ddb size - no need for additional
> checks(previously needed as we had some bandwidth checks in there which
> could "suddenly" return ddb_size smaller than it is.
>
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy at intel.com>
> Cc: Matthew Roper <matthew.d.roper at intel.com>
> Cc: Ville Syrjälä <ville.syrjala at intel.com>
> Cc: James Ausmus <james.ausmus at intel.com>
A handful of comments inline below but most of them are cosmetic, so
I'll leave it up to you whether you agree or not. The only one I see
that doesn't appear to match the spec is on your TGL pipe A+B dbuf
assignment.
Otherwise,
Reviewed-by: Matt Roper <matthew.d.roper at intel.com>
> ---
> drivers/gpu/drm/i915/display/intel_display.c | 26 +-
> .../drm/i915/display/intel_display_power.c | 98 ++---
> .../drm/i915/display/intel_display_power.h | 2 +
> drivers/gpu/drm/i915/i915_drv.c | 5 +
> drivers/gpu/drm/i915/i915_drv.h | 2 +-
> drivers/gpu/drm/i915/i915_pci.c | 7 +-
> drivers/gpu/drm/i915/i915_reg.h | 8 +-
> drivers/gpu/drm/i915/intel_device_info.h | 1 +
> drivers/gpu/drm/i915/intel_pm.c | 384 ++++++++++++++++--
> 9 files changed, 429 insertions(+), 104 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> index 876fc25968bf..bd7aff675198 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -13387,7 +13387,7 @@ static void verify_wm_state(struct intel_crtc *crtc,
>
> if (INTEL_GEN(dev_priv) >= 11 &&
> hw->ddb.enabled_slices != sw_ddb->enabled_slices)
> - DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n",
> + DRM_ERROR("mismatch in DBUF Slices (expected %x, got %x)\n",
> sw_ddb->enabled_slices,
> hw->ddb.enabled_slices);
>
> @@ -14614,15 +14614,24 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
> u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
> u8 required_slices = state->wm_results.ddb.enabled_slices;
> struct skl_ddb_entry entries[I915_MAX_PIPES] = {};
> + u8 slices_union = hw_enabled_slices | required_slices;
> + u8 slices_intersection = required_slices;
We can probably just drop this variable and use required_slices directly
in the one place below.
>
> for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i)
> /* ignore allocations for crtc's that have been turned off. */
> if (new_crtc_state->hw.active)
> entries[i] = old_crtc_state->wm.skl.ddb;
>
> - /* If 2nd DBuf slice required, enable it here */
> - if (INTEL_GEN(dev_priv) >= 11 && required_slices > hw_enabled_slices)
> - icl_dbuf_slices_update(dev_priv, required_slices);
> + DRM_DEBUG_KMS("DBuf req slices %d hw slices %d\n",
> + required_slices, hw_enabled_slices);
> +
> + /*
> + * If some other DBuf slice required, enable it here,
> + * as here we only add more slices, but not remove to prevent
> + * issues if somebody still uses those.
> + */
> + if (INTEL_GEN(dev_priv) >= 11 && required_slices != hw_enabled_slices)
> + icl_dbuf_slices_update(dev_priv, slices_union);
>
> /*
> * Whenever the number of active pipes changes, we need to make sure we
> @@ -14681,9 +14690,12 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
> }
> } while (progress);
>
> - /* If 2nd DBuf slice is no more required disable it */
> - if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices)
> - icl_dbuf_slices_update(dev_priv, required_slices);
> + /*
> + * If some other DBuf slice is not needed, disable it here,
> + * as here we only remove slices, which are not needed anymore.
> + */
> + if (INTEL_GEN(dev_priv) >= 11 && required_slices != hw_enabled_slices)
> + icl_dbuf_slices_update(dev_priv, slices_intersection);
> }
>
> static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
> index ce1b64f4dd44..f406ee5329b0 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_power.c
> +++ b/drivers/gpu/drm/i915/display/intel_display_power.c
> @@ -1031,15 +1031,6 @@ static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv,
> (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0);
> }
>
> -static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv)
> -{
> - u32 tmp = I915_READ(DBUF_CTL);
> -
> - WARN((tmp & (DBUF_POWER_STATE | DBUF_POWER_REQUEST)) !=
> - (DBUF_POWER_STATE | DBUF_POWER_REQUEST),
> - "Unexpected DBuf power power state (0x%08x)\n", tmp);
> -}
> -
> static void gen9_disable_dc_states(struct drm_i915_private *dev_priv)
> {
> struct intel_cdclk_state cdclk_state = {};
> @@ -1055,8 +1046,6 @@ static void gen9_disable_dc_states(struct drm_i915_private *dev_priv)
> /* Can't read out voltage_level so can't use intel_cdclk_changed() */
> WARN_ON(intel_cdclk_needs_modeset(&dev_priv->cdclk.hw, &cdclk_state));
>
> - gen9_assert_dbuf_enabled(dev_priv);
> -
> if (IS_GEN9_LP(dev_priv))
> bxt_verify_ddi_phy_power_wells(dev_priv);
>
> @@ -4254,31 +4243,51 @@ static void gen9_dbuf_disable(struct drm_i915_private *dev_priv)
> intel_dbuf_slice_set(dev_priv, DBUF_CTL, false);
> }
>
> -static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
> +int intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
> {
> - if (INTEL_GEN(dev_priv) < 11)
> - return 1;
> - return 2;
> + return INTEL_INFO(dev_priv)->supported_slices;
> +}
I'd be inclined to just drop this function and directly access the
device info at the callsites of this function.
> +
> +void icl_dbuf_slices_restore(struct drm_i915_private *dev_priv)
> +{
> + const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
> +
> + icl_dbuf_slices_update(dev_priv, hw_enabled_slices);
> }
Minor nitpick: you're using this for a lot more than just "restoring"
now. It's more a matter of you committing software state into the
hardware. A name like icl_dbuf_slices_commit or icl_program_dbuf_slices
might be slightly more intuitive?
>
> void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
> u8 req_slices)
> {
> - const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
> - bool ret;
> + bool ret = true;
> + int i;
> + int max_slices = intel_dbuf_max_slices(dev_priv);
>
> - if (req_slices > intel_dbuf_max_slices(dev_priv)) {
> + if (hweight8(req_slices) > intel_dbuf_max_slices(dev_priv)) {
> DRM_ERROR("Invalid number of dbuf slices requested\n");
> return;
> }
>
> - if (req_slices == hw_enabled_slices || req_slices == 0)
> - return;
> + DRM_DEBUG_KMS("Updating dbuf slices to %x\n", req_slices);
>
> - if (req_slices > hw_enabled_slices)
> - ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true);
> - else
> - ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, false);
> + for (i = 0; i < max_slices; i++) {
> + int slice_bit = BIT(i);
> + bool slice_set = (slice_bit & req_slices) != 0;
> +
> + switch (slice_bit) {
> + case DBUF_S1_BIT:
> + ret = ret && intel_dbuf_slice_set(dev_priv,
> + DBUF_CTL_S1,
> + slice_set);
> + break;
> + case DBUF_S2_BIT:
> + ret = ret && intel_dbuf_slice_set(dev_priv,
> + DBUF_CTL_S2,
> + slice_set);
> + break;
> + default:
> + MISSING_CASE(slice_bit);
> + }
> + }
>
> if (ret)
> dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices;
> @@ -4286,40 +4295,21 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
>
> static void icl_dbuf_enable(struct drm_i915_private *dev_priv)
> {
> - I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST);
> - I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) | DBUF_POWER_REQUEST);
> - POSTING_READ(DBUF_CTL_S2);
> -
> - udelay(10);
> -
> - if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
> - !(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
> - DRM_ERROR("DBuf power enable timeout\n");
> - else
> - /*
> - * FIXME: for now pretend that we only have 1 slice, see
> - * intel_enabled_dbuf_slices_num().
> - */
> - dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
> + /*
> + * Just power up 1 slice, we will
> + * figure out later which slices we have and what we need.
> + */
> + dev_priv->wm.skl_hw.ddb.enabled_slices = DBUF_S1_BIT;
> + icl_dbuf_slices_restore(dev_priv);
> }
>
> static void icl_dbuf_disable(struct drm_i915_private *dev_priv)
> {
> - I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) & ~DBUF_POWER_REQUEST);
> - I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) & ~DBUF_POWER_REQUEST);
> - POSTING_READ(DBUF_CTL_S2);
> -
> - udelay(10);
> -
> - if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
> - (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
> - DRM_ERROR("DBuf power disable timeout!\n");
> - else
> - /*
> - * FIXME: for now pretend that the first slice is always
> - * enabled, see intel_enabled_dbuf_slices_num().
> - */
> - dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
> + /*
> + * Disable all slices
> + */
> + dev_priv->wm.skl_hw.ddb.enabled_slices = 0;
> + icl_dbuf_slices_restore(dev_priv);
> }
>
> static void icl_mbus_init(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
> index 1da04f3e0fb3..c5afb3129571 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_power.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_power.h
> @@ -311,8 +311,10 @@ intel_display_power_put_async(struct drm_i915_private *i915,
> for ((wf) = intel_display_power_get((i915), (domain)); (wf); \
> intel_display_power_put_async((i915), (domain), (wf)), (wf) = 0)
>
> +int intel_dbuf_max_slices(struct drm_i915_private *dev_priv);
> void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
> u8 req_slices);
> +void icl_dbuf_slices_restore(struct drm_i915_private *dev_priv);
>
> void chv_phy_powergate_lanes(struct intel_encoder *encoder,
> bool override, unsigned int mask);
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 82e4e6bf08c3..0307aec2c928 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -51,6 +51,7 @@
> #include "display/intel_bw.h"
> #include "display/intel_cdclk.h"
> #include "display/intel_display_types.h"
> +#include "display/intel_display_power.h"
> #include "display/intel_dp.h"
> #include "display/intel_fbdev.h"
> #include "display/intel_hotplug.h"
> @@ -2588,6 +2589,10 @@ static int intel_runtime_resume(struct device *kdev)
> if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> ret = vlv_resume_prepare(dev_priv, true);
>
> + /* Weird hack to fix ICL hardware bug, as it resets DBUF slices reg */
> + if (INTEL_GEN(dev_priv) == 11)
> + icl_dbuf_slices_restore(dev_priv);
Has anyone tried this on EHL yet? I'm wondering if this should be
IS_ICELAKE().
> +
> intel_uncore_runtime_resume(&dev_priv->uncore);
>
> intel_runtime_pm_enable_interrupts(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7e0f67babe20..a396977c9c2d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -804,7 +804,7 @@ static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1,
> }
>
> struct skl_ddb_allocation {
> - u8 enabled_slices; /* GEN11 has configurable 2 slices */
> + u8 enabled_slices; /* Bitmask of currently enabled DBuf slices */
> };
>
> struct skl_ddb_values {
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 1bb701d32a5d..39c3db4ef7da 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -614,7 +614,8 @@ static const struct intel_device_info intel_cherryview_info = {
> .has_gt_uc = 1, \
> .display.has_hdcp = 1, \
> .display.has_ipc = 1, \
> - .ddb_size = 896
> + .ddb_size = 896, \
> + .supported_slices = 1
>
> #define SKL_PLATFORM \
> GEN9_FEATURES, \
> @@ -682,12 +683,14 @@ static const struct intel_device_info intel_broxton_info = {
> GEN9_LP_FEATURES,
> PLATFORM(INTEL_BROXTON),
> .ddb_size = 512,
> + .supported_slices = 1,
If you put this in GEN9_LP_FEATURES it will take care of both BXT and
GLK.
> };
>
> static const struct intel_device_info intel_geminilake_info = {
> GEN9_LP_FEATURES,
> PLATFORM(INTEL_GEMINILAKE),
> .ddb_size = 1024,
> + .supported_slices = 1,
> GLK_COLORS,
> };
>
> @@ -737,6 +740,7 @@ static const struct intel_device_info intel_coffeelake_gt3_info = {
> GEN9_FEATURES, \
> GEN(10), \
> .ddb_size = 1024, \
> + .supported_slices = 1, \
> .display.has_dsc = 1, \
> .has_coherent_ggtt = false, \
> GLK_COLORS
> @@ -773,6 +777,7 @@ static const struct intel_device_info intel_cannonlake_info = {
> }, \
> GEN(11), \
> .ddb_size = 2048, \
> + .supported_slices = 2, \
> .has_logical_ring_elsq = 1, \
> .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index a607ea520829..fba5731063d8 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7734,9 +7734,11 @@ enum {
> #define DISP_ARB_CTL2 _MMIO(0x45004)
> #define DISP_DATA_PARTITION_5_6 (1 << 6)
> #define DISP_IPC_ENABLE (1 << 3)
> -#define DBUF_CTL _MMIO(0x45008)
> -#define DBUF_CTL_S1 _MMIO(0x45008)
> -#define DBUF_CTL_S2 _MMIO(0x44FE8)
> +#define DBUF_S1_BIT BIT(0)
> +#define DBUF_S2_BIT BIT(1)
> +#define DBUF_CTL _MMIO(0x45008)
> +#define DBUF_CTL_S1 _MMIO(0x45008)
> +#define DBUF_CTL_S2 _MMIO(0x44FE8)
> #define DBUF_POWER_REQUEST (1 << 31)
> #define DBUF_POWER_STATE (1 << 30)
> #define GEN7_MSG_CTL _MMIO(0x45010)
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 4bdf8a6cfb47..ba34e1a5c591 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -180,6 +180,7 @@ struct intel_device_info {
> } display;
>
> u16 ddb_size; /* in blocks */
> + u8 supported_slices; /* number of DBuf slices */
Since slice/subslice terminology is used on the GT side, it might be
worth naming this "dbuf_slices" instead to avoid any confusion.
>
> /* Register offsets for the various display pipes and transcoders */
> int pipe_offsets[I915_MAX_TRANSCODERS];
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 2d389e437e87..b6627c845f1c 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3586,24 +3586,20 @@ bool ilk_disable_lp_wm(struct drm_device *dev)
> return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
> }
>
> -static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
> +static u8 intel_enabled_dbuf_slices(struct drm_i915_private *dev_priv)
> {
> - u8 enabled_slices;
> -
> - /* Slice 1 will always be enabled */
> - enabled_slices = 1;
> + u8 enabled_slices = 0;
>
> /* Gen prior to GEN11 have only one DBuf slice */
> if (INTEL_GEN(dev_priv) < 11)
> - return enabled_slices;
> + return DBUF_S1_BIT;
>
> - /*
> - * FIXME: for now we'll only ever use 1 slice; pretend that we have
> - * only that 1 slice enabled until we have a proper way for on-demand
> - * toggling of the second slice.
> - */
> - if (0 && I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
> - enabled_slices++;
> + /* Check if second DBuf slice is enabled */
> + if (I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE)
> + enabled_slices |= DBUF_S1_BIT;
> +
> + if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
> + enabled_slices |= DBUF_S2_BIT;
>
> return enabled_slices;
> }
> @@ -3812,36 +3808,38 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
> const int num_active,
> struct skl_ddb_allocation *ddb)
> {
> - const struct drm_display_mode *adjusted_mode;
> - u64 total_data_bw;
> u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
> -
> WARN_ON(ddb_size == 0);
>
> if (INTEL_GEN(dev_priv) < 11)
> return ddb_size - 4; /* 4 blocks for bypass path allocation */
>
> - adjusted_mode = &crtc_state->hw.adjusted_mode;
> - total_data_bw = total_data_rate * drm_mode_vrefresh(adjusted_mode);
> + return ddb_size;
> +}
>
> - /*
> - * 12GB/s is maximum BW supported by single DBuf slice.
> - *
> - * FIXME dbuf slice code is broken:
> - * - must wait for planes to stop using the slice before powering it off
> - * - plane straddling both slices is illegal in multi-pipe scenarios
> - * - should validate we stay within the hw bandwidth limits
> - */
> - if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {
> - ddb->enabled_slices = 2;
> - } else {
> - ddb->enabled_slices = 1;
> - ddb_size /= 2;
> - }
> +/*
> + * Calculate initial DBuf slice offset, based on slice size
> + * and mask(i.e if slice size is 1024 and second slice is enabled
> + * offset would be 1024)
> + */
> +static u32 skl_get_first_dbuf_slice_offset(u32 dbuf_slice_mask,
> + u32 slice_size, u32 ddb_size)
> +{
> + u32 offset = 0;
>
> - return ddb_size;
> + if (!dbuf_slice_mask)
> + return 0;
> +
> + offset = (ffs(dbuf_slice_mask) - 1) * slice_size;
> +
> + WARN_ON(offset >= ddb_size);
> + return offset;
> }
>
> +static u32 i915_get_allowed_dbuf_mask(struct drm_i915_private *dev_priv,
> + int pipe, u32 active_pipes,
> + const struct intel_crtc_state *crtc_state);
> +
> static void
> skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
> const struct intel_crtc_state *crtc_state,
> @@ -3857,7 +3855,14 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
> u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
> enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
> u16 ddb_size;
> + u32 ddb_range_size;
> u32 i;
> + u32 dbuf_slice_mask;
> + u32 active_pipes;
> + u32 offset;
> + u32 slice_size;
> + u32 total_slice_mask;
> + u32 start, end;
>
> if (WARN_ON(!state) || !crtc_state->hw.active) {
> alloc->start = 0;
> @@ -3866,14 +3871,23 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
> return;
> }
>
> - if (intel_state->active_pipe_changes)
> + if (intel_state->active_pipe_changes) {
> *num_active = hweight8(intel_state->active_pipes);
> - else
> + active_pipes = intel_state->active_pipes;
> + } else {
> *num_active = hweight8(dev_priv->active_pipes);
> + active_pipes = dev_priv->active_pipes;
> + }
>
> ddb_size = intel_get_ddb_size(dev_priv, crtc_state, total_data_rate,
> *num_active, ddb);
>
> + DRM_DEBUG_KMS("Got total ddb size %d\n", ddb_size);
> +
> + slice_size = ddb_size / INTEL_INFO(dev_priv)->supported_slices;
> +
> + DRM_DEBUG_KMS("Got DBuf slice size %d\n", slice_size);
> +
> /*
> * If the state doesn't change the active CRTC's or there is no
> * modeset request, then there's no need to recalculate;
> @@ -3891,20 +3905,70 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
> return;
> }
>
> + /*
> + * Get allowed DBuf slices for correspondent pipe and platform.
> + */
> + dbuf_slice_mask = i915_get_allowed_dbuf_mask(dev_priv, for_pipe,
> + active_pipes, crtc_state);
> +
> + DRM_DEBUG_KMS("DBuf slice mask %x pipe %d active pipes %x\n",
> + dbuf_slice_mask,
> + for_pipe, active_pipes);
> +
> + /*
> + * Figure out at which DBuf slice we start, i.e if we start at Dbuf S2
> + * and slice size is 1024, the offset would be 1024
> + */
> + offset = skl_get_first_dbuf_slice_offset(dbuf_slice_mask,
> + slice_size, ddb_size);
> +
> + /*
> + * Figure out total size of allowed DBuf slices, which is basically
> + * a number of allowed slices for that pipe multiplied by slice size.
> + * Inside of this
> + * range ddb entries are still allocated in proportion to display width.
> + */
> + ddb_range_size = hweight8(dbuf_slice_mask) * slice_size;
> +
> /*
> * Watermark/ddb requirement highly depends upon width of the
> * framebuffer, So instead of allocating DDB equally among pipes
> * distribute DDB based on resolution/width of the display.
> */
> + total_slice_mask = dbuf_slice_mask;
> for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
> const struct drm_display_mode *adjusted_mode =
> &crtc_state->hw.adjusted_mode;
> enum pipe pipe = crtc->pipe;
> int hdisplay, vdisplay;
> + u32 pipe_dbuf_slice_mask = \
> + i915_get_allowed_dbuf_mask(dev_priv,
> + pipe,
> + active_pipes,
> + crtc_state);
>
> if (!crtc_state->hw.enable)
> continue;
>
> + /*
> + * According to BSpec pipe can share one dbuf slice with another
> + * pipes or pipe can use multiple dbufs, in both cases we
> + * account for other pipes only if they have exactly same mask.
> + * However we need to account how many slices we should enable
> + * in total.
> + */
> + total_slice_mask |= pipe_dbuf_slice_mask;
> +
> + /*
> + * Do not account pipes using other slice sets
> + * luckily as of current BSpec slice sets do not partially
> + * intersect(pipes share either same one slice or same slice set
> + * i.e no partial intersection), so it is enough to check for
> + * equality for now.
> + */
> + if (dbuf_slice_mask != pipe_dbuf_slice_mask)
> + continue;
> +
> drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
> total_width += hdisplay;
>
> @@ -3914,8 +3978,19 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
> pipe_width = hdisplay;
> }
>
> - alloc->start = ddb_size * width_before_pipe / total_width;
> - alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
> + ddb->enabled_slices = total_slice_mask;
> +
> + start = ddb_range_size * width_before_pipe / total_width;
> + end = ddb_range_size * (width_before_pipe + pipe_width) / total_width;
> +
> + alloc->start = offset + start;
> + alloc->end = offset + end;
> +
> + DRM_DEBUG_KMS("Pipe %d ddb %d-%d\n", for_pipe,
> + alloc->start, alloc->end);
> + DRM_DEBUG_KMS("Enabled ddb slices mask %x num supported %d\n",
> + ddb->enabled_slices,
> + INTEL_INFO(dev_priv)->supported_slices);
> }
>
> static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
> @@ -4036,7 +4111,8 @@ void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
> void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
> struct skl_ddb_allocation *ddb /* out */)
> {
> - ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
> + ddb->enabled_slices = intel_enabled_dbuf_slices(dev_priv);
> + DRM_DEBUG_KMS("Got hw dbuf slices mask %x\n", ddb->enabled_slices);
> }
>
> /*
> @@ -4164,6 +4240,238 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
> return total_data_rate;
> }
>
> +uint_fixed_16_16_t
> +skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
Since this is only used on gen11+, should it be named
icl_pipe_downscale_amount?
> +{
> + u32 src_w, src_h, dst_w, dst_h;
> + uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
> + uint_fixed_16_16_t downscale_h, downscale_w;
> + const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
> +
> + src_w = crtc_state->pipe_src_w;
> + src_h = crtc_state->pipe_src_h;
> + dst_w = adjusted_mode->crtc_hdisplay;
> + dst_h = adjusted_mode->crtc_vdisplay;
> +
> + fp_w_ratio = div_fixed16(src_w, dst_w);
> + fp_h_ratio = div_fixed16(src_h, dst_h);
> + downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
> + downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
> +
> + return mul_fixed16(downscale_w, downscale_h);
> +}
> +
> +uint_fixed_16_16_t
> +icl_get_pipe_ratio(const struct intel_crtc_state *crtc_state)
> +{
> + struct drm_plane *plane;
> + const struct drm_plane_state *drm_plane_state;
> + uint_fixed_16_16_t pipe_downscale, pipe_ratio;
> + uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
> +
> + if (!crtc_state->uapi.enable)
> + return max_downscale;
> +
> + drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->uapi) {
> + uint_fixed_16_16_t plane_downscale;
> + const struct intel_plane_state *plane_state =
> + to_intel_plane_state(drm_plane_state);
> +
> + if (!intel_wm_plane_visible(crtc_state, plane_state))
> + continue;
> +
> + plane_downscale = skl_plane_downscale_amount(crtc_state, plane_state);
> +
> + DRM_DEBUG_KMS("Plane %d downscale amount %d.%d\n",
> + to_intel_plane(plane)->id,
> + plane_downscale.val >> 16,
> + plane_downscale.val & 0xffff);
> +
> + max_downscale = max_fixed16(plane_downscale, max_downscale);
> + }
> +
> +
> + pipe_downscale = skl_pipe_downscale_amount(crtc_state);
> +
> + DRM_DEBUG_KMS("Pipe %d downscale amount %d.%d\n",
> + crtc->pipe, pipe_downscale.val >> 16,
> + pipe_downscale.val & 0xffff);
> +
> + pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
> +
> + /* Convert result to percentage */
> + pipe_ratio = u32_to_fixed16(100 * div_round_up_u32_fixed16(1, pipe_downscale));
I think the math here is correct, but it gets a bit confusing since we
work with different terms than the bspec steps (e.g., using max plane
downscale rather than minimum plane ratio and such)...maybe a few extra
comments in this function would help reduce confusion?
> +
> + DRM_DEBUG_KMS("Pipe %d ratio %d.%d\n", crtc->pipe,
> + pipe_ratio.val >> 16, pipe_ratio.val & 0xffff);
> +
> + return pipe_ratio;
> +}
> +
> +struct dbuf_slice_conf_entry {
> + u32 dbuf_mask[I915_MAX_PIPES];
> + u32 active_pipes;
> +};
> +
> +
> +#define ICL_PIPE_A_DBUF_SLICES(DBuf1) \
> + { { DBuf1, 0, 0, 0 }, BIT(PIPE_A) }
> +#define ICL_PIPE_B_DBUF_SLICES(DBuf1) \
> + { { 0, DBuf1, 0, 0 }, BIT(PIPE_B) }
> +#define ICL_PIPE_C_DBUF_SLICES(DBuf1) \
> + { { 0, 0, DBuf1, 0 }, BIT(PIPE_C) }
> +#define ICL_PIPE_D_DBUF_SLICES(DBuf1) \
> + { { 0, 0, 0, DBuf1 }, BIT(PIPE_D) }
We'd probably want to call this (and the others that mention pipe D)
TGL_* since they aren't actually relevant until gen12.
> +#define ICL_PIPE_AB_DBUF_SLICES(DBuf1, DBuf2) \
> + { { DBuf1, DBuf2, 0, 0 }, BIT(PIPE_A) | BIT(PIPE_B) }
> +#define ICL_PIPE_BC_DBUF_SLICES(DBuf1, DBuf2) \
> + { { 0, DBuf1, DBuf2, 0 }, BIT(PIPE_B) | BIT(PIPE_C) }
> +#define ICL_PIPE_BD_DBUF_SLICES(DBuf1, DBuf2) \
> + { { 0, DBuf1, 0, DBuf2 }, BIT(PIPE_B) | BIT(PIPE_D) }
> +#define ICL_PIPE_AC_DBUF_SLICES(DBuf1, DBuf2) \
> + { { DBuf1, 0, DBuf2, 0 }, BIT(PIPE_A) | BIT(PIPE_C) }
> +#define ICL_PIPE_AD_DBUF_SLICES(DBuf1, DBuf2) \
> + { { DBuf1, 0, 0, DBuf2 }, BIT(PIPE_A) | BIT(PIPE_D) }
> +#define ICL_PIPE_CD_DBUF_SLICES(DBuf1, DBuf2) \
> + { { 0, 0, DBuf1, DBuf2 }, BIT(PIPE_C) | BIT(PIPE_D) }
> +#define ICL_PIPE_ABC_DBUF_SLICES(DBuf1, DBuf2, DBuf3) \
> + { { DBuf1, DBuf2, DBuf3, 0 }, BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) }
> +#define ICL_PIPE_ACD_DBUF_SLICES(DBuf1, DBuf2, DBuf3) \
> + { { DBuf1, 0, DBuf2, DBuf3 }, BIT(PIPE_A) | BIT(PIPE_C) | BIT(PIPE_D) }
> +#define ICL_PIPE_BCD_DBUF_SLICES(DBuf1, DBuf2, DBuf3) \
> + { { 0, DBuf1, DBuf2, DBuf3 }, BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D) }
> +#define ICL_PIPE_ABD_DBUF_SLICES(DBuf1, DBuf2, DBuf3) \
> + { { DBuf1, DBuf2, 0, DBuf3 }, BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_D) }
> +#define ICL_PIPE_ABC_DBUF_SLICES(DBuf1, DBuf2, DBuf3) \
> + { { DBuf1, DBuf2, DBuf3, 0 }, BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) }
> +#define ICL_PIPE_ABCD_DBUF_SLICES(DBuf1, DBuf2, DBuf3, DBuf4) \
> + { { DBuf1, DBuf2, DBuf3, DBuf4 }, BIT(PIPE_A) | BIT(PIPE_B) | \
> + BIT(PIPE_C) | BIT(PIPE_D) }
> +
> +/*
> + * Table taken from Bspec 12716
> + * Pipes do have some preferred DBuf slice affinity,
> + * plus there are some hardcoded requirements on how
> + * those should be distributed for multipipe scenarios.
> + * For more DBuf slices algorithm can get even more messy
> + * and less readable, so decided to use a table almost
> + * as is from BSpec itself - that way it is at least easier
> + * to compare, change and check.
> + */
> +struct dbuf_slice_conf_entry icl_allowed_dbufs[] = {
> +{ { 0, 0, 0, 0 }, 0 },
> +ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT), /* pipe ratio < 88.8 */
> +ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT), /* for pipe ratio >= 88.8 */
> +ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT), /* pipe ratio < 88.8 */
> +ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT), /* for pipe ratio >= 88.8 */
> +ICL_PIPE_C_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT), /* pipe ratio < 88.8 */
> +ICL_PIPE_C_DBUF_SLICES(DBUF_S2_BIT), /* for pipe ratio >= 88.8 */
> +ICL_PIPE_AB_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_AC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_BC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ABC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
> +};
> +
> +/*
> + * Table taken from Bspec 49255
> + * Pipes do have some preferred DBuf slice affinity,
> + * plus there are some hardcoded requirements on how
> + * those should be distributed for multipipe scenarios.
> + * For more DBuf slices algorithm can get even more messy
> + * and less readable, so decided to use a table almost
> + * as is from BSpec itself - that way it is at least easier
> + * to compare, change and check.
> + */
> +struct dbuf_slice_conf_entry tgl_allowed_dbufs[] = {
> +{ { 0, 0, 0, 0 }, 0 },
> +ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> +ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> +ICL_PIPE_C_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> +ICL_PIPE_D_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> +ICL_PIPE_AB_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
The column order in the bspec is bizarre (D, C, A, B), but isn't this
one backwards? Pipe A gets S2, pipe B gets S1? Or do we think this is
a bspec mistake (it does seem unusual)?
> +ICL_PIPE_AC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_BC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_AD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_BD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_CD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ABD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ABC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ACD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
> +ICL_PIPE_BCD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ABCD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
> +};
> +
> +/*
> + * This function finds an entry with same enabled pipe configuration and
> + * returns correspondent DBuf slice mask as stated in BSpec for particular
> + * platform.
> + */
> +static u32 icl_get_allowed_dbuf_mask(int pipe,
> + u32 active_pipes,
> + const struct intel_crtc_state *crtc_state)
> +{
> + int i;
> + uint_fixed_16_16_t pipe_ratio;
> +
> + /*
> + * Calculate pipe ratio as stated in BSpec 28692
> + */
> + pipe_ratio = icl_get_pipe_ratio(crtc_state);
> +
> + for (i = 0; i < ARRAY_SIZE(icl_allowed_dbufs); i++) {
> + if (icl_allowed_dbufs[i].active_pipes == active_pipes) {
> + if (hweight32(active_pipes) == 1) {
> + /*
> + * According to BSpec 12716: if ratio >= 88.8
> + * always use single dbuf slice.
> + */
> + if (pipe_ratio.val >= div_fixed16(888, 10).val)
> + ++i;
> + }
> + return icl_allowed_dbufs[i].dbuf_mask[pipe];
> + }
> + }
> + return 0;
> +}
> +
> +/*
> + * This function finds an entry with same enabled pipe configuration and
> + * returns correspondent DBuf slice mask as stated in BSpec for particular
> + * platform.
> + */
> +static u32 tgl_get_allowed_dbuf_mask(int pipe,
> + u32 active_pipes,
> + const struct intel_crtc_state *crtc_state)
> +{
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(tgl_allowed_dbufs); i++) {
> + if (tgl_allowed_dbufs[i].active_pipes == active_pipes)
> + return tgl_allowed_dbufs[i].dbuf_mask[pipe];
> + }
> + return 0;
> +}
> +
> +u32 i915_get_allowed_dbuf_mask(struct drm_i915_private *dev_priv,
> + int pipe, u32 active_pipes,
> + const struct intel_crtc_state *crtc_state)
> +{
> + if (IS_GEN(dev_priv, 11))
> + return icl_get_allowed_dbuf_mask(pipe,
> + active_pipes,
> + crtc_state);
> + else if (IS_GEN(dev_priv, 12))
> + return tgl_get_allowed_dbuf_mask(pipe,
> + active_pipes,
> + crtc_state);
> + /*
> + * For anything else just return one slice yet.
> + * Should be extended for other platforms.
> + */
> + return DBUF_S1_BIT;
> +}
> +
> static u64
> icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
> u64 *plane_data_rate)
> --
> 2.17.1
>
--
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation
(916) 356-2795
More information about the Intel-gfx
mailing list