[Intel-gfx] [PATCH v2 1/4] drm/i915/fbc: Rework cfb stride/size calculations
Shankar, Uma
uma.shankar at intel.com
Wed Sep 22 18:26:04 UTC 2021
> -----Original Message-----
> From: Ville Syrjala <ville.syrjala at linux.intel.com>
> Sent: Tuesday, September 21, 2021 8:55 PM
> To: intel-gfx at lists.freedesktop.org
> Cc: Shankar, Uma <uma.shankar at intel.com>
> Subject: [PATCH v2 1/4] drm/i915/fbc: Rework cfb stride/size calculations
>
> From: Ville Syrjälä <ville.syrjala at linux.intel.com>
>
> The code to calculate the cfb stride/size is a bit of mess.
> The cfb size is getting calculated based purely on the plane stride and plane height.
> That doesn't account for extra alignment we want for the cfb stride. The gen9
> override stride OTOH is just calculated based on the plane width, and it does try to
> make things more aligned but any extra alignment added there is not considered in
> the cfb size calculations.
> So not at all convinced this is working as intended. Additionally the compression limit
> handling is split between the cfb allocation code and g4x_dpfc_ctl_limit() (for the
> 16bpp case), which is just confusing.
>
> Let's streamline the whole thing:
> - Start with the plane stride, convert that into cfb stride (cfb is
> always 4 bytes per pixel). All the calculations will assume 1:1
> compression limit since that will give us the max values, and we
> don't yet know how much stolen memory we will be able to allocate
> - Align the cfb stride to 512 bytes on modern platforms. This guarantees
> the 4 line segment will be 512 byte aligned regardles of the final
> compression limit we choose later. The 512 byte alignment for the
> segment is required by at least some of the platforms, and just doing
> it always seems like the easiest option
> - Figure out if we need to use the override stride or not. For X-tiled
> it's never needed since the plane stride is already 512 byte aligned,
> for Y-tiled it will be needed if the plane stride is not a multiple
> of 512 bytes, and for linear it's apparently always needed because the
> hardware miscalculates the cfb stride as PLANE_STRIDE*512 instead of
> the PLANE_STRIDE*64 that it use with linear.
> - The cfb size will be calculated based on the aligned cfb stride to
> guarantee we actually reserved enough stolen memory and the FBC hw
> won't end up scribbling over whatever else is allocated in stolen
> - The compression limit handling we just do fully in the cfb allocation
> code to make things less confusing
>
> v2: Write the min cfb segment stride calculation in a more
> explicit way to make it clear what is going on
Looks Good to me.
Reviewed-by: Uma Shankar <uma.shankar at intel.com>
> Cc: Uma Shankar <uma.shankar at intel.com>
> Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/display/intel_fbc.c | 163 +++++++++++++++--------
> drivers/gpu/drm/i915/i915_drv.h | 4 +-
> 2 files changed, 112 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c
> b/drivers/gpu/drm/i915/display/intel_fbc.c
> index b1c1a23c36be..f51871f39eb6 100644
> --- a/drivers/gpu/drm/i915/display/intel_fbc.c
> +++ b/drivers/gpu/drm/i915/display/intel_fbc.c
> @@ -62,19 +62,76 @@ static void intel_fbc_get_plane_source_size(const struct
> intel_fbc_state_cache *
> *height = cache->plane.src_h;
> }
>
> -static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv,
> - const struct intel_fbc_state_cache *cache)
> +/* plane stride in pixels */
> +static unsigned int intel_fbc_plane_stride(const struct
> +intel_plane_state *plane_state)
> {
> - int lines;
> + const struct drm_framebuffer *fb = plane_state->hw.fb;
> + unsigned int stride;
> +
> + stride = plane_state->view.color_plane[0].stride;
> + if (!drm_rotation_90_or_270(plane_state->hw.rotation))
> + stride /= fb->format->cpp[0];
> +
> + return stride;
> +}
> +
> +/* plane stride based cfb stride in bytes, assuming 1:1 compression
> +limit */ static unsigned int _intel_fbc_cfb_stride(const struct
> +intel_fbc_state_cache *cache) {
> + unsigned int cpp = 4; /* FBC always 4 bytes per pixel */
> +
> + return cache->fb.stride * cpp;
> +}
> +
> +/* minimum acceptable cfb stride in bytes, assuming 1:1 compression
> +limit */ static unsigned int skl_fbc_min_cfb_stride(const struct
> +intel_fbc_state_cache *cache) {
> + unsigned int limit = 4; /* 1:4 compression limit is the worst case */
> + unsigned int cpp = 4; /* FBC always 4 bytes per pixel */
> + unsigned int height = 4; /* FBC segment is 4 lines */
> + unsigned int stride;
> +
> + /* minimum segment stride we can use */
> + stride = cache->plane.src_w * cpp * height / limit;
> +
> + /*
> + * At least some of the platforms require each 4 line segment to
> + * be 512 byte aligned. Just do it always for simplicity.
> + */
> + stride = ALIGN(stride, 512);
> +
> + /* convert back to single line equivalent with 1:1 compression limit */
> + return stride * limit / height;
> +}
> +
> +/* properly aligned cfb stride in bytes, assuming 1:1 compression limit
> +*/ static unsigned int intel_fbc_cfb_stride(struct drm_i915_private *i915,
> + const struct intel_fbc_state_cache *cache)
> {
> + unsigned int stride = _intel_fbc_cfb_stride(cache);
> +
> + /*
> + * At least some of the platforms require each 4 line segment to
> + * be 512 byte aligned. Aligning each line to 512 bytes guarantees
> + * that regardless of the compression limit we choose later.
> + */
> + if (DISPLAY_VER(i915) == 9)
> + return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(cache));
> + else
> + return stride;
> +}
> +
> +static unsigned int intel_fbc_cfb_size(struct drm_i915_private *dev_priv,
> + const struct intel_fbc_state_cache *cache) {
> + int lines = cache->plane.src_h;
>
> - intel_fbc_get_plane_source_size(cache, NULL, &lines);
> if (DISPLAY_VER(dev_priv) == 7)
> lines = min(lines, 2048);
> else if (DISPLAY_VER(dev_priv) >= 8)
> lines = min(lines, 2560);
>
> - /* Hardware needs the full buffer stride, not just the active area. */
> - return lines * cache->fb.stride;
> + return lines * intel_fbc_cfb_stride(dev_priv, cache);
> }
>
> static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv) @@ -150,15
> +207,9 @@ static bool i8xx_fbc_is_active(struct drm_i915_private *dev_priv)
>
> static u32 g4x_dpfc_ctl_limit(struct drm_i915_private *i915) {
> - const struct intel_fbc_reg_params *params = &i915->fbc.params;
> - int limit = i915->fbc.limit;
> -
> - if (params->fb.format->cpp[0] == 2)
> - limit <<= 1;
> -
> - switch (limit) {
> + switch (i915->fbc.limit) {
> default:
> - MISSING_CASE(limit);
> + MISSING_CASE(i915->fbc.limit);
> fallthrough;
> case 1:
> return DPFC_CTL_LIMIT_1X;
> @@ -301,7 +352,8 @@ static bool ilk_fbc_is_active(struct drm_i915_private
> *dev_priv)
>
> static void gen7_fbc_activate(struct drm_i915_private *dev_priv) {
> - struct intel_fbc_reg_params *params = &dev_priv->fbc.params;
> + struct intel_fbc *fbc = &dev_priv->fbc;
> + const struct intel_fbc_reg_params *params = &fbc->params;
> u32 dpfc_ctl;
>
> /* Display WA #0529: skl, kbl, bxt. */ @@ -310,7 +362,7 @@ static void
> gen7_fbc_activate(struct drm_i915_private *dev_priv)
>
> if (params->override_cfb_stride)
> val |= CHICKEN_FBC_STRIDE_OVERRIDE |
> - CHICKEN_FBC_STRIDE(params-
> >override_cfb_stride);
> + CHICKEN_FBC_STRIDE(params-
> >override_cfb_stride / fbc->limit);
>
> intel_de_rmw(dev_priv, CHICKEN_MISC_4,
> CHICKEN_FBC_STRIDE_OVERRIDE |
> @@ -443,7 +495,12 @@ static u64 intel_fbc_stolen_end(struct drm_i915_private
> *dev_priv)
> return min(end, intel_fbc_cfb_base_max(dev_priv));
> }
>
> -static int intel_fbc_max_limit(struct drm_i915_private *dev_priv, int fb_cpp)
> +static int intel_fbc_min_limit(int fb_cpp) {
> + return fb_cpp == 2 ? 2 : 1;
> +}
> +
> +static int intel_fbc_max_limit(struct drm_i915_private *dev_priv)
> {
> /*
> * FIXME: FBC1 can have arbitrary cfb stride, @@ -457,7 +514,7 @@ static
> int intel_fbc_max_limit(struct drm_i915_private *dev_priv, int fb_cpp)
> return 1;
>
> /* FBC2 can only do 1:1, 1:2, 1:4 */
> - return fb_cpp == 2 ? 2 : 4;
> + return 4;
> }
>
> static int find_compression_limit(struct drm_i915_private *dev_priv, @@ -466,7
> +523,9 @@ static int find_compression_limit(struct drm_i915_private *dev_priv, {
> struct intel_fbc *fbc = &dev_priv->fbc;
> u64 end = intel_fbc_stolen_end(dev_priv);
> - int ret, limit = 1;
> + int ret, limit = intel_fbc_min_limit(fb_cpp);
> +
> + size /= limit;
>
> /* Try to over-allocate to reduce reallocations and fragmentation. */
> ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc-
> >compressed_fb, @@ -474,7 +533,7 @@ static int find_compression_limit(struct
> drm_i915_private *dev_priv,
> if (ret == 0)
> return limit;
>
> - for (; limit <= intel_fbc_max_limit(dev_priv, fb_cpp); limit <<= 1) {
> + for (; limit <= intel_fbc_max_limit(dev_priv); limit <<= 1) {
> ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc-
> >compressed_fb,
> size >>= 1, 4096, 0, end);
> if (ret == 0)
> @@ -505,10 +564,9 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private
> *dev_priv,
> ret = find_compression_limit(dev_priv, size, fb_cpp);
> if (!ret)
> goto err_llb;
> - else if (ret > 1) {
> + else if (ret > intel_fbc_min_limit(fb_cpp))
> drm_info_once(&dev_priv->drm,
> "Reducing the compressed framebuffer size. This may
> lead to less power savings than a non-reduced-size. Try to increase stolen memory
> size if available in BIOS.\n");
> - }
>
> fbc->limit = ret;
>
> @@ -719,11 +777,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc
> *crtc,
>
> cache->fb.format = fb->format;
> cache->fb.modifier = fb->modifier;
> -
> - /* FIXME is this correct? */
> - cache->fb.stride = plane_state->view.color_plane[0].stride;
> - if (drm_rotation_90_or_270(plane_state->hw.rotation))
> - cache->fb.stride *= fb->format->cpp[0];
> + cache->fb.stride = intel_fbc_plane_stride(plane_state);
>
> /* FBC1 compression interval: arbitrary choice of 1 second */
> cache->interval = drm_mode_vrefresh(&crtc_state->hw.adjusted_mode);
> @@ -746,27 +800,29 @@ static bool intel_fbc_cfb_size_changed(struct
> drm_i915_private *dev_priv) {
> struct intel_fbc *fbc = &dev_priv->fbc;
>
> - return intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache) >
> + return intel_fbc_cfb_size(dev_priv, &fbc->state_cache) >
> fbc->compressed_fb.size * fbc->limit; }
>
> -static u16 intel_fbc_override_cfb_stride(struct drm_i915_private *dev_priv)
> +static u16 intel_fbc_override_cfb_stride(struct drm_i915_private *dev_priv,
> + const struct intel_fbc_state_cache *cache)
> {
> - struct intel_fbc *fbc = &dev_priv->fbc;
> - struct intel_fbc_state_cache *cache = &fbc->state_cache;
> + unsigned int stride = _intel_fbc_cfb_stride(cache);
> + unsigned int stride_aligned = intel_fbc_cfb_stride(dev_priv, cache);
>
> - if ((DISPLAY_VER(dev_priv) == 9) &&
> - cache->fb.modifier != I915_FORMAT_MOD_X_TILED)
> - return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->limit) * 8;
> - else
> - return 0;
> -}
> + /*
> + * Override stride in 64 byte units per 4 line segment.
> + *
> + * Gen9 hw miscalculates cfb stride for linear as
> + * PLANE_STRIDE*512 instead of PLANE_STRIDE*64, so
> + * we always need to use the override there.
> + */
> + if (stride != stride_aligned ||
> + (DISPLAY_VER(dev_priv) == 9 &&
> + cache->fb.modifier == DRM_FORMAT_MOD_LINEAR))
> + return stride_aligned * 4 / 64;
>
> -static bool intel_fbc_override_cfb_stride_changed(struct drm_i915_private
> *dev_priv) -{
> - struct intel_fbc *fbc = &dev_priv->fbc;
> -
> - return fbc->params.override_cfb_stride !=
> intel_fbc_override_cfb_stride(dev_priv);
> + return 0;
> }
>
> static bool intel_fbc_can_enable(struct drm_i915_private *dev_priv) @@ -861,7
> +917,8 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
> return false;
> }
>
> - if (!stride_is_valid(dev_priv, cache->fb.modifier, cache->fb.stride)) {
> + if (!stride_is_valid(dev_priv, cache->fb.modifier,
> + cache->fb.stride * cache->fb.format->cpp[0])) {
> fbc->no_fbc_reason = "framebuffer stride not supported";
> return false;
> }
> @@ -949,9 +1006,9 @@ static void intel_fbc_get_reg_params(struct intel_crtc
> *crtc,
> params->fb.modifier = cache->fb.modifier;
> params->fb.stride = cache->fb.stride;
>
> - params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache);
> -
> - params->override_cfb_stride = cache->override_cfb_stride;
> + params->cfb_stride = intel_fbc_cfb_stride(dev_priv, cache);
> + params->cfb_size = intel_fbc_cfb_size(dev_priv, cache);
> + params->override_cfb_stride = intel_fbc_override_cfb_stride(dev_priv,
> +cache);
>
> params->plane_visible = cache->plane.visible; } @@ -982,10 +1039,13 @@
> static bool intel_fbc_can_flip_nuke(const struct intel_crtc_state *crtc_state)
> if (params->fb.stride != cache->fb.stride)
> return false;
>
> - if (params->cfb_size != intel_fbc_calculate_cfb_size(dev_priv, cache))
> + if (params->cfb_stride != intel_fbc_cfb_stride(dev_priv, cache))
> return false;
>
> - if (params->override_cfb_stride != cache->override_cfb_stride)
> + if (params->cfb_size != intel_fbc_cfb_size(dev_priv, cache))
> + return false;
> +
> + if (params->override_cfb_stride !=
> +intel_fbc_override_cfb_stride(dev_priv, cache))
> return false;
>
> return true;
> @@ -1258,8 +1318,7 @@ static void intel_fbc_enable(struct intel_atomic_state
> *state,
>
> if (fbc->crtc) {
> if (fbc->crtc != crtc ||
> - (!intel_fbc_cfb_size_changed(dev_priv) &&
> - !intel_fbc_override_cfb_stride_changed(dev_priv)))
> + !intel_fbc_cfb_size_changed(dev_priv))
> goto out;
>
> __intel_fbc_disable(dev_priv);
> @@ -1274,15 +1333,13 @@ static void intel_fbc_enable(struct intel_atomic_state
> *state,
> goto out;
>
> if (intel_fbc_alloc_cfb(dev_priv,
> - intel_fbc_calculate_cfb_size(dev_priv, cache),
> + intel_fbc_cfb_size(dev_priv, cache),
> plane_state->hw.fb->format->cpp[0])) {
> cache->plane.visible = false;
> fbc->no_fbc_reason = "not enough stolen memory";
> goto out;
> }
>
> - cache->override_cfb_stride = intel_fbc_override_cfb_stride(dev_priv);
> -
> drm_dbg_kms(&dev_priv->drm, "Enabling FBC on pipe %c\n",
> pipe_name(crtc->pipe));
> fbc->no_fbc_reason = "FBC enabled but not active yet\n"; diff --git
> a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index
> cc355aa05dbf..804c2a470e94 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -453,7 +453,6 @@ struct intel_fbc {
> } fb;
>
> unsigned int fence_y_offset;
> - u16 override_cfb_stride;
> u16 interval;
> s8 fence_id;
> bool psr2_active;
> @@ -478,7 +477,8 @@ struct intel_fbc {
> u64 modifier;
> } fb;
>
> - int cfb_size;
> + unsigned int cfb_stride;
> + unsigned int cfb_size;
> unsigned int fence_y_offset;
> u16 override_cfb_stride;
> u16 interval;
> --
> 2.32.0
More information about the Intel-gfx
mailing list