[Intel-gfx] [PATCH v2 1/4] drm/i915/fbc: Rework cfb stride/size calculations

Shankar, Uma uma.shankar at intel.com
Wed Sep 22 18:26:04 UTC 2021



> -----Original Message-----
> From: Ville Syrjala <ville.syrjala at linux.intel.com>
> Sent: Tuesday, September 21, 2021 8:55 PM
> To: intel-gfx at lists.freedesktop.org
> Cc: Shankar, Uma <uma.shankar at intel.com>
> Subject: [PATCH v2 1/4] drm/i915/fbc: Rework cfb stride/size calculations
> 
> From: Ville Syrjälä <ville.syrjala at linux.intel.com>
> 
> The code to calculate the cfb stride/size is a bit of mess.
> The cfb size is getting calculated based purely on the plane stride and plane height.
> That doesn't account for extra alignment we want for the cfb stride. The gen9
> override stride OTOH is just calculated based on the plane width, and it does try to
> make things more aligned but any extra alignment added there is not considered in
> the cfb size calculations.
> So not at all convinced this is working as intended. Additionally the compression limit
> handling is split between the cfb allocation code and g4x_dpfc_ctl_limit() (for the
> 16bpp case), which is just confusing.
> 
> Let's streamline the whole thing:
> - Start with the plane stride, convert that into cfb stride (cfb is
>   always 4 bytes per pixel). All the calculations will assume 1:1
>   compression limit since that will give us the max values, and we
>   don't yet know how much stolen memory we will be able to allocate
> - Align the cfb stride to 512 bytes on modern platforms. This guarantees
>   the 4 line segment will be 512 byte aligned regardles of the final
>   compression limit we choose later. The 512 byte alignment for the
>   segment is required by at least some of the platforms, and just doing
>   it always seems like the easiest option
> - Figure out if we need to use the override stride or not. For X-tiled
>   it's never needed since the plane stride is already 512 byte aligned,
>   for Y-tiled it will be needed if the plane stride is not a multiple
>   of 512 bytes, and for linear it's apparently always needed because the
>   hardware miscalculates the cfb stride as PLANE_STRIDE*512 instead of
>   the PLANE_STRIDE*64 that it use with linear.
> - The cfb size will be calculated based on the aligned cfb stride to
>   guarantee we actually reserved enough stolen memory and the FBC hw
>   won't end up scribbling over whatever else is allocated in stolen
> - The compression limit handling we just do fully in the cfb allocation
>   code to make things less confusing
> 
> v2: Write the min cfb segment stride calculation in a more
>     explicit way to make it clear what is going on

Looks Good to me.
Reviewed-by: Uma Shankar <uma.shankar at intel.com>

> Cc: Uma Shankar <uma.shankar at intel.com>
> Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_fbc.c | 163 +++++++++++++++--------
>  drivers/gpu/drm/i915/i915_drv.h          |   4 +-
>  2 files changed, 112 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c
> b/drivers/gpu/drm/i915/display/intel_fbc.c
> index b1c1a23c36be..f51871f39eb6 100644
> --- a/drivers/gpu/drm/i915/display/intel_fbc.c
> +++ b/drivers/gpu/drm/i915/display/intel_fbc.c
> @@ -62,19 +62,76 @@ static void intel_fbc_get_plane_source_size(const struct
> intel_fbc_state_cache *
>  		*height = cache->plane.src_h;
>  }
> 
> -static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv,
> -					const struct intel_fbc_state_cache *cache)
> +/* plane stride in pixels */
> +static unsigned int intel_fbc_plane_stride(const struct
> +intel_plane_state *plane_state)
>  {
> -	int lines;
> +	const struct drm_framebuffer *fb = plane_state->hw.fb;
> +	unsigned int stride;
> +
> +	stride = plane_state->view.color_plane[0].stride;
> +	if (!drm_rotation_90_or_270(plane_state->hw.rotation))
> +		stride /= fb->format->cpp[0];
> +
> +	return stride;
> +}
> +
> +/* plane stride based cfb stride in bytes, assuming 1:1 compression
> +limit */ static unsigned int _intel_fbc_cfb_stride(const struct
> +intel_fbc_state_cache *cache) {
> +	unsigned int cpp = 4; /* FBC always 4 bytes per pixel */
> +
> +	return cache->fb.stride * cpp;
> +}
> +
> +/* minimum acceptable cfb stride in bytes, assuming 1:1 compression
> +limit */ static unsigned int skl_fbc_min_cfb_stride(const struct
> +intel_fbc_state_cache *cache) {
> +	unsigned int limit = 4; /* 1:4 compression limit is the worst case */
> +	unsigned int cpp = 4; /* FBC always 4 bytes per pixel */
> +	unsigned int height = 4; /* FBC segment is 4 lines */
> +	unsigned int stride;
> +
> +	/* minimum segment stride we can use */
> +	stride = cache->plane.src_w * cpp * height / limit;
> +
> +	/*
> +	 * At least some of the platforms require each 4 line segment to
> +	 * be 512 byte aligned. Just do it always for simplicity.
> +	 */
> +	stride = ALIGN(stride, 512);
> +
> +	/* convert back to single line equivalent with 1:1 compression limit */
> +	return stride * limit / height;
> +}
> +
> +/* properly aligned cfb stride in bytes, assuming 1:1 compression limit
> +*/ static unsigned int intel_fbc_cfb_stride(struct drm_i915_private *i915,
> +					 const struct intel_fbc_state_cache *cache)
> {
> +	unsigned int stride = _intel_fbc_cfb_stride(cache);
> +
> +	/*
> +	 * At least some of the platforms require each 4 line segment to
> +	 * be 512 byte aligned. Aligning each line to 512 bytes guarantees
> +	 * that regardless of the compression limit we choose later.
> +	 */
> +	if (DISPLAY_VER(i915) == 9)
> +		return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(cache));
> +	else
> +		return stride;
> +}
> +
> +static unsigned int intel_fbc_cfb_size(struct drm_i915_private *dev_priv,
> +				       const struct intel_fbc_state_cache *cache) {
> +	int lines = cache->plane.src_h;
> 
> -	intel_fbc_get_plane_source_size(cache, NULL, &lines);
>  	if (DISPLAY_VER(dev_priv) == 7)
>  		lines = min(lines, 2048);
>  	else if (DISPLAY_VER(dev_priv) >= 8)
>  		lines = min(lines, 2560);
> 
> -	/* Hardware needs the full buffer stride, not just the active area. */
> -	return lines * cache->fb.stride;
> +	return lines * intel_fbc_cfb_stride(dev_priv, cache);
>  }
> 
>  static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv) @@ -150,15
> +207,9 @@ static bool i8xx_fbc_is_active(struct drm_i915_private *dev_priv)
> 
>  static u32 g4x_dpfc_ctl_limit(struct drm_i915_private *i915)  {
> -	const struct intel_fbc_reg_params *params = &i915->fbc.params;
> -	int limit = i915->fbc.limit;
> -
> -	if (params->fb.format->cpp[0] == 2)
> -		limit <<= 1;
> -
> -	switch (limit) {
> +	switch (i915->fbc.limit) {
>  	default:
> -		MISSING_CASE(limit);
> +		MISSING_CASE(i915->fbc.limit);
>  		fallthrough;
>  	case 1:
>  		return DPFC_CTL_LIMIT_1X;
> @@ -301,7 +352,8 @@ static bool ilk_fbc_is_active(struct drm_i915_private
> *dev_priv)
> 
>  static void gen7_fbc_activate(struct drm_i915_private *dev_priv)  {
> -	struct intel_fbc_reg_params *params = &dev_priv->fbc.params;
> +	struct intel_fbc *fbc = &dev_priv->fbc;
> +	const struct intel_fbc_reg_params *params = &fbc->params;
>  	u32 dpfc_ctl;
> 
>  	/* Display WA #0529: skl, kbl, bxt. */ @@ -310,7 +362,7 @@ static void
> gen7_fbc_activate(struct drm_i915_private *dev_priv)
> 
>  		if (params->override_cfb_stride)
>  			val |= CHICKEN_FBC_STRIDE_OVERRIDE |
> -				CHICKEN_FBC_STRIDE(params-
> >override_cfb_stride);
> +				CHICKEN_FBC_STRIDE(params-
> >override_cfb_stride / fbc->limit);
> 
>  		intel_de_rmw(dev_priv, CHICKEN_MISC_4,
>  			     CHICKEN_FBC_STRIDE_OVERRIDE |
> @@ -443,7 +495,12 @@ static u64 intel_fbc_stolen_end(struct drm_i915_private
> *dev_priv)
>  	return min(end, intel_fbc_cfb_base_max(dev_priv));
>  }
> 
> -static int intel_fbc_max_limit(struct drm_i915_private *dev_priv, int fb_cpp)
> +static int intel_fbc_min_limit(int fb_cpp) {
> +	return fb_cpp == 2 ? 2 : 1;
> +}
> +
> +static int intel_fbc_max_limit(struct drm_i915_private *dev_priv)
>  {
>  	/*
>  	 * FIXME: FBC1 can have arbitrary cfb stride, @@ -457,7 +514,7 @@ static
> int intel_fbc_max_limit(struct drm_i915_private *dev_priv, int fb_cpp)
>  		return 1;
> 
>  	/* FBC2 can only do 1:1, 1:2, 1:4 */
> -	return fb_cpp == 2 ? 2 : 4;
> +	return 4;
>  }
> 
>  static int find_compression_limit(struct drm_i915_private *dev_priv, @@ -466,7
> +523,9 @@ static int find_compression_limit(struct drm_i915_private *dev_priv,  {
>  	struct intel_fbc *fbc = &dev_priv->fbc;
>  	u64 end = intel_fbc_stolen_end(dev_priv);
> -	int ret, limit = 1;
> +	int ret, limit = intel_fbc_min_limit(fb_cpp);
> +
> +	size /= limit;
> 
>  	/* Try to over-allocate to reduce reallocations and fragmentation. */
>  	ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc-
> >compressed_fb, @@ -474,7 +533,7 @@ static int find_compression_limit(struct
> drm_i915_private *dev_priv,
>  	if (ret == 0)
>  		return limit;
> 
> -	for (; limit <= intel_fbc_max_limit(dev_priv, fb_cpp); limit <<= 1) {
> +	for (; limit <= intel_fbc_max_limit(dev_priv); limit <<= 1) {
>  		ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc-
> >compressed_fb,
>  							   size >>= 1, 4096, 0, end);
>  		if (ret == 0)
> @@ -505,10 +564,9 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private
> *dev_priv,
>  	ret = find_compression_limit(dev_priv, size, fb_cpp);
>  	if (!ret)
>  		goto err_llb;
> -	else if (ret > 1) {
> +	else if (ret > intel_fbc_min_limit(fb_cpp))
>  		drm_info_once(&dev_priv->drm,
>  			      "Reducing the compressed framebuffer size. This may
> lead to less power savings than a non-reduced-size. Try to increase stolen memory
> size if available in BIOS.\n");
> -	}
> 
>  	fbc->limit = ret;
> 
> @@ -719,11 +777,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc
> *crtc,
> 
>  	cache->fb.format = fb->format;
>  	cache->fb.modifier = fb->modifier;
> -
> -	/* FIXME is this correct? */
> -	cache->fb.stride = plane_state->view.color_plane[0].stride;
> -	if (drm_rotation_90_or_270(plane_state->hw.rotation))
> -		cache->fb.stride *= fb->format->cpp[0];
> +	cache->fb.stride = intel_fbc_plane_stride(plane_state);
> 
>  	/* FBC1 compression interval: arbitrary choice of 1 second */
>  	cache->interval = drm_mode_vrefresh(&crtc_state->hw.adjusted_mode);
> @@ -746,27 +800,29 @@ static bool intel_fbc_cfb_size_changed(struct
> drm_i915_private *dev_priv)  {
>  	struct intel_fbc *fbc = &dev_priv->fbc;
> 
> -	return intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache) >
> +	return intel_fbc_cfb_size(dev_priv, &fbc->state_cache) >
>  		fbc->compressed_fb.size * fbc->limit;  }
> 
> -static u16 intel_fbc_override_cfb_stride(struct drm_i915_private *dev_priv)
> +static u16 intel_fbc_override_cfb_stride(struct drm_i915_private *dev_priv,
> +					 const struct intel_fbc_state_cache *cache)
>  {
> -	struct intel_fbc *fbc = &dev_priv->fbc;
> -	struct intel_fbc_state_cache *cache = &fbc->state_cache;
> +	unsigned int stride = _intel_fbc_cfb_stride(cache);
> +	unsigned int stride_aligned = intel_fbc_cfb_stride(dev_priv, cache);
> 
> -	if ((DISPLAY_VER(dev_priv) == 9) &&
> -	    cache->fb.modifier != I915_FORMAT_MOD_X_TILED)
> -		return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->limit) * 8;
> -	else
> -		return 0;
> -}
> +	/*
> +	 * Override stride in 64 byte units per 4 line segment.
> +	 *
> +	 * Gen9 hw miscalculates cfb stride for linear as
> +	 * PLANE_STRIDE*512 instead of PLANE_STRIDE*64, so
> +	 * we always need to use the override there.
> +	 */
> +	if (stride != stride_aligned ||
> +	    (DISPLAY_VER(dev_priv) == 9 &&
> +	     cache->fb.modifier == DRM_FORMAT_MOD_LINEAR))
> +		return stride_aligned * 4 / 64;
> 
> -static bool intel_fbc_override_cfb_stride_changed(struct drm_i915_private
> *dev_priv) -{
> -	struct intel_fbc *fbc = &dev_priv->fbc;
> -
> -	return fbc->params.override_cfb_stride !=
> intel_fbc_override_cfb_stride(dev_priv);
> +	return 0;
>  }
> 
>  static bool intel_fbc_can_enable(struct drm_i915_private *dev_priv) @@ -861,7
> +917,8 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
>  		return false;
>  	}
> 
> -	if (!stride_is_valid(dev_priv, cache->fb.modifier, cache->fb.stride)) {
> +	if (!stride_is_valid(dev_priv, cache->fb.modifier,
> +			     cache->fb.stride * cache->fb.format->cpp[0])) {
>  		fbc->no_fbc_reason = "framebuffer stride not supported";
>  		return false;
>  	}
> @@ -949,9 +1006,9 @@ static void intel_fbc_get_reg_params(struct intel_crtc
> *crtc,
>  	params->fb.modifier = cache->fb.modifier;
>  	params->fb.stride = cache->fb.stride;
> 
> -	params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache);
> -
> -	params->override_cfb_stride = cache->override_cfb_stride;
> +	params->cfb_stride = intel_fbc_cfb_stride(dev_priv, cache);
> +	params->cfb_size = intel_fbc_cfb_size(dev_priv, cache);
> +	params->override_cfb_stride = intel_fbc_override_cfb_stride(dev_priv,
> +cache);
> 
>  	params->plane_visible = cache->plane.visible;  } @@ -982,10 +1039,13 @@
> static bool intel_fbc_can_flip_nuke(const struct intel_crtc_state *crtc_state)
>  	if (params->fb.stride != cache->fb.stride)
>  		return false;
> 
> -	if (params->cfb_size != intel_fbc_calculate_cfb_size(dev_priv, cache))
> +	if (params->cfb_stride != intel_fbc_cfb_stride(dev_priv, cache))
>  		return false;
> 
> -	if (params->override_cfb_stride != cache->override_cfb_stride)
> +	if (params->cfb_size != intel_fbc_cfb_size(dev_priv, cache))
> +		return false;
> +
> +	if (params->override_cfb_stride !=
> +intel_fbc_override_cfb_stride(dev_priv, cache))
>  		return false;
> 
>  	return true;
> @@ -1258,8 +1318,7 @@ static void intel_fbc_enable(struct intel_atomic_state
> *state,
> 
>  	if (fbc->crtc) {
>  		if (fbc->crtc != crtc ||
> -		    (!intel_fbc_cfb_size_changed(dev_priv) &&
> -		     !intel_fbc_override_cfb_stride_changed(dev_priv)))
> +		    !intel_fbc_cfb_size_changed(dev_priv))
>  			goto out;
> 
>  		__intel_fbc_disable(dev_priv);
> @@ -1274,15 +1333,13 @@ static void intel_fbc_enable(struct intel_atomic_state
> *state,
>  		goto out;
> 
>  	if (intel_fbc_alloc_cfb(dev_priv,
> -				intel_fbc_calculate_cfb_size(dev_priv, cache),
> +				intel_fbc_cfb_size(dev_priv, cache),
>  				plane_state->hw.fb->format->cpp[0])) {
>  		cache->plane.visible = false;
>  		fbc->no_fbc_reason = "not enough stolen memory";
>  		goto out;
>  	}
> 
> -	cache->override_cfb_stride = intel_fbc_override_cfb_stride(dev_priv);
> -
>  	drm_dbg_kms(&dev_priv->drm, "Enabling FBC on pipe %c\n",
>  		    pipe_name(crtc->pipe));
>  	fbc->no_fbc_reason = "FBC enabled but not active yet\n"; diff --git
> a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index
> cc355aa05dbf..804c2a470e94 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -453,7 +453,6 @@ struct intel_fbc {
>  		} fb;
> 
>  		unsigned int fence_y_offset;
> -		u16 override_cfb_stride;
>  		u16 interval;
>  		s8 fence_id;
>  		bool psr2_active;
> @@ -478,7 +477,8 @@ struct intel_fbc {
>  			u64 modifier;
>  		} fb;
> 
> -		int cfb_size;
> +		unsigned int cfb_stride;
> +		unsigned int cfb_size;
>  		unsigned int fence_y_offset;
>  		u16 override_cfb_stride;
>  		u16 interval;
> --
> 2.32.0



More information about the Intel-gfx mailing list