[PATCH 15/37] drm/amd/display: Use DML for MALL SS and Subvp allocation calculations

Rodrigo Siqueira Rodrigo.Siqueira at amd.com
Tue Jan 10 16:55:13 UTC 2023


From: Dillon Varone <Dillon.Varone at amd.com>

MALL SS and Subvp use the same calculations for determining the size of
the required allocation for a given surface, which is already done in
DML. Add an interface to extract this information from VBA variables and
use in their respective helper functions. Also refactor existing code to
remove stale workarounds.

Reviewed-by: Alvin Lee <Alvin.Lee2 at amd.com>
Acked-by: Rodrigo Siqueira <Rodrigo.Siqueira at amd.com>
Signed-off-by: Dillon Varone <Dillon.Varone at amd.com>
---
 .../drm/amd/display/dc/dcn32/dcn32_hwseq.c    | 146 ++-------------
 .../drm/amd/display/dc/dcn32/dcn32_resource.h |  13 +-
 .../display/dc/dcn32/dcn32_resource_helpers.c | 175 ++++++++----------
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  |  29 ++-
 .../drm/amd/display/dc/dml/display_mode_vba.c |   1 +
 .../drm/amd/display/dc/dml/display_mode_vba.h |   1 +
 .../gpu/drm/amd/display/dc/inc/core_types.h   |   6 +-
 7 files changed, 134 insertions(+), 237 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index f2cffb96ebf1..07362c66f023 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -207,151 +207,31 @@ static bool dcn32_check_no_memory_request_for_cab(struct dc *dc)
  */
 static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *ctx)
 {
-	int i, j;
-	struct dc_stream_state *stream = NULL;
-	struct dc_plane_state *plane = NULL;
-	uint32_t cursor_size = 0;
-	uint32_t total_lines = 0;
-	uint32_t lines_per_way = 0;
+	int i;
 	uint8_t num_ways = 0;
-	uint8_t bytes_per_pixel = 0;
-	uint8_t cursor_bpp = 0;
-	uint16_t mblk_width = 0;
-	uint16_t mblk_height = 0;
-	uint16_t mall_alloc_width_blk_aligned = 0;
-	uint16_t mall_alloc_height_blk_aligned = 0;
-	uint16_t num_mblks = 0;
-	uint32_t bytes_in_mall = 0;
-	uint32_t cache_lines_used = 0;
-	uint32_t cache_lines_per_plane = 0;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
-
-		/* If PSR is supported on an eDP panel that's connected, but that panel is
-		 * not in PSR at the time of trying to enter MALL SS, we have to include it
-		 * in the static screen CAB calculation
-		 */
-		if (!pipe->stream || !pipe->plane_state ||
-				(pipe->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED &&
-				pipe->stream->link->psr_settings.psr_allow_active) ||
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)
-			continue;
-
-		bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
-		mblk_width = DCN3_2_MBLK_WIDTH;
-		mblk_height = bytes_per_pixel == 4 ? DCN3_2_MBLK_HEIGHT_4BPE : DCN3_2_MBLK_HEIGHT_8BPE;
-
-		/* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width + blk_width - 1, blk_width) -
-		 * FLOOR(vp_x_start, blk_width)
-		 *
-		 * mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c
-		 */
-		mall_alloc_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x +
-				pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) -
-						(pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width);
-
-		/* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) -
-		 * FLOOR(vp_y_start, blk_height)
-		 *
-		 * mall_alloc_height_blk_aligned_l/c = full_vp_height_blk_aligned_l/c
-		 */
-		mall_alloc_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y +
-				pipe->plane_res.scl_data.viewport.height + mblk_height - 1) / mblk_height * mblk_height) -
-						(pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height);
-
-		num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
-				((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
-
-		/*For DCC:
-		 * meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1)
-		 */
-		if (pipe->plane_state->dcc.enable)
-			num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel +
-					(256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES);
+	uint32_t mall_ss_size_bytes = 0;
 
-		bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
-
-		/* (cache lines used is total bytes / cache_line size. Add +2 for worst case alignment
-		 * (MALL is 64-byte aligned)
-		 */
-		cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2;
-		cache_lines_used += cache_lines_per_plane;
-	}
+	mall_ss_size_bytes = ctx->bw_ctx.bw.dcn.mall_ss_size_bytes;
+	// TODO add additional logic for PSR active stream exclusion optimization
+	// mall_ss_psr_active_size_bytes = ctx->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes;
 
 	// Include cursor size for CAB allocation
-	for (j = 0; j < dc->res_pool->pipe_count; j++) {
-		struct pipe_ctx *pipe = &ctx->res_ctx.pipe_ctx[j];
-		struct hubp *hubp = pipe->plane_res.hubp;
-
-		if (pipe->stream && pipe->plane_state && hubp)
-			/* Find the cursor plane and use the exact size instead of
-			using the max for calculation */
-
-		if (hubp->curs_attr.width > 0) {
-				cursor_size = hubp->curs_attr.pitch * hubp->curs_attr.height;
-
-				switch (pipe->stream->cursor_attributes.color_format) {
-				case CURSOR_MODE_MONO:
-					cursor_size /= 2;
-					cursor_bpp = 4;
-					break;
-				case CURSOR_MODE_COLOR_1BIT_AND:
-				case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
-				case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
-					cursor_size *= 4;
-					cursor_bpp = 4;
-					break;
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &ctx->res_ctx.pipe_ctx[i];
 
-				case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
-				case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
-					cursor_size *= 8;
-					cursor_bpp = 8;
-					break;
-				}
+		if (!pipe->stream || !pipe->plane_state)
+			continue;
 
-				if (pipe->stream->cursor_position.enable && !dc->debug.alloc_extra_way_for_cursor &&
-						cursor_size > 16384) {
-					/* cursor_num_mblk = CEILING(num_cursors*cursor_width*cursor_width*cursor_Bpe/mblk_bytes, 1)
-					 */
-					cache_lines_used += (((cursor_size + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) /
-							DCN3_2_MALL_MBLK_SIZE_BYTES) * DCN3_2_MALL_MBLK_SIZE_BYTES) /
-							dc->caps.cache_line_size + 2;
-					break;
-				}
-			}
+		mall_ss_size_bytes += dcn32_helper_calculate_mall_bytes_for_cursor(dc, pipe, false);
 	}
 
 	// Convert number of cache lines required to number of ways
-	total_lines = dc->caps.max_cab_allocation_bytes / dc->caps.cache_line_size;
-	lines_per_way = total_lines / dc->caps.cache_num_ways;
-	num_ways = cache_lines_used / lines_per_way;
-
-	if (cache_lines_used % lines_per_way > 0)
-		num_ways++;
-
-	for (i = 0; i < ctx->stream_count; i++) {
-		stream = ctx->streams[i];
-		for (j = 0; j < ctx->stream_status[i].plane_count; j++) {
-			plane = ctx->stream_status[i].plane_states[j];
-
-			if (stream->cursor_position.enable && plane &&
-					dc->debug.alloc_extra_way_for_cursor &&
-					cursor_size > 16384) {
-				/* Cursor caching is not supported since it won't be on the same line.
-				 * So we need an extra line to accommodate it. With large cursors and a single 4k monitor
-				 * this case triggers corruption. If we're at the edge, then dont trigger display refresh
-				 * from MALL. We only need to cache cursor if its greater that 64x64 at 4 bpp.
-				 */
-				num_ways++;
-				/* We only expect one cursor plane */
-				break;
-			}
-		}
-	}
 	if (dc->debug.force_mall_ss_num_ways > 0) {
 		num_ways = dc->debug.force_mall_ss_num_ways;
+	} else {
+		num_ways = dcn32_helper_mall_bytes_to_ways(dc, mall_ss_size_bytes);
 	}
+
 	return num_ways;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
index 57ce1d670abe..40cda0f4c12c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
@@ -96,8 +96,17 @@ void dcn32_calculate_wm_and_dlg(
 		int pipe_cnt,
 		int vlevel);
 
-uint32_t dcn32_helper_calculate_num_ways_for_subvp
-		(struct dc *dc,
+uint32_t dcn32_helper_mall_bytes_to_ways(
+		struct dc *dc,
+		uint32_t total_size_in_mall_bytes);
+
+uint32_t dcn32_helper_calculate_mall_bytes_for_cursor(
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		bool ignore_cursor_buf);
+
+uint32_t dcn32_helper_calculate_num_ways_for_subvp(
+		struct dc *dc,
 		struct dc_state *context);
 
 void dcn32_merge_pipes_for_subvp(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index e5287e5f66d5..50f20549c951 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -33,13 +33,75 @@ static bool is_dual_plane(enum surface_pixel_format format)
 	return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
 }
 
+
+uint32_t dcn32_helper_mall_bytes_to_ways(
+		struct dc *dc,
+		uint32_t total_size_in_mall_bytes)
+{
+	uint32_t cache_lines_used, lines_per_way, total_cache_lines, num_ways;
+
+	/* add 2 lines for worst case alignment */
+	cache_lines_used = total_size_in_mall_bytes / dc->caps.cache_line_size + 2;
+
+	total_cache_lines = dc->caps.max_cab_allocation_bytes / dc->caps.cache_line_size;
+	lines_per_way = total_cache_lines / dc->caps.cache_num_ways;
+	num_ways = cache_lines_used / lines_per_way;
+	if (cache_lines_used % lines_per_way > 0)
+		num_ways++;
+
+	return num_ways;
+}
+
+uint32_t dcn32_helper_calculate_mall_bytes_for_cursor(
+		struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		bool ignore_cursor_buf)
+{
+	struct hubp *hubp = pipe_ctx->plane_res.hubp;
+	uint32_t cursor_size = hubp->curs_attr.pitch * hubp->curs_attr.height;
+	uint32_t cursor_bpp = 4;
+	uint32_t cursor_mall_size_bytes = 0;
+
+	switch (pipe_ctx->stream->cursor_attributes.color_format) {
+	case CURSOR_MODE_MONO:
+		cursor_size /= 2;
+		cursor_bpp = 4;
+		break;
+	case CURSOR_MODE_COLOR_1BIT_AND:
+	case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
+	case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
+		cursor_size *= 4;
+		cursor_bpp = 4;
+		break;
+
+	case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
+	case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
+		cursor_size *= 8;
+		cursor_bpp = 8;
+		break;
+	}
+
+	/* only count if cursor is enabled, and if additional allocation needed outside of the
+	 * DCN cursor buffer
+	 */
+	if (pipe_ctx->stream->cursor_position.enable && (ignore_cursor_buf ||
+			cursor_size > 16384)) {
+		/* cursor_num_mblk = CEILING(num_cursors*cursor_width*cursor_width*cursor_Bpe/mblk_bytes, 1)
+		 * Note: add 1 mblk in case of cursor misalignment
+		 */
+		cursor_mall_size_bytes = ((cursor_size + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) /
+				DCN3_2_MALL_MBLK_SIZE_BYTES + 1) * DCN3_2_MALL_MBLK_SIZE_BYTES;
+	}
+
+	return cursor_mall_size_bytes;
+}
+
 /**
  * ********************************************************************************************
  * dcn32_helper_calculate_num_ways_for_subvp: Calculate number of ways needed for SubVP
  *
- * This function first checks the bytes required per pixel on the SubVP pipe, then calculates
- * the total number of pixels required in the SubVP MALL region. These are used to calculate
- * the number of cache lines used (then number of ways required) for SubVP MCLK switching.
+ * Gets total allocation required for the phantom viewport calculated by DML in bytes and
+ * converts to number of cache ways.
  *
  * @param [in] dc: current dc state
  * @param [in] context: new dc state
@@ -48,106 +110,19 @@ static bool is_dual_plane(enum surface_pixel_format format)
  *
  * ********************************************************************************************
  */
-uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_state *context)
+uint32_t dcn32_helper_calculate_num_ways_for_subvp(
+		struct dc *dc,
+		struct dc_state *context)
 {
-	uint32_t num_ways = 0;
-	uint32_t bytes_per_pixel = 0;
-	uint32_t cache_lines_used = 0;
-	uint32_t lines_per_way = 0;
-	uint32_t total_cache_lines = 0;
-	uint32_t bytes_in_mall = 0;
-	uint32_t num_mblks = 0;
-	uint32_t cache_lines_per_plane = 0;
-	uint32_t i = 0, j = 0;
-	uint16_t mblk_width = 0;
-	uint16_t mblk_height = 0;
-	uint32_t full_vp_width_blk_aligned = 0;
-	uint32_t full_vp_height_blk_aligned = 0;
-	uint32_t mall_alloc_width_blk_aligned = 0;
-	uint32_t mall_alloc_height_blk_aligned = 0;
-	uint16_t full_vp_height = 0;
-	bool subvp_in_use = false;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		/* Find the phantom pipes.
-		 * - For pipe split case we need to loop through the bottom and next ODM
-		 *   pipes or only half the viewport size is counted
-		 */
-		if (pipe->stream && pipe->plane_state &&
-				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
-			struct pipe_ctx *main_pipe = NULL;
-
-			subvp_in_use = true;
-			/* Get full viewport height from main pipe (required for MBLK calculation) */
-			for (j = 0; j < dc->res_pool->pipe_count; j++) {
-				main_pipe = &context->res_ctx.pipe_ctx[j];
-				if (main_pipe->stream == pipe->stream->mall_stream_config.paired_stream) {
-					full_vp_height = main_pipe->plane_res.scl_data.viewport.height;
-					break;
-				}
-			}
-
-			bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
-			mblk_width = DCN3_2_MBLK_WIDTH;
-			mblk_height = bytes_per_pixel == 4 ? DCN3_2_MBLK_HEIGHT_4BPE : DCN3_2_MBLK_HEIGHT_8BPE;
-
-			/* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width + blk_width - 1, blk_width) -
-			 * FLOOR(vp_x_start, blk_width)
-			 */
-			full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x +
-					pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) -
-					(pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width);
-
-			/* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) -
-			 * FLOOR(vp_y_start, blk_height)
-			 */
-			full_vp_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y +
-					full_vp_height + mblk_height - 1) / mblk_height * mblk_height) -
-					(pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height);
-
-			/* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */
-			mall_alloc_width_blk_aligned = full_vp_width_blk_aligned;
-
-			/* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */
-			mall_alloc_height_blk_aligned = (pipe->plane_res.scl_data.viewport.height - 1 + mblk_height - 1) /
-					mblk_height * mblk_height + mblk_height;
-
-			/* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c;
-			 * full_mblk_height_ub_l/c = mall_alloc_height_blk_aligned_l/c;
-			 * num_mblk_l/c = (full_mblk_width_ub_l/c / mblk_width_l/c) * (full_mblk_height_ub_l/c / mblk_height_l/c);
-			 * (Should be divisible, but round up if not)
-			 */
-			num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
-					((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
-
-			/*For DCC:
-			 * meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1)
-			 */
-			if (pipe->plane_state->dcc.enable)
-				num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel +
-								(256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES);
-
-			bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
-			// cache lines used is total bytes / cache_line size. Add +2 for worst case alignment
-			// (MALL is 64-byte aligned)
-			cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2;
-
-			cache_lines_used += cache_lines_per_plane;
+	if (context->bw_ctx.bw.dcn.mall_subvp_size_bytes > 0) {
+		if (dc->debug.force_subvp_num_ways) {
+			return dc->debug.force_subvp_num_ways;
+		} else {
+			return dcn32_helper_mall_bytes_to_ways(dc, context->bw_ctx.bw.dcn.mall_subvp_size_bytes);
 		}
+	} else {
+		return 0;
 	}
-
-	total_cache_lines = dc->caps.max_cab_allocation_bytes / dc->caps.cache_line_size;
-	lines_per_way = total_cache_lines / dc->caps.cache_num_ways;
-	num_ways = cache_lines_used / lines_per_way;
-	if (cache_lines_used % lines_per_way > 0)
-		num_ways++;
-
-	if (subvp_in_use && dc->debug.force_subvp_num_ways > 0)
-		num_ways = dc->debug.force_subvp_num_ways;
-
-	return num_ways;
 }
 
 void dcn32_merge_pipes_for_subvp(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 0c7ddd6f05b5..f2597c966510 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -1291,7 +1291,6 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
 	context->bw_ctx.bw.dcn.clk.p_state_change_support =
 			context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
 					!= dm_dram_clock_change_unsupported;
-	context->bw_ctx.bw.dcn.clk.num_ways = dcn32_helper_calculate_num_ways_for_subvp(dc, context);
 
 	context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
 	context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
@@ -1315,6 +1314,10 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
 		unbounded_req_enabled = false;
 	}
 
+	context->bw_ctx.bw.dcn.mall_ss_size_bytes = 0;
+	context->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes = 0;
+	context->bw_ctx.bw.dcn.mall_subvp_size_bytes = 0;
+
 	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
 		if (!context->res_ctx.pipe_ctx[i].stream)
 			continue;
@@ -1346,6 +1349,28 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
 		else
 			context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0;
 		context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
+
+		context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes = get_surface_size_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+		/* MALL Allocation Sizes */
+		/* count from active, top pipes only */
+		if (context->res_ctx.pipe_ctx[i].stream && context->res_ctx.pipe_ctx[i].plane_state &&
+				context->res_ctx.pipe_ctx[i].top_pipe == NULL &&
+				context->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) {
+			/* SS: all active surfaces stored in MALL */
+			if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type != SUBVP_PHANTOM) {
+				context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes;
+
+				if (context->res_ctx.pipe_ctx[i].stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) {
+					/* SS PSR On: all active surfaces part of streams not supporting PSR stored in MALL */
+					context->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes += context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes;
+				}
+			} else if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				/* SUBVP: phantom surfaces only stored in MALL */
+				context->bw_ctx.bw.dcn.mall_subvp_size_bytes += context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes;
+			}
+		}
+
 		pipe_idx++;
 	}
 	/* If DCN isn't making memory requests we can allow pstate change and lower clocks */
@@ -1366,6 +1391,8 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
 	context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz
 			* 1000;
 
+	context->bw_ctx.bw.dcn.clk.num_ways = dcn32_helper_calculate_num_ways_for_subvp(dc, context);
+
 	context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 8e6585dab20e..8cb28b7918db 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -202,6 +202,7 @@ dml_get_pipe_attr_func(vm_group_size_in_bytes, mode_lib->vba.vm_group_bytes);
 dml_get_pipe_attr_func(dpte_row_height_linear_l, mode_lib->vba.dpte_row_height_linear);
 dml_get_pipe_attr_func(pte_buffer_mode, mode_lib->vba.PTE_BUFFER_MODE);
 dml_get_pipe_attr_func(subviewport_lines_needed_in_mall, mode_lib->vba.SubViewportLinesNeededInMALL);
+dml_get_pipe_attr_func(surface_size_in_mall, mode_lib->vba.SurfaceSizeInMALL)
 
 double get_total_immediate_flip_bytes(
 		struct display_mode_lib *mode_lib,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 81e53e67cd0b..876b9b517ea2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -143,6 +143,7 @@ dml_get_pipe_attr_decl(vready_at_or_after_vsync);
 dml_get_pipe_attr_decl(min_dst_y_next_start);
 dml_get_pipe_attr_decl(vstartup_calculated);
 dml_get_pipe_attr_decl(subviewport_lines_needed_in_mall);
+dml_get_pipe_attr_decl(surface_size_in_mall);
 
 double get_total_immediate_flip_bytes(
 		struct display_mode_lib *mode_lib,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index b093ea495468..bebfcf8737b3 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -450,10 +450,11 @@ struct pipe_ctx {
 	struct _vcs_dpi_display_e2e_pipe_params_st dml_input;
 	int det_buffer_size_kb;
 	bool unbounded_req;
+	unsigned int surface_size_in_mall_bytes;
 
-	union pipe_update_flags update_flags;
 	struct dwbc *dwbc;
 	struct mcif_wb *mcif_wb;
+	union pipe_update_flags update_flags;
 };
 
 /* Data used for dynamic link encoder assignment.
@@ -507,6 +508,9 @@ struct dcn_bw_output {
 	struct dcn_watermark_set watermarks;
 	struct dcn_bw_writeback bw_writeback;
 	int compbuf_size_kb;
+	unsigned int mall_ss_size_bytes;
+	unsigned int mall_ss_psr_active_size_bytes;
+	unsigned int mall_subvp_size_bytes;
 	unsigned int legacy_svp_drr_stream_index;
 	bool legacy_svp_drr_stream_index_valid;
 };
-- 
2.39.0



More information about the amd-gfx mailing list