[PATCH 13/31] drm/amd/display: Update de-tile override to anticipate pipe splitting
Rodrigo Siqueira
Rodrigo.Siqueira at amd.com
Fri Jul 15 18:16:47 UTC 2022
From: Taimur Hassan <Syed.Hassan at amd.com>
[Why]
For certain MPO configurations, DML will split a pipe after the DET buffer has
already been allocated by the driver, resulting in the allocation of more DET
segments than the configurable return buffer holds, causing underflow.
[How]
During DET override calculation, determine whether a pipe will later be split
by DML, and distribute DET segments based on the expected number of pipes.
Reviewed-by: Dmytro Laktyushkin <Dmytro.Laktyushkin at amd.com>
Acked-by: Alan Liu <HaoPing.Liu at amd.com>
Signed-off-by: Taimur Hassan <Syed.Hassan at amd.com>
---
.../drm/amd/display/dc/dcn32/dcn32_resource.c | 22 ++--
.../drm/amd/display/dc/dcn32/dcn32_resource.h | 6 +-
.../display/dc/dcn32/dcn32_resource_helpers.c | 112 +++++++++---------
3 files changed, 69 insertions(+), 71 deletions(-)
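For reference, a minimal sketch of the split prediction performed by
dcn32_predict_pipe_split() in the diff below: the DPPCLK a single DPP would
need is adjusted for downspread, quantized to a frequency the VCO's integer
divider can actually produce (mirroring the expression in the patch), and
compared against the per-state DPPCLK limit. The standalone function and the
numbers in the example are hypothetical.

#include <stdbool.h>
#include <stdio.h>

static bool predict_pipe_split(double required_dppclk_mhz, double vco_speed_mhz,
			       double downspread_percent, double dppclk_limit_mhz)
{
	double clk = required_dppclk_mhz * (1.0 + downspread_percent / 100.0);
	double quantized = 0.0;

	if (clk > 0)
		/* DPPCLK comes from the VCO through an integer divider, so the
		 * achievable frequency rounds up to the next divider step. */
		quantized = vco_speed_mhz * 4.0 / (int)(vco_speed_mhz * 4.0 / clk);

	/* If the quantized single-DPP requirement exceeds the limit, DML will
	 * have to split the pipe across two DPPs. */
	return quantized > dppclk_limit_mhz;
}

int main(void)
{
	/* Hypothetical values: 3.6 GHz VCO, 0.5% downspread, 1200 MHz limit. */
	printf("split expected: %d\n",
	       predict_pipe_split(1250.0, 3600.0, 0.5, 1200.0));
	return 0;
}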
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 92d87745d933..631876832dfa 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -2984,7 +2984,7 @@ int dcn32_populate_dml_pipes_from_context(
int i, pipe_cnt;
struct resource_context *res_ctx = &context->res_ctx;
struct pipe_ctx *pipe;
- bool subvp_in_use = false;
+ bool subvp_in_use = false, is_pipe_split_expected[MAX_PIPES];
dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
@@ -3046,6 +3046,9 @@ int dcn32_populate_dml_pipes_from_context(
if (dc->debug.enable_single_display_2to1_odm_policy)
pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
}
+
+ is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, pipes[i].pipe, i);
+
pipe_cnt++;
}
@@ -3053,8 +3056,7 @@ int dcn32_populate_dml_pipes_from_context(
* the DET available for each pipe). Use the DET override input to maintain our driver
* policy.
*/
- switch (pipe_cnt) {
- case 1:
+ if (pipe_cnt == 1 && !is_pipe_split_expected[0]) {
pipes[0].pipe.src.det_size_override = DCN3_2_MAX_DET_SIZE;
if (pipe->plane_state && !dc->debug.disable_z9_mpc) {
if (!is_dual_plane(pipe->plane_state->format)) {
@@ -3065,18 +3067,8 @@ int dcn32_populate_dml_pipes_from_context(
pipes[0].pipe.src.det_size_override = 320; // 5K or higher
}
}
- break;
- case 2:
- case 3:
- case 4:
- // For 2 and 3 pipes, use (MAX_DET_SIZE / pipe_cnt), for 4 pipes use default size for each pipe
- for (i = 0; i < pipe_cnt; i++) {
- pipes[i].pipe.src.det_size_override = (pipe_cnt < 4) ? (DCN3_2_MAX_DET_SIZE / pipe_cnt) : DCN3_2_DEFAULT_DET_SIZE;
- }
- break;
- }
-
- dcn32_update_det_override_for_mpo(dc, context, pipes);
+ } else
+ dcn32_determine_det_override(context, pipes, is_pipe_split_expected, pipe_cnt);
// In general cases we want to keep the dram clock change requirement
// (prefer configs that support MCLK switch). Only override to false
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
index db4546317cb5..10254ab7e9d9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
@@ -100,7 +100,9 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
bool dcn32_subvp_in_use(struct dc *dc,
struct dc_state *context);
-void dcn32_update_det_override_for_mpo(struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes);
+bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index);
+
+void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes,
+ bool *is_pipe_split_expected, int pipe_cnt);
#endif /* _DCN32_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index e001f6d1f6c3..a6ef1dba01fe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -26,6 +26,8 @@
// header file of functions being implemented
#include "dcn32_resource.h"
#include "dcn20/dcn20_resource.h"
+#include "dml/dcn32/display_mode_vba_util_32.h"
+
/**
* ********************************************************************************************
* dcn32_helper_populate_phantom_dlg_params: Get DLG params for phantom pipes and populate pipe_ctx
@@ -195,66 +197,68 @@ bool dcn32_subvp_in_use(struct dc *dc,
return false;
}
-/* For MPO we adjust the DET allocation to ensure we have enough DET buffer when an MPO pipe
- * is removed. For example for 1 MPO + 1 non-MPO normally we would allocate 6 DET segments
- * for each pipe [6, 6, 6]. But when transitioning out of MPO it would change from
- * [6, 6, 6] -> [9, 9]. However, if VUPDATE for the non-MPO pipe comes first we would be
- * trying to allocate more DET than what's currently available which would result in underflow.
- *
- * In this case we must ensure there is enough buffer when transitioning in and out of MPO:
- *
- * 1 MPO (2 plane) + 1 non-MPO case:
- * [4, 4, 9]<->[9, 9]: Allocate 4 each for MPO pipes, and maintain 9 for non-MPO pipe
- *
- * 1 MPO (2 plane) + 2 non-MPO case:
- * [3, 3, 5, 5]<->[6, 6, 6]
- *
- * 1 MPO (3 plane) + 1 non-MPO case:
- * [3, 3, 3, 9]<->[4, 4, 9] or [3, 3, 3, 6]<->[9, 9]
- *
- * For multi-display MPO case all pipes will have 4 segments:
- * Removing MPO on one of the displays will result in 3 pipes
- * (1 MPO and 1 non-MPO which is covered by single MPO stream case).
- */
-void dcn32_update_det_override_for_mpo(struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes)
+bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index)
{
- uint8_t i, mpo_stream_index, pipe_cnt;
- uint8_t mpo_stream_count = 0;
- uint8_t mpo_planes = 0; // Only used in single display MPO case
- unsigned int j;
- struct resource_context *res_ctx = &context->res_ctx;
+ double pscl_throughput, pscl_throughput_chroma, dpp_clk_single_dpp, clock,
+ clk_frequency = 0.0, vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz;
- for (i = 0; i < context->stream_count; i++) {
- if (context->stream_status[i].plane_count > 1) {
- mpo_stream_index = i;
- mpo_stream_count++;
- mpo_planes = context->stream_status[i].plane_count;
- }
- }
+ dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe.scale_ratio_depth.hscl_ratio,
+ pipe.scale_ratio_depth.hscl_ratio_c,
+ pipe.scale_ratio_depth.vscl_ratio,
+ pipe.scale_ratio_depth.vscl_ratio_c,
+ context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk,
+ context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk,
+ pipe.dest.pixel_rate_mhz,
+ pipe.src.source_format,
+ pipe.scale_taps.htaps,
+ pipe.scale_taps.htaps_c,
+ pipe.scale_taps.vtaps,
+ pipe.scale_taps.vtaps_c,
- if (mpo_stream_count == 1) {
- for (j = 0, pipe_cnt = 0; j < dc->res_pool->pipe_count; j++) {
- if (!res_ctx->pipe_ctx[j].stream)
- continue;
+ /* Output */
+ &pscl_throughput, &pscl_throughput_chroma,
+ &dpp_clk_single_dpp);
- if (context->res_ctx.pipe_ctx[j].stream == context->streams[mpo_stream_index]) {
- // For 3 plane MPO + 1 non-MPO, do [3, 3, 3, 9]
- // For 2 plane MPO + 1 non-MPO, do [4, 4, 9]
- if (context->stream_count - mpo_stream_count == 1)
- pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * (mpo_planes == 2 ? 4 : 3);
- else if (context->stream_count - mpo_stream_count == 2)
- pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * 3;
+ clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100);
+
+ if (clock > 0)
+ clk_frequency = vco_speed * 4.0 / ((int) (vco_speed * 4.0 / clock));
+
+ if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[index].dppclk_mhz)
+ return true;
+ else
+ return false;
+}
+
+void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes,
+ bool *is_pipe_split_expected, int pipe_cnt)
+{
+ int i, j, count, stream_segments, pipe_segments[MAX_PIPES];
+
+ if (context->stream_count > 0) {
+ stream_segments = 18 / context->stream_count;
+ for (i = 0, count = 0; i < context->stream_count; i++) {
+ for (j = 0; j < pipe_cnt; j++) {
+ if (context->res_ctx.pipe_ctx[j].stream == context->streams[i]) {
+ count++;
+ if (is_pipe_split_expected[j])
+ count++;
+ }
+ }
+ pipe_segments[i] = stream_segments / count;
+ }
- } else if (context->res_ctx.pipe_ctx[j].stream &&
- context->res_ctx.pipe_ctx[j].stream != context->streams[mpo_stream_index]) {
- // Update for non-MPO pipes
- if (context->stream_count - mpo_stream_count == 1)
- pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * 9;
- else if (context->stream_count - mpo_stream_count == 2)
- pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * 5;
+ for (i = 0; i < pipe_cnt; i++) {
+ pipes[i].pipe.src.det_size_override = 0;
+ for (j = 0; j < context->stream_count; j++) {
+ if (context->res_ctx.pipe_ctx[i].stream == context->streams[j]) {
+ pipes[i].pipe.src.det_size_override = pipe_segments[j] * DCN3_2_DET_SEG_SIZE;
+ break;
+ }
}
- pipe_cnt++;
}
+ } else {
+ for (i = 0; i < pipe_cnt; i++)
+ pipes[i].pipe.src.det_size_override = 4 * DCN3_2_DET_SEG_SIZE; //DCN3_2_DEFAULT_DET_SIZE
}
}
--
2.37.0