[PATCH 2/2] drm/amd/display: Enable DCC on DCN401

Aurabindo Pillai aurabindo.pillai at amd.com
Mon Jun 17 22:14:26 UTC 2024


Add registers and entry points to enable DCC on DCN4x

Signed-off-by: Aurabindo Pillai <aurabindo.pillai at amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc.c      |  10 +
 .../drm/amd/display/dc/core/dc_hw_sequencer.c |  11 +
 drivers/gpu/drm/amd/display/dc/dc.h           |   4 +
 .../drm/amd/display/dc/dml2/dml2_wrapper.c    |   6 +
 .../drm/amd/display/dc/dml2/dml2_wrapper.h    |   2 +-
 .../display/dc/hubbub/dcn30/dcn30_hubbub.c    |   3 +
 .../display/dc/hubbub/dcn31/dcn31_hubbub.c    |   3 +
 .../display/dc/hubbub/dcn401/dcn401_hubbub.c  | 280 ++++++++++++++++++
 .../display/dc/hubbub/dcn401/dcn401_hubbub.h  |   5 +
 .../amd/display/dc/hubp/dcn20/dcn20_hubp.h    |  14 +
 .../amd/display/dc/hubp/dcn401/dcn401_hubp.c  |  21 ++
 .../amd/display/dc/hubp/dcn401/dcn401_hubp.h  |  14 +-
 .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c   |   4 +
 .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c |  25 ++
 .../amd/display/dc/hwss/dcn401/dcn401_hwseq.h |   2 +
 .../amd/display/dc/hwss/dcn401/dcn401_init.c  |   1 +
 .../drm/amd/display/dc/hwss/hw_sequencer.h    |   9 +
 .../gpu/drm/amd/display/dc/inc/core_types.h   |   3 +
 .../gpu/drm/amd/display/dc/inc/hw/dchubbub.h  |   4 +
 drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h  |   1 +
 .../dc/resource/dcn401/dcn401_resource.c      |   9 +
 .../dc/resource/dcn401/dcn401_resource.h      |   2 +
 22 files changed, 431 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index a4ba6f99cd34..8c691bee49a3 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1264,6 +1264,9 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 				apply_ctx_interdependent_lock(dc, dc->current_state, old_stream, false);
 				dc->hwss.post_unlock_program_front_end(dc, dangling_context);
 			}
+
+			if (dc->res_pool->funcs->prepare_mcache_programming)
+				dc->res_pool->funcs->prepare_mcache_programming(dc, dangling_context);
 			if (dc->hwss.program_front_end_for_ctx) {
 				dc->hwss.interdependent_update_lock(dc, dc->current_state, true);
 				dc->hwss.program_front_end_for_ctx(dc, dangling_context);
@@ -2037,6 +2040,8 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 	}
 
 	/* Program all planes within new context*/
+	if (dc->res_pool->funcs->prepare_mcache_programming)
+		dc->res_pool->funcs->prepare_mcache_programming(dc, context);
 	if (dc->hwss.program_front_end_for_ctx) {
 		dc->hwss.interdependent_update_lock(dc, context, true);
 		dc->hwss.program_front_end_for_ctx(dc, context);
@@ -3884,6 +3889,7 @@ static void commit_planes_for_stream(struct dc *dc,
 				odm_pipe->ttu_regs.min_ttu_vblank = MAX_TTU;
 	}
 
+
 	if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
 		if (top_pipe_to_program &&
 			top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
@@ -3903,6 +3909,10 @@ static void commit_planes_for_stream(struct dc *dc,
 						top_pipe_to_program->stream_res.tg);
 		}
 
+	if (dc->hwss.wait_for_dcc_meta_propagation) {
+		dc->hwss.wait_for_dcc_meta_propagation(dc, top_pipe_to_program);
+	}
+
 	if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
 		if (dc->hwss.subvp_pipe_control_lock)
 			dc->hwss.subvp_pipe_control_lock(dc, context, true, should_lock_all_pipes, NULL, subvp_prev_use);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 5037474bf95c..87e36d51c56d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -595,6 +595,12 @@ void hwss_build_fast_sequence(struct dc *dc,
 	if (!plane || !stream)
 		return;
 
+	if (dc->hwss.wait_for_dcc_meta_propagation) {
+		block_sequence[*num_steps].params.wait_for_dcc_meta_propagation_params.dc = dc;
+		block_sequence[*num_steps].params.wait_for_dcc_meta_propagation_params.top_pipe_to_program = pipe_ctx;
+		block_sequence[*num_steps].func = HUBP_WAIT_FOR_DCC_META_PROP;
+		(*num_steps)++;
+	}
 	if (dc->hwss.subvp_pipe_control_lock_fast) {
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc;
 		block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = true;
@@ -835,6 +841,11 @@ void hwss_execute_sequence(struct dc *dc,
 		case DMUB_SUBVP_SAVE_SURF_ADDR:
 			hwss_subvp_save_surf_addr(params);
 			break;
+		case HUBP_WAIT_FOR_DCC_META_PROP:
+			dc->hwss.wait_for_dcc_meta_propagation(
+					params->wait_for_dcc_meta_propagation_params.dc,
+					params->wait_for_dcc_meta_propagation_params.top_pipe_to_program);
+			break;
 		case DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST:
 			dc->hwss.fams2_global_control_lock_fast(params);
 			break;
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index d0d1af451b64..9a59f6ef8106 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -333,6 +333,9 @@ struct dc_dcc_setting {
 		uint32_t dcc_128_128_uncontrained : 1;  //available in ASICs before DCN 3.0
 		uint32_t dcc_256_128_128 : 1;		//available starting with DCN 3.0
 		uint32_t dcc_256_256_unconstrained : 1;  //available in ASICs before DCN 3.0 (the best compression case)
+		uint32_t dcc_256_256 : 1;  //available in ASICs starting with DCN 4.0x (the best compression case)
+		uint32_t dcc_256_128 : 1;  //available in ASICs starting with DCN 4.0x
+		uint32_t dcc_256_64 : 1;   //available in ASICs starting with DCN 4.0x (the worst compression case)
 	} dcc_controls;
 };
 
@@ -1036,6 +1039,7 @@ struct dc_debug_options {
 	unsigned int static_screen_wait_frames;
 	uint32_t pwm_freq;
 	bool force_chroma_subsampling_1tap;
+	unsigned int dcc_meta_propagation_delay_us;
 	bool disable_422_left_edge_pixel;
 	bool dml21_force_pstate_method;
 	uint32_t dml21_force_pstate_method_value;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
index f6310408dbba..c09013794dea 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
@@ -806,6 +806,12 @@ void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2,
 	*dram_clk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.DRAMClockChangeSupport[0];
 }
 
+void dml2_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2)
+{
+	if (dml2->architecture == dml2_architecture_21)
+		dml21_prepare_mcache_programming(in_dc, context, dml2);
+}
+
 void dml2_copy(struct dml2_context *dst_dml2,
 	struct dml2_context *src_dml2)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
index 20b3970c0857..2ad20378d2ef 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
@@ -301,5 +301,5 @@ bool dml2_validate(const struct dc *in_dc,
  */
 void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2,
 	unsigned int *fclk_change_support, unsigned int *dram_clk_change_support);
-
+void dml2_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2);
 #endif //_DML2_WRAPPER_H_
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c
index 6a5af3da4b45..fe741100c0f8 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c
@@ -339,6 +339,7 @@ bool hubbub3_get_dcc_compression_cap(struct hubbub *hubbub,
 		return false;
 
 	switch (dcc_control) {
+	case dcc_control__256_256:
 	case dcc_control__256_256_xxx:
 		output->grph.rgb.max_uncompressed_blk_size = 256;
 		output->grph.rgb.max_compressed_blk_size = 256;
@@ -346,6 +347,7 @@ bool hubbub3_get_dcc_compression_cap(struct hubbub *hubbub,
 		output->grph.rgb.dcc_controls.dcc_256_256_unconstrained = 1;
 		output->grph.rgb.dcc_controls.dcc_256_128_128 = 1;
 		break;
+	case dcc_control__256_128:
 	case dcc_control__128_128_xxx:
 		output->grph.rgb.max_uncompressed_blk_size = 128;
 		output->grph.rgb.max_compressed_blk_size = 128;
@@ -353,6 +355,7 @@ bool hubbub3_get_dcc_compression_cap(struct hubbub *hubbub,
 		output->grph.rgb.dcc_controls.dcc_128_128_uncontrained = 1;
 		output->grph.rgb.dcc_controls.dcc_256_128_128 = 1;
 		break;
+	case dcc_control__256_64:
 	case dcc_control__256_64_64:
 		output->grph.rgb.max_uncompressed_blk_size = 256;
 		output->grph.rgb.max_compressed_blk_size = 64;
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c
index b906db6e7355..7fb5523f9722 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c
@@ -866,6 +866,7 @@ static bool hubbub31_get_dcc_compression_cap(struct hubbub *hubbub,
 		return false;
 
 	switch (dcc_control) {
+	case dcc_control__256_256:
 	case dcc_control__256_256_xxx:
 		output->grph.rgb.max_uncompressed_blk_size = 256;
 		output->grph.rgb.max_compressed_blk_size = 256;
@@ -881,12 +882,14 @@ static bool hubbub31_get_dcc_compression_cap(struct hubbub *hubbub,
 		output->grph.rgb.dcc_controls.dcc_256_128_128 = 1;
 		break;
 	case dcc_control__256_64_64:
+	case dcc_control__256_64:
 		output->grph.rgb.max_uncompressed_blk_size = 256;
 		output->grph.rgb.max_compressed_blk_size = 64;
 		output->grph.rgb.independent_64b_blks = true;
 		output->grph.rgb.dcc_controls.dcc_256_64_64 = 1;
 		break;
 	case dcc_control__256_128_128:
+	case dcc_control__256_128:
 		output->grph.rgb.max_uncompressed_blk_size = 256;
 		output->grph.rgb.max_compressed_blk_size = 128;
 		output->grph.rgb.independent_64b_blks = false;
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c
index 054607c944a3..b9201b60bd41 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c
@@ -824,6 +824,285 @@ void hubbub401_det_request_size(
 		}
 	}
 }
+bool hubbub401_get_dcc_compression_cap(struct hubbub *hubbub,
+		const struct dc_dcc_surface_param *input,
+		struct dc_surface_dcc_cap *output)
+{
+	struct dc *dc = hubbub->ctx->dc;
+	/* DCN4_Programming_Guide_DCHUB.docx, Section 5.11.2.2 */
+	enum dcc_control dcc_control;
+	unsigned int plane0_bpe, plane1_bpe;
+	enum segment_order segment_order_horz, segment_order_vert;
+	enum segment_order p1_segment_order_horz, p1_segment_order_vert;
+	bool req128_horz_wc, req128_vert_wc;
+	unsigned int plane0_width = 0, plane0_height = 0, plane1_width = 0, plane1_height = 0;
+	bool p1_req128_horz_wc, p1_req128_vert_wc, is_dual_plane;
+
+	memset(output, 0, sizeof(*output));
+
+	if (dc->debug.disable_dcc == DCC_DISABLE)
+		return false;
+
+	switch (input->format) {
+	default:
+		is_dual_plane = false;
+
+		plane1_width = 0;
+		plane1_height = 0;
+
+		if (input->surface_size.width > 6144 + 16)
+			plane0_width = 6160;
+		else
+			plane0_width = input->surface_size.width;
+
+		if (input->surface_size.height > 6144 + 16)
+			plane0_height = 6160;
+		else
+			plane0_height = input->surface_size.height;
+
+		break;
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+		is_dual_plane = true;
+
+		if (input->surface_size.width > 7680 + 16)
+			plane0_width = 7696;
+		else
+			plane0_width = input->surface_size.width;
+
+		if (input->surface_size.height > 4320 + 16)
+			plane0_height = 4336;
+		else
+			plane0_height = input->surface_size.height;
+
+		if (input->plane1_size.width > 7680 + 16)
+			plane1_width = 7696 / 2;
+		else
+			plane1_width = input->plane1_size.width;
+
+		if (input->plane1_size.height > 4320 + 16)
+			plane1_height = 4336 / 2;
+		else
+			plane1_height = input->plane1_size.height;
+
+		break;
+
+	case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
+		is_dual_plane = true;
+
+		if (input->surface_size.width > 5120 + 16)
+			plane0_width = 5136;
+		else
+			plane0_width = input->surface_size.width;
+
+		if (input->surface_size.height > 5120 + 16)
+			plane0_height = 5136;
+		else
+			plane0_height = input->surface_size.height;
+
+		if (input->plane1_size.width > 5120 + 16)
+			plane1_width = 5136;
+		else
+			plane1_width = input->plane1_size.width;
+
+		if (input->plane1_size.height > 5120 + 16)
+			plane1_height = 5136;
+		else
+			plane1_height = input->plane1_size.height;
+
+		break;
+	}
+
+	if (!hubbub->funcs->dcc_support_pixel_format_plane0_plane1(input->format,
+			&plane0_bpe, &plane1_bpe))
+		return false;
+
+	/* Find plane0 DCC Controls */
+	if (!is_dual_plane) {
+
+		if (!hubbub->funcs->dcc_support_swizzle_addr3(input->swizzle_mode_addr3,
+				input->plane0_pitch, plane0_bpe,
+				&segment_order_horz, &segment_order_vert))
+			return false;
+
+		hubbub401_det_request_size(TO_DCN20_HUBBUB(hubbub)->detile_buf_size, input->format,
+				plane0_height, plane0_width, plane0_bpe,
+				plane1_height, plane1_width, plane1_bpe,
+				&req128_horz_wc, &req128_vert_wc, &p1_req128_horz_wc, &p1_req128_vert_wc);
+
+		if (!req128_horz_wc && !req128_vert_wc) {
+			dcc_control = dcc_control__256_256;
+		} else if (input->scan == SCAN_DIRECTION_HORIZONTAL) {
+			if (!req128_horz_wc)
+				dcc_control = dcc_control__256_256;
+			else if (segment_order_horz == segment_order__contiguous)
+				dcc_control = dcc_control__256_128;
+			else
+				dcc_control = dcc_control__256_64;
+		} else if (input->scan == SCAN_DIRECTION_VERTICAL) {
+			if (!req128_vert_wc)
+				dcc_control = dcc_control__256_256;
+			else if (segment_order_vert == segment_order__contiguous)
+				dcc_control = dcc_control__256_128;
+			else
+				dcc_control = dcc_control__256_64;
+		} else {
+			if ((req128_horz_wc &&
+				segment_order_horz == segment_order__non_contiguous) ||
+				(req128_vert_wc &&
+				segment_order_vert == segment_order__non_contiguous))
+				/* access_dir not known, must use most constraining */
+				dcc_control = dcc_control__256_64;
+			else
+				/* req128 is true for either horz and vert
+				 * but segment_order is contiguous
+				 */
+				dcc_control = dcc_control__256_128;
+		}
+
+		if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE &&
+			dcc_control != dcc_control__256_256)
+			return false;
+
+		switch (dcc_control) {
+		case dcc_control__256_256:
+			output->grph.rgb.dcc_controls.dcc_256_256 = 1;
+			output->grph.rgb.dcc_controls.dcc_256_128 = 1;
+			output->grph.rgb.dcc_controls.dcc_256_64 = 1;
+			break;
+		case dcc_control__256_128:
+			output->grph.rgb.dcc_controls.dcc_256_128 = 1;
+			output->grph.rgb.dcc_controls.dcc_256_64 = 1;
+			break;
+		case dcc_control__256_64:
+			output->grph.rgb.dcc_controls.dcc_256_64 = 1;
+			break;
+		default:
+			/* Shouldnt get here */
+			ASSERT(0);
+			break;
+		}
+	} else {
+		/* For dual plane cases, need to examine both planes together */
+		if (!hubbub->funcs->dcc_support_swizzle_addr3(input->swizzle_mode_addr3,
+				input->plane0_pitch, plane0_bpe,
+				&segment_order_horz, &segment_order_vert))
+			return false;
+
+		if (!hubbub->funcs->dcc_support_swizzle_addr3(input->swizzle_mode_addr3,
+			input->plane1_pitch, plane1_bpe,
+			&p1_segment_order_horz, &p1_segment_order_vert))
+			return false;
+
+		hubbub401_det_request_size(TO_DCN20_HUBBUB(hubbub)->detile_buf_size, input->format,
+				plane0_height, plane0_width, plane0_bpe,
+				plane1_height, plane1_width, plane1_bpe,
+				&req128_horz_wc, &req128_vert_wc, &p1_req128_horz_wc, &p1_req128_vert_wc);
+
+		/* Determine Plane 0 DCC Controls */
+		if (!req128_horz_wc && !req128_vert_wc) {
+			dcc_control = dcc_control__256_256;
+		} else if (input->scan == SCAN_DIRECTION_HORIZONTAL) {
+			if (!req128_horz_wc)
+				dcc_control = dcc_control__256_256;
+			else if (segment_order_horz == segment_order__contiguous)
+				dcc_control = dcc_control__256_128;
+			else
+				dcc_control = dcc_control__256_64;
+		} else if (input->scan == SCAN_DIRECTION_VERTICAL) {
+			if (!req128_vert_wc)
+				dcc_control = dcc_control__256_256;
+			else if (segment_order_vert == segment_order__contiguous)
+				dcc_control = dcc_control__256_128;
+			else
+				dcc_control = dcc_control__256_64;
+		} else {
+			if ((req128_horz_wc &&
+				segment_order_horz == segment_order__non_contiguous) ||
+				(req128_vert_wc &&
+				segment_order_vert == segment_order__non_contiguous))
+				/* access_dir not known, must use most constraining */
+				dcc_control = dcc_control__256_64;
+			else
+				/* req128 is true for either horz and vert
+				 * but segment_order is contiguous
+				 */
+				dcc_control = dcc_control__256_128;
+		}
+
+		switch (dcc_control) {
+		case dcc_control__256_256:
+			output->video.luma.dcc_controls.dcc_256_256 = 1;
+			output->video.luma.dcc_controls.dcc_256_128 = 1;
+			output->video.luma.dcc_controls.dcc_256_64 = 1;
+			break;
+		case dcc_control__256_128:
+			output->video.luma.dcc_controls.dcc_256_128 = 1;
+			output->video.luma.dcc_controls.dcc_256_64 = 1;
+			break;
+		case dcc_control__256_64:
+			output->video.luma.dcc_controls.dcc_256_64 = 1;
+			break;
+		default:
+			ASSERT(0);
+			break;
+		}
+
+		/* Determine Plane 1 DCC Controls */
+		if (!p1_req128_horz_wc && !p1_req128_vert_wc) {
+			dcc_control = dcc_control__256_256;
+		} else if (input->scan == SCAN_DIRECTION_HORIZONTAL) {
+			if (!p1_req128_horz_wc)
+				dcc_control = dcc_control__256_256;
+			else if (p1_segment_order_horz == segment_order__contiguous)
+				dcc_control = dcc_control__256_128;
+			else
+				dcc_control = dcc_control__256_64;
+		} else if (input->scan == SCAN_DIRECTION_VERTICAL) {
+			if (!p1_req128_vert_wc)
+				dcc_control = dcc_control__256_256;
+			else if (p1_segment_order_vert == segment_order__contiguous)
+				dcc_control = dcc_control__256_128;
+			else
+				dcc_control = dcc_control__256_64;
+		} else {
+			if ((p1_req128_horz_wc &&
+				p1_segment_order_horz == segment_order__non_contiguous) ||
+				(p1_req128_vert_wc &&
+				p1_segment_order_vert == segment_order__non_contiguous))
+				/* access_dir not known, must use most constraining */
+				dcc_control = dcc_control__256_64;
+			else
+				/* req128 is true for either horz and vert
+				 * but segment_order is contiguous
+				 */
+				dcc_control = dcc_control__256_128;
+		}
+
+		switch (dcc_control) {
+		case dcc_control__256_256:
+			output->video.chroma.dcc_controls.dcc_256_256 = 1;
+			output->video.chroma.dcc_controls.dcc_256_128 = 1;
+			output->video.chroma.dcc_controls.dcc_256_64 = 1;
+			break;
+		case dcc_control__256_128:
+			output->video.chroma.dcc_controls.dcc_256_128 = 1;
+			output->video.chroma.dcc_controls.dcc_256_64 = 1;
+			break;
+		case dcc_control__256_64:
+			output->video.chroma.dcc_controls.dcc_256_64 = 1;
+			break;
+		default:
+			ASSERT(0);
+			break;
+		}
+	}
+
+	output->capable = true;
+	return true;
+}
 
 static void dcn401_program_det_segments(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg)
 {
@@ -891,6 +1170,7 @@ static const struct hubbub_funcs hubbub4_01_funcs = {
 	.init_vm_ctx = hubbub2_init_vm_ctx,
 	.dcc_support_swizzle_addr3 = hubbub401_dcc_support_swizzle,
 	.dcc_support_pixel_format_plane0_plane1 = hubbub401_dcc_support_pixel_format,
+	.get_dcc_compression_cap = hubbub401_get_dcc_compression_cap,
 	.wm_read_state = hubbub401_wm_read_state,
 	.get_dchub_ref_freq = hubbub2_get_dchub_ref_freq,
 	.program_watermarks = hubbub401_program_watermarks,
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h
index d8a57f64a70c..f35f19ba3e18 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h
@@ -180,6 +180,11 @@ void hubbub401_det_request_size(
 		bool *p0_req128_vert_wc,
 		bool *p1_req128_horz_wc,
 		bool *p1_req128_vert_wc);
+bool hubbub401_get_dcc_compression_cap(
+		struct hubbub *hubbub,
+		const struct dc_dcc_surface_param *input,
+		struct dc_surface_dcc_cap *output);
+
 void hubbub401_construct(struct dcn20_hubbub *hubbub2,
 	struct dc_context *ctx,
 	const struct dcn_hubbub_registers *hubbub_regs,
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h
index ecc0a2f37938..18e194507e36 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h
@@ -175,6 +175,8 @@
 	uint32_t HUBP_3DLUT_ADDRESS_LOW;\
 	uint32_t HUBP_3DLUT_CONTROL;\
 	uint32_t HUBP_3DLUT_DLG_PARAM;\
+	uint32_t DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE;\
+	uint32_t DCHUBP_MCACHEID_CONFIG
 
 #define DCN2_HUBP_REG_FIELD_VARIABLE_LIST(type) \
 	DCN_HUBP_REG_FIELD_BASE_LIST(type); \
@@ -269,6 +271,18 @@
 	type HUBP_3DLUT_ADDRESS_HIGH;\
 	type HUBP_3DLUT_ADDRESS_LOW;\
 	type REFCYC_PER_3DLUT_GROUP;\
+	type VIEWPORT_MCACHE_SPLIT_COORDINATE;\
+	type VIEWPORT_MCACHE_SPLIT_COORDINATE_C;\
+	type MCACHEID_REG_READ_1H_P0;\
+	type MCACHEID_REG_READ_2H_P0;\
+	type MCACHEID_REG_READ_1H_P1;\
+	type MCACHEID_REG_READ_2H_P1;\
+	type MCACHEID_MALL_PREF_1H_P0;\
+	type MCACHEID_MALL_PREF_2H_P0;\
+	type MCACHEID_MALL_PREF_1H_P1;\
+	type MCACHEID_MALL_PREF_2H_P1
+
+
 
 struct dcn_hubp2_registers {
 	DCN401_HUBP_REG_COMMON_VARIABLE_LIST;
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c
index 6692d57d5cce..40465fd0bcc0 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c
@@ -626,6 +626,26 @@ void hubp401_set_viewport(
 		  SEC_VIEWPORT_Y_START_C, viewport_c->y);
 }
 
+void hubp401_program_mcache_id_and_split_coordinate(
+	struct hubp *hubp,
+	struct dml2_hubp_pipe_mcache_regs *mcache_regs)
+{
+	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+	REG_SET_8(DCHUBP_MCACHEID_CONFIG, 0,
+		MCACHEID_REG_READ_1H_P0, mcache_regs->main.p0.mcache_id_first,
+		MCACHEID_REG_READ_2H_P0, mcache_regs->main.p0.mcache_id_second,
+		MCACHEID_REG_READ_1H_P1, mcache_regs->main.p1.mcache_id_first,
+		MCACHEID_REG_READ_2H_P1, mcache_regs->main.p1.mcache_id_second,
+		MCACHEID_MALL_PREF_1H_P0, mcache_regs->mall.p0.mcache_id_first,
+		MCACHEID_MALL_PREF_2H_P0, mcache_regs->mall.p0.mcache_id_second,
+		MCACHEID_MALL_PREF_1H_P1, mcache_regs->mall.p1.mcache_id_first,
+		MCACHEID_MALL_PREF_2H_P1, mcache_regs->mall.p1.mcache_id_second);
+
+	REG_SET_2(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, 0,
+		VIEWPORT_MCACHE_SPLIT_COORDINATE, mcache_regs->main.p0.split_location,
+		VIEWPORT_MCACHE_SPLIT_COORDINATE_C, mcache_regs->main.p1.split_location);
+}
 void hubp401_set_flip_int(struct hubp *hubp)
 {
 	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
@@ -993,6 +1013,7 @@ static struct hubp_funcs dcn401_hubp_funcs = {
 	.phantom_hubp_post_enable = hubp32_phantom_hubp_post_enable,
 	.hubp_update_mall_sel = hubp401_update_mall_sel,
 	.hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering,
+	.hubp_program_mcache_id_and_split_coordinate = hubp401_program_mcache_id_and_split_coordinate,
 	.hubp_update_3dlut_fl_bias_scale = hubp401_update_3dlut_fl_bias_scale,
 	.hubp_program_3dlut_fl_mode = hubp401_program_3dlut_fl_mode,
 	.hubp_program_3dlut_fl_format = hubp401_program_3dlut_fl_format,
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h
index e0cec898a2c0..e52fdb5b0cd0 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h
@@ -243,6 +243,16 @@
 	HUBP_SF(CURSOR0_0_HUBP_3DLUT_ADDRESS_HIGH, HUBP_3DLUT_ADDRESS_HIGH, mask_sh),\
 	HUBP_SF(CURSOR0_0_HUBP_3DLUT_ADDRESS_LOW, HUBP_3DLUT_ADDRESS_LOW, mask_sh),\
 	HUBP_SF(CURSOR0_0_HUBP_3DLUT_DLG_PARAM, REFCYC_PER_3DLUT_GROUP, mask_sh),\
+	HUBP_SF(HUBP0_DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, VIEWPORT_MCACHE_SPLIT_COORDINATE, mask_sh),\
+	HUBP_SF(HUBP0_DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, VIEWPORT_MCACHE_SPLIT_COORDINATE_C, mask_sh),\
+	HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_REG_READ_1H_P0, mask_sh),\
+	HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_REG_READ_2H_P0, mask_sh),\
+	HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_REG_READ_1H_P1, mask_sh),\
+	HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_REG_READ_2H_P1, mask_sh),\
+	HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_1H_P0, mask_sh),\
+	HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_2H_P0, mask_sh),\
+	HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_1H_P1, mask_sh),\
+	HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_2H_P1, mask_sh)
 
 void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor);
 
@@ -302,7 +312,9 @@ void hubp401_program_surface_config(
 void hubp401_set_viewport(struct hubp *hubp,
 		const struct rect *viewport,
 		const struct rect *viewport_c);
-
+void hubp401_program_mcache_id_and_split_coordinate(
+	struct hubp *hubp,
+	struct dml2_hubp_pipe_mcache_regs *mcache_regs);
 void hubp401_set_flip_int(struct hubp *hubp);
 
 bool hubp401_in_blank(struct hubp *hubp);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index 82d1ded09561..0aaa5887ea64 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -1753,6 +1753,10 @@ static void dcn20_update_dchubp_dpp(
 			&pipe_ctx->plane_res.scl_data.viewport_c);
 		viewport_changed = true;
 	}
+		if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate)
+			hubp->funcs->hubp_program_mcache_id_and_split_coordinate(
+				hubp,
+				&pipe_ctx->mcache_regs);
 
 	/* Any updates are handled in dc interface, just need to apply existing for plane enable */
 	if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed ||
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
index 7a456618f313..43e73b436f29 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
@@ -1408,6 +1408,31 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable)
 	return true;
 }
 
+void dcn401_wait_for_dcc_meta_propagation(const struct dc *dc,
+		const struct pipe_ctx *top_pipe)
+{
+	bool is_wait_needed = false;
+	const struct pipe_ctx *pipe_ctx = top_pipe;
+
+	/* check if any surfaces are updating address while using flip immediate and dcc */
+	while (pipe_ctx != NULL) {
+		if (pipe_ctx->plane_state &&
+				pipe_ctx->plane_state->dcc.enable &&
+				pipe_ctx->plane_state->flip_immediate &&
+				pipe_ctx->plane_state->update_flags.bits.addr_update) {
+			is_wait_needed = true;
+			break;
+		}
+
+		/* check next pipe */
+		pipe_ctx = pipe_ctx->bottom_pipe;
+	}
+
+	if (is_wait_needed && dc->debug.dcc_meta_propagation_delay_us > 0) {
+		udelay(dc->debug.dcc_meta_propagation_delay_us);
+	}
+}
+
 void dcn401_prepare_bandwidth(struct dc *dc,
 	struct dc_state *context)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h
index bada43d4b2eb..c1d4287d5a0d 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h
@@ -61,6 +61,8 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable);
 
 struct ips_ono_region_state dcn401_read_ono_state(struct dc *dc,
 						  uint8_t region);
+void dcn401_wait_for_dcc_meta_propagation(const struct dc *dc,
+		const struct pipe_ctx *top_pipe_to_program);
 
 void dcn401_prepare_bandwidth(struct dc *dc,
 		struct dc_state *context);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c
index 1cf0608e1980..90cf284fdd98 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c
@@ -94,6 +94,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = {
 	.update_dsc_pg = dcn32_update_dsc_pg,
 	.apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom,
 	.blank_phantom = dcn32_blank_phantom,
+	.wait_for_dcc_meta_propagation = dcn401_wait_for_dcc_meta_propagation,
 	.is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless,
 	.fams2_global_control_lock = dcn401_fams2_global_control_lock,
 	.fams2_update_config = dcn401_fams2_update_config,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
index e9b85884edce..d05be65a2256 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
@@ -141,6 +141,11 @@ struct subvp_save_surf_addr {
 	uint8_t subvp_index;
 };
 
+struct wait_for_dcc_meta_propagation_params {
+	const struct dc *dc;
+	const struct pipe_ctx *top_pipe_to_program;
+};
+
 struct fams2_global_control_lock_fast_params {
 	struct dc *dc;
 	bool is_required;
@@ -165,6 +170,7 @@ union block_sequence_params {
 	struct set_output_csc_params set_output_csc_params;
 	struct set_ocsc_default_params set_ocsc_default_params;
 	struct subvp_save_surf_addr subvp_save_surf_addr;
+	struct wait_for_dcc_meta_propagation_params wait_for_dcc_meta_propagation_params;
 	struct fams2_global_control_lock_fast_params fams2_global_control_lock_fast_params;
 };
 
@@ -186,6 +192,7 @@ enum block_sequence_func {
 	MPC_SET_OUTPUT_CSC,
 	MPC_SET_OCSC_DEFAULT,
 	DMUB_SUBVP_SAVE_SURF_ADDR,
+	HUBP_WAIT_FOR_DCC_META_PROP,
 	DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST,
 
 };
@@ -443,6 +450,8 @@ struct hw_sequencer_funcs {
 	bool (*is_pipe_topology_transition_seamless)(struct dc *dc,
 			const struct dc_state *cur_ctx,
 			const struct dc_state *new_ctx);
+	void (*wait_for_dcc_meta_propagation)(const struct dc *dc,
+		const struct pipe_ctx *top_pipe_to_program);
 	void (*fams2_global_control_lock)(struct dc *dc,
 			struct dc_state *context,
 			bool lock);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index f58c27ad8b3e..4c8e6436c7e1 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -97,6 +97,9 @@ struct resource_funcs {
 	unsigned int (*calculate_mall_ways_from_bytes)(
 				const struct dc *dc,
 				unsigned int total_size_in_mall_bytes);
+	void (*prepare_mcache_programming)(
+					struct dc *dc,
+					struct dc_state *context);
 	/**
 	 * @populate_dml_pipes - Populate pipe data struct
 	 *
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
index a73cb8f731b3..dd2b2864876c 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
@@ -40,6 +40,10 @@ enum dcc_control {
 	dcc_control__128_128_xxx,
 	dcc_control__256_64_64,
 	dcc_control__256_128_128,
+	dcc_control__256_256,
+	dcc_control__256_128,
+	dcc_control__256_64,
+
 };
 
 enum segment_order {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
index bcd7b22a1627..16580d624278 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
@@ -257,6 +257,7 @@ struct hubp_funcs {
 			unsigned int min_dst_y_next_start_optimized);
 
 	void (*hubp_wait_pipe_read_start)(struct hubp *hubp);
+	void (*hubp_program_mcache_id_and_split_coordinate)(struct hubp *hubp, struct dml2_hubp_pipe_mcache_regs *mcache_regs);
 	void (*hubp_update_3dlut_fl_bias_scale)(struct hubp *hubp, uint16_t bias, uint16_t scale);
 	void (*hubp_program_3dlut_fl_mode)(struct hubp *hubp,
 			enum hubp_3dlut_fl_mode mode);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
index 74fb21b88f12..4e27d2cee9fb 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
@@ -1617,6 +1617,14 @@ bool dcn401_validate_bandwidth(struct dc *dc,
 	return out;
 }
 
+void dcn401_prepare_mcache_programming(struct dc *dc,
+		struct dc_state *context)
+{
+	if (dc->debug.using_dml21)
+		dml2_prepare_mcache_programming(dc, context,
+				context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2);
+}
+
 static void dcn401_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx)
 {
 	const struct dc_stream_state *stream = pipe_ctx->stream;
@@ -1699,6 +1707,7 @@ static struct resource_funcs dcn401_res_pool_funcs = {
 	.patch_unknown_plane_state = dcn401_patch_unknown_plane_state,
 	.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
 	.add_phantom_pipes = dcn32_add_phantom_pipes,
+	.prepare_mcache_programming = dcn401_prepare_mcache_programming,
 	.build_pipe_pix_clk_params = dcn401_build_pipe_pix_clk_params,
 	.calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes,
 };
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h
index c04c8b8f2114..26efeada4f41 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h
@@ -26,6 +26,8 @@ bool dcn401_validate_bandwidth(struct dc *dc,
 		struct dc_state *context,
 		bool fast_validate);
 
+void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context);
+
 /* Following are definitions for run time init of reg offsets */
 
 /* HUBP */
-- 
2.45.2



More information about the amd-gfx mailing list