[PATCH 3/3] drm/amd/display: Update dcn30_apply_idle_power_optimizations() code

Kazlauskas, Nicholas nicholas.kazlauskas at amd.com
Tue Jan 19 22:10:01 UTC 2021


On 2021-01-19 3:38 p.m., Bhawanpreet Lakha wrote:
> Update the function for idle optimizations
> -remove hardcoded size
> -enable no memory-request case
> -add cursor copy
> -update mall eligibility check case
> 
> Signed-off-by: Bhawanpreet Lakha <Bhawanpreet.Lakha at amd.com>
> Signed-off-by: Joshua Aberback <joshua.aberback at amd.com>

Series is:

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas at amd.com>

Though you might want to update patch 1's commit message to explain
watermark set D in a little more detail.

Regards,
Nicholas Kazlauskas

> ---
>   drivers/gpu/drm/amd/display/dc/dc.h           |   2 +
>   .../drm/amd/display/dc/dcn30/dcn30_hwseq.c    | 157 +++++++++++++-----
>   .../amd/display/dc/dcn302/dcn302_resource.c   |   4 +-
>   .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |   5 +
>   4 files changed, 129 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
> index e21d4602e427..71d46ade24e5 100644
> --- a/drivers/gpu/drm/amd/display/dc/dc.h
> +++ b/drivers/gpu/drm/amd/display/dc/dc.h
> @@ -502,6 +502,8 @@ struct dc_debug_options {
>   #if defined(CONFIG_DRM_AMD_DC_DCN)
>   	bool disable_idle_power_optimizations;
>   	unsigned int mall_size_override;
> +	unsigned int mall_additional_timer_percent;
> +	bool mall_error_as_fatal;
>   #endif
>   	bool dmub_command_table; /* for testing only */
>   	struct dc_bw_validation_profile bw_val_profile;
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> index 5c546b06f551..dff83c6a142a 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> @@ -710,8 +710,11 @@ void dcn30_program_dmdata_engine(struct pipe_ctx *pipe_ctx)
>   bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
>   {
>   	union dmub_rb_cmd cmd;
> -	unsigned int surface_size, refresh_hz, denom;
>   	uint32_t tmr_delay = 0, tmr_scale = 0;
> +	struct dc_cursor_attributes cursor_attr;
> +	bool cursor_cache_enable = false;
> +	struct dc_stream_state *stream = NULL;
> +	struct dc_plane_state *plane = NULL;
>   
>   	if (!dc->ctx->dmub_srv)
>   		return false;
> @@ -722,72 +725,150 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
>   
>   			/* First, check no-memory-requests case */
>   			for (i = 0; i < dc->current_state->stream_count; i++) {
> -				if (dc->current_state->stream_status[i]
> -					    .plane_count)
> +				if (dc->current_state->stream_status[i].plane_count)
>   					/* Fail eligibility on a visible stream */
>   					break;
>   			}
>   
> -			if (dc->current_state->stream_count == 1 // single display only
> -			    && dc->current_state->stream_status[0].plane_count == 1 // single surface only
> -			    && dc->current_state->stream_status[0].plane_states[0]->address.page_table_base.quad_part == 0 // no VM
> -			    // Only 8 and 16 bit formats
> -			    && dc->current_state->stream_status[0].plane_states[0]->format <= SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F
> -			    && dc->current_state->stream_status[0].plane_states[0]->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB8888) {
> -				surface_size = dc->current_state->stream_status[0].plane_states[0]->plane_size.surface_pitch *
> -					dc->current_state->stream_status[0].plane_states[0]->plane_size.surface_size.height *
> -					(dc->current_state->stream_status[0].plane_states[0]->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ?
> -					 8 : 4);
> -			} else {
> -				// TODO: remove hard code size
> -				surface_size = 128 * 1024 * 1024;
> +			if (i == dc->current_state->stream_count) {
> +				/* Enable no-memory-requests case */
> +				memset(&cmd, 0, sizeof(cmd));
> +				cmd.mall.header.type = DMUB_CMD__MALL;
> +				cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_NO_DF_REQ;
> +				cmd.mall.header.payload_bytes = sizeof(cmd.mall) - sizeof(cmd.mall.header);
> +
> +				dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
> +				dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
> +
> +				return true;
>   			}
>   
> -			// TODO: remove hard code size
> -			if (surface_size < 128 * 1024 * 1024) {
> -				refresh_hz = div_u64((unsigned long long) dc->current_state->streams[0]->timing.pix_clk_100hz *
> -						     100LL,
> -						     (dc->current_state->streams[0]->timing.v_total *
> -						      dc->current_state->streams[0]->timing.h_total));
> +			stream = dc->current_state->streams[0];
> +			plane = (stream ? dc->current_state->stream_status[0].plane_states[0] : NULL);
> +
> +			if (stream && plane) {
> +				cursor_cache_enable = stream->cursor_position.enable &&
> +						plane->address.grph.cursor_cache_addr.quad_part;
> +				cursor_attr = stream->cursor_attributes;
> +			}
> +
> +			/*
> +			 * Second, check MALL eligibility
> +			 *
> +			 * single display only, single surface only, 8 and 16 bit formats only, no VM,
> +			 * do not use MALL for displays that support PSR as they use D0i3.2 in DMCUB FW
> +			 *
> +			 * TODO: When we implement multi-display, PSR displays will be allowed if there is
> +			 * a non-PSR display present, since in that case we can't do D0i3.2
> +			 */
> +			if (dc->current_state->stream_count == 1 &&
> +					stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED &&
> +					dc->current_state->stream_status[0].plane_count == 1 &&
> +					plane->format <= SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F &&
> +					plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB8888 &&
> +					plane->address.page_table_base.quad_part == 0 &&
> +					dc->hwss.does_plane_fit_in_mall &&
> +					dc->hwss.does_plane_fit_in_mall(dc, plane,
> +							cursor_cache_enable ? &cursor_attr : NULL)) {
> +				unsigned int v_total = stream->adjust.v_total_max ?
> +						stream->adjust.v_total_max : stream->timing.v_total;
> +				unsigned int refresh_hz = (unsigned long long) stream->timing.pix_clk_100hz *
> +						100LL /	(v_total * stream->timing.h_total);
>   
>   				/*
> -				 * Delay_Us = 65.28 * (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
> -				 * Delay_Us / 65.28 = (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
> -				 * (Delay_Us / 65.28) / 2^MallFrameCacheTmrScale = 64 + MallFrameCacheTmrDly
> -				 * MallFrameCacheTmrDly = ((Delay_Us / 65.28) / 2^MallFrameCacheTmrScale) - 64
> -				 *                      = (1000000 / refresh) / 65.28 / 2^MallFrameCacheTmrScale - 64
> -				 *                      = 1000000 / (refresh * 65.28 * 2^MallFrameCacheTmrScale) - 64
> -				 *                      = (1000000 * 100) / (refresh * 6528 * 2^MallFrameCacheTmrScale) - 64
> +				 * one frame time in microsec:
> +				 * Delay_Us = 1000000 / refresh
> +				 * dynamic_delay_us = 1000000 / refresh + 2 * stutter_period
> +				 *
> +				 * one frame time modified by 'additional timer percent' (p):
> +				 * Delay_Us_modified = dynamic_delay_us + dynamic_delay_us * p / 100
> +				 *                   = dynamic_delay_us * (1 + p / 100)
> +				 *                   = (1000000 / refresh + 2 * stutter_period) * (100 + p) / 100
> +				 *                   = (1000000 + 2 * stutter_period * refresh) * (100 + p) / (100 * refresh)
> +				 *
> +				 * formula for timer duration based on parameters, from regspec:
> +				 * dynamic_delay_us = 65.28 * (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
> +				 *
> +				 * dynamic_delay_us / 65.28 = (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
> +				 * (dynamic_delay_us / 65.28) / 2^MallFrameCacheTmrScale = 64 + MallFrameCacheTmrDly
> +				 * MallFrameCacheTmrDly = ((dynamic_delay_us / 65.28) / 2^MallFrameCacheTmrScale) - 64
> +				 *                      = (1000000 + 2 * stutter_period * refresh) * (100 + p) / (100 * refresh) / 65.28 / 2^MallFrameCacheTmrScale - 64
> +				 *                      = (1000000 + 2 * stutter_period * refresh) * (100 + p) / (refresh * 6528 * 2^MallFrameCacheTmrScale) - 64
>   				 *
>   				 * need to round up the result of the division before the subtraction
>   				 */
> -				denom = refresh_hz * 6528;
> -				tmr_delay = div_u64((100000000LL + denom - 1), denom) - 64LL;
> +				unsigned int denom = refresh_hz * 6528;
> +				unsigned int stutter_period = dc->current_state->perf_params.stutter_period_us;
> +
> +				tmr_delay = (((1000000LL + 2 * stutter_period * refresh_hz) *
> +						(100LL + dc->debug.mall_additional_timer_percent) + denom - 1) /
> +						denom) - 64LL;
>   
>   				/* scale should be increased until it fits into 6 bits */
>   				while (tmr_delay & ~0x3F) {
>   					tmr_scale++;
>   
>   					if (tmr_scale > 3) {
> -						/* The delay exceeds the range of the hystersis timer */
> +						/* Delay exceeds range of hysteresis timer */
>   						ASSERT(false);
>   						return false;
>   					}
>   
>   					denom *= 2;
> -					tmr_delay = div_u64((100000000LL + denom - 1), denom) - 64LL;
> +					tmr_delay = (((1000000LL + 2 * stutter_period * refresh_hz) *
> +							(100LL + dc->debug.mall_additional_timer_percent) + denom - 1) /
> +							denom) - 64LL;
> +				}
> +
> +				/* Copy HW cursor */
> +				if (cursor_cache_enable) {
> +					memset(&cmd, 0, sizeof(cmd));
> +					cmd.mall.header.type = DMUB_CMD__MALL;
> +					cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_COPY_CURSOR;
> +					cmd.mall.header.payload_bytes =
> +							sizeof(cmd.mall) - sizeof(cmd.mall.header);
> +
> +					switch (cursor_attr.color_format) {
> +					case CURSOR_MODE_MONO:
> +						cmd.mall.cursor_bpp = 2;
> +						break;
> +					case CURSOR_MODE_COLOR_1BIT_AND:
> +					case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
> +					case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
> +						cmd.mall.cursor_bpp = 32;
> +						break;
> +
> +					case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
> +					case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
> +						cmd.mall.cursor_bpp = 64;
> +						break;
> +					}
> +
> +					cmd.mall.cursor_copy_src.quad_part = cursor_attr.address.quad_part;
> +					cmd.mall.cursor_copy_dst.quad_part =
> +							plane->address.grph.cursor_cache_addr.quad_part;
> +					cmd.mall.cursor_width = cursor_attr.width;
> +					cmd.mall.cursor_height = cursor_attr.height;
> +					cmd.mall.cursor_pitch = cursor_attr.pitch;
> +
> +					dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
> +					dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
> +					dc_dmub_srv_wait_idle(dc->ctx->dmub_srv);
> +
> +					/* Use copied cursor, and it's okay to not switch back */
> +					cursor_attr.address.quad_part =
> +							plane->address.grph.cursor_cache_addr.quad_part;
> +					dc_stream_set_cursor_attributes(stream, &cursor_attr);
>   				}
>   
>   				/* Enable MALL */
>   				memset(&cmd, 0, sizeof(cmd));
>   				cmd.mall.header.type = DMUB_CMD__MALL;
> -				cmd.mall.header.sub_type =
> -					DMUB_CMD__MALL_ACTION_ALLOW;
> -				cmd.mall.header.payload_bytes =
> -					sizeof(cmd.mall) -
> -					sizeof(cmd.mall.header);
> +				cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_ALLOW;
> +				cmd.mall.header.payload_bytes = sizeof(cmd.mall) - sizeof(cmd.mall.header);
>   				cmd.mall.tmr_delay = tmr_delay;
>   				cmd.mall.tmr_scale = tmr_scale;
> +				cmd.mall.debug_bits = dc->debug.mall_error_as_fatal;
>   
>   				dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
>   				dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
> index b96b32a37178..b4ff73e32279 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
> @@ -1316,7 +1316,9 @@ static bool dcn302_resource_construct(
>   	dc->caps.max_cursor_size = 256;
>   	dc->caps.min_horizontal_blanking_period = 80;
>   	dc->caps.dmdata_alloc_size = 2048;
> -
> +	dc->caps.mall_size_per_mem_channel = 4;
> +	/* total size = mall per channel * num channels * 1024 * 1024 */
> +	dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576;
>   	dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8;
>   	dc->caps.max_slave_planes = 1;
>   	dc->caps.post_blend_color_processing = true;
> diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
> index a50bdd6c7131..98dd8b5f463c 100644
> --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
> +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
> @@ -458,6 +458,10 @@ struct dmub_rb_cmd_mall {
>   	uint16_t cursor_pitch;
>   	uint16_t cursor_height;
>   	uint8_t cursor_bpp;
> +	uint8_t debug_bits;
> +
> +	uint8_t reserved1;
> +	uint8_t reserved2;
>   };
>   
>   struct dmub_cmd_digx_encoder_control_data {
> @@ -624,6 +628,7 @@ enum dmub_cmd_mall_type {
>   	DMUB_CMD__MALL_ACTION_ALLOW = 0,
>   	DMUB_CMD__MALL_ACTION_DISALLOW = 1,
>   	DMUB_CMD__MALL_ACTION_COPY_CURSOR = 2,
> +	DMUB_CMD__MALL_ACTION_NO_DF_REQ = 3,
>   };
>   
>   struct dmub_cmd_psr_copy_settings_data {
> 



More information about the amd-gfx mailing list