<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<p style="font-family:Arial;font-size:10pt;color:#0000FF;margin:5pt;" align="Left">
[AMD Official Use Only - General]<br>
</p>
<br>
<div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0); background-color: rgb(255, 255, 255);" class="elementToProof ContentPasted0">
Reviewed-by: Bhawanpreet Lakha &lt;Bhawanpreet.Lakha@amd.com&gt;<br>
</div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> amd-gfx &lt;amd-gfx-bounces@lists.freedesktop.org&gt; on behalf of Aurabindo Pillai &lt;aurabindo.pillai@amd.com&gt;<br>
<b>Sent:</b> March 10, 2023 12:56 PM<br>
<b>To:</b> amd-gfx@lists.freedesktop.org &lt;amd-gfx@lists.freedesktop.org&gt;<br>
<b>Cc:</b> Wentland, Harry &lt;Harry.Wentland@amd.com&gt;; Siqueira, Rodrigo &lt;Rodrigo.Siqueira@amd.com&gt;; Mahfooz, Hamza &lt;Hamza.Mahfooz@amd.com&gt;<br>
<b>Subject:</b> Re: [PATCH 2/2] drm/amd/display: Enable FAMS for DCN3x</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText"><br>
<br>
On 3/10/23 12:48, Aurabindo Pillai wrote:<br>
> [Why&How]<br>
> Firmware Assisted Memclk Switching enables lowering mclk using DMCUB<br>
> when it cannot be normally done due to not having enough time within<br>
> vblank. FAMS extends vblank on monitors that support variable refresh<br>
> rate thereby allowing enough time to do an mclk switch sequence<br>
> during vblank.<br>
> <br>
> When tested with 4k@144Hz monitor on DCN32, power consumption of about<br>
> 40W was saved since multiple clocks like MCLK, SOCCLK, and FCLK<br>
> were brought down.<br>
> <br>
> Signed-off-by: Aurabindo Pillai &lt;aurabindo.pillai@amd.com&gt;<br>
> Signed-off-by: Rodrigo Siqueira &lt;Rodrigo.Siqueira@amd.com&gt;<br>
> ---<br>
>   .../gpu/drm/amd/display/dc/dcn30/dcn30_optc.c |  7 +-<br>
>   .../drm/amd/display/dc/dcn30/dcn30_resource.h |  3 +<br>
>   .../drm/amd/display/dc/dcn31/dcn31_hwseq.c    |  4 ++<br>
>   .../drm/amd/display/dc/dcn32/dcn32_hwseq.c    |  2 +<br>
>   .../drm/amd/display/dc/dcn32/dcn32_resource.c |  2 +-<br>
>   .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.c  | 71 ++++++++++++++++---<br>
>   .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  |  5 ++<br>
>   7 files changed, 84 insertions(+), 10 deletions(-)<br>
> <br>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c<br>
> index 08b92715e2e6..9963bffb1e07 100644<br>
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c<br>
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c<br>
> @@ -301,7 +301,12 @@ void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc)<br>
>   <br>
>   void optc3_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max)<br>
>   {<br>
> -     optc1_set_vtotal_min_max(optc, vtotal_min, vtotal_max);<br>
> +     struct dc *dc = optc->ctx->dc;<br>
> +<br>
> +     if (dc->caps.dmub_caps.mclk_sw && !dc->debug.disable_fams)<br>
> +             dc_dmub_srv_drr_update_cmd(dc, optc->inst, vtotal_min, vtotal_max);<br>
> +     else<br>
> +             optc1_set_vtotal_min_max(optc, vtotal_min, vtotal_max);<br>
>   }<br>
>   <br>
>   void optc3_tg_init(struct timing_generator *optc)<br>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h<br>
> index 8e6b8b7368fd..d8805618a9a1 100644<br>
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h<br>
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h<br>
> @@ -102,6 +102,9 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params<br>
>   <br>
>   bool dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);<br>
>   void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);<br>
> +<br>
> +void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);<br>
> +<br>
<br>
This is a duplicate; I will remove it before applying.<br>
>   int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, struct dc_state *context,<br>
>                display_e2e_pipe_params_st *pipes, int pipe_cnt, int vlevel);<br>
>   <br>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c<br>
> index 80a0c5a575a9..40080113ed5e 100644<br>
> --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c<br>
> +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c<br>
> @@ -295,6 +295,10 @@ void dcn31_init_hw(struct dc *dc)<br>
>        if (dc->res_pool->hubbub->funcs->init_crb)<br>
>                dc->res_pool->hubbub->funcs->init_crb(dc->res_pool->hubbub);<br>
>   #endif<br>
> +     /* Get DMCUB capabilities */<br>
> +     dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv->dmub);<br>
> +     dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr;<br>
> +     dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch;<br>
>   }<br>
>   <br>
>   void dcn31_dsc_pg_control(<br>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c<br>
> index f87db2271924..3220f9ad8a47 100644<br>
> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c<br>
> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c<br>
> @@ -919,6 +919,8 @@ void dcn32_init_hw(struct dc *dc)<br>
>        if (dc->ctx->dmub_srv) {<br>
>                dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv->dmub);<br>
>                dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr;<br>
> +             dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch;<br>
> +<br>
<br>
I will remove the extra newline before applying.<br>
>        }<br>
>   }<br>
>   <br>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c<br>
> index 100b6df33b33..b1944e49a65d 100644<br>
> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c<br>
> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c<br>
> @@ -2013,7 +2013,7 @@ int dcn32_populate_dml_pipes_from_context(<br>
>        // In general cases we want to keep the dram clock change requirement<br>
>        // (prefer configs that support MCLK switch). Only override to false<br>
>        // for SubVP<br>
> -     if (subvp_in_use)<br>
> +     if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || subvp_in_use)<br>
>                context->bw_ctx.dml.soc.dram_clock_change_requirement_final = false;<br>
>        else<br>
>                context->bw_ctx.dml.soc.dram_clock_change_requirement_final = true;<br>
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c<br>
> index 4fa636364793..53f21b0b3630 100644<br>
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c<br>
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c<br>
> @@ -368,7 +368,9 @@ void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)<br>
>        dc_assert_fp_enabled();<br>
>   <br>
>        if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {<br>
> -             context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;<br>
> +             if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching ||<br>
> +                             context->bw_ctx.dml.soc.dram_clock_change_latency_us == 0)<br>
> +                     context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;<br>
>                context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;<br>
>                context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;<br>
>        }<br>
> @@ -384,9 +386,34 @@ void dcn30_fpu_calculate_wm_and_dlg(<br>
>        int i, pipe_idx;<br>
>        double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];<br>
>        bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;<br>
> +     unsigned int dummy_latency_index = 0;<br>
>   <br>
>        dc_assert_fp_enabled();<br>
>   <br>
> +     context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false;<br>
> +<br>
> +     if (!pstate_en) {<br>
> +             /* only when the mclk switch can not be natural, is the fw based vblank stretch attempted */<br>
> +             context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching =<br>
> +                     dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context);<br>
> +<br>
> +             if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {<br>
> +                     dummy_latency_index = dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(dc,<br>
> +                             context, pipes, pipe_cnt, vlevel);<br>
> +<br>
> +                     /* After calling dcn30_find_dummy_latency_index_for_fw_based_mclk_switch<br>
> +                      * we reinstate the original dram_clock_change_latency_us on the context<br>
> +                      * and all variables that may have changed up to this point, except the<br>
> +                      * newly found dummy_latency_index<br>
> +                      */<br>
> +                     context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;<br>
> +                     dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false, true);<br>
> +                     maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;<br>
> +                     dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];<br>
> +                     pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;<br>
> +             }<br>
> +     }<br>
> +<br>
>        if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)<br>
>                dcfclk = context->bw_ctx.dml.soc.min_dcfclk;<br>
>   <br>
> @@ -449,15 +476,29 @@ void dcn30_fpu_calculate_wm_and_dlg(<br>
>                unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;<br>
>                unsigned int min_dram_speed_mts_margin = 160;<br>
>   <br>
> -             if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported)<br>
> -                     min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16;<br>
> +             context->bw_ctx.dml.soc.dram_clock_change_latency_us =<br>
> +                     dc->clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us;<br>
>   <br>
> -             /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */<br>
> -             for (i = 3; i > 0; i--)<br>
> -                     if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts)<br>
> -                             break;<br>
> +             if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] ==<br>
> +                     dm_dram_clock_change_unsupported) {<br>
> +                     int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1;<br>
> +<br>
> +                     min_dram_speed_mts =<br>
> +                             dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16;<br>
> +             }<br>
>   <br>
> -             context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us;<br>
> +             if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {<br>
> +                     /* find largest table entry that is lower than dram speed,<br>
> +                      * but lower than DPM0 still uses DPM0<br>
> +                      */<br>
> +                     for (dummy_latency_index = 3; dummy_latency_index > 0; dummy_latency_index--)<br>
> +                             if (min_dram_speed_mts + min_dram_speed_mts_margin ><br>
> +                                     dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dram_speed_mts)<br>
> +                                     break;<br>
> +             }<br>
> +<br>
> +             context->bw_ctx.dml.soc.dram_clock_change_latency_us =<br>
> +                     dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;<br>
>   <br>
>                context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;<br>
>                context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;<br>
> @@ -520,6 +561,20 @@ void dcn30_fpu_calculate_wm_and_dlg(<br>
>                pipe_idx++;<br>
>        }<br>
>   <br>
> +     /* WA: restrict FW MCLK switch to use first non-strobe mode (Beige Goby BW issue) */<br>
> +     if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching &&<br>
>                     dc->dml.soc.num_chans &lt;= 4 &&<br>
>                     context->bw_ctx.dml.vba.DRAMSpeed &lt;= 1700 &&<br>
> +                     context->bw_ctx.dml.vba.DRAMSpeed >= 1500) {<br>
> +<br>
> +             for (i = 0; i &lt; dc->dml.soc.num_states; i++) {<br>
> +                     if (dc->dml.soc.clock_limits[i].dram_speed_mts > 1700) {<br>
> +                             context->bw_ctx.dml.vba.DRAMSpeed = dc->dml.soc.clock_limits[i].dram_speed_mts;<br>
> +                             break;<br>
> +                     }<br>
> +             }<br>
> +     }<br>
> +<br>
>        dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);<br>
>   <br>
>        if (!pstate_en)<br>
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c<br>
> index 077674be452b..ee2683200799 100644<br>
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c<br>
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c<br>
> @@ -1331,6 +1331,11 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,<br>
>                        context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]<br>
>                                        != dm_dram_clock_change_unsupported;<br>
>   <br>
> +     /* Pstate change might not be supported by hardware, but it might be<br>
> +      * possible with firmware driven vertical blank stretching.<br>
> +      */<br>
> +     context->bw_ctx.bw.dcn.clk.p_state_change_support |= context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching;<br>
> +<br>
>        context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;<br>
>        context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);<br>
>        context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = context->bw_ctx.dml.vba.DTBCLKPerState[vlevel] * 1000;<br>
</div>
</span></font></div>
</div>
</body>
</html>