<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<p style="font-family:Arial;font-size:10pt;color:#0000FF;margin:5pt;" align="Left">
[AMD Official Use Only - General]<br>
</p>
<br>
<div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0); background-color: rgb(255, 255, 255);" class="elementToProof">
Please remove the file "<span style="color: rgb(0, 0, 0); font-family: 'Segoe UI', 'Segoe UI Web (West European)', 'Segoe UI', -apple-system, BlinkMacSystemFont, Roboto, 'Helvetica Neue', sans-serif; font-size: 14.6667px; background-color: rgb(255, 255, 255); display: inline !important;">/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c.rej"
if it's not necessary.</span></div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0); background-color: rgb(255, 255, 255);" class="elementToProof">
<span style="color: rgb(0, 0, 0); font-family: 'Segoe UI', 'Segoe UI Web (West European)', 'Segoe UI', -apple-system, BlinkMacSystemFont, Roboto, 'Helvetica Neue', sans-serif; font-size: 14.6667px; background-color: rgb(255, 255, 255); display: inline !important;"><br>
</span></div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0); background-color: rgb(255, 255, 255);" class="elementToProof">
<span style="color: rgb(0, 0, 0); font-family: 'Segoe UI', 'Segoe UI Web (West European)', 'Segoe UI', -apple-system, BlinkMacSystemFont, Roboto, 'Helvetica Neue', sans-serif; font-size: 14.6667px; background-color: rgb(255, 255, 255); display: inline !important;">Thanks,</span></div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0); background-color: rgb(255, 255, 255);" class="elementToProof">
<span style="color: rgb(0, 0, 0); font-family: 'Segoe UI', 'Segoe UI Web (West European)', 'Segoe UI', -apple-system, BlinkMacSystemFont, Roboto, 'Helvetica Neue', sans-serif; font-size: 14.6667px; background-color: rgb(255, 255, 255); display: inline !important;">Wayne</span></div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> Kotarac, Pavle &lt;Pavle.Kotarac@amd.com&gt;<br>
<b>Sent:</b> Thursday, September 8, 2022 01:32<br>
<b>To:</b> amd-gfx@lists.freedesktop.org &lt;amd-gfx@lists.freedesktop.org&gt;<br>
<b>Cc:</b> Wentland, Harry &lt;Harry.Wentland@amd.com&gt;; Li, Sun peng (Leo) &lt;Sunpeng.Li@amd.com&gt;; Lakha, Bhawanpreet &lt;Bhawanpreet.Lakha@amd.com&gt;; Siqueira, Rodrigo &lt;Rodrigo.Siqueira@amd.com&gt;; Pillai, Aurabindo &lt;Aurabindo.Pillai@amd.com&gt;; Zhuo, Qingqing (Lillian)
&lt;Qingqing.Zhuo@amd.com&gt;; Li, Roman &lt;Roman.Li@amd.com&gt;; Lin, Wayne &lt;Wayne.Lin@amd.com&gt;; Wang, Chao-kai (Stylon) &lt;Stylon.Wang@amd.com&gt;; Chiu, Solomon &lt;Solomon.Chiu@amd.com&gt;; Kotarac, Pavle &lt;Pavle.Kotarac@amd.com&gt;; Gutierrez, Agustin &lt;Agustin.Gutierrez@amd.com&gt;;
Cyr, Aric &lt;Aric.Cyr@amd.com&gt;; Stupar, Nevenko &lt;Nevenko.Stupar@amd.com&gt;; Kotarac, Pavle &lt;Pavle.Kotarac@amd.com&gt;<br>
<b>Subject:</b> [PATCH 02/27] drm/amd/display: Optimizations for DML math</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText">From: Aric Cyr &lt;aric.cyr@amd.com&gt;<br>
<br>
[why]<br>
Conditionals in the DML basic math functions significantly impact mode<br>
enumeration.<br>
<br>
[how]<br>
Remove conditionals for floor/ceil operations which are used frequently<br>
in DML and add an assertion for invalid callers using zero granularity.<br>
Fix existing callers that rely on 0 granularity.<br>
<br>
Reviewed-by: Nevenko Stupar &lt;Nevenko.Stupar@amd.com&gt;<br>
Acked-by: Pavle Kotarac &lt;Pavle.Kotarac@amd.com&gt;<br>
Signed-off-by: Aric Cyr &lt;aric.cyr@amd.com&gt;<br>
---<br>
.../amd/display/dc/dml/calcs/dcn_calc_auto.c | 22 ++++++-------<br>
.../amd/display/dc/dml/calcs/dcn_calc_math.c | 16 +++++-----<br>
.../dc/dml/dcn20/display_mode_vba_20v2.c | 10 +++---<br>
.../dc/dml/dcn21/display_mode_vba_21.c | 6 ++--<br>
.../dc/dml/dcn30/display_mode_vba_30.c | 8 ++---<br>
.../dc/dml/dcn31/display_mode_vba_31.c | 6 ++--<br>
.../dc/dml/dcn314/display_mode_vba_314.c | 6 ++--<br>
.../dc/dml/dcn32/display_mode_vba_util_32.c | 31 +++++++++----------<br>
.../dml/dcn32/display_mode_vba_util_32.c.rej | 12 +++++++<br>
.../drm/amd/display/dc/dml/dml_inline_defs.h | 9 ++----<br>
10 files changed, 65 insertions(+), 61 deletions(-)<br>
create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c.rej<br>
<br>
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c<br>
index 41284e263325..288d22a16cf2 100644<br>
--- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c<br>
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c<br>
@@ -526,10 +526,10 @@ void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v)<br>
}<br>
if (v->max_swath_height_c[k] > 0.0) {<br>
v->swath_width_granularity_c = 256.0 /dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / v->max_swath_height_c[k];<br>
- }<br>
- v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width_yper_state[i][j][k] / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pixel_in_detc[k] * v->max_swath_height_c[k];<br>
- if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {<br>
- v->rounded_up_max_swath_size_bytes_c =dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;<br>
+ v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width_yper_state[i][j][k] / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pixel_in_detc[k] * v->max_swath_height_c[k];<br>
+ if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {<br>
+ v->rounded_up_max_swath_size_bytes_c = dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;<br>
+ }<br>
}<br>
if (v->rounded_up_max_swath_size_bytes_y + v->rounded_up_max_swath_size_bytes_c <= v->det_buffer_size_in_kbyte * 1024.0 / 2.0) {<br>
v->swath_height_yper_state[i][j][k] = v->max_swath_height_y[k];<br>
@@ -552,14 +552,14 @@ void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v)<br>
v->lines_in_det_chroma = v->det_buffer_size_in_kbyte * 1024.0 / 3.0 / v->byte_per_pixel_in_dety[k] / (v->swath_width_yper_state[i][j][k] / 2.0);<br>
}<br>
v->effective_lb_latency_hiding_source_lines_luma =dcn_bw_min2(v->max_line_buffer_lines,dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] /dcn_bw_max2(v->h_ratio[k], 1.0)), 1.0))
- (v->vtaps[k] - 1.0);<br>
- v->effective_lb_latency_hiding_source_lines_chroma =dcn_bw_min2(v->max_line_buffer_lines,dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] / 2.0 /dcn_bw_max2(v->h_ratio[k] / 2.0,
1.0)), 1.0)) - (v->vta_pschroma[k] - 1.0);<br>
v->effective_detlb_lines_luma =dcn_bw_floor2(v->lines_in_det_luma +dcn_bw_min2(v->lines_in_det_luma * v->required_dispclk[i][j] * v->byte_per_pixel_in_dety[k] * v->pscl_factor[k] / v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_luma),
v->swath_height_yper_state[i][j][k]);<br>
- v->effective_detlb_lines_chroma =dcn_bw_floor2(v->lines_in_det_chroma +dcn_bw_min2(v->lines_in_det_chroma * v->required_dispclk[i][j] * v->byte_per_pixel_in_detc[k] * v->pscl_factor_chroma[k] / v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_chroma),
v->swath_height_cper_state[i][j][k]);<br>
if (v->byte_per_pixel_in_detc[k] == 0.0) {<br>
v->urgent_latency_support_us_per_state[i][j][k] = v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k],
1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]);<br>
}<br>
else {<br>
- v->urgent_latency_support_us_per_state[i][j][k] =dcn_bw_min2(v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k],
1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]), v->effective_detlb_lines_chroma * (v->htotal[k] / v->pixel_clock[k]) / (v->v_ratio[k] / 2.0) - v->effective_detlb_lines_chroma * v->swath_width_yper_state[i][j][k] / 2.0 *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k],
2.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]));<br>
+ v->effective_lb_latency_hiding_source_lines_chroma = dcn_bw_min2(v->max_line_buffer_lines, dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] / 2.0 / dcn_bw_max2(v->h_ratio[k]
/ 2.0, 1.0)), 1.0)) - (v->vta_pschroma[k] - 1.0);<br>
+ v->effective_detlb_lines_chroma = dcn_bw_floor2(v->lines_in_det_chroma + dcn_bw_min2(v->lines_in_det_chroma * v->required_dispclk[i][j] * v->byte_per_pixel_in_detc[k] * v->pscl_factor_chroma[k] / v->return_bw_per_state[i],
v->effective_lb_latency_hiding_source_lines_chroma), v->swath_height_cper_state[i][j][k]);<br>
+ v->urgent_latency_support_us_per_state[i][j][k] = dcn_bw_min2(v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] * dcn_bw_ceil2(v->byte_per_pixel_in_dety[k],
1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]), v->effective_detlb_lines_chroma * (v->htotal[k] / v->pixel_clock[k]) / (v->v_ratio[k] / 2.0) - v->effective_detlb_lines_chroma * v->swath_width_yper_state[i][j][k] / 2.0 * dcn_bw_ceil2(v->byte_per_pixel_in_detc[k],
2.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]));<br>
}<br>
}<br>
}<br>
@@ -1146,10 +1146,10 @@ void display_pipe_configuration(struct dcn_bw_internal_vars *v)<br>
}<br>
if (v->maximum_swath_height_c > 0.0) {<br>
v->swath_width_granularity_c = 256.0 /dcn_bw_ceil2(v->byte_per_pix_detc, 2.0) / v->maximum_swath_height_c;<br>
- }<br>
- v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pix_detc * v->maximum_swath_height_c;<br>
- if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {<br>
- v->rounded_up_max_swath_size_bytes_c =dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;<br>
+ v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pix_detc * v->maximum_swath_height_c;<br>
+ if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {<br>
+ v->rounded_up_max_swath_size_bytes_c = dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;<br>
+ }<br>
}<br>
if (v->rounded_up_max_swath_size_bytes_y + v->rounded_up_max_swath_size_bytes_c <= v->det_buffer_size_in_kbyte * 1024.0 / 2.0) {<br>
v->swath_height_y[k] = v->maximum_swath_height_y;<br>
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c<br>
index 07d18e78de49..cac72413a097 100644<br>
--- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c<br>
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c<br>
@@ -23,6 +23,7 @@<br>
*<br>
*/<br>
<br>
+#include "os_types.h"<br>
#include "dcn_calc_math.h"<br>
<br>
#define isNaN(number) ((number) != (number))<br>
@@ -69,8 +70,8 @@ float dcn_bw_max2(const float arg1, const float arg2)<br>
<br>
float dcn_bw_floor2(const float arg, const float significance)<br>
{<br>
- if (significance == 0)<br>
- return 0;<br>
+ ASSERT(significance != 0);<br>
+<br>
return ((int) (arg / significance)) * significance;<br>
}<br>
float dcn_bw_floor(const float arg)<br>
@@ -80,17 +81,14 @@ float dcn_bw_floor(const float arg)<br>
<br>
float dcn_bw_ceil(const float arg)<br>
{<br>
- float flr = dcn_bw_floor2(arg, 1);<br>
-<br>
- return flr + 0.00001 >= arg ? arg : flr + 1;<br>
+ return (int) (arg + 0.99999);<br>
}<br>
<br>
float dcn_bw_ceil2(const float arg, const float significance)<br>
{<br>
- float flr = dcn_bw_floor2(arg, significance);<br>
- if (significance == 0)<br>
- return 0;<br>
- return flr + 0.00001 >= arg ? arg : flr + significance;<br>
+ ASSERT(significance != 0);<br>
+<br>
+ return ((int) (arg / significance + 0.99999)) * significance;<br>
}<br>
<br>
float dcn_bw_max3(float v1, float v2, float v3)<br>
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c<br>
index 63bbdf8b8678..edd098c7eb92 100644<br>
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c<br>
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c<br>
@@ -4478,17 +4478,17 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode<br>
locals->EffectiveLBLatencyHidingSourceLinesLuma),<br>
locals->SwathHeightYPerState[i][j][k]);<br>
<br>
- locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(<br>
- locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *<br>
- locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],<br>
- locals->EffectiveLBLatencyHidingSourceLinesChroma),<br>
- locals->SwathHeightCPerState[i][j][k]);<br>
<br>
if (locals->BytePerPixelInDETC[k] == 0) {<br>
locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])<br>
/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *<br>
dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]);<br>
} else {<br>
+ locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(<br>
+ locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *<br>
+ locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],<br>
+ locals->EffectiveLBLatencyHidingSourceLinesChroma),<br>
+ locals->SwathHeightCPerState[i][j][k]);<br>
locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min(<br>
locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])<br>
/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *<br>
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c<br>
index 8a7485e21d53..d40d32e380f4 100644<br>
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c<br>
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c<br>
@@ -806,10 +806,12 @@ static bool CalculatePrefetchSchedule(<br>
<br>
if (myPipe->SourceScan == dm_horz) {<br>
*swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY;<br>
- *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;<br>
+ if (myPipe->BlockWidth256BytesC > 0)<br>
+ *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;<br>
} else {<br>
*swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY;<br>
- *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;<br>
+ if (myPipe->BlockWidth256BytesC > 0)<br>
+ *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;<br>
}<br>
<br>
prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto;<br>
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c<br>
index b7fa003ffe06..c117a9724ae1 100644<br>
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c<br>
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c<br>
@@ -6322,10 +6322,6 @@ static void CalculateSwathWidth(<br>
<br>
for (k = 0; k < NumberOfActivePlanes; ++k) {<br>
enum odm_combine_mode MainPlaneODMCombine = 0;<br>
- surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);<br>
- surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);<br>
- surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);<br>
- surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);<br>
<br>
if (SourceScan[k] != dm_vert) {<br>
SwathWidthSingleDPPY[k] = ViewportWidth[k];<br>
@@ -6365,8 +6361,6 @@ static void CalculateSwathWidth(<br>
<br>
surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);<br>
surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);<br>
- surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);<br>
- surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);<br>
<br>
if (SourceScan[k] != dm_vert) {<br>
MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];<br>
@@ -6374,6 +6368,7 @@ static void CalculateSwathWidth(<br>
swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,<br>
Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);<br>
if (BytePerPixC[k] > 0) {<br>
+ surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);<br>
swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,<br>
Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);<br>
} else {<br>
@@ -6385,6 +6380,7 @@ static void CalculateSwathWidth(<br>
swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,<br>
Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);<br>
if (BytePerPixC[k] > 0) {<br>
+ surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);<br>
swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,<br>
Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);<br>
} else {<br>
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c<br>
index d63b4209b14c..8753f94bdd79 100644<br>
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c<br>
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c<br>
@@ -6933,8 +6933,6 @@ static void CalculateSwathWidth(<br>
{<br>
int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);<br>
int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);<br>
- int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);<br>
- int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);<br>
<br>
#ifdef __DML_VBA_DEBUG__<br>
dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);<br>
@@ -6945,6 +6943,8 @@ static void CalculateSwathWidth(<br>
MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];<br>
swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);<br>
if (BytePerPixC[k] > 0) {<br>
+ int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);<br>
+<br>
swath_width_chroma_ub[k] = dml_min(<br>
surface_width_ub_c,<br>
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);<br>
@@ -6956,6 +6956,8 @@ static void CalculateSwathWidth(<br>
MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];<br>
swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);<br>
if (BytePerPixC[k] > 0) {<br>
+ int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);<br>
+<br>
swath_width_chroma_ub[k] = dml_min(<br>
surface_height_ub_c,<br>
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);<br>
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c<br>
index fc4d7474c111..503d9ede0ac1 100644<br>
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c<br>
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c<br>
@@ -7049,8 +7049,6 @@ static void CalculateSwathWidth(<br>
{<br>
int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);<br>
int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);<br>
- int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);<br>
- int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);<br>
<br>
#ifdef __DML_VBA_DEBUG__<br>
dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);<br>
@@ -7061,6 +7059,8 @@ static void CalculateSwathWidth(<br>
MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];<br>
swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);<br>
if (BytePerPixC[k] > 0) {<br>
+ int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);<br>
+<br>
swath_width_chroma_ub[k] = dml_min(<br>
surface_width_ub_c,<br>
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);<br>
@@ -7072,6 +7072,8 @@ static void CalculateSwathWidth(<br>
MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];<br>
swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);<br>
if (BytePerPixC[k] > 0) {<br>
+ int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);<br>
+<br>
swath_width_chroma_ub[k] = dml_min(<br>
surface_height_ub_c,<br>
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);<br>
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c<br>
index dc501ee7d01a..c385c54832cb 100644<br>
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c<br>
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c<br>
@@ -719,8 +719,8 @@ void dml32_CalculateSwathWidth(<br>
<br>
unsigned int surface_width_ub_l;<br>
unsigned int surface_height_ub_l;<br>
- unsigned int surface_width_ub_c;<br>
- unsigned int surface_height_ub_c;<br>
+ unsigned int surface_width_ub_c = 0;<br>
+ unsigned int surface_height_ub_c = 0;<br>
<br>
#ifdef __DML_VBA_DEBUG__<br>
dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);<br>
@@ -784,21 +784,6 @@ void dml32_CalculateSwathWidth(<br>
<br>
surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);<br>
surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);<br>
- surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);<br>
- surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);<br>
-<br>
-#ifdef __DML_VBA_DEBUG__<br>
- dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);<br>
- dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);<br>
- dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);<br>
- dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);<br>
- dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);<br>
- dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);<br>
- dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);<br>
- dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);<br>
- dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);<br>
- dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);<br>
-#endif<br>
<br>
if (!IsVertical(SourceRotation[k])) {<br>
MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];<br>
@@ -818,6 +803,7 @@ void dml32_CalculateSwathWidth(<br>
Read256BytesBlockWidthY[k]);<br>
}<br>
if (BytePerPixC[k] > 0) {<br>
+ surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);<br>
if (ViewportStationary[k] && DPPPerSurface[k] == 1) {<br>
swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,<br>
dml_floor(ViewportXStartC[k] + SwathWidthC[k] +<br>
@@ -848,6 +834,7 @@ void dml32_CalculateSwathWidth(<br>
Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);<br>
}<br>
if (BytePerPixC[k] > 0) {<br>
+ surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);<br>
if (ViewportStationary[k] && DPPPerSurface[k] == 1) {<br>
swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,<br>
dml_floor(ViewportYStartC[k] + SwathWidthC[k] +<br>
@@ -866,6 +853,16 @@ void dml32_CalculateSwathWidth(<br>
}<br>
<br>
#ifdef __DML_VBA_DEBUG__<br>
+ dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);<br>
+ dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);<br>
+ dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);<br>
+ dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);<br>
+ dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);<br>
+ dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);<br>
+ dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);<br>
+ dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);<br>
+ dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);<br>
+ dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);<br>
dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);<br>
dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);<br>
dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);<br>
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c.rej b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c.rej<br>
new file mode 100644<br>
index 000000000000..dff2badbf820<br>
--- /dev/null<br>
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c.rej<br>
@@ -0,0 +1,12 @@<br>
+diff a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c (rejected hunks)<br>
+@@ -727,8 +727,8 @@ void dml32_CalculateSwathWidth(<br>
+ enum odm_combine_mode MainSurfaceODMMode;<br>
+ unsigned int surface_width_ub_l;<br>
+ unsigned int surface_height_ub_l;<br>
+- unsigned int surface_width_ub_c;<br>
+- unsigned int surface_height_ub_c;<br>
++ unsigned int surface_width_ub_c = 0;<br>
++ unsigned int surface_height_ub_c = 0;<br>
+ unsigned int k, j;<br>
+ <br>
+ #ifdef __DML_VBA_DEBUG__<br>
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h<br>
index 479d7d83220c..072bd0539605 100644<br>
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h<br>
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h<br>
@@ -76,14 +76,9 @@ static inline double dml_floor(double a, double granularity)<br>
<br>
static inline double dml_round(double a)<br>
{<br>
- double round_pt = 0.5;<br>
- double ceil = dml_ceil(a, 1);<br>
- double floor = dml_floor(a, 1);<br>
+ const double round_pt = 0.5;<br>
<br>
- if (a - floor >= round_pt)<br>
- return ceil;<br>
- else<br>
- return floor;<br>
+ return dml_floor(a + round_pt, 1);<br>
}<br>
<br>
/* float<br>
-- <br>
2.34.1<br>
<br>
</div>
</span></font></div>
</div>
</body>
</html>