[PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull
Rodrigo Siqueira Jordao
rjordrigo at amd.com
Sat Dec 11 12:20:18 UTC 2021
On 2021-12-09 11:46 a.m., Michel Dänzer wrote:
> From: Michel Dänzer <mdaenzer at redhat.com>
>
> Move code using the Pipe struct to a new helper function.
>
> Works around[0] this warning (resulting in failure to build a RHEL debug
> kernel with Werror enabled):
>
> ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c: In function ‘dml31_ModeSupportAndSystemConfigurationFull’:
> ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:5740:1: warning: the frame size of 2144 bytes is larger than 2048 bytes [-Wframe-larger-than=]
>
> The culprit seems to be the Pipe struct, so pull the relevant block out
> into its own sub-function. (This is porting
> a62427ef9b55 "drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull"
> from dml31 to dml21)
>
> [0] AFAICT this doesn't actually reduce the total amount of stack which
> can be used, just moves some of it from
> dml31_ModeSupportAndSystemConfigurationFull to the new helper function,
> so the former happens to no longer exceed the limit for a single
> function.
>
> Signed-off-by: Michel Dänzer <mdaenzer at redhat.com>
> ---
> .../dc/dml/dcn31/display_mode_vba_31.c | 185 ++++++++++--------
> 1 file changed, 99 insertions(+), 86 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
> index 7e937bdcea00..8965f9af9d0a 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
> @@ -3949,6 +3949,102 @@ static double TruncToValidBPP(
> return BPP_INVALID;
> }
>
> +static noinline void CalculatePrefetchSchedulePerPlane(
> + struct display_mode_lib *mode_lib,
> + double HostVMInefficiencyFactor,
> + int i,
> + unsigned j,
> + unsigned k)
> +{
> + struct vba_vars_st *v = &mode_lib->vba;
> + Pipe myPipe;
> +
> + myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
> + myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
> + myPipe.PixelClock = v->PixelClock[k];
> + myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
> + myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
> + myPipe.ScalerEnabled = v->ScalerEnabled[k];
> + myPipe.SourceScan = v->SourceScan[k];
> + myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
> + myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
> + myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
> + myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
> + myPipe.InterlaceEnable = v->Interlace[k];
> + myPipe.NumberOfCursors = v->NumberOfCursors[k];
> + myPipe.VBlank = v->VTotal[k] - v->VActive[k];
> + myPipe.HTotal = v->HTotal[k];
> + myPipe.DCCEnable = v->DCCEnable[k];
> + myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
> + || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
> + myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
> + myPipe.BytePerPixelY = v->BytePerPixelY[k];
> + myPipe.BytePerPixelC = v->BytePerPixelC[k];
> + myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
> + v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
> + mode_lib,
> + HostVMInefficiencyFactor,
> + &myPipe,
> + v->DSCDelayPerState[i][k],
> + v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
> + v->DPPCLKDelaySCL,
> + v->DPPCLKDelaySCLLBOnly,
> + v->DPPCLKDelayCNVCCursor,
> + v->DISPCLKDelaySubtotal,
> + v->SwathWidthYThisState[k] / v->HRatio[k],
> + v->OutputFormat[k],
> + v->MaxInterDCNTileRepeaters,
> + dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
> + v->MaximumVStartup[i][j][k],
> + v->GPUVMMaxPageTableLevels,
> + v->GPUVMEnable,
> + v->HostVMEnable,
> + v->HostVMMaxNonCachedPageTableLevels,
> + v->HostVMMinPageSize,
> + v->DynamicMetadataEnable[k],
> + v->DynamicMetadataVMEnabled,
> + v->DynamicMetadataLinesBeforeActiveRequired[k],
> + v->DynamicMetadataTransmittedBytes[k],
> + v->UrgLatency[i],
> + v->ExtraLatency,
> + v->TimeCalc,
> + v->PDEAndMetaPTEBytesPerFrame[i][j][k],
> + v->MetaRowBytes[i][j][k],
> + v->DPTEBytesPerRow[i][j][k],
> + v->PrefetchLinesY[i][j][k],
> + v->SwathWidthYThisState[k],
> + v->PrefillY[k],
> + v->MaxNumSwY[k],
> + v->PrefetchLinesC[i][j][k],
> + v->SwathWidthCThisState[k],
> + v->PrefillC[k],
> + v->MaxNumSwC[k],
> + v->swath_width_luma_ub_this_state[k],
> + v->swath_width_chroma_ub_this_state[k],
> + v->SwathHeightYThisState[k],
> + v->SwathHeightCThisState[k],
> + v->TWait,
> + &v->DSTXAfterScaler[k],
> + &v->DSTYAfterScaler[k],
> + &v->LineTimesForPrefetch[k],
> + &v->PrefetchBW[k],
> + &v->LinesForMetaPTE[k],
> + &v->LinesForMetaAndDPTERow[k],
> + &v->VRatioPreY[i][j][k],
> + &v->VRatioPreC[i][j][k],
> + &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
> + &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
> + &v->NoTimeForDynamicMetadata[i][j][k],
> + &v->Tno_bw[k],
> + &v->prefetch_vmrow_bw[k],
> + &v->dummy7[k],
> + &v->dummy8[k],
> + &v->dummy13[k],
> + &v->VUpdateOffsetPix[k],
> + &v->VUpdateWidthPix[k],
> + &v->VReadyOffsetPix[k]);
> +}
> +
> void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
> {
> struct vba_vars_st *v = &mode_lib->vba;
> @@ -5276,92 +5372,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
> v->SREnterPlusExitTime);
>
> for (k = 0; k < v->NumberOfActivePlanes; k++) {
> - Pipe myPipe;
> -
> - myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
> - myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
> - myPipe.PixelClock = v->PixelClock[k];
> - myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
> - myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
> - myPipe.ScalerEnabled = v->ScalerEnabled[k];
> - myPipe.SourceScan = v->SourceScan[k];
> - myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
> - myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
> - myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
> - myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
> - myPipe.InterlaceEnable = v->Interlace[k];
> - myPipe.NumberOfCursors = v->NumberOfCursors[k];
> - myPipe.VBlank = v->VTotal[k] - v->VActive[k];
> - myPipe.HTotal = v->HTotal[k];
> - myPipe.DCCEnable = v->DCCEnable[k];
> - myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
> - || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
> - myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
> - myPipe.BytePerPixelY = v->BytePerPixelY[k];
> - myPipe.BytePerPixelC = v->BytePerPixelC[k];
> - myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
> - v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
> - mode_lib,
> - HostVMInefficiencyFactor,
> - &myPipe,
> - v->DSCDelayPerState[i][k],
> - v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
> - v->DPPCLKDelaySCL,
> - v->DPPCLKDelaySCLLBOnly,
> - v->DPPCLKDelayCNVCCursor,
> - v->DISPCLKDelaySubtotal,
> - v->SwathWidthYThisState[k] / v->HRatio[k],
> - v->OutputFormat[k],
> - v->MaxInterDCNTileRepeaters,
> - dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
> - v->MaximumVStartup[i][j][k],
> - v->GPUVMMaxPageTableLevels,
> - v->GPUVMEnable,
> - v->HostVMEnable,
> - v->HostVMMaxNonCachedPageTableLevels,
> - v->HostVMMinPageSize,
> - v->DynamicMetadataEnable[k],
> - v->DynamicMetadataVMEnabled,
> - v->DynamicMetadataLinesBeforeActiveRequired[k],
> - v->DynamicMetadataTransmittedBytes[k],
> - v->UrgLatency[i],
> - v->ExtraLatency,
> - v->TimeCalc,
> - v->PDEAndMetaPTEBytesPerFrame[i][j][k],
> - v->MetaRowBytes[i][j][k],
> - v->DPTEBytesPerRow[i][j][k],
> - v->PrefetchLinesY[i][j][k],
> - v->SwathWidthYThisState[k],
> - v->PrefillY[k],
> - v->MaxNumSwY[k],
> - v->PrefetchLinesC[i][j][k],
> - v->SwathWidthCThisState[k],
> - v->PrefillC[k],
> - v->MaxNumSwC[k],
> - v->swath_width_luma_ub_this_state[k],
> - v->swath_width_chroma_ub_this_state[k],
> - v->SwathHeightYThisState[k],
> - v->SwathHeightCThisState[k],
> - v->TWait,
> - &v->DSTXAfterScaler[k],
> - &v->DSTYAfterScaler[k],
> - &v->LineTimesForPrefetch[k],
> - &v->PrefetchBW[k],
> - &v->LinesForMetaPTE[k],
> - &v->LinesForMetaAndDPTERow[k],
> - &v->VRatioPreY[i][j][k],
> - &v->VRatioPreC[i][j][k],
> - &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
> - &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
> - &v->NoTimeForDynamicMetadata[i][j][k],
> - &v->Tno_bw[k],
> - &v->prefetch_vmrow_bw[k],
> - &v->dummy7[k],
> - &v->dummy8[k],
> - &v->dummy13[k],
> - &v->VUpdateOffsetPix[k],
> - &v->VUpdateWidthPix[k],
> - &v->VReadyOffsetPix[k]);
> + CalculatePrefetchSchedulePerPlane(mode_lib,
> + HostVMInefficiencyFactor,
> + i, j, k);
> }
>
> for (k = 0; k < v->NumberOfActivePlanes; k++) {
>
Hi Michel,
Overwall I think this series is good. I also run it in our internal CI
and everything looks fine.
Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira at amd.com>
And applied to amd-staging-drm-next.
Thanks
More information about the dri-devel
mailing list