[PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull
Michel Dänzer
michel at daenzer.net
Mon Dec 13 09:46:34 UTC 2021
On 2021-12-11 13:20, Rodrigo Siqueira Jordao wrote:
>
>
> On 2021-12-09 11:46 a.m., Michel Dänzer wrote:
>> From: Michel Dänzer <mdaenzer at redhat.com>
>>
>> Move code using the Pipe struct to a new helper function.
>>
>> Works around[0] this warning (resulting in failure to build a RHEL debug
>> kernel with Werror enabled):
>>
>> ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c: In function ‘dml31_ModeSupportAndSystemConfigurationFull’:
>> ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:5740:1: warning: the frame size of 2144 bytes is larger than 2048 bytes [-Wframe-larger-than=]
>>
>> The culprit seems to be the Pipe struct, so pull the relevant block out
>> into its own sub-function. (This is porting
>> a62427ef9b55 "drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull"
>> from dml31 to dml21)
>>
>> [0] AFAICT this doesn't actually reduce the total amount of stack which
>> can be used, just moves some of it from
>> dml31_ModeSupportAndSystemConfigurationFull to the new helper function,
>> so the former happens to no longer exceed the limit for a single
>> function.
>>
>> Signed-off-by: Michel Dänzer <mdaenzer at redhat.com>
>> ---
>> .../dc/dml/dcn31/display_mode_vba_31.c | 185 ++++++++++--------
>> 1 file changed, 99 insertions(+), 86 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
>> index 7e937bdcea00..8965f9af9d0a 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
>> @@ -3949,6 +3949,102 @@ static double TruncToValidBPP(
>> return BPP_INVALID;
>> }
>> +static noinline void CalculatePrefetchSchedulePerPlane(
>> + struct display_mode_lib *mode_lib,
>> + double HostVMInefficiencyFactor,
>> + int i,
>> + unsigned j,
>> + unsigned k)
>> +{
>> + struct vba_vars_st *v = &mode_lib->vba;
>> + Pipe myPipe;
>> +
>> + myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
>> + myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
>> + myPipe.PixelClock = v->PixelClock[k];
>> + myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
>> + myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
>> + myPipe.ScalerEnabled = v->ScalerEnabled[k];
>> + myPipe.SourceScan = v->SourceScan[k];
>> + myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
>> + myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
>> + myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
>> + myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
>> + myPipe.InterlaceEnable = v->Interlace[k];
>> + myPipe.NumberOfCursors = v->NumberOfCursors[k];
>> + myPipe.VBlank = v->VTotal[k] - v->VActive[k];
>> + myPipe.HTotal = v->HTotal[k];
>> + myPipe.DCCEnable = v->DCCEnable[k];
>> + myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
>> + || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
>> + myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
>> + myPipe.BytePerPixelY = v->BytePerPixelY[k];
>> + myPipe.BytePerPixelC = v->BytePerPixelC[k];
>> + myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
>> + v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
>> + mode_lib,
>> + HostVMInefficiencyFactor,
>> + &myPipe,
>> + v->DSCDelayPerState[i][k],
>> + v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
>> + v->DPPCLKDelaySCL,
>> + v->DPPCLKDelaySCLLBOnly,
>> + v->DPPCLKDelayCNVCCursor,
>> + v->DISPCLKDelaySubtotal,
>> + v->SwathWidthYThisState[k] / v->HRatio[k],
>> + v->OutputFormat[k],
>> + v->MaxInterDCNTileRepeaters,
>> + dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
>> + v->MaximumVStartup[i][j][k],
>> + v->GPUVMMaxPageTableLevels,
>> + v->GPUVMEnable,
>> + v->HostVMEnable,
>> + v->HostVMMaxNonCachedPageTableLevels,
>> + v->HostVMMinPageSize,
>> + v->DynamicMetadataEnable[k],
>> + v->DynamicMetadataVMEnabled,
>> + v->DynamicMetadataLinesBeforeActiveRequired[k],
>> + v->DynamicMetadataTransmittedBytes[k],
>> + v->UrgLatency[i],
>> + v->ExtraLatency,
>> + v->TimeCalc,
>> + v->PDEAndMetaPTEBytesPerFrame[i][j][k],
>> + v->MetaRowBytes[i][j][k],
>> + v->DPTEBytesPerRow[i][j][k],
>> + v->PrefetchLinesY[i][j][k],
>> + v->SwathWidthYThisState[k],
>> + v->PrefillY[k],
>> + v->MaxNumSwY[k],
>> + v->PrefetchLinesC[i][j][k],
>> + v->SwathWidthCThisState[k],
>> + v->PrefillC[k],
>> + v->MaxNumSwC[k],
>> + v->swath_width_luma_ub_this_state[k],
>> + v->swath_width_chroma_ub_this_state[k],
>> + v->SwathHeightYThisState[k],
>> + v->SwathHeightCThisState[k],
>> + v->TWait,
>> + &v->DSTXAfterScaler[k],
>> + &v->DSTYAfterScaler[k],
>> + &v->LineTimesForPrefetch[k],
>> + &v->PrefetchBW[k],
>> + &v->LinesForMetaPTE[k],
>> + &v->LinesForMetaAndDPTERow[k],
>> + &v->VRatioPreY[i][j][k],
>> + &v->VRatioPreC[i][j][k],
>> + &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
>> + &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
>> + &v->NoTimeForDynamicMetadata[i][j][k],
>> + &v->Tno_bw[k],
>> + &v->prefetch_vmrow_bw[k],
>> + &v->dummy7[k],
>> + &v->dummy8[k],
>> + &v->dummy13[k],
>> + &v->VUpdateOffsetPix[k],
>> + &v->VUpdateWidthPix[k],
>> + &v->VReadyOffsetPix[k]);
>> +}
>> +
>> void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
>> {
>> struct vba_vars_st *v = &mode_lib->vba;
>> @@ -5276,92 +5372,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
>> v->SREnterPlusExitTime);
>> for (k = 0; k < v->NumberOfActivePlanes; k++) {
>> - Pipe myPipe;
>> -
>> - myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
>> - myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
>> - myPipe.PixelClock = v->PixelClock[k];
>> - myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
>> - myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
>> - myPipe.ScalerEnabled = v->ScalerEnabled[k];
>> - myPipe.SourceScan = v->SourceScan[k];
>> - myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
>> - myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
>> - myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
>> - myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
>> - myPipe.InterlaceEnable = v->Interlace[k];
>> - myPipe.NumberOfCursors = v->NumberOfCursors[k];
>> - myPipe.VBlank = v->VTotal[k] - v->VActive[k];
>> - myPipe.HTotal = v->HTotal[k];
>> - myPipe.DCCEnable = v->DCCEnable[k];
>> - myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
>> - || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
>> - myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
>> - myPipe.BytePerPixelY = v->BytePerPixelY[k];
>> - myPipe.BytePerPixelC = v->BytePerPixelC[k];
>> - myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
>> - v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
>> - mode_lib,
>> - HostVMInefficiencyFactor,
>> - &myPipe,
>> - v->DSCDelayPerState[i][k],
>> - v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
>> - v->DPPCLKDelaySCL,
>> - v->DPPCLKDelaySCLLBOnly,
>> - v->DPPCLKDelayCNVCCursor,
>> - v->DISPCLKDelaySubtotal,
>> - v->SwathWidthYThisState[k] / v->HRatio[k],
>> - v->OutputFormat[k],
>> - v->MaxInterDCNTileRepeaters,
>> - dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
>> - v->MaximumVStartup[i][j][k],
>> - v->GPUVMMaxPageTableLevels,
>> - v->GPUVMEnable,
>> - v->HostVMEnable,
>> - v->HostVMMaxNonCachedPageTableLevels,
>> - v->HostVMMinPageSize,
>> - v->DynamicMetadataEnable[k],
>> - v->DynamicMetadataVMEnabled,
>> - v->DynamicMetadataLinesBeforeActiveRequired[k],
>> - v->DynamicMetadataTransmittedBytes[k],
>> - v->UrgLatency[i],
>> - v->ExtraLatency,
>> - v->TimeCalc,
>> - v->PDEAndMetaPTEBytesPerFrame[i][j][k],
>> - v->MetaRowBytes[i][j][k],
>> - v->DPTEBytesPerRow[i][j][k],
>> - v->PrefetchLinesY[i][j][k],
>> - v->SwathWidthYThisState[k],
>> - v->PrefillY[k],
>> - v->MaxNumSwY[k],
>> - v->PrefetchLinesC[i][j][k],
>> - v->SwathWidthCThisState[k],
>> - v->PrefillC[k],
>> - v->MaxNumSwC[k],
>> - v->swath_width_luma_ub_this_state[k],
>> - v->swath_width_chroma_ub_this_state[k],
>> - v->SwathHeightYThisState[k],
>> - v->SwathHeightCThisState[k],
>> - v->TWait,
>> - &v->DSTXAfterScaler[k],
>> - &v->DSTYAfterScaler[k],
>> - &v->LineTimesForPrefetch[k],
>> - &v->PrefetchBW[k],
>> - &v->LinesForMetaPTE[k],
>> - &v->LinesForMetaAndDPTERow[k],
>> - &v->VRatioPreY[i][j][k],
>> - &v->VRatioPreC[i][j][k],
>> - &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
>> - &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
>> - &v->NoTimeForDynamicMetadata[i][j][k],
>> - &v->Tno_bw[k],
>> - &v->prefetch_vmrow_bw[k],
>> - &v->dummy7[k],
>> - &v->dummy8[k],
>> - &v->dummy13[k],
>> - &v->VUpdateOffsetPix[k],
>> - &v->VUpdateWidthPix[k],
>> - &v->VReadyOffsetPix[k]);
>> + CalculatePrefetchSchedulePerPlane(mode_lib,
>> + HostVMInefficiencyFactor,
>> + i, j, k);
>> }
>> for (k = 0; k < v->NumberOfActivePlanes; k++) {
>>
>
> Hi Michel,
>
> Overwall I think this series is good. I also run it in our internal CI and everything looks fine.
>
> Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira at amd.com>
>
> And applied to amd-staging-drm-next.
Thanks!
Are there any plans for actually reducing the combined amount of stack used by ModeSupportAndSystemConfigurationFull + CalculatePrefetchSchedulePerPlane?
Also, did you check that UseMinimumDCFCLK now modifying mode_lib->vba.DCFCLKState[i][j] and possibly other values in mode_lib->vba makes sense?
--
Earthling Michel Dänzer | https://redhat.com
Libre software enthusiast | Mesa and Xwayland developer
More information about the dri-devel
mailing list