[Why & How]
With -Werror enabled in the kernel, the clang build was failing because
dml21_ModeSupportAndSystemConfigurationFull's stack frame is 1064 bytes
when built with clang, which exceeds the default 1024-byte stack frame
limit.
The culprit seems to be the Pipe struct, so pull the relevant block
out into its own sub-function.
Signed-off-by: Harry Wentland <[email protected]>
Fixes: 3fe617ccafd6 ("Enable '-Werror' by default for all kernel builds")
Cc: Nick Desaulniers <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: [email protected]
Cc: Linux Kernel Mailing List <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Leo Li <[email protected]>
Cc: Alex Deucher <[email protected]>
Cc: Christian König <[email protected]>
Cc: Xinhui Pan <[email protected]>
Cc: Nathan Chancellor <[email protected]>
Cc: Guenter Roeck <[email protected]>
Cc: [email protected]
---
.../dc/dml/dcn21/display_mode_vba_21.c | 236 +++++++++---------
1 file changed, 123 insertions(+), 113 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
index 4136eb8256cb..8a7485e21d53 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
@@ -3394,6 +3394,127 @@ static unsigned int TruncToValidBPP(
}
}
+
+static noinline void CalculatePrefetchSchedulePerPlane(
+ struct display_mode_lib *mode_lib,
+ int i,
+ unsigned j,
+ unsigned k)
+{
+ struct vba_vars_st *locals = &mode_lib->vba;
+ Pipe myPipe;
+ HostVM myHostVM;
+
+ if (mode_lib->vba.XFCEnabled[k] == true) {
+ mode_lib->vba.XFCRemoteSurfaceFlipDelay =
+ CalculateRemoteSurfaceFlipDelay(
+ mode_lib,
+ mode_lib->vba.VRatio[k],
+ locals->SwathWidthYThisState[k],
+ dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
+ mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+ mode_lib->vba.XFCTSlvVupdateOffset,
+ mode_lib->vba.XFCTSlvVupdateWidth,
+ mode_lib->vba.XFCTSlvVreadyOffset,
+ mode_lib->vba.XFCXBUFLatencyTolerance,
+ mode_lib->vba.XFCFillBWOverhead,
+ mode_lib->vba.XFCSlvChunkSize,
+ mode_lib->vba.XFCBusTransportTime,
+ mode_lib->vba.TimeCalc,
+ mode_lib->vba.TWait,
+ &mode_lib->vba.SrcActiveDrainRate,
+ &mode_lib->vba.TInitXFill,
+ &mode_lib->vba.TslvChk);
+ } else {
+ mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
+ }
+
+ myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k];
+ myPipe.DISPCLK = locals->RequiredDISPCLK[i][j];
+ myPipe.PixelClock = mode_lib->vba.PixelClock[k];
+ myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
+ myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k];
+ myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
+ myPipe.SourceScan = mode_lib->vba.SourceScan[k];
+ myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k];
+ myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k];
+ myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k];
+ myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k];
+ myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
+ myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
+ myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
+ myPipe.HTotal = mode_lib->vba.HTotal[k];
+
+
+ myHostVM.Enable = mode_lib->vba.HostVMEnable;
+ myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
+ myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
+
+
+ mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule(
+ mode_lib,
+ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
+ mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
+ &myPipe,
+ locals->DSCDelayPerState[i][k],
+ mode_lib->vba.DPPCLKDelaySubtotal,
+ mode_lib->vba.DPPCLKDelaySCL,
+ mode_lib->vba.DPPCLKDelaySCLLBOnly,
+ mode_lib->vba.DPPCLKDelayCNVCFormater,
+ mode_lib->vba.DPPCLKDelayCNVCCursor,
+ mode_lib->vba.DISPCLKDelaySubtotal,
+ locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k],
+ mode_lib->vba.OutputFormat[k],
+ mode_lib->vba.MaxInterDCNTileRepeaters,
+ dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]),
+ locals->MaximumVStartup[0][0][k],
+ mode_lib->vba.GPUVMMaxPageTableLevels,
+ mode_lib->vba.GPUVMEnable,
+ &myHostVM,
+ mode_lib->vba.DynamicMetadataEnable[k],
+ mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
+ mode_lib->vba.DynamicMetadataTransmittedBytes[k],
+ mode_lib->vba.DCCEnable[k],
+ mode_lib->vba.UrgentLatency,
+ mode_lib->vba.ExtraLatency,
+ mode_lib->vba.TimeCalc,
+ locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
+ locals->MetaRowBytes[0][0][k],
+ locals->DPTEBytesPerRow[0][0][k],
+ locals->PrefetchLinesY[0][0][k],
+ locals->SwathWidthYThisState[k],
+ locals->BytePerPixelInDETY[k],
+ locals->PrefillY[k],
+ locals->MaxNumSwY[k],
+ locals->PrefetchLinesC[0][0][k],
+ locals->BytePerPixelInDETC[k],
+ locals->PrefillC[k],
+ locals->MaxNumSwC[k],
+ locals->SwathHeightYThisState[k],
+ locals->SwathHeightCThisState[k],
+ mode_lib->vba.TWait,
+ mode_lib->vba.XFCEnabled[k],
+ mode_lib->vba.XFCRemoteSurfaceFlipDelay,
+ mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
+ &locals->dst_x_after_scaler,
+ &locals->dst_y_after_scaler,
+ &locals->LineTimesForPrefetch[k],
+ &locals->PrefetchBW[k],
+ &locals->LinesForMetaPTE[k],
+ &locals->LinesForMetaAndDPTERow[k],
+ &locals->VRatioPreY[i][j][k],
+ &locals->VRatioPreC[i][j][k],
+ &locals->RequiredPrefetchPixelDataBWLuma[i][j][k],
+ &locals->RequiredPrefetchPixelDataBWChroma[i][j][k],
+ &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
+ &locals->Tno_bw[k],
+ &locals->prefetch_vmrow_bw[k],
+ locals->swath_width_luma_ub,
+ locals->swath_width_chroma_ub,
+ &mode_lib->vba.VUpdateOffsetPix[k],
+ &mode_lib->vba.VUpdateWidthPix[k],
+ &mode_lib->vba.VReadyOffsetPix[k]);
+}
void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
{
struct vba_vars_st *locals = &mode_lib->vba;
@@ -4676,120 +4797,9 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
mode_lib->vba.DRAMClockChangeLatency,
mode_lib->vba.UrgentLatency,
mode_lib->vba.SREnterPlusExitTime);
- for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
- Pipe myPipe;
- HostVM myHostVM;
-
- if (mode_lib->vba.XFCEnabled[k] == true) {
- mode_lib->vba.XFCRemoteSurfaceFlipDelay =
- CalculateRemoteSurfaceFlipDelay(
- mode_lib,
- mode_lib->vba.VRatio[k],
- locals->SwathWidthYThisState[k],
- dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
- mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
- mode_lib->vba.XFCTSlvVupdateOffset,
- mode_lib->vba.XFCTSlvVupdateWidth,
- mode_lib->vba.XFCTSlvVreadyOffset,
- mode_lib->vba.XFCXBUFLatencyTolerance,
- mode_lib->vba.XFCFillBWOverhead,
- mode_lib->vba.XFCSlvChunkSize,
- mode_lib->vba.XFCBusTransportTime,
- mode_lib->vba.TimeCalc,
- mode_lib->vba.TWait,
- &mode_lib->vba.SrcActiveDrainRate,
- &mode_lib->vba.TInitXFill,
- &mode_lib->vba.TslvChk);
- } else {
- mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
- }
+ for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
+ CalculatePrefetchSchedulePerPlane(mode_lib, i, j, k);
- myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k];
- myPipe.DISPCLK = locals->RequiredDISPCLK[i][j];
- myPipe.PixelClock = mode_lib->vba.PixelClock[k];
- myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
- myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k];
- myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
- myPipe.SourceScan = mode_lib->vba.SourceScan[k];
- myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k];
- myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k];
- myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k];
- myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k];
- myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
- myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
- myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
- myPipe.HTotal = mode_lib->vba.HTotal[k];
-
-
- myHostVM.Enable = mode_lib->vba.HostVMEnable;
- myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
- myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
-
-
- mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule(
- mode_lib,
- mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
- mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
- &myPipe,
- locals->DSCDelayPerState[i][k],
- mode_lib->vba.DPPCLKDelaySubtotal,
- mode_lib->vba.DPPCLKDelaySCL,
- mode_lib->vba.DPPCLKDelaySCLLBOnly,
- mode_lib->vba.DPPCLKDelayCNVCFormater,
- mode_lib->vba.DPPCLKDelayCNVCCursor,
- mode_lib->vba.DISPCLKDelaySubtotal,
- locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k],
- mode_lib->vba.OutputFormat[k],
- mode_lib->vba.MaxInterDCNTileRepeaters,
- dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]),
- locals->MaximumVStartup[0][0][k],
- mode_lib->vba.GPUVMMaxPageTableLevels,
- mode_lib->vba.GPUVMEnable,
- &myHostVM,
- mode_lib->vba.DynamicMetadataEnable[k],
- mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
- mode_lib->vba.DynamicMetadataTransmittedBytes[k],
- mode_lib->vba.DCCEnable[k],
- mode_lib->vba.UrgentLatency,
- mode_lib->vba.ExtraLatency,
- mode_lib->vba.TimeCalc,
- locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
- locals->MetaRowBytes[0][0][k],
- locals->DPTEBytesPerRow[0][0][k],
- locals->PrefetchLinesY[0][0][k],
- locals->SwathWidthYThisState[k],
- locals->BytePerPixelInDETY[k],
- locals->PrefillY[k],
- locals->MaxNumSwY[k],
- locals->PrefetchLinesC[0][0][k],
- locals->BytePerPixelInDETC[k],
- locals->PrefillC[k],
- locals->MaxNumSwC[k],
- locals->SwathHeightYThisState[k],
- locals->SwathHeightCThisState[k],
- mode_lib->vba.TWait,
- mode_lib->vba.XFCEnabled[k],
- mode_lib->vba.XFCRemoteSurfaceFlipDelay,
- mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
- &locals->dst_x_after_scaler,
- &locals->dst_y_after_scaler,
- &locals->LineTimesForPrefetch[k],
- &locals->PrefetchBW[k],
- &locals->LinesForMetaPTE[k],
- &locals->LinesForMetaAndDPTERow[k],
- &locals->VRatioPreY[i][j][k],
- &locals->VRatioPreC[i][j][k],
- &locals->RequiredPrefetchPixelDataBWLuma[i][j][k],
- &locals->RequiredPrefetchPixelDataBWChroma[i][j][k],
- &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
- &locals->Tno_bw[k],
- &locals->prefetch_vmrow_bw[k],
- locals->swath_width_luma_ub,
- locals->swath_width_chroma_ub,
- &mode_lib->vba.VUpdateOffsetPix[k],
- &mode_lib->vba.VUpdateWidthPix[k],
- &mode_lib->vba.VReadyOffsetPix[k]);
- }
mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0;
mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0;
for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
--
2.33.0
On Tue, Sep 14, 2021 at 11:05 PM Harry Wentland <[email protected]> wrote:
>
> [Why & How]
> With Werror enabled in the kernel we were failing the clang build since
> dml21_ModeSupportAndSystemConfigurationFull's stack frame is 1064 when
> building with clang, and exceeding the default 1024 stack frame limit.
>
> The culprit seems to be the Pipe struct, so pull the relevant block
> out into its own sub-function.
I suspect it's not the Pipe struct but rather the way that you call another
function with a crazy number of arguments here. After your change,
this likely gets inlined and you avoid the problem, so the patch ends
up doing the right thing.
If you do more patches like this, I would suggest mentioning the new
stack usage of the calling function and the new noinline function, to
make sure that the combined number isn't actually worse than the old
number.
You can get these numbers by recompiling the file with the frame
size warning set to a low value, e.g. adding -Wframe-larger-than=100
to the command line.
Acked-by: Arnd Bergmann <[email protected]>
On 2021-09-14 17:05, Harry Wentland wrote:
> [Why & How]
> With Werror enabled in the kernel we were failing the clang build since
> dml21_ModeSupportAndSystemConfigurationFull's stack frame is 1064 when
> building with clang, and exceeding the default 1024 stack frame limit.
>
> The culprit seems to be the Pipe struct, so pull the relevant block
> out into its own sub-function.
>
> Signed-off-by: Harry Wentland <[email protected]>
> Fixes: 3fe617ccafd6 ("Enable '-Werror' by default for all kernel builds")
> Cc: Nick Desaulniers <[email protected]>
> Cc: Linus Torvalds <[email protected]>
> Cc: [email protected]
> Cc: Linux Kernel Mailing List <[email protected]>
> Cc: Arnd Bergmann <[email protected]>
> Cc: Leo Li <[email protected]>
> Cc: Alex Deucher <[email protected]>
> Cc: Christian König <[email protected]>
> Cc: Xinhui Pan <[email protected]>
> Cc: Nathan Chancellor <[email protected]>
> Cc: Guenter Roeck <[email protected]>
> Cc: [email protected]
> ---
Reviewed-by: Leo Li <[email protected]>