Quelle display_mode_core.c
Sprache: C
|
|
/* SPDX-License-Identifier: MIT */
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#include "display_mode_core.h"
#include "display_mode_util.h"
#include "display_mode_lib_defines.h"
#include "dml_assert.h"
#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
#define TB_BORROWED_MAX 400
#define DML_MAX_VSTARTUP_START 1023
// ---------------------------
// Declaration Begins
// ---------------------------
static void CalculateBytePerPixelAndBlockSizes(
enum dml_source_format_class SourcePixelFormat,
enum dml_swizzle_mode SurfaceTiling,
// Output
dml_uint_t *BytePerPixelY,
dml_uint_t *BytePerPixelC,
dml_float_t *BytePerPixelDETY,
dml_float_t *BytePerPixelDETC,
dml_uint_t *BlockHeight256BytesY,
dml_uint_t *BlockHeight256BytesC,
dml_uint_t *BlockWidth256BytesY,
dml_uint_t *BlockWidth256BytesC,
dml_uint_t *MacroTileHeightY,
dml_uint_t *MacroTileHeightC,
dml_uint_t *MacroTileWidthY,
dml_uint_t *MacroTileWidthC);
static dml_float_t CalculateWriteBackDISPCLK(
enum dml_source_format_class WritebackPixelFormat,
dml_float_t PixelClock,
dml_float_t WritebackHRatio,
dml_float_t WritebackVRatio,
dml_uint_t WritebackHTaps,
dml_uint_t WritebackVTaps,
dml_uint_t WritebackSourceWidth,
dml_uint_t WritebackDestinationWidth,
dml_uint_t HTotal,
dml_uint_t WritebackLineBufferSize,
dml_float_t DISPCLKDPPCLKVCOSpeed);
static void CalculateVMRowAndSwath(
struct display_mode_lib_scratch_st *s,
struct CalculateVMRowAndSwath_params_st *p);
static void CalculateOutputLink(
dml_float_t PHYCLKPerState,
dml_float_t PHYCLKD18PerState,
dml_float_t PHYCLKD32PerState,
dml_float_t Downspreading,
dml_bool_t IsMainSurfaceUsingTheIndicatedTiming,
enum dml_output_encoder_class Output,
enum dml_output_format_class OutputFormat,
dml_uint_t HTotal,
dml_uint_t HActive,
dml_float_t PixelClockBackEnd,
dml_float_t ForcedOutputLinkBPP,
dml_uint_t DSCInputBitPerComponent,
dml_uint_t NumberOfDSCSlices,
dml_float_t AudioSampleRate,
dml_uint_t AudioSampleLayout,
enum dml_odm_mode ODMModeNoDSC,
enum dml_odm_mode ODMModeDSC,
enum dml_dsc_enable DSCEnable,
dml_uint_t OutputLinkDPLanes,
enum dml_output_link_dp_rate OutputLinkDPRate,
// Output
dml_bool_t *RequiresDSC,
dml_bool_t *RequiresFEC,
dml_float_t *OutBpp,
enum dml_output_type_and_rate__type *OutputType,
enum dml_output_type_and_rate__rate *OutputRate,
dml_uint_t *RequiredSlots);
static void CalculateODMMode(
dml_uint_t MaximumPixelsPerLinePerDSCUnit,
dml_uint_t HActive,
enum dml_output_encoder_class Output,
enum dml_output_format_class OutputFormat,
enum dml_odm_use_policy ODMUse,
dml_float_t StateDispclk,
dml_float_t MaxDispclk,
dml_bool_t DSCEnable,
dml_uint_t TotalNumberOfActiveDPP,
dml_uint_t MaxNumDPP,
dml_float_t PixelClock,
dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
dml_float_t DISPCLKRampingMargin,
dml_float_t DISPCLKDPPCLKVCOSpeed,
dml_uint_t NumberOfDSCSlices,
// Output
dml_bool_t *TotalAvailablePipesSupport,
dml_uint_t *NumberOfDPP,
enum dml_odm_mode *ODMMode,
dml_float_t *RequiredDISPCLKPerSurface);
static dml_float_t CalculateRequiredDispclk(
enum dml_odm_mode ODMMode,
dml_float_t PixelClock,
dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
dml_float_t DISPCLKRampingMargin,
dml_float_t DISPCLKDPPCLKVCOSpeed,
dml_float_t MaxDispclkSingle);
static void CalculateSinglePipeDPPCLKAndSCLThroughput(
dml_float_t HRatio,
dml_float_t HRatioChroma,
dml_float_t VRatio,
dml_float_t VRatioChroma,
dml_float_t MaxDCHUBToPSCLThroughput,
dml_float_t MaxPSCLToLBThroughput,
dml_float_t PixelClock,
enum dml_source_format_class SourcePixelFormat,
dml_uint_t HTaps,
dml_uint_t HTapsChroma,
dml_uint_t VTaps,
dml_uint_t VTapsChroma,
// Output
dml_float_t *PSCL_THROUGHPUT,
dml_float_t *PSCL_THROUGHPUT_CHROMA,
dml_float_t *DPPCLKUsingSingleDPP);
static void CalculateDPPCLK(
dml_uint_t NumberOfActiveSurfaces,
dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
dml_float_t DISPCLKDPPCLKVCOSpeed,
dml_float_t DPPCLKUsingSingleDPP[],
dml_uint_t DPPPerSurface[],
// Output
dml_float_t *GlobalDPPCLK,
dml_float_t Dppclk[]);
static void CalculateMALLUseForStaticScreen(
dml_uint_t NumberOfActiveSurfaces,
dml_uint_t MALLAllocatedForDCNFinal,
enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
dml_uint_t SurfaceSizeInMALL[],
dml_bool_t one_row_per_frame_fits_in_buffer[],
// Output
dml_bool_t UsesMALLForStaticScreen[]);
/*
 * Delay through the DSC compression engine; the definition in this file
 * returns it in pixels.
 *
 * bpc:         source bits per component; 8 and 10 select their own ssm
 *              delay constant, every other value shares a third one
 * BPP:         bits per pixel, used to derive the initial transmit delay
 * sliceWidth:  pixels per slice line
 * numSlices:   horizontal slices per DSC engine
 * pixelFormat: dml_420 and dml_n422 are handled at two pixels per clock,
 *              all other formats at one
 * Output:      only reported in the __DML_VBA_DEBUG__ trace
 */
static dml_uint_t dscceComputeDelay(
dml_uint_t bpc,
dml_float_t BPP,
dml_uint_t sliceWidth,
dml_uint_t numSlices,
enum dml_output_format_class pixelFormat,
enum dml_output_encoder_class Output);
/*
 * Fixed delay of the DSC pipeline around the compression engine. The
 * definition in this file returns a constant chosen by pixelFormat alone:
 * 48 for dml_420, 49 for dml_n422 and 30 for every other format (each the
 * sum of the per-stage constants listed there). Output is not read.
 * NOTE(review): the unit is not stated in the definition - presumably clock
 * cycles; confirm against the caller.
 */
static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat,
enum dml_output_encoder_class Output);
static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
struct CalculatePrefetchSchedule_params_st *p);
static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed);
static void CalculateDCCConfiguration(
dml_bool_t DCCEnabled,
dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
enum dml_source_format_class SourcePixelFormat,
dml_uint_t SurfaceWidthLuma,
dml_uint_t SurfaceWidthChroma,
dml_uint_t SurfaceHeightLuma,
dml_uint_t SurfaceHeightChroma,
dml_uint_t nomDETInKByte,
dml_uint_t RequestHeight256ByteLuma,
dml_uint_t RequestHeight256ByteChroma,
enum dml_swizzle_mode TilingFormat,
dml_uint_t BytePerPixelY,
dml_uint_t BytePerPixelC,
dml_float_t BytePerPixelDETY,
dml_float_t BytePerPixelDETC,
enum dml_rotation_angle SourceScan,
// Output
dml_uint_t *MaxUncompressedBlockLuma,
dml_uint_t *MaxUncompressedBlockChroma,
dml_uint_t *MaxCompressedBlockLuma,
dml_uint_t *MaxCompressedBlockChroma,
dml_uint_t *IndependentBlockLuma,
dml_uint_t *IndependentBlockChroma);
static dml_uint_t CalculatePrefetchSourceLines(
dml_float_t VRatio,
dml_uint_t VTaps,
dml_bool_t Interlace,
dml_bool_t ProgressiveToInterlaceUnitInOPP,
dml_uint_t SwathHeight,
enum dml_rotation_angle SourceScan,
dml_bool_t ViewportStationary,
dml_uint_t SwathWidth,
dml_uint_t ViewportHeight,
dml_uint_t ViewportXStart,
dml_uint_t ViewportYStart,
// Output
dml_uint_t *VInitPreFill,
dml_uint_t *MaxNumSwath);
static dml_uint_t CalculateVMAndRowBytes(
dml_bool_t ViewportStationary,
dml_bool_t DCCEnable,
dml_uint_t NumberOfDPPs,
dml_uint_t BlockHeight256Bytes,
dml_uint_t BlockWidth256Bytes,
enum dml_source_format_class SourcePixelFormat,
dml_uint_t SurfaceTiling,
dml_uint_t BytePerPixel,
enum dml_rotation_angle SourceScan,
dml_uint_t SwathWidth,
dml_uint_t ViewportHeight,
dml_uint_t ViewportXStart,
dml_uint_t ViewportYStart,
dml_bool_t GPUVMEnable,
dml_uint_t GPUVMMaxPageTableLevels,
dml_uint_t GPUVMMinPageSizeKBytes,
dml_uint_t PTEBufferSizeInRequests,
dml_uint_t Pitch,
dml_uint_t DCCMetaPitch,
dml_uint_t MacroTileWidth,
dml_uint_t MacroTileHeight,
// Output
dml_uint_t *MetaRowByte,
dml_uint_t *PixelPTEBytesPerRow,
dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check
dml_uint_t *dpte_row_width_ub,
dml_uint_t *dpte_row_height,
dml_uint_t *dpte_row_height_linear,
dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
dml_uint_t *dpte_row_width_ub_one_row_per_frame,
dml_uint_t *dpte_row_height_one_row_per_frame,
dml_uint_t *MetaRequestWidth,
dml_uint_t *MetaRequestHeight,
dml_uint_t *meta_row_width,
dml_uint_t *meta_row_height,
dml_uint_t *PixelPTEReqWidth,
dml_uint_t *PixelPTEReqHeight,
dml_uint_t *PTERequestSize,
dml_uint_t *DPDE0BytesFrame,
dml_uint_t *MetaPTEBytesFrame);
static dml_float_t CalculateTWait(
dml_uint_t PrefetchMode,
enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
dml_bool_t DRRDisplay,
dml_float_t DRAMClockChangeLatency,
dml_float_t FCLKChangeLatency,
dml_float_t UrgentLatency,
dml_float_t SREnterPlusExitTime);
static void CalculatePrefetchMode(
enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,
dml_uint_t *MinPrefetchMode,
dml_uint_t *MaxPrefetchMode);
static void CalculateRowBandwidth(
dml_bool_t GPUVMEnable,
enum dml_source_format_class SourcePixelFormat,
dml_float_t VRatio,
dml_float_t VRatioChroma,
dml_bool_t DCCEnable,
dml_float_t LineTime,
dml_uint_t MetaRowByteLuma,
dml_uint_t MetaRowByteChroma,
dml_uint_t meta_row_height_luma,
dml_uint_t meta_row_height_chroma,
dml_uint_t PixelPTEBytesPerRowLuma,
dml_uint_t PixelPTEBytesPerRowChroma,
dml_uint_t dpte_row_height_luma,
dml_uint_t dpte_row_height_chroma,
// Output
dml_float_t *meta_row_bw,
dml_float_t *dpte_row_bw);
static void CalculateFlipSchedule(
dml_float_t HostVMInefficiencyFactor,
dml_float_t UrgentExtraLatency,
dml_float_t UrgentLatency,
dml_uint_t GPUVMMaxPageTableLevels,
dml_bool_t HostVMEnable,
dml_uint_t HostVMMaxNonCachedPageTableLevels,
dml_bool_t GPUVMEnable,
dml_uint_t HostVMMinPageSize,
dml_float_t PDEAndMetaPTEBytesPerFrame,
dml_float_t MetaRowBytes,
dml_float_t DPTEBytesPerRow,
dml_float_t BandwidthAvailableForImmediateFlip,
dml_uint_t TotImmediateFlipBytes,
enum dml_source_format_class SourcePixelFormat,
dml_float_t LineTime,
dml_float_t VRatio,
dml_float_t VRatioChroma,
dml_float_t Tno_bw,
dml_bool_t DCCEnable,
dml_uint_t dpte_row_height,
dml_uint_t meta_row_height,
dml_uint_t dpte_row_height_chroma,
dml_uint_t meta_row_height_chroma,
dml_bool_t use_one_row_for_frame_flip,
// Output
dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
dml_float_t *final_flip_bw,
dml_bool_t *ImmediateFlipSupportedForPipe);
static dml_float_t CalculateWriteBackDelay(
enum dml_source_format_class WritebackPixelFormat,
dml_float_t WritebackHRatio,
dml_float_t WritebackVRatio,
dml_uint_t WritebackVTaps,
dml_uint_t WritebackDestinationWidth,
dml_uint_t WritebackDestinationHeight,
dml_uint_t WritebackSourceHeight,
dml_uint_t HTotal);
/*
 * Forward declaration; the definition is outside the part of the file
 * reviewed here. The one visible caller, CalculatePrefetchSchedule(), passes
 * the per-pipe clocks and timing by value and supplies storage for the seven
 * trailing pointer arguments, whose values it reads after the call.
 */
static void CalculateVUpdateAndDynamicMetadataParameters(
dml_uint_t MaxInterDCNTileRepeaters,
dml_float_t Dppclk,
dml_float_t DISPCLK,
dml_float_t DCFClkDeepSleep,
dml_float_t PixelClock,
dml_uint_t HTotal,
dml_uint_t VBlank,
dml_uint_t DynamicMetadataTransmittedBytes,
dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
// NOTE(review): declared dml_uint_t although the neighbouring flag is dml_bool_t - confirm intended type
dml_uint_t InterlaceEnable,
dml_bool_t ProgressiveToInterlaceUnitInOPP,
// Output (presumably - all remaining parameters are non-const pointers; confirm in the definition)
dml_float_t *TSetup,
dml_float_t *Tdmbf,
dml_float_t *Tdmec,
dml_float_t *Tdmsks,
dml_uint_t *VUpdateOffsetPix,
dml_uint_t *VUpdateWidthPix,
dml_uint_t *VReadyOffsetPix);
static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported);
static dml_float_t TruncToValidBPP(
dml_float_t LinkBitRate,
dml_uint_t Lanes,
dml_uint_t HTotal,
dml_uint_t HActive,
dml_float_t PixelClock,
dml_float_t DesiredBPP,
dml_bool_t DSCEnable,
enum dml_output_encoder_class Output,
enum dml_output_format_class Format,
dml_uint_t DSCInputBitPerComponent,
dml_uint_t DSCSlices,
dml_uint_t AudioRate,
dml_uint_t AudioLayout,
enum dml_odm_mode ODMModeNoDSC,
enum dml_odm_mode ODMModeDSC,
// Output
dml_uint_t *RequiredSlotsSingle);
static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
struct display_mode_lib_scratch_st *s,
struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p);
static void CalculateDCFCLKDeepSleep(
dml_uint_t NumberOfActiveSurfaces,
dml_uint_t BytePerPixelY[],
dml_uint_t BytePerPixelC[],
dml_float_t VRatio[],
dml_float_t VRatioChroma[],
dml_uint_t SwathWidthY[],
dml_uint_t SwathWidthC[],
dml_uint_t DPPPerSurface[],
dml_float_t HRatio[],
dml_float_t HRatioChroma[],
dml_float_t PixelClock[],
dml_float_t PSCL_THROUGHPUT[],
dml_float_t PSCL_THROUGHPUT_CHROMA[],
dml_float_t Dppclk[],
dml_float_t ReadBandwidthLuma[],
dml_float_t ReadBandwidthChroma[],
dml_uint_t ReturnBusWidth,
// Output
dml_float_t *DCFCLKDeepSleep);
static void CalculateUrgentBurstFactor(
enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
dml_uint_t swath_width_luma_ub,
dml_uint_t swath_width_chroma_ub,
dml_uint_t SwathHeightY,
dml_uint_t SwathHeightC,
dml_float_t LineTime,
dml_float_t UrgentLatency,
dml_float_t CursorBufferSize,
dml_uint_t CursorWidth,
dml_uint_t CursorBPP,
dml_float_t VRatio,
dml_float_t VRatioC,
dml_float_t BytePerPixelInDETY,
dml_float_t BytePerPixelInDETC,
dml_uint_t DETBufferSizeY,
dml_uint_t DETBufferSizeC,
// Output
dml_float_t *UrgentBurstFactorCursor,
dml_float_t *UrgentBurstFactorLuma,
dml_float_t *UrgentBurstFactorChroma,
dml_bool_t *NotEnoughUrgentLatencyHiding);
static dml_float_t RequiredDTBCLK(
dml_bool_t DSCEnable,
dml_float_t PixelClock,
enum dml_output_format_class OutputFormat,
dml_float_t OutputBpp,
dml_uint_t DSCSlices,
dml_uint_t HTotal,
dml_uint_t HActive,
dml_uint_t AudioRate,
dml_uint_t AudioLayoutSingle);
static void UseMinimumDCFCLK(
struct display_mode_lib_scratch_st *scratch,
struct UseMinimumDCFCLK_params_st *p);
static void CalculatePixelDeliveryTimes(
dml_uint_t NumberOfActiveSurfaces,
dml_float_t VRatio[],
dml_float_t VRatioChroma[],
dml_float_t VRatioPrefetchY[],
dml_float_t VRatioPrefetchC[],
dml_uint_t swath_width_luma_ub[],
dml_uint_t swath_width_chroma_ub[],
dml_uint_t DPPPerSurface[],
dml_float_t HRatio[],
dml_float_t HRatioChroma[],
dml_float_t PixelClock[],
dml_float_t PSCL_THROUGHPUT[],
dml_float_t PSCL_THROUGHPUT_CHROMA[],
dml_float_t Dppclk[],
dml_uint_t BytePerPixelC[],
enum dml_rotation_angle SourceScan[],
dml_uint_t NumberOfCursors[],
dml_uint_t CursorWidth[],
dml_uint_t CursorBPP[],
dml_uint_t BlockWidth256BytesY[],
dml_uint_t BlockHeight256BytesY[],
dml_uint_t BlockWidth256BytesC[],
dml_uint_t BlockHeight256BytesC[],
// Output
dml_float_t DisplayPipeLineDeliveryTimeLuma[],
dml_float_t DisplayPipeLineDeliveryTimeChroma[],
dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],
dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],
dml_float_t DisplayPipeRequestDeliveryTimeLuma[],
dml_float_t DisplayPipeRequestDeliveryTimeChroma[],
dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],
dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],
dml_float_t CursorRequestDeliveryTime[],
dml_float_t CursorRequestDeliveryTimePrefetch[]);
static void CalculateMetaAndPTETimes(
dml_bool_t use_one_row_for_frame[],
dml_uint_t NumberOfActiveSurfaces,
dml_bool_t GPUVMEnable,
dml_uint_t MetaChunkSize,
dml_uint_t MinMetaChunkSizeBytes,
dml_uint_t HTotal[],
dml_float_t VRatio[],
dml_float_t VRatioChroma[],
dml_float_t DestinationLinesToRequestRowInVBlank[],
dml_float_t DestinationLinesToRequestRowInImmediateFlip[],
dml_bool_t DCCEnable[],
dml_float_t PixelClock[],
dml_uint_t BytePerPixelY[],
dml_uint_t BytePerPixelC[],
enum dml_rotation_angle SourceScan[],
dml_uint_t dpte_row_height[],
dml_uint_t dpte_row_height_chroma[],
dml_uint_t meta_row_width[],
dml_uint_t meta_row_width_chroma[],
dml_uint_t meta_row_height[],
dml_uint_t meta_row_height_chroma[],
dml_uint_t meta_req_width[],
dml_uint_t meta_req_width_chroma[],
dml_uint_t meta_req_height[],
dml_uint_t meta_req_height_chroma[],
dml_uint_t dpte_group_bytes[],
dml_uint_t PTERequestSizeY[],
dml_uint_t PTERequestSizeC[],
dml_uint_t PixelPTEReqWidthY[],
dml_uint_t PixelPTEReqHeightY[],
dml_uint_t PixelPTEReqWidthC[],
dml_uint_t PixelPTEReqHeightC[],
dml_uint_t dpte_row_width_luma_ub[],
dml_uint_t dpte_row_width_chroma_ub[],
// Output
dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],
dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],
dml_float_t DST_Y_PER_META_ROW_NOM_L[],
dml_float_t DST_Y_PER_META_ROW_NOM_C[],
dml_float_t TimePerMetaChunkNominal[],
dml_float_t TimePerChromaMetaChunkNominal[],
dml_float_t TimePerMetaChunkVBlank[],
dml_float_t TimePerChromaMetaChunkVBlank[],
dml_float_t TimePerMetaChunkFlip[],
dml_float_t TimePerChromaMetaChunkFlip[],
dml_float_t time_per_pte_group_nom_luma[],
dml_float_t time_per_pte_group_vblank_luma[],
dml_float_t time_per_pte_group_flip_luma[],
dml_float_t time_per_pte_group_nom_chroma[],
dml_float_t time_per_pte_group_vblank_chroma[],
dml_float_t time_per_pte_group_flip_chroma[]);
static void CalculateVMGroupAndRequestTimes(
dml_uint_t NumberOfActiveSurfaces,
dml_bool_t GPUVMEnable,
dml_uint_t GPUVMMaxPageTableLevels,
dml_uint_t HTotal[],
dml_uint_t BytePerPixelC[],
dml_float_t DestinationLinesToRequestVMInVBlank[],
dml_float_t DestinationLinesToRequestVMInImmediateFlip[],
dml_bool_t DCCEnable[],
dml_float_t PixelClock[],
dml_uint_t dpte_row_width_luma_ub[],
dml_uint_t dpte_row_width_chroma_ub[],
dml_uint_t vm_group_bytes[],
dml_uint_t dpde0_bytes_per_frame_ub_l[],
dml_uint_t dpde0_bytes_per_frame_ub_c[],
dml_uint_t meta_pte_bytes_per_frame_ub_l[],
dml_uint_t meta_pte_bytes_per_frame_ub_c[],
// Output
dml_float_t TimePerVMGroupVBlank[],
dml_float_t TimePerVMGroupFlip[],
dml_float_t TimePerVMRequestVBlank[],
dml_float_t TimePerVMRequestFlip[]);
static void CalculateStutterEfficiency(
struct display_mode_lib_scratch_st *scratch,
struct CalculateStutterEfficiency_params_st *p);
static void CalculateSwathAndDETConfiguration(
struct display_mode_lib_scratch_st *scratch,
struct CalculateSwathAndDETConfiguration_params_st *p);
static void CalculateSwathWidth(
dml_bool_t ForceSingleDPP,
dml_uint_t NumberOfActiveSurfaces,
enum dml_source_format_class SourcePixelFormat[],
enum dml_rotation_angle SourceScan[],
dml_bool_t ViewportStationary[],
dml_uint_t ViewportWidth[],
dml_uint_t ViewportHeight[],
dml_uint_t ViewportXStart[],
dml_uint_t ViewportYStart[],
dml_uint_t ViewportXStartC[],
dml_uint_t ViewportYStartC[],
dml_uint_t SurfaceWidthY[],
dml_uint_t SurfaceWidthC[],
dml_uint_t SurfaceHeightY[],
dml_uint_t SurfaceHeightC[],
enum dml_odm_mode ODMMode[],
dml_uint_t BytePerPixY[],
dml_uint_t BytePerPixC[],
dml_uint_t Read256BytesBlockHeightY[],
dml_uint_t Read256BytesBlockHeightC[],
dml_uint_t Read256BytesBlockWidthY[],
dml_uint_t Read256BytesBlockWidthC[],
dml_uint_t BlendingAndTiming[],
dml_uint_t HActive[],
dml_float_t HRatio[],
dml_uint_t DPPPerSurface[],
// Output
dml_uint_t SwathWidthSingleDPPY[],
dml_uint_t SwathWidthSingleDPPC[],
dml_uint_t SwathWidthY[],
dml_uint_t SwathWidthC[],
dml_uint_t MaximumSwathHeightY[],
dml_uint_t MaximumSwathHeightC[],
dml_uint_t swath_width_luma_ub[],
dml_uint_t swath_width_chroma_ub[]);
static dml_float_t CalculateExtraLatency(
dml_uint_t RoundTripPingLatencyCycles,
dml_uint_t ReorderingBytes,
dml_float_t DCFCLK,
dml_uint_t TotalNumberOfActiveDPP,
dml_uint_t PixelChunkSizeInKByte,
dml_uint_t TotalNumberOfDCCActiveDPP,
dml_uint_t MetaChunkSize,
dml_float_t ReturnBW,
dml_bool_t GPUVMEnable,
dml_bool_t HostVMEnable,
dml_uint_t NumberOfActiveSurfaces,
dml_uint_t NumberOfDPP[],
dml_uint_t dpte_group_bytes[],
dml_float_t HostVMInefficiencyFactor,
dml_uint_t HostVMMinPageSize,
dml_uint_t HostVMMaxNonCachedPageTableLevels);
static dml_uint_t CalculateExtraLatencyBytes(
dml_uint_t ReorderingBytes,
dml_uint_t TotalNumberOfActiveDPP,
dml_uint_t PixelChunkSizeInKByte,
dml_uint_t TotalNumberOfDCCActiveDPP,
dml_uint_t MetaChunkSize,
dml_bool_t GPUVMEnable,
dml_bool_t HostVMEnable,
dml_uint_t NumberOfActiveSurfaces,
dml_uint_t NumberOfDPP[],
dml_uint_t dpte_group_bytes[],
dml_float_t HostVMInefficiencyFactor,
dml_uint_t HostVMMinPageSize,
dml_uint_t HostVMMaxNonCachedPageTableLevels);
static dml_float_t CalculateUrgentLatency(
dml_float_t UrgentLatencyPixelDataOnly,
dml_float_t UrgentLatencyPixelMixedWithVMData,
dml_float_t UrgentLatencyVMDataOnly,
dml_bool_t DoUrgentLatencyAdjustment,
dml_float_t UrgentLatencyAdjustmentFabricClockComponent,
dml_float_t UrgentLatencyAdjustmentFabricClockReference,
dml_float_t FabricClockSingle);
static dml_bool_t UnboundedRequest(
enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal,
dml_uint_t TotalNumberOfActiveDPP,
dml_bool_t NoChromaOrLinear,
enum dml_output_encoder_class Output);
static void CalculateSurfaceSizeInMall(
dml_uint_t NumberOfActiveSurfaces,
dml_uint_t MALLAllocatedForDCN,
enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
dml_bool_t DCCEnable[],
dml_bool_t ViewportStationary[],
dml_uint_t ViewportXStartY[],
dml_uint_t ViewportYStartY[],
dml_uint_t ViewportXStartC[],
dml_uint_t ViewportYStartC[],
dml_uint_t ViewportWidthY[],
dml_uint_t ViewportHeightY[],
dml_uint_t BytesPerPixelY[],
dml_uint_t ViewportWidthC[],
dml_uint_t ViewportHeightC[],
dml_uint_t BytesPerPixelC[],
dml_uint_t SurfaceWidthY[],
dml_uint_t SurfaceWidthC[],
dml_uint_t SurfaceHeightY[],
dml_uint_t SurfaceHeightC[],
dml_uint_t Read256BytesBlockWidthY[],
dml_uint_t Read256BytesBlockWidthC[],
dml_uint_t Read256BytesBlockHeightY[],
dml_uint_t Read256BytesBlockHeightC[],
dml_uint_t ReadBlockWidthY[],
dml_uint_t ReadBlockWidthC[],
dml_uint_t ReadBlockHeightY[],
dml_uint_t ReadBlockHeightC[],
// Output
dml_uint_t SurfaceSizeInMALL[],
dml_bool_t *ExceededMALLSize);
static void CalculateDETBufferSize(
dml_uint_t DETSizeOverride[],
enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
dml_bool_t ForceSingleDPP,
dml_uint_t NumberOfActiveSurfaces,
dml_bool_t UnboundedRequestEnabled,
dml_uint_t nomDETInKByte,
dml_uint_t MaxTotalDETInKByte,
dml_uint_t ConfigReturnBufferSizeInKByte,
dml_uint_t MinCompressedBufferSizeInKByte,
dml_uint_t ConfigReturnBufferSegmentSizeInkByte,
dml_uint_t CompressedBufferSegmentSizeInkByteFinal,
enum dml_source_format_class SourcePixelFormat[],
dml_float_t ReadBandwidthLuma[],
dml_float_t ReadBandwidthChroma[],
dml_uint_t RotesY[],
dml_uint_t RoundedUpMaxSwathSizeBytesC[],
dml_uint_t DPPPerSurface[],
// Output
dml_uint_t DETBufferSizeInKByte[],
dml_uint_t *CompressedBufferSizeInkByte);
static void CalculateMaxDETAndMinCompressedBufferSize(
dml_uint_t ConfigReturnBufferSizeInKByte,
dml_uint_t ConfigReturnBufferSegmentSizeInKByte,
dml_uint_t ROBBufferSizeInKByte,
dml_uint_t MaxNumDPP,
dml_bool_t nomDETInKByteOverrideEnable,
dml_uint_t nomDETInKByteOverrideValue,
// Output
dml_uint_t *MaxTotalDETInKByte,
dml_uint_t *nomDETInKByte,
dml_uint_t *MinCompressedBufferSizeInKByte);
static dml_uint_t DSCDelayRequirement(
dml_bool_t DSCEnabled,
enum dml_odm_mode ODMMode,
dml_uint_t DSCInputBitPerComponent,
dml_float_t OutputBpp,
dml_uint_t HActive,
dml_uint_t HTotal,
dml_uint_t NumberOfDSCSlices,
enum dml_output_format_class OutputFormat,
enum dml_output_encoder_class Output,
dml_float_t PixelClock,
dml_float_t PixelClockBackEnd);
static dml_bool_t CalculateVActiveBandwithSupport(
dml_uint_t NumberOfActiveSurfaces,
dml_float_t ReturnBW,
dml_bool_t NotUrgentLatencyHiding[],
dml_float_t ReadBandwidthLuma[],
dml_float_t ReadBandwidthChroma[],
dml_float_t cursor_bw[],
dml_float_t meta_row_bandwidth[],
dml_float_t dpte_row_bandwidth[],
dml_uint_t NumberOfDPP[],
dml_float_t UrgentBurstFactorLuma[],
dml_float_t UrgentBurstFactorChroma[],
dml_float_t UrgentBurstFactorCursor[]);
static void CalculatePrefetchBandwithSupport(
dml_uint_t NumberOfActiveSurfaces,
dml_float_t ReturnBW,
enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
dml_bool_t NotUrgentLatencyHiding[],
dml_float_t ReadBandwidthLuma[],
dml_float_t ReadBandwidthChroma[],
dml_float_t PrefetchBandwidthLuma[],
dml_float_t PrefetchBandwidthChroma[],
dml_float_t cursor_bw[],
dml_float_t meta_row_bandwidth[],
dml_float_t dpte_row_bandwidth[],
dml_float_t cursor_bw_pre[],
dml_float_t prefetch_vmrow_bw[],
dml_uint_t NumberOfDPP[],
dml_float_t UrgentBurstFactorLuma[],
dml_float_t UrgentBurstFactorChroma[],
dml_float_t UrgentBurstFactorCursor[],
dml_float_t UrgentBurstFactorLumaPre[],
dml_float_t UrgentBurstFactorChromaPre[],
dml_float_t UrgentBurstFactorCursorPre[],
// Output
dml_float_t *PrefetchBandwidth,
dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch,
dml_float_t *FractionOfUrgentBandwidth,
dml_bool_t *PrefetchBandwidthSupport);
static dml_float_t CalculateBandwidthAvailableForImmediateFlip(
dml_uint_t NumberOfActiveSurfaces,
dml_float_t ReturnBW,
dml_float_t ReadBandwidthLuma[],
dml_float_t ReadBandwidthChroma[],
dml_float_t PrefetchBandwidthLuma[],
dml_float_t PrefetchBandwidthChroma[],
dml_float_t cursor_bw[],
dml_float_t cursor_bw_pre[],
dml_uint_t NumberOfDPP[],
dml_float_t UrgentBurstFactorLuma[],
dml_float_t UrgentBurstFactorChroma[],
dml_float_t UrgentBurstFactorCursor[],
dml_float_t UrgentBurstFactorLumaPre[],
dml_float_t UrgentBurstFactorChromaPre[],
dml_float_t UrgentBurstFactorCursorPre[]);
static void CalculateImmediateFlipBandwithSupport(
dml_uint_t NumberOfActiveSurfaces,
dml_float_t ReturnBW,
enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
enum dml_immediate_flip_requirement ImmediateFlipRequirement[],
dml_float_t final_flip_bw[],
dml_float_t ReadBandwidthLuma[],
dml_float_t ReadBandwidthChroma[],
dml_float_t PrefetchBandwidthLuma[],
dml_float_t PrefetchBandwidthChroma[],
dml_float_t cursor_bw[],
dml_float_t meta_row_bandwidth[],
dml_float_t dpte_row_bandwidth[],
dml_float_t cursor_bw_pre[],
dml_float_t prefetch_vmrow_bw[],
dml_uint_t NumberOfDPP[],
dml_float_t UrgentBurstFactorLuma[],
dml_float_t UrgentBurstFactorChroma[],
dml_float_t UrgentBurstFactorCursor[],
dml_float_t UrgentBurstFactorLumaPre[],
dml_float_t UrgentBurstFactorChromaPre[],
dml_float_t UrgentBurstFactorCursorPre[],
// Output
dml_float_t *TotalBandwidth,
dml_float_t *TotalBandwidthNotIncludingMALLPrefetch,
dml_float_t *FractionOfUrgentBandwidth,
dml_bool_t *ImmediateFlipBandwidthSupport);
// ---------------------------
// Declaration Ends
// ---------------------------
/*
 * dscceComputeDelay - delay through the DSC compression engine, in pixels.
 *
 * bpc:         source bits per component; 8 and 10 select their own ssm
 *              delay constant, any other value is treated like 12
 * BPP:         bits per pixel; must be positive
 * sliceWidth:  pixels per slice line; must be at least one pixel container
 *              wide (i.e. >= the pixels-per-clock of the format)
 * numSlices:   horizontal slices per DSC engine
 * pixelFormat: dml_420 and dml_n422 run at two pixels per clock, every
 *              other format at one
 * Output:      only reported in the __DML_VBA_DEBUG__ trace
 *
 * Returns the delay in pixels. Degenerate input (BPP not positive, or a
 * slice narrower than one pixel container) yields 0 instead of dividing by
 * zero or converting an out-of-range float to an unsigned integer.
 */
static dml_uint_t dscceComputeDelay(
	dml_uint_t bpc,
	dml_float_t BPP,
	dml_uint_t sliceWidth,
	dml_uint_t numSlices,
	enum dml_output_format_class pixelFormat,
	enum dml_output_encoder_class Output)
{
	// valid bpc = source bits per component in the set of {8, 10, 12}
	// valid bpp = increments of 1/16 of a bit
	// min = 6/7/8 in N420/N422/444, respectively
	// max = such that compression is 1:1
	// valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
	// valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
	// valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}

	// fixed value
	dml_uint_t rcModelSize = 8192;
	dml_uint_t pixelsPerClock, w;
	dml_uint_t pixels = 0;

	// N420/N422 operate at 2 pixels per clock, all other modes at 1 pixel per clock
	if (pixelFormat == dml_420 || pixelFormat == dml_n422)
		pixelsPerClock = 2;
	else
		pixelsPerClock = 1;

	// divide by pixel per cycle to compute slice width as seen by DSC
	w = sliceWidth / pixelsPerClock;

	// w (and wx derived from it) is used as a divisor below and BPP divides
	// the initial transmit delay, so only run the model on non-degenerate input
	if (w != 0 && BPP > 0.0) {
		dml_uint_t initialXmitDelay, D, s, ix, wx, p, l0, a, ax, L, lstall, Delay;

		// initial transmit delay as per PPS
		initialXmitDelay = (dml_uint_t)(dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock, 1));

		// compute ssm delay
		if (bpc == 8)
			D = 81;
		else if (bpc == 10)
			D = 89;
		else
			D = 113;

		// 422 mode has an additional cycle of delay
		if (pixelFormat == dml_420 || pixelFormat == dml_444 || pixelFormat == dml_n422)
			s = 0;
		else
			s = 1;

		// main calculation for the dscce
		ix = initialXmitDelay + 45;
		wx = (w + 2) / 3;
		p = 3 * wx - w;
		l0 = ix / w;
		a = ix + p * l0;
		ax = (a + 2) / 3 + D + 6 + 1;
		L = (ax + wx - 1) / wx;
		if ((ix % w) == 0 && p != 0)
			lstall = 1;
		else
			lstall = 0;
		Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;

		// dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
		pixels = Delay * 3 * pixelsPerClock;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: bpc: %u\n", __func__, bpc);
	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
	dml_print("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
	dml_print("DML::%s: numSlices: %u\n", __func__, numSlices);
	dml_print("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
	dml_print("DML::%s: Output: %u\n", __func__, Output);
	dml_print("DML::%s: pixels: %u\n", __func__, pixels);
#endif
	return pixels;
}
/*
 * dscComputeDelay - fixed delay of the DSC pipeline stages.
 *
 * pixelFormat: selects one of three per-stage delay budgets (dml_420,
 *              dml_n422, or everything else)
 * Output:      accepted for interface symmetry; not read
 *
 * Returns the sum of the per-stage constants for the selected format.
 */
static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, enum dml_output_encoder_class Output)
{
	dml_uint_t Delay;

	switch (pixelFormat) {
	case dml_420:
		Delay = 2	// sfr
			+ 0	// dsccif
			+ 3	// dscc - input deserializer
			+ 2	// dscc gets pixels every other cycle
			+ 12	// dscc - input cdc fifo
			+ 13	// dscc gets pixels every other cycle
			+ 2	// dscc - cdc uncertainty
			+ 7	// dscc - output cdc fifo
			+ 3	// dscc gets pixels every other cycle
			+ 2	// dscc - cdc uncertainty
			+ 1	// dscc - output serializer
			+ 1;	// sft
		break;
	case dml_n422:
		Delay = 2	// sfr
			+ 1	// dsccif
			+ 5	// dscc - input deserializer
			+ 25	// dscc - input cdc fifo
			+ 2	// dscc - cdc uncertainty
			+ 10	// dscc - output cdc fifo
			+ 2	// dscc - cdc uncertainty
			+ 1	// dscc - output serializer
			+ 1;	// sft
		break;
	default:
		Delay = 2	// sfr
			+ 0	// dsccif
			+ 3	// dscc - input deserializer
			+ 12	// dscc - input cdc fifo
			+ 2	// dscc - cdc uncertainty
			+ 7	// dscc - output cdc fifo
			+ 1	// dscc - output serializer
			+ 2	// dscc - cdc uncertainty
			+ 1;	// sft
		break;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
	dml_print("DML::%s: Delay = %u\n", __func__, Delay);
#endif
	return Delay;
}
/*
 * CalculatePrefetchSchedule - compute the prefetch schedule for one pipe.
 *
 * @scratch: scratch storage; only scratch->CalculatePrefetchSchedule_locals
 *           is used, and every local read here is (re)initialised first.
 * @p:       inputs (by value) and pointers to the outputs written here.
 *
 * Outputs written through @p (in order of first write): Tdmdl,
 * NotEnoughTimeForDynamicMetadata, Tdmdl_vm, DSTXAfterScaler,
 * DSTYAfterScaler, Tno_bw, DestinationLinesToRequestVMInVBlank,
 * DestinationLinesToRequestRowInVBlank, VRatioPrefetchY, VRatioPrefetchC,
 * RequiredPrefetchPixDataBWLuma, RequiredPrefetchPixDataBWChroma,
 * DestinationLinesForPrefetch and prefetch_vmrow_bw.  p->TSetup and the
 * p->VUpdate*Pix / p->VReadyOffsetPix pointers are forwarded to
 * CalculateVUpdateAndDynamicMetadataParameters().
 *
 * Two candidate schedules are evaluated: the "oto" one, sized from
 * prefetch_bw_oto, and the "equ" one, sized from the time available between
 * VStartup and the scaler output.  The shorter of the two (in destination
 * lines) is reported.
 *
 * Return: true when the schedule is NOT usable (s->MyError was set, in which
 * case the schedule outputs are zeroed), or when Dppclk or Dispclk is 0.0
 * (early exit: only the dynamic-metadata outputs have been written at that
 * point).  false on success.
 */
static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
	struct CalculatePrefetchSchedule_params_st *p)
{
	struct CalculatePrefetchSchedule_locals_st *s = &scratch->CalculatePrefetchSchedule_locals;

	/* Reset the scratch locals; the scratch area is shared between calls. */
	s->MyError = false;
	s->DPPCycles = 0;
	s->DISPCLKCycles = 0;
	s->DSTTotalPixelsAfterScaler = 0.0;
	s->LineTime = 0.0;
	s->dst_y_prefetch_equ = 0.0;
	s->prefetch_bw_oto = 0.0;
	s->Tvm_oto = 0.0;
	s->Tr0_oto = 0.0;
	s->Tvm_oto_lines = 0.0;
	s->Tr0_oto_lines = 0.0;
	s->dst_y_prefetch_oto = 0.0;
	s->TimeForFetchingMetaPTE = 0.0;
	s->TimeForFetchingRowInVBlank = 0.0;
	s->LinesToRequestPrefetchPixelData = 0.0;
	s->HostVMDynamicLevelsTrips = 0;
	s->trip_to_mem = 0.0;
	s->Tvm_trips = 0.0;
	s->Tr0_trips = 0.0;
	s->Tvm_trips_rounded = 0.0;
	s->Tr0_trips_rounded = 0.0;
	s->max_Tsw = 0.0;
	s->Lsw_oto = 0.0;
	s->Tpre_rounded = 0.0;
	s->prefetch_bw_equ = 0.0;
	s->Tvm_equ = 0.0;
	s->Tr0_equ = 0.0;
	s->Tdmbf = 0.0;
	s->Tdmec = 0.0;
	s->Tdmsks = 0.0;
	s->prefetch_sw_bytes = 0.0;
	s->prefetch_bw_pr = 0.0;
	s->bytes_pp = 0.0;
	s->dep_bytes = 0.0;
	s->min_Lsw_oto = 0.0;
	s->Tsw_est1 = 0.0;
	s->Tsw_est3 = 0.0;

	/* Host-VM page-table levels only add trips when both GPUVM and HostVM are on. */
	if (p->GPUVMEnable == true && p->HostVMEnable == true) {
		s->HostVMDynamicLevelsTrips = p->HostVMMaxNonCachedPageTableLevels;
	} else {
		s->HostVMDynamicLevelsTrips = 0;
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
	dml_print("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->GPUVMPageTableLevels);
	dml_print("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
	dml_print("DML::%s: VStartup = %u\n", __func__, p->VStartup);
	dml_print("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup);
	dml_print("DML::%s: HostVMEnable = %u\n", __func__, p->HostVMEnable);
	dml_print("DML::%s: HostVMInefficiencyFactor= %f\n", __func__, p->HostVMInefficiencyFactor);
	dml_print("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
#endif
	CalculateVUpdateAndDynamicMetadataParameters(
		p->MaxInterDCNTileRepeaters,
		p->myPipe->Dppclk,
		p->myPipe->Dispclk,
		p->myPipe->DCFClkDeepSleep,
		p->myPipe->PixelClock,
		p->myPipe->HTotal,
		p->myPipe->VBlank,
		p->DynamicMetadataTransmittedBytes,
		p->DynamicMetadataLinesBeforeActiveRequired,
		p->myPipe->InterlaceEnable,
		p->myPipe->ProgressiveToInterlaceUnitInOPP,
		p->TSetup,
		// Output
		&s->Tdmbf,
		&s->Tdmec,
		&s->Tdmsks,
		p->VUpdateOffsetPix,
		p->VUpdateWidthPix,
		p->VReadyOffsetPix);

	s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
	s->trip_to_mem = p->UrgentLatency;
	s->Tvm_trips = p->UrgentExtraLatency + s->trip_to_mem * (p->GPUVMPageTableLevels * (s->HostVMDynamicLevelsTrips + 1) - 1);

	/* Dynamic metadata latency. */
	if (p->DynamicMetadataVMEnabled == true) {
		*p->Tdmdl = p->TWait + s->Tvm_trips + s->trip_to_mem;
	} else {
		*p->Tdmdl = p->TWait + p->UrgentExtraLatency;
	}
#ifdef __DML_VBA_ALLOW_DELTA__
	/* Both values live in the parameter struct, so they must be reached through p. */
	if (p->DynamicMetadataEnable == false) {
		*p->Tdmdl = 0.0;
	}
#endif
	if (p->DynamicMetadataEnable == true) {
		if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
			*p->NotEnoughTimeForDynamicMetadata = true;
			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
		} else {
			*p->NotEnoughTimeForDynamicMetadata = false;
		}
	} else {
		*p->NotEnoughTimeForDynamicMetadata = false;
	}
	*p->Tdmdl_vm = (p->DynamicMetadataEnable == true && p->DynamicMetadataVMEnabled == true && p->GPUVMEnable == true ? p->TWait + s->Tvm_trips : 0);

	/* Pipeline delay after the scaler, first in DPPCLK / DISPCLK cycles ... */
	if (p->myPipe->ScalerEnabled)
		s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
	else
		s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
	s->DPPCycles = (dml_uint_t)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
	s->DISPCLKCycles = (dml_uint_t)p->DISPCLKDelaySubtotal;

	/* Both clocks are used as divisors below; report an error instead of dividing by zero. */
	if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
		return true;

	/* ... then converted to pixel clocks, plus ODM / multi-DPP offsets. */
	*p->DSTXAfterScaler = (dml_uint_t) dml_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay, 1.0);
	*p->DSTXAfterScaler = (dml_uint_t) dml_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
		((p->myPipe->ODMMode == dml_odm_mode_split_1to2 || p->myPipe->ODMMode == dml_odm_mode_mso_1to2) ? (dml_float_t)p->myPipe->HActive / 2.0 : 0) +
		((p->myPipe->ODMMode == dml_odm_mode_mso_1to4) ? (dml_float_t)p->myPipe->HActive * 3.0 / 4.0 : 0), 1.0);
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
	dml_print("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
	dml_print("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
	dml_print("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
	dml_print("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
	dml_print("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
	dml_print("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
	dml_print("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
	dml_print("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
#endif
	if (p->OutputFormat == dml_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
		*p->DSTYAfterScaler = 1;
	else
		*p->DSTYAfterScaler = 0;

	/* Normalise (Y lines, X pixels) so that X ends up smaller than HTotal. */
	s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
	*p->DSTYAfterScaler = (dml_uint_t)(dml_floor(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
	*p->DSTXAfterScaler = (dml_uint_t)(s->DSTTotalPixelsAfterScaler - ((dml_float_t) (*p->DSTYAfterScaler * p->myPipe->HTotal)));
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
	dml_print("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
#endif
	s->MyError = false;

	/* Trip times for VM and row fetches, rounded up to quarter lines, and Tno_bw. */
	s->Tr0_trips = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
	if (p->GPUVMEnable == true) {
		s->Tvm_trips_rounded = dml_ceil(4.0 * s->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
		s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
		if (p->GPUVMPageTableLevels >= 3) {
			*p->Tno_bw = p->UrgentExtraLatency + s->trip_to_mem * (dml_float_t) ((p->GPUVMPageTableLevels - 2) * (s->HostVMDynamicLevelsTrips + 1) - 1);
		} else if (p->GPUVMPageTableLevels == 1 && p->myPipe->DCCEnable != true) {
			s->Tr0_trips_rounded = dml_ceil(4.0 * p->UrgentExtraLatency / s->LineTime, 1.0) / 4.0 * s->LineTime;
			*p->Tno_bw = p->UrgentExtraLatency;
		} else {
			*p->Tno_bw = 0;
		}
	} else if (p->myPipe->DCCEnable == true) {
		s->Tvm_trips_rounded = s->LineTime / 4.0;
		s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
		*p->Tno_bw = 0;
	} else {
		s->Tvm_trips_rounded = s->LineTime / 4.0;
		s->Tr0_trips_rounded = s->LineTime / 2.0;
		*p->Tno_bw = 0;
	}
	/* Neither rounded trip time may be shorter than a quarter line. */
	s->Tvm_trips_rounded = dml_max(s->Tvm_trips_rounded, s->LineTime / 4.0);
	s->Tr0_trips_rounded = dml_max(s->Tr0_trips_rounded, s->LineTime / 4.0);

	/* Pixel-rate bandwidth; 4:2:0 formats count a quarter of the chroma bytes per pixel. */
	if (p->myPipe->SourcePixelFormat == dml_420_8 || p->myPipe->SourcePixelFormat == dml_420_10 || p->myPipe->SourcePixelFormat == dml_420_12) {
		s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4;
	} else {
		s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
	}
	s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (dml_float_t)p->myPipe->DPPPerSurface;
	if (p->myPipe->VRatio < 1.0)
		s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr;

	/* "oto" schedule: sized from prefetch_bw_oto. */
	s->max_Tsw = (dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime);
	s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
	s->prefetch_bw_oto = dml_max(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw);
	s->min_Lsw_oto = dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML_MAX_VRATIO_PRE_OTO__;
	s->min_Lsw_oto = dml_max(s->min_Lsw_oto, 1.0);
	s->Lsw_oto = dml_ceil(4.0 * dml_max(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0;
	if (p->GPUVMEnable == true) {
		s->Tvm_oto = dml_max3(
			s->Tvm_trips,
			*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
			s->LineTime / 4.0);
	} else
		s->Tvm_oto = s->LineTime / 4.0;
	if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
		s->Tr0_oto = dml_max4(
			s->Tr0_trips,
			(p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto,
			(s->LineTime - s->Tvm_oto)/2.0,
			s->LineTime / 4.0);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto);
		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, s->Tr0_trips);
		/* Print the operand actually passed to dml_max4 above (half of the remaining line time). */
		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, (s->LineTime - s->Tvm_oto) / 2.0);
		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, s->LineTime / 4);
#endif
	} else
		s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 2.0;
	s->Tvm_oto_lines = dml_ceil(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
	s->Tr0_oto_lines = dml_ceil(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
	s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;

	/* "equ" schedule: lines left between VStartup and the scaler output delay. */
	s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal);
	s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
	dml_print("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *p->Tno_bw);
	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, p->UrgentExtraLatency);
	dml_print("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
	dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
	dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
	dml_print("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
	dml_print("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes);
	dml_print("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
	dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
	dml_print("DML::%s: Tvm_trips = %f\n", __func__, s->Tvm_trips);
	dml_print("DML::%s: Tr0_trips = %f\n", __func__, s->Tr0_trips);
	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
	dml_print("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
	dml_print("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
	dml_print("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
#endif
	/* Round to the nearest quarter line (add 0.125, then floor to a multiple of 0.25). */
	s->dst_y_prefetch_equ = dml_floor(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
	s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
	dml_print("DML::%s: LineTime: %f\n", __func__, s->LineTime);
	dml_print("DML::%s: VStartup: %u\n", __func__, p->VStartup);
	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
	dml_print("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
	dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
	dml_print("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
	dml_print("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);

	/* From here on, prefetch_sw_bytes is raised to twice dep_bytes when it is smaller than dep_bytes. */
	s->dep_bytes = dml_max(p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor, p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor);
	if (s->prefetch_sw_bytes < s->dep_bytes) {
		s->prefetch_sw_bytes = 2 * s->dep_bytes;
	}
	*p->DestinationLinesToRequestVMInVBlank = 0;
	*p->DestinationLinesToRequestRowInVBlank = 0;
	*p->VRatioPrefetchY = 0;
	*p->VRatioPrefetchC = 0;
	*p->RequiredPrefetchPixDataBWLuma = 0;
	if (s->dst_y_prefetch_equ > 1) {
		/*
		 * Four candidate bandwidths; each is 0 when its time budget is not
		 * positive.  The budgets differ in whether Tno_bw / Tvm_trips_rounded
		 * and 2 * Tr0_trips_rounded are subtracted from Tpre_rounded.
		 */
		if (s->Tpre_rounded - *p->Tno_bw > 0) {
			s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte
				+ 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor
				+ s->prefetch_sw_bytes)
				/ (s->Tpre_rounded - *p->Tno_bw);
			s->Tsw_est1 = s->prefetch_sw_bytes / s->PrefetchBandwidth1;
		} else
			s->PrefetchBandwidth1 = 0;
		if (p->VStartup == p->MaxVStartup && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0) {
			s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) /
				(s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
		}
		if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0)
			s->PrefetchBandwidth2 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
				(s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded);
		else
			s->PrefetchBandwidth2 = 0;
		if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) {
			s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
				(s->Tpre_rounded - s->Tvm_trips_rounded);
			s->Tsw_est3 = s->prefetch_sw_bytes / s->PrefetchBandwidth3;
		}
		else
			s->PrefetchBandwidth3 = 0;
		if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded > 0) {
			s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
		}
		if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
			s->PrefetchBandwidth4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
		else
			s->PrefetchBandwidth4 = 0;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, s->Tpre_rounded);
		dml_print("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, s->Tvm_trips_rounded);
		dml_print("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
		dml_print("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, s->PrefetchBandwidth1);
		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, s->PrefetchBandwidth2);
		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, s->PrefetchBandwidth3);
		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, s->PrefetchBandwidth4);
#endif
		{
			/*
			 * Pick the first candidate whose VM / row fetch times compare
			 * against the rounded trip times the way that case requires;
			 * PrefetchBandwidth4 is the fallback.
			 */
			dml_bool_t Case1OK;
			dml_bool_t Case2OK;
			dml_bool_t Case3OK;
			if (s->PrefetchBandwidth1 > 0) {
				if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth1 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth1 >= s->Tr0_trips_rounded) {
					Case1OK = true;
				} else {
					Case1OK = false;
				}
			} else {
				Case1OK = false;
			}
			if (s->PrefetchBandwidth2 > 0) {
				if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth2 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth2 < s->Tr0_trips_rounded) {
					Case2OK = true;
				} else {
					Case2OK = false;
				}
			} else {
				Case2OK = false;
			}
			if (s->PrefetchBandwidth3 > 0) {
				if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth3 < s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth3 >= s->Tr0_trips_rounded) {
					Case3OK = true;
				} else {
					Case3OK = false;
				}
			} else {
				Case3OK = false;
			}
			if (Case1OK) {
				s->prefetch_bw_equ = s->PrefetchBandwidth1;
			} else if (Case2OK) {
				s->prefetch_bw_equ = s->PrefetchBandwidth2;
			} else if (Case3OK) {
				s->prefetch_bw_equ = s->PrefetchBandwidth3;
			} else {
				s->prefetch_bw_equ = s->PrefetchBandwidth4;
			}
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: Case1OK: %u\n", __func__, Case1OK);
			dml_print("DML::%s: Case2OK: %u\n", __func__, Case2OK);
			dml_print("DML::%s: Case3OK: %u\n", __func__, Case3OK);
			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
#endif
			if (s->prefetch_bw_equ > 0) {
				if (p->GPUVMEnable == true) {
					s->Tvm_equ = dml_max3(*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, s->Tvm_trips, s->LineTime / 4);
				} else {
					s->Tvm_equ = s->LineTime / 4;
				}
				if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
					s->Tr0_equ = dml_max4((p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_equ, s->Tr0_trips, (s->LineTime - s->Tvm_equ) / 2, s->LineTime / 4);
				} else {
					s->Tr0_equ = (s->LineTime - s->Tvm_equ) / 2;
				}
			} else {
				s->Tvm_equ = 0;
				s->Tr0_equ = 0;
				dml_print("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
			}
		}

		/* Report whichever schedule needs fewer destination lines. */
		if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
			*p->DestinationLinesForPrefetch = s->dst_y_prefetch_oto;
			s->TimeForFetchingMetaPTE = s->Tvm_oto;
			s->TimeForFetchingRowInVBlank = s->Tr0_oto;
			*p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
			*p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
		} else {
			*p->DestinationLinesForPrefetch = s->dst_y_prefetch_equ;
			s->TimeForFetchingMetaPTE = s->Tvm_equ;
			s->TimeForFetchingRowInVBlank = s->Tr0_equ;
			/* At the last VStartup candidate with acceleration enabled, round down instead of up. */
			if (p->VStartup == p->MaxVStartup && p->EnhancedPrefetchScheduleAccelerationFinal != 0) {
				*p->DestinationLinesToRequestVMInVBlank = dml_floor(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
				*p->DestinationLinesToRequestRowInVBlank = dml_floor(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
			} else {
				*p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
				*p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
			}
		}
		s->LinesToRequestPrefetchPixelData = *p->DestinationLinesForPrefetch - *p->DestinationLinesToRequestVMInVBlank - 2 * *p->DestinationLinesToRequestRowInVBlank;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *p->DestinationLinesForPrefetch);
		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
		dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
#endif
		/* Prefetch ratios and pixel-data bandwidth need at least one line left for pixel data. */
		if (s->LinesToRequestPrefetchPixelData >= 1 && s->prefetch_bw_equ > 0) {
			*p->VRatioPrefetchY = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
			*p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
			dml_print("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
			dml_print("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
#endif
			if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
				if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
					*p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY,
						(dml_float_t)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
				} else {
					s->MyError = true;
					dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
					*p->VRatioPrefetchY = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
				dml_print("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
#endif
			}
			*p->VRatioPrefetchC = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
			*p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
			dml_print("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
			dml_print("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
#endif
			if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
				if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
					*p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, (dml_float_t)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
				} else {
					s->MyError = true;
					dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
					*p->VRatioPrefetchC = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
				dml_print("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
#endif
			}
			*p->RequiredPrefetchPixDataBWLuma = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData
				* p->myPipe->BytePerPixelY
				* p->swath_width_luma_ub / s->LineTime;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
			dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
			dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixDataBWLuma);
#endif
			*p->RequiredPrefetchPixDataBWChroma = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData
				* p->myPipe->BytePerPixelC
				* p->swath_width_chroma_ub / s->LineTime;
		} else {
			s->MyError = true;
			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", __func__, s->LinesToRequestPrefetchPixelData);
			*p->VRatioPrefetchY = 0;
			*p->VRatioPrefetchC = 0;
			*p->RequiredPrefetchPixDataBWLuma = 0;
			*p->RequiredPrefetchPixDataBWChroma = 0;
		}
		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingMetaPTE);
		dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", s->TimeForFetchingMetaPTE);
		dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", s->TimeForFetchingRowInVBlank);
		dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime);
		dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime);
		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingMetaPTE - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
	} else {
		/* No more than one line is available for prefetch: the schedule cannot work. */
		s->MyError = true;
		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
		s->TimeForFetchingMetaPTE = 0;
		s->TimeForFetchingRowInVBlank = 0;
		*p->DestinationLinesToRequestVMInVBlank = 0;
		*p->DestinationLinesToRequestRowInVBlank = 0;
		s->LinesToRequestPrefetchPixelData = 0;
		*p->VRatioPrefetchY = 0;
		*p->VRatioPrefetchC = 0;
		*p->RequiredPrefetchPixDataBWLuma = 0;
		*p->RequiredPrefetchPixDataBWChroma = 0;
	}
	{
		/*
		 * Bandwidth for the VM and row fetches during vblank.  Having bytes
		 * to fetch with zero lines to fetch them in is an error.
		 */
		dml_float_t prefetch_vm_bw;
		dml_float_t prefetch_row_bw;
		if (p->PDEAndMetaPTEBytesFrame == 0) {
			prefetch_vm_bw = 0;
		} else if (*p->DestinationLinesToRequestVMInVBlank > 0) {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
			dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
#endif
			prefetch_vm_bw = p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / (*p->DestinationLinesToRequestVMInVBlank * s->LineTime);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
		} else {
			prefetch_vm_bw = 0;
			s->MyError = true;
			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
		}
		if (p->MetaRowByte + p->PixelPTEBytesPerRow == 0) {
			prefetch_row_bw = 0;
		} else if (*p->DestinationLinesToRequestRowInVBlank > 0) {
			prefetch_row_bw = (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (*p->DestinationLinesToRequestRowInVBlank * s->LineTime);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
			dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
#endif
		} else {
			prefetch_row_bw = 0;
			s->MyError = true;
			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
		}
		*p->prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
	}

	/* On any error, zero the schedule outputs so callers never see a partial schedule. */
	if (s->MyError) {
		s->TimeForFetchingMetaPTE = 0;
		s->TimeForFetchingRowInVBlank = 0;
		*p->DestinationLinesToRequestVMInVBlank = 0;
		*p->DestinationLinesToRequestRowInVBlank = 0;
		*p->DestinationLinesForPrefetch = 0;
		s->LinesToRequestPrefetchPixelData = 0;
		*p->VRatioPrefetchY = 0;
		*p->VRatioPrefetchC = 0;
		*p->RequiredPrefetchPixDataBWLuma = 0;
		*p->RequiredPrefetchPixDataBWChroma = 0;
	}
	return s->MyError;
} // CalculatePrefetchSchedule
/*
 * CalculateBytePerPixelAndBlockSizes - per-plane byte sizes and block geometry.
 *
 * From the source pixel format and the swizzle mode, fills in:
 *  - BytePerPixelY/C and BytePerPixelDETY/DETC,
 *  - the height and width (in pixels) of a 256-byte block for each plane,
 *  - the macro-tile height and width for each plane.
 *
 * Formats whose BytePerPixelC is 0 get all chroma block / macro-tile
 * dimensions set to 0.  Any format not listed explicitly falls into the
 * default case (2 / 4 bytes per pixel, 4/3 / 8/3 DET bytes per pixel).
 */
static void CalculateBytePerPixelAndBlockSizes(
	enum dml_source_format_class SourcePixelFormat,
	enum dml_swizzle_mode SurfaceTiling,
	// Output
	dml_uint_t *BytePerPixelY,
	dml_uint_t *BytePerPixelC,
	dml_float_t *BytePerPixelDETY,
	dml_float_t *BytePerPixelDETC,
	dml_uint_t *BlockHeight256BytesY,
	dml_uint_t *BlockHeight256BytesC,
	dml_uint_t *BlockWidth256BytesY,
	dml_uint_t *BlockWidth256BytesC,
	dml_uint_t *MacroTileHeightY,
	dml_uint_t *MacroTileHeightC,
	dml_uint_t *MacroTileWidthY,
	dml_uint_t *MacroTileWidthC)
{
	int has_chroma;		/* 0 for the formats that carry no second plane */
	int tile_rows;		/* macro-tile height, in 256-byte block heights */
	int tile_bytes;		/* bytes covered by one macro tile */

	/* Step 1: bytes per pixel for each plane. */
	switch (SourcePixelFormat) {
	case dml_444_64:
		*BytePerPixelDETY = 8;
		*BytePerPixelDETC = 0;
		*BytePerPixelY = 8;
		*BytePerPixelC = 0;
		has_chroma = 0;
		break;
	case dml_444_32:
	case dml_rgbe:
		*BytePerPixelDETY = 4;
		*BytePerPixelDETC = 0;
		*BytePerPixelY = 4;
		*BytePerPixelC = 0;
		has_chroma = 0;
		break;
	case dml_444_16:
	case dml_mono_16:
		*BytePerPixelDETY = 2;
		*BytePerPixelDETC = 0;
		*BytePerPixelY = 2;
		*BytePerPixelC = 0;
		has_chroma = 0;
		break;
	case dml_444_8:
	case dml_mono_8:
		*BytePerPixelDETY = 1;
		*BytePerPixelDETC = 0;
		*BytePerPixelY = 1;
		*BytePerPixelC = 0;
		has_chroma = 0;
		break;
	case dml_rgbe_alpha:
		*BytePerPixelDETY = 4;
		*BytePerPixelDETC = 1;
		*BytePerPixelY = 4;
		*BytePerPixelC = 1;
		has_chroma = 1;
		break;
	case dml_420_8:
		*BytePerPixelDETY = 1;
		*BytePerPixelDETC = 2;
		*BytePerPixelY = 1;
		*BytePerPixelC = 2;
		has_chroma = 1;
		break;
	case dml_420_12:
		*BytePerPixelDETY = 2;
		*BytePerPixelDETC = 4;
		*BytePerPixelY = 2;
		*BytePerPixelC = 4;
		has_chroma = 1;
		break;
	default:
		*BytePerPixelDETY = (dml_float_t) (4.0 / 3);
		*BytePerPixelDETC = (dml_float_t) (8.0 / 3);
		*BytePerPixelY = 2;
		*BytePerPixelC = 4;
		has_chroma = 1;
		break;
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
	dml_print("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
	dml_print("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
#endif

	/* Step 2: shape of a 256-byte block; linear surfaces are one row high. */
	if (!has_chroma) {
		if (SurfaceTiling == dml_sw_linear)
			*BlockHeight256BytesY = 1;
		else
			*BlockHeight256BytesY = (SourcePixelFormat == dml_444_64) ? 4 :
						(SourcePixelFormat == dml_444_8) ? 16 : 8;
		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
		*BlockHeight256BytesC = 0;
		*BlockWidth256BytesC = 0;
	} else {
		if (SurfaceTiling == dml_sw_linear) {
			*BlockHeight256BytesY = 1;
			*BlockHeight256BytesC = 1;
		} else {
			switch (SourcePixelFormat) {
			case dml_rgbe_alpha:
				*BlockHeight256BytesY = 8;
				*BlockHeight256BytesC = 16;
				break;
			case dml_420_8:
				*BlockHeight256BytesY = 16;
				*BlockHeight256BytesC = 8;
				break;
			default:
				*BlockHeight256BytesY = 8;
				*BlockHeight256BytesC = 8;
				break;
			}
		}
		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
	dml_print("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
	dml_print("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
	dml_print("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
#endif

	/*
	 * Step 3: macro-tile geometry.  The three tiling families differ only in
	 * how many block rows a tile spans and how many bytes it covers.
	 */
	if (SurfaceTiling == dml_sw_linear) {
		tile_rows = 1;
		tile_bytes = 256;
	} else if (SurfaceTiling == dml_sw_64kb_d || SurfaceTiling == dml_sw_64kb_d_t || SurfaceTiling == dml_sw_64kb_d_x || SurfaceTiling == dml_sw_64kb_r_x) {
		tile_rows = 16;
		tile_bytes = 65536;
	} else {
		tile_rows = 32;
		tile_bytes = 65536 * 4;
	}
	*MacroTileHeightY = tile_rows * *BlockHeight256BytesY;
	*MacroTileWidthY = tile_bytes / *BytePerPixelY / *MacroTileHeightY;
	*MacroTileHeightC = tile_rows * *BlockHeight256BytesC;
	/* A zero chroma height means no chroma plane; avoid dividing by it. */
	if (*MacroTileHeightC == 0)
		*MacroTileWidthC = 0;
	else
		*MacroTileWidthC = tile_bytes / *BytePerPixelC / *MacroTileHeightC;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
	dml_print("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
	dml_print("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
	dml_print("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
#endif
} // CalculateBytePerPixelAndBlockSizes
/*
 * CalculateTWait - select the TWait latency term for a prefetch mode.
 *
 * Returns one of four values, picked by PrefetchMode and the MALL / DRR
 * configuration:
 *  - mode 0 with no MALL p-state mode in use and no synchronized DRR display:
 *        max(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency)
 *  - otherwise mode <= 1:
 *        max(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency)
 *  - otherwise mode <= 2:
 *        max(SREnterPlusExitTime, UrgentLatency)
 *  - phantom pipes, or any higher mode: UrgentLatency alone.
 */
static noinline_for_stack dml_float_t CalculateTWait(
	dml_uint_t PrefetchMode,
	enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
	dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
	dml_bool_t DRRDisplay,
	dml_float_t DRAMClockChangeLatency,
	dml_float_t FCLKChangeLatency,
	dml_float_t UrgentLatency,
	dml_float_t SREnterPlusExitTime)
{
	const int is_phantom_pipe = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe);
	const int is_mall_frame_or_subvp = (UseMALLForPStateChange == dml_use_mall_pstate_change_full_frame ||
					    UseMALLForPStateChange == dml_use_mall_pstate_change_sub_viewport);
	const int is_synced_drr = (SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay);
	dml_float_t TWait = 0.0;

	if (is_phantom_pipe || PrefetchMode > 2) {
		/* Every other branch requires a non-phantom pipe and mode <= 2. */
		TWait = UrgentLatency;
	} else if (PrefetchMode == 0 && !is_mall_frame_or_subvp && !is_synced_drr) {
		TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
	} else if (PrefetchMode <= 1) {
		TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
	} else {
		TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: PrefetchMode = %u\n", __func__, PrefetchMode);
	dml_print("DML::%s: TWait = %f\n", __func__, TWait);
#endif
	return TWait;
} // CalculateTWait
/// @brief Determine the range of prefetch modes to evaluate.
/// When AllowForPStateChangeOrStutterInVBlank names one specific requirement, mode evaluation is
/// done only at the matching prefetch mode (min == max). With the "*_if_possible" setting every
/// prefetch mode is tried in decreasing order of difficulty, starting from 0 (all power saving
/// features) up to 3 (urgent latency only).
/// An unrecognized setting is reported and asserted on; in that case *MinPrefetchMode is left
/// as the caller passed it and is still copied into *MaxPrefetchMode.
static void CalculatePrefetchMode(
	enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,
	dml_uint_t *MinPrefetchMode,
	dml_uint_t *MaxPrefetchMode)
{
	switch (AllowForPStateChangeOrStutterInVBlank) {
	case dml_prefetch_support_uclk_fclk_and_stutter_if_possible:
		*MinPrefetchMode = 0; // consider all pwr saving features
		*MaxPrefetchMode = 3; // consider just urgent latency
		return;
	case dml_prefetch_support_none:
		*MinPrefetchMode = 3;
		break;
	case dml_prefetch_support_stutter:
		*MinPrefetchMode = 2;
		break;
	case dml_prefetch_support_fclk_and_stutter:
		*MinPrefetchMode = 1;
		break;
	case dml_prefetch_support_uclk_fclk_and_stutter:
		*MinPrefetchMode = 0;
		break;
	default:
		dml_print("ERROR: Invalid AllowForPStateChangeOrStutterInVBlank setting! val=%u\n", AllowForPStateChangeOrStutterInVBlank);
		ASSERT(0);
		break;
	}

	// A specific requirement pins evaluation to a single mode
	*MaxPrefetchMode = *MinPrefetchMode;
} // CalculatePrefetchMode
/// @brief Compute the DISPCLK required by a writeback pipe.
/// Three candidate clocks are derived (from the horizontal taps, from the vertical taps, and from
/// the line buffer size); the largest one is rounded up with RoundToDFSGranularity().
/// WritebackPixelFormat and WritebackVRatio are accepted but not used by the calculation.
/// @return The required DISPCLK after rounding against DISPCLKDPPCLKVCOSpeed
static dml_float_t CalculateWriteBackDISPCLK(
	enum dml_source_format_class WritebackPixelFormat,
	dml_float_t PixelClock,
	dml_float_t WritebackHRatio,
	dml_float_t WritebackVRatio,
	dml_uint_t WritebackHTaps,
	dml_uint_t WritebackVTaps,
	dml_uint_t WritebackSourceWidth,
	dml_uint_t WritebackDestinationWidth,
	dml_uint_t HTotal,
	dml_uint_t WritebackLineBufferSize,
	dml_float_t DISPCLKDPPCLKVCOSpeed)
{
	const dml_float_t h_tap_groups = dml_ceil(WritebackHTaps / 8.0, 1);
	const dml_float_t dest_width_groups = dml_ceil(WritebackDestinationWidth / 6.0, 1);
	const dml_float_t line_buffer_term = WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0;

	const dml_float_t clk_from_h_taps = PixelClock * h_tap_groups / WritebackHRatio;
	const dml_float_t clk_from_v_taps = PixelClock * (WritebackVTaps * dest_width_groups + 8.0) / (dml_float_t) HTotal;
	const dml_float_t clk_from_line_buffer = PixelClock * WritebackVTaps * line_buffer_term / 6.0 / (dml_float_t) WritebackSourceWidth;

	const dml_float_t required_clk = dml_max3(clk_from_h_taps, clk_from_v_taps, clk_from_line_buffer);

	return RoundToDFSGranularity(required_clk, 1, DISPCLKDPPCLKVCOSpeed);
}
/// @brief Compute the writeback delay from the number of output lines left after the source is consumed.
/// The result is 0 when that line count is negative; otherwise it is the line count times the
/// line length, plus (HTotal - WritebackDestinationWidth), plus 80.
/// WritebackPixelFormat and WritebackHRatio are accepted but not used by the calculation.
/// @return The writeback delay
static dml_float_t CalculateWriteBackDelay(
	enum dml_source_format_class WritebackPixelFormat,
	dml_float_t WritebackHRatio,
	dml_float_t WritebackVRatio,
	dml_uint_t WritebackVTaps,
	dml_uint_t WritebackDestinationWidth,
	dml_uint_t WritebackDestinationHeight,
	dml_uint_t WritebackSourceHeight,
	dml_uint_t HTotal)
{
	const dml_float_t v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
	const dml_float_t dest_width = (dml_float_t) WritebackDestinationWidth;
	// Line length is the destination width, or the per-6-pixel group count times the vertical taps if larger
	const dml_float_t line_length = dml_max(dest_width, dml_ceil(dest_width / 6.0, 1.0) * WritebackVTaps);
	const dml_float_t lines_for_source = dml_ceil(((dml_float_t) WritebackSourceHeight - v_init) / WritebackVRatio, 1.0);
	const dml_float_t remaining_output_lines = WritebackDestinationHeight - 1 - lines_for_source;

	if (remaining_output_lines < 0) {
		return 0;
	}

	return remaining_output_lines * line_length + (HTotal - WritebackDestinationWidth) + 80;
}
/// @brief Compute the VUpdate/VReady pixel offsets, the setup time and the dynamic metadata timings.
/// @param TSetup Output: (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix) / PixelClock
/// @param Tdmbf Output: DynamicMetadataTransmittedBytes / 4 / Dispclk
/// @param Tdmec Output: HTotal / PixelClock, i.e. one line time
/// @param Tdmsks Output: half the vblank time when DynamicMetadataLinesBeforeActiveRequired is 0,
///        otherwise that many line times; halved again when interlace is enabled and the
///        progressive-to-interlace unit is not in the OPP
/// @param VUpdateOffsetPix Output: HTotal / 4 passed through dml_ceil
/// @param VUpdateWidthPix Output: clock-derived delay converted to pixels
/// @param VReadyOffsetPix Output: clock-derived delay converted to pixels
static void CalculateVUpdateAndDynamicMetadataParameters(
	dml_uint_t MaxInterDCNTileRepeaters,
	dml_float_t Dppclk,
	dml_float_t Dispclk,
	dml_float_t DCFClkDeepSleep,
	dml_float_t PixelClock,
	dml_uint_t HTotal,
	dml_uint_t VBlank,
	dml_uint_t DynamicMetadataTransmittedBytes,
	dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
	dml_uint_t InterlaceEnable,
	dml_bool_t ProgressiveToInterlaceUnitInOPP,
	// Output
	dml_float_t *TSetup,
	dml_float_t *Tdmbf,
	dml_float_t *Tdmec,
	dml_float_t *Tdmsks,
	dml_uint_t *VUpdateOffsetPix,
	dml_uint_t *VUpdateWidthPix,
	dml_uint_t *VReadyOffsetPix)
{
	const dml_float_t repeater_delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	const dml_float_t vupdate_width_time = 14.0 / DCFClkDeepSleep + 12.0 / Dppclk + repeater_delay;
	const dml_float_t vready_offset_time = dml_max(150.0 / Dppclk, repeater_delay + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk);
	dml_float_t metadata_skew;

	// Times are turned into pixel counts by multiplying with the pixel clock
	*VUpdateWidthPix = (dml_uint_t)(dml_ceil(vupdate_width_time * PixelClock, 1.0));
	*VReadyOffsetPix = (dml_uint_t)(dml_ceil(vready_offset_time * PixelClock, 1.0));
	*VUpdateOffsetPix = (dml_uint_t)(dml_ceil(HTotal / 4.0, 1.0));

	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		metadata_skew = VBlank * HTotal / PixelClock / 2.0;
	} else {
		metadata_skew = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		metadata_skew = metadata_skew / 2;
	}
	*Tdmsks = metadata_skew;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %u\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Dppclk = %f\n", __func__, Dppclk);
	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
	dml_print("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
	dml_print("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, repeater_delay);
	dml_print("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
/// @brief Compute the meta row and DPTE row bandwidth of one plane.
/// Each bandwidth is VRatio * bytes-per-row / (row height * LineTime) for luma; the 4:2:0 formats
/// (8/10/12 bit) and dml_rgbe_alpha add the same term built from the chroma arguments.
/// @param meta_row_bw Output: meta row bandwidth, 0 when DCC is not enabled
/// @param dpte_row_bw Output: DPTE row bandwidth, 0 when GPUVM is not enabled
static void CalculateRowBandwidth(
	dml_bool_t GPUVMEnable,
	enum dml_source_format_class SourcePixelFormat,
	dml_float_t VRatio,
	dml_float_t VRatioChroma,
	dml_bool_t DCCEnable,
	dml_float_t LineTime,
	dml_uint_t MetaRowByteLuma,
	dml_uint_t MetaRowByteChroma,
	dml_uint_t meta_row_height_luma,
	dml_uint_t meta_row_height_chroma,
	dml_uint_t PixelPTEBytesPerRowLuma,
	dml_uint_t PixelPTEBytesPerRowChroma,
	dml_uint_t dpte_row_height_luma,
	dml_uint_t dpte_row_height_chroma,
	// Output
	dml_float_t *meta_row_bw,
	dml_float_t *dpte_row_bw)
{
	// Formats for which the chroma row terms are added
	const dml_bool_t add_chroma_rows = (SourcePixelFormat == dml_420_8 ||
			SourcePixelFormat == dml_420_10 ||
			SourcePixelFormat == dml_420_12 ||
			SourcePixelFormat == dml_rgbe_alpha);

	if (DCCEnable == true) {
		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
		if (add_chroma_rows) {
			*meta_row_bw = *meta_row_bw + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
		}
	} else {
		*meta_row_bw = 0;
	}

	if (GPUVMEnable == true) {
		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
		if (add_chroma_rows) {
			*dpte_row_bw = *dpte_row_bw + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
		}
	} else {
		*dpte_row_bw = 0;
	}
}
/// @brief Determine the immediate flip schedule of one pipe from the bandwidth remaining after the prefetch schedule.
/// @param BandwidthAvailableForImmediateFlip Bandwidth available for immediate flip, for all planes
/// @param TotImmediateFlipBytes Total immediate flip bytes; this pipe's bandwidth is the available
///        bandwidth scaled by its own bytes over this total. When it is 0 the line counts and
///        final_flip_bw are all set to 0.
/// @param use_one_row_for_frame_flip When set, DPTEBytesPerRow is counted twice in this pipe's bytes
/// @param DestinationLinesToRequestVMInImmediateFlip Output: VM fetch time in lines, in quarter-line steps
/// @param DestinationLinesToRequestRowInImmediateFlip Output: row fetch time in lines, in quarter-line steps
/// @param final_flip_bw Output: bandwidth implied by the two line counts above
/// @param ImmediateFlipSupportedForPipe Output: false when the VM line count reaches 32, the row line
///        count reaches 16, or the VM fetch time plus twice the row fetch time exceeds the minimum row time
static void CalculateFlipSchedule(
	dml_float_t HostVMInefficiencyFactor,
	dml_float_t UrgentExtraLatency,
	dml_float_t UrgentLatency,
	dml_uint_t GPUVMMaxPageTableLevels,
	dml_bool_t HostVMEnable,
	dml_uint_t HostVMMaxNonCachedPageTableLevels,
	dml_bool_t GPUVMEnable,
	dml_uint_t HostVMMinPageSize,
	dml_float_t PDEAndMetaPTEBytesPerFrame,
	dml_float_t MetaRowBytes,
	dml_float_t DPTEBytesPerRow,
	dml_float_t BandwidthAvailableForImmediateFlip,
	dml_uint_t TotImmediateFlipBytes,
	enum dml_source_format_class SourcePixelFormat,
	dml_float_t LineTime,
	dml_float_t VRatio,
	dml_float_t VRatioChroma,
	dml_float_t Tno_bw,
	dml_bool_t DCCEnable,
	dml_uint_t dpte_row_height,
	dml_uint_t meta_row_height,
	dml_uint_t dpte_row_height_chroma,
	dml_uint_t meta_row_height_chroma,
	dml_bool_t use_one_row_for_frame_flip,
	// Output
	dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
	dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
	dml_float_t *final_flip_bw,
	dml_bool_t *ImmediateFlipSupportedForPipe)
{
	const dml_bool_t needs_row_fetch = (GPUVMEnable == true || DCCEnable == true);
	const dml_bool_t two_plane_format = (SourcePixelFormat == dml_420_8 ||
			SourcePixelFormat == dml_420_10 ||
			SourcePixelFormat == dml_rgbe_alpha);
	const dml_uint_t host_vm_trips = (GPUVMEnable == true && HostVMEnable == true) ? HostVMMaxNonCachedPageTableLevels : 0;
	// Byte counts reused by the fetch-time and bandwidth formulas below
	const dml_float_t vm_bytes = PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor;
	const dml_float_t row_bytes = MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor;
	dml_float_t flip_bw = 0; // the immediate flip bandwidth for this pipe
	dml_float_t vm_fetch_time = 0;
	dml_float_t row_fetch_time = 0;
	dml_float_t min_row_time = 0.0;
	dml_float_t dpte_row_time_l;
	dml_float_t meta_row_time_l;
	dml_float_t dpte_row_time_c = 0.0;
	dml_float_t meta_row_time_c = 0.0;
	dml_bool_t flip_not_possible;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
	dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
#endif

	if (TotImmediateFlipBytes == 0) {
		// Nothing to flip: fetch times stay 0 and the schedule is all zero
		*DestinationLinesToRequestVMInImmediateFlip = 0;
		*DestinationLinesToRequestRowInImmediateFlip = 0;
		*final_flip_bw = 0;
	} else {
		const dml_float_t dpte_bytes_counted = use_one_row_for_frame_flip ? 2.0 * DPTEBytesPerRow : DPTEBytesPerRow;

		flip_bw = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + dpte_bytes_counted) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes;

		if (GPUVMEnable == true) {
			vm_fetch_time = dml_max3(Tno_bw + vm_bytes / flip_bw,
					UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (host_vm_trips + 1) - 1),
					LineTime / 4.0);
		}
		if (needs_row_fetch) {
			row_fetch_time = dml_max3(row_bytes / flip_bw, UrgentLatency * (host_vm_trips + 1), LineTime / 4.0);
		}

		*DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (vm_fetch_time / LineTime), 1.0) / 4.0;
		*DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (row_fetch_time / LineTime), 1.0) / 4.0;

		if (GPUVMEnable == true) {
			*final_flip_bw = dml_max(vm_bytes / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
					row_bytes / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
		} else if (DCCEnable == true) {
			*final_flip_bw = row_bytes / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
		} else {
			*final_flip_bw = 0;
		}
	}

	// Time to scan out one DPTE / meta row; chroma rows only take part for the two-plane formats
	dpte_row_time_l = dpte_row_height * LineTime / VRatio;
	meta_row_time_l = meta_row_height * LineTime / VRatio;
	if (two_plane_format) {
		dpte_row_time_c = dpte_row_height_chroma * LineTime / VRatioChroma;
		meta_row_time_c = meta_row_height_chroma * LineTime / VRatioChroma;
	}

	if (GPUVMEnable == true && DCCEnable != true) {
		min_row_time = two_plane_format ? dml_min(dpte_row_time_l, dpte_row_time_c) : dpte_row_time_l;
	} else if (GPUVMEnable != true && DCCEnable == true) {
		min_row_time = two_plane_format ? dml_min(meta_row_time_l, meta_row_time_c) : meta_row_time_l;
	} else if (two_plane_format) {
		min_row_time = dml_min4(dpte_row_time_l, meta_row_time_l, dpte_row_time_c, meta_row_time_c);
	} else {
		min_row_time = dml_min(dpte_row_time_l, meta_row_time_l);
	}

	flip_not_possible = (*DestinationLinesToRequestVMInImmediateFlip >= 32 ||
			*DestinationLinesToRequestRowInImmediateFlip >= 16 ||
			vm_fetch_time + 2 * row_fetch_time > min_row_time);
	*ImmediateFlipSupportedForPipe = flip_not_possible ? false : true;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
	dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable);
	dml_print("DML::%s: MetaRowBytes = %f\n", __func__, MetaRowBytes);
	dml_print("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
	dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
	dml_print("DML::%s: ImmediateFlipBW = %f\n", __func__, flip_bw);
	dml_print("DML::%s: PDEAndMetaPTEBytesPerFrame = %f\n", __func__, PDEAndMetaPTEBytesPerFrame);
	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
	dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
	dml_print("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, vm_fetch_time);
	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, row_fetch_time);
	dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
#endif
} // CalculateFlipSchedule
/// @brief Snap a clock to a value of the form (4 * VCOSpeed) / integer divider.
/// @param Clock Requested clock; any value <= 0 returns 0
/// @param round_up When set the divider comes from dml_floor (result not below Clock),
///        otherwise from dml_ceil (result not above Clock)
/// @param VCOSpeed VCO speed the divider is applied to
/// @return The snapped clock
static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed)
{
	dml_float_t divider;

	if (Clock <= 0.0) {
		return 0.0;
	}

	divider = round_up ? dml_floor(VCOSpeed * 4.0 / Clock, 1.0) : dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / divider;
}
/// @brief Derive the DCC block settings (max uncompressed block, max compressed block, independent
/// block) for the luma and chroma planes of one surface.
/// The function estimates how many bytes a full swath of each plane needs for horizontal and for
/// vertical access, checks whether those swaths fit in the detile buffer, and from that picks a
/// request type per plane (256 bytes, 128 bytes contiguous, 128 bytes non-contiguous). The request
/// type is then mapped to the output block sizes.
/// All six outputs are 0 when DCCEnabled is not set; the chroma outputs are also 0 when
/// BytePerPixelC is 0.
/// SurfaceWidthChroma, SurfaceHeightChroma, TilingFormat, BytePerPixelDETY and BytePerPixelDETC are
/// accepted but not used by the calculation.
static void CalculateDCCConfiguration(
dml_bool_t DCCEnabled,
dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
enum dml_source_format_class SourcePixelFormat,
dml_uint_t SurfaceWidthLuma,
dml_uint_t SurfaceWidthChroma,
dml_uint_t SurfaceHeightLuma,
dml_uint_t SurfaceHeightChroma,
dml_uint_t nomDETInKByte,
dml_uint_t RequestHeight256ByteLuma,
dml_uint_t RequestHeight256ByteChroma,
enum dml_swizzle_mode TilingFormat,
dml_uint_t BytePerPixelY,
dml_uint_t BytePerPixelC,
dml_float_t BytePerPixelDETY,
dml_float_t BytePerPixelDETC,
enum dml_rotation_angle SourceScan,
// Output
dml_uint_t *MaxUncompressedBlockLuma,
dml_uint_t *MaxUncompressedBlockChroma,
dml_uint_t *MaxCompressedBlockLuma,
dml_uint_t *MaxCompressedBlockChroma,
dml_uint_t *IndependentBlockLuma,
dml_uint_t *IndependentBlockChroma)
{
// nomDETInKByte converted from KBytes to bytes
dml_uint_t DETBufferSizeForDCC = nomDETInKByte * 1024;
dml_uint_t yuv420;
dml_uint_t horz_div_l;
dml_uint_t horz_div_c;
dml_uint_t vert_div_l;
dml_uint_t vert_div_c;
dml_uint_t swath_buf_size;
dml_float_t detile_buf_vp_horz_limit;
dml_float_t detile_buf_vp_vert_limit;
dml_uint_t MAS_vp_horz_limit;
dml_uint_t MAS_vp_vert_limit;
dml_uint_t max_vp_horz_width;
dml_uint_t max_vp_vert_height;
dml_uint_t eff_surf_width_l;
dml_uint_t eff_surf_width_c;
dml_uint_t eff_surf_height_l;
dml_uint_t eff_surf_height_c;
dml_uint_t full_swath_bytes_horz_wc_l;
dml_uint_t full_swath_bytes_horz_wc_c;
dml_uint_t full_swath_bytes_vert_wc_l;
dml_uint_t full_swath_bytes_vert_wc_c;
dml_uint_t req128_horz_wc_l;
dml_uint_t req128_horz_wc_c;
dml_uint_t req128_vert_wc_l;
dml_uint_t req128_vert_wc_c;
dml_uint_t segment_order_horz_contiguous_luma;
dml_uint_t segment_order_horz_contiguous_chroma;
dml_uint_t segment_order_vert_contiguous_luma;
dml_uint_t segment_order_vert_contiguous_chroma;
// Request type chosen per plane; REQ_NA is declared but never assigned in this function
typedef enum{
REQ_256Bytes,
REQ_128BytesNonContiguous,
REQ_128BytesContiguous,
REQ_NA
} RequestType;
RequestType RequestLuma;
RequestType RequestChroma;
// 1 for the 4:2:0 formats (8/10/12 bit), 0 otherwise; used below as the chroma divisor (1 + yuv420)
yuv420 = ((SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12) ? 1 : 0);
// Divisors enter the viewport limits as (1 + div); the vertical one is cleared for 1 byte-per-pixel planes
horz_div_l = 1;
horz_div_c = 1;
vert_div_l = 1;
vert_div_c = 1;
if (BytePerPixelY == 1)
vert_div_l = 0;
if (BytePerPixelC == 1)
vert_div_c = 0;
// Swath buffer is half of the DET minus 2 * 256 bytes for each plane present. The horizontal and
// vertical limits are that size divided by the per-unit byte cost of the plane(s).
if (BytePerPixelC == 0) {
swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
} else {
swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (dml_float_t) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
}
// dml_420_10 gets 1.5x both limits
if (SourcePixelFormat == dml_420_10) {
detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
}
// Subtract 1 and apply dml_floor with granularity 16 (presumably rounds down to a multiple of 16 - confirm against dml_floor)
detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
// Fixed viewport caps: 3840 for dml_rgbe_alpha, otherwise 6144 (3072 vertically when BytePerPixelY is 8)
MAS_vp_horz_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : 6144;
MAS_vp_vert_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
// The effective limit is the smaller of the fixed cap and the detile buffer limit
max_vp_horz_width = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_horz_limit, detile_buf_vp_horz_limit));
max_vp_vert_height = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_vert_limit, detile_buf_vp_vert_limit));
// Clamp the luma surface size to the limits; the chroma size is the luma size divided by (1 + yuv420)
eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
// Bytes of one full swath: width * request height * bytes per pixel for horizontal access,
// height * 256 / request height for vertical access
full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
if (BytePerPixelC > 0) {
full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
} else {
full_swath_bytes_horz_wc_c = 0;
full_swath_bytes_vert_wc_c = 0;
}
// dml_420_10: scale every swath size by 2/3 and apply dml_ceil with granularity 256
if (SourcePixelFormat == dml_420_10) {
full_swath_bytes_horz_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
full_swath_bytes_horz_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
full_swath_bytes_vert_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
full_swath_bytes_vert_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
}
// Horizontal access: no plane is flagged for 128 byte requests when two full swaths of both planes
// fit in the DET buffer. Otherwise only chroma is flagged if luma < 1.5 * chroma and
// 2 * luma + chroma fits, only luma is flagged if luma >= 1.5 * chroma and luma + 2 * chroma fits,
// and both are flagged in every remaining case.
if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
req128_horz_wc_l = 0;
req128_horz_wc_c = 0;
} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
req128_horz_wc_l = 0;
req128_horz_wc_c = 1;
} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
req128_horz_wc_l = 1;
req128_horz_wc_c = 0;
} else {
req128_horz_wc_l = 1;
req128_horz_wc_c = 1;
}
// Vertical access: same decision using the vertical swath sizes
if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
req128_vert_wc_l = 0;
req128_vert_wc_c = 0;
} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
req128_vert_wc_l = 0;
req128_vert_wc_c = 1;
} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
req128_vert_wc_l = 1;
req128_vert_wc_c = 0;
} else {
req128_vert_wc_l = 1;
req128_vert_wc_c = 1;
}
// Segment order flags: a 2 byte-per-pixel plane is marked non-contiguous horizontally and
// contiguous vertically; every other pixel size gets the opposite marking
if (BytePerPixelY == 2) {
segment_order_horz_contiguous_luma = 0;
segment_order_vert_contiguous_luma = 1;
} else {
segment_order_horz_contiguous_luma = 1;
segment_order_vert_contiguous_luma = 0;
}
if (BytePerPixelC == 2) {
segment_order_horz_contiguous_chroma = 0;
segment_order_vert_contiguous_chroma = 1;
} else {
segment_order_horz_contiguous_chroma = 1;
segment_order_vert_contiguous_chroma = 0;
}
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
dml_print("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
dml_print("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
dml_print("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
dml_print("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
dml_print("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
dml_print("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
dml_print("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
#endif
// Pick the request type per plane.
// Scan direction unknown: 256 byte requests only if neither direction was flagged; non-contiguous
// if any flagged direction has a non-contiguous segment order; contiguous otherwise.
if (DCCProgrammingAssumesScanDirectionUnknown == true) {
if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
RequestLuma = REQ_256Bytes;
} else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
RequestLuma = REQ_128BytesNonContiguous;
} else {
RequestLuma = REQ_128BytesContiguous;
}
if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
RequestChroma = REQ_256Bytes;
} else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
RequestChroma = REQ_128BytesNonContiguous;
} else {
RequestChroma = REQ_128BytesContiguous;
}
// Rotation that is not vertical: only the horizontal flags are consulted
} else if (!dml_is_vertical_rotation(SourceScan)) {
if (req128_horz_wc_l == 0) {
RequestLuma = REQ_256Bytes;
} else if (segment_order_horz_contiguous_luma == 0) {
RequestLuma = REQ_128BytesNonContiguous;
} else {
RequestLuma = REQ_128BytesContiguous;
}
if (req128_horz_wc_c == 0) {
RequestChroma = REQ_256Bytes;
} else if (segment_order_horz_contiguous_chroma == 0) {
RequestChroma = REQ_128BytesNonContiguous;
} else {
RequestChroma = REQ_128BytesContiguous;
}
// Vertical rotation: only the vertical flags are consulted
} else {
if (req128_vert_wc_l == 0) {
RequestLuma = REQ_256Bytes;
} else if (segment_order_vert_contiguous_luma == 0) {
RequestLuma = REQ_128BytesNonContiguous;
} else {
RequestLuma = REQ_128BytesContiguous;
}
if (req128_vert_wc_c == 0) {
RequestChroma = REQ_256Bytes;
} else if (segment_order_vert_contiguous_chroma == 0) {
RequestChroma = REQ_128BytesNonContiguous;
} else {
RequestChroma = REQ_128BytesContiguous;
}
}
// Map the request type to (max uncompressed, max compressed, independent) block sizes:
// 256 bytes -> (256, 256, 0); 128 contiguous -> (256, 128, 128); 128 non-contiguous -> (256, 64, 64)
if (RequestLuma == REQ_256Bytes) {
*MaxUncompressedBlockLuma = 256;
*MaxCompressedBlockLuma = 256;
*IndependentBlockLuma = 0;
} else if (RequestLuma == REQ_128BytesContiguous) {
*MaxUncompressedBlockLuma = 256;
*MaxCompressedBlockLuma = 128;
*IndependentBlockLuma = 128;
} else {
*MaxUncompressedBlockLuma = 256;
*MaxCompressedBlockLuma = 64;
*IndependentBlockLuma = 64;
}
if (RequestChroma == REQ_256Bytes) {
*MaxUncompressedBlockChroma = 256;
*MaxCompressedBlockChroma = 256;
*IndependentBlockChroma = 0;
} else if (RequestChroma == REQ_128BytesContiguous) {
*MaxUncompressedBlockChroma = 256;
*MaxCompressedBlockChroma = 128;
*IndependentBlockChroma = 128;
} else {
*MaxUncompressedBlockChroma = 256;
*MaxCompressedBlockChroma = 64;
*IndependentBlockChroma = 64;
}
// Without DCC, or without a chroma plane, the chroma outputs are zeroed
if (DCCEnabled != true || BytePerPixelC == 0) {
*MaxUncompressedBlockChroma = 0;
*MaxCompressedBlockChroma = 0;
*IndependentBlockChroma = 0;
}
// Without DCC the luma outputs are zeroed as well
if (DCCEnabled != true) {
*MaxUncompressedBlockLuma = 0;
*MaxCompressedBlockLuma = 0;
*IndependentBlockLuma = 0;
}
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
dml_print("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
dml_print("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
dml_print("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
dml_print("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
dml_print("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
#endif
} // CalculateDCCConfiguration
/// @brief Compute how many source lines have to be prefetched for one plane.
/// @param VInitPreFill Output: (VRatio + VTaps + 1) / 2 passed through dml_floor; an interlace term
///        of Interlace * 0.5 * VRatio is added to the sum when ProgressiveToInterlaceUnitInOPP is not set
/// @param MaxNumSwath Output: number of full swaths counted. With a stationary viewport it depends
///        on where the (rotation-adjusted) viewport start falls inside a swath.
/// @return MaxNumSwath * SwathHeight plus the partial swath line count (at least 1)
static dml_uint_t CalculatePrefetchSourceLines(
	dml_float_t VRatio,
	dml_uint_t VTaps,
	dml_bool_t Interlace,
	dml_bool_t ProgressiveToInterlaceUnitInOPP,
	dml_uint_t SwathHeight,
	enum dml_rotation_angle SourceScan,
	dml_bool_t ViewportStationary,
	dml_uint_t SwathWidth,
	dml_uint_t ViewportHeight,
	dml_uint_t ViewportXStart,
	dml_uint_t ViewportYStart,
	// Output
	dml_uint_t *VInitPreFill,
	dml_uint_t *MaxNumSwath)
{
	dml_uint_t start_in_scan_dir = 0;
	dml_uint_t first_swath_lines = 0;
	dml_uint_t partial_swath_lines = 0;
	dml_uint_t prefill;
	dml_uint_t swath_count;
	dml_float_t prefill_sum;
	dml_float_t total_lines = 0;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
	dml_print("DML::%s: VTaps = %u\n", __func__, VTaps);
	dml_print("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
	dml_print("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
	dml_print("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
	dml_print("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
#endif

	prefill_sum = VRatio + (dml_float_t) VTaps + 1;
	if (!ProgressiveToInterlaceUnitInOPP) {
		prefill_sum = prefill_sum + Interlace * 0.5 * VRatio;
	}
	prefill = (dml_uint_t)(dml_floor(prefill_sum / 2.0, 1));
	*VInitPreFill = prefill;

	if (ViewportStationary) {
		// Viewport start along the scan direction, adjusted for rotation
		switch (SourceScan) {
		case dml_rotation_180:
		case dml_rotation_180m:
			start_in_scan_dir = SwathHeight - (((dml_uint_t) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
			break;
		case dml_rotation_270:
		case dml_rotation_90m:
			start_in_scan_dir = ViewportXStart;
			break;
		case dml_rotation_90:
		case dml_rotation_270m:
			start_in_scan_dir = SwathHeight - (((dml_uint_t)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
			break;
		default:
			start_in_scan_dir = ViewportYStart;
			break;
		}

		// Lines left in the swath that contains the viewport start
		first_swath_lines = SwathHeight - (start_in_scan_dir % SwathHeight);
		if (first_swath_lines < prefill) {
			swath_count = (dml_uint_t)(dml_ceil((prefill - first_swath_lines) / (dml_float_t) SwathHeight, 1) + 1);
		} else {
			swath_count = 1;
		}
		partial_swath_lines = (dml_uint_t)(dml_max(1, (dml_uint_t) (start_in_scan_dir + prefill - 1) % SwathHeight));
	} else {
		swath_count = (dml_uint_t)(dml_ceil((prefill - 1.0) / (dml_float_t) SwathHeight, 1) + 1);
		if (prefill > 1) {
			partial_swath_lines = (dml_uint_t)(dml_max(1, (dml_uint_t) (prefill - 2) % SwathHeight));
		} else {
			partial_swath_lines = (dml_uint_t)(dml_max(1, (dml_uint_t) (prefill + SwathHeight - 2) % SwathHeight));
		}
	}
	*MaxNumSwath = swath_count;

	total_lines = swath_count * SwathHeight + partial_swath_lines;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: vp_start_rot = %u\n", __func__, start_in_scan_dir);
	dml_print("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
	dml_print("DML::%s: MaxPartialSwath = %u\n", __func__, partial_swath_lines);
	dml_print("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, total_lines);
#endif
	return (dml_uint_t)(total_lines);
} // CalculatePrefetchSourceLines
static dml_uint_t CalculateVMAndRowBytes(
dml_bool_t ViewportStationary,
dml_bool_t DCCEnable,
dml_uint_t NumberOfDPPs,
dml_uint_t BlockHeight256Bytes,
dml_uint_t BlockWidth256Bytes,
enum dml_source_format_class SourcePixelFormat,
dml_uint_t SurfaceTiling,
dml_uint_t BytePerPixel,
enum dml_rotation_angle SourceScan,
dml_uint_t SwathWidth,
dml_uint_t ViewportHeight,
dml_uint_t ViewportXStart,
dml_uint_t ViewportYStart,
dml_bool_t GPUVMEnable,
dml_uint_t GPUVMMaxPageTableLevels,
dml_uint_t GPUVMMinPageSizeKBytes,
dml_uint_t PTEBufferSizeInRequests,
dml_uint_t Pitch,
dml_uint_t DCCMetaPitch,
dml_uint_t MacroTileWidth,
dml_uint_t MacroTileHeight,
// Output
dml_uint_t *MetaRowByte,
dml_uint_t *PixelPTEBytesPerRow, // for bandwidth calculation
dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check
dml_uint_t *dpte_row_width_ub,
dml_uint_t *dpte_row_height,
dml_uint_t *dpte_row_height_linear,
dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
dml_uint_t *dpte_row_width_ub_one_row_per_frame,
dml_uint_t *dpte_row_height_one_row_per_frame,
dml_uint_t *MetaRequestWidth,
dml_uint_t *MetaRequestHeight,
dml_uint_t *meta_row_width,
dml_uint_t *meta_row_height,
dml_uint_t *PixelPTEReqWidth,
dml_uint_t *PixelPTEReqHeight,
dml_uint_t *PTERequestSize,
dml_uint_t *DPDE0BytesFrame,
dml_uint_t *MetaPTEBytesFrame)
{
dml_uint_t MPDEBytesFrame;
dml_uint_t DCCMetaSurfaceBytes;
dml_uint_t ExtraDPDEBytesFrame;
dml_uint_t PDEAndMetaPTEBytesFrame;
dml_uint_t MacroTileSizeBytes;
dml_uint_t vp_height_meta_ub;
dml_uint_t vp_height_dpte_ub;
dml_uint_t PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
*MetaRequestHeight = 8 * BlockHeight256Bytes;
*MetaRequestWidth = 8 * BlockWidth256Bytes;
if (SurfaceTiling == dml_sw_linear) {
*meta_row_height = 32;
*meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth));
} else if (!dml_is_vertical_rotation(SourceScan)) {
*meta_row_height = *MetaRequestHeight;
if (ViewportStationary && NumberOfDPPs == 1) {
*meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth));
} else {
*meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth);
}
*MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0);
} else {
*meta_row_height = *MetaRequestWidth;
if (ViewportStationary && NumberOfDPPs == 1) {
*meta_row_width = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight));
} else {
*meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight);
}
*MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0);
}
if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) {
vp_height_meta_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes));
} else if (!dml_is_vertical_rotation(SourceScan)) {
vp_height_meta_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes);
} else {
vp_height_meta_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes);
}
DCCMetaSurfaceBytes = (dml_uint_t)(DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0);
if (GPUVMEnable == true) {
*MetaPTEBytesFrame = (dml_uint_t)((dml_ceil((dml_float_t) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64);
MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
} else {
*MetaPTEBytesFrame = 0;
MPDEBytesFrame = 0;
}
if (DCCEnable != true) {
*MetaPTEBytesFrame = 0;
MPDEBytesFrame = 0;
*MetaRowByte = 0;
}
MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) {
vp_height_dpte_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + MacroTileHeight - 1, MacroTileHeight) - dml_floor(ViewportYStart, MacroTileHeight));
} else if (!dml_is_vertical_rotation(SourceScan)) {
vp_height_dpte_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight);
} else {
vp_height_dpte_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight);
}
if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
*DPDE0BytesFrame = (dml_uint_t)(64 * (dml_ceil((dml_float_t) (Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / (dml_float_t) (8 * 2097152), 1) + 1));
ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
} else {
*DPDE0BytesFrame = 0;
ExtraDPDEBytesFrame = 0;
}
PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable);
dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
dml_print("DML::%s: SwModeLinear = %u\n", __func__, SurfaceTiling == dml_sw_linear);
dml_print("DML::%s: BytePerPixel = %u\n", __func__, BytePerPixel);
dml_print("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, GPUVMMaxPageTableLevels);
dml_print("DML::%s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
dml_print("DML::%s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
dml_print("DML::%s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
dml_print("DML::%s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
dml_print("DML::%s: MetaPTEBytesFrame = %u\n", __func__, *MetaPTEBytesFrame);
dml_print("DML::%s: MPDEBytesFrame = %u\n", __func__, MPDEBytesFrame);
dml_print("DML::%s: DPDE0BytesFrame = %u\n", __func__, *DPDE0BytesFrame);
dml_print("DML::%s: ExtraDPDEBytesFrame= %u\n", __func__, ExtraDPDEBytesFrame);
dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, PDEAndMetaPTEBytesFrame);
dml_print("DML::%s: ViewportHeight = %u\n", __func__, ViewportHeight);
dml_print("DML::%s: SwathWidth = %u\n", __func__, SwathWidth);
dml_print("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
#endif
if (SurfaceTiling == dml_sw_linear) {
*PixelPTEReqHeight = 1;
*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
*PTERequestSize = 64;
} else if (GPUVMMinPageSizeKBytes == 4) {
*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
*PTERequestSize = 128;
} else {
*PixelPTEReqHeight = MacroTileHeight;
*PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
*PTERequestSize = 64;
}
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
dml_print("DML::%s: PixelPTEReqHeight = %u\n", __func__, *PixelPTEReqHeight);
dml_print("DML::%s: PixelPTEReqWidth = %u\n", __func__, *PixelPTEReqWidth);
dml_print("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
dml_print("DML::%s: PTERequestSize = %u\n", __func__, *PTERequestSize);
dml_print("DML::%s: Pitch = %u\n", __func__, Pitch);
#endif
*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
*dpte_row_width_ub_one_row_per_frame = (dml_uint_t)((dml_ceil(((dml_float_t)Pitch * (dml_float_t) *dpte_row_height_one_row_per_frame / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * (dml_float_t) *PixelPTEReqWidth);
*PixelPTEBytesPerRow_one_row_per_frame = (dml_uint_t)((dml_float_t) *dpte_row_width_ub_one_row_per_frame / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize);
if (SurfaceTiling == dml_sw_linear) {
*dpte_row_height = (dml_uint_t)(dml_min(128, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)));
dml_print("DML::%s: dpte_row_height term 1 = %u\n", __func__, PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
dml_print("DML::%s: dpte_row_height term 2 = %f\n", __func__, dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
dml_print("DML::%s: dpte_row_height term 3 = %f\n", __func__, dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
dml_print("DML::%s: dpte_row_height term 4 = %u\n", __func__, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height);
*dpte_row_width_ub = (dml_uint_t)(dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height - 1), (dml_float_t) *PixelPTEReqWidth) + *PixelPTEReqWidth);
*PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize);
// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
*dpte_row_height_linear = 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * PixelPTEReqWidth_linear / Pitch), 1);
if (*dpte_row_height_linear > 128)
*dpte_row_height_linear = 128;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *dpte_row_width_ub);
#endif
} else if (!dml_is_vertical_rotation(SourceScan)) {
*dpte_row_height = *PixelPTEReqHeight;
if (GPUVMMinPageSizeKBytes > 64) {
*dpte_row_width_ub = (dml_uint_t)((dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth);
} else if (ViewportStationary && (NumberOfDPPs == 1)) {
*dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - dml_floor(ViewportXStart, *PixelPTEReqWidth));
} else {
*dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t)*PixelPTEReqWidth, 1) + 1.0) * *PixelPTEReqWidth);
}
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *dpte_row_width_ub);
#endif
ASSERT(*PixelPTEReqWidth);
if (*PixelPTEReqWidth != 0)
*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
} else {
*dpte_row_height = (dml_uint_t)(dml_min(*PixelPTEReqWidth, MacroTileWidth));
if (ViewportStationary && (NumberOfDPPs == 1)) {
*dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight));
} else {
*dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t) *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight);
}
*PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqHeight * *PTERequestSize);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *dpte_row_width_ub);
#endif
}
if (GPUVMEnable != true)
*PixelPTEBytesPerRow = 0;
*PixelPTEBytesPerRowStorage = *PixelPTEBytesPerRow;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height);
dml_print("DML::%s: dpte_row_height_linear = %u\n", __func__, *dpte_row_height_linear);
dml_print("DML::%s: dpte_row_width_ub = %u\n", __func__, *dpte_row_width_ub);
dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *PixelPTEBytesPerRow);
dml_print("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *PixelPTEBytesPerRowStorage);
dml_print("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
dml_print("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *dpte_row_height_one_row_per_frame);
dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *dpte_row_width_ub_one_row_per_frame);
dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *PixelPTEBytesPerRow_one_row_per_frame);
#endif
dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
return PDEAndMetaPTEBytesFrame;
} // CalculateVMAndRowBytes
/*
 * Account for the progressive-to-interlace unit in the per-plane pixel clocks.
 *
 * For each active plane (count taken from dml_get_num_active_planes()):
 *   - output.PixelClockBackEnd[k] receives the current timing.PixelClock[k];
 *   - if timing.Interlace[k] is 1 and ptoi_supported is true,
 *     timing.PixelClock[k] is then doubled in place.
 *
 * The back-end copy is always taken before any doubling, so it holds the
 * clock value as it was on entry.
 */
static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported)
{
	const dml_uint_t plane_count = dml_get_num_active_planes(display_cfg);
	dml_uint_t plane;

	for (plane = 0; plane < plane_count; plane++) {
		/* Snapshot the clock first; the doubling below must not leak into it. */
		display_cfg->output.PixelClockBackEnd[plane] = display_cfg->timing.PixelClock[plane];

		/* Only interlaced timings on hardware with the unit are adjusted. */
		if (ptoi_supported != true || display_cfg->timing.Interlace[plane] != 1)
			continue;

		display_cfg->timing.PixelClock[plane] *= 2;
	}
}
/*
 * Select, or validate, the output bits-per-pixel for a stream.
 *
 * The function first derives the highest bpp the link can carry from
 * LinkBitRate, Lanes and PixelClock (the formula depends on Output and on
 * DSCEnable), then adjusts that limit for the ODM mode that applies
 * (ODMModeDSC when DSCEnable is set, ODMModeNoDSC otherwise):
 *   - combine 4:1 clamps the limit to 16,
 *   - combine 2:1 clamps the limit to 32,
 *   - split 1:2 doubles the limit.
 *
 * *RequiredSlots is always written as ceil(DesiredBPP / limit * 64).
 *
 * Return value:
 *   - DesiredBPP == 0 with DSC: __DML_DPP_INVALID__ if the limit is below the
 *     minimum DSC bpp for Format; the maximum DSC bpp if the limit reaches it;
 *     otherwise the limit rounded down to a multiple of 1/16.
 *   - DesiredBPP == 0 without DSC: the largest of the three uncompressed bpp
 *     candidates for Format/Output that does not exceed the limit, or
 *     __DML_DPP_INVALID__ if none fits.
 *   - DesiredBPP != 0: DesiredBPP itself when it lies in the DSC range
 *     (DSC on) or equals one of the uncompressed candidates (DSC off);
 *     __DML_DPP_INVALID__ otherwise. The link limit is not consulted here.
 *
 * HTotal, HActive, DSCSlices, AudioRate and AudioLayout are accepted but not
 * referenced by this function body.
 */
static dml_float_t TruncToValidBPP(
		dml_float_t LinkBitRate,
		dml_uint_t Lanes,
		dml_uint_t HTotal,
		dml_uint_t HActive,
		dml_float_t PixelClock,
		dml_float_t DesiredBPP,
		dml_bool_t DSCEnable,
		enum dml_output_encoder_class Output,
		enum dml_output_format_class Format,
		dml_uint_t DSCInputBitPerComponent,
		dml_uint_t DSCSlices,
		dml_uint_t AudioRate,
		dml_uint_t AudioLayout,
		enum dml_odm_mode ODMModeNoDSC,
		enum dml_odm_mode ODMModeDSC,
		// Output
		dml_uint_t *RequiredSlots)
{
	/* Uncompressed bpp candidates, ordered from highest to lowest. */
	dml_uint_t non_dsc_bpp[3];
	dml_uint_t dsc_bpp_floor;
	dml_float_t dsc_bpp_ceiling;
	dml_float_t link_bpp_cap;
	enum dml_odm_mode odm_mode;
	dml_uint_t i;

	/* Per-format bpp tables. */
	switch (Format) {
	case dml_420:
		non_dsc_bpp[0] = 18;
		non_dsc_bpp[1] = 15;
		non_dsc_bpp[2] = 12;
		dsc_bpp_floor = 6;
		dsc_bpp_ceiling = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
		break;
	case dml_444:
		non_dsc_bpp[0] = 36;
		non_dsc_bpp[1] = 30;
		non_dsc_bpp[2] = 24;
		dsc_bpp_floor = 8;
		dsc_bpp_ceiling = 3 * DSCInputBitPerComponent - 1.0 / 16;
		break;
	default:
		/* Remaining formats: uncompressed candidates depend on the encoder. */
		if (Output == dml_hdmi) {
			non_dsc_bpp[0] = 24;
			non_dsc_bpp[1] = 24;
			non_dsc_bpp[2] = 24;
		} else {
			non_dsc_bpp[0] = 24;
			non_dsc_bpp[1] = 20;
			non_dsc_bpp[2] = 16;
		}

		if (Format == dml_n422) {
			dsc_bpp_floor = 7;
			dsc_bpp_ceiling = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
		} else {
			dsc_bpp_floor = 8;
			dsc_bpp_ceiling = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
		}
		break;
	}

	/*
	 * Highest bpp the link can transport. The scaling factors are kept in
	 * their original left-to-right order so rounding is unchanged.
	 * NOTE(review): the 128/132 and 8/10 ratios look like channel-coding
	 * efficiency terms - confirm against the DP specification.
	 */
	if (Output == dml_dp2p0) {
		link_bpp_cap = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
	} else if (DSCEnable && Output == dml_dp) {
		link_bpp_cap = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
	} else {
		link_bpp_cap = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
	}

	/* The same ODM adjustment applies to whichever mode is in effect. */
	odm_mode = DSCEnable ? ODMModeDSC : ODMModeNoDSC;
	switch (odm_mode) {
	case dml_odm_mode_combine_4to1:
		link_bpp_cap = dml_min(link_bpp_cap, 16);
		break;
	case dml_odm_mode_combine_2to1:
		link_bpp_cap = dml_min(link_bpp_cap, 32);
		break;
	case dml_odm_mode_split_1to2:
		link_bpp_cap = 2 * link_bpp_cap;
		break;
	default:
		break;
	}

	*RequiredSlots = (dml_uint_t)(dml_ceil(DesiredBPP / link_bpp_cap * 64, 1));

	if (DesiredBPP != 0) {
		/* Caller asked for a specific bpp: only check that it is a legal value. */
		dml_bool_t acceptable = false;

		if (DSCEnable) {
			acceptable = (DesiredBPP >= dsc_bpp_floor && DesiredBPP <= dsc_bpp_ceiling);
		} else {
			for (i = 0; i < 3; ++i) {
				if (DesiredBPP == non_dsc_bpp[i]) {
					acceptable = true;
					break;
				}
			}
		}

		if (!acceptable)
			return __DML_DPP_INVALID__;

		return DesiredBPP;
	}

	if (DSCEnable) {
		/* No request: take the largest DSC bpp the link allows, in 1/16 steps. */
		if (link_bpp_cap < dsc_bpp_floor)
			return __DML_DPP_INVALID__;

		if (link_bpp_cap >= dsc_bpp_ceiling)
			return dsc_bpp_ceiling;

		return dml_floor(16.0 * link_bpp_cap, 1.0) / 16.0;
	}

	/* No request, no DSC: first (i.e. highest) candidate that fits the link. */
	for (i = 0; i < 3; ++i) {
		if (link_bpp_cap >= non_dsc_bpp[i])
			return non_dsc_bpp[i];
	}

	return __DML_DPP_INVALID__;
} // TruncToValidBPP
static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
struct display_mode_lib_scratch_st *scratch,
struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p)
{
struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; | |