[Mesa-dev] [PATCH 047/140] amdgpu/addrlib: add matchStencilTileCfg and tcCompatible fixes

Marek Olšák maraeo at gmail.com
Mon Mar 20 22:42:57 UTC 2017


From: Xavi Zhang <xavi.zhang at amd.com>

Usage: the client first calls AddrComputeSurfaceInfo() on the depth
surface with the flag "matchStencilTileCfg" set. AddrLib will use a
2DThin1 tile index for depth whenever possible and will not downgrade
unless the alignment requirement cannot be met.

1. If there is a matching 2DThin1 tile index for stencil that ensures
depth and stencil share the same tile config parameters, then return
the stencil 2DThin1 tile index as well.
2. If using 2DThin1 tile mode cannot ensure this, and the TcCompatible
flag was set, then ignore this flag and try 2DThin1 tile mode for
depth and stencil again.
3. If 2DThin1 tile mode cannot ensure that depth and stencil have the
same tile config parameters, then downgrade the depth surface tile
mode to 1DThin1.
4. If the depth surface's tile mode is 1DThin1, then return the
1DThin1 tile index for stencil.
5. If the depth surface's tile mode is PRT, then return an invalid tile
index for stencil, since their tile config parameters will never match.

The client driver then checks the returned stencil tile index -- if it
is not the invalid tile index, it calls AddrComputeSurfaceInfo() on the
stencil surface with the returned stencil tile index to get the full
output information. Please note that the client needs to set the flag
"useTileIndex" when AddrLib is created.
---
 src/amd/addrlib/addrinterface.h    |   6 +-
 src/amd/addrlib/core/addrlib1.cpp  |  19 +++--
 src/amd/addrlib/r800/ciaddrlib.cpp | 140 +++++++++++++++++++++++++++++++++----
 src/amd/addrlib/r800/ciaddrlib.h   |   7 ++
 4 files changed, 152 insertions(+), 20 deletions(-)

diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h
index c68cacf..2a61b2b 100644
--- a/src/amd/addrlib/addrinterface.h
+++ b/src/amd/addrlib/addrinterface.h
@@ -512,21 +512,23 @@ typedef union _ADDR_SURFACE_FLAGS
         UINT_32 nonSplit             : 1; ///< CI: depth texture should not be split
         UINT_32 disableLinearOpt     : 1; ///< Disable tile mode optimization to linear
         UINT_32 needEquation         : 1; ///< Make the surface tile setting equation compatible.
                                           ///  This flag indicates we need to override tile
                                           ///  mode to PRT_* tile mode to disable slice rotation,
                                           ///  which is needed by swizzle pattern equation.
         UINT_32 skipIndicesOutput    : 1; ///< Skipping indices in output.
         UINT_32 rotateDisplay        : 1; ///< Rotate micro tile type
         UINT_32 minimizeAlignment    : 1; ///< Minimize alignment
         UINT_32 preferEquation       : 1; ///< Return equation index without adjusting tile mode
-        UINT_32 reserved             : 4; ///< Reserved bits
+        UINT_32 matchStencilTileCfg  : 1; ///< Select tile index of stencil as well as depth surface
+                                          ///  to make sure they share same tile config parameters
+        UINT_32 reserved             : 3; ///< Reserved bits
     };
 
     UINT_32 value;
 } ADDR_SURFACE_FLAGS;
 
 /**
 ****************************************************************************************************
 *   ADDR_COMPUTE_SURFACE_INFO_INPUT
 *
 *   @brief
@@ -621,20 +623,22 @@ typedef struct _ADDR_COMPUTE_SURFACE_INFO_OUTPUT
     };
 
     UINT_32         equationIndex;     ///< Equation index in the equation table;
 
     UINT_32         blockWidth;        ///< Width in element inside one block(1D->Micro, 2D->Macro)
     UINT_32         blockHeight;       ///< Height in element inside one block(1D->Micro, 2D->Macro)
     UINT_32         blockSlices;       ///< Slice number inside one block(1D->Micro, 2D->Macro)
 
     /// Stereo info
     ADDR_QBSTEREOINFO*  pStereoInfo;///< Stereo information, needed when .qbStereo flag is TRUE
+
+    INT_32          stencilTileIdx; ///< stencil tile index output when matchStencilTileCfg was set
 } ADDR_COMPUTE_SURFACE_INFO_OUTPUT;
 
 /**
 ****************************************************************************************************
 *   AddrComputeSurfaceInfo
 *
 *   @brief
 *       Compute surface width/height/depth/alignments and suitable tiling mode
 ****************************************************************************************************
 */
diff --git a/src/amd/addrlib/core/addrlib1.cpp b/src/amd/addrlib/core/addrlib1.cpp
index 9cdc459..fb33145 100644
--- a/src/amd/addrlib/core/addrlib1.cpp
+++ b/src/amd/addrlib/core/addrlib1.cpp
@@ -3550,34 +3550,36 @@ BOOL_32 Lib::DegradeTo1D(
 *       Check if base level's tile mode can be optimized (degraded)
 *   @return
 *       N/A
 ****************************************************************************************************
 */
 VOID Lib::OptimizeTileMode(
     ADDR_COMPUTE_SURFACE_INFO_INPUT*  pInOut     ///< [in, out] structure for surface info
     ) const
 {
     AddrTileMode tileMode = pInOut->tileMode;
+
     BOOL_32 doOpt = (pInOut->flags.opt4Space == TRUE) ||
                     (pInOut->flags.minimizeAlignment == TRUE) ||
                     (pInOut->maxBaseAlign != 0);
 
+    BOOL_32 convertToPrt = FALSE;
+
     // Optimization can only be done on level 0 and samples <= 1
     if ((doOpt == TRUE)                     &&
         (pInOut->mipLevel == 0)             &&
         (IsPrtTileMode(tileMode) == FALSE)  &&
         (pInOut->flags.prt == FALSE))
     {
         UINT_32 width = pInOut->width;
         UINT_32 height = pInOut->height;
         UINT_32 thickness = Thickness(tileMode);
-        BOOL_32 convertToPrt = FALSE;
         BOOL_32 macroTiledOK = TRUE;
         UINT_32 macroWidthAlign = 0;
         UINT_32 macroHeightAlign = 0;
         UINT_32 macroSizeAlign = 0;
 
         if (IsMacroTiled(tileMode))
         {
             macroTiledOK = HwlGetAlignmentInfoMacroTiled(pInOut,
                                                          &macroWidthAlign,
                                                          &macroHeightAlign,
@@ -3673,30 +3675,37 @@ VOID Lib::OptimizeTileMode(
                                        ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
                         }
                         else
                         {
                             convertToPrt = TRUE;
                         }
                     }
                 }
             }
         }
+    }
 
-        if (convertToPrt)
+    if (convertToPrt)
+    {
+        if ((pInOut->flags.matchStencilTileCfg == TRUE) && (pInOut->numSamples <= 1))
         {
-            HwlSetPrtTileMode(pInOut);
+            pInOut->tileMode = ADDR_TM_1D_TILED_THIN1;
         }
-        else if (tileMode != pInOut->tileMode)
+        else
         {
-            pInOut->tileMode = tileMode;
+            HwlSetPrtTileMode(pInOut);
         }
     }
+    else if (tileMode != pInOut->tileMode)
+    {
+        pInOut->tileMode = tileMode;
+    }
 
     HwlOptimizeTileMode(pInOut);
 }
 
 /**
 ****************************************************************************************************
 *   Lib::DegradeLargeThickTile
 *
 *   @brief
 *       Check if the thickness needs to be reduced if a tile is too large
diff --git a/src/amd/addrlib/r800/ciaddrlib.cpp b/src/amd/addrlib/r800/ciaddrlib.cpp
index 2c62979..5ccc5da 100644
--- a/src/amd/addrlib/r800/ciaddrlib.cpp
+++ b/src/amd/addrlib/r800/ciaddrlib.cpp
@@ -702,30 +702,74 @@ ADDR_E_RETURNCODE CiLib::HwlComputeSurfaceInfo(
     const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in] input structure
     ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [out] output structure
     ) const
 {
     // If tileIndex is invalid, force macroModeIndex to be invalid, too
     if (pIn->tileIndex == TileIndexInvalid)
     {
         pOut->macroModeIndex = TileIndexInvalid;
     }
 
-    // Pass tcCompatible flag from input to output; and turn off it if tile split occurs
-    pOut->tcCompatible = pIn->flags.tcCompatible;
-
-    ADDR_E_RETURNCODE retCode = SiLib::HwlComputeSurfaceInfo(pIn,pOut);
+    ADDR_E_RETURNCODE retCode = SiLib::HwlComputeSurfaceInfo(pIn, pOut);
 
     if (pOut->macroModeIndex == TileIndexNoMacroIndex)
     {
         pOut->macroModeIndex = TileIndexInvalid;
     }
 
+    if ((pIn->flags.matchStencilTileCfg == TRUE) &&
+        (pIn->flags.depth == TRUE))
+    {
+        pOut->stencilTileIdx = TileIndexInvalid;
+
+        if ((MinDepth2DThinIndex <= pOut->tileIndex) &&
+            (MaxDepth2DThinIndex >= pOut->tileIndex))
+        {
+            BOOL_32 depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut);
+
+            if ((depthStencil2DTileConfigMatch == FALSE) &&
+                (pOut->tcCompatible == TRUE))
+            {
+                pOut->macroModeIndex = TileIndexInvalid;
+
+                ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn;
+                localIn.tileIndex = TileIndexInvalid;
+                localIn.pTileInfo = NULL;
+                localIn.flags.tcCompatible = FALSE;
+
+                SiLib::HwlComputeSurfaceInfo(&localIn, pOut);
+
+                ADDR_ASSERT((MinDepth2DThinIndex <= pOut->tileIndex) && (MaxDepth2DThinIndex >= pOut->tileIndex));
+
+                depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut);
+            }
+
+            if ((depthStencil2DTileConfigMatch == FALSE) &&
+                (pIn->numSamples <= 1))
+            {
+                pOut->macroModeIndex = TileIndexInvalid;
+
+                ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn;
+                localIn.tileMode = ADDR_TM_1D_TILED_THIN1;
+                localIn.tileIndex = TileIndexInvalid;
+                localIn.pTileInfo = NULL;
+
+                retCode = SiLib::HwlComputeSurfaceInfo(&localIn, pOut);
+            }
+        }
+
+        if (pOut->tileIndex == Depth1DThinIndex)
+        {
+            pOut->stencilTileIdx = Depth1DThinIndex;
+        }
+    }
+
     return retCode;
 }
 
 /**
 ****************************************************************************************************
 *   CiLib::HwlFmaskSurfaceInfo
 *   @brief
 *       Entry of r800's ComputeFmaskInfo
 *   @return
 *       ADDR_E_RETURNCODE
@@ -1143,25 +1187,25 @@ VOID CiLib::HwlSelectTileMode(
     }
 
     pInOut->tileMode = tileMode;
     pInOut->tileType = tileType;
 
     if ((pInOut->flags.dccCompatible == FALSE) &&
         (pInOut->flags.tcCompatible == FALSE))
     {
         pInOut->flags.opt4Space = TRUE;
         pInOut->maxBaseAlign = Block64K;
-
-        // Optimize tile mode if possible
-        OptimizeTileMode(pInOut);
     }
 
+    // Optimize tile mode if possible
+    OptimizeTileMode(pInOut);
+
     HwlOverrideTileMode(pInOut);
 }
 
 /**
 ****************************************************************************************************
 *   CiLib::HwlSetPrtTileMode
 *
 *   @brief
 *       Set PRT tile mode.
 *
@@ -1249,40 +1293,48 @@ VOID CiLib::HwlSetupTileInfo(
         {
             inTileType = ADDR_NON_DISPLAYABLE;
         }
 
         if (flags.depth || flags.stencil)
         {
             inTileType = ADDR_DEPTH_SAMPLE_ORDER;
         }
     }
 
+    // tcCompatible flag is only meaningful for gfx8.
+    if (m_settings.isVolcanicIslands == FALSE)
+    {
+        flags.tcCompatible = FALSE;
+    }
+
     if (IsTileInfoAllZero(pTileInfo))
     {
         // See table entries 0-4
         if (flags.depth || flags.stencil)
         {
             // tileSize = thickness * bpp * numSamples * 8 * 8 / 8
             UINT_32 tileSize = thickness * bpp * numSamples * 8;
 
             // Turn off tc compatible if row_size is smaller than tile size (tile split occurs).
             if (m_rowSize < tileSize)
             {
                 flags.tcCompatible = FALSE;
-                pOut->tcCompatible = FALSE;
             }
 
-            if (flags.depth && (flags.nonSplit || flags.tcCompatible || flags.needEquation))
+            if (flags.nonSplit | flags.tcCompatible | flags.needEquation)
             {
                 // Texture readable depth surface should not be split
                 switch (tileSize)
                 {
+                    case 64:
+                        index = 0;
+                        break;
                     case 128:
                         index = 1;
                         break;
                     case 256:
                         index = 2;
                         break;
                     case 512:
                         index = 3;
                         break;
                     default:
@@ -1444,21 +1496,21 @@ VOID CiLib::HwlSetupTileInfo(
                     index += 1;
 
                     tileInfo.pipeConfig = m_tileTable[index].info.pipeConfig;
 
                     macroTileBytes = (bpp >> 3) * 64 * numSamples * thickness *
                                      HwlGetPipes(&tileInfo) * tileInfo.banks *
                                      tileInfo.bankWidth * tileInfo.bankHeight;
 
                     ADDR_ASSERT(macroTileBytes == PrtTileBytes);
 
-                    pOut->tcCompatible = FALSE;
+                    flags.tcCompatible = FALSE;
                     pOut->dccUnsupport = TRUE;
                 }
             }
         }
     }
     else
     {
         // A pre-filled tile info is ready
         index = pOut->tileIndex;
         macroModeIndex = pOut->macroModeIndex;
@@ -1468,21 +1520,20 @@ VOID CiLib::HwlSetupTileInfo(
 
         if (flags.depth || flags.stencil)
         {
             // tileSize = thickness * bpp * numSamples * 8 * 8 / 8
             UINT_32 tileSize = thickness * bpp * numSamples * 8;
 
             // Turn off tc compatible if row_size is smaller than tile size (tile split occurs).
             if (m_rowSize < tileSize)
             {
                 flags.tcCompatible = FALSE;
-                pOut->tcCompatible = FALSE;
             }
         }
 
         UINT_32 numPipes = GetPipePerSurf(pTileInfo->pipeConfig);
 
         if (m_pipes != numPipes)
         {
             pOut->dccUnsupport = TRUE;
         }
     }
@@ -1503,21 +1554,21 @@ VOID CiLib::HwlSetupTileInfo(
 
         // Copy linear-aligned entry??
         *pTileInfo = m_tileTable[8].info;
     }
     else if (tileMode == ADDR_TM_LINEAR_ALIGNED)
     {
         pOut->tileIndex = 8;
         *pTileInfo = m_tileTable[8].info;
     }
 
-    if (pOut->tcCompatible)
+    if (flags.tcCompatible)
     {
         if (IsMacroTiled(tileMode))
         {
             if (inTileType != ADDR_DEPTH_SAMPLE_ORDER)
             {
                 // Turn off tcCompatible for color surface if tileSplit happens. Depth/stencil
                 // tileSplit case was handled at tileIndex selecting time.
                 INT_32 tileIndex = pOut->tileIndex;
 
                 if ((tileIndex == TileIndexInvalid) && (IsTileInfoAllZero(pTileInfo) == FALSE))
@@ -1528,31 +1579,33 @@ VOID CiLib::HwlSetupTileInfo(
                 if (tileIndex != TileIndexInvalid)
                 {
                     ADDR_ASSERT(static_cast<UINT_32>(tileIndex) < TileTableSize);
                     // Non-depth entries store a split factor
                     UINT_32 sampleSplit = m_tileTable[tileIndex].info.tileSplitBytes;
                     UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness);
                     UINT_32 colorTileSplit = Max(256u, sampleSplit * tileBytes1x);
 
                     if (m_rowSize < colorTileSplit)
                     {
-                        pOut->tcCompatible = FALSE;
+                        flags.tcCompatible = FALSE;
                     }
                 }
             }
         }
         else
         {
             // Client should not enable tc compatible for linear and 1D tile modes.
-            pOut->tcCompatible = FALSE;
+            flags.tcCompatible = FALSE;
         }
     }
+
+    pOut->tcCompatible = flags.tcCompatible;
 }
 
 /**
 ****************************************************************************************************
 *   CiLib::ReadGbTileMode
 *
 *   @brief
 *       Convert GB_TILE_MODE HW value to ADDR_TILE_CONFIG.
 ****************************************************************************************************
 */
@@ -2170,12 +2223,71 @@ ADDR_E_RETURNCODE CiLib::HwlGetMaxAlignments(
     }
 
     if (pOut != NULL)
     {
         pOut->baseAlign = maxBaseAlign;
     }
 
     return ADDR_OK;
 }
 
+/**
+****************************************************************************************************
+*   CiLib::DepthStencilTileCfgMatch
+*
+*   @brief
+*       Try to find a stencil tile index whose macro tile config parameters match the depth ones
+*   @return
+*       TRUE if such a stencil tile index was found (it is then stored in pOut->stencilTileIdx)
+****************************************************************************************************
+*/
+BOOL_32 CiLib::DepthStencilTileCfgMatch(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in] input; reads flags, numSamples
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [in,out] stencilTileIdx set on match
+    ) const
+{
+    BOOL_32 depthStencil2DTileConfigMatch = FALSE; // stays FALSE if no candidate matches
+
+    for (INT_32 stencilTileIndex = MinDepth2DThinIndex; // scan the 2D-thin depth range, ascending
+         stencilTileIndex <= MaxDepth2DThinIndex;
+         stencilTileIndex++)
+    {
+        ADDR_TILEINFO tileInfo = {0};
+        INT_32 stencilMacroIndex = HwlComputeMacroModeIndex(stencilTileIndex,
+                                                            pIn->flags,
+                                                            8, // presumably stencil bpp; confirm
+                                                            pIn->numSamples,
+                                                            &tileInfo);
+
+        if (stencilMacroIndex != TileIndexNoMacroIndex) // candidate has a macro mode index
+        {
+            if ((m_macroTileTable[stencilMacroIndex].banks ==
+                 m_macroTileTable[pOut->macroModeIndex].banks) &&
+                (m_macroTileTable[stencilMacroIndex].bankWidth ==
+                 m_macroTileTable[pOut->macroModeIndex].bankWidth) &&
+                (m_macroTileTable[stencilMacroIndex].bankHeight ==
+                 m_macroTileTable[pOut->macroModeIndex].bankHeight) &&
+                (m_macroTileTable[stencilMacroIndex].macroAspectRatio ==
+                 m_macroTileTable[pOut->macroModeIndex].macroAspectRatio) &&
+                (m_macroTileTable[stencilMacroIndex].pipeConfig ==
+                 m_macroTileTable[pOut->macroModeIndex].pipeConfig)) // all five fields are equal
+            {
+                if ((pOut->tcCompatible == FALSE) || // split size checked only if tcCompatible
+                    (tileInfo.tileSplitBytes >= MicroTileWidth * MicroTileHeight * pIn->numSamples))
+                {
+                    depthStencil2DTileConfigMatch = TRUE;
+                    pOut->stencilTileIdx = stencilTileIndex; // report the matching index
+                    break; // first match wins
+                }
+            }
+        }
+        else
+        {
+            ADDR_ASSERT_ALWAYS(); // unexpected: no macro mode index for candidate
+        }
+    }
+
+    return depthStencil2DTileConfigMatch;
+}
+
 } // V1
 } // Addr
diff --git a/src/amd/addrlib/r800/ciaddrlib.h b/src/amd/addrlib/r800/ciaddrlib.h
index 93b2853..48835b3 100644
--- a/src/amd/addrlib/r800/ciaddrlib.h
+++ b/src/amd/addrlib/r800/ciaddrlib.h
@@ -197,22 +197,29 @@ private:
         UINT_64 dataBaseByteAddress,
         UINT_64 metadataBaseByteAddress,
         UINT_32 metadataBitSize,
         UINT_32 elementBitSize,
         UINT_32 blockByteSize,
         UINT_32 pipeInterleaveBytes,
         UINT_32 numOfPipes,
         UINT_32 numOfBanks,
         UINT_32 numOfSamplesPerSplit) const;
 
+    BOOL_32 DepthStencilTileCfgMatch(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut) const;
+
     static const UINT_32    MacroTileTableSize = 16;
     static const UINT_32    PrtMacroModeOffset = MacroTileTableSize / 2;
+    static const INT_32     MinDepth2DThinIndex = 0;
+    static const INT_32     MaxDepth2DThinIndex = 4;
+    static const INT_32     Depth1DThinIndex = 5;
 
     ADDR_TILEINFO           m_macroTileTable[MacroTileTableSize];
     UINT_32                 m_noOfMacroEntries;
     BOOL_32                 m_allowNonDispThickModes;
 
     CIChipSettings          m_settings;
 };
 
 } // V1
 } // Addr
-- 
2.7.4



More information about the mesa-dev mailing list