[Mesa-dev] [PATCH 007/140] amdgpu/addrlib: Rewrite tile mode optimization code

Marek Olšák maraeo at gmail.com
Mon Mar 20 22:42:17 UTC 2017


From: Xavi Zhang <xavi.zhang at amd.com>

Note: remove reference to degrade4Space and use opt4Space instead.
---
 src/amd/addrlib/addrinterface.h                |  6 ++--
 src/amd/addrlib/core/addrcommon.h              |  3 +-
 src/amd/addrlib/core/addrlib.cpp               | 47 ++++++++++++++++----------
 src/amd/addrlib/core/addrlib.h                 |  2 +-
 src/amd/addrlib/r800/egbaddrlib.cpp            | 16 +++++++++
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 12 +++----
 6 files changed, 55 insertions(+), 31 deletions(-)

diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h
index d05c6ef..a50717c 100644
--- a/src/amd/addrlib/addrinterface.h
+++ b/src/amd/addrlib/addrinterface.h
@@ -239,23 +239,22 @@ typedef union _ADDR_CREATE_FLAGS
 {
     struct
     {
         UINT_32 noCubeMipSlicesPad     : 1;    ///< Turn cubemap faces padding off
         UINT_32 fillSizeFields         : 1;    ///< If clients fill size fields in all input and
                                                ///  output structure
         UINT_32 useTileIndex           : 1;    ///< Make tileIndex field in input valid
         UINT_32 useCombinedSwizzle     : 1;    ///< Use combined tile swizzle
         UINT_32 checkLast2DLevel       : 1;    ///< Check the last 2D mip sub level
         UINT_32 useHtileSliceAlign     : 1;    ///< Do htile single slice alignment
-        UINT_32 degradeBaseLevel       : 1;    ///< Degrade to 1D modes automatically for base level
         UINT_32 allowLargeThickTile    : 1;    ///< Allow 64*thickness*bytesPerPixel > rowSize
-        UINT_32 reserved               : 24;   ///< Reserved bits for future use
+        UINT_32 reserved               : 25;   ///< Reserved bits for future use
     };
 
     UINT_32 value;
 } ADDR_CREATE_FLAGS;
 
 /**
 ***************************************************************************************************
 *   ADDR_REGISTER_VALUE
 *
 *   @brief
@@ -433,29 +432,28 @@ typedef union _ADDR_SURFACE_FLAGS
         UINT_32 compressZ       : 1; ///< Flag indicates z buffer is compressed
         UINT_32 overlay         : 1; ///< Flag indicates this is an overlay surface
         UINT_32 noStencil       : 1; ///< Flag indicates this depth has no separate stencil
         UINT_32 display         : 1; ///< Flag indicates this should match display controller req.
         UINT_32 opt4Space       : 1; ///< Flag indicates this surface should be optimized for space
                                      ///  i.e. save some memory but may lose performance
         UINT_32 prt             : 1; ///< Flag for partially resident texture
         UINT_32 qbStereo        : 1; ///< Quad buffer stereo surface
         UINT_32 pow2Pad         : 1; ///< SI: Pad to pow2, must set for mipmap (include level0)
         UINT_32 interleaved     : 1; ///< Special flag for interleaved YUV surface padding
-        UINT_32 degrade4Space   : 1; ///< Degrade base level's tile mode to save memory
         UINT_32 tcCompatible    : 1; ///< Flag indicates surface needs to be shader readable
         UINT_32 dispTileType    : 1; ///< NI: force display Tiling for 128 bit shared resoruce
         UINT_32 dccCompatible   : 1; ///< VI: whether to support dcc fast clear
         UINT_32 czDispCompatible: 1; ///< SI+: CZ family has a HW bug needs special alignment.
                                      ///  This flag indicates we need to follow the alignment with
                                      ///  CZ families or other ASICs under PX configuration + CZ.
         UINT_32 nonSplit        : 1; ///< CI: depth texture should not be split
-        UINT_32 reserved        : 9; ///< Reserved bits
+        UINT_32 reserved        : 10; ///< Reserved bits
     };
 
     UINT_32 value;
 } ADDR_SURFACE_FLAGS;
 
 /**
 ***************************************************************************************************
 *   ADDR_COMPUTE_SURFACE_INFO_INPUT
 *
 *   @brief
diff --git a/src/amd/addrlib/core/addrcommon.h b/src/amd/addrlib/core/addrcommon.h
index f996c9a..88cbad0 100644
--- a/src/amd/addrlib/core/addrcommon.h
+++ b/src/amd/addrlib/core/addrcommon.h
@@ -125,23 +125,22 @@ union ADDR_CONFIG_FLAGS
         /// There flags are set up by AddrLib inside thru AddrInitGlobalParamsFromRegister
         UINT_32 optimalBankSwap        : 1;    ///< New bank tiling for RV770 only
         UINT_32 noCubeMipSlicesPad     : 1;    ///< Disables faces padding for cubemap mipmaps
         UINT_32 fillSizeFields         : 1;    ///< If clients fill size fields in all input and
                                                ///  output structure
         UINT_32 ignoreTileInfo         : 1;    ///< Don't use tile info structure
         UINT_32 useTileIndex           : 1;    ///< Make tileIndex field in input valid
         UINT_32 useCombinedSwizzle     : 1;    ///< Use combined swizzle
         UINT_32 checkLast2DLevel       : 1;    ///< Check the last 2D mip sub level
         UINT_32 useHtileSliceAlign     : 1;    ///< Do htile single slice alignment
-        UINT_32 degradeBaseLevel       : 1;    ///< Degrade to 1D modes automatically for base level
         UINT_32 allowLargeThickTile    : 1;    ///< Allow 64*thickness*bytesPerPixel > rowSize
-        UINT_32 reserved               : 22;   ///< Reserved bits for future use
+        UINT_32 reserved               : 23;   ///< Reserved bits for future use
     };
 
     UINT_32 value;
 };
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 // Platform specific debug break defines
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #if DEBUG
     #if defined(__GNUC__)
diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp
index 8cf4a24..b92568e 100644
--- a/src/amd/addrlib/core/addrlib.cpp
+++ b/src/amd/addrlib/core/addrlib.cpp
@@ -257,21 +257,20 @@ ADDR_E_RETURNCODE AddrLib::Create(
     {
         BOOL_32 initValid;
 
         // Pass createFlags to configFlags first since these flags may be overwritten
         pLib->m_configFlags.noCubeMipSlicesPad  = pCreateIn->createFlags.noCubeMipSlicesPad;
         pLib->m_configFlags.fillSizeFields      = pCreateIn->createFlags.fillSizeFields;
         pLib->m_configFlags.useTileIndex        = pCreateIn->createFlags.useTileIndex;
         pLib->m_configFlags.useCombinedSwizzle  = pCreateIn->createFlags.useCombinedSwizzle;
         pLib->m_configFlags.checkLast2DLevel    = pCreateIn->createFlags.checkLast2DLevel;
         pLib->m_configFlags.useHtileSliceAlign  = pCreateIn->createFlags.useHtileSliceAlign;
-        pLib->m_configFlags.degradeBaseLevel    = pCreateIn->createFlags.degradeBaseLevel;
         pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile;
 
         pLib->SetAddrChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision);
 
         pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels);
 
         // Global parameters initialized and remaining configFlags bits are set as well
         initValid = pLib->HwlInitGlobalParams(pCreateIn);
 
         if (initValid)
@@ -552,22 +551,22 @@ ADDR_E_RETURNCODE AddrLib::ComputeSurfaceInfo(
         {
             AddrTileMode tileMode = localIn.tileMode;
             AddrTileType tileType = localIn.tileType;
 
             // HWL layer may override tile mode if necessary
             if (HwlOverrideTileMode(&localIn, &tileMode, &tileType))
             {
                 localIn.tileMode = tileMode;
                 localIn.tileType = tileType;
             }
-            // Degrade base level if applicable
-            if (DegradeBaseLevel(&localIn, &tileMode))
+            // Optimize tile mode if possible
+            if (OptimizeTileMode(&localIn, &tileMode))
             {
                 localIn.tileMode = tileMode;
             }
         }
 
         // Call main function to compute surface info
         if (returnCode == ADDR_OK)
         {
             returnCode = HwlComputeSurfaceInfo(&localIn, pOut);
         }
@@ -3486,70 +3485,84 @@ VOID AddrLib::ComputeMipLevel(
             pIn->width = PowTwoAlign(pIn->width, 4);
             pIn->height = PowTwoAlign(pIn->height, 4);
         }
     }
 
     HwlComputeMipLevel(pIn);
 }
 
 /**
 ***************************************************************************************************
-*   AddrLib::DegradeBaseLevel
+*   AddrLib::OptimizeTileMode
 *
 *   @brief
-*       Check if base level's tile mode can be degraded
+*       Check if base level's tile mode can be optimized (degraded)
 *   @return
 *       TRUE if degraded, also returns degraded tile mode (unchanged if not degraded)
 ***************************************************************************************************
 */
-BOOL_32 AddrLib::DegradeBaseLevel(
+BOOL_32 AddrLib::OptimizeTileMode(
     const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] Input structure for surface info
     AddrTileMode*                           pTileMode   ///< [out] Degraded tile mode
     ) const
 {
-    BOOL_32 degraded = FALSE;
     AddrTileMode tileMode = pIn->tileMode;
     UINT_32 thickness = ComputeSurfaceThickness(tileMode);
 
-    if (m_configFlags.degradeBaseLevel) // This is a global setting
+    // Optimization can only be done on level 0 and samples <= 1
+    if ((pIn->flags.opt4Space == TRUE)      &&
+        (pIn->mipLevel == 0)                &&
+        (pIn->numSamples <= 1)              &&
+        (pIn->flags.display == FALSE)       &&
+        (IsPrtTileMode(tileMode) == FALSE)  &&
+        (pIn->flags.prt == FALSE))
     {
-        if (pIn->flags.degrade4Space        && // Degradation per surface
-            pIn->mipLevel == 0              &&
-            pIn->numSamples == 1            &&
-            IsMacroTiled(tileMode))
+        // Check if linear mode is optimal
+        if ((pIn->height == 1) &&
+            (IsLinear(tileMode) == FALSE) &&
+            (AddrElemLib::IsBlockCompressed(pIn->format) == FALSE) &&
+            (pIn->flags.depth == FALSE) &&
+            (pIn->flags.stencil == FALSE))
+        {
+            tileMode = ADDR_TM_LINEAR_ALIGNED;
+        }
+        else if (IsMacroTiled(tileMode))
         {
             if (HwlDegradeBaseLevel(pIn))
             {
-                *pTileMode = thickness == 1 ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
-                degraded = TRUE;
+                tileMode = (thickness == 1) ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
             }
             else if (thickness > 1)
             {
                 // As in the following HwlComputeSurfaceInfo, thick modes may be degraded to
                 // thinner modes, we should re-evaluate whether the corresponding thinner modes
                 // need to be degraded. If so, we choose 1D thick mode instead.
                 tileMode = DegradeLargeThickTile(pIn->tileMode, pIn->bpp);
                 if (tileMode != pIn->tileMode)
                 {
                     ADDR_COMPUTE_SURFACE_INFO_INPUT input = *pIn;
                     input.tileMode = tileMode;
                     if (HwlDegradeBaseLevel(&input))
                     {
-                        *pTileMode = ADDR_TM_1D_TILED_THICK;
-                        degraded = TRUE;
+                        tileMode = ADDR_TM_1D_TILED_THICK;
                     }
                 }
             }
         }
     }
 
-    return degraded;
+    BOOL_32 optimized = (tileMode != pIn->tileMode);
+    if (optimized)
+    {
+        *pTileMode = tileMode;
+    }
+    return optimized;
 }
 
 /**
 ***************************************************************************************************
 *   AddrLib::DegradeLargeThickTile
 *
 *   @brief
 *       Check if the thickness needs to be reduced if a tile is too large
 *   @return
 *       The degraded tile mode (unchanged if not degraded)
diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h
index 43c55ff..d693fd2 100644
--- a/src/amd/addrlib/core/addrlib.h
+++ b/src/amd/addrlib/core/addrlib.h
@@ -645,21 +645,21 @@ private:
     //
     VOID    ComputeTileDataWidthAndHeight(
         UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo,
         UINT_32* pMacroWidth, UINT_32* pMacroHeight) const;
 
     UINT_32 ComputeXmaskCoordYFromPipe(
         UINT_32 pipe, UINT_32 x) const;
 
     VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels);
 
-    BOOL_32 DegradeBaseLevel(
+    BOOL_32 OptimizeTileMode(
         const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, AddrTileMode* pTileMode) const;
 
 protected:
     AddrLibClass        m_class;        ///< Store class type (HWL type)
 
     AddrChipFamily      m_chipFamily;   ///< Chip family translated from the one in atiid.h
 
     UINT_32             m_chipRevision; ///< Revision id from xxx_id.h
 
     UINT_32             m_version;      ///< Current version
diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp b/src/amd/addrlib/r800/egbaddrlib.cpp
index abd1a79..5d80906 100644
--- a/src/amd/addrlib/r800/egbaddrlib.cpp
+++ b/src/amd/addrlib/r800/egbaddrlib.cpp
@@ -1151,20 +1151,36 @@ BOOL_32 EgBasedAddrLib::HwlDegradeBaseLevel(
                                                pIn->mipLevel,
                                                pIn->numSamples,
                                                &tileInfo,
                                                &baseAlign,
                                                &pitchAlign,
                                                &heightAlign);
 
     if (valid)
     {
         degrade = (pIn->width < pitchAlign || pIn->height < heightAlign);
+        // Check whether 2D tiling still has too much footprint
+        if (degrade == FALSE)
+        {
+            // Only check width and height as slices are aligned to thickness
+            UINT_64 unalignedSize = pIn->width * pIn->height;
+
+            UINT_32 alignedPitch = PowTwoAlign(pIn->width, pitchAlign);
+            UINT_32 alignedHeight = PowTwoAlign(pIn->height, heightAlign);
+            UINT_64 alignedSize = alignedPitch * alignedHeight;
+
+            // alignedSize > 1.5 * unalignedSize
+            if (2 * alignedSize > 3 * unalignedSize)
+            {
+                degrade = TRUE;
+            }
+        }
     }
     else
     {
         degrade = TRUE;
     }
 
     return degrade;
 }
 
 /**
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index abe2b2a..8632f06 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -117,21 +117,20 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
    if (ws->info.chip_class == SI) {
       regValue.pMacroTileConfig = NULL;
       regValue.noOfMacroEntries = 0;
    } else {
       regValue.pMacroTileConfig = ws->amdinfo.gb_macro_tile_mode;
       regValue.noOfMacroEntries = ARRAY_SIZE(ws->amdinfo.gb_macro_tile_mode);
    }
 
    createFlags.value = 0;
    createFlags.useTileIndex = 1;
-   createFlags.degradeBaseLevel = 1;
    createFlags.useHtileSliceAlign = 1;
 
    addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
    addrCreateInput.chipFamily = ws->family;
    addrCreateInput.chipRevision = ws->rev_id;
    addrCreateInput.createFlags = createFlags;
    addrCreateInput.callbacks.allocSysMem = allocSysMem;
    addrCreateInput.callbacks.freeSysMem = freeSysMem;
    addrCreateInput.callbacks.debugPrint = 0;
    addrCreateInput.regValue = regValue;
@@ -394,25 +393,24 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
    AddrSurfInfoIn.flags.depth = (flags & RADEON_SURF_ZBUFFER) != 0;
    AddrSurfInfoIn.flags.cube = tex->target == PIPE_TEXTURE_CUBE;
    AddrSurfInfoIn.flags.fmask = (flags & RADEON_SURF_FMASK) != 0;
    AddrSurfInfoIn.flags.display = (flags & RADEON_SURF_SCANOUT) != 0;
    AddrSurfInfoIn.flags.pow2Pad = tex->last_level > 0;
    AddrSurfInfoIn.flags.tcCompatible = (flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
 
    /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
     * requested, because TC-compatible HTILE requires 2D tiling.
     */
-   AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible &&
-                                        !AddrSurfInfoIn.flags.fmask &&
-                                        tex->nr_samples <= 1 &&
-                                        (flags & RADEON_SURF_OPTIMIZE_FOR_SPACE);
-   AddrSurfInfoIn.flags.opt4Space = AddrSurfInfoIn.flags.degrade4Space;
+   AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible &&
+                                    !AddrSurfInfoIn.flags.fmask &&
+                                    tex->nr_samples <= 1 &&
+                                    (flags & RADEON_SURF_OPTIMIZE_FOR_SPACE);
 
    /* DCC notes:
     * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
     *   with samples >= 4.
     * - Mipmapped array textures have low performance (discovered by a closed
     *   driver team).
     */
    AddrSurfInfoIn.flags.dccCompatible = ws->info.chip_class >= VI &&
                                         !(flags & RADEON_SURF_Z_OR_SBUFFER) &&
                                         !(flags & RADEON_SURF_DISABLE_DCC) &&
@@ -440,21 +438,21 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
       assert(!(flags & RADEON_SURF_FMASK));
 
       /* If any of these parameters are incorrect, the calculation
        * will fail. */
       AddrTileInfoIn.banks = surf->num_banks;
       AddrTileInfoIn.bankWidth = surf->bankw;
       AddrTileInfoIn.bankHeight = surf->bankh;
       AddrTileInfoIn.macroAspectRatio = surf->mtilea;
       AddrTileInfoIn.tileSplitBytes = surf->tile_split;
       AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to GB_TILE_MODE */
-      AddrSurfInfoIn.flags.degrade4Space = 0;
+      AddrSurfInfoIn.flags.opt4Space = 0;
       AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
 
       /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
        * the tile index, because we are expected to know it if
        * we know the other parameters.
        *
        * This is something that can easily be fixed in Addrlib.
        * For now, just figure it out here.
        * Note that only 2D_TILE_THIN1 is handled here.
        */
-- 
2.7.4



More information about the mesa-dev mailing list