Mesa (staging/20.1): amd/addrlib: don't recompute DCC info for every ComputeDccAddrFromCoord call

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jun 17 21:17:57 UTC 2020


Module: Mesa
Branch: staging/20.1
Commit: 1c724dcc92a0f8e6cf1c2103cf45bf9278e12f7e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1c724dcc92a0f8e6cf1c2103cf45bf9278e12f7e

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Tue Jun  9 02:40:20 2020 -0400

amd/addrlib: don't recompute DCC info for every ComputeDccAddrFromCoord call

This decreases the DCC retile map overhead from 23% to 18%.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
(cherry picked from commit a99f4d5382c2a3053c2938f9035b8872ab2c542f)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5422>

---

 .pick_status.json                          |   2 +-
 src/amd/addrlib/inc/addrinterface.h        |   9 +++
 src/amd/addrlib/src/gfx10/gfx10addrlib.cpp | 113 ++++++++++++-----------------
 src/amd/addrlib/src/gfx9/gfx9addrlib.cpp   |  75 +++++++------------
 src/amd/common/ac_surface.c                |   9 +++
 5 files changed, 92 insertions(+), 116 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 514cb853957..01786f5aef5 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -2407,7 +2407,7 @@
         "description": "amd/addrlib: don't recompute DCC info for every ComputeDccAddrFromCoord call",
         "nominated": false,
         "nomination_type": null,
-        "resolution": 4,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/addrlib/inc/addrinterface.h b/src/amd/addrlib/inc/addrinterface.h
index 5fb3c46e489..b640dee272e 100644
--- a/src/amd/addrlib/inc/addrinterface.h
+++ b/src/amd/addrlib/inc/addrinterface.h
@@ -3360,6 +3360,15 @@ typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
     UINT_32             numFrags;            ///< Color surface fragment number
 
     UINT_32             pipeXor;             ///< pipe Xor setting
+    UINT_32             pitch;               ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::pitch
+    UINT_32             height;              ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::height
+    UINT_32             compressBlkWidth;    ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkWidth
+    UINT_32             compressBlkHeight;   ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkHeight
+    UINT_32             compressBlkDepth;    ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkDepth
+    UINT_32             metaBlkWidth;        ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkWidth
+    UINT_32             metaBlkHeight;       ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkHeight
+    UINT_32             metaBlkDepth;        ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkDepth
+    UINT_32             dccRamSliceSize;     ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::dccRamSliceSize
 } ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT;
 
 /**
diff --git a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
index eea3deefff8..fe0baa5de0c 100644
--- a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
+++ b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
@@ -673,88 +673,67 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord(
     }
     else
     {
-        ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
-        input.size            = sizeof(input);
-        input.dccKeyFlags     = pIn->dccKeyFlags;
-        input.colorFlags      = pIn->colorFlags;
-        input.swizzleMode     = pIn->swizzleMode;
-        input.resourceType    = pIn->resourceType;
-        input.bpp             = pIn->bpp;
-        input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
-        input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
-        input.numSlices       = Max(pIn->numSlices,       1u);
-        input.numFrags        = Max(pIn->numFrags,        1u);
-        input.numMipLevels    = Max(pIn->numMipLevels,    1u);
+        const UINT_32  elemLog2    = Log2(pIn->bpp >> 3);
+        const UINT_32  numPipeLog2 = m_pipesLog2;
+        const UINT_32  pipeMask    = (1 << numPipeLog2) - 1;
+        UINT_32        index       = m_dccBaseIndex + elemLog2;
+        const UINT_8*  patIdxTable;
 
-        ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
-        output.size = sizeof(output);
-
-        returnCode = ComputeDccInfo(&input, &output);
-
-        if (returnCode == ADDR_OK)
+        if (m_settings.supportRbPlus)
         {
-            const UINT_32  elemLog2    = Log2(pIn->bpp >> 3);
-            const UINT_32  numPipeLog2 = m_pipesLog2;
-            const UINT_32  pipeMask    = (1 << numPipeLog2) - 1;
-            UINT_32        index       = m_dccBaseIndex + elemLog2;
-            const UINT_8*  patIdxTable;
+            patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
 
-            if (m_settings.supportRbPlus)
+            if (pIn->dccKeyFlags.pipeAligned)
             {
-                patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
+                index += MaxNumOfBpp;
 
-                if (pIn->dccKeyFlags.pipeAligned)
+                if (m_numPkrLog2 < 2)
                 {
-                    index += MaxNumOfBpp;
-
-                    if (m_numPkrLog2 < 2)
-                    {
-                        index += m_pipesLog2 * MaxNumOfBpp;
-                    }
-                    else
-                    {
-                        // 4 groups for "m_numPkrLog2 < 2" case
-                        index += 4 * MaxNumOfBpp;
+                    index += m_pipesLog2 * MaxNumOfBpp;
+                }
+                else
+                {
+                    // 4 groups for "m_numPkrLog2 < 2" case
+                    index += 4 * MaxNumOfBpp;
 
-                        const UINT_32 dccPipePerPkr = 3;
+                    const UINT_32 dccPipePerPkr = 3;
 
-                        index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
-                                 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
-                    }
+                    index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
+                             (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
                 }
             }
+        }
+        else
+        {
+            patIdxTable = DCC_64K_R_X_PATIDX;
+
+            if (pIn->dccKeyFlags.pipeAligned)
+            {
+                index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
+            }
             else
             {
-                patIdxTable = DCC_64K_R_X_PATIDX;
-
-                if (pIn->dccKeyFlags.pipeAligned)
-                {
-                    index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
-                }
-                else
-                {
-                    index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
-                }
+                index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
             }
+        }
 
-            const UINT_32  blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) + elemLog2 - 8;
-            const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
-            const UINT_32  blkOffset   = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
-                                                                         blkSizeLog2 + 1, // +1 for nibble offset
-                                                                         pIn->x,
-                                                                         pIn->y,
-                                                                         pIn->slice,
-                                                                         0);
-            const UINT_32 xb       = pIn->x / output.metaBlkWidth;
-            const UINT_32 yb       = pIn->y / output.metaBlkHeight;
-            const UINT_32 pb       = output.pitch / output.metaBlkWidth;
-            const UINT_32 blkIndex = (yb * pb) + xb;
-            const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
+        const UINT_32  blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
+        const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
+        const UINT_32  blkOffset   = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
+                                                                     blkSizeLog2 + 1, // +1 for nibble offset
+                                                                     pIn->x,
+                                                                     pIn->y,
+                                                                     pIn->slice,
+                                                                     0);
+        const UINT_32 xb       = pIn->x / pIn->metaBlkWidth;
+        const UINT_32 yb       = pIn->y / pIn->metaBlkHeight;
+        const UINT_32 pb       = pIn->pitch / pIn->metaBlkWidth;
+        const UINT_32 blkIndex = (yb * pb) + xb;
+        const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
 
-            pOut->addr = (static_cast<UINT_64>(output.dccRamSliceSize) * pIn->slice) +
-                         (blkIndex * (1 << blkSizeLog2)) +
-                         ((blkOffset >> 1) ^ pipeXor);
-        }
+        pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
+                     (blkIndex * (1 << blkSizeLog2)) +
+                     ((blkOffset >> 1) ^ pipeXor);
     }
 
     return returnCode;
diff --git a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp
index ece83592fc9..cc4d5af4c4f 100644
--- a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp
+++ b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp
@@ -987,62 +987,41 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
     }
     else
     {
-        ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
-        input.size            = sizeof(input);
-        input.dccKeyFlags     = pIn->dccKeyFlags;
-        input.colorFlags      = pIn->colorFlags;
-        input.swizzleMode     = pIn->swizzleMode;
-        input.resourceType    = pIn->resourceType;
-        input.bpp             = pIn->bpp;
-        input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
-        input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
-        input.numSlices       = Max(pIn->numSlices, 1u);
-        input.numFrags        = Max(pIn->numFrags, 1u);
-        input.numMipLevels    = Max(pIn->numMipLevels, 1u);
-
-        ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
-        output.size = sizeof(output);
-
-        returnCode = ComputeDccInfo(&input, &output);
+        UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
+        UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
+        UINT_32 metaBlkWidthLog2  = Log2(pIn->metaBlkWidth);
+        UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
+        UINT_32 metaBlkDepthLog2  = Log2(pIn->metaBlkDepth);
+        UINT_32 compBlkWidthLog2  = Log2(pIn->compressBlkWidth);
+        UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
+        UINT_32 compBlkDepthLog2  = Log2(pIn->compressBlkDepth);
+
+        MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
+                                     Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
+                                     metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
+                                     compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
 
-        if (returnCode == ADDR_OK)
-        {
-            UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
-            UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
-            UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
-            UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
-            UINT_32 metaBlkDepthLog2  = Log2(output.metaBlkDepth);
-            UINT_32 compBlkWidthLog2  = Log2(output.compressBlkWidth);
-            UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
-            UINT_32 compBlkDepthLog2  = Log2(output.compressBlkDepth);
-
-            MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
-                                         Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
-                                         metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
-                                         compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
+        const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
 
-            const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
+        UINT_32 xb = pIn->x / pIn->metaBlkWidth;
+        UINT_32 yb = pIn->y / pIn->metaBlkHeight;
+        UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
 
-            UINT_32 xb = pIn->x / output.metaBlkWidth;
-            UINT_32 yb = pIn->y / output.metaBlkHeight;
-            UINT_32 zb = pIn->slice / output.metaBlkDepth;
-
-            UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
-            UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
-            UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
+        UINT_32 pitchInBlock     = pIn->pitch / pIn->metaBlkWidth;
+        UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
+        UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
 
-            UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
-            UINT_64 address = pMetaEq->solve(coords);
+        UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
+        UINT_64 address = pMetaEq->solve(coords);
 
-            pOut->addr = address >> 1;
+        pOut->addr = address >> 1;
 
-            UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
-                                                               pIn->swizzleMode);
+        UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
+                                                           pIn->swizzleMode);
 
-            UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
+        UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
 
-            pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
-        }
+        pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
     }
 
     return returnCode;
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 5ce1f834c52..1facd6b5c31 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -1357,6 +1357,15 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
 					addrin.numSlices        = 1;
 					addrin.numMipLevels     = 1;
 					addrin.numFrags         = 1;
+					addrin.pitch            = dout.pitch;
+					addrin.height           = dout.height;
+					addrin.compressBlkWidth = dout.compressBlkWidth;
+					addrin.compressBlkHeight = dout.compressBlkHeight;
+					addrin.compressBlkDepth = dout.compressBlkDepth;
+					addrin.metaBlkWidth     = dout.metaBlkWidth;
+					addrin.metaBlkHeight    = dout.metaBlkHeight;
+					addrin.metaBlkDepth     = dout.metaBlkDepth;
+					addrin.dccRamSliceSize  = dout.dccRamSliceSize;
 
 					ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {};
 					addrout.size = sizeof(addrout);



More information about the mesa-commit mailing list