Mesa (master): amd/addrlib: don't recompute DCC info for every ComputeDccAddrFromCoord call
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Jun 10 16:12:54 UTC 2020
Module: Mesa
Branch: master
Commit: a99f4d5382c2a3053c2938f9035b8872ab2c542f
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a99f4d5382c2a3053c2938f9035b8872ab2c542f
Author: Marek Olšák <marek.olsak at amd.com>
Date: Tue Jun 9 02:40:20 2020 -0400
amd/addrlib: don't recompute DCC info for every ComputeDccAddrFromCoord call
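This decreases the DCC retile map overhead from 23% to 18%. Callers now pass the already-computed ADDR2_COMPUTE_DCC_INFO_OUTPUT values (pitch, height, compress/meta block dimensions, dccRamSliceSize) through ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT, so HwlComputeDccAddrFromCoord no longer calls ComputeDccInfo on every lookup.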
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5398>
---
src/amd/addrlib/inc/addrinterface.h | 9 +++
src/amd/addrlib/src/gfx10/gfx10addrlib.cpp | 113 ++++++++++++-----------------
src/amd/addrlib/src/gfx9/gfx9addrlib.cpp | 75 +++++++------------
src/amd/common/ac_surface.c | 9 +++
4 files changed, 91 insertions(+), 115 deletions(-)
diff --git a/src/amd/addrlib/inc/addrinterface.h b/src/amd/addrlib/inc/addrinterface.h
index 5fb3c46e489..b640dee272e 100644
--- a/src/amd/addrlib/inc/addrinterface.h
+++ b/src/amd/addrlib/inc/addrinterface.h
@@ -3360,6 +3360,15 @@ typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT
UINT_32 numFrags; ///< Color surface fragment number
UINT_32 pipeXor; ///< pipe Xor setting
+ UINT_32 pitch; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::pitch
+ UINT_32 height; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::height
+ UINT_32 compressBlkWidth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkWidth
+ UINT_32 compressBlkHeight; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkHeight
+ UINT_32 compressBlkDepth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkDepth
+ UINT_32 metaBlkWidth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkWidth
+ UINT_32 metaBlkHeight; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkHeight
+ UINT_32 metaBlkDepth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkDepth
+ UINT_32 dccRamSliceSize; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::dccRamSliceSize
} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT;
/**
diff --git a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
index 49f31550c19..4033c2398d7 100644
--- a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
+++ b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
@@ -673,88 +673,67 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord(
}
else
{
- ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
- input.size = sizeof(input);
- input.dccKeyFlags = pIn->dccKeyFlags;
- input.colorFlags = pIn->colorFlags;
- input.swizzleMode = pIn->swizzleMode;
- input.resourceType = pIn->resourceType;
- input.bpp = pIn->bpp;
- input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
- input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
- input.numSlices = Max(pIn->numSlices, 1u);
- input.numFrags = Max(pIn->numFrags, 1u);
- input.numMipLevels = Max(pIn->numMipLevels, 1u);
+ const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
+ const UINT_32 numPipeLog2 = m_pipesLog2;
+ const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
+ UINT_32 index = m_dccBaseIndex + elemLog2;
+ const UINT_8* patIdxTable;
- ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
- output.size = sizeof(output);
-
- returnCode = ComputeDccInfo(&input, &output);
-
- if (returnCode == ADDR_OK)
+ if (m_settings.supportRbPlus)
{
- const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
- const UINT_32 numPipeLog2 = m_pipesLog2;
- const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
- UINT_32 index = m_dccBaseIndex + elemLog2;
- const UINT_8* patIdxTable;
+ patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
- if (m_settings.supportRbPlus)
+ if (pIn->dccKeyFlags.pipeAligned)
{
- patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX;
+ index += MaxNumOfBpp;
- if (pIn->dccKeyFlags.pipeAligned)
+ if (m_numPkrLog2 < 2)
{
- index += MaxNumOfBpp;
-
- if (m_numPkrLog2 < 2)
- {
- index += m_pipesLog2 * MaxNumOfBpp;
- }
- else
- {
- // 4 groups for "m_numPkrLog2 < 2" case
- index += 4 * MaxNumOfBpp;
+ index += m_pipesLog2 * MaxNumOfBpp;
+ }
+ else
+ {
+ // 4 groups for "m_numPkrLog2 < 2" case
+ index += 4 * MaxNumOfBpp;
- const UINT_32 dccPipePerPkr = 3;
+ const UINT_32 dccPipePerPkr = 3;
- index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
- (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
- }
+ index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
+ (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
}
}
+ }
+ else
+ {
+ patIdxTable = DCC_64K_R_X_PATIDX;
+
+ if (pIn->dccKeyFlags.pipeAligned)
+ {
+ index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
+ }
else
{
- patIdxTable = DCC_64K_R_X_PATIDX;
-
- if (pIn->dccKeyFlags.pipeAligned)
- {
- index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
- }
- else
- {
- index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
- }
+ index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
}
+ }
- const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) + elemLog2 - 8;
- const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
- const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
- blkSizeLog2 + 1, // +1 for nibble offset
- pIn->x,
- pIn->y,
- pIn->slice,
- 0);
- const UINT_32 xb = pIn->x / output.metaBlkWidth;
- const UINT_32 yb = pIn->y / output.metaBlkHeight;
- const UINT_32 pb = output.pitch / output.metaBlkWidth;
- const UINT_32 blkIndex = (yb * pb) + xb;
- const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
+ const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
+ const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
+ const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
+ blkSizeLog2 + 1, // +1 for nibble offset
+ pIn->x,
+ pIn->y,
+ pIn->slice,
+ 0);
+ const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
+ const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
+ const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
+ const UINT_32 blkIndex = (yb * pb) + xb;
+ const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
- pOut->addr = (static_cast<UINT_64>(output.dccRamSliceSize) * pIn->slice) +
- (blkIndex * (1 << blkSizeLog2)) +
- ((blkOffset >> 1) ^ pipeXor);
- }
+ pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
+ (blkIndex * (1 << blkSizeLog2)) +
+ ((blkOffset >> 1) ^ pipeXor);
}
return returnCode;
diff --git a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp
index ece83592fc9..cc4d5af4c4f 100644
--- a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp
+++ b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp
@@ -987,62 +987,41 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
}
else
{
- ADDR2_COMPUTE_DCCINFO_INPUT input = {0};
- input.size = sizeof(input);
- input.dccKeyFlags = pIn->dccKeyFlags;
- input.colorFlags = pIn->colorFlags;
- input.swizzleMode = pIn->swizzleMode;
- input.resourceType = pIn->resourceType;
- input.bpp = pIn->bpp;
- input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
- input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
- input.numSlices = Max(pIn->numSlices, 1u);
- input.numFrags = Max(pIn->numFrags, 1u);
- input.numMipLevels = Max(pIn->numMipLevels, 1u);
-
- ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0};
- output.size = sizeof(output);
-
- returnCode = ComputeDccInfo(&input, &output);
+ UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
+ UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
+ UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth);
+ UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
+ UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth);
+ UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth);
+ UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
+ UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth);
+
+ MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
+ Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
+ metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
+ compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
- if (returnCode == ADDR_OK)
- {
- UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
- UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
- UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
- UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
- UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
- UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
- UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
- UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
-
- MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
- Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
- metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
- compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
- const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
+ UINT_32 xb = pIn->x / pIn->metaBlkWidth;
+ UINT_32 yb = pIn->y / pIn->metaBlkHeight;
+ UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
- UINT_32 xb = pIn->x / output.metaBlkWidth;
- UINT_32 yb = pIn->y / output.metaBlkHeight;
- UINT_32 zb = pIn->slice / output.metaBlkDepth;
-
- UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
- UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
- UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
+ UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth;
+ UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
+ UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
- UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
- UINT_64 address = pMetaEq->solve(coords);
+ UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex };
+ UINT_64 address = pMetaEq->solve(coords);
- pOut->addr = address >> 1;
+ pOut->addr = address >> 1;
- UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
- pIn->swizzleMode);
+ UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
+ pIn->swizzleMode);
- UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
+ UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
- pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
- }
+ pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
}
return returnCode;
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index c6180865e23..732aea871d6 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -1378,6 +1378,15 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
addrin.numSlices = 1;
addrin.numMipLevels = 1;
addrin.numFrags = 1;
+ addrin.pitch = dout.pitch;
+ addrin.height = dout.height;
+ addrin.compressBlkWidth = dout.compressBlkWidth;
+ addrin.compressBlkHeight = dout.compressBlkHeight;
+ addrin.compressBlkDepth = dout.compressBlkDepth;
+ addrin.metaBlkWidth = dout.metaBlkWidth;
+ addrin.metaBlkHeight = dout.metaBlkHeight;
+ addrin.metaBlkDepth = dout.metaBlkDepth;
+ addrin.dccRamSliceSize = dout.dccRamSliceSize;
ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {};
addrout.size = sizeof(addrout);
More information about the mesa-commit mailing list