[Mesa-dev] [PATCH 1/2] amd/addrlib: update to the latest version for Vega12
Marek Olšák
maraeo at gmail.com
Thu Mar 22 15:15:43 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
---
src/amd/addrlib/addrinterface.cpp | 32 ++++-
src/amd/addrlib/addrinterface.h | 62 ++++++----
src/amd/addrlib/addrtypes.h | 6 +-
src/amd/addrlib/amdgpu_asic_addr.h | 3 +
src/amd/addrlib/core/addrlib.cpp | 80 +++++++++++--
src/amd/addrlib/core/addrlib.h | 36 +++++-
src/amd/addrlib/core/addrlib1.cpp | 14 ++-
src/amd/addrlib/core/addrlib2.cpp | 10 ++
src/amd/addrlib/core/addrlib2.h | 6 -
src/amd/addrlib/gfx9/gfx9addrlib.cpp | 224 +++++++++++++++++++++++++----------
src/amd/addrlib/gfx9/gfx9addrlib.h | 21 ++--
src/amd/addrlib/r800/ciaddrlib.cpp | 40 +++++--
src/amd/addrlib/r800/ciaddrlib.h | 4 +-
src/amd/addrlib/r800/egbaddrlib.cpp | 8 +-
src/amd/addrlib/r800/siaddrlib.cpp | 35 ++++--
src/amd/addrlib/r800/siaddrlib.h | 4 +-
src/amd/common/ac_surface.c | 2 +-
17 files changed, 439 insertions(+), 148 deletions(-)
diff --git a/src/amd/addrlib/addrinterface.cpp b/src/amd/addrlib/addrinterface.cpp
index 5fdf7fc3c65..112431e2cb4 100644
--- a/src/amd/addrlib/addrinterface.cpp
+++ b/src/amd/addrlib/addrinterface.cpp
@@ -1047,38 +1047,68 @@ ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo(
*
* @brief
* Convert maximum alignments
*
* @return
* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments(
ADDR_HANDLE hLib, ///< address lib handle
- ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) ///< [out] output structure
+ ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< [out] output structure
{
Addr::Lib* pLib = Lib::GetLib(hLib);
ADDR_E_RETURNCODE returnCode = ADDR_OK;
if (pLib != NULL)
{
returnCode = pLib->GetMaxAlignments(pOut);
}
else
{
returnCode = ADDR_ERROR;
}
return returnCode;
}
+/**
+****************************************************************************************************
+* AddrGetMaxMetaAlignments
+*
+* @brief
+* Gets maximum alignments for metadata
+*
+* @return
+* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+****************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments(
+ ADDR_HANDLE hLib, ///< address lib handle
+ ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< [out] output structure
+{
+ Addr::Lib* pLib = Lib::GetLib(hLib);
+
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ if (pLib != NULL)
+ {
+ returnCode = pLib->GetMaxMetaAlignments(pOut);
+ }
+ else
+ {
+ returnCode = ADDR_ERROR;
+ }
+
+ return returnCode;
+}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Surface functions for Addr2
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
****************************************************************************************************
* Addr2ComputeSurfaceInfo
*
diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h
index 8124b745f21..be9e5c2b81e 100644
--- a/src/amd/addrlib/addrinterface.h
+++ b/src/amd/addrlib/addrinterface.h
@@ -521,21 +521,22 @@ typedef union _ADDR_SURFACE_FLAGS
UINT_32 needEquation : 1; ///< Make the surface tile setting equation compatible.
/// This flag indicates we need to override tile
/// mode to PRT_* tile mode to disable slice rotation,
/// which is needed by swizzle pattern equation.
UINT_32 skipIndicesOutput : 1; ///< Skipping indices in output.
UINT_32 rotateDisplay : 1; ///< Rotate micro tile type
UINT_32 minimizeAlignment : 1; ///< Minimize alignment
UINT_32 preferEquation : 1; ///< Return equation index without adjusting tile mode
UINT_32 matchStencilTileCfg : 1; ///< Select tile index of stencil as well as depth surface
/// to make sure they share same tile config parameters
- UINT_32 reserved : 2; ///< Reserved bits
+ UINT_32 disallowLargeThickDegrade : 1; ///< Disallow large thick tile degrade
+ UINT_32 reserved : 1; ///< Reserved bits
};
UINT_32 value;
} ADDR_SURFACE_FLAGS;
/**
****************************************************************************************************
* ADDR_COMPUTE_SURFACE_INFO_INPUT
*
* @brief
@@ -2266,21 +2267,21 @@ typedef struct _ADDR_COMPUTE_DCCINFO_INPUT
****************************************************************************************************
* ADDR_COMPUTE_DCCINFO_OUTPUT
*
* @brief
* Output structure of AddrComputeDccInfo
****************************************************************************************************
*/
typedef struct _ADDR_COMPUTE_DCCINFO_OUTPUT
{
UINT_32 size; ///< Size of this structure in bytes
- UINT_64 dccRamBaseAlign; ///< Base alignment of dcc key
+ UINT_32 dccRamBaseAlign; ///< Base alignment of dcc key
UINT_64 dccRamSize; ///< Size of dcc key
UINT_64 dccFastClearSize; ///< Size of dcc key portion that can be fast cleared
BOOL_32 subLvlCompressible; ///< Whether sub resource is compressiable
BOOL_32 dccRamSizeAligned; ///< Whether the dcc key size is aligned
} ADDR_COMPUTE_DCCINFO_OUTPUT;
/**
****************************************************************************************************
* AddrComputeDccInfo
*
@@ -2291,45 +2292,55 @@ typedef struct _ADDR_COMPUTE_DCCINFO_OUTPUT
*/
ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo(
ADDR_HANDLE hLib,
const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
ADDR_COMPUTE_DCCINFO_OUTPUT* pOut);
/**
****************************************************************************************************
-* ADDR_GET_MAX_ALIGNMENTS_OUTPUT
+* ADDR_GET_MAX_ALINGMENTS_OUTPUT
*
* @brief
* Output structure of AddrGetMaxAlignments
****************************************************************************************************
*/
-typedef struct _ADDR_GET_MAX_ALIGNMENTS_OUTPUT
+typedef struct _ADDR_GET_MAX_ALINGMENTS_OUTPUT
{
UINT_32 size; ///< Size of this structure in bytes
- UINT_64 baseAlign; ///< Maximum base alignment in bytes
-} ADDR_GET_MAX_ALIGNMENTS_OUTPUT;
+ UINT_32 baseAlign; ///< Maximum base alignment in bytes
+} ADDR_GET_MAX_ALINGMENTS_OUTPUT;
/**
****************************************************************************************************
* AddrGetMaxAlignments
*
* @brief
* Gets maximnum alignments
****************************************************************************************************
*/
ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments(
ADDR_HANDLE hLib,
- ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut);
-
+ ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut);
+/**
+****************************************************************************************************
+* AddrGetMaxMetaAlignments
+*
+* @brief
+* Gets maximum alignments for metadata
+****************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments(
+ ADDR_HANDLE hLib,
+ ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut);
/**
****************************************************************************************************
* Address library interface version 2
* available from Gfx9 hardware
****************************************************************************************************
* Addr2ComputeSurfaceInfo()
* Addr2ComputeSurfaceAddrFromCoord()
* Addr2ComputeSurfaceCoordFromAddr()
@@ -2359,36 +2370,39 @@ ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments(
* ADDR2_SURFACE_FLAGS
*
* @brief
* Surface flags
****************************************************************************************************
*/
typedef union _ADDR2_SURFACE_FLAGS
{
struct
{
- UINT_32 color : 1; ///< This resource is a color buffer, can be used with RTV
- UINT_32 depth : 1; ///< Thie resource is a depth buffer, can be used with DSV
- UINT_32 stencil : 1; ///< Thie resource is a stencil buffer, can be used with DSV
- UINT_32 fmask : 1; ///< This is an fmask surface
- UINT_32 overlay : 1; ///< This is an overlay surface
- UINT_32 display : 1; ///< This resource is displable, can be used with DRV
- UINT_32 prt : 1; ///< This is a partially resident texture
- UINT_32 qbStereo : 1; ///< This is a quad buffer stereo surface
- UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding
- UINT_32 texture : 1; ///< This resource can be used with SRV
- UINT_32 unordered : 1; ///< This resource can be used with UAV
- UINT_32 rotated : 1; ///< This resource is rotated and displable
- UINT_32 needEquation : 1; ///< This resource needs equation to be generated if possible
- UINT_32 opt4space : 1; ///< This resource should be optimized for space
- UINT_32 minimizeAlign : 1; ///< This resource should use minimum alignment
- UINT_32 reserved : 17; ///< Reserved bits
+ UINT_32 color : 1; ///< This resource is a color buffer, can be used with RTV
+ UINT_32 depth : 1; ///< This resource is a depth buffer, can be used with DSV
+ UINT_32 stencil : 1; ///< This resource is a stencil buffer, can be used with DSV
+ UINT_32 fmask : 1; ///< This is an fmask surface
+ UINT_32 overlay : 1; ///< This is an overlay surface
+ UINT_32 display : 1; ///< This resource is displayable, can be used with DRV
+ UINT_32 prt : 1; ///< This is a partially resident texture
+ UINT_32 qbStereo : 1; ///< This is a quad buffer stereo surface
+ UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding
+ UINT_32 texture : 1; ///< This resource can be used with SRV
+ UINT_32 unordered : 1; ///< This resource can be used with UAV
+ UINT_32 rotated : 1; ///< This resource is rotated and displayable
+ UINT_32 needEquation : 1; ///< This resource needs equation to be generated if possible
+ UINT_32 opt4space : 1; ///< This resource should be optimized for space
+ UINT_32 minimizeAlign : 1; ///< This resource should use minimum alignment
+ UINT_32 noMetadata : 1; ///< This resource has no metadata
+ UINT_32 metaRbUnaligned : 1; ///< This resource has rb unaligned metadata
+ UINT_32 metaPipeUnaligned : 1; ///< This resource has pipe unaligned metadata
+ UINT_32 reserved : 14; ///< Reserved bits
};
UINT_32 value;
} ADDR2_SURFACE_FLAGS;
/**
****************************************************************************************************
* ADDR2_COMPUTE_SURFACE_INFO_INPUT
*
* @brief
diff --git a/src/amd/addrlib/addrtypes.h b/src/amd/addrlib/addrtypes.h
index f8f96d54a10..c63ad96ff97 100644
--- a/src/amd/addrlib/addrtypes.h
+++ b/src/amd/addrlib/addrtypes.h
@@ -69,32 +69,34 @@ typedef int INT;
#ifndef ADDR_CDECL
#if defined(__GNUC__)
#define ADDR_CDECL __attribute__((cdecl))
#else
#define ADDR_CDECL __cdecl
#endif
#endif
#ifndef ADDR_STDCALL
#if defined(__GNUC__)
- #if defined(__AMD64__)
+ #if defined(__amd64__) || defined(__x86_64__)
#define ADDR_STDCALL
#else
#define ADDR_STDCALL __attribute__((stdcall))
#endif
#else
#define ADDR_STDCALL __stdcall
#endif
#endif
#ifndef ADDR_FASTCALL
- #if defined(__GNUC__)
+ #if defined(BRAHMA_ARM)
+ #define ADDR_FASTCALL
+ #elif defined(__GNUC__)
#if defined(__i386__)
#define ADDR_FASTCALL __attribute__((regparm(0)))
#else
#define ADDR_FASTCALL
#endif
#else
#define ADDR_FASTCALL __fastcall
#endif
#endif
diff --git a/src/amd/addrlib/amdgpu_asic_addr.h b/src/amd/addrlib/amdgpu_asic_addr.h
index ea957a88b4d..d7232ba14a2 100644
--- a/src/amd/addrlib/amdgpu_asic_addr.h
+++ b/src/amd/addrlib/amdgpu_asic_addr.h
@@ -78,20 +78,21 @@
#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A
#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64
#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E
#define AMDGPU_CARRIZO_RANGE 0x01, 0x21
#define AMDGPU_BRISTOL_RANGE 0x10, 0x21
#define AMDGPU_STONEY_RANGE 0x61, 0xFF
#define AMDGPU_VEGA10_RANGE 0x01, 0x14
+#define AMDGPU_VEGA12_RANGE 0x14, 0x28
#define AMDGPU_RAVEN_RANGE 0x01, 0x81
#define AMDGPU_EXPAND_FIX(x) x
#define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max))
#define AMDGPU_IN_RANGE(val, ...) AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__))
// ASICREV_IS(eRevisionId, revisionName)
#define ASICREV_IS(r, rn) AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE)
@@ -116,14 +117,16 @@
#define ASICREV_IS_POLARIS10_P(r) ASICREV_IS(r, POLARIS10)
#define ASICREV_IS_POLARIS11_M(r) ASICREV_IS(r, POLARIS11)
#define ASICREV_IS_POLARIS12_V(r) ASICREV_IS(r, POLARIS12)
#define ASICREV_IS_CARRIZO(r) ASICREV_IS(r, CARRIZO)
#define ASICREV_IS_CARRIZO_BRISTOL(r) ASICREV_IS(r, BRISTOL)
#define ASICREV_IS_STONEY(r) ASICREV_IS(r, STONEY)
#define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10)
#define ASICREV_IS_VEGA10_P(r) ASICREV_IS(r, VEGA10)
+#define ASICREV_IS_VEGA12_P(r) ASICREV_IS(r, VEGA12)
+#define ASICREV_IS_VEGA12_p(r) ASICREV_IS(r, VEGA12)
#define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN)
#endif // _AMDGPU_ASIC_ADDR_H
diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp
index a6ac5ecf836..5af6dd1e339 100644
--- a/src/amd/addrlib/core/addrlib.cpp
+++ b/src/amd/addrlib/core/addrlib.cpp
@@ -278,24 +278,26 @@ ADDR_E_RETURNCODE Lib::Create(
}
}
pCreateOut->hLib = pLib;
if ((pLib != NULL) &&
(returnCode == ADDR_OK))
{
pCreateOut->numEquations =
pLib->HwlGetEquationTableInfo(&pCreateOut->pEquationTable);
- }
- if ((pLib == NULL) &&
- (returnCode == ADDR_OK))
+ pLib->SetMaxAlignments();
+
+ }
+ else if ((pLib == NULL) &&
+ (returnCode == ADDR_OK))
{
// Unknown failures, we return the general error code
returnCode = ADDR_ERROR;
}
return returnCode;
}
/**
****************************************************************************************************
@@ -329,20 +331,37 @@ VOID Lib::SetChipFamily(
* @return
* N/A
****************************************************************************************************
*/
VOID Lib::SetMinPitchAlignPixels(
UINT_32 minPitchAlignPixels) ///< [in] minmum pitch alignment in pixels
{
m_minPitchAlignPixels = (minPitchAlignPixels == 0) ? 1 : minPitchAlignPixels;
}
+/**
+****************************************************************************************************
+* Lib::SetMaxAlignments
+*
+* @brief
+* Set max alignments
+*
+* @return
+* N/A
+****************************************************************************************************
+*/
+VOID Lib::SetMaxAlignments()
+{
+ m_maxBaseAlign = HwlComputeMaxBaseAlignments();
+ m_maxMetaBaseAlign = HwlComputeMaxMetaBaseAlignments();
+}
+
/**
****************************************************************************************************
* Lib::GetLib
*
* @brief
* Get AddrLib pointer
*
* @return
* An AddrLib class pointer
****************************************************************************************************
@@ -351,43 +370,90 @@ Lib* Lib::GetLib(
ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE
{
return static_cast<Addr::Lib*>(hLib);
}
/**
****************************************************************************************************
* Lib::GetMaxAlignments
*
* @brief
-* Gets maximum alignments
+* Gets maximum alignments for data surface (including FMask)
*
* @return
* ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::GetMaxAlignments(
- ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure
+ ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure
) const
{
ADDR_E_RETURNCODE returnCode = ADDR_OK;
if (GetFillSizeFieldsFlags() == TRUE)
{
- if (pOut->size != sizeof(ADDR_GET_MAX_ALIGNMENTS_OUTPUT))
+ if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT))
{
returnCode = ADDR_PARAMSIZEMISMATCH;
}
}
if (returnCode == ADDR_OK)
{
- returnCode = HwlGetMaxAlignments(pOut);
+ if (m_maxBaseAlign != 0)
+ {
+ pOut->baseAlign = m_maxBaseAlign;
+ }
+ else
+ {
+ returnCode = ADDR_NOTIMPLEMENTED;
+ }
+ }
+
+ return returnCode;
+}
+
+/**
+****************************************************************************************************
+* Lib::GetMaxMetaAlignments
+*
+* @brief
+* Gets maximum alignments for metadata (CMask, DCC and HTile)
+*
+* @return
+* ADDR_E_RETURNCODE
+****************************************************************************************************
+*/
+ADDR_E_RETURNCODE Lib::GetMaxMetaAlignments(
+ ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure
+ ) const
+{
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ if (GetFillSizeFieldsFlags() == TRUE)
+ {
+ if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT))
+ {
+ returnCode = ADDR_PARAMSIZEMISMATCH;
+ }
+ }
+
+ if (returnCode == ADDR_OK)
+ {
+ if (m_maxMetaBaseAlign != 0)
+ {
+ pOut->baseAlign = m_maxMetaBaseAlign;
+ }
+ else
+ {
+ returnCode = ADDR_NOTIMPLEMENTED;
+ }
}
return returnCode;
}
/**
****************************************************************************************************
* Lib::Bits2Number
*
* @brief
diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h
index 8db65a61c87..0cbb4e0186f 100644
--- a/src/amd/addrlib/core/addrlib.h
+++ b/src/amd/addrlib/core/addrlib.h
@@ -275,28 +275,52 @@ public:
ADDR_E_RETURNCODE Flt32ToDepthPixel(
const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const;
ADDR_E_RETURNCODE Flt32ToColorPixel(
const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const;
BOOL_32 GetExportNorm(const ELEM_GETEXPORTNORM_INPUT* pIn) const;
- ADDR_E_RETURNCODE GetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const;
+ ADDR_E_RETURNCODE GetMaxAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE GetMaxMetaAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const;
protected:
Lib(); // Constructor is protected
Lib(const Client* pClient);
- /// Pure virtual function to get max alignments
- virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const = 0;
+ /// Pure virtual function to get max base alignments
+ virtual UINT_32 HwlComputeMaxBaseAlignments() const = 0;
+
+ /// Gets maximum alignments for metadata
+ virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const
+ {
+ ADDR_NOT_IMPLEMENTED();
+
+ return 0;
+ }
+
+ VOID ValidBaseAlignments(UINT_32 alignment) const
+ {
+#if DEBUG
+ ADDR_ASSERT(alignment <= m_maxBaseAlign);
+#endif
+ }
+
+ VOID ValidMetaBaseAlignments(UINT_32 metaAlignment) const
+ {
+#if DEBUG
+ ADDR_ASSERT(metaAlignment <= m_maxMetaBaseAlign);
+#endif
+ }
//
// Initialization
//
/// Pure Virtual function for Hwl computing internal global parameters from h/w registers
virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn) = 0;
/// Pure Virtual function for Hwl converting chip family
virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0;
@@ -334,20 +358,22 @@ private:
// Disallow the copy constructor
Lib(const Lib& a);
// Disallow the assignment operator
Lib& operator=(const Lib& a);
VOID SetChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels);
+ VOID SetMaxAlignments();
+
protected:
LibClass m_class; ///< Store class type (HWL type)
ChipFamily m_chipFamily; ///< Chip family translated from the one in atiid.h
UINT_32 m_chipRevision; ///< Revision id from xxx_id.h
UINT_32 m_version; ///< Current version
//
@@ -363,20 +389,24 @@ protected:
UINT_32 m_pipeInterleaveBytes;
///< Specifies the size of contiguous address space
/// within each tiling pipe when making linear
/// accesses. (Formerly Group Size)
UINT_32 m_rowSize; ///< DRAM row size, in bytes
UINT_32 m_minPitchAlignPixels; ///< Minimum pitch alignment in pixels
UINT_32 m_maxSamples; ///< Max numSamples
+
+ UINT_32 m_maxBaseAlign; ///< Max base alignment for data surface
+ UINT_32 m_maxMetaBaseAlign; ///< Max base alignment for metadata
+
private:
ElemLib* m_pElemLib; ///< Element Lib pointer
};
Lib* SiHwlInit (const Client* pClient);
Lib* CiHwlInit (const Client* pClient);
Lib* Gfx9HwlInit (const Client* pClient);
} // Addr
diff --git a/src/amd/addrlib/core/addrlib1.cpp b/src/amd/addrlib/core/addrlib1.cpp
index c796a63436c..9c1d84289b3 100644
--- a/src/amd/addrlib/core/addrlib1.cpp
+++ b/src/amd/addrlib/core/addrlib1.cpp
@@ -421,20 +421,22 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo(
}
}
}
pOut->pitchTileMax = pOut->pitch / 8 - 1;
pOut->heightTileMax = pOut->height / 8 - 1;
pOut->sliceTileMax = pOut->pitch * pOut->height / 64 - 1;
}
}
+ ValidBaseAlignments(pOut->baseAlign);
+
return returnCode;
}
/**
****************************************************************************************************
* Lib::ComputeSurfaceInfo
*
* @brief
* Interface function stub of AddrComputeSurfaceInfo.
*
@@ -888,20 +890,22 @@ ADDR_E_RETURNCODE Lib::ComputeFmaskInfo(
}
else
{
memset(pOut, 0, sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT));
returnCode = ADDR_INVALIDPARAMS;
}
}
}
+ ValidBaseAlignments(pOut->baseAlign);
+
return returnCode;
}
/**
****************************************************************************************************
* Lib::ComputeFmaskAddrFromCoord
*
* @brief
* Interface function stub of ComputeFmaskAddrFromCoord.
*
@@ -1326,20 +1330,22 @@ ADDR_E_RETURNCODE Lib::ComputeHtileInfo(
&pOut->height,
&pOut->htileBytes,
&pOut->macroWidth,
&pOut->macroHeight,
&pOut->sliceSize,
&pOut->baseAlign);
}
}
}
+ ValidMetaBaseAlignments(pOut->baseAlign);
+
return returnCode;
}
/**
****************************************************************************************************
* Lib::ComputeCmaskInfo
*
* @brief
* Interface function stub of AddrComputeCmaskInfo
*
@@ -1392,20 +1398,22 @@ ADDR_E_RETURNCODE Lib::ComputeCmaskInfo(
&pOut->height,
&pOut->cmaskBytes,
&pOut->macroWidth,
&pOut->macroHeight,
&pOut->sliceSize,
&pOut->baseAlign,
&pOut->blockMax);
}
}
+ ValidMetaBaseAlignments(pOut->baseAlign);
+
return returnCode;
}
/**
****************************************************************************************************
* Lib::ComputeDccInfo
*
* @brief
* Interface function to compute DCC key info
*
@@ -1436,23 +1444,25 @@ ADDR_E_RETURNCODE Lib::ComputeDccInfo(
if (UseTileIndex(pIn->tileIndex))
{
input = *pIn;
ret = HwlSetupTileCfg(input.bpp, input.tileIndex, input.macroModeIndex,
&input.tileInfo, &input.tileMode);
pIn = &input;
}
- if (ADDR_OK == ret)
+ if (ret == ADDR_OK)
{
ret = HwlComputeDccInfo(pIn, pOut);
+
+ ValidMetaBaseAlignments(pOut->dccRamBaseAlign);
}
}
return ret;
}
/**
****************************************************************************************************
* Lib::ComputeHtileAddrFromCoord
*
@@ -3645,21 +3655,21 @@ VOID Lib::OptimizeTileMode(
{
tileMode = ADDR_TM_LINEAR_ALIGNED;
}
else if (IsMacroTiled(tileMode) && (pInOut->flags.tcCompatible == FALSE))
{
if (DegradeTo1D(width, height, macroWidthAlign, macroHeightAlign))
{
tileMode = (thickness == 1) ?
ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
}
- else if (thickness > 1)
+ else if ((thickness > 1) && (pInOut->flags.disallowLargeThickDegrade == 0))
{
// As in the following HwlComputeSurfaceInfo, thick modes may be degraded to
// thinner modes, we should re-evaluate whether the corresponding
// thinner modes should be degraded. If so, we choose 1D thick mode instead.
tileMode = DegradeLargeThickTile(pInOut->tileMode, pInOut->bpp);
if (tileMode != pInOut->tileMode)
{
// Get thickness again after large thick degrade
thickness = Thickness(tileMode);
diff --git a/src/amd/addrlib/core/addrlib2.cpp b/src/amd/addrlib/core/addrlib2.cpp
index ddaf597f9dd..fc9b71f3ee4 100644
--- a/src/amd/addrlib/core/addrlib2.cpp
+++ b/src/amd/addrlib/core/addrlib2.cpp
@@ -288,20 +288,22 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo(
if (pOut->pStereoInfo != NULL)
{
ComputeQbStereoInfo(pOut);
}
}
}
}
ADDR_ASSERT(pOut->surfSize != 0);
+ ValidBaseAlignments(pOut->baseAlign);
+
return returnCode;
}
/**
************************************************************************************************************************
* Lib::ComputeSurfaceInfo
*
* @brief
* Interface function stub of AddrComputeSurfaceInfo.
*
@@ -440,20 +442,22 @@ ADDR_E_RETURNCODE Lib::ComputeHtileInfo(
if ((GetFillSizeFieldsFlags() == TRUE) &&
((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT)) ||
(pOut->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT))))
{
returnCode = ADDR_INVALIDPARAMS;
}
else
{
returnCode = HwlComputeHtileInfo(pIn, pOut);
+
+ ValidMetaBaseAlignments(pOut->baseAlign);
}
return returnCode;
}
/**
************************************************************************************************************************
* Lib::ComputeHtileAddrFromCoord
*
* @brief
@@ -538,20 +542,22 @@ ADDR_E_RETURNCODE Lib::ComputeCmaskInfo(
{
returnCode = ADDR_INVALIDPARAMS;
}
else if (pIn->cMaskFlags.linear)
{
returnCode = ADDR_INVALIDPARAMS;
}
else
{
returnCode = HwlComputeCmaskInfo(pIn, pOut);
+
+ ValidMetaBaseAlignments(pOut->baseAlign);
}
return returnCode;
}
/**
************************************************************************************************************************
* Lib::ComputeCmaskAddrFromCoord
*
* @brief
@@ -681,20 +687,22 @@ ADDR_E_RETURNCODE Lib::ComputeFmaskInfo(
pOut->height = localOut.height;
pOut->baseAlign = localOut.baseAlign;
pOut->numSlices = localOut.numSlices;
pOut->fmaskBytes = static_cast<UINT_32>(localOut.surfSize);
pOut->sliceSize = static_cast<UINT_32>(localOut.sliceSize);
pOut->bpp = localIn.bpp;
pOut->numSamples = 1;
}
}
+ ValidBaseAlignments(pOut->baseAlign);
+
return returnCode;
}
/**
************************************************************************************************************************
* Lib::ComputeFmaskAddrFromCoord
*
* @brief
* Interface function stub of ComputeFmaskAddrFromCoord.
*
@@ -757,20 +765,22 @@ ADDR_E_RETURNCODE Lib::ComputeDccInfo(
if ((GetFillSizeFieldsFlags() == TRUE) &&
((pIn->size != sizeof(ADDR2_COMPUTE_DCCINFO_INPUT)) ||
(pOut->size != sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT))))
{
returnCode = ADDR_INVALIDPARAMS;
}
else
{
returnCode = HwlComputeDccInfo(pIn, pOut);
+
+ ValidMetaBaseAlignments(pOut->dccRamBaseAlign);
}
return returnCode;
}
/**
************************************************************************************************************************
* Lib::ComputeDccAddrFromCoord
*
* @brief
diff --git a/src/amd/addrlib/core/addrlib2.h b/src/amd/addrlib/core/addrlib2.h
index bea2a485a61..d82e6c0984b 100644
--- a/src/amd/addrlib/core/addrlib2.h
+++ b/src/amd/addrlib/core/addrlib2.h
@@ -473,26 +473,20 @@ protected:
return ADDR_INVALID_EQUATION_INDEX;
}
UINT_32 GetEquationIndex(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const
{
return HwlGetEquationIndex(pIn, pOut);
}
- virtual UINT_32 HwlComputeSurfaceBaseAlign(AddrSwizzleMode swizzleMode) const
- {
- ADDR_NOT_IMPLEMENTED();
- return 0;
- }
-
virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor(
const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
index e06f13c0afe..b88d3243228 100644
--- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
@@ -182,24 +182,24 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
}
else
{
numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
}
}
numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
- Dim3d metaBlkDim = {8, 8, 1};
+ Dim3d metaBlkDim = {8, 8, 1};
UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
- UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
- UINT_32 heightAmp = totalAmpBits - widthAmp;
+ UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
+ UINT_32 heightAmp = totalAmpBits - widthAmp;
metaBlkDim.w <<= widthAmp;
metaBlkDim.h <<= heightAmp;
#if DEBUG
Dim3d metaBlkDimDbg = {8, 8, 1};
for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
{
if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
{
@@ -214,53 +214,56 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
#endif
UINT_32 numMetaBlkX;
UINT_32 numMetaBlkY;
UINT_32 numMetaBlkZ;
GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
&numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
- UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
+ const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
+ UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
- if (m_settings.htileAlignFix)
+ if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
{
- sizeAlign <<= 1;
+ align *= (numPipeTotal >> 1);
}
- pOut->pitch = numMetaBlkX * metaBlkDim.w;
- pOut->height = numMetaBlkY * metaBlkDim.h;
- pOut->sliceSize = numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk * 4;
-
- pOut->metaBlkWidth = metaBlkDim.w;
- pOut->metaBlkHeight = metaBlkDim.h;
- pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
-
- pOut->baseAlign = Max(numCompressBlkPerMetaBlk * 4, sizeAlign);
+ align = Max(align, metaBlkSize);
if (m_settings.metaBaseAlignFix)
{
- pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
+ align = Max(align, GetBlockSize(pIn->swizzleMode));
}
- if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
+ if (m_settings.htileAlignFix)
{
- UINT_32 additionalAlign = numPipeTotal * numCompressBlkPerMetaBlk * 2;
+ const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
+ const INT_32 htileCachelineSizeLog2 = 11;
+ const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
- if (additionalAlign > sizeAlign)
- {
- sizeAlign = additionalAlign;
- }
+ INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
+
+ align <<= rbMaskPadding;
}
- pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
+ pOut->pitch = numMetaBlkX * metaBlkDim.w;
+ pOut->height = numMetaBlkY * metaBlkDim.h;
+ pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
+
+ pOut->metaBlkWidth = metaBlkDim.w;
+ pOut->metaBlkHeight = metaBlkDim.h;
+ pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
+
+ pOut->baseAlign = align;
+ pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
return ADDR_OK;
}
/**
************************************************************************************************************************
* Gfx9Lib::HwlComputeCmaskInfo
*
* @brief
* Interface function stub of AddrComputeCmaskInfo
@@ -326,31 +329,31 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
}
ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
#endif
UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
+ if (m_settings.metaBaseAlignFix)
+ {
+ sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
+ }
+
pOut->pitch = numMetaBlkX * metaBlkDim.w;
pOut->height = numMetaBlkY * metaBlkDim.h;
pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
- if (m_settings.metaBaseAlignFix)
- {
- pOut->baseAlign = Max(pOut->baseAlign, GetBlockSize(pIn->swizzleMode));
- }
-
pOut->metaBlkWidth = metaBlkDim.w;
pOut->metaBlkHeight = metaBlkDim.h;
pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
return ADDR_OK;
}
/**
************************************************************************************************************************
@@ -631,30 +634,30 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
&numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
if (numFrags > m_maxCompFrag)
{
sizeAlign *= (numFrags / m_maxCompFrag);
}
+ if (m_settings.metaBaseAlignFix)
+ {
+ sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
+ }
+
pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
numCompressBlkPerMetaBlk * numFrags;
pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
- if (m_settings.metaBaseAlignFix)
- {
- pOut->dccRamBaseAlign = Max(pOut->dccRamBaseAlign, GetBlockSize(pIn->swizzleMode));
- }
-
pOut->pitch = numMetaBlkX * metaBlkDim.w;
pOut->height = numMetaBlkY * metaBlkDim.h;
pOut->depth = numMetaBlkZ * metaBlkDim.d;
pOut->compressBlkWidth = compressBlkDim.w;
pOut->compressBlkHeight = compressBlkDim.h;
pOut->compressBlkDepth = compressBlkDim.d;
pOut->metaBlkWidth = metaBlkDim.w;
pOut->metaBlkHeight = metaBlkDim.h;
@@ -663,35 +666,92 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
pOut->fastClearSizePerSlice =
pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
}
return ADDR_OK;
}
/**
************************************************************************************************************************
-* Gfx9Lib::HwlGetMaxAlignments
+* Gfx9Lib::HwlComputeMaxBaseAlignments
*
* @brief
* Gets maximum alignments
* @return
-* ADDR_E_RETURNCODE
+* maximum alignments
************************************************************************************************************************
*/
-ADDR_E_RETURNCODE Gfx9Lib::HwlGetMaxAlignments(
- ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure
- ) const
+UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
{
- pOut->baseAlign = HwlComputeSurfaceBaseAlign(ADDR_SW_64KB);
+ return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB);
+}
- return ADDR_OK;
+/**
+************************************************************************************************************************
+* Gfx9Lib::HwlComputeMaxMetaBaseAlignments
+*
+* @brief
+* Gets maximum alignments for metadata
+* @return
+* maximum alignments for metadata
+************************************************************************************************************************
+*/
+UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
+{
+ // Max base alignment for Htile
+ const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
+ const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
+
+ // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
+ // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simplify the logic.
+ ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
+ const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
+
+ UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
+
+ if (maxNumPipeTotal > 2)
+ {
+ maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
+ }
+
+ maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
+
+ if (m_settings.metaBaseAlignFix)
+ {
+ maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB));
+ }
+
+ if (m_settings.htileAlignFix)
+ {
+ maxBaseAlignHtile *= maxNumPipeTotal;
+ }
+
+ // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
+
+ // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
+ UINT_32 maxBaseAlignDcc3D = 65536;
+
+ if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
+ {
+ maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
+ }
+
+ // Max base alignment for Msaa Dcc
+ UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
+
+ if (m_settings.metaBaseAlignFix)
+ {
+ maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB));
+ }
+
+ return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
}
/**
************************************************************************************************************************
* Gfx9Lib::HwlComputeCmaskAddrFromCoord
*
* @brief
* Interface function stub of AddrComputeCmaskAddrFromCoord
*
* @return
@@ -717,23 +777,25 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
if (returnCode == ADDR_OK)
{
UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
- const CoordEq* pMetaEq = GetMetaEquation({0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
- Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
+ MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
+ Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
+
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
UINT_32 zb = pIn->slice;
UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
@@ -791,23 +853,25 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
returnCode = ComputeHtileInfo(&input, &output);
if (returnCode == ADDR_OK)
{
UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
- const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
- Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
+ MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
+ Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
+
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
UINT_32 zb = pIn->slice;
UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex);
@@ -863,23 +927,25 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
returnCode = ComputeHtileInfo(&input, &output);
if (returnCode == ADDR_OK)
{
UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
- const CoordEq* pMetaEq = GetMetaEquation({0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
- Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
- metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0});
+ MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
+ Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
+ metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
+
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
pIn->swizzleMode);
UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
@@ -941,24 +1007,26 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord(
{
UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth);
UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth);
UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight);
UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth);
- const CoordEq* pMetaEq = GetMetaEquation({pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
- Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
- metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
- compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2});
+ MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
+ Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
+ metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
+ compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
+
+ const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
UINT_32 xb = pIn->x / output.metaBlkWidth;
UINT_32 yb = pIn->y / output.metaBlkHeight;
UINT_32 zb = pIn->slice / output.metaBlkDepth;
UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex);
@@ -1048,20 +1116,24 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams(
break;
case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
m_pipeInterleaveLog2 = 11;
break;
default:
ADDR_ASSERT_ALWAYS();
break;
}
+ // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() require pipe interleave to be exactly 8 bits,
+ // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
+ ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
+
switch (gbAddrConfig.bits.NUM_BANKS)
{
case ADDR_CONFIG_1_BANK:
m_banks = 1;
m_banksLog2 = 0;
break;
case ADDR_CONFIG_2_BANK:
m_banks = 2;
m_banksLog2 = 1;
break;
@@ -1144,20 +1216,33 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams(
break;
default:
ADDR_ASSERT_ALWAYS();
break;
}
m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2;
ADDR_ASSERT((m_blockVarSizeLog2 == 0) ||
((m_blockVarSizeLog2 >= 17u) && (m_blockVarSizeLog2 <= 20u)));
m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u);
+
+ if ((m_rbPerSeLog2 == 1) &&
+ (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
+ ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
+ {
+ ADDR_ASSERT(m_settings.isVega10 == FALSE);
+ ADDR_ASSERT(m_settings.isRaven == FALSE);
+
+ if (m_settings.isVega12)
+ {
+ m_settings.htileCacheRbConflict = 1;
+ }
+ }
}
else
{
valid = FALSE;
ADDR_NOT_IMPLEMENTED();
}
if (valid)
{
InitEquationTable();
@@ -1180,20 +1265,21 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
{
ChipFamily family = ADDR_CHIP_FAMILY_AI;
switch (uChipFamily)
{
case FAMILY_AI:
m_settings.isArcticIsland = 1;
m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
+ m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
m_settings.isDce12 = 1;
if (m_settings.isVega10 == 0)
{
m_settings.htileAlignFix = 1;
m_settings.applyAliasFix = 1;
}
m_settings.metaBaseAlignFix = 1;
@@ -3272,31 +3358,42 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
pOut->canXor = (pIn->flags.prt == FALSE) && (pIn->noXor == FALSE);
// Filter out improper swType and blockSet by HW restriction
if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
{
ADDR_ASSERT(IsTex2d(pOut->resourceType));
blockSet.value = AddrBlockSetMacro;
addrPreferredSwSet.value = AddrSwSetZ;
addrValidSwSet.value = AddrSwSetZ;
- if (pIn->flags.depth && pIn->flags.texture)
+ if (pIn->flags.noMetadata == FALSE)
{
- if (((bpp == 16) && (numFrags >= 4)) ||
- ((bpp == 32) && (numFrags >= 2)))
+ if (pIn->flags.depth &&
+ pIn->flags.texture &&
+ (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
{
// When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
// equation from wrong address within memory range a tile covered and use the
// garbage data for compressed Z reading which finally leads to corruption.
pOut->canXor = FALSE;
prtXor = FALSE;
}
+
+ if (m_settings.htileCacheRbConflict &&
+ (pIn->flags.depth || pIn->flags.stencil) &&
+ (slice > 1) &&
+ (pIn->flags.metaRbUnaligned == FALSE) &&
+ (pIn->flags.metaPipeUnaligned == FALSE))
+ {
+ // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
+ pOut->canXor = FALSE;
+ }
}
}
else if (ElemLib::IsBlockCompressed(pIn->format))
{
// block compressed formats (BCx, ASTC, ETC2) must be either S or D modes.
// Not sure under what circumstances "_D" would be appropriate as these formats
// are not displayable.
blockSet.value = AddrBlockSetMacro;
// This isn't to be used as texture and caller doesn't allow macro tiled.
@@ -3395,26 +3492,26 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
prtXor = FALSE;
addrValidSwSet.value = AddrSwSetD | AddrSwSetR;
}
else if (m_settings.isDcn1)
{
// _R is not supported by Dcn1
if (pIn->bpp == 64)
{
addrPreferredSwSet.value = AddrSwSetD;
- addrValidSwSet.value = AddrSwSetD;
+ addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
}
else
{
addrPreferredSwSet.value = AddrSwSetS;
- addrValidSwSet.value = AddrSwSetS | AddrSwSetD;
+ addrValidSwSet.value = AddrSwSetS;
}
blockSet.micro = FALSE;
}
else
{
ADDR_NOT_IMPLEMENTED();
returnCode = ADDR_NOTSUPPORTED;
}
}
@@ -4030,21 +4127,21 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
{
pOut->pMipInfo[0].pitch = pOut->pitch;
pOut->pMipInfo[0].height = pOut->height;
pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
pOut->pMipInfo[0].offset = 0;
}
pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
(pIn->bpp >> 3) * pIn->numFrags;
pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
- pOut->baseAlign = HwlComputeSurfaceBaseAlign(pIn->swizzleMode);
+ pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
if (pIn->flags.prt)
{
pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
}
}
}
return returnCode;
}
@@ -4755,29 +4852,26 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
}
returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
bankBits, pipeBits, &blockOffset);
blockOffset %= (1 << log2blkSize);
UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
- UINT_32 macroBlockIndex =
+ UINT_64 macroBlockIndex =
(pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
((pIn->x / localOut.blockWidth) + mipStartPos.w);
- UINT_64 macroBlockOffset = (static_cast<UINT_64>(macroBlockIndex) <<
- GetBlockSizeLog2(pIn->swizzleMode));
-
- pOut->addr = blockOffset | macroBlockOffset;
+ pOut->addr = blockOffset | (macroBlockIndex << log2blkSize);
}
else
{
UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode);
Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
(pIn->y / microBlockDim.h),
(pIn->slice / microBlockDim.d),
@@ -4828,21 +4922,21 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
blockOffset %= (1 << log2blkSize);
UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
UINT_32 sliceSizeInBlock =
(localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
- UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
+ UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
pOut->addr = blockOffset | (blockIndex << log2blkSize);
}
}
else
{
returnCode = ADDR_INVALIDPARAMS;
}
return returnCode;
diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.h b/src/amd/addrlib/gfx9/gfx9addrlib.h
index 1f233a4ff91..7c61a40880e 100644
--- a/src/amd/addrlib/gfx9/gfx9addrlib.h
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.h
@@ -48,33 +48,33 @@ namespace V2
************************************************************************************************************************
*/
struct Gfx9ChipSettings
{
struct
{
// Asic/Generation name
UINT_32 isArcticIsland : 1;
UINT_32 isVega10 : 1;
UINT_32 isRaven : 1;
- UINT_32 reserved0 : 29;
+ UINT_32 isVega12 : 1;
// Display engine IP version name
UINT_32 isDce12 : 1;
UINT_32 isDcn1 : 1;
- UINT_32 reserved1 : 29;
// Misc configuration bits
UINT_32 metaBaseAlignFix : 1;
UINT_32 depthPipeXorDisable : 1;
UINT_32 htileAlignFix : 1;
UINT_32 applyAliasFix : 1;
- UINT_32 reserved2 : 28;
+ UINT_32 htileCacheRbConflict: 1;
+ UINT_32 reserved2 : 27;
};
};
/**
************************************************************************************************************************
* @brief GFX9 data surface type.
************************************************************************************************************************
*/
enum Gfx9DataType
{
@@ -114,23 +114,20 @@ struct MetaEqParams
class Gfx9Lib : public Lib
{
public:
/// Creates Gfx9Lib object
static Addr::Lib* CreateObj(const Client* pClient)
{
VOID* pMem = Object::ClientAlloc(sizeof(Gfx9Lib), pClient);
return (pMem != NULL) ? new (pMem) Gfx9Lib(pClient) : NULL;
}
- virtual BOOL_32 IsValidDisplaySwizzleMode(
- const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
-
protected:
Gfx9Lib(const Client* pClient);
virtual ~Gfx9Lib();
virtual BOOL_32 HwlIsStandardSwizzle(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].isStd ||
(IsTex3d(resourceType) && m_swizzleModeTable[swizzleMode].isDisp);
@@ -217,21 +214,21 @@ protected:
*ppEquationTable = m_equationTable;
return m_numEquations;
}
virtual BOOL_32 IsEquationSupported(
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
UINT_32 elementBytesLog2) const;
- virtual UINT_32 HwlComputeSurfaceBaseAlign(AddrSwizzleMode swizzleMode) const
+ UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const
{
UINT_32 baseAlign;
if (IsXor(swizzleMode))
{
baseAlign = GetBlockSize(swizzleMode);
}
else
{
baseAlign = 256;
@@ -393,25 +390,25 @@ protected:
ADDR_EQUATION m_equationTable[EquationTableSize];
// Number of equation entries in the table
UINT_32 m_numEquations;
// Equation lookup table according to bpp and tile index
UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwMode][MaxElementBytesLog2];
static const UINT_32 MaxCachedMetaEq = 2;
private:
- virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(
- ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const;
+ virtual UINT_32 HwlComputeMaxBaseAlignments() const;
- virtual BOOL_32 HwlInitGlobalParams(
- const ADDR_CREATE_INPUT* pCreateIn);
+ virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;
+
+ virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);
VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const;
VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
UINT_32 elementBytesLog2, UINT_32 numSamplesLog2) const;
VOID GetPipeEquation(CoordEq* pPipeEq, CoordEq* pDataEq,
UINT_32 pipeInterleaveLog2, UINT_32 numPipesLog2,
UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType,
@@ -427,20 +424,22 @@ private:
const CoordEq* GetMetaEquation(const MetaEqParams& metaEqParams);
virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim,
BOOL_32 dataThick, ADDR2_META_MIP_INFO* pInfo,
UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth,
UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const;
+ BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
+
ADDR_E_RETURNCODE ComputeSurfaceLinearPadding(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
UINT_32* pMipmap0PaddedWidth,
UINT_32* pSlice0PaddedHeight,
ADDR2_MIP_INFO* pMipInfo = NULL) const;
Gfx9ChipSettings m_settings;
CoordEq m_cachedMetaEq[MaxCachedMetaEq];
MetaEqParams m_cachedMetaEqKey[MaxCachedMetaEq];
diff --git a/src/amd/addrlib/r800/ciaddrlib.cpp b/src/amd/addrlib/r800/ciaddrlib.cpp
index 322dcf64ffd..1b982c5c08b 100644
--- a/src/amd/addrlib/r800/ciaddrlib.cpp
+++ b/src/amd/addrlib/r800/ciaddrlib.cpp
@@ -729,21 +729,21 @@ ADDR_E_RETURNCODE CiLib::HwlComputeSurfaceInfo(
{
pOut->macroModeIndex = TileIndexInvalid;
ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn;
localIn.tileIndex = TileIndexInvalid;
localIn.pTileInfo = NULL;
localIn.flags.tcCompatible = FALSE;
SiLib::HwlComputeSurfaceInfo(&localIn, pOut);
- ADDR_ASSERT(((MinDepth2DThinIndex <= pOut->tileIndex) && (MaxDepth2DThinIndex >= pOut->tileIndex)) || pOut->tileIndex == Depth1DThinIndex);
+ ADDR_ASSERT((MinDepth2DThinIndex <= pOut->tileIndex) && (MaxDepth2DThinIndex >= pOut->tileIndex));
depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut);
}
if ((depthStencil2DTileConfigMatch == FALSE) &&
(pIn->numSamples <= 1))
{
pOut->macroModeIndex = TileIndexInvalid;
ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn;
@@ -2150,57 +2150,75 @@ VOID CiLib::HwlPadDimensions(
*pPitchAlign = dccFastClearPitchAlignInPixels;
}
}
}
}
}
/**
****************************************************************************************************
-* CiLib::HwlGetMaxAlignments
+* CiLib::HwlComputeMaxBaseAlignments
*
* @brief
* Gets maximum alignments
* @return
-* ADDR_E_RETURNCODE
+* maximum alignments
****************************************************************************************************
*/
-ADDR_E_RETURNCODE CiLib::HwlGetMaxAlignments(
- ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure
- ) const
+UINT_32 CiLib::HwlComputeMaxBaseAlignments() const
{
const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info);
// Initial size is 64 KiB for PRT.
- UINT_64 maxBaseAlign = 64 * 1024;
+ UINT_32 maxBaseAlign = 64 * 1024;
for (UINT_32 i = 0; i < m_noOfMacroEntries; i++)
{
// The maximum tile size is 16 byte-per-pixel and either 8-sample or 8-slice.
UINT_32 tileSize = m_macroTileTable[i].tileSplitBytes;
- UINT_64 baseAlign = tileSize * pipes * m_macroTileTable[i].banks *
+ UINT_32 baseAlign = tileSize * pipes * m_macroTileTable[i].banks *
m_macroTileTable[i].bankWidth * m_macroTileTable[i].bankHeight;
if (baseAlign > maxBaseAlign)
{
maxBaseAlign = baseAlign;
}
}
- if (pOut != NULL)
+ return maxBaseAlign;
+}
+
+/**
+****************************************************************************************************
+* CiLib::HwlComputeMaxMetaBaseAlignments
+*
+* @brief
+* Gets maximum alignments for metadata
+* @return
+* maximum alignments for metadata
+****************************************************************************************************
+*/
+UINT_32 CiLib::HwlComputeMaxMetaBaseAlignments() const
+{
+ UINT_32 maxBank = 1;
+
+ for (UINT_32 i = 0; i < m_noOfMacroEntries; i++)
{
- pOut->baseAlign = maxBaseAlign;
+ if ((m_settings.isVolcanicIslands) && IsMacroTiled(m_tileTable[i].mode))
+ {
+ maxBank = Max(maxBank, m_macroTileTable[i].banks);
+ }
}
- return ADDR_OK;
+ return SiLib::HwlComputeMaxMetaBaseAlignments() * maxBank;
}
/**
****************************************************************************************************
* CiLib::DepthStencilTileCfgMatch
*
* @brief
* Try to find a tile index for stencil which makes its tile config parameters matches to depth
* @return
* TRUE if such tile index for stencil can be found
diff --git a/src/amd/addrlib/r800/ciaddrlib.h b/src/amd/addrlib/r800/ciaddrlib.h
index c11b678574f..28c19f06031 100644
--- a/src/amd/addrlib/r800/ciaddrlib.h
+++ b/src/amd/addrlib/r800/ciaddrlib.h
@@ -130,21 +130,23 @@ protected:
ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const;
- virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const;
+ virtual UINT_32 HwlComputeMaxBaseAlignments() const;
+
+ virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;
virtual VOID HwlPadDimensions(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 mipLevel,
UINT_32* pPitch, UINT_32 *PitchAlign, UINT_32 height, UINT_32 heightAlign) const;
virtual VOID HwlComputeSurfaceAlignmentsMacroTiled(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp b/src/amd/addrlib/r800/egbaddrlib.cpp
index 99aa6cf4cdb..3947cfda2fd 100644
--- a/src/amd/addrlib/r800/egbaddrlib.cpp
+++ b/src/amd/addrlib/r800/egbaddrlib.cpp
@@ -93,25 +93,27 @@ BOOL_32 EgBasedLib::DispatchComputeSurfaceInfo(
UINT_32 numSamples = pIn->numSamples;
UINT_32 numFrags = ((pIn->numFrags == 0) ? numSamples : pIn->numFrags);
UINT_32 pitch = pIn->width;
UINT_32 height = pIn->height;
UINT_32 numSlices = pIn->numSlices;
UINT_32 mipLevel = pIn->mipLevel;
ADDR_SURFACE_FLAGS flags = pIn->flags;
ADDR_TILEINFO tileInfoDef = {0};
ADDR_TILEINFO* pTileInfo = &tileInfoDef;
-
- UINT_32 padDims = 0;
+ UINT_32 padDims = 0;
BOOL_32 valid;
- tileMode = DegradeLargeThickTile(tileMode, bpp);
+ if (pIn->flags.disallowLargeThickDegrade == 0)
+ {
+ tileMode = DegradeLargeThickTile(tileMode, bpp);
+ }
// Only override numSamples for NI above
if (m_chipFamily >= ADDR_CHIP_FAMILY_NI)
{
if (numFrags != numSamples) // This means EQAA
{
// The real surface size needed is determined by number of fragments
numSamples = numFrags;
}
diff --git a/src/amd/addrlib/r800/siaddrlib.cpp b/src/amd/addrlib/r800/siaddrlib.cpp
index 0fb5c2befdc..3c17a7aa8d7 100644
--- a/src/amd/addrlib/r800/siaddrlib.cpp
+++ b/src/amd/addrlib/r800/siaddrlib.cpp
@@ -3461,62 +3461,77 @@ VOID SiLib::HwlSelectTileMode(
pInOut->flags.opt4Space = TRUE;
// Optimize tile mode if possible
OptimizeTileMode(pInOut);
HwlOverrideTileMode(pInOut);
}
/**
****************************************************************************************************
-* SiLib::HwlGetMaxAlignments
+* SiLib::HwlComputeMaxBaseAlignments
*
* @brief
* Gets maximum alignments
* @return
-* ADDR_E_RETURNCODE
+* maximum alignments
****************************************************************************************************
*/
-ADDR_E_RETURNCODE SiLib::HwlGetMaxAlignments(
- ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut ///< [out] output structure
- ) const
+UINT_32 SiLib::HwlComputeMaxBaseAlignments() const
{
const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info);
// Initial size is 64 KiB for PRT.
- UINT_64 maxBaseAlign = 64 * 1024;
+ UINT_32 maxBaseAlign = 64 * 1024;
for (UINT_32 i = 0; i < m_noOfEntries; i++)
{
if ((IsMacroTiled(m_tileTable[i].mode) == TRUE) &&
(IsPrtTileMode(m_tileTable[i].mode) == FALSE))
{
// The maximum tile size is 16 byte-per-pixel and either 8-sample or 8-slice.
UINT_32 tileSize = Min(m_tileTable[i].info.tileSplitBytes,
MicroTilePixels * 8 * 16);
- UINT_64 baseAlign = tileSize * pipes * m_tileTable[i].info.banks *
+ UINT_32 baseAlign = tileSize * pipes * m_tileTable[i].info.banks *
m_tileTable[i].info.bankWidth * m_tileTable[i].info.bankHeight;
if (baseAlign > maxBaseAlign)
{
maxBaseAlign = baseAlign;
}
}
}
- if (pOut != NULL)
+ return maxBaseAlign;
+}
+
+/**
+****************************************************************************************************
+* SiLib::HwlComputeMaxMetaBaseAlignments
+*
+* @brief
+* Gets maximum alignments for metadata
+* @return
+* maximum alignments for metadata
+****************************************************************************************************
+*/
+UINT_32 SiLib::HwlComputeMaxMetaBaseAlignments() const
+{
+ UINT_32 maxPipe = 1;
+
+ for (UINT_32 i = 0; i < m_noOfEntries; i++)
{
- pOut->baseAlign = maxBaseAlign;
+ maxPipe = Max(maxPipe, HwlGetPipes(&m_tileTable[i].info));
}
- return ADDR_OK;
+ return m_pipeInterleaveBytes * maxPipe;
}
/**
****************************************************************************************************
* SiLib::HwlComputeSurfaceAlignmentsMacroTiled
*
* @brief
* Hardware layer function to compute alignment request for macro tile mode
*
* @return
diff --git a/src/amd/addrlib/r800/siaddrlib.h b/src/amd/addrlib/r800/siaddrlib.h
index f07fc31a57d..9c879fe6c36 100644
--- a/src/amd/addrlib/r800/siaddrlib.h
+++ b/src/amd/addrlib/r800/siaddrlib.h
@@ -256,21 +256,23 @@ protected:
UINT_32* pNumSamples) const;
virtual BOOL_32 HwlReduceBankWidthHeight(
UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
UINT_32 bankHeightAlign, UINT_32 pipes,
ADDR_TILEINFO* pTileInfo) const
{
return TRUE;
}
- virtual ADDR_E_RETURNCODE HwlGetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const;
+ virtual UINT_32 HwlComputeMaxBaseAlignments() const;
+
+ virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;
virtual VOID HwlComputeSurfaceAlignmentsMacroTiled(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
// Get equation table pointer and number of equations
virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const
{
*ppEquationTable = m_equationTable;
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 92bdf1dedec..603b7058bdc 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -156,21 +156,21 @@ static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT * pInpu
}
ADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info,
const struct amdgpu_gpu_info *amdinfo,
uint64_t *max_alignment)
{
ADDR_CREATE_INPUT addrCreateInput = {0};
ADDR_CREATE_OUTPUT addrCreateOutput = {0};
ADDR_REGISTER_VALUE regValue = {0};
ADDR_CREATE_FLAGS createFlags = {{0}};
- ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
+ ADDR_GET_MAX_ALINGMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
ADDR_E_RETURNCODE addrRet;
addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
regValue.gbAddrConfig = amdinfo->gb_addr_cfg;
createFlags.value = 0;
addrlib_family_rev_id(info->family, &addrCreateInput.chipFamily, &addrCreateInput.chipRevision);
if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)
--
2.15.1
More information about the mesa-dev
mailing list