Mesa (main): amd: update addrlib - trivial changes
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Sat Jul 9 21:27:30 UTC 2022
Module: Mesa
Branch: main
Commit: 3514b732445d0cc22bdbd869d4117af80aa1f805
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3514b732445d0cc22bdbd869d4117af80aa1f805
Author: Marek Olšák <marek.olsak at amd.com>
Date: Sun Jul 3 12:49:51 2022 -0400
amd: update addrlib - trivial changes
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17410>
---
src/amd/addrlib/src/amdgpu_asic_addr.h | 115 ++++++++++++------------
src/amd/addrlib/src/core/addrlib2.cpp | 2 +-
src/amd/addrlib/src/core/addrlib2.h | 2 +
src/amd/addrlib/src/gfx10/gfx10SwizzlePattern.h | 3 +
src/amd/addrlib/src/gfx10/gfx10addrlib.cpp | 53 ++++++++---
src/amd/addrlib/src/gfx10/gfx10addrlib.h | 23 +++--
src/amd/common/ac_gpu_info.c | 6 ++
7 files changed, 127 insertions(+), 77 deletions(-)
diff --git a/src/amd/addrlib/src/amdgpu_asic_addr.h b/src/amd/addrlib/src/amdgpu_asic_addr.h
index 28faf83e7e1..36d39e5e4eb 100644
--- a/src/amd/addrlib/src/amdgpu_asic_addr.h
+++ b/src/amd/addrlib/src/amdgpu_asic_addr.h
@@ -33,20 +33,20 @@
#define AMDGPU_VENDOR_IS_AMD(v) ((v == ATI_VENDOR_ID) || (v == AMD_VENDOR_ID))
#define FAMILY_UNKNOWN 0x00
-#define FAMILY_TN 0x69
-#define FAMILY_SI 0x6E
-#define FAMILY_CI 0x78
-#define FAMILY_KV 0x7D
-#define FAMILY_VI 0x82
-#define FAMILY_POLARIS 0x82
-#define FAMILY_CZ 0x87
-#define FAMILY_AI 0x8D
-#define FAMILY_RV 0x8E
-#define FAMILY_NV 0x8F
-#define FAMILY_VGH 0x90
+#define FAMILY_TN 0x69 //# 105 / Trinity APUs
+#define FAMILY_SI 0x6E //# 110 / Southern Islands: Tahiti, Pitcairn, CapeVerde, Oland, Hainan
+#define FAMILY_CI 0x78 //# 120 / Sea Islands: Bonaire, Hawaii
+#define FAMILY_KV 0x7D //# 125 / Kaveri APUs: Spectre, Spooky, Kalindi, Godavari
+#define FAMILY_VI 0x82 //# 130 / Volcanic Islands: Iceland, Tonga, Fiji
+#define FAMILY_POLARIS 0x82 //# 130 / Polaris: 10, 11, 12
+#define FAMILY_CZ 0x87 //# 135 / Carrizo APUs: Carrizo, Stoney
+#define FAMILY_AI 0x8D //# 141 / Vega: 10, 20
+#define FAMILY_RV 0x8E //# 142 / Raven
+#define FAMILY_NV 0x8F //# 143 / Navi: 10
+#define FAMILY_VGH 0x90 //# 144 / Van Gogh
#define FAMILY_GFX1100 0x91
#define FAMILY_GFX1103 0x94
-#define FAMILY_RMB 0x92
+#define FAMILY_RMB 0x92 //# 146 / Rembrandt
#define FAMILY_GC_10_3_6 0x95
#define FAMILY_GC_10_3_7 0x97
@@ -62,69 +62,67 @@
#define FAMILY_IS_AI(f) FAMILY_IS(f, AI)
#define FAMILY_IS_RV(f) FAMILY_IS(f, RV)
#define FAMILY_IS_NV(f) FAMILY_IS(f, NV)
-#define FAMILY_IS_RMB(f) FAMILY_IS(f, RMB)
#define FAMILY_IS_GFX1100(f) FAMILY_IS(f, GFX1100)
-#define FAMILY_IS_GFX1103(f) FAMILY_IS(f, GFX1103)
+#define FAMILY_IS_RMB(f) FAMILY_IS(f, RMB)
#define AMDGPU_UNKNOWN 0xFF
-#define AMDGPU_TAHITI_RANGE 0x05, 0x14
-#define AMDGPU_PITCAIRN_RANGE 0x15, 0x28
-#define AMDGPU_CAPEVERDE_RANGE 0x29, 0x3C
-#define AMDGPU_OLAND_RANGE 0x3C, 0x46
-#define AMDGPU_HAINAN_RANGE 0x46, 0xFF
+#define AMDGPU_TAHITI_RANGE 0x05, 0x14 //# 5 <= x < 20
+#define AMDGPU_PITCAIRN_RANGE 0x15, 0x28 //# 21 <= x < 40
+#define AMDGPU_CAPEVERDE_RANGE 0x29, 0x3C //# 41 <= x < 60
+#define AMDGPU_OLAND_RANGE 0x3C, 0x46 //# 60 <= x < 70
+#define AMDGPU_HAINAN_RANGE 0x46, 0xFF //# 70 <= x < max
-#define AMDGPU_BONAIRE_RANGE 0x14, 0x28
-#define AMDGPU_HAWAII_RANGE 0x28, 0x3C
+#define AMDGPU_BONAIRE_RANGE 0x14, 0x28 //# 20 <= x < 40
+#define AMDGPU_HAWAII_RANGE 0x28, 0x3C //# 40 <= x < 60
-#define AMDGPU_SPECTRE_RANGE 0x01, 0x41
-#define AMDGPU_SPOOKY_RANGE 0x41, 0x81
-#define AMDGPU_KALINDI_RANGE 0x81, 0xA1
-#define AMDGPU_GODAVARI_RANGE 0xA1, 0xFF
+#define AMDGPU_SPECTRE_RANGE 0x01, 0x41 //# 1 <= x < 65
+#define AMDGPU_SPOOKY_RANGE 0x41, 0x81 //# 65 <= x < 129
+#define AMDGPU_KALINDI_RANGE 0x81, 0xA1 //# 129 <= x < 161
+#define AMDGPU_GODAVARI_RANGE 0xA1, 0xFF //# 161 <= x < max
-#define AMDGPU_ICELAND_RANGE 0x01, 0x14
-#define AMDGPU_TONGA_RANGE 0x14, 0x28
-#define AMDGPU_FIJI_RANGE 0x3C, 0x50
+#define AMDGPU_ICELAND_RANGE 0x01, 0x14 //# 1 <= x < 20
+#define AMDGPU_TONGA_RANGE 0x14, 0x28 //# 20 <= x < 40
+#define AMDGPU_FIJI_RANGE 0x3C, 0x50 //# 60 <= x < 80
-#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A
-#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64
-#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E
-#define AMDGPU_VEGAM_RANGE 0x6E, 0xFF
+#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A //# 80 <= x < 90
+#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64 //# 90 <= x < 100
+#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E //# 100 <= x < 110
+#define AMDGPU_VEGAM_RANGE 0x6E, 0xFF //# 110 <= x < max
-#define AMDGPU_CARRIZO_RANGE 0x01, 0x21
-#define AMDGPU_STONEY_RANGE 0x61, 0xFF
+#define AMDGPU_CARRIZO_RANGE 0x01, 0x21 //# 1 <= x < 33
+#define AMDGPU_BRISTOL_RANGE 0x10, 0x21 //# 16 <= x < 33
+#define AMDGPU_STONEY_RANGE 0x61, 0xFF //# 97 <= x < max
-#define AMDGPU_VEGA10_RANGE 0x01, 0x14
-#define AMDGPU_VEGA12_RANGE 0x14, 0x28
-#define AMDGPU_VEGA20_RANGE 0x28, 0x32
-#define AMDGPU_ARCTURUS_RANGE 0x32, 0x3C
-#define AMDGPU_ALDEBARAN_RANGE 0x3C, 0xFF
+#define AMDGPU_VEGA10_RANGE 0x01, 0x14 //# 1 <= x < 20
+#define AMDGPU_VEGA12_RANGE 0x14, 0x28 //# 20 <= x < 40
+#define AMDGPU_VEGA20_RANGE 0x28, 0xFF //# 40 <= x < max
-#define AMDGPU_RAVEN_RANGE 0x01, 0x81
-#define AMDGPU_RAVEN2_RANGE 0x81, 0x91
-#define AMDGPU_RENOIR_RANGE 0x91, 0xFF
+#define AMDGPU_RAVEN_RANGE 0x01, 0x81 //# 1 <= x < 129
+#define AMDGPU_RAVEN2_RANGE 0x81, 0x90 //# 129 <= x < 144
+#define AMDGPU_RENOIR_RANGE 0x91, 0xFF //# 145 <= x < max
-#define AMDGPU_NAVI10_RANGE 0x01, 0x0A
-#define AMDGPU_NAVI12_RANGE 0x0A, 0x14
-#define AMDGPU_NAVI14_RANGE 0x14, 0x28
-#define AMDGPU_NAVI21_RANGE 0x28, 0x32
-#define AMDGPU_NAVI22_RANGE 0x32, 0x3C
-#define AMDGPU_NAVI23_RANGE 0x3C, 0x46
-#define AMDGPU_NAVI24_RANGE 0x46, 0x50
+#define AMDGPU_NAVI10_RANGE 0x01, 0x0A //# 1 <= x < 10
+#define AMDGPU_NAVI12_RANGE 0x0A, 0x14 //# 10 <= x < 20
+#define AMDGPU_NAVI14_RANGE 0x14, 0x28 //# 20 <= x < 40
+#define AMDGPU_NAVI21_RANGE 0x28, 0x32 //# 40 <= x < 50
+#define AMDGPU_NAVI22_RANGE 0x32, 0x3C //# 50 <= x < 60
+#define AMDGPU_NAVI23_RANGE 0x3C, 0x46 //# 60 <= x < 70
+#define AMDGPU_NAVI24_RANGE 0x46, 0x50 //# 70 <= x < 80
-#define AMDGPU_VANGOGH_RANGE 0x01, 0xFF
+#define AMDGPU_VANGOGH_RANGE 0x01, 0xFF //# 1 <= x < max
-#define AMDGPU_GFX1100_RANGE 0x01, 0x10
-#define AMDGPU_GFX1101_RANGE 0x20, 0xFF
-#define AMDGPU_GFX1102_RANGE 0x10, 0x20
+#define AMDGPU_GFX1100_RANGE 0x01, 0x10 //# 01 <= x < 16
+#define AMDGPU_GFX1101_RANGE 0x20, 0xFF //# 32 <= x < 255
+#define AMDGPU_GFX1102_RANGE 0x10, 0x20 //# 16 <= x < 32
-#define AMDGPU_GFX1103_RANGE 0x01, 0xFF
+#define AMDGPU_GFX1103_RANGE 0x01, 0xFF //# 1 <= x < max
-#define AMDGPU_REMBRANDT_RANGE 0x01, 0xFF
+#define AMDGPU_REMBRANDT_RANGE 0x01, 0xFF //# 01 <= x < 255
-#define AMDGPU_GFX1036_RANGE 0x01, 0xFF
+#define AMDGPU_GFX1036_RANGE 0x01, 0xFF //# 1 <= x < max
-#define AMDGPU_GFX1037_RANGE 0x01, 0xFF
+#define AMDGPU_GFX1037_RANGE 0x01, 0xFF //# 1 <= x < max
#define AMDGPU_EXPAND_FIX(x) x
#define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max))
@@ -157,6 +155,7 @@
#define ASICREV_IS_VEGAM_P(r) ASICREV_IS(r, VEGAM)
#define ASICREV_IS_CARRIZO(r) ASICREV_IS(r, CARRIZO)
+#define ASICREV_IS_CARRIZO_BRISTOL(r) ASICREV_IS(r, BRISTOL)
#define ASICREV_IS_STONEY(r) ASICREV_IS(r, STONEY)
#define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10)
@@ -164,8 +163,6 @@
#define ASICREV_IS_VEGA12_P(r) ASICREV_IS(r, VEGA12)
#define ASICREV_IS_VEGA12_p(r) ASICREV_IS(r, VEGA12)
#define ASICREV_IS_VEGA20_P(r) ASICREV_IS(r, VEGA20)
-#define ASICREV_IS_ARCTURUS(r) ASICREV_IS(r, ARCTURUS)
-#define ASICREV_IS_ALDEBARAN(r) ASICREV_IS(r, ALDEBARAN)
#define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN)
#define ASICREV_IS_RAVEN2(r) ASICREV_IS(r, RAVEN2)
diff --git a/src/amd/addrlib/src/core/addrlib2.cpp b/src/amd/addrlib/src/core/addrlib2.cpp
index a1638c2f478..e8e77d2d97d 100644
--- a/src/amd/addrlib/src/core/addrlib2.cpp
+++ b/src/amd/addrlib/src/core/addrlib2.cpp
@@ -2065,7 +2065,7 @@ BOOL_32 Lib::IsBlockTypeAvaiable(
* Lib::BlockTypeWithinMemoryBudget
*
* @brief
-* Determine whether a new block type is acceptible based on memory waste ratio
+* Determine whether a new block type is acceptable based on memory waste ratio. Will favor larger block types.
*
* @return
* N/A
diff --git a/src/amd/addrlib/src/core/addrlib2.h b/src/amd/addrlib/src/core/addrlib2.h
index 6bf8368f7b6..e381054fff6 100644
--- a/src/amd/addrlib/src/core/addrlib2.h
+++ b/src/amd/addrlib/src/core/addrlib2.h
@@ -148,6 +148,8 @@ union ADDR_BIT_SETTING
* @brief Swizzle pattern information
************************************************************************************************************************
*/
+// Accessed by index representing the logbase2 of (8bpp/16bpp/32bpp/64bpp/128bpp)
+// contains the indices which map to 2D arrays SW_PATTERN_NIBBLE[0-9] which contain sections of an index equation. They are dependent on pipe# and bpe#
struct ADDR_SW_PATINFO
{
UINT_8 maxItemCount;
diff --git a/src/amd/addrlib/src/gfx10/gfx10SwizzlePattern.h b/src/amd/addrlib/src/gfx10/gfx10SwizzlePattern.h
index 4ac52fd2787..65af3812a17 100644
--- a/src/amd/addrlib/src/gfx10/gfx10SwizzlePattern.h
+++ b/src/amd/addrlib/src/gfx10/gfx10SwizzlePattern.h
@@ -3738,6 +3738,7 @@ const ADDR_SW_PATINFO GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO[] =
{ 3, 27, 344, 365, 124, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus
};
+// Nibble 2D arrays contain arrays of sections of the index equation. An index equation depends on the swizzle mode, pipe#, and bpe#
const UINT_64 GFX10_SW_PATTERN_NIBBLE01[][8] =
{
{X0, X1, X2, X3, Y0, Y1, Y2, Y3, }, // 0
@@ -4294,6 +4295,8 @@ const UINT_64 GFX10_SW_PATTERN_NIBBLE2[][4] =
{Y4^X9^Y9, X4^Y4, Y2^Y5^X8, Z2^X5^Y8, }, // 507
{Z3^Y4^X9^Y9, X4^Y4, Y1^Y5^X8, Z2^X5^Y8, }, // 508
{Z3^Y4^X9^Y9, Y1^X4^Y4, X1^Y5^X8, Z2^X5^Y8, }, // 509
+ {Y3^X5^Y5, Z0^X4^Y4, Y2, X3, }, // 510
+ {Y3^X5^Y5, X2^X4^Y4, Y2, X3, }, // 511
};
const UINT_64 GFX10_SW_PATTERN_NIBBLE3[][4] =
diff --git a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
index 134715b73d4..d130487e6dc 100644
--- a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
+++ b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
@@ -93,7 +93,7 @@ const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
{{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_S_X
{{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_D_X
- {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
+ {{0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_4KB_R_X
{{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_Z_X
{{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_S_X
@@ -1460,12 +1460,12 @@ VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
const
{
+ // Get full swizzle pattern and store it as an ADDR_BIT_SETTING list
ADDR_BIT_SETTING fullSwizzlePattern[20];
GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
-
pEquation->numBits = blockSizeLog2;
pEquation->stackedDepthSlices = FALSE;
@@ -1994,37 +1994,46 @@ VOID Gfx10Lib::InitEquationTable()
{
memset(m_equationTable, 0, sizeof(m_equationTable));
+ // Iterate through resourceTypes, up to MaxRsrcType where a "resourceType" refers to AddrResourceType (1D/2D/3D)
+ // resources. This starts with rsrcTypeIdx = 0, however there is an offset added that will start us off at
+ // computing 2D resources.
for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
{
+ // Add offset. Start iterating from ADDR_RSRC_TEX_2D
const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
+ // Iterate through the maximum number of swizzlemodes a type can hold
for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
{
const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
+ // Iterate through the different bits-per-pixel settings (8bpp/16bpp/32bpp/64bpp/128bpp)
for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
{
UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
+ // May or may not return an ADDR_SW_PATINFO for a completely different swizzle mode, essentially
+ // overwriting the choice.
const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
if (pPatInfo != NULL)
{
ADDR_ASSERT(IsValidSwMode(swMode));
- if (pPatInfo->maxItemCount <= 3)
+ if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex
{
ADDR_EQUATION equation = {};
+ // Passing in pPatInfo to get the addr equation
ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
equationIndex = m_numEquations;
ADDR_ASSERT(equationIndex < EquationTableSize);
-
+ // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
m_equationTable[equationIndex] = equation;
-
+ // Increment m_numEquations
m_numEquations++;
}
- else
+ else // There is no equationIndex
{
// We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
@@ -2033,7 +2042,8 @@ VOID Gfx10Lib::InitEquationTable()
ADDR_ASSERT(m_settings.supportRbPlus == 1);
}
}
-
+ // equationIndex, which is used to look up equations in m_equationTable, will be cached for every
+ // iteration in this nested for-loop
m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
}
}
@@ -2756,7 +2766,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
* Gfx10Lib::HwlGetPreferredSurfaceSetting
*
* @brief
-* Internal function to get suggested surface information for cliet to use
+* Internal function to get suggested surface information for client to use
*
* @return
* ADDR_E_RETURNCODE
@@ -3008,6 +3018,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
}
+
if (allowedSwModeSet.value != 0)
{
#if DEBUG
@@ -3077,11 +3088,13 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
allowedSwModeSet.swLinear = 0;
}
+ // A bitfield where each bit represents a block type. Each swizzle mode maps to a block.
ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
// Determine block size if there are 2 or more block type candidates
if (IsPow2(allowedBlockSet.value) == FALSE)
{
+ // Tracks a valid SwizzleMode for each valid block type
AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
@@ -3104,16 +3117,18 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
}
+ // Tracks the size of each valid swizzle mode's surface in bytes
UINT_64 padSize[AddrBlockMaxTiledType] = {};
const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
- UINT_32 minSizeBlk = AddrBlockMicro;
- UINT_64 minSize = 0;
+ UINT_32 minSizeBlk = AddrBlockMicro; // Tracks the most optimal block to use
+ UINT_64 minSize = 0; // Tracks the minimum acceptable block type
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
+ // Iterate through all block types
for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
{
if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
@@ -3140,6 +3155,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
}
else
{
+ // Checks if the block type is within the memory budget but favors larger blocks
if (BlockTypeWithinMemoryBudget(
minSize,
padSize[i],
@@ -4073,6 +4089,8 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
UINT_32 numFrag ///< Number of fragment
) const
{
+ // Now elemLog2 is going to be used to access the correct index inside of the pPatInfo array so we will start from
+ // the right location
const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
const ADDR_SW_PATINFO* patInfo = NULL;
const UINT_32 swizzleMask = 1 << swizzleMode;
@@ -4135,8 +4153,15 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
{
if (IsRtOptSwizzle(swizzleMode))
{
- patInfo = m_settings.supportRbPlus ?
- GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
+ if (swizzleMode == ADDR_SW_4KB_R_X)
+ {
+ patInfo = NULL;
+ }
+ else
+ {
+ patInfo = m_settings.supportRbPlus ?
+ GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
+ }
}
else if (IsZOrderSwizzle(swizzleMode))
{
@@ -4230,6 +4255,10 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
patInfo = m_settings.supportRbPlus ?
GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
}
+ else if (swizzleMode == ADDR_SW_4KB_R_X)
+ {
+ patInfo = NULL;
+ }
else
{
ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
diff --git a/src/amd/addrlib/src/gfx10/gfx10addrlib.h b/src/amd/addrlib/src/gfx10/gfx10addrlib.h
index a5f8424e81e..528672a1e5a 100644
--- a/src/amd/addrlib/src/gfx10/gfx10addrlib.h
+++ b/src/amd/addrlib/src/gfx10/gfx10addrlib.h
@@ -58,8 +58,8 @@ struct Gfx10ChipSettings
UINT_32 supportRbPlus : 1;
UINT_32 dsMipmapHtileFix : 1;
UINT_32 dccUnsup3DSwDis : 1;
- UINT_32 : 2;
- UINT_32 reserved2 : 26;
+ UINT_32 : 3;
+ UINT_32 reserved2 : 25;
};
};
@@ -80,10 +80,14 @@ const UINT_32 Gfx10LinearSwModeMask = (1u << ADDR_SW_LINEAR);
const UINT_32 Gfx10Blk256BSwModeMask = (1u << ADDR_SW_256B_S) |
(1u << ADDR_SW_256B_D);
+
+const UINT_32 Gfx10Blk4K_R_XMask = (1u << ADDR_SW_4KB_R_X);
+
const UINT_32 Gfx10Blk4KBSwModeMask = (1u << ADDR_SW_4KB_S) |
(1u << ADDR_SW_4KB_D) |
(1u << ADDR_SW_4KB_S_X) |
- (1u << ADDR_SW_4KB_D_X);
+ (1u << ADDR_SW_4KB_D_X) |
+ (1u << ADDR_SW_4KB_R_X);
const UINT_32 Gfx10Blk64KBSwModeMask = (1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_64KB_D) |
@@ -119,6 +123,7 @@ const UINT_32 Gfx10RenderSwModeMask = (1u << ADDR_SW_64KB_R_X) |
const UINT_32 Gfx10XSwModeMask = (1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_4KB_D_X) |
+ (1u << ADDR_SW_4KB_R_X) |
(1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_D_X) |
@@ -146,6 +151,7 @@ const UINT_32 Gfx10Rsrc3dSwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_4KB_S_X) |
+ (1u << ADDR_SW_4KB_R_X) |
(1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_D_X) |
@@ -167,8 +173,9 @@ const UINT_32 Gfx10Rsrc3dThick4KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10
const UINT_32 Gfx10Rsrc3dThick64KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk64KBSwModeMask;
-const UINT_32 Gfx10MsaaSwModeMask = Gfx10ZSwModeMask |
- Gfx10RenderSwModeMask;
+const UINT_32 Gfx10MsaaSwModeMask = (Gfx10ZSwModeMask |
+ Gfx10RenderSwModeMask) &
+ ~Gfx10Blk4K_R_XMask;
const UINT_32 Dcn20NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_4KB_S) |
@@ -397,6 +404,12 @@ private:
UINT_32 log2Elem,
UINT_32 numFrag) const;
+ /**
+ * Will use the indices, "nibbles", to build an index equation inside pSwizzle
+ *
+ * @param pPatInfo Pointer to a patInfo. Contains indices mapping to the 2D nibble arrays which will be used to build an index equation.
+ * @param pSwizzle Array to write the index equation to.
+ */
VOID GetSwizzlePatternFromPatternInfo(
const ADDR_SW_PATINFO* pPatInfo,
ADDR_BIT_SETTING (&pSwizzle)[20]) const
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 9ce4dc5f918..23bc349998f 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -38,6 +38,12 @@
#include <stdio.h>
#include <ctype.h>
+#define AMDGPU_ARCTURUS_RANGE 0x32, 0x3C
+#define AMDGPU_ALDEBARAN_RANGE 0x3C, 0xFF
+
+#define ASICREV_IS_ARCTURUS(r) ASICREV_IS(r, ARCTURUS)
+#define ASICREV_IS_ALDEBARAN(r) ASICREV_IS(r, ALDEBARAN)
+
#ifdef _WIN32
#define DRM_CAP_ADDFB2_MODIFIERS 0x10
#define DRM_CAP_SYNCOBJ 0x13
More information about the mesa-commit
mailing list