[Mesa-dev] [PATCH 007/140] amdgpu/addrlib: Rewrite tile mode optimization code
Marek Olšák
maraeo at gmail.com
Mon Mar 20 22:42:17 UTC 2017
From: Xavi Zhang <xavi.zhang at amd.com>
Note: remove reference to degrade4Space and use opt4Space instead.
---
src/amd/addrlib/addrinterface.h | 6 ++--
src/amd/addrlib/core/addrcommon.h | 3 +-
src/amd/addrlib/core/addrlib.cpp | 47 ++++++++++++++++----------
src/amd/addrlib/core/addrlib.h | 2 +-
src/amd/addrlib/r800/egbaddrlib.cpp | 16 +++++++++
src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 12 +++----
6 files changed, 55 insertions(+), 31 deletions(-)
diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h
index d05c6ef..a50717c 100644
--- a/src/amd/addrlib/addrinterface.h
+++ b/src/amd/addrlib/addrinterface.h
@@ -239,23 +239,22 @@ typedef union _ADDR_CREATE_FLAGS
{
struct
{
UINT_32 noCubeMipSlicesPad : 1; ///< Turn cubemap faces padding off
UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and
/// output structure
UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid
UINT_32 useCombinedSwizzle : 1; ///< Use combined tile swizzle
UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level
UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment
- UINT_32 degradeBaseLevel : 1; ///< Degrade to 1D modes automatically for base level
UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize
- UINT_32 reserved : 24; ///< Reserved bits for future use
+ UINT_32 reserved : 25; ///< Reserved bits for future use
};
UINT_32 value;
} ADDR_CREATE_FLAGS;
/**
***************************************************************************************************
* ADDR_REGISTER_VALUE
*
* @brief
@@ -433,29 +432,28 @@ typedef union _ADDR_SURFACE_FLAGS
UINT_32 compressZ : 1; ///< Flag indicates z buffer is compressed
UINT_32 overlay : 1; ///< Flag indicates this is an overlay surface
UINT_32 noStencil : 1; ///< Flag indicates this depth has no separate stencil
UINT_32 display : 1; ///< Flag indicates this should match display controller req.
UINT_32 opt4Space : 1; ///< Flag indicates this surface should be optimized for space
/// i.e. save some memory but may lose performance
UINT_32 prt : 1; ///< Flag for partially resident texture
UINT_32 qbStereo : 1; ///< Quad buffer stereo surface
UINT_32 pow2Pad : 1; ///< SI: Pad to pow2, must set for mipmap (include level0)
UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding
- UINT_32 degrade4Space : 1; ///< Degrade base level's tile mode to save memory
UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable
UINT_32 dispTileType : 1; ///< NI: force display Tiling for 128 bit shared resoruce
UINT_32 dccCompatible : 1; ///< VI: whether to support dcc fast clear
UINT_32 czDispCompatible: 1; ///< SI+: CZ family has a HW bug needs special alignment.
/// This flag indicates we need to follow the alignment with
/// CZ families or other ASICs under PX configuration + CZ.
UINT_32 nonSplit : 1; ///< CI: depth texture should not be split
- UINT_32 reserved : 9; ///< Reserved bits
+ UINT_32 reserved : 10; ///< Reserved bits
};
UINT_32 value;
} ADDR_SURFACE_FLAGS;
/**
***************************************************************************************************
* ADDR_COMPUTE_SURFACE_INFO_INPUT
*
* @brief
diff --git a/src/amd/addrlib/core/addrcommon.h b/src/amd/addrlib/core/addrcommon.h
index f996c9a..88cbad0 100644
--- a/src/amd/addrlib/core/addrcommon.h
+++ b/src/amd/addrlib/core/addrcommon.h
@@ -125,23 +125,22 @@ union ADDR_CONFIG_FLAGS
/// There flags are set up by AddrLib inside thru AddrInitGlobalParamsFromRegister
UINT_32 optimalBankSwap : 1; ///< New bank tiling for RV770 only
UINT_32 noCubeMipSlicesPad : 1; ///< Disables faces padding for cubemap mipmaps
UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and
/// output structure
UINT_32 ignoreTileInfo : 1; ///< Don't use tile info structure
UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid
UINT_32 useCombinedSwizzle : 1; ///< Use combined swizzle
UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level
UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment
- UINT_32 degradeBaseLevel : 1; ///< Degrade to 1D modes automatically for base level
UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize
- UINT_32 reserved : 22; ///< Reserved bits for future use
+ UINT_32 reserved : 23; ///< Reserved bits for future use
};
UINT_32 value;
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// Platform specific debug break defines
///////////////////////////////////////////////////////////////////////////////////////////////////
#if DEBUG
#if defined(__GNUC__)
diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp
index 8cf4a24..b92568e 100644
--- a/src/amd/addrlib/core/addrlib.cpp
+++ b/src/amd/addrlib/core/addrlib.cpp
@@ -257,21 +257,20 @@ ADDR_E_RETURNCODE AddrLib::Create(
{
BOOL_32 initValid;
// Pass createFlags to configFlags first since these flags may be overwritten
pLib->m_configFlags.noCubeMipSlicesPad = pCreateIn->createFlags.noCubeMipSlicesPad;
pLib->m_configFlags.fillSizeFields = pCreateIn->createFlags.fillSizeFields;
pLib->m_configFlags.useTileIndex = pCreateIn->createFlags.useTileIndex;
pLib->m_configFlags.useCombinedSwizzle = pCreateIn->createFlags.useCombinedSwizzle;
pLib->m_configFlags.checkLast2DLevel = pCreateIn->createFlags.checkLast2DLevel;
pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign;
- pLib->m_configFlags.degradeBaseLevel = pCreateIn->createFlags.degradeBaseLevel;
pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile;
pLib->SetAddrChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision);
pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels);
// Global parameters initialized and remaining configFlags bits are set as well
initValid = pLib->HwlInitGlobalParams(pCreateIn);
if (initValid)
@@ -552,22 +551,22 @@ ADDR_E_RETURNCODE AddrLib::ComputeSurfaceInfo(
{
AddrTileMode tileMode = localIn.tileMode;
AddrTileType tileType = localIn.tileType;
// HWL layer may override tile mode if necessary
if (HwlOverrideTileMode(&localIn, &tileMode, &tileType))
{
localIn.tileMode = tileMode;
localIn.tileType = tileType;
}
- // Degrade base level if applicable
- if (DegradeBaseLevel(&localIn, &tileMode))
+ // Optimize tile mode if possible
+ if (OptimizeTileMode(&localIn, &tileMode))
{
localIn.tileMode = tileMode;
}
}
// Call main function to compute surface info
if (returnCode == ADDR_OK)
{
returnCode = HwlComputeSurfaceInfo(&localIn, pOut);
}
@@ -3486,70 +3485,84 @@ VOID AddrLib::ComputeMipLevel(
pIn->width = PowTwoAlign(pIn->width, 4);
pIn->height = PowTwoAlign(pIn->height, 4);
}
}
HwlComputeMipLevel(pIn);
}
/**
***************************************************************************************************
-* AddrLib::DegradeBaseLevel
+* AddrLib::OptimizeTileMode
*
* @brief
-* Check if base level's tile mode can be degraded
+* Check if base level's tile mode can be optimized (degraded)
* @return
* TRUE if degraded, also returns degraded tile mode (unchanged if not degraded)
***************************************************************************************************
*/
-BOOL_32 AddrLib::DegradeBaseLevel(
+BOOL_32 AddrLib::OptimizeTileMode(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure for surface info
AddrTileMode* pTileMode ///< [out] Degraded tile mode
) const
{
- BOOL_32 degraded = FALSE;
AddrTileMode tileMode = pIn->tileMode;
UINT_32 thickness = ComputeSurfaceThickness(tileMode);
- if (m_configFlags.degradeBaseLevel) // This is a global setting
+ // Optimization can only be done on level 0 and samples <= 1
+ if ((pIn->flags.opt4Space == TRUE) &&
+ (pIn->mipLevel == 0) &&
+ (pIn->numSamples <= 1) &&
+ (pIn->flags.display == FALSE) &&
+ (IsPrtTileMode(tileMode) == FALSE) &&
+ (pIn->flags.prt == FALSE))
{
- if (pIn->flags.degrade4Space && // Degradation per surface
- pIn->mipLevel == 0 &&
- pIn->numSamples == 1 &&
- IsMacroTiled(tileMode))
+ // Check if linear mode is optimal
+ if ((pIn->height == 1) &&
+ (IsLinear(tileMode) == FALSE) &&
+ (AddrElemLib::IsBlockCompressed(pIn->format) == FALSE) &&
+ (pIn->flags.depth == FALSE) &&
+ (pIn->flags.stencil == FALSE))
+ {
+ tileMode = ADDR_TM_LINEAR_ALIGNED;
+ }
+ else if (IsMacroTiled(tileMode))
{
if (HwlDegradeBaseLevel(pIn))
{
- *pTileMode = thickness == 1 ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
- degraded = TRUE;
+ tileMode = (thickness == 1) ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
}
else if (thickness > 1)
{
// As in the following HwlComputeSurfaceInfo, thick modes may be degraded to
// thinner modes, we should re-evaluate whether the corresponding thinner modes
// need to be degraded. If so, we choose 1D thick mode instead.
tileMode = DegradeLargeThickTile(pIn->tileMode, pIn->bpp);
if (tileMode != pIn->tileMode)
{
ADDR_COMPUTE_SURFACE_INFO_INPUT input = *pIn;
input.tileMode = tileMode;
if (HwlDegradeBaseLevel(&input))
{
- *pTileMode = ADDR_TM_1D_TILED_THICK;
- degraded = TRUE;
+ tileMode = ADDR_TM_1D_TILED_THICK;
}
}
}
}
}
- return degraded;
+ BOOL_32 optimized = (tileMode != pIn->tileMode);
+ if (optimized)
+ {
+ *pTileMode = tileMode;
+ }
+ return optimized;
}
/**
***************************************************************************************************
* AddrLib::DegradeLargeThickTile
*
* @brief
* Check if the thickness needs to be reduced if a tile is too large
* @return
* The degraded tile mode (unchanged if not degraded)
diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h
index 43c55ff..d693fd2 100644
--- a/src/amd/addrlib/core/addrlib.h
+++ b/src/amd/addrlib/core/addrlib.h
@@ -645,21 +645,21 @@ private:
//
VOID ComputeTileDataWidthAndHeight(
UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo,
UINT_32* pMacroWidth, UINT_32* pMacroHeight) const;
UINT_32 ComputeXmaskCoordYFromPipe(
UINT_32 pipe, UINT_32 x) const;
VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels);
- BOOL_32 DegradeBaseLevel(
+ BOOL_32 OptimizeTileMode(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, AddrTileMode* pTileMode) const;
protected:
AddrLibClass m_class; ///< Store class type (HWL type)
AddrChipFamily m_chipFamily; ///< Chip family translated from the one in atiid.h
UINT_32 m_chipRevision; ///< Revision id from xxx_id.h
UINT_32 m_version; ///< Current version
diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp b/src/amd/addrlib/r800/egbaddrlib.cpp
index abd1a79..5d80906 100644
--- a/src/amd/addrlib/r800/egbaddrlib.cpp
+++ b/src/amd/addrlib/r800/egbaddrlib.cpp
@@ -1151,20 +1151,36 @@ BOOL_32 EgBasedAddrLib::HwlDegradeBaseLevel(
pIn->mipLevel,
pIn->numSamples,
&tileInfo,
&baseAlign,
&pitchAlign,
&heightAlign);
if (valid)
{
degrade = (pIn->width < pitchAlign || pIn->height < heightAlign);
+ // Check whether 2D tiling still has too much footprint
+ if (degrade == FALSE)
+ {
+ // Only check width and height as slices are aligned to thickness
+ UINT_64 unalignedSize = pIn->width * pIn->height;
+
+ UINT_32 alignedPitch = PowTwoAlign(pIn->width, pitchAlign);
+ UINT_32 alignedHeight = PowTwoAlign(pIn->height, heightAlign);
+ UINT_64 alignedSize = alignedPitch * alignedHeight;
+
+ // alignedSize > 1.5 * unalignedSize
+ if (2 * alignedSize > 3 * unalignedSize)
+ {
+ degrade = TRUE;
+ }
+ }
}
else
{
degrade = TRUE;
}
return degrade;
}
/**
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index abe2b2a..8632f06 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -117,21 +117,20 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
if (ws->info.chip_class == SI) {
regValue.pMacroTileConfig = NULL;
regValue.noOfMacroEntries = 0;
} else {
regValue.pMacroTileConfig = ws->amdinfo.gb_macro_tile_mode;
regValue.noOfMacroEntries = ARRAY_SIZE(ws->amdinfo.gb_macro_tile_mode);
}
createFlags.value = 0;
createFlags.useTileIndex = 1;
- createFlags.degradeBaseLevel = 1;
createFlags.useHtileSliceAlign = 1;
addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
addrCreateInput.chipFamily = ws->family;
addrCreateInput.chipRevision = ws->rev_id;
addrCreateInput.createFlags = createFlags;
addrCreateInput.callbacks.allocSysMem = allocSysMem;
addrCreateInput.callbacks.freeSysMem = freeSysMem;
addrCreateInput.callbacks.debugPrint = 0;
addrCreateInput.regValue = regValue;
@@ -394,25 +393,24 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
AddrSurfInfoIn.flags.depth = (flags & RADEON_SURF_ZBUFFER) != 0;
AddrSurfInfoIn.flags.cube = tex->target == PIPE_TEXTURE_CUBE;
AddrSurfInfoIn.flags.fmask = (flags & RADEON_SURF_FMASK) != 0;
AddrSurfInfoIn.flags.display = (flags & RADEON_SURF_SCANOUT) != 0;
AddrSurfInfoIn.flags.pow2Pad = tex->last_level > 0;
AddrSurfInfoIn.flags.tcCompatible = (flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
/* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
* requested, because TC-compatible HTILE requires 2D tiling.
*/
- AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible &&
- !AddrSurfInfoIn.flags.fmask &&
- tex->nr_samples <= 1 &&
- (flags & RADEON_SURF_OPTIMIZE_FOR_SPACE);
- AddrSurfInfoIn.flags.opt4Space = AddrSurfInfoIn.flags.degrade4Space;
+ AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible &&
+ !AddrSurfInfoIn.flags.fmask &&
+ tex->nr_samples <= 1 &&
+ (flags & RADEON_SURF_OPTIMIZE_FOR_SPACE);
/* DCC notes:
* - If we add MSAA support, keep in mind that CB can't decompress 8bpp
* with samples >= 4.
* - Mipmapped array textures have low performance (discovered by a closed
* driver team).
*/
AddrSurfInfoIn.flags.dccCompatible = ws->info.chip_class >= VI &&
!(flags & RADEON_SURF_Z_OR_SBUFFER) &&
!(flags & RADEON_SURF_DISABLE_DCC) &&
@@ -440,21 +438,21 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
assert(!(flags & RADEON_SURF_FMASK));
/* If any of these parameters are incorrect, the calculation
* will fail. */
AddrTileInfoIn.banks = surf->num_banks;
AddrTileInfoIn.bankWidth = surf->bankw;
AddrTileInfoIn.bankHeight = surf->bankh;
AddrTileInfoIn.macroAspectRatio = surf->mtilea;
AddrTileInfoIn.tileSplitBytes = surf->tile_split;
AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to GB_TILE_MODE */
- AddrSurfInfoIn.flags.degrade4Space = 0;
+ AddrSurfInfoIn.flags.opt4Space = 0;
AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
/* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
* the tile index, because we are expected to know it if
* we know the other parameters.
*
* This is something that can easily be fixed in Addrlib.
* For now, just figure it out here.
* Note that only 2D_TILE_THIN1 is handled here.
*/
--
2.7.4
More information about the mesa-dev
mailing list