[Mesa-dev] [PATCH 15/28] swr: [rasterizer] Backend code adjustments
Tim Rowley
timothy.o.rowley at intel.com
Thu Mar 16 00:13:09 UTC 2017
---
.../drivers/swr/rasterizer/common/simdintrin.h | 9 ++
src/gallium/drivers/swr/rasterizer/core/api.cpp | 7 +-
.../drivers/swr/rasterizer/core/backend.cpp | 1 +
src/gallium/drivers/swr/rasterizer/core/backend.h | 96 ++++++++++++----------
src/gallium/drivers/swr/rasterizer/core/state.h | 4 +-
.../scripts/templates/backend_template.cpp | 3 +-
6 files changed, 73 insertions(+), 47 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
index ea79902..562408d 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
@@ -660,6 +660,15 @@ simdscalar vMask(int32_t mask)
}
INLINE
+simdscalari vMaski(int32_t mask)
+{
+ __m256i vec = _mm256_set1_epi32(mask);
+ const __m256i bit = _mm256_set_epi32(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
+ vec = _simd_and_si(vec, bit);
+ return _simd_cmplt_epi32(_mm256_setzero_si256(), vec);
+}
+
+INLINE
void _simd_mov(simdscalar &r, unsigned int rlane, simdscalar& s, unsigned int slane)
{
OSALIGNSIMD(float) rArray[KNOB_SIMD_WIDTH], sArray[KNOB_SIMD_WIDTH];
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 78c1bd7..9e9a022 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -786,8 +786,8 @@ void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
// templated backend function tables
extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
extern PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT][2][2];
-extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT][SWR_MSAA_SAMPLE_PATTERN_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2][2];
extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2];
+extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT][SWR_MSAA_SAMPLE_PATTERN_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2][2];
void SetupPipeline(DRAW_CONTEXT *pDC)
{
SWR_CONTEXT* pContext = pDC->pContext;
@@ -807,7 +807,6 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0;
const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 1 : 0;
-
SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
// select backend function
@@ -818,7 +817,9 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
{
// always need to generate I & J per sample for Z interpolation
barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
- backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage][centroid][forcedSampleCount][canEarlyZ];
+ backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage]
+ [centroid][forcedSampleCount][canEarlyZ]
+ ;
}
else
{
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index b1bcdb0..b915e32 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -39,6 +39,7 @@
typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, uint32_t, DWORD[4], const SWR_RECT& rect);
static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
+
//////////////////////////////////////////////////////////////////////////
/// @brief Process compute work.
/// @param pDC - pointer to draw context (dispatch).
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h
index b6f6069..9f1fd89 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.h
@@ -597,8 +597,10 @@ struct PixelRateZTestLoop
if(psState.writesODepth)
{
- // broadcast and test oDepth(psContext.vZ) written from the PS for each sample
- vZ[sample] = psContext.vZ;
+ {
+ // broadcast and test oDepth(psContext.vZ) written from the PS for each sample
+ vZ[sample] = psContext.vZ;
+ }
}
else
{
@@ -705,23 +707,26 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
uint8_t *pColorSample = pColorBase[rt] + rasterTileColorOffset;
const SWR_RENDER_TARGET_BLEND_STATE *pRTBlend = &pBlendState->renderTarget[rt];
- // pfnBlendFunc may not update all channels. Initialize with PS output.
- /// TODO: move this into the blend JIT.
- blendOut = psContext.shaded[rt];
- // Blend outputs and update coverage mask for alpha test
- if(pfnBlendFunc[rt] != nullptr)
{
- pfnBlendFunc[rt](
- pBlendState,
- psContext.shaded[rt],
- psContext.shaded[1],
- psContext.shaded[0].w,
- sample,
- pColorSample,
- blendOut,
- &psContext.oMask,
- (simdscalari*)&coverageMask);
+ // pfnBlendFunc may not update all channels. Initialize with PS output.
+ /// TODO: move this into the blend JIT.
+ blendOut = psContext.shaded[rt];
+
+ // Blend outputs and update coverage mask for alpha test
+ if(pfnBlendFunc[rt] != nullptr)
+ {
+ pfnBlendFunc[rt](
+ pBlendState,
+ psContext.shaded[rt],
+ psContext.shaded[1],
+ psContext.shaded[0].w,
+ sample,
+ pColorSample,
+ blendOut,
+ &psContext.oMask,
+ (simdscalari*)&coverageMask);
+ }
}
// final write mask
@@ -774,9 +779,6 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
simdscalar *pColorSample = reinterpret_cast<simdscalar *>(pColorBase[rt] + rasterTileColorOffset);
const SWR_RENDER_TARGET_BLEND_STATE *pRTBlend = &pBlendState->renderTarget[rt];
- // pfnBlendFunc may not update all channels. Initialize with PS output.
- /// TODO: move this into the blend JIT.
- blendOut = psContext.shaded[rt];
if (colorBufferBit & colorBufferEnableMask)
{
@@ -786,19 +788,25 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
blendSrc[3] = pColorSample[6];
}
- // Blend outputs and update coverage mask for alpha test
- if (pfnBlendFunc[rt] != nullptr)
{
- pfnBlendFunc[rt](
- pBlendState,
- psContext.shaded[rt],
- psContext.shaded[1],
- psContext.shaded[0].w,
- sample,
- reinterpret_cast<uint8_t *>(&blendSrc),
- blendOut,
- &psContext.oMask,
- reinterpret_cast<simdscalari *>(&coverageMask));
+ // pfnBlendFunc may not update all channels. Initialize with PS output.
+ /// TODO: move this into the blend JIT.
+ blendOut = psContext.shaded[rt];
+
+ // Blend outputs and update coverage mask for alpha test
+ if(pfnBlendFunc[rt] != nullptr)
+ {
+ pfnBlendFunc[rt](
+ pBlendState,
+ psContext.shaded[rt],
+ psContext.shaded[1],
+ psContext.shaded[0].w,
+ sample,
+ reinterpret_cast<uint8_t *>(&blendSrc),
+ blendOut,
+ &psContext.oMask,
+ reinterpret_cast<simdscalari *>(&coverageMask));
+ }
}
// final write mask
@@ -832,6 +840,9 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
template<typename T>
void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
{
+ ///@todo: Need to move locals off stack to prevent __chkstk's from being generated for the backend
+
+
SWR_CONTEXT *pContext = pDC->pContext;
AR_BEGIN(BEPixelRateBackend, pDC->drawId);
@@ -842,12 +853,12 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
BarycentricCoeffs coeffs;
SetupBarycentricCoeffs(&coeffs, work);
- uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], *pDepthBuffer, *pStencilBuffer;
- SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
-
SWR_PS_CONTEXT psContext;
SetupPixelShaderContext<T>(&psContext, work);
+ uint8_t *pDepthBuffer, *pStencilBuffer;
+ SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
+
AR_END(BESetup, 0);
PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBuffer, pStencilBuffer, state.rastState.clipDistanceMask);
@@ -967,10 +978,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
// broadcast the results of the PS to all passing pixels
#if USE_8x2_TILE_BACKEND
- OutputMerger8x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
-#else
- OutputMerger4x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets);
-#endif
+ OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
+#else // USE_8x2_TILE_BACKEND
+ OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets);
+#endif // USE_8x2_TILE_BACKEND
if(!state.psState.forceEarlyZ && !T::bForcedSampleCount)
{
@@ -1001,13 +1012,13 @@ Endtile:
{
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
{
- pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+ psContext.pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
}
#else
for(uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
{
- pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+ psContext.pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
@@ -1027,7 +1038,8 @@ Endtile:
}
template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t samplePattern = SWR_MSAA_STANDARD_PATTERN,
- uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t canEarlyZ = 0>
+ uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t canEarlyZ = 0
+ >
struct SwrBackendTraits
{
static const bool bIsStandardPattern = (samplePattern == SWR_MSAA_STANDARD_PATTERN);
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 05347dc..bb1336c 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -330,6 +330,8 @@ struct SWR_PS_CONTEXT
uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
+ uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS];
+ // IN: Pointers to render target hottiles
};
//////////////////////////////////////////////////////////////////////////
@@ -511,6 +513,7 @@ struct SWR_SURFACE_STATE
uint8_t *pAuxBaseAddress; // Used for compression, append/consume counter, etc.
SWR_AUX_MODE auxMode; // @llvm_enum
+
bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
};
@@ -1087,7 +1090,6 @@ struct SWR_PS_STATE
uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
uint32_t usesUAV : 1; // pixel shader accesses UAV
uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
-
};
// depth bounds state
diff --git a/src/gallium/drivers/swr/rasterizer/scripts/templates/backend_template.cpp b/src/gallium/drivers/swr/rasterizer/scripts/templates/backend_template.cpp
index 1b08d0a..c22a3ec 100644
--- a/src/gallium/drivers/swr/rasterizer/scripts/templates/backend_template.cpp
+++ b/src/gallium/drivers/swr/rasterizer/scripts/templates/backend_template.cpp
@@ -35,7 +35,8 @@ extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
[SWR_INPUT_COVERAGE_COUNT]
[2] // centroid
[2] // forcedSampleCount
- [2]; // canEarlyZ
+ [2] // canEarlyZ
+ ;
void InitBackendPixelRate${fileNum}()
{
--
2.7.4
More information about the mesa-dev
mailing list