[Mesa-dev] [PATCH 1/6] swr/rast: Support render target mask instead of render target count
Tim Rowley
timothy.o.rowley at intel.com
Wed Jul 12 15:38:26 UTC 2017
WIP to support read-only render targets.
---
src/gallium/drivers/swr/rasterizer/core/api.cpp | 24 ++++++----
.../drivers/swr/rasterizer/core/backend_impl.h | 54 ++++++++++++++--------
.../drivers/swr/rasterizer/core/backend_sample.cpp | 16 +++++--
.../swr/rasterizer/core/backend_singlesample.cpp | 16 +++++--
.../drivers/swr/rasterizer/core/rasterizer_impl.h | 20 ++++----
src/gallium/drivers/swr/rasterizer/core/state.h | 2 +-
src/gallium/drivers/swr/swr_state.cpp | 2 +-
7 files changed, 85 insertions(+), 49 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 087a24a..4905890 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -957,20 +957,26 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
(pState->state.depthStencilState.stencilTestEnable ||
pState->state.depthStencilState.stencilWriteEnable)) ? true : false;
- uint32_t numRTs = pState->state.psState.numRenderTargets;
- pState->state.colorHottileEnable = 0;
+ pState->state.colorHottileEnable = pState->state.psState.renderTargetMask;
+
+ // Disable hottile for surfaces with no writes
if (psState.pfnPixelShader != nullptr)
{
- for (uint32_t rt = 0; rt < numRTs; ++rt)
+ DWORD rt;
+ uint32_t rtMask = pState->state.colorHottileEnable;
+ while (_BitScanForward(&rt, rtMask))
{
- pState->state.colorHottileEnable |=
- (!pState->state.blendState.renderTarget[rt].writeDisableAlpha ||
- !pState->state.blendState.renderTarget[rt].writeDisableRed ||
- !pState->state.blendState.renderTarget[rt].writeDisableGreen ||
- !pState->state.blendState.renderTarget[rt].writeDisableBlue) ? (1 << rt) : 0;
+ rtMask &= ~(1 << rt);
+
+ if (pState->state.blendState.renderTarget[rt].writeDisableAlpha &&
+ pState->state.blendState.renderTarget[rt].writeDisableRed &&
+ pState->state.blendState.renderTarget[rt].writeDisableGreen &&
+ pState->state.blendState.renderTarget[rt].writeDisableBlue)
+ {
+ pState->state.colorHottileEnable &= ~(1 << rt);
+ }
}
}
-
// Setup depth quantization function
if (pState->state.depthHottileEnable)
{
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
index 2e32e4d..b6a86b5 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
@@ -475,16 +475,15 @@ inline void SetupBarycentricCoeffs(BarycentricCoeffs *coeffs, const SWR_TRIANGLE
coeffs->vCOneOverW = _simd_broadcast_ss(&work.OneOverW[2]);
}
-inline void SetupRenderBuffers(uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], uint8_t **pDepthBuffer, uint8_t **pStencilBuffer, uint32_t colorBufferCount, RenderOutputBuffers &renderBuffers)
+inline void SetupRenderBuffers(uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], uint8_t **pDepthBuffer, uint8_t **pStencilBuffer, uint32_t colorHotTileMask, RenderOutputBuffers &renderBuffers)
{
- assert(colorBufferCount <= SWR_NUM_RENDERTARGETS);
-
- if (pColorBuffer)
+
+ DWORD index;
+ while (_BitScanForward(&index, colorHotTileMask))
{
- for (uint32_t index = 0; index < colorBufferCount; index += 1)
- {
- pColorBuffer[index] = renderBuffers.pColor[index];
- }
+ assert(index < SWR_NUM_RENDERTARGETS);
+ colorHotTileMask &= ~(1 << index);
+ pColorBuffer[index] = renderBuffers.pColor[index];
}
if (pDepthBuffer)
@@ -712,14 +711,16 @@ static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_P
// Merge Output to 4x2 SIMD Tile Format
INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
- const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, const uint32_t NumRT)
+ const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, uint32_t renderTargetMask)
{
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc
const uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
simdvector blendOut;
- for(uint32_t rt = 0; rt < NumRT; ++rt)
+ DWORD rt = 0;
+ while (_BitScanForward(&rt, renderTargetMask))
{
+ renderTargetMask &= ~(1 << rt);
uint8_t *pColorSample = pColorBase[rt] + rasterTileColorOffset;
const SWR_RENDER_TARGET_BLEND_STATE *pRTBlend = &pBlendState->renderTarget[rt];
@@ -776,7 +777,7 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
#if USE_8x2_TILE_BACKEND
// Merge Output to 8x2 SIMD16 Tile Format
INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
- const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, const uint32_t NumRT, const uint32_t colorBufferEnableMask, bool useAlternateOffset)
+ const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset)
{
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc
uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
@@ -789,20 +790,27 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
simdvector blendSrc;
simdvector blendOut;
- uint32_t colorBufferBit = 1;
- for (uint32_t rt = 0; rt < NumRT; rt += 1, colorBufferBit <<= 1)
+ DWORD rt;
+ while (_BitScanForward(&rt, renderTargetMask))
{
- simdscalar *pColorSample = reinterpret_cast<simdscalar *>(pColorBase[rt] + rasterTileColorOffset);
+ renderTargetMask &= ~(1 << rt);
const SWR_RENDER_TARGET_BLEND_STATE *pRTBlend = &pBlendState->renderTarget[rt];
- if (colorBufferBit & colorBufferEnableMask)
+ simdscalar* pColorSample;
+ bool hotTileEnable = !pRTBlend->writeDisableAlpha || !pRTBlend->writeDisableRed || !pRTBlend->writeDisableGreen || !pRTBlend->writeDisableBlue;
+ if (hotTileEnable)
{
+ pColorSample = reinterpret_cast<simdscalar *>(pColorBase[rt] + rasterTileColorOffset);
blendSrc[0] = pColorSample[0];
blendSrc[1] = pColorSample[2];
blendSrc[2] = pColorSample[4];
blendSrc[3] = pColorSample[6];
}
+ else
+ {
+ pColorSample = nullptr;
+ }
{
// pfnBlendFunc may not update all channels. Initialize with PS output.
@@ -874,7 +882,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
SetupPixelShaderContext<T>(&psContext, samplePos, work);
uint8_t *pDepthBuffer, *pStencilBuffer;
- SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
+ SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers);
AR_END(BESetup, 0);
@@ -994,9 +1002,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
// broadcast the results of the PS to all passing pixels
#if USE_8x2_TILE_BACKEND
- OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
+ OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset);
#else // USE_8x2_TILE_BACKEND
- OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.numRenderTargets);
+ OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask);
#endif // USE_8x2_TILE_BACKEND
if(!state.psState.forceEarlyZ && !T::bForcedSampleCount)
@@ -1026,14 +1034,20 @@ Endtile:
#if USE_8x2_TILE_BACKEND
if (useAlternateOffset)
{
- for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
+ DWORD rt;
+ uint32_t rtMask = state.colorHottileEnable;
+ while (_BitScanForward(&rt, rtMask))
{
+ rtMask &= ~(1 << rt);
psContext.pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
}
#else
- for(uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
+ DWORD rt;
+ uint32_t rtMask = state.colorHottileEnable;
+ while (_BitScanForward(&rt, rtMask))
{
+ rtMask &= ~(1 << rt);
psContext.pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
index 2dca5d8..d81352a 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
@@ -55,7 +55,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
SetupPixelShaderContext<T>(&psContext, samplePos, work);
uint8_t *pDepthBuffer, *pStencilBuffer;
- SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
+ SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers);
AR_END(BESetup, 0);
@@ -198,9 +198,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
// output merger
AR_BEGIN(BEOutputMerger, pDC->drawId);
#if USE_8x2_TILE_BACKEND
- OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
+ OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset);
#else
- OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets);
+ OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask);
#endif
// do final depth write after all pixel kills
@@ -227,14 +227,20 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
#if USE_8x2_TILE_BACKEND
if (useAlternateOffset)
{
- for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
+ DWORD rt;
+ uint32_t rtMask = state.colorHottileEnable;
+ while (_BitScanForward(&rt, rtMask))
{
+ rtMask &= ~(1 << rt);
psContext.pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
}
#else
- for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
+ DWORD rt;
+ uint32_t rtMask = state.colorHottileEnable;
+ while (_BitScanForward(&rt, rtMask))
{
+ rtMask &= ~(1 << rt);
psContext.pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
index 8ae2cf4..34875d3 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
@@ -55,7 +55,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
SetupPixelShaderContext<T>(&psContext, samplePos, work);
uint8_t *pDepthBuffer, *pStencilBuffer;
- SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
+ SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers);
AR_END(BESetup, 1);
@@ -183,9 +183,9 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
// output merger
AR_BEGIN(BEOutputMerger, pDC->drawId);
#if USE_8x2_TILE_BACKEND
- OutputMerger8x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
+ OutputMerger8x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset);
#else
- OutputMerger4x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets);
+ OutputMerger4x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask);
#endif
// do final depth write after all pixel kills
@@ -209,14 +209,20 @@ Endtile:
#if USE_8x2_TILE_BACKEND
if (useAlternateOffset)
{
- for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
+ DWORD rt;
+ uint32_t rtMask = state.colorHottileEnable;
+ while(_BitScanForward(&rt, rtMask))
{
+ rtMask &= ~(1 << rt);
psContext.pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
}
#else
- for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
+ DWORD rt;
+ uint32_t rtMask = state.colorHottileEnable;
+ while (_BitScanForward(&rt, rtMask))
{
+ rtMask &= ~(1 << rt);
psContext.pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h
index b73a99b..081e4dd 100644
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h
@@ -42,9 +42,9 @@ extern PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPU
template <uint32_t numSamples = 1>
void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t x, uint32_t y, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex);
template <typename RT>
-void StepRasterTileX(uint32_t MaxRT, RenderOutputBuffers &buffers);
+void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers &buffers);
template <typename RT>
-void StepRasterTileY(uint32_t MaxRT, RenderOutputBuffers &buffers, RenderOutputBuffers &startBufferRow);
+void StepRasterTileY(uint32_t colorHotTileMask, RenderOutputBuffers &buffers, RenderOutputBuffers &startBufferRow);
#define MASKTOVEC(i3,i2,i1,i0) {-i0,-i1,-i2,-i3}
static const __m256d gMaskToVecpd[] =
@@ -1281,7 +1281,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
{
vEdgeFix16[e] = _mm256_add_pd(vEdgeFix16[e], _mm256_set1_pd(rastEdges[e].stepRasterTileX));
}
- StepRasterTileX<RT>(state.psState.numRenderTargets, renderBuffers);
+ StepRasterTileX<RT>(state.colorHottileEnable, renderBuffers);
}
// step to the next tile in Y
@@ -1289,7 +1289,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
{
vEdgeFix16[e] = _mm256_add_pd(vStartOfRowEdge[e], _mm256_set1_pd(rastEdges[e].stepRasterTileY));
}
- StepRasterTileY<RT>(state.psState.numRenderTargets, renderBuffers, currentRenderBufferRow);
+ StepRasterTileY<RT>(state.colorHottileEnable, renderBuffers, currentRenderBufferRow);
}
AR_END(BERasterizeTriangle, 1);
@@ -1348,10 +1348,12 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint
}
template <typename RT>
-INLINE void StepRasterTileX(uint32_t NumRT, RenderOutputBuffers &buffers)
+INLINE void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers &buffers)
{
- for(uint32_t rt = 0; rt < NumRT; ++rt)
+ DWORD rt = 0;
+ while (_BitScanForward(&rt, colorHotTileMask))
{
+ colorHotTileMask &= ~(1 << rt);
buffers.pColor[rt] += RT::colorRasterTileStep;
}
@@ -1360,10 +1362,12 @@ INLINE void StepRasterTileX(uint32_t NumRT, RenderOutputBuffers &buffers)
}
template <typename RT>
-INLINE void StepRasterTileY(uint32_t NumRT, RenderOutputBuffers &buffers, RenderOutputBuffers &startBufferRow)
+INLINE void StepRasterTileY(uint32_t colorHotTileMask, RenderOutputBuffers &buffers, RenderOutputBuffers &startBufferRow)
{
- for(uint32_t rt = 0; rt < NumRT; ++rt)
+ DWORD rt = 0;
+ while (_BitScanForward(&rt, colorHotTileMask))
{
+ colorHotTileMask &= ~(1 << rt);
startBufferRow.pColor[rt] += RT::colorRasterTileRowStep;
buffers.pColor[rt] = startBufferRow.pColor[rt];
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 4fbd74d..d9e9280 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -1139,7 +1139,7 @@ struct SWR_PS_STATE
uint32_t writesODepth : 1; // pixel shader writes to depth
uint32_t usesSourceDepth : 1; // pixel shader reads depth
uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
- uint32_t numRenderTargets : 4; // number of render target outputs in use (0-8)
+ uint32_t renderTargetMask : 8; // number of render target outputs in use (0-8)
uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
uint32_t usesUAV : 1; // pixel shader accesses UAV
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index 4eef606..0fd2f9e 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1446,7 +1446,7 @@ swr_update_derived(struct pipe_context *pipe,
psState.writesODepth = ctx->fs->info.base.writes_z;
psState.usesSourceDepth = ctx->fs->info.base.reads_z;
psState.shadingRate = SWR_SHADING_RATE_PIXEL;
- psState.numRenderTargets = ctx->framebuffer.nr_cbufs;
+ psState.renderTargetMask = (1 << ctx->framebuffer.nr_cbufs) - 1;
psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE;
uint32_t barycentricsMask = 0;
#if 0
--
2.7.4
More information about the mesa-dev
mailing list