[Mesa-dev] [PATCH 2/2] [swr] fix index buffers with non-zero indices
Cherniak, Bruce
bruce.cherniak at intel.com
Tue Feb 21 23:49:42 UTC 2017
Reviewed-by: Bruce Cherniak <bruce.cherniak at intel.com>
> On Feb 17, 2017, at 2:30 PM, George Kyriazis <george.kyriazis at intel.com> wrote:
>
> Fix issue with index buffers that do not contain a 0 index. 0 index
> can be a non-valid index if the (copied) vertex buffers are a subset of the
> user's (which happens because we only copy the range between min & max).
> Core will use an index passed in from the driver to replace invalid indices.
>
> Only do this for calls that contain non-zero indices, to minimize performance
> cost.
> ---
> src/gallium/drivers/swr/rasterizer/core/state.h | 1 +
> .../drivers/swr/rasterizer/jitter/fetch_jit.cpp | 60 +++++++++++++++++++---
> .../drivers/swr/rasterizer/jitter/fetch_jit.h | 2 +
> src/gallium/drivers/swr/swr_draw.cpp | 1 +
> src/gallium/drivers/swr/swr_state.cpp | 4 ++
> 5 files changed, 62 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
> index 2f3b913..05347dc 100644
> --- a/src/gallium/drivers/swr/rasterizer/core/state.h
> +++ b/src/gallium/drivers/swr/rasterizer/core/state.h
> @@ -524,6 +524,7 @@ struct SWR_VERTEX_BUFFER_STATE
> const uint8_t *pData;
> uint32_t size;
> uint32_t numaNode;
> + uint32_t minVertex; // min vertex (for bounds checking)
> uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks
> uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for partially OOB vertices
> };
> diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
> index 901bce6..ffa7605 100644
> --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
> +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
> @@ -309,11 +309,29 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* str
>
> Value* startVertexOffset = MUL(Z_EXT(startOffset, mInt64Ty), stride);
>
> + Value *minVertex = NULL;
> + Value *minVertexOffset = NULL;
> + if (fetchState.bPartialVertexBuffer) {
> + // fetch min index for low bounds checking
> + minVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_minVertex)});
> + minVertex = LOAD(minVertex);
> + if (!fetchState.bDisableIndexOOBCheck) {
> + minVertexOffset = MUL(Z_EXT(minVertex, mInt64Ty), stride);
> + }
> + }
> +
> // Load from the stream.
> for(uint32_t lane = 0; lane < mVWidth; ++lane)
> {
> // Get index
> Value* index = VEXTRACT(vCurIndices, C(lane));
> +
> + if (fetchState.bPartialVertexBuffer) {
> + // clamp below minvertex
> + Value *isBelowMin = ICMP_SLT(index, minVertex);
> + index = SELECT(isBelowMin, minVertex, index);
> + }
> +
> index = Z_EXT(index, mInt64Ty);
>
> Value* offset = MUL(index, stride);
> @@ -321,10 +339,14 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* str
> offset = ADD(offset, startVertexOffset);
>
> if (!fetchState.bDisableIndexOOBCheck) {
> - // check for out of bound access, including partial OOB, and mask them to 0
> + // check for out of bound access, including partial OOB, and replace them with minVertex
> Value *endOffset = ADD(offset, C((int64_t)info.Bpp));
> Value *oob = ICMP_ULE(endOffset, size);
> - offset = SELECT(oob, offset, ConstantInt::get(mInt64Ty, 0));
> + if (fetchState.bPartialVertexBuffer) {
> + offset = SELECT(oob, offset, minVertexOffset);
> + } else {
> + offset = SELECT(oob, offset, ConstantInt::get(mInt64Ty, 0));
> + }
> }
>
> Value* pointer = GEP(stream, offset);
> @@ -732,6 +754,13 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
> Value *maxVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_maxVertex)});
> maxVertex = LOAD(maxVertex);
>
> + Value *minVertex = NULL;
> + if (fetchState.bPartialVertexBuffer) {
> + // min vertex index for low bounds OOB checking
> + minVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_minVertex)});
> + minVertex = LOAD(minVertex);
> + }
> +
> Value *vCurIndices;
> Value *startOffset;
> if(ied.InstanceEnable)
> @@ -769,9 +798,16 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
>
> // if we have a start offset, subtract from max vertex. Used for OOB check
> maxVertex = SUB(Z_EXT(maxVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
> - Value* neg = ICMP_SLT(maxVertex, C((int64_t)0));
> + Value* maxNeg = ICMP_SLT(maxVertex, C((int64_t)0));
> // if we have a negative value, we're already OOB. clamp at 0.
> - maxVertex = SELECT(neg, C(0), TRUNC(maxVertex, mInt32Ty));
> + maxVertex = SELECT(maxNeg, C(0), TRUNC(maxVertex, mInt32Ty));
> +
> + if (fetchState.bPartialVertexBuffer) {
> + // similary for min vertex
> + minVertex = SUB(Z_EXT(minVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
> + Value *minNeg = ICMP_SLT(minVertex, C((int64_t)0));
> + minVertex = SELECT(minNeg, C(0), TRUNC(minVertex, mInt32Ty));
> + }
>
> // Load the in bounds size of a partially valid vertex
> Value *partialInboundsSize = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_partialInboundsSize)});
> @@ -791,8 +827,20 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
> Value* vMaxVertex = VBROADCAST(maxVertex);
> Value* vPartialOOBMask = ICMP_EQ(vCurIndices, vMaxVertex);
>
> - // are vertices are fully in bounds?
> - Value* vGatherMask = ICMP_ULT(vCurIndices, vMaxVertex);
> + // are vertices fully in bounds?
> + Value* vMaxGatherMask = ICMP_ULT(vCurIndices, vMaxVertex);
> +
> + Value *vGatherMask;
> + if (fetchState.bPartialVertexBuffer) {
> + // are vertices below minVertex limit?
> + Value *vMinVertex = VBROADCAST(minVertex);
> + Value *vMinGatherMask = ICMP_UGE(vCurIndices, vMinVertex);
> +
> + // only fetch lanes that pass both tests
> + vGatherMask = AND(vMaxGatherMask, vMinGatherMask);
> + } else {
> + vGatherMask = vMaxGatherMask;
> + }
>
> // blend in any partially OOB indices that have valid elements
> vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask);
> diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
> index 622608a..68c6f60 100644
> --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
> +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
> @@ -104,6 +104,7 @@ struct FETCH_COMPILE_STATE
> bool bDisableIndexOOBCheck; // If enabled, FetchJit will exclude index OOB check
> bool bEnableCutIndex{ false }; // Compares indices with the cut index and returns a cut mask
> bool bVertexIDOffsetEnable{ false }; // Offset vertexID by StartVertex for non-indexed draws or BaseVertex for indexed draws
> + bool bPartialVertexBuffer{ false }; // for indexed draws, map illegal indices to a known resident vertex
>
> FETCH_COMPILE_STATE(bool disableVGATHER = false, bool diableIndexOOBCheck = false):
> bDisableVGATHER(disableVGATHER), bDisableIndexOOBCheck(diableIndexOOBCheck){ };
> @@ -117,6 +118,7 @@ struct FETCH_COMPILE_STATE
> if (bEnableCutIndex != other.bEnableCutIndex) return false;
> if (cutIndex != other.cutIndex) return false;
> if (bVertexIDOffsetEnable != other.bVertexIDOffsetEnable) return false;
> + if (bPartialVertexBuffer != other.bPartialVertexBuffer) return false;
>
> for(uint32_t i = 0; i < numAttribs; ++i)
> {
> diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp
> index 4bdd3bb..f764efe 100644
> --- a/src/gallium/drivers/swr/swr_draw.cpp
> +++ b/src/gallium/drivers/swr/swr_draw.cpp
> @@ -143,6 +143,7 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
> struct swr_vertex_element_state *velems = ctx->velems;
> velems->fsState.cutIndex = info->restart_index;
> velems->fsState.bEnableCutIndex = info->primitive_restart;
> + velems->fsState.bPartialVertexBuffer = (info->min_index > 0);
>
> swr_jit_fetch_key key;
> swr_generate_fetch_key(key, velems);
> diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
> index 116f19f..5e3d58d 100644
> --- a/src/gallium/drivers/swr/swr_state.cpp
> +++ b/src/gallium/drivers/swr/swr_state.cpp
> @@ -1106,6 +1106,7 @@ swr_update_derived(struct pipe_context *pipe,
> SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
> for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
> uint32_t size, pitch, elems, partial_inbounds;
> + uint32_t min_vertex_index;
> const uint8_t *p_data;
> struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
>
> @@ -1117,6 +1118,7 @@ swr_update_derived(struct pipe_context *pipe,
> size = vb->buffer->width0;
> elems = size / pitch;
> partial_inbounds = size % pitch;
> + min_vertex_index = 0;
>
> p_data = swr_resource_data(vb->buffer) + vb->buffer_offset;
> } else {
> @@ -1128,6 +1130,7 @@ swr_update_derived(struct pipe_context *pipe,
> uint32_t base;
> swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
> partial_inbounds = 0;
> + min_vertex_index = info.min_index;
>
> /* Copy only needed vertices to scratch space */
> size = AlignUp(size, 4);
> @@ -1143,6 +1146,7 @@ swr_update_derived(struct pipe_context *pipe,
> swrVertexBuffers[i].pitch = pitch;
> swrVertexBuffers[i].pData = p_data;
> swrVertexBuffers[i].size = size;
> + swrVertexBuffers[i].minVertex = min_vertex_index;
> swrVertexBuffers[i].maxVertex = elems;
> swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
> }
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list