[Mesa-dev] [PATCH 2/2] [swr] fix index buffers with non-zero indices

Cherniak, Bruce bruce.cherniak at intel.com
Tue Feb 21 23:49:42 UTC 2017


Reviewed-by: Bruce Cherniak <bruce.cherniak at intel.com>

> On Feb 17, 2017, at 2:30 PM, George Kyriazis <george.kyriazis at intel.com> wrote:
> 
> Fix issue with index buffers that do not contain a 0 index.  0 index
> can be a non-valid index if the (copied) vertex buffers are a subset of the
> user's (which happens because we only copy the range between min & max).
> Core will use an index passed in from the driver to replace invalid indices.
> 
> Only do this for calls that contain non-zero indices, to minimize performance
> cost.
> ---
> src/gallium/drivers/swr/rasterizer/core/state.h    |  1 +
> .../drivers/swr/rasterizer/jitter/fetch_jit.cpp    | 60 +++++++++++++++++++---
> .../drivers/swr/rasterizer/jitter/fetch_jit.h      |  2 +
> src/gallium/drivers/swr/swr_draw.cpp               |  1 +
> src/gallium/drivers/swr/swr_state.cpp              |  4 ++
> 5 files changed, 62 insertions(+), 6 deletions(-)
> 
> diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
> index 2f3b913..05347dc 100644
> --- a/src/gallium/drivers/swr/rasterizer/core/state.h
> +++ b/src/gallium/drivers/swr/rasterizer/core/state.h
> @@ -524,6 +524,7 @@ struct SWR_VERTEX_BUFFER_STATE
>     const uint8_t *pData;
>     uint32_t size;
>     uint32_t numaNode;
> +    uint32_t minVertex;             // min vertex (for bounds checking)
>     uint32_t maxVertex;             // size / pitch.  precalculated value used by fetch shader for OOB checks
>     uint32_t partialInboundsSize;   // size % pitch.  precalculated value used by fetch shader for partially OOB vertices
> };
> diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
> index 901bce6..ffa7605 100644
> --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
> +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
> @@ -309,11 +309,29 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* str
> 
>         Value* startVertexOffset = MUL(Z_EXT(startOffset, mInt64Ty), stride);
> 
> +        Value *minVertex = NULL;
> +        Value *minVertexOffset = NULL;
> +        if (fetchState.bPartialVertexBuffer) {
> +            // fetch min index for low bounds checking
> +            minVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_minVertex)});
> +            minVertex = LOAD(minVertex);
> +            if (!fetchState.bDisableIndexOOBCheck) {
> +                minVertexOffset = MUL(Z_EXT(minVertex, mInt64Ty), stride);
> +            }
> +        }
> +
>         // Load from the stream.
>         for(uint32_t lane = 0; lane < mVWidth; ++lane)
>         {
>             // Get index
>             Value* index = VEXTRACT(vCurIndices, C(lane));
> +
> +            if (fetchState.bPartialVertexBuffer) {
> +                // clamp below minvertex
> +                Value *isBelowMin = ICMP_SLT(index, minVertex);
> +                index = SELECT(isBelowMin, minVertex, index);
> +            }
> +
>             index = Z_EXT(index, mInt64Ty);
> 
>             Value*    offset = MUL(index, stride);
> @@ -321,10 +339,14 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* str
>             offset = ADD(offset, startVertexOffset);
> 
>             if (!fetchState.bDisableIndexOOBCheck) {
> -                // check for out of bound access, including partial OOB, and mask them to 0
> +                // check for out of bound access, including partial OOB, and replace them with minVertex
>                 Value *endOffset = ADD(offset, C((int64_t)info.Bpp));
>                 Value *oob = ICMP_ULE(endOffset, size);
> -                offset = SELECT(oob, offset, ConstantInt::get(mInt64Ty, 0));
> +                if (fetchState.bPartialVertexBuffer) {
> +                    offset = SELECT(oob, offset, minVertexOffset);
> +                } else {
> +                    offset = SELECT(oob, offset, ConstantInt::get(mInt64Ty, 0));
> +                }
>             }
> 
>             Value*    pointer = GEP(stream, offset);
> @@ -732,6 +754,13 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
>         Value *maxVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_maxVertex)});
>         maxVertex = LOAD(maxVertex);
> 
> +        Value *minVertex = NULL;
> +        if (fetchState.bPartialVertexBuffer) {
> +            // min vertex index for low bounds OOB checking
> +            minVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_minVertex)});
> +            minVertex = LOAD(minVertex);
> +        }
> +
>         Value *vCurIndices;
>         Value *startOffset;
>         if(ied.InstanceEnable)
> @@ -769,9 +798,16 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
> 
>         // if we have a start offset, subtract from max vertex. Used for OOB check
>         maxVertex = SUB(Z_EXT(maxVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
> -        Value* neg = ICMP_SLT(maxVertex, C((int64_t)0));
> +        Value* maxNeg = ICMP_SLT(maxVertex, C((int64_t)0));
>         // if we have a negative value, we're already OOB. clamp at 0.
> -        maxVertex = SELECT(neg, C(0), TRUNC(maxVertex, mInt32Ty));
> +        maxVertex = SELECT(maxNeg, C(0), TRUNC(maxVertex, mInt32Ty));
> +
> +        if (fetchState.bPartialVertexBuffer) {
> +            // similary for min vertex
> +            minVertex = SUB(Z_EXT(minVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
> +            Value *minNeg = ICMP_SLT(minVertex, C((int64_t)0));
> +            minVertex = SELECT(minNeg, C(0), TRUNC(minVertex, mInt32Ty));
> +        }
> 
>         // Load the in bounds size of a partially valid vertex
>         Value *partialInboundsSize = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_partialInboundsSize)});
> @@ -791,8 +827,20 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
>         Value* vMaxVertex = VBROADCAST(maxVertex);
>         Value* vPartialOOBMask = ICMP_EQ(vCurIndices, vMaxVertex);
> 
> -        // are vertices are fully in bounds?
> -        Value* vGatherMask = ICMP_ULT(vCurIndices, vMaxVertex);
> +        // are vertices fully in bounds?
> +        Value* vMaxGatherMask = ICMP_ULT(vCurIndices, vMaxVertex);
> +
> +        Value *vGatherMask;
> +        if (fetchState.bPartialVertexBuffer) {
> +            // are vertices below minVertex limit?
> +            Value *vMinVertex = VBROADCAST(minVertex);
> +            Value *vMinGatherMask = ICMP_UGE(vCurIndices, vMinVertex);
> +
> +            // only fetch lanes that pass both tests
> +            vGatherMask = AND(vMaxGatherMask, vMinGatherMask);
> +        } else {
> +            vGatherMask = vMaxGatherMask;
> +        }
> 
>         // blend in any partially OOB indices that have valid elements
>         vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask);
> diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
> index 622608a..68c6f60 100644
> --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
> +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
> @@ -104,6 +104,7 @@ struct FETCH_COMPILE_STATE
>     bool bDisableIndexOOBCheck;             // If enabled, FetchJit will exclude index OOB check
>     bool bEnableCutIndex{ false };          // Compares indices with the cut index and returns a cut mask
>     bool bVertexIDOffsetEnable{ false };    // Offset vertexID by StartVertex for non-indexed draws or BaseVertex for indexed draws
> +    bool bPartialVertexBuffer{ false };     // for indexed draws, map illegal indices to a known resident vertex
> 
>     FETCH_COMPILE_STATE(bool disableVGATHER = false, bool diableIndexOOBCheck = false):
>         bDisableVGATHER(disableVGATHER), bDisableIndexOOBCheck(diableIndexOOBCheck){ };
> @@ -117,6 +118,7 @@ struct FETCH_COMPILE_STATE
>         if (bEnableCutIndex != other.bEnableCutIndex) return false;
>         if (cutIndex != other.cutIndex) return false;
>         if (bVertexIDOffsetEnable != other.bVertexIDOffsetEnable) return false;
> +        if (bPartialVertexBuffer != other.bPartialVertexBuffer) return false;
> 
>         for(uint32_t i = 0; i < numAttribs; ++i)
>         {
> diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp
> index 4bdd3bb..f764efe 100644
> --- a/src/gallium/drivers/swr/swr_draw.cpp
> +++ b/src/gallium/drivers/swr/swr_draw.cpp
> @@ -143,6 +143,7 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
>    struct swr_vertex_element_state *velems = ctx->velems;
>    velems->fsState.cutIndex = info->restart_index;
>    velems->fsState.bEnableCutIndex = info->primitive_restart;
> +   velems->fsState.bPartialVertexBuffer = (info->min_index > 0);
> 
>    swr_jit_fetch_key key;
>    swr_generate_fetch_key(key, velems);
> diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
> index 116f19f..5e3d58d 100644
> --- a/src/gallium/drivers/swr/swr_state.cpp
> +++ b/src/gallium/drivers/swr/swr_state.cpp
> @@ -1106,6 +1106,7 @@ swr_update_derived(struct pipe_context *pipe,
>       SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
>       for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
>          uint32_t size, pitch, elems, partial_inbounds;
> +         uint32_t min_vertex_index;
>          const uint8_t *p_data;
>          struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
> 
> @@ -1117,6 +1118,7 @@ swr_update_derived(struct pipe_context *pipe,
>             size = vb->buffer->width0;
>             elems = size / pitch;
>             partial_inbounds = size % pitch;
> +            min_vertex_index = 0;
> 
>             p_data = swr_resource_data(vb->buffer) + vb->buffer_offset;
>          } else {
> @@ -1128,6 +1130,7 @@ swr_update_derived(struct pipe_context *pipe,
>             uint32_t base;
>             swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
>             partial_inbounds = 0;
> +            min_vertex_index = info.min_index;
> 
>             /* Copy only needed vertices to scratch space */
>             size = AlignUp(size, 4);
> @@ -1143,6 +1146,7 @@ swr_update_derived(struct pipe_context *pipe,
>          swrVertexBuffers[i].pitch = pitch;
>          swrVertexBuffers[i].pData = p_data;
>          swrVertexBuffers[i].size = size;
> +         swrVertexBuffers[i].minVertex = min_vertex_index;
>          swrVertexBuffers[i].maxVertex = elems;
>          swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
>       }
> -- 
> 2.7.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list