[Mesa-dev] [PATCH] swr: fix range computation for instanced client-side arrays

Cherniak, Bruce bruce.cherniak at intel.com
Wed Nov 30 17:54:56 UTC 2016


Reviewed-by: Bruce Cherniak <bruce.cherniak at intel.com> 

> On Nov 29, 2016, at 8:18 PM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
> 
> We need to take the instance divisor and number of instances into
> account for instanced client-side arrays, rather than the vertex
> parameters.
> 
> Loosely based on the comparable nvc0 logic.
> 
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
> src/gallium/drivers/swr/swr_state.cpp | 74 +++++++++++++++++++++++------------
> src/gallium/drivers/swr/swr_state.h   |  2 +
> 2 files changed, 52 insertions(+), 24 deletions(-)
> 
> diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
> index f1cf4ae..9f6b5b0 100644
> --- a/src/gallium/drivers/swr/swr_state.cpp
> +++ b/src/gallium/drivers/swr/swr_state.cpp
> @@ -494,6 +494,15 @@ swr_create_vertex_elements_state(struct pipe_context *pipe,
>          const SWR_FORMAT_INFO &swr_desc = GetFormatInfo(
>             mesa_to_swr_format(attribs[i].src_format));
>          velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp;
> +
> +         if (attribs[i].instance_divisor != 0) {
> +            velems->instanced_bufs |= 1U << attribs[i].vertex_buffer_index;
> +            uint32_t *min_instance_div =
> +               &velems->min_instance_div[attribs[i].vertex_buffer_index];
> +            if (!*min_instance_div ||
> +                attribs[i].instance_divisor < *min_instance_div)
> +               *min_instance_div = attribs[i].instance_divisor;
> +         }
>       }
>    }
> 
> @@ -870,6 +879,34 @@ swr_change_rt(struct swr_context *ctx,
>    return need_fence;
> }
> 
> +static inline void
> +swr_user_vbuf_range(const struct pipe_draw_info *info,
> +                    const struct swr_vertex_element_state *velems,
> +                    const struct pipe_vertex_buffer *vb,
> +                    uint32_t i,
> +                    uint32_t *totelems,
> +                    uint32_t *base,
> +                    uint32_t *size)
> +{
> +   /* FIXME: The size is too large - we don't access the full extra stride. */
> +   unsigned elems;
> +   if (velems->instanced_bufs & (1U << i)) {
> +      elems = info->instance_count / velems->min_instance_div[i] + 1;
> +      *totelems = info->start_instance + elems;
> +      *base = info->start_instance * vb->stride;
> +      *size = elems * vb->stride;
> +   } else if (vb->stride) {
> +      elems = info->max_index - info->min_index + 1;
> +      *totelems = info->max_index + 1;
> +      *base = info->min_index * vb->stride;
> +      *size = elems * vb->stride;
> +   } else {
> +      *totelems = 1;
> +      *base = 0;
> +      *size = velems->stream_pitch[i];
> +   }
> +}
> +
> void
> swr_update_derived(struct pipe_context *pipe,
>                    const struct pipe_draw_info *p_draw_info)
> @@ -1039,8 +1076,7 @@ swr_update_derived(struct pipe_context *pipe,
>    /* Set vertex & index buffers */
>    /* (using draw info if called by swr_draw_vbo) */
>    if (ctx->dirty & SWR_NEW_VERTEX) {
> -      uint32_t size, pitch, max_vertex, partial_inbounds, scratch_total;
> -      const uint8_t *p_data;
> +      uint32_t scratch_total;
>       uint8_t *scratch = NULL;
> 
>       /* If being called by swr_draw_vbo, copy draw details */
> @@ -1056,14 +1092,8 @@ swr_update_derived(struct pipe_context *pipe,
>          if (!vb->user_buffer)
>             continue;
> 
> -         if (vb->stride) {
> -            size = (info.max_index - info.min_index + 1) * vb->stride;
> -         } else {
> -            /* pitch = 0, means constant value
> -             * set size to 1 vertex */
> -            size = ctx->velems->stream_pitch[i];
> -         }
> -
> +         uint32_t elems, base, size;
> +         swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
>          scratch_total += AlignUp(size, 4);
>       }
> 
> @@ -1075,6 +1105,8 @@ swr_update_derived(struct pipe_context *pipe,
>       /* vertex buffers */
>       SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
>       for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
> +         uint32_t size, pitch, elems, partial_inbounds;
> +         const uint8_t *p_data;
>          struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
> 
>          pitch = vb->stride;
> @@ -1083,7 +1115,7 @@ swr_update_derived(struct pipe_context *pipe,
>              * size is based on buffer->width0 rather than info.max_index
>              * to prevent having to validate VBO on each draw */
>             size = vb->buffer->width0;
> -            max_vertex = size / pitch;
> +            elems = size / pitch;
>             partial_inbounds = size % pitch;
> 
>             p_data = swr_resource_data(vb->buffer) + vb->buffer_offset;
> @@ -1093,25 +1125,17 @@ swr_update_derived(struct pipe_context *pipe,
>              * revalidate on each draw */
>             post_update_dirty_flags |= SWR_NEW_VERTEX;
> 
> -            if (pitch) {
> -               size = (info.max_index - info.min_index + 1) * pitch;
> -            } else {
> -               /* pitch = 0, means constant value
> -                * set size to 1 vertex */
> -               size = ctx->velems->stream_pitch[i];
> -            }
> -
> -            max_vertex = info.max_index + 1;
> +            uint32_t base;
> +            swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
>             partial_inbounds = 0;
> 
>             /* Copy only needed vertices to scratch space */
>             size = AlignUp(size, 4);
> -            const void *ptr = (const uint8_t *) vb->user_buffer
> -               + info.min_index * pitch;
> +            const void *ptr = (const uint8_t *) vb->user_buffer + base;
>             memcpy(scratch, ptr, size);
>             ptr = scratch;
>             scratch += size;
> -            p_data = (const uint8_t *)ptr - info.min_index * pitch;
> +            p_data = (const uint8_t *)ptr - base;
>          }
> 
>          swrVertexBuffers[i] = {0};
> @@ -1119,7 +1143,7 @@ swr_update_derived(struct pipe_context *pipe,
>          swrVertexBuffers[i].pitch = pitch;
>          swrVertexBuffers[i].pData = p_data;
>          swrVertexBuffers[i].size = size;
> -         swrVertexBuffers[i].maxVertex = max_vertex;
> +         swrVertexBuffers[i].maxVertex = elems;
>          swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
>       }
> 
> @@ -1129,6 +1153,8 @@ swr_update_derived(struct pipe_context *pipe,
>       /* index buffer, if required (info passed in by swr_draw_vbo) */
>       SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */
>       if (info.indexed) {
> +         const uint8_t *p_data;
> +         uint32_t size, pitch;
>          struct pipe_index_buffer *ib = &ctx->index_buffer;
> 
>          pitch = ib->index_size ? ib->index_size : sizeof(uint32_t);
> diff --git a/src/gallium/drivers/swr/swr_state.h b/src/gallium/drivers/swr/swr_state.h
> index 17d4873..1960abd 100644
> --- a/src/gallium/drivers/swr/swr_state.h
> +++ b/src/gallium/drivers/swr/swr_state.h
> @@ -72,6 +72,8 @@ struct swr_vertex_element_state {
>    FETCH_COMPILE_STATE fsState;
>    PFN_FETCH_FUNC fsFunc;
>    uint32_t stream_pitch[PIPE_MAX_ATTRIBS];
> +   uint32_t min_instance_div[PIPE_MAX_ATTRIBS];
> +   uint32_t instanced_bufs;
> };
> 
> struct swr_blend_state {
> -- 
> 2.7.3
> 



More information about the mesa-dev mailing list