[Mesa-dev] [PATCH 16/17] st/mesa: upload zero-stride vertex attributes here

Wed May 3 16:04:15 UTC 2017

On 01.05.2017 14:53, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> This is the best place to do it. Now drivers without u_vbuf don't have to
> do it.
> ---
>  src/mesa/state_tracker/st_atom_array.c | 56 ++++++++++++++++++++++++----------
>  src/mesa/state_tracker/st_context.c    |  2 ++
>  src/mesa/state_tracker/st_context.h    |  1 +
>  3 files changed, 43 insertions(+), 16 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c
> index cc9cac1..813468b 100644
> --- a/src/mesa/state_tracker/st_atom_array.c
> +++ b/src/mesa/state_tracker/st_atom_array.c
> @@ -37,20 +37,21 @@
>   */
>
>  #include "st_context.h"
>  #include "st_atom.h"
>  #include "st_cb_bufferobjects.h"
>  #include "st_draw.h"
>  #include "st_program.h"
>
>  #include "cso_cache/cso_context.h"
>  #include "util/u_math.h"
> +#include "util/u_upload_mgr.h"
>  #include "main/bufferobj.h"
>  #include "main/glformats.h"
>
>  /* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */
>  static const uint16_t vertex_formats[][4][4] = {
>     { /* GL_BYTE */
>        {
>           PIPE_FORMAT_R8_SSCALED,
>           PIPE_FORMAT_R8G8_SSCALED,
>           PIPE_FORMAT_R8G8B8_SSCALED,
> @@ -327,20 +328,25 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
>     for (attr = 0; attr < num_inputs; attr++) {
>        const struct gl_vertex_array *array;
>        const struct gl_buffer_object *bufObj;
>        GLsizei stride;
>
>        array = get_client_array(arrays, vp->index_to_input[attr]);
>        if (!array)
>  	 continue;
>
>        stride = array->StrideB; /* in bytes */
> +
> +      /* To keep things simple, don't allow interleaved zero-stride attribs. */
> +      if (stride == 0)
> +         return false;
> +
>        bufObj = array->BufferObj;
>        if (attr == 0) {
>           /* save info about the first array */
>           firstStride = stride;
>           firstPtr = array->Ptr;
>           firstBufObj = bufObj;
>           userSpaceBuffer = !bufObj || !bufObj->Name;
>        }
>        else {
>           /* check if other arrays interleave with the first, in same buffer */
> @@ -564,20 +570,21 @@ setup_interleaved_attribs(struct st_context *st,
>  static void
>  setup_non_interleaved_attribs(struct st_context *st,
>                                const struct st_vertex_program *vp,
>                                const struct gl_vertex_array **arrays,
>                                unsigned num_inputs)
>  {
>     struct gl_context *ctx = st->ctx;
>     struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
>     struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
>     unsigned num_vbuffers = 0;
> +   unsigned unref_buffers = 0;
>     GLuint attr;
>
>     for (attr = 0; attr < num_inputs;) {
>        const unsigned mesaAttr = vp->index_to_input[attr];
>        const struct gl_vertex_array *array;
>        struct gl_buffer_object *bufobj;
>        GLsizei stride;
>        unsigned src_format;
>        unsigned bufidx;
>
> @@ -601,54 +608,71 @@ setup_non_interleaved_attribs(struct st_context *st,
>           if (!stobj || !stobj->buffer) {
>              st->vertex_array_out_of_memory = true;
>              return; /* out-of-memory error probably */
>           }
>
>           vbuffer[bufidx].buffer.resource = stobj->buffer;
>           vbuffer[bufidx].is_user_buffer = false;
>           vbuffer[bufidx].buffer_offset = pointer_to_offset(array->Ptr);
>        }
>        else {
> -         /* wrap user data */
> -         void *ptr;
> -
> -         if (array->Ptr) {
> -            ptr = (void *) array->Ptr;
> -         }
> -         else {
> -            /* no array, use ctx->Current.Attrib[] value */
> -            ptr = (void *) ctx->Current.Attrib[mesaAttr];
> -            stride = 0;
> +         if (stride == 0) {
> +            void *ptr = array->Ptr ? (void*)array->Ptr :
> +                                     (void*)ctx->Current.Attrib[mesaAttr];
> +
> +            vbuffer[bufidx].is_user_buffer = false;
> +            vbuffer[bufidx].buffer.resource = NULL;
> +
> +            /* Use const_uploader for zero-stride vertex attributes, because
> +             * it may use a better memory placement than stream_uploader.
> +             * The reason is that zero-stride attributes can be fetched many
> +             * times (thousands of times), so a better placement is going to
> +             * perform better.
> +             *
> +             * Upload the maximum possible size, which is 4x GLdouble = 32.
> +             */
> +            u_upload_data(st->can_bind_const_buffer_as_vertex ?
> +                             st->pipe->const_uploader :
> +                             st->pipe->stream_uploader,
> +                          0, 32, 32, ptr,
> +                          &vbuffer[bufidx].buffer_offset,
> +                          &vbuffer[bufidx].buffer.resource);
> +            unref_buffers |= 1u << bufidx;

In the array->Ptr != NULL case, can we get a more accurate size here? 
Otherwise we might read off the end of allocated memory and crash.

This wasn't an issue before in radeonsi, because we still use u_vbuf 
with compatibility profiles.

Cheers,
Nicolai

> +         } else {
> +            assert(array->Ptr);
> +            vbuffer[bufidx].buffer.user = array->Ptr;
> +            vbuffer[bufidx].is_user_buffer = true;
> +            vbuffer[bufidx].buffer_offset = 0;
>           }
> -
> -         assert(ptr);
> -
> -         vbuffer[bufidx].buffer.user = ptr;
> -         vbuffer[bufidx].is_user_buffer = !!ptr; /* if NULL, then unbind */
> -         vbuffer[bufidx].buffer_offset = 0;
>        }
>
>        /* common-case setup */
>        vbuffer[bufidx].stride = stride; /* in bytes */
>
>        src_format = st_pipe_vertex_format(array->Type,
>                                           array->Size,
>                                           array->Format,
>                                           array->Normalized,
>                                           array->Integer);
>
>        init_velement_lowered(vp, velements, 0, src_format,
>                              array->InstanceDivisor, bufidx,
>                              array->Size, array->Doubles, &attr);
>     }
>
>     set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_inputs);
> +
> +   /* Unreference uploaded zero-stride vertex buffers. */
> +   while (unref_buffers) {
> +      unsigned i = u_bit_scan(&unref_buffers);
> +      pipe_resource_reference(&vbuffer[i].buffer.resource, NULL);
> +   }
>  }
>
>  void st_update_array(struct st_context *st)
>  {
>     struct gl_context *ctx = st->ctx;
>     const struct gl_vertex_array **arrays = ctx->Array._DrawArrays;
>     const struct st_vertex_program *vp;
>     unsigned num_inputs;
>
>     st->vertex_array_out_of_memory = FALSE;
> diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
> index d997911..c901764 100644
> --- a/src/mesa/state_tracker/st_context.c
> +++ b/src/mesa/state_tracker/st_context.c
> @@ -331,20 +331,22 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
>     st->ctx = ctx;
>     st->pipe = pipe;
>
>     /* state tracker needs the VBO module */
>     _vbo_CreateContext(ctx);
>
>     st->dirty = ST_ALL_STATES_MASK;
>
>     st->has_user_constbuf =
>        screen->get_param(screen, PIPE_CAP_USER_CONSTANT_BUFFERS);
> +   st->can_bind_const_buffer_as_vertex =
> +      screen->get_param(screen, PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX);
>
>     /* Drivers still have to upload zero-stride vertex attribs manually
>      * with the GL core profile, but they don't have to deal with any complex
>      * user vertex buffer uploads.
>      */
>     unsigned vbuf_flags =
>        ctx->API == API_OPENGL_CORE ? U_VBUF_FLAG_NO_USER_VBOS : 0;
>     st->cso_context = cso_create_context(pipe, vbuf_flags);
>
>     st_init_atoms( st );
> diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
> index eb1a163..68fd9b9 100644
> --- a/src/mesa/state_tracker/st_context.h
> +++ b/src/mesa/state_tracker/st_context.h
> @@ -97,20 +97,21 @@ struct st_context
>     boolean has_time_elapsed;
>     boolean has_shader_model3;
>     boolean has_etc1;
>     boolean has_etc2;
>     boolean prefer_blit_based_texture_transfer;
>     boolean force_persample_in_shader;
>     boolean has_shareable_shaders;
>     boolean has_half_float_packing;
>     boolean has_multi_draw_indirect;
>     boolean has_user_constbuf;
> +   boolean can_bind_const_buffer_as_vertex;
>
>     /**
>      * If a shader can be created when we get its source.
>      * This means it has only 1 variant, not counting glBitmap and
>      * glDrawPixels.
>      */
>     boolean shader_has_one_variant[MESA_SHADER_STAGES];
>
>     boolean needs_texcoord_semantic;
>     boolean apply_texture_swizzle_to_border_color;
>

-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.