[Mesa-dev] [PATCH 16/17] st/mesa: upload zero-stride vertex attributes here

Thu May 4 09:46:13 UTC 2017

On 04.05.2017 00:02, Marek Olšák wrote:
> On Wed, May 3, 2017 at 6:04 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
>> On 01.05.2017 14:53, Marek Olšák wrote:
>>>
>>> From: Marek Olšák <marek.olsak at amd.com>
>>>
>>> This is the best place to do it. Now drivers without u_vbuf don't have to
>>> do it.
>>> ---
>>>  src/mesa/state_tracker/st_atom_array.c | 56
>>> ++++++++++++++++++++++++----------
>>>  src/mesa/state_tracker/st_context.c    |  2 ++
>>>  src/mesa/state_tracker/st_context.h    |  1 +
>>>  3 files changed, 43 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/src/mesa/state_tracker/st_atom_array.c
>>> b/src/mesa/state_tracker/st_atom_array.c
>>> index cc9cac1..813468b 100644
>>> --- a/src/mesa/state_tracker/st_atom_array.c
>>> +++ b/src/mesa/state_tracker/st_atom_array.c
>>> @@ -37,20 +37,21 @@
>>>   */
>>>
>>>  #include "st_context.h"
>>>  #include "st_atom.h"
>>>  #include "st_cb_bufferobjects.h"
>>>  #include "st_draw.h"
>>>  #include "st_program.h"
>>>
>>>  #include "cso_cache/cso_context.h"
>>>  #include "util/u_math.h"
>>> +#include "util/u_upload_mgr.h"
>>>  #include "main/bufferobj.h"
>>>  #include "main/glformats.h"
>>>
>>>  /* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */
>>>  static const uint16_t vertex_formats[][4][4] = {
>>>     { /* GL_BYTE */
>>>        {
>>>           PIPE_FORMAT_R8_SSCALED,
>>>           PIPE_FORMAT_R8G8_SSCALED,
>>>           PIPE_FORMAT_R8G8B8_SSCALED,
>>> @@ -327,20 +328,25 @@ is_interleaved_arrays(const struct st_vertex_program
>>> *vp,
>>>     for (attr = 0; attr < num_inputs; attr++) {
>>>        const struct gl_vertex_array *array;
>>>        const struct gl_buffer_object *bufObj;
>>>        GLsizei stride;
>>>
>>>        array = get_client_array(arrays, vp->index_to_input[attr]);
>>>        if (!array)
>>>          continue;
>>>
>>>        stride = array->StrideB; /* in bytes */
>>> +
>>> +      /* To keep things simple, don't allow interleaved zero-stride
>>> attribs. */
>>> +      if (stride == 0)
>>> +         return false;
>>> +
>>>        bufObj = array->BufferObj;
>>>        if (attr == 0) {
>>>           /* save info about the first array */
>>>           firstStride = stride;
>>>           firstPtr = array->Ptr;
>>>           firstBufObj = bufObj;
>>>           userSpaceBuffer = !bufObj || !bufObj->Name;
>>>        }
>>>        else {
>>>           /* check if other arrays interleave with the first, in same
>>> buffer */
>>> @@ -564,20 +570,21 @@ setup_interleaved_attribs(struct st_context *st,
>>>  static void
>>>  setup_non_interleaved_attribs(struct st_context *st,
>>>                                const struct st_vertex_program *vp,
>>>                                const struct gl_vertex_array **arrays,
>>>                                unsigned num_inputs)
>>>  {
>>>     struct gl_context *ctx = st->ctx;
>>>     struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
>>>     struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
>>>     unsigned num_vbuffers = 0;
>>> +   unsigned unref_buffers = 0;
>>>     GLuint attr;
>>>
>>>     for (attr = 0; attr < num_inputs;) {
>>>        const unsigned mesaAttr = vp->index_to_input[attr];
>>>        const struct gl_vertex_array *array;
>>>        struct gl_buffer_object *bufobj;
>>>        GLsizei stride;
>>>        unsigned src_format;
>>>        unsigned bufidx;
>>>
>>> @@ -601,54 +608,71 @@ setup_non_interleaved_attribs(struct st_context *st,
>>>           if (!stobj || !stobj->buffer) {
>>>              st->vertex_array_out_of_memory = true;
>>>              return; /* out-of-memory error probably */
>>>           }
>>>
>>>           vbuffer[bufidx].buffer.resource = stobj->buffer;
>>>           vbuffer[bufidx].is_user_buffer = false;
>>>           vbuffer[bufidx].buffer_offset = pointer_to_offset(array->Ptr);
>>>        }
>>>        else {
>>> -         /* wrap user data */
>>> -         void *ptr;
>>> -
>>> -         if (array->Ptr) {
>>> -            ptr = (void *) array->Ptr;
>>> -         }
>>> -         else {
>>> -            /* no array, use ctx->Current.Attrib[] value */
>>> -            ptr = (void *) ctx->Current.Attrib[mesaAttr];
>>> -            stride = 0;
>>> +         if (stride == 0) {
>>> +            void *ptr = array->Ptr ? (void*)array->Ptr :
>>> +
>>> (void*)ctx->Current.Attrib[mesaAttr];
>>> +
>>> +            vbuffer[bufidx].is_user_buffer = false;
>>> +            vbuffer[bufidx].buffer.resource = NULL;
>>> +
>>> +            /* Use const_uploader for zero-stride vertex attributes,
>>> because
>>> +             * it may use a better memory placement than stream_uploader.
>>> +             * The reason is that zero-stride attributes can be fetched
>>> many
>>> +             * times (thousands of times), so a better placement is going
>>> to
>>> +             * perform better.
>>> +             *
>>> +             * Upload the maximum possible size, which is 4x GLdouble =
>>> 32.
>>> +             */
>>> +            u_upload_data(st->can_bind_const_buffer_as_vertex ?
>>> +                             st->pipe->const_uploader :
>>> +                             st->pipe->stream_uploader,
>>> +                          0, 32, 32, ptr,
>>> +                          &vbuffer[bufidx].buffer_offset,
>>> +                          &vbuffer[bufidx].buffer.resource);
>>> +            unref_buffers |= 1u << bufidx;
>>
>>
>> In the array->Ptr != NULL case, can we get a more accurate size here?
>> Otherwise we might read off the end of allocated memory and crash.
>>
>> This wasn't an issue before in radeonsi, because we still use u_vbuf with
>> compatibility profiles.
>
> OK, I'm adding this. Rb?
>
> diff --git a/src/mesa/state_tracker/st_atom_array.c
> b/src/mesa/state_tracker/st_atom_array.c
> index 813468b..e8c95f3 100644
> --- a/src/mesa/state_tracker/st_atom_array.c
> +++ b/src/mesa/state_tracker/st_atom_array.c
> @@ -616,6 +616,7 @@ setup_non_interleaved_attribs(struct st_context *st,
>        }
>        else {
>           if (stride == 0) {
> +            unsigned size = array->_ElementSize;
>              void *ptr = array->Ptr ? (void*)array->Ptr :
>                                       (void*)ctx->Current.Attrib[mesaAttr];
>
> @@ -633,7 +634,7 @@ setup_non_interleaved_attribs(struct st_context *st,
>              u_upload_data(st->can_bind_const_buffer_as_vertex ?
>                               st->pipe->const_uploader :
>                               st->pipe->stream_uploader,
> -                          0, 32, 32, ptr,
> +                          0, size, size, ptr,

Alignment should still be 32, because _ElementSize can be 
non-power-of-two. Or you could use util_next_power_of_two, that could be 
a bit nicer on caches. With either option:

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

>                            &vbuffer[bufidx].buffer_offset,
>                            &vbuffer[bufidx].buffer.resource);
>              unref_buffers |= 1u << bufidx;
>
> Marek
>

-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.