[Mesa-dev] [PATCH] vbo: Avoid unnecessary copy to/from current in vertex format upgrade.

Francisco Jerez currojerez at riseup.net
Tue Nov 2 11:21:21 PDT 2010


Keith Whitwell <keithw at vmware.com> writes:

> Francisco,
>
> This looks good - my only comment is that there seem to be two distinct
> changes in this patch -- the modification to VBO behaviour when adding a
> new attribute being one, but the changes to vbo_exec_draw.c seem to be
> an unrelated cleanup.  Is that correct?
>
> Ordinarily I wouldn't bother pointing it out, but VBO is fairly complex
> code & being able to isolate any regression to the smallest change
> possible would be preferable -- would it be possible to split the patch
> into two pieces & commit separately?
>
Yes, I can split it in two pieces if you want, but that hunk isn't an
unrelated cleanup, the bulk of this patch deals with fixing several
places that assume that vertex attributes are stored in VBO_ATTRIB
order, vbo_exec_bind_arrays() was one of them.

> Also regarding testing, it makes sense to run through all the mesa-progs
> demos/ and samples/ directory, as they tend to have a good selection of
> odd cases for this code.
>
Thanks, will do.

> Keith
>
>
> On Mon, 2010-11-01 at 13:06 -0700, Francisco Jerez wrote:
>> Rebuilding the vertex format from scratch every time we see a new
>> vertex attribute is rather costly, new attributes can be appended at
>> the end avoiding a copy to current and then back again, and the full
>> attr pointer recalculation.
>> 
>> In the not so likely case of an already existing attribute having its
>> size increased the old behavior is preserved, this could be optimized
>> more, not sure if it's worth it.
>> 
>> It's a modest improvement in FlightGear (that game punishes the VBO
>> module pretty hard in general, framerate goes from some 46 FPS to 50
>> FPS with the nouveau classic driver).
>> ---
>> I've run piglit before and after and apparently there are no
>> regressions, Tested-by's on different hardware would be appreciated
>> though.
>> 
>>  src/mesa/vbo/vbo_exec_api.c  |   98
>> +++++++++++++++++++++++++-----------------
>>  src/mesa/vbo/vbo_exec_draw.c |   13 ++---
>>  2 files changed, 63 insertions(+), 48 deletions(-)
>> 
>> diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
>> index 4988c51..9b2d59f 100644
>> --- a/src/mesa/vbo/vbo_exec_api.c
>> +++ b/src/mesa/vbo/vbo_exec_api.c
>> @@ -220,8 +220,9 @@ static void vbo_exec_wrap_upgrade_vertex( struct
>> vbo_exec_context *exec,
>>     struct gl_context *ctx = exec->ctx;
>>     struct vbo_context *vbo = vbo_context(ctx);
>>     GLint lastcount = exec->vtx.vert_count;
>> -   GLfloat *tmp;
>> -   GLuint oldsz;
>> +   GLfloat *old_attrptr[VBO_ATTRIB_MAX];
>> +   GLuint old_vtx_size = exec->vtx.vertex_size;
>> +   GLuint oldsz = exec->vtx.attrsz[attr];
>>     GLuint i;
>>  
>>     /* Run pipeline on current vertices, copy wrapped vertices
>> @@ -229,86 +230,103 @@ static void
>> vbo_exec_wrap_upgrade_vertex( struct vbo_exec_context *exec,
>>      */
>>     vbo_exec_wrap_buffers( exec );
>>  
>> +   if (unlikely(exec->vtx.copied.nr)) {
>> +      /* We're in the middle of a primitive, keep the old vertex
>> +       * format around to be able to translate the copied vertices to
>> +       * the new format.
>> +       */
>> +      memcpy(old_attrptr, exec->vtx.attrptr, sizeof(old_attrptr));
>> +   }
>>  
>> -   /* Do a COPY_TO_CURRENT to ensure back-copying works for the case
>> -    * when the attribute already exists in the vertex and is having
>> -    * its size increased.  
>> -    */
>> -   vbo_exec_copy_to_current( exec );
>> -
>> +   if (unlikely(oldsz)) {
>> +      /* Do a COPY_TO_CURRENT to ensure back-copying works for the
>> +       * case when the attribute already exists in the vertex and is
>> +       * having its size increased.
>> +       */
>> +      vbo_exec_copy_to_current( exec );
>> +   }
>>  
>>     /* Heuristic: Attempt to isolate attributes received outside
>>      * begin/end so that they don't bloat the vertices.
>>      */
>>     if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END &&
>> -       exec->vtx.attrsz[attr] == 0 && 
>> -       lastcount > 8 &&
>> -       exec->vtx.vertex_size) {
>> +       !oldsz && lastcount > 8 && exec->vtx.vertex_size) {
>> +      vbo_exec_copy_to_current( exec );
>>        reset_attrfv( exec );
>>     }
>>  
>>     /* Fix up sizes:
>>      */
>> -   oldsz = exec->vtx.attrsz[attr];
>>     exec->vtx.attrsz[attr] = newsz;
>> -
>>     exec->vtx.vertex_size += newsz - oldsz;
>>     exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE -
>> exec->vtx.buffer_used) / 
>>                           (exec->vtx.vertex_size * sizeof(GLfloat)));
>>     exec->vtx.vert_count = 0;
>>     exec->vtx.buffer_ptr = exec->vtx.buffer_map;
>> -   
>>  
>> -   /* Recalculate all the attrptr[] values
>> -    */
>> -   for (i = 0, tmp = exec->vtx.vertex ; i < VBO_ATTRIB_MAX ; i++) {
>> -      if (exec->vtx.attrsz[i]) {
>> -        exec->vtx.attrptr[i] = tmp;
>> -        tmp += exec->vtx.attrsz[i];
>> +   if (unlikely(oldsz)) {
>> +      /* Size changed, recalculate all the attrptr[] values
>> +       */
>> +      GLfloat *tmp = exec->vtx.vertex;
>> +
>> +      for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) {
>> +        if (exec->vtx.attrsz[i]) {
>> +           exec->vtx.attrptr[i] = tmp;
>> +           tmp += exec->vtx.attrsz[i];
>> +        }
>> +        else
>> +           exec->vtx.attrptr[i] = NULL; /* will not be dereferenced
>> */
>>        }
>> -      else 
>> -        exec->vtx.attrptr[i] = NULL; /* will not be dereferenced */
>> -   }
>>  
>> -   /* Copy from current to repopulate the vertex with correct values.
>> -   vbo_exec_copy_from_current( exec );
>> +      /* Copy from current to repopulate the vertex with correct
>> +       * values.
>> +       */
>> +      vbo_exec_copy_from_current( exec );
>> +
>> +   } else {
>> +      /* Just have to append the new attribute at the end */
>> +      exec->vtx.attrptr[attr] = exec->vtx.vertex +
>> +        exec->vtx.vertex_size - newsz;
>> +   }
>>  
>>     /* Replay stored vertices to translate them
>>      * to new format here.
>>      *
>>      * -- No need to replay - just copy piecewise
>>      */
>> -   if (exec->vtx.copied.nr)
>> -   {
>> +   if (unlikely(exec->vtx.copied.nr)) {
>>        GLfloat *data = exec->vtx.copied.buffer;
>>        GLfloat *dest = exec->vtx.buffer_ptr;
>>        GLuint j;
>>  
>>        assert(exec->vtx.buffer_ptr == exec->vtx.buffer_map);
>> -      
>> +
>>        for (i = 0 ; i < exec->vtx.copied.nr ; i++) {
>>          for (j = 0 ; j < VBO_ATTRIB_MAX ; j++) {
>> -           if (exec->vtx.attrsz[j]) {
>> +           GLuint sz = exec->vtx.attrsz[j];
>> +
>> +           if (sz) {
>> +              GLint old_offset = old_attrptr[j] - exec->vtx.vertex;
>> +              GLint new_offset = exec->vtx.attrptr[j] -
>> exec->vtx.vertex;
>> +
>>                if (j == attr) {
>>                   if (oldsz) {
>> -                    COPY_CLEAN_4V( dest, oldsz, data );
>> -                    data += oldsz;
>> -                    dest += newsz;
>> +                    GLfloat tmp[4];
>> +                    COPY_CLEAN_4V(tmp, oldsz, data + old_offset);
>> +                    COPY_SZ_4V(dest + new_offset, newsz, tmp);
>>                   } else {
>> -                    const GLfloat *current = (const GLfloat
>> *)vbo->currval[j].Ptr;
>> -                    COPY_SZ_4V( dest, newsz, current );
>> -                    dest += newsz;
>> +                    GLfloat *current = (GLfloat
>> *)vbo->currval[j].Ptr;
>> +                    COPY_SZ_4V(dest + new_offset, sz, current);
>>                   }
>>                }
>>                else {
>> -                 GLuint sz = exec->vtx.attrsz[j];
>> -                 COPY_SZ_4V( dest, sz, data );
>> -                 dest += sz;
>> -                 data += sz;
>> +                 COPY_SZ_4V(dest + new_offset, sz, data +
>> old_offset);
>>                }
>>             }
>>          }
>> +
>> +        data += old_vtx_size;
>> +        dest += exec->vtx.vertex_size;
>>        }
>>  
>>        exec->vtx.buffer_ptr = dest;
>> diff --git a/src/mesa/vbo/vbo_exec_draw.c
>> b/src/mesa/vbo/vbo_exec_draw.c
>> index 71ac006..94aa021 100644
>> --- a/src/mesa/vbo/vbo_exec_draw.c
>> +++ b/src/mesa/vbo/vbo_exec_draw.c
>> @@ -160,7 +160,6 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
>>     struct vbo_exec_context *exec = &vbo->exec;
>>     struct gl_client_array *arrays = exec->vtx.arrays;
>>     const GLuint count = exec->vtx.vert_count;
>> -   const GLubyte *data = (GLubyte *) exec->vtx.buffer_map;
>>     const GLuint *map;
>>     GLuint attr;
>>     GLbitfield varying_inputs = 0x0;
>> @@ -215,6 +214,9 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
>>        const GLuint src = map[attr];
>>  
>>        if (exec->vtx.attrsz[src]) {
>> +        GLsizeiptr offset = (GLbyte *)exec->vtx.attrptr[src] -
>> +           (GLbyte *)exec->vtx.vertex;
>> +
>>           /* override the default array set above */
>>           ASSERT(attr < Elements(exec->vtx.inputs));
>>           ASSERT(attr < Elements(exec->vtx.arrays)); /* arrays[] */
>> @@ -222,17 +224,13 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
>>  
>>           if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
>>              /* a real buffer obj: Ptr is an offset, not a pointer*/
>> -            GLsizeiptr offset;
>>              assert(exec->vtx.bufferobj->Pointer);  /* buf should be
>> mapped */
>> -            offset = (GLbyte *) data -
>> -              (GLbyte *) exec->vtx.bufferobj->Pointer +
>> -              exec->vtx.bufferobj->Offset;
>>              assert(offset >= 0);
>> -            arrays[attr].Ptr = (void *) offset;
>> +            arrays[attr].Ptr = (GLubyte *)exec->vtx.bufferobj->Offset
>> + offset;
>>           }
>>           else {
>>              /* Ptr into ordinary app memory */
>> -            arrays[attr].Ptr = (void *) data;
>> +            arrays[attr].Ptr = (GLubyte *)exec->vtx.buffer_map +
>> offset;
>>           }
>>          arrays[attr].Size = exec->vtx.attrsz[src];
>>          arrays[attr].StrideB = exec->vtx.vertex_size *
>> sizeof(GLfloat);
>> @@ -245,7 +243,6 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
>>                                         exec->vtx.bufferobj);
>>          arrays[attr]._MaxElement = count; /* ??? */
>>  
>> -        data += exec->vtx.attrsz[src] * sizeof(GLfloat);
>>           varying_inputs |= 1 << attr;
>>        }
>>     }
>> -- 
>> 1.6.4.4
>> 
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 229 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20101102/103aac3c/attachment.pgp>


More information about the mesa-dev mailing list