[Mesa-dev] [PATCH] vbo: Avoid unnecessary copy to/from current in vertex format upgrade.

Keith Whitwell keithw at vmware.com
Tue Nov 2 04:01:07 PDT 2010


Francisco,

This looks good - my only comment is that there seem to be two distinct
changes in this patch -- the modification to VBO behaviour when adding a
new attribute being one, but the changes to vbo_exec_draw.c seem to be
an unrelated cleanup.  Is that correct?

Ordinarily I wouldn't bother pointing it out, but VBO is fairly complex
code & being able to isolate any regression to the smallest change
possible would be preferable -- would it be possible to split the patch
into two pieces & commit separately?

Also regarding testing, it makes sense to run through all the mesa-progs
demos/ and samples/ directory, as they tend to have a good selection of
odd cases for this code.

Keith


On Mon, 2010-11-01 at 13:06 -0700, Francisco Jerez wrote:
> Rebuilding the vertex format from scratch every time we see a new
> vertex attribute is rather costly, new attributes can be appended at
> the end avoiding a copy to current and then back again, and the full
> attr pointer recalculation.
> 
> In the not so likely case of an already existing attribute having its
> size increased the old behavior is preserved, this could be optimized
> more, not sure if it's worth it.
> 
> It's a modest improvement in FlightGear (that game punishes the VBO
> module pretty hard in general, framerate goes from some 46 FPS to 50
> FPS with the nouveau classic driver).
> ---
> I've run piglit before and after and apparently there are no
> regressions, Tested-by's on different hardware would be appreciated
> though.
> 
>  src/mesa/vbo/vbo_exec_api.c  |   98
> +++++++++++++++++++++++++-----------------
>  src/mesa/vbo/vbo_exec_draw.c |   13 ++---
>  2 files changed, 63 insertions(+), 48 deletions(-)
> 
> diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
> index 4988c51..9b2d59f 100644
> --- a/src/mesa/vbo/vbo_exec_api.c
> +++ b/src/mesa/vbo/vbo_exec_api.c
> @@ -220,8 +220,9 @@ static void vbo_exec_wrap_upgrade_vertex( struct
> vbo_exec_context *exec,
>     struct gl_context *ctx = exec->ctx;
>     struct vbo_context *vbo = vbo_context(ctx);
>     GLint lastcount = exec->vtx.vert_count;
> -   GLfloat *tmp;
> -   GLuint oldsz;
> +   GLfloat *old_attrptr[VBO_ATTRIB_MAX];
> +   GLuint old_vtx_size = exec->vtx.vertex_size;
> +   GLuint oldsz = exec->vtx.attrsz[attr];
>     GLuint i;
>  
>     /* Run pipeline on current vertices, copy wrapped vertices
> @@ -229,86 +230,103 @@ static void
> vbo_exec_wrap_upgrade_vertex( struct vbo_exec_context *exec,
>      */
>     vbo_exec_wrap_buffers( exec );
>  
> +   if (unlikely(exec->vtx.copied.nr)) {
> +      /* We're in the middle of a primitive, keep the old vertex
> +       * format around to be able to translate the copied vertices to
> +       * the new format.
> +       */
> +      memcpy(old_attrptr, exec->vtx.attrptr, sizeof(old_attrptr));
> +   }
>  
> -   /* Do a COPY_TO_CURRENT to ensure back-copying works for the case
> -    * when the attribute already exists in the vertex and is having
> -    * its size increased.  
> -    */
> -   vbo_exec_copy_to_current( exec );
> -
> +   if (unlikely(oldsz)) {
> +      /* Do a COPY_TO_CURRENT to ensure back-copying works for the
> +       * case when the attribute already exists in the vertex and is
> +       * having its size increased.
> +       */
> +      vbo_exec_copy_to_current( exec );
> +   }
>  
>     /* Heuristic: Attempt to isolate attributes received outside
>      * begin/end so that they don't bloat the vertices.
>      */
>     if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END &&
> -       exec->vtx.attrsz[attr] == 0 && 
> -       lastcount > 8 &&
> -       exec->vtx.vertex_size) {
> +       !oldsz && lastcount > 8 && exec->vtx.vertex_size) {
> +      vbo_exec_copy_to_current( exec );
>        reset_attrfv( exec );
>     }
>  
>     /* Fix up sizes:
>      */
> -   oldsz = exec->vtx.attrsz[attr];
>     exec->vtx.attrsz[attr] = newsz;
> -
>     exec->vtx.vertex_size += newsz - oldsz;
>     exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE -
> exec->vtx.buffer_used) / 
>                           (exec->vtx.vertex_size * sizeof(GLfloat)));
>     exec->vtx.vert_count = 0;
>     exec->vtx.buffer_ptr = exec->vtx.buffer_map;
> -   
>  
> -   /* Recalculate all the attrptr[] values
> -    */
> -   for (i = 0, tmp = exec->vtx.vertex ; i < VBO_ATTRIB_MAX ; i++) {
> -      if (exec->vtx.attrsz[i]) {
> -        exec->vtx.attrptr[i] = tmp;
> -        tmp += exec->vtx.attrsz[i];
> +   if (unlikely(oldsz)) {
> +      /* Size changed, recalculate all the attrptr[] values
> +       */
> +      GLfloat *tmp = exec->vtx.vertex;
> +
> +      for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) {
> +        if (exec->vtx.attrsz[i]) {
> +           exec->vtx.attrptr[i] = tmp;
> +           tmp += exec->vtx.attrsz[i];
> +        }
> +        else
> +           exec->vtx.attrptr[i] = NULL; /* will not be dereferenced
> */
>        }
> -      else 
> -        exec->vtx.attrptr[i] = NULL; /* will not be dereferenced */
> -   }
>  
> -   /* Copy from current to repopulate the vertex with correct values.
> -   vbo_exec_copy_from_current( exec );
> +      /* Copy from current to repopulate the vertex with correct
> +       * values.
> +       */
> +      vbo_exec_copy_from_current( exec );
> +
> +   } else {
> +      /* Just have to append the new attribute at the end */
> +      exec->vtx.attrptr[attr] = exec->vtx.vertex +
> +        exec->vtx.vertex_size - newsz;
> +   }
>  
>     /* Replay stored vertices to translate them
>      * to new format here.
>      *
>      * -- No need to replay - just copy piecewise
>      */
> -   if (exec->vtx.copied.nr)
> -   {
> +   if (unlikely(exec->vtx.copied.nr)) {
>        GLfloat *data = exec->vtx.copied.buffer;
>        GLfloat *dest = exec->vtx.buffer_ptr;
>        GLuint j;
>  
>        assert(exec->vtx.buffer_ptr == exec->vtx.buffer_map);
> -      
> +
>        for (i = 0 ; i < exec->vtx.copied.nr ; i++) {
>          for (j = 0 ; j < VBO_ATTRIB_MAX ; j++) {
> -           if (exec->vtx.attrsz[j]) {
> +           GLuint sz = exec->vtx.attrsz[j];
> +
> +           if (sz) {
> +              GLint old_offset = old_attrptr[j] - exec->vtx.vertex;
> +              GLint new_offset = exec->vtx.attrptr[j] -
> exec->vtx.vertex;
> +
>                if (j == attr) {
>                   if (oldsz) {
> -                    COPY_CLEAN_4V( dest, oldsz, data );
> -                    data += oldsz;
> -                    dest += newsz;
> +                    GLfloat tmp[4];
> +                    COPY_CLEAN_4V(tmp, oldsz, data + old_offset);
> +                    COPY_SZ_4V(dest + new_offset, newsz, tmp);
>                   } else {
> -                    const GLfloat *current = (const GLfloat
> *)vbo->currval[j].Ptr;
> -                    COPY_SZ_4V( dest, newsz, current );
> -                    dest += newsz;
> +                    GLfloat *current = (GLfloat
> *)vbo->currval[j].Ptr;
> +                    COPY_SZ_4V(dest + new_offset, sz, current);
>                   }
>                }
>                else {
> -                 GLuint sz = exec->vtx.attrsz[j];
> -                 COPY_SZ_4V( dest, sz, data );
> -                 dest += sz;
> -                 data += sz;
> +                 COPY_SZ_4V(dest + new_offset, sz, data +
> old_offset);
>                }
>             }
>          }
> +
> +        data += old_vtx_size;
> +        dest += exec->vtx.vertex_size;
>        }
>  
>        exec->vtx.buffer_ptr = dest;
> diff --git a/src/mesa/vbo/vbo_exec_draw.c
> b/src/mesa/vbo/vbo_exec_draw.c
> index 71ac006..94aa021 100644
> --- a/src/mesa/vbo/vbo_exec_draw.c
> +++ b/src/mesa/vbo/vbo_exec_draw.c
> @@ -160,7 +160,6 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
>     struct vbo_exec_context *exec = &vbo->exec;
>     struct gl_client_array *arrays = exec->vtx.arrays;
>     const GLuint count = exec->vtx.vert_count;
> -   const GLubyte *data = (GLubyte *) exec->vtx.buffer_map;
>     const GLuint *map;
>     GLuint attr;
>     GLbitfield varying_inputs = 0x0;
> @@ -215,6 +214,9 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
>        const GLuint src = map[attr];
>  
>        if (exec->vtx.attrsz[src]) {
> +        GLsizeiptr offset = (GLbyte *)exec->vtx.attrptr[src] -
> +           (GLbyte *)exec->vtx.vertex;
> +
>           /* override the default array set above */
>           ASSERT(attr < Elements(exec->vtx.inputs));
>           ASSERT(attr < Elements(exec->vtx.arrays)); /* arrays[] */
> @@ -222,17 +224,13 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
>  
>           if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
>              /* a real buffer obj: Ptr is an offset, not a pointer*/
> -            GLsizeiptr offset;
>              assert(exec->vtx.bufferobj->Pointer);  /* buf should be
> mapped */
> -            offset = (GLbyte *) data -
> -              (GLbyte *) exec->vtx.bufferobj->Pointer +
> -              exec->vtx.bufferobj->Offset;
>              assert(offset >= 0);
> -            arrays[attr].Ptr = (void *) offset;
> +            arrays[attr].Ptr = (GLubyte *)exec->vtx.bufferobj->Offset
> + offset;
>           }
>           else {
>              /* Ptr into ordinary app memory */
> -            arrays[attr].Ptr = (void *) data;
> +            arrays[attr].Ptr = (GLubyte *)exec->vtx.buffer_map +
> offset;
>           }
>          arrays[attr].Size = exec->vtx.attrsz[src];
>          arrays[attr].StrideB = exec->vtx.vertex_size *
> sizeof(GLfloat);
> @@ -245,7 +243,6 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
>                                         exec->vtx.bufferobj);
>          arrays[attr]._MaxElement = count; /* ??? */
>  
> -        data += exec->vtx.attrsz[src] * sizeof(GLfloat);
>           varying_inputs |= 1 << attr;
>        }
>     }
> -- 
> 1.6.4.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev 



More information about the mesa-dev mailing list