[Mesa-dev] [PATCH] vbo: Avoid unnecessary copy to/from current in vertex format upgrade.
Keith Whitwell
keithw at vmware.com
Tue Nov 2 04:01:07 PDT 2010
Francisco,
This looks good - my only comment is that there seem to be two distinct
changes in this patch -- the modification to VBO behaviour when adding a
new attribute being one, but the changes to vbo_exec_draw.c seem to be
an unrelated cleanup. Is that correct?
Ordinarily I wouldn't bother pointing it out, but VBO is fairly complex
code & being able to isolate any regression to the smallest change
possible would be preferable -- would it be possible to split the patch
into two pieces & commit separately?
Also regarding testing, it makes sense to run through all the mesa-progs
demos/ and samples/ directory, as they tend to have a good selection of
odd cases for this code.
Keith
On Mon, 2010-11-01 at 13:06 -0700, Francisco Jerez wrote:
> Rebuilding the vertex format from scratch every time we see a new
> vertex attribute is rather costly, new attributes can be appended at
> the end avoiding a copy to current and then back again, and the full
> attr pointer recalculation.
>
> In the not so likely case of an already existing attribute having its
> size increased the old behavior is preserved, this could be optimized
> more, not sure if it's worth it.
>
> It's a modest improvement in FlightGear (that game punishes the VBO
> module pretty hard in general, framerate goes from some 46 FPS to 50
> FPS with the nouveau classic driver).
> ---
> I've run piglit before and after and apparently there are no
> regressions, Tested-by's on different hardware would be appreciated
> though.
>
> src/mesa/vbo/vbo_exec_api.c | 98
> +++++++++++++++++++++++++-----------------
> src/mesa/vbo/vbo_exec_draw.c | 13 ++---
> 2 files changed, 63 insertions(+), 48 deletions(-)
>
> diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
> index 4988c51..9b2d59f 100644
> --- a/src/mesa/vbo/vbo_exec_api.c
> +++ b/src/mesa/vbo/vbo_exec_api.c
> @@ -220,8 +220,9 @@ static void vbo_exec_wrap_upgrade_vertex( struct
> vbo_exec_context *exec,
> struct gl_context *ctx = exec->ctx;
> struct vbo_context *vbo = vbo_context(ctx);
> GLint lastcount = exec->vtx.vert_count;
> - GLfloat *tmp;
> - GLuint oldsz;
> + GLfloat *old_attrptr[VBO_ATTRIB_MAX];
> + GLuint old_vtx_size = exec->vtx.vertex_size;
> + GLuint oldsz = exec->vtx.attrsz[attr];
> GLuint i;
>
> /* Run pipeline on current vertices, copy wrapped vertices
> @@ -229,86 +230,103 @@ static void
> vbo_exec_wrap_upgrade_vertex( struct vbo_exec_context *exec,
> */
> vbo_exec_wrap_buffers( exec );
>
> + if (unlikely(exec->vtx.copied.nr)) {
> + /* We're in the middle of a primitive, keep the old vertex
> + * format around to be able to translate the copied vertices to
> + * the new format.
> + */
> + memcpy(old_attrptr, exec->vtx.attrptr, sizeof(old_attrptr));
> + }
>
> - /* Do a COPY_TO_CURRENT to ensure back-copying works for the case
> - * when the attribute already exists in the vertex and is having
> - * its size increased.
> - */
> - vbo_exec_copy_to_current( exec );
> -
> + if (unlikely(oldsz)) {
> + /* Do a COPY_TO_CURRENT to ensure back-copying works for the
> + * case when the attribute already exists in the vertex and is
> + * having its size increased.
> + */
> + vbo_exec_copy_to_current( exec );
> + }
>
> /* Heuristic: Attempt to isolate attributes received outside
> * begin/end so that they don't bloat the vertices.
> */
> if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END &&
> - exec->vtx.attrsz[attr] == 0 &&
> - lastcount > 8 &&
> - exec->vtx.vertex_size) {
> + !oldsz && lastcount > 8 && exec->vtx.vertex_size) {
> + vbo_exec_copy_to_current( exec );
> reset_attrfv( exec );
> }
>
> /* Fix up sizes:
> */
> - oldsz = exec->vtx.attrsz[attr];
> exec->vtx.attrsz[attr] = newsz;
> -
> exec->vtx.vertex_size += newsz - oldsz;
> exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE -
> exec->vtx.buffer_used) /
> (exec->vtx.vertex_size * sizeof(GLfloat)));
> exec->vtx.vert_count = 0;
> exec->vtx.buffer_ptr = exec->vtx.buffer_map;
> -
>
> - /* Recalculate all the attrptr[] values
> - */
> - for (i = 0, tmp = exec->vtx.vertex ; i < VBO_ATTRIB_MAX ; i++) {
> - if (exec->vtx.attrsz[i]) {
> - exec->vtx.attrptr[i] = tmp;
> - tmp += exec->vtx.attrsz[i];
> + if (unlikely(oldsz)) {
> + /* Size changed, recalculate all the attrptr[] values
> + */
> + GLfloat *tmp = exec->vtx.vertex;
> +
> + for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) {
> + if (exec->vtx.attrsz[i]) {
> + exec->vtx.attrptr[i] = tmp;
> + tmp += exec->vtx.attrsz[i];
> + }
> + else
> + exec->vtx.attrptr[i] = NULL; /* will not be dereferenced
> */
> }
> - else
> - exec->vtx.attrptr[i] = NULL; /* will not be dereferenced */
> - }
>
> - /* Copy from current to repopulate the vertex with correct values.
> - vbo_exec_copy_from_current( exec );
> + /* Copy from current to repopulate the vertex with correct
> + * values.
> + */
> + vbo_exec_copy_from_current( exec );
> +
> + } else {
> + /* Just have to append the new attribute at the end */
> + exec->vtx.attrptr[attr] = exec->vtx.vertex +
> + exec->vtx.vertex_size - newsz;
> + }
>
> /* Replay stored vertices to translate them
> * to new format here.
> *
> * -- No need to replay - just copy piecewise
> */
> - if (exec->vtx.copied.nr)
> - {
> + if (unlikely(exec->vtx.copied.nr)) {
> GLfloat *data = exec->vtx.copied.buffer;
> GLfloat *dest = exec->vtx.buffer_ptr;
> GLuint j;
>
> assert(exec->vtx.buffer_ptr == exec->vtx.buffer_map);
> -
> +
> for (i = 0 ; i < exec->vtx.copied.nr ; i++) {
> for (j = 0 ; j < VBO_ATTRIB_MAX ; j++) {
> - if (exec->vtx.attrsz[j]) {
> + GLuint sz = exec->vtx.attrsz[j];
> +
> + if (sz) {
> + GLint old_offset = old_attrptr[j] - exec->vtx.vertex;
> + GLint new_offset = exec->vtx.attrptr[j] -
> exec->vtx.vertex;
> +
> if (j == attr) {
> if (oldsz) {
> - COPY_CLEAN_4V( dest, oldsz, data );
> - data += oldsz;
> - dest += newsz;
> + GLfloat tmp[4];
> + COPY_CLEAN_4V(tmp, oldsz, data + old_offset);
> + COPY_SZ_4V(dest + new_offset, newsz, tmp);
> } else {
> - const GLfloat *current = (const GLfloat
> *)vbo->currval[j].Ptr;
> - COPY_SZ_4V( dest, newsz, current );
> - dest += newsz;
> + GLfloat *current = (GLfloat
> *)vbo->currval[j].Ptr;
> + COPY_SZ_4V(dest + new_offset, sz, current);
> }
> }
> else {
> - GLuint sz = exec->vtx.attrsz[j];
> - COPY_SZ_4V( dest, sz, data );
> - dest += sz;
> - data += sz;
> + COPY_SZ_4V(dest + new_offset, sz, data +
> old_offset);
> }
> }
> }
> +
> + data += old_vtx_size;
> + dest += exec->vtx.vertex_size;
> }
>
> exec->vtx.buffer_ptr = dest;
> diff --git a/src/mesa/vbo/vbo_exec_draw.c
> b/src/mesa/vbo/vbo_exec_draw.c
> index 71ac006..94aa021 100644
> --- a/src/mesa/vbo/vbo_exec_draw.c
> +++ b/src/mesa/vbo/vbo_exec_draw.c
> @@ -160,7 +160,6 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
> struct vbo_exec_context *exec = &vbo->exec;
> struct gl_client_array *arrays = exec->vtx.arrays;
> const GLuint count = exec->vtx.vert_count;
> - const GLubyte *data = (GLubyte *) exec->vtx.buffer_map;
> const GLuint *map;
> GLuint attr;
> GLbitfield varying_inputs = 0x0;
> @@ -215,6 +214,9 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
> const GLuint src = map[attr];
>
> if (exec->vtx.attrsz[src]) {
> + GLsizeiptr offset = (GLbyte *)exec->vtx.attrptr[src] -
> + (GLbyte *)exec->vtx.vertex;
> +
> /* override the default array set above */
> ASSERT(attr < Elements(exec->vtx.inputs));
> ASSERT(attr < Elements(exec->vtx.arrays)); /* arrays[] */
> @@ -222,17 +224,13 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
>
> if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
> /* a real buffer obj: Ptr is an offset, not a pointer*/
> - GLsizeiptr offset;
> assert(exec->vtx.bufferobj->Pointer); /* buf should be
> mapped */
> - offset = (GLbyte *) data -
> - (GLbyte *) exec->vtx.bufferobj->Pointer +
> - exec->vtx.bufferobj->Offset;
> assert(offset >= 0);
> - arrays[attr].Ptr = (void *) offset;
> + arrays[attr].Ptr = (GLubyte *)exec->vtx.bufferobj->Offset
> + offset;
> }
> else {
> /* Ptr into ordinary app memory */
> - arrays[attr].Ptr = (void *) data;
> + arrays[attr].Ptr = (GLubyte *)exec->vtx.buffer_map +
> offset;
> }
> arrays[attr].Size = exec->vtx.attrsz[src];
> arrays[attr].StrideB = exec->vtx.vertex_size *
> sizeof(GLfloat);
> @@ -245,7 +243,6 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
> exec->vtx.bufferobj);
> arrays[attr]._MaxElement = count; /* ??? */
>
> - data += exec->vtx.attrsz[src] * sizeof(GLfloat);
> varying_inputs |= 1 << attr;
> }
> }
> --
> 1.6.4.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list