[Mesa-dev] [PATCH 5/6] st/mesa: set vertex arrays state only when necessary
Marek Olšák
maraeo at gmail.com
Sat Feb 12 11:04:13 PST 2011
The vertex arrays state should be set only when (_NEW_ARRAY | _NEW_PROGRAM)
is dirty. This assumes user buffer content is mutable, which will be
sorted out in the next commit. The following use cases should be much faster
now:
for (i = 0; i < 1000; i++) {
glDrawElements(...);
}
Or even:
for (i = 0; i < 1000; i++) {
glSomeStateChangeOtherThanArraysOrProgram(...);
glDrawElements(...);
}
The performance increase from this may be significant in some apps and
negligible in others. It is especially noticeable in the Torcs game (r300g):
Before: 15.4 fps
After: 20 fps
Also, less looping over attribs in st_draw_vbo yields a slight speed-up
in apps with lots of glDraw* calls.
---
src/mesa/state_tracker/st_draw.c | 292 ++++++++++++++------------------------
1 files changed, 110 insertions(+), 182 deletions(-)
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 4cbcecf..34f75a3 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -243,13 +243,11 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
static GLboolean
is_interleaved_arrays(const struct st_vertex_program *vp,
const struct st_vp_variant *vpv,
- const struct gl_client_array **arrays,
- GLboolean *userSpace)
+ const struct gl_client_array **arrays)
{
GLuint attr;
const struct gl_buffer_object *firstBufObj = NULL;
GLint firstStride = -1;
- GLuint num_client_arrays = 0;
const GLubyte *client_addr = NULL;
for (attr = 0; attr < vpv->num_inputs; attr++) {
@@ -263,9 +261,8 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
else if (firstStride != stride) {
return GL_FALSE;
}
-
+
if (!bufObj || !bufObj->Name) {
- num_client_arrays++;
/* Try to detect if the client-space arrays are
* "close" to each other.
*/
@@ -285,57 +282,11 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
}
}
- *userSpace = (num_client_arrays == vpv->num_inputs);
- /* debug_printf("user space: %s (%d arrays, %d inputs)\n",
- (int)*userSpace ? "Yes" : "No", num_client_arrays, vp->num_inputs); */
-
return GL_TRUE;
}
/**
- * Compute the memory range occupied by the arrays.
- */
-static void
-get_arrays_bounds(const struct st_vertex_program *vp,
- const struct st_vp_variant *vpv,
- const struct gl_client_array **arrays,
- GLuint max_index,
- const GLubyte **low, const GLubyte **high)
-{
- const GLubyte *low_addr = NULL;
- const GLubyte *high_addr = NULL;
- GLuint attr;
-
- /* debug_printf("get_arrays_bounds: Handling %u attrs\n", vpv->num_inputs); */
-
- for (attr = 0; attr < vpv->num_inputs; attr++) {
- const GLuint mesaAttr = vp->index_to_input[attr];
- const GLint stride = arrays[mesaAttr]->StrideB;
- const GLubyte *start = arrays[mesaAttr]->Ptr;
- const unsigned sz = (arrays[mesaAttr]->Size *
- _mesa_sizeof_type(arrays[mesaAttr]->Type));
- const GLubyte *end = start + (max_index * stride) + sz;
-
- /* debug_printf("attr %u: stride %d size %u start %p end %p\n",
- attr, stride, sz, start, end); */
-
- if (attr == 0) {
- low_addr = start;
- high_addr = end;
- }
- else {
- low_addr = MIN2(low_addr, start);
- high_addr = MAX2(high_addr, end);
- }
- }
-
- *low = low_addr;
- *high = high_addr;
-}
-
-
-/**
* Set up for drawing interleaved arrays that all live in one VBO
* or all live in user space.
* \param vbuffer returns vertex buffer info
@@ -346,15 +297,21 @@ setup_interleaved_attribs(struct gl_context *ctx,
const struct st_vertex_program *vp,
const struct st_vp_variant *vpv,
const struct gl_client_array **arrays,
- GLuint max_index,
- GLboolean userSpace,
struct pipe_vertex_buffer *vbuffer,
- struct pipe_vertex_element velements[])
+ struct pipe_vertex_element velements[],
+ unsigned max_index)
{
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
GLuint attr;
- const GLubyte *offset0 = NULL;
+ const GLubyte *low_addr = NULL;
+
+ /* Find the lowest address. */
+ for (attr = 0; attr < vpv->num_inputs; attr++) {
+ const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
+
+ low_addr = !low_addr ? start : MIN2(low_addr, start);
+ }
for (attr = 0; attr < vpv->num_inputs; attr++) {
const GLuint mesaAttr = vp->index_to_input[attr];
@@ -362,39 +319,23 @@ setup_interleaved_attribs(struct gl_context *ctx,
struct st_buffer_object *stobj = st_buffer_object(bufobj);
GLsizei stride = arrays[mesaAttr]->StrideB;
- /*printf("stobj %u = %p\n", attr, (void*)stobj);*/
-
if (attr == 0) {
- const GLubyte *low, *high;
-
- get_arrays_bounds(vp, vpv, arrays, max_index, &low, &high);
- /* debug_printf("buffer range: %p %p range %d max index %u\n",
- low, high, high - low, max_index); */
-
- offset0 = low;
- if (userSpace) {
+ if (bufobj && bufobj->Name) {
+ vbuffer->buffer = NULL;
+ pipe_resource_reference(&vbuffer->buffer, stobj->buffer);
+ vbuffer->buffer_offset = pointer_to_offset(low_addr);
+ } else {
vbuffer->buffer =
- pipe_user_buffer_create(pipe->screen, (void *) low, high - low,
+ pipe_user_buffer_create(pipe->screen, (void*)low_addr,
+ stride * (max_index + 1),
PIPE_BIND_VERTEX_BUFFER);
vbuffer->buffer_offset = 0;
}
- else {
- vbuffer->buffer = NULL;
- pipe_resource_reference(&vbuffer->buffer, stobj->buffer);
- vbuffer->buffer_offset = pointer_to_offset(low);
- }
vbuffer->stride = stride; /* in bytes */
}
- /*
- if (arrays[mesaAttr]->InstanceDivisor)
- vbuffer[attr].max_index = arrays[mesaAttr]->_MaxElement;
- else
- vbuffer[attr].max_index = max_index;
- */
-
velements[attr].src_offset =
- (unsigned) (arrays[mesaAttr]->Ptr - offset0);
+ (unsigned) (arrays[mesaAttr]->Ptr - low_addr);
velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
velements[attr].vertex_buffer_index = 0;
velements[attr].src_format =
@@ -418,10 +359,9 @@ setup_non_interleaved_attribs(struct gl_context *ctx,
const struct st_vertex_program *vp,
const struct st_vp_variant *vpv,
const struct gl_client_array **arrays,
- GLuint max_index,
- GLboolean *userSpace,
struct pipe_vertex_buffer vbuffer[],
- struct pipe_vertex_element velements[])
+ struct pipe_vertex_element velements[],
+ unsigned max_index)
{
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
@@ -432,8 +372,6 @@ setup_non_interleaved_attribs(struct gl_context *ctx,
struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
GLsizei stride = arrays[mesaAttr]->StrideB;
- *userSpace = GL_FALSE;
-
if (bufobj && bufobj->Name) {
/* Attribute data is in a VBO.
* Recall that for VBOs, the gl_client_array->Ptr field is
@@ -441,37 +379,23 @@ setup_non_interleaved_attribs(struct gl_context *ctx,
*/
struct st_buffer_object *stobj = st_buffer_object(bufobj);
assert(stobj->buffer);
- /*printf("stobj %u = %p\n", attr, (void*) stobj);*/
vbuffer[attr].buffer = NULL;
pipe_resource_reference(&vbuffer[attr].buffer, stobj->buffer);
vbuffer[attr].buffer_offset = pointer_to_offset(arrays[mesaAttr]->Ptr);
}
else {
- /* attribute data is in user-space memory, not a VBO */
- uint bytes;
- /*printf("user-space array %d stride %d\n", attr, stride);*/
-
- *userSpace = GL_TRUE;
-
/* wrap user data */
if (arrays[mesaAttr]->Ptr) {
- /* user's vertex array */
- if (arrays[mesaAttr]->StrideB) {
- bytes = arrays[mesaAttr]->StrideB * (max_index + 1);
- }
- else {
- bytes = arrays[mesaAttr]->Size
- * _mesa_sizeof_type(arrays[mesaAttr]->Type);
- }
vbuffer[attr].buffer =
pipe_user_buffer_create(pipe->screen,
- (void *) arrays[mesaAttr]->Ptr, bytes,
+ (void *) arrays[mesaAttr]->Ptr,
+ stride * (max_index + 1),
PIPE_BIND_VERTEX_BUFFER);
}
else {
/* no array, use ctx->Current.Attrib[] value */
- bytes = sizeof(ctx->Current.Attrib[0]);
+ uint bytes = sizeof(ctx->Current.Attrib[0]);
vbuffer[attr].buffer =
pipe_user_buffer_create(pipe->screen,
(void *) ctx->Current.Attrib[mesaAttr],
@@ -483,8 +407,6 @@ setup_non_interleaved_attribs(struct gl_context *ctx,
vbuffer[attr].buffer_offset = 0;
}
- assert(velements[attr].src_offset <= 2048); /* 11-bit field */
-
/* common-case setup */
vbuffer[attr].stride = stride; /* in bytes */
@@ -604,6 +526,54 @@ translate_prim(const struct gl_context *ctx, unsigned prim)
}
+static void
+st_validate_varrays(struct gl_context *ctx,
+ const struct gl_client_array **arrays,
+ unsigned max_index)
+{
+ struct st_context *st = st_context(ctx);
+ const struct st_vertex_program *vp;
+ const struct st_vp_variant *vpv;
+ struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
+ struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
+ unsigned num_vbuffers, num_velements;
+ GLuint attr;
+
+ /* must get these after state validation! */
+ vp = st->vp;
+ vpv = st->vp_variant;
+
+ memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
+ /*
+ * Setup the vbuffer[] and velements[] arrays.
+ */
+ if (is_interleaved_arrays(vp, vpv, arrays)) {
+ setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements,
+ max_index);
+ num_vbuffers = 1;
+ num_velements = vpv->num_inputs;
+ if (num_velements == 0)
+ num_vbuffers = 0;
+ }
+ else {
+ setup_non_interleaved_attribs(ctx, vp, vpv, arrays,
+ vbuffer, velements, max_index);
+ num_vbuffers = vpv->num_inputs;
+ num_velements = vpv->num_inputs;
+ }
+
+ cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer);
+ cso_set_vertex_elements(st->cso_context, num_velements, velements);
+
+ /* unreference buffers (frees wrapped user-space buffer objects)
+ * This is OK, because the pipe driver should reference buffers by itself
+ * in set_vertex_buffers. */
+ for (attr = 0; attr < num_vbuffers; attr++) {
+ pipe_resource_reference(&vbuffer[attr].buffer, NULL);
+ assert(!vbuffer[attr].buffer);
+ }
+}
+
/**
* This function gets plugged into the VBO module and is called when
@@ -622,90 +592,59 @@ st_draw_vbo(struct gl_context *ctx,
{
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
- const struct st_vertex_program *vp;
- const struct st_vp_variant *vpv;
- struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
- GLuint attr;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
- unsigned num_vbuffers, num_velements;
struct pipe_index_buffer ibuffer;
- GLboolean userSpace = GL_FALSE;
- GLboolean vertDataEdgeFlags;
struct pipe_draw_info info;
unsigned i;
+ GLboolean new_array =
+ st->dirty.st && (st->dirty.mesa & (_NEW_ARRAY | _NEW_PROGRAM)) != 0;
/* Mesa core state should have been validated already */
assert(ctx->NewState == 0x0);
- /* Gallium probably doesn't want this in some cases. */
- if (!index_bounds_valid)
- if (!vbo_all_varyings_in_vbos(arrays))
- vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
-
- /* sanity check for pointer arithmetic below */
- assert(sizeof(arrays[0]->Ptr[0]) == 1);
-
- vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj &&
- arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name;
- if (vertDataEdgeFlags != st->vertdata_edgeflags) {
- st->vertdata_edgeflags = vertDataEdgeFlags;
- st->dirty.st |= ST_NEW_EDGEFLAGS_DATA;
+ if (ib) {
+ /* Gallium probably doesn't want this in some cases. */
+ if (!index_bounds_valid)
+ if (!vbo_all_varyings_in_vbos(arrays))
+ vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
+ } else {
+ /* Get min/max index for non-indexed drawing. */
+ min_index = ~0;
+ max_index = 0;
+
+ for (i = 0; i < nr_prims; i++) {
+ min_index = MIN2(min_index, prims[i].start);
+ max_index = MAX2(max_index, prims[i].start + prims[i].count - 1);
+ }
}
- st_validate_state(st);
+ /* Validate state. */
+ if (st->dirty.st) {
+ GLboolean vertDataEdgeFlags;
- /* must get these after state validation! */
- vp = st->vp;
- vpv = st->vp_variant;
+ /* sanity check for pointer arithmetic below */
+ assert(sizeof(arrays[0]->Ptr[0]) == 1);
-#if 0
- if (MESA_VERBOSE & VERBOSE_GLSL) {
- check_uniforms(ctx);
- }
-#else
- (void) check_uniforms;
-#endif
+ vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj &&
+ arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name;
+ if (vertDataEdgeFlags != st->vertdata_edgeflags) {
+ st->vertdata_edgeflags = vertDataEdgeFlags;
+ st->dirty.st |= ST_NEW_EDGEFLAGS_DATA;
+ }
- memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
- /*
- * Setup the vbuffer[] and velements[] arrays.
- */
- if (is_interleaved_arrays(vp, vpv, arrays, &userSpace)) {
- /*printf("Draw interleaved\n");*/
- setup_interleaved_attribs(ctx, vp, vpv, arrays, max_index, userSpace,
- vbuffer, velements);
- num_vbuffers = 1;
- num_velements = vpv->num_inputs;
- if (num_velements == 0)
- num_vbuffers = 0;
- }
- else {
- /*printf("Draw non-interleaved\n");*/
- setup_non_interleaved_attribs(ctx, vp, vpv, arrays, max_index,
- &userSpace, vbuffer, velements);
- num_vbuffers = vpv->num_inputs;
- num_velements = vpv->num_inputs;
- }
+ st_validate_state(st);
-#if 0
- {
- GLuint i;
- for (i = 0; i < num_vbuffers; i++) {
- printf("buffers[%d].stride = %u\n", i, vbuffer[i].stride);
- printf("buffers[%d].max_index = %u\n", i, vbuffer[i].max_index);
- printf("buffers[%d].buffer_offset = %u\n", i, vbuffer[i].buffer_offset);
- printf("buffers[%d].buffer = %p\n", i, (void*) vbuffer[i].buffer);
+ if (new_array) {
+ st_validate_varrays(ctx, arrays, max_index);
}
- for (i = 0; i < num_velements; i++) {
- printf("vlements[%d].vbuffer_index = %u\n", i, velements[i].vertex_buffer_index);
- printf("vlements[%d].src_offset = %u\n", i, velements[i].src_offset);
- printf("vlements[%d].format = %s\n", i, util_format_name(velements[i].src_format));
+
+#if 0
+ if (MESA_VERBOSE & VERBOSE_GLSL) {
+ check_uniforms(ctx);
}
- }
+#else
+ (void) check_uniforms;
#endif
-
- cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer);
- cso_set_vertex_elements(st->cso_context, num_velements, velements);
+ }
setup_index_buffer(ctx, ib, &ibuffer);
pipe->set_index_buffer(pipe, &ibuffer);
@@ -739,17 +678,6 @@ st_draw_vbo(struct gl_context *ctx,
}
pipe_resource_reference(&ibuffer.buffer, NULL);
-
- /* unreference buffers (frees wrapped user-space buffer objects) */
- for (attr = 0; attr < num_vbuffers; attr++) {
- pipe_resource_reference(&vbuffer[attr].buffer, NULL);
- assert(!vbuffer[attr].buffer);
- }
-
- if (userSpace)
- {
- pipe->set_vertex_buffers(pipe, 0, NULL);
- }
}
--
1.7.1
More information about the mesa-dev
mailing list