Mesa (master): vbo/dlist: implement primitive merging
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Dec 8 09:42:19 UTC 2020
Module: Mesa
Branch: master
Commit: 310991415ee1f5bcdd91460f00205190e8e0c2d9
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=310991415ee1f5bcdd91460f00205190e8e0c2d9
Author: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Date: Thu Oct 8 14:49:29 2020 +0200
vbo/dlist: implement primitive merging
Merge consecutive primitives using the same mode while constructing the index buffer.
This improves performance a lot (x3 - x10) in the SPECviewperf13 snx-03 test by
reducing the number of draw calls per frame.
Here are some numbers for 4 of the tests:
      | Num draw calls |    GPU-load     |
------|----------------|-----------------|
      | Before | After | Before  | After |
------|--------|-------|---------|-------|
test1 |  390k  |  16k  |   68%   |  90%  |
test2 |  370k  |  16k  |   40%   |  90%  |
test3 |  1.2M  |  35k  |   38%   |  78%  |
test10|  3.5M  |  35k  |   36%   |  58%  |
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7078>
---
src/mesa/vbo/vbo_save_api.c | 89 +++++++++++++++++++++++++++++++++++++--------
1 file changed, 74 insertions(+), 15 deletions(-)
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index ac02674ced5..e1ddbaf21f1 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -605,25 +605,67 @@ compile_vertex_list(struct gl_context *ctx)
int end = node->prims[node->prim_count - 1].start +
node->prims[node->prim_count - 1].count;
int total_vert_count = end - node->prims[0].start;
- int max_indices_count = total_vert_count * 2;
+ /* Estimate for the worst case: all prims are line strips (the +1 is because
+ * wrap_buffers may call us but the last primitive may not be complete) */
+ int max_indices_count = MAX2(total_vert_count * 2 - (node->prim_count * 2) + 1,
+ total_vert_count);
int size = max_indices_count * sizeof(uint32_t);
uint32_t* indices = (uint32_t*) malloc(size);
uint32_t max_index = 0, min_index = 0xFFFFFFFF;
int idx = 0;
+ int last_valid_prim = -1;
/* Construct indices array. */
for (unsigned i = 0; i < node->prim_count; i++) {
assert(node->prims[i].basevertex == 0);
+ GLubyte mode = node->prims[i].mode;
+
int vertex_count = node->prims[i].count;
+ if (!vertex_count) {
+ continue;
+ }
+
+ /* Line strips get converted to lines */
+ if (mode == GL_LINE_STRIP)
+ mode = GL_LINES;
+
+ /* If 2 consecutive prims use the same mode => merge them. */
+ bool merge_prims = last_valid_prim >= 0 &&
+ mode == node->prims[last_valid_prim].mode &&
+ mode != GL_LINE_LOOP && mode != GL_TRIANGLE_FAN &&
+ mode != GL_QUAD_STRIP && mode != GL_POLYGON &&
+ mode != GL_PATCHES;
+
+ /* To be able to merge consecutive triangle strips we need to insert
+ * a degenerate triangle.
+ */
+ if (merge_prims &&
+ mode == GL_TRIANGLE_STRIP) {
+ /* Insert a degenerate triangle */
+ assert(node->prims[last_valid_prim].mode == GL_TRIANGLE_STRIP);
+ unsigned tri_count = node->prims[last_valid_prim].count - 2;
+
+ indices[idx] = indices[idx - 1];
+ indices[idx + 1] = node->prims[i].start;
+ idx += 2;
+ node->prims[last_valid_prim].count += 2;
+
+ if (tri_count % 2) {
+ /* Add another index to preserve winding order */
+ indices[idx++] = node->prims[i].start;
+ node->prims[last_valid_prim].count++;
+ }
+ }
+
int start = idx;
/* Convert line strips to lines if it'll allow if the previous
- * prim mode is GL_LINES or if the next primitive mode is
- * GL_LINES or GL_LINE_LOOP.
+ * prim mode is GL_LINES (so merge_prims is true) or if the next
+ * primitive mode is GL_LINES or GL_LINE_LOOP.
*/
if (node->prims[i].mode == GL_LINE_STRIP &&
- ((i > 0 && node->prims[i - 1].mode == GL_LINES) ||
+ (merge_prims ||
(i < node->prim_count - 1 &&
(node->prims[i + 1].mode == GL_LINE_STRIP ||
node->prims[i + 1].mode == GL_LINES)))) {
@@ -635,7 +677,7 @@ compile_vertex_list(struct gl_context *ctx)
node->prims[i].count++;
}
}
- node->prims[i].mode = GL_LINES;
+ node->prims[i].mode = mode;
} else {
for (unsigned j = 0; j < vertex_count; j++) {
indices[idx++] = node->prims[i].start + j;
@@ -645,15 +687,27 @@ compile_vertex_list(struct gl_context *ctx)
min_index = MIN2(min_index, indices[start]);
max_index = MAX2(max_index, indices[idx - 1]);
- node->prims[i].start = start;
+ if (merge_prims) {
+ /* Update vertex count. */
+ node->prims[last_valid_prim].count += idx - start;
+ } else {
+ /* Keep this primitive */
+ last_valid_prim += 1;
+ assert(last_valid_prim <= i);
+ node->prims[i].start = start;
+ node->prims[last_valid_prim] = node->prims[i];
+ }
}
+ if (idx == 0)
+ goto skip_node;
+
assert(idx <= max_indices_count);
+ node->prim_count = last_valid_prim + 1;
node->ib.ptr = NULL;
node->ib.count = idx;
node->ib.index_size_shift = (GL_UNSIGNED_INT - GL_UNSIGNED_BYTE) >> 1;
-
node->min_index = min_index;
node->max_index = max_index;
@@ -663,14 +717,19 @@ compile_vertex_list(struct gl_context *ctx)
idx * sizeof(uint32_t), indices,
GL_STATIC_DRAW_ARB, GL_MAP_WRITE_BIT,
node->ib.obj);
- assert(success);
- if (!success) {
- node->min_index = node->max_index = 0;
- ctx->Driver.DeleteBuffer(ctx, node->ib.obj);
- node->ib.obj = NULL;
- node->vertex_count = 0;
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "IB allocation");
- }
+
+ if (success)
+ goto out;
+
+ ctx->Driver.DeleteBuffer(ctx, node->ib.obj);
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "IB allocation");
+
+ skip_node:
+ node->ib.obj = NULL;
+ node->vertex_count = 0;
+ node->prim_count = 0;
+
+ out:
free(indices);
} else {
node->ib.obj = NULL;
More information about the mesa-commit
mailing list