[Mesa-dev] [PATCH] i965: Don't check for draw-time errors that cannot occur in core profile
Ian Romanick
idr at freedesktop.org
Mon Aug 10 10:12:50 PDT 2015
From: Ian Romanick <ian.d.romanick at intel.com>
On many CPU-limited applications, this is *the* hot path. The idea is
to generate per-API versions of brw_draw_prims that elide some checks.
This patch removes render-mode and "is everything in VBOs" checks from
core-profile contexts.
On my IVB laptop (which may have experienced thermal throttling):
Gl32Batch7: 3.70955% +/- 1.11344%
OglBatch7: 1.04398% +/- 0.772788%
These are the same benchmark, but Gl32Batch7 uses an OpenGL 3.2 Core
Profile context.
v2: Reorder parameters to brw_try_draw_prims to reduce data shuffling.
v3: Pass a gl_api into draw_prims instead of a must-be-core-profile
flag. This will make it easier to expand to other profiles later.
v4: Make brw_draw_prims_generic be a dispatcher. This way we always use
the correct per-API version. This should reduce cache pollution when
brw_draw_prims_core is used, but it didn't seem to affect performance
one way or the other on my IVB.
Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
---
src/mesa/drivers/dri/i965/brw_draw.c | 148 +++++++++++++++++-----
src/mesa/drivers/dri/i965/brw_draw.h | 54 ++++++--
src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 6 +-
src/mesa/drivers/dri/i965/brw_primitive_restart.c | 4 +-
4 files changed, 164 insertions(+), 48 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index a23e9c0..bfd113f 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -416,10 +416,10 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
*/
static void
brw_try_draw_prims(struct gl_context *ctx,
- const struct gl_client_array *arrays[],
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
+ const struct gl_client_array *arrays[],
GLuint min_index,
GLuint max_index,
struct gl_buffer_object *indirect)
@@ -572,23 +572,26 @@ retry:
return;
}
-void
-brw_draw_prims(struct gl_context *ctx,
- const struct _mesa_prim *prims,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLboolean index_bounds_valid,
- GLuint min_index,
- GLuint max_index,
- struct gl_transform_feedback_object *unused_tfb_object,
- unsigned stream,
- struct gl_buffer_object *indirect)
+/**
+ * \warning
+ * This function must be static, inline, and always_inline. This is the only
+ * thing that allows the compiler to optimize the tests of the \c API
+ * parameter away.
+ */
+static inline __attribute__((always_inline)) void
+draw_prims(struct gl_context *ctx,
+ const struct _mesa_prim *prims,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index,
+ struct gl_buffer_object *indirect,
+ gl_api API)
{
struct brw_context *brw = brw_context(ctx);
const struct gl_client_array **arrays = ctx->Array._DrawArrays;
- assert(unused_tfb_object == NULL);
-
if (!brw_check_conditional_render(brw))
return;
@@ -598,38 +601,108 @@ brw_draw_prims(struct gl_context *ctx,
return;
}
- /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
- * won't support all the extensions we support.
+ /* Core profile removed GL_SELECT and GL_FEEDBACK.
+ *
+ * FINISHME: OpenGL ES (all versions) also lacks GL_SELECT and
+ * FINISHME: GL_FEEDBACK. We could take advantage of this easily.
*/
- if (ctx->RenderMode != GL_RENDER) {
- perf_debug("%s render mode not supported in hardware\n",
- _mesa_enum_to_string(ctx->RenderMode));
- _swsetup_Wakeup(ctx);
- _tnl_wakeup(ctx);
- _tnl_draw_prims(ctx, prims, nr_prims, ib,
- index_bounds_valid, min_index, max_index, NULL, 0, NULL);
- return;
+ if (API == API_OPENGL_COMPAT) {
+ /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
+ * won't support all the extensions we support.
+ */
+ if (ctx->RenderMode != GL_RENDER) {
+ perf_debug("%s render mode not supported in hardware\n",
+ _mesa_enum_to_string(ctx->RenderMode));
+ _swsetup_Wakeup(ctx);
+ _tnl_wakeup(ctx);
+ _tnl_draw_prims(ctx, prims, nr_prims, ib,
+ index_bounds_valid, min_index, max_index, NULL, 0,
+ NULL);
+ return;
+ }
}
- /* If we're going to have to upload any of the user's vertex arrays, then
- * get the minimum and maximum of their index buffer so we know what range
- * to upload.
+ /* Core profile requires that all vertex data be stored in VBOs, so there
+ * is no need to check whether or not all the data is in VBOs.
*/
- if (!index_bounds_valid && !vbo_all_varyings_in_vbos(arrays)) {
- perf_debug("Scanning index buffer to compute index buffer bounds. "
- "Use glDrawRangeElements() to avoid this.\n");
- vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
+ if (API != API_OPENGL_CORE) {
+ /* If we're going to have to upload any of the user's vertex arrays, then
+ * get the minimum and maximum of their index buffer so we know what range
+ * to upload.
+ */
+ if (!index_bounds_valid && !vbo_all_varyings_in_vbos(arrays)) {
+ perf_debug("Scanning index buffer to compute index buffer bounds. "
+ "Use glDrawRangeElements() to avoid this.\n");
+ vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
+ }
}
/* Try drawing with the hardware, but don't do anything else if we can't
* manage it. swrast doesn't support our featureset, so we can't fall back
* to it.
*/
- brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index,
+ brw_try_draw_prims(ctx, prims, nr_prims, ib, arrays, min_index, max_index,
indirect);
}
void
+brw_draw_prims_core(struct gl_context *ctx,
+ const struct _mesa_prim *prims,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index,
+ struct gl_transform_feedback_object *unused_tfb_object,
+ unsigned stream,
+ struct gl_buffer_object *indirect)
+{
+ (void) index_bounds_valid;
+ (void) unused_tfb_object;
+ (void) stream;
+
+ assert(unused_tfb_object == NULL);
+
+ draw_prims(ctx,
+ prims,
+ nr_prims,
+ ib,
+ true, /* index_bounds_valid */
+ min_index,
+ max_index,
+ indirect,
+ API_OPENGL_CORE);
+}
+
+void
+brw_draw_prims_compat(struct gl_context *ctx,
+ const struct _mesa_prim *prims,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index,
+ struct gl_transform_feedback_object *unused_tfb_object,
+ unsigned stream,
+ struct gl_buffer_object *indirect)
+{
+ (void) unused_tfb_object;
+ (void) stream;
+
+ assert(unused_tfb_object == NULL);
+
+ draw_prims(ctx,
+ prims,
+ nr_prims,
+ ib,
+ index_bounds_valid,
+ min_index,
+ max_index,
+ indirect,
+ API_OPENGL_COMPAT);
+}
+
+void
brw_draw_init(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
@@ -637,7 +710,16 @@ brw_draw_init(struct brw_context *brw)
/* Register our drawing function:
*/
- vbo->draw_prims = brw_draw_prims;
+ switch (ctx->API) {
+ case API_OPENGL_COMPAT:
+ case API_OPENGLES:
+ case API_OPENGLES2:
+ vbo->draw_prims = brw_draw_prims_compat;
+ break;
+ case API_OPENGL_CORE:
+ vbo->draw_prims = brw_draw_prims_core;
+ break;
+ }
for (int i = 0; i < VERT_ATTRIB_MAX; i++)
brw->vb.inputs[i].buffer = -1;
diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h
index f994726..4ae32e1 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.h
+++ b/src/mesa/drivers/dri/i965/brw_draw.h
@@ -30,20 +30,54 @@
#include "main/mtypes.h" /* for struct gl_context... */
#include "vbo/vbo.h"
+#include "vbo/vbo_context.h"
struct brw_context;
-void brw_draw_prims(struct gl_context *ctx,
- const struct _mesa_prim *prims,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLboolean index_bounds_valid,
- GLuint min_index,
- GLuint max_index,
- struct gl_transform_feedback_object *unused_tfb_object,
- unsigned stream,
- struct gl_buffer_object *indirect );
+/* This is the draw_prims to call in paths where the API is not known at
+ * compile time.
+ */
+static inline void
+brw_draw_prims_generic(struct gl_context *ctx,
+ const struct _mesa_prim *prims,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index,
+ struct gl_transform_feedback_object *unused_tfb_object,
+ unsigned stream,
+ struct gl_buffer_object *indirect)
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+
+ vbo->draw_prims(ctx, prims, nr_prims, ib, index_bounds_valid,
+ min_index, max_index, unused_tfb_object, stream,
+ indirect);
+}
+
+void brw_draw_prims_compat(struct gl_context *ctx,
+ const struct _mesa_prim *prims,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index,
+ struct gl_transform_feedback_object *unused_tfb_object,
+ unsigned stream,
+ struct gl_buffer_object *indirect);
+
+void brw_draw_prims_core(struct gl_context *ctx,
+ const struct _mesa_prim *prims,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index,
+ struct gl_transform_feedback_object *unused_tfb_object,
+ unsigned stream,
+ struct gl_buffer_object *indirect);
void brw_draw_init( struct brw_context *brw );
void brw_draw_destroy( struct brw_context *brw );
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index f5ecbb5..d4b09c0 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -198,9 +198,9 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances)
/* Make sure our internal prim value doesn't clash with a valid GL value. */
assert(!_mesa_is_valid_prim_mode(ctx, prim.mode));
- brw_draw_prims(ctx, &prim, 1, NULL,
- GL_TRUE, start, start + count - 1,
- NULL, 0, NULL);
+ brw_draw_prims_generic(ctx, &prim, 1, NULL,
+ GL_TRUE, start, start + count - 1,
+ NULL, 0, NULL);
}
static void
diff --git a/src/mesa/drivers/dri/i965/brw_primitive_restart.c b/src/mesa/drivers/dri/i965/brw_primitive_restart.c
index 6ed79d7..9a83dac 100644
--- a/src/mesa/drivers/dri/i965/brw_primitive_restart.c
+++ b/src/mesa/drivers/dri/i965/brw_primitive_restart.c
@@ -161,8 +161,8 @@ brw_handle_primitive_restart(struct gl_context *ctx,
/* Cut index should work for primitive restart, so use it
*/
brw->prim_restart.enable_cut_index = true;
- brw_draw_prims(ctx, prims, nr_prims, ib, GL_FALSE, -1, -1, NULL, 0,
- indirect);
+ brw_draw_prims_generic(ctx, prims, nr_prims, ib, GL_FALSE, -1, -1, NULL,
+ 0, indirect);
brw->prim_restart.enable_cut_index = false;
} else {
/* Not all the primitive draw modes are supported by the cut index,
--
2.1.0
More information about the mesa-dev
mailing list