[Mesa-dev] [PATCH] i965: Don't check for draw-time errors that cannot occur in core profile

Ian Romanick idr at freedesktop.org
Mon Aug 10 10:12:50 PDT 2015


From: Ian Romanick <ian.d.romanick at intel.com>

In many CPU-limited applications, brw_draw_prims is *the* hot path.  The
idea is to generate per-API versions of brw_draw_prims that elide some
checks.  This patch removes the render-mode and "is everything in VBOs"
checks from core-profile contexts.

On my IVB laptop (which may have experienced thermal throttling):

Gl32Batch7:     3.70955% +/- 1.11344%
OglBatch7:      1.04398% +/- 0.772788%

These are the same benchmark, but Gl32Batch7 uses an OpenGL 3.2 Core
Profile context.

v2: Reorder parameters to brw_try_draw_prims to reduce data shuffling.

v3: Pass a gl_api into draw_prims instead of a must-be-core-profile
flag.  This will make it easier to expand to other profiles later.

v4: Make brw_draw_prims_generic be a dispatcher.  This way we always use
the correct per-API version.  This should reduce cache pollution when
brw_draw_prims_core is used, but it didn't seem to affect performance
one way or the other on my IVB.

Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
---
 src/mesa/drivers/dri/i965/brw_draw.c              | 148 +++++++++++++++++-----
 src/mesa/drivers/dri/i965/brw_draw.h              |  54 ++++++--
 src/mesa/drivers/dri/i965/brw_meta_fast_clear.c   |   6 +-
 src/mesa/drivers/dri/i965/brw_primitive_restart.c |   4 +-
 4 files changed, 164 insertions(+), 48 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index a23e9c0..bfd113f 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -416,10 +416,10 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
  */
 static void
 brw_try_draw_prims(struct gl_context *ctx,
-                   const struct gl_client_array *arrays[],
                    const struct _mesa_prim *prims,
                    GLuint nr_prims,
                    const struct _mesa_index_buffer *ib,
+                   const struct gl_client_array *arrays[],
                    GLuint min_index,
                    GLuint max_index,
                    struct gl_buffer_object *indirect)
@@ -572,23 +572,26 @@ retry:
    return;
 }
 
-void
-brw_draw_prims(struct gl_context *ctx,
-               const struct _mesa_prim *prims,
-               GLuint nr_prims,
-               const struct _mesa_index_buffer *ib,
-               GLboolean index_bounds_valid,
-               GLuint min_index,
-               GLuint max_index,
-               struct gl_transform_feedback_object *unused_tfb_object,
-               unsigned stream,
-               struct gl_buffer_object *indirect)
+/**
+ * \warning
+ * This function must be static, inline, and always_inline.  This is the only
+ * thing that allows the compiler to optimize the tests of the \c API
+ * parameter away.
+ */
+static inline __attribute__((always_inline)) void
+draw_prims(struct gl_context *ctx,
+           const struct _mesa_prim *prims,
+           GLuint nr_prims,
+           const struct _mesa_index_buffer *ib,
+           GLboolean index_bounds_valid,
+           GLuint min_index,
+           GLuint max_index,
+           struct gl_buffer_object *indirect,
+           gl_api API)
 {
    struct brw_context *brw = brw_context(ctx);
    const struct gl_client_array **arrays = ctx->Array._DrawArrays;
 
-   assert(unused_tfb_object == NULL);
-
    if (!brw_check_conditional_render(brw))
       return;
 
@@ -598,38 +601,108 @@ brw_draw_prims(struct gl_context *ctx,
       return;
    }
 
-   /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
-    * won't support all the extensions we support.
+   /* Core profile removed GL_SELECT and GL_FEEDBACK.
+    *
+    * FINISHME: OpenGL ES (all versions) also lacks GL_SELECT and
+    * FINISHME: GL_FEEDBACK.  We could take advantage of this easily.
     */
-   if (ctx->RenderMode != GL_RENDER) {
-      perf_debug("%s render mode not supported in hardware\n",
-                 _mesa_enum_to_string(ctx->RenderMode));
-      _swsetup_Wakeup(ctx);
-      _tnl_wakeup(ctx);
-      _tnl_draw_prims(ctx, prims, nr_prims, ib,
-                      index_bounds_valid, min_index, max_index, NULL, 0, NULL);
-      return;
+   if (API == API_OPENGL_COMPAT) {
+      /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
+       * won't support all the extensions we support.
+       */
+      if (ctx->RenderMode != GL_RENDER) {
+         perf_debug("%s render mode not supported in hardware\n",
+                    _mesa_enum_to_string(ctx->RenderMode));
+         _swsetup_Wakeup(ctx);
+         _tnl_wakeup(ctx);
+         _tnl_draw_prims(ctx, prims, nr_prims, ib,
+                         index_bounds_valid, min_index, max_index, NULL, 0,
+                         NULL);
+         return;
+      }
    }
 
-   /* If we're going to have to upload any of the user's vertex arrays, then
-    * get the minimum and maximum of their index buffer so we know what range
-    * to upload.
+   /* Core profile requires that all vertex data be stored in VBOs, so there
+    * is no need to check whether or not all the data is in VBOs.
     */
-   if (!index_bounds_valid && !vbo_all_varyings_in_vbos(arrays)) {
-      perf_debug("Scanning index buffer to compute index buffer bounds.  "
-                 "Use glDrawRangeElements() to avoid this.\n");
-      vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
+   if (API != API_OPENGL_CORE) {
+      /* If we're going to have to upload any of the user's vertex arrays, then
+       * get the minimum and maximum of their index buffer so we know what range
+       * to upload.
+       */
+      if (!index_bounds_valid && !vbo_all_varyings_in_vbos(arrays)) {
+         perf_debug("Scanning index buffer to compute index buffer bounds.  "
+                    "Use glDrawRangeElements() to avoid this.\n");
+         vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
+      }
    }
 
    /* Try drawing with the hardware, but don't do anything else if we can't
     * manage it.  swrast doesn't support our featureset, so we can't fall back
     * to it.
     */
-   brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index,
+   brw_try_draw_prims(ctx, prims, nr_prims, ib, arrays, min_index, max_index,
                       indirect);
 }
 
 void
+brw_draw_prims_core(struct gl_context *ctx,
+                    const struct _mesa_prim *prims,
+                    GLuint nr_prims,
+                    const struct _mesa_index_buffer *ib,
+                    GLboolean index_bounds_valid,
+                    GLuint min_index,
+                    GLuint max_index,
+                    struct gl_transform_feedback_object *unused_tfb_object,
+                    unsigned stream,
+                    struct gl_buffer_object *indirect)
+{
+   (void) index_bounds_valid;
+   (void) unused_tfb_object;
+   (void) stream;
+
+   assert(unused_tfb_object == NULL);
+
+   draw_prims(ctx,
+              prims,
+              nr_prims,
+              ib,
+              true, /* index_bounds_valid */
+              min_index,
+              max_index,
+              indirect,
+              API_OPENGL_CORE);
+}
+
+void
+brw_draw_prims_compat(struct gl_context *ctx,
+                      const struct _mesa_prim *prims,
+                      GLuint nr_prims,
+                      const struct _mesa_index_buffer *ib,
+                      GLboolean index_bounds_valid,
+                      GLuint min_index,
+                      GLuint max_index,
+                      struct gl_transform_feedback_object *unused_tfb_object,
+                      unsigned stream,
+                      struct gl_buffer_object *indirect)
+{
+   (void) unused_tfb_object;
+   (void) stream;
+
+   assert(unused_tfb_object == NULL);
+
+   draw_prims(ctx,
+              prims,
+              nr_prims,
+              ib,
+              index_bounds_valid,
+              min_index,
+              max_index,
+              indirect,
+              API_OPENGL_COMPAT);
+}
+
+void
 brw_draw_init(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
@@ -637,7 +710,16 @@ brw_draw_init(struct brw_context *brw)
 
    /* Register our drawing function:
     */
-   vbo->draw_prims = brw_draw_prims;
+   switch (ctx->API) {
+   case API_OPENGL_COMPAT:
+   case API_OPENGLES:
+   case API_OPENGLES2:
+      vbo->draw_prims = brw_draw_prims_compat;
+      break;
+   case API_OPENGL_CORE:
+      vbo->draw_prims = brw_draw_prims_core;
+      break;
+   }
 
    for (int i = 0; i < VERT_ATTRIB_MAX; i++)
       brw->vb.inputs[i].buffer = -1;
diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h
index f994726..4ae32e1 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.h
+++ b/src/mesa/drivers/dri/i965/brw_draw.h
@@ -30,20 +30,54 @@
 
 #include "main/mtypes.h"		/* for struct gl_context... */
 #include "vbo/vbo.h"
+#include "vbo/vbo_context.h"
 
 struct brw_context;
 
 
-void brw_draw_prims(struct gl_context *ctx,
-		     const struct _mesa_prim *prims,
-		     GLuint nr_prims,
-		     const struct _mesa_index_buffer *ib,
-		     GLboolean index_bounds_valid,
-		     GLuint min_index,
-		     GLuint max_index,
-		     struct gl_transform_feedback_object *unused_tfb_object,
-                     unsigned stream,
-		     struct gl_buffer_object *indirect );
+/* This is the draw_prims to call in paths where the API is not known at
+ * compile time.
+ */
+static inline void
+brw_draw_prims_generic(struct gl_context *ctx,
+                       const struct _mesa_prim *prims,
+                       GLuint nr_prims,
+                       const struct _mesa_index_buffer *ib,
+                       GLboolean index_bounds_valid,
+                       GLuint min_index,
+                       GLuint max_index,
+                       struct gl_transform_feedback_object *unused_tfb_object,
+                       unsigned stream,
+                       struct gl_buffer_object *indirect)
+{
+   struct vbo_context *vbo = vbo_context(ctx);
+
+   vbo->draw_prims(ctx, prims, nr_prims, ib, index_bounds_valid,
+                   min_index, max_index, unused_tfb_object, stream,
+                   indirect);
+}
+
+void brw_draw_prims_compat(struct gl_context *ctx,
+                           const struct _mesa_prim *prims,
+                           GLuint nr_prims,
+                           const struct _mesa_index_buffer *ib,
+                           GLboolean index_bounds_valid,
+                           GLuint min_index,
+                           GLuint max_index,
+                           struct gl_transform_feedback_object *unused_tfb_object,
+                           unsigned stream,
+                           struct gl_buffer_object *indirect);
+
+void brw_draw_prims_core(struct gl_context *ctx,
+                         const struct _mesa_prim *prims,
+                         GLuint nr_prims,
+                         const struct _mesa_index_buffer *ib,
+                         GLboolean index_bounds_valid,
+                         GLuint min_index,
+                         GLuint max_index,
+                         struct gl_transform_feedback_object *unused_tfb_object,
+                         unsigned stream,
+                         struct gl_buffer_object *indirect);
 
 void brw_draw_init( struct brw_context *brw );
 void brw_draw_destroy( struct brw_context *brw );
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index f5ecbb5..d4b09c0 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -198,9 +198,9 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances)
    /* Make sure our internal prim value doesn't clash with a valid GL value. */
    assert(!_mesa_is_valid_prim_mode(ctx, prim.mode));
 
-   brw_draw_prims(ctx, &prim, 1, NULL,
-                  GL_TRUE, start, start + count - 1,
-                  NULL, 0, NULL);
+   brw_draw_prims_generic(ctx, &prim, 1, NULL,
+                          GL_TRUE, start, start + count - 1,
+                          NULL, 0, NULL);
 }
 
 static void
diff --git a/src/mesa/drivers/dri/i965/brw_primitive_restart.c b/src/mesa/drivers/dri/i965/brw_primitive_restart.c
index 6ed79d7..9a83dac 100644
--- a/src/mesa/drivers/dri/i965/brw_primitive_restart.c
+++ b/src/mesa/drivers/dri/i965/brw_primitive_restart.c
@@ -161,8 +161,8 @@ brw_handle_primitive_restart(struct gl_context *ctx,
       /* Cut index should work for primitive restart, so use it
        */
       brw->prim_restart.enable_cut_index = true;
-      brw_draw_prims(ctx, prims, nr_prims, ib, GL_FALSE, -1, -1, NULL, 0,
-                     indirect);
+      brw_draw_prims_generic(ctx, prims, nr_prims, ib, GL_FALSE, -1, -1, NULL,
+                             0, indirect);
       brw->prim_restart.enable_cut_index = false;
    } else {
       /* Not all the primitive draw modes are supported by the cut index,
-- 
2.1.0



More information about the mesa-dev mailing list