[Mesa-dev] [PATCH 2/2 v3] i965: Use predicate enable bit for conditional rendering w/o stalling

Neil Roberts neil at linux.intel.com
Fri May 8 11:04:31 PDT 2015


Previously whenever a primitive is drawn the driver would call
_mesa_check_conditional_render which blocks waiting for the result of
the query to determine whether to render. On Gen7+ there is a bit in
the 3DPRIMITIVE command which can be used to disable the primitive
based on the value of a state bit. This state bit can be set based on
whether two registers have different values using the MI_PREDICATE
command. We can load these two registers with the pixel count values
stored in the query begin and end to implement conditional rendering
without stalling.

Unfortunately these two source registers were not in the whitelist of
available registers in the kernel driver until v3.19. This patch uses
the command parser version from intel_screen to detect whether to
attempt to set the predicate data registers.

The predicate enable bit is currently only used for drawing 3D
primitives. For blits, clears, bitmaps, copypixels and drawpixels it
still causes a stall. For most of these it would probably just work to
call the new brw_check_conditional_render function instead of
_mesa_check_conditional_render because they already work in terms of
rendering primitives. However it's a bit trickier for blits because it
can use the BLT ring or the blorp codepath. I think these operations
are less useful for conditional rendering than rendering primitives so
it might be best to leave it for a later patch.

v2: Use the command parser version to detect whether we can write to
    the predicate data registers instead of trying to execute a
    register load command.
v3: Simple rebase
---
 src/mesa/drivers/dri/i965/Makefile.sources         |   1 +
 src/mesa/drivers/dri/i965/brw_conditional_render.c | 167 +++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_context.c            |   4 +
 src/mesa/drivers/dri/i965/brw_context.h            |  21 +++
 src/mesa/drivers/dri/i965/brw_defines.h            |   1 +
 src/mesa/drivers/dri/i965/brw_draw.c               |  16 +-
 src/mesa/drivers/dri/i965/brw_queryobj.c           |  17 ++-
 src/mesa/drivers/dri/i965/intel_extensions.c       |   5 +
 src/mesa/drivers/dri/i965/intel_reg.h              |  23 +++
 9 files changed, 243 insertions(+), 12 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_conditional_render.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 1ae93e1..a24c20a 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -18,6 +18,7 @@ i965_FILES = \
 	brw_clip_unfilled.c \
 	brw_clip_util.c \
 	brw_compute.c \
+	brw_conditional_render.c \
 	brw_context.c \
 	brw_context.h \
 	brw_cs.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c b/src/mesa/drivers/dri/i965/brw_conditional_render.c
new file mode 100644
index 0000000..e676aa0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Neil Roberts <neil at linux.intel.com>
+ */
+
+/** @file brw_conditional_render.c
+ *
+ * Support for conditional rendering based on query objects
+ * (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gen7+.
+ */
+
+#include "main/imports.h"
+#include "main/condrender.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+set_predicate_enable(struct brw_context *brw,
+                     bool value)
+{
+   if (value)
+      brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
+   else
+      brw->predicate.state = BRW_PREDICATE_STATE_DONT_RENDER;
+}
+
+static void
+load_64bit_register(struct brw_context *brw,
+                    uint32_t reg,
+                    drm_intel_bo *bo,
+                    uint32_t offset)
+{
+   int i;
+
+   for (i = 0; i < 2; i++) {
+      brw_load_register_mem(brw,
+                            reg + i * 4,
+                            bo,
+                            I915_GEM_DOMAIN_INSTRUCTION,
+                            0, /* write domain */
+                            offset + i * 4);
+   }
+}
+
+static void
+set_predicate_for_result(struct brw_context *brw,
+                         struct brw_query_object *query,
+                         bool inverted)
+{
+   int load_op;
+
+   assert(query->bo != NULL);
+
+   load_64bit_register(brw, MI_PREDICATE_SRC0, query->bo, 0);
+   load_64bit_register(brw, MI_PREDICATE_SRC1, query->bo, 8);
+
+   if (inverted)
+      load_op = MI_PREDICATE_LOADOP_LOAD;
+   else
+      load_op = MI_PREDICATE_LOADOP_LOADINV;
+
+   BEGIN_BATCH(1);
+   OUT_BATCH(GEN7_MI_PREDICATE |
+             load_op |
+             MI_PREDICATE_COMBINEOP_SET |
+             MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+   ADVANCE_BATCH();
+
+   brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
+}
+
+static void
+brw_begin_conditional_render(struct gl_context *ctx,
+                             struct gl_query_object *q,
+                             GLenum mode)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_query_object *query = (struct brw_query_object *) q;
+   bool inverted;
+
+   if (!brw->predicate.supported)
+      return;
+
+   switch (mode) {
+   case GL_QUERY_WAIT:
+   case GL_QUERY_NO_WAIT:
+   case GL_QUERY_BY_REGION_WAIT:
+   case GL_QUERY_BY_REGION_NO_WAIT:
+      inverted = false;
+      break;
+   case GL_QUERY_WAIT_INVERTED:
+   case GL_QUERY_NO_WAIT_INVERTED:
+   case GL_QUERY_BY_REGION_WAIT_INVERTED:
+   case GL_QUERY_BY_REGION_NO_WAIT_INVERTED:
+      inverted = true;
+      break;
+   default:
+      unreachable("Unexpected conditional render mode");
+   }
+
+   /* If there are already samples from a BLT operation or if the query object
+    * is ready then we can avoid looking at the values in the buffer and just
+    * decide whether to draw using the CPU without stalling */
+   if (query->Base.Result || query->Base.Ready)
+      set_predicate_enable(brw, (query->Base.Result != 0) ^ inverted);
+   else
+      set_predicate_for_result(brw, query, inverted);
+}
+
+static void
+brw_end_conditional_render(struct gl_context *ctx,
+                           struct gl_query_object *q)
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   /* When there is no longer a conditional render in progress it should
+    * always render */
+   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
+}
+
+void
+brw_init_conditional_render_functions(struct dd_function_table *functions)
+{
+   functions->BeginConditionalRender = brw_begin_conditional_render;
+   functions->EndConditionalRender = brw_end_conditional_render;
+}
+
+bool
+brw_check_conditional_render(struct brw_context *brw)
+{
+   if (brw->predicate.supported) {
+      /* In some cases it is possible to determine that the primitives should
+       * be skipped without needing the predicate enable bit and still without
+       * stalling */
+      return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER;
+   } else {
+      if (brw->ctx.Query.CondRenderQuery) {
+         perf_debug("Conditional rendering is implemented in software and may "
+                    "stall.\n");
+      }
+
+      return _mesa_check_conditional_render(&brw->ctx);
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index fd7420a..673529a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -289,6 +289,8 @@ brw_init_driver_functions(struct brw_context *brw,
    else
       gen4_init_queryobj_functions(functions);
    brw_init_compute_functions(functions);
+   if (brw->gen >= 7)
+      brw_init_conditional_render_functions(functions);
 
    functions->QuerySamplesForFormat = brw_query_samples_for_format;
 
@@ -891,6 +893,8 @@ brwCreateContext(gl_api api,
    brw->gs.enabled = false;
    brw->sf.viewport_transform_enable = true;
 
+   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
+
    ctx->VertexProgram._MaintainTnlProgram = true;
    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 834aaa4..b9383fa 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -966,6 +966,18 @@ struct brw_stage_state
    uint32_t sampler_offset;
 };
 
+enum brw_predicate_state {
+   /* The first two states are used if we can determine whether to draw
+    * without having to look at the values in the query object buffer. This
+    * will happen if there is no conditional render in progress, if the query
+    * object is already completed or if something else has already added
+    * samples to the preliminary result such as via a BLT command. */
+   BRW_PREDICATE_STATE_RENDER,
+   BRW_PREDICATE_STATE_DONT_RENDER,
+   /* In this case whether to draw or not depends on the result of an
+    * MI_PREDICATE command so the predicate enable bit needs to be checked */
+   BRW_PREDICATE_STATE_USE_BIT
+};
 
 /**
  * brw_context is derived from gl_context.
@@ -1402,6 +1414,11 @@ struct brw_context
    } query;
 
    struct {
+      enum brw_predicate_state state;
+      bool supported;
+   } predicate;
+
+   struct {
       /** A map from pipeline statistics counter IDs to MMIO addresses. */
       const int *statistics_registers;
 
@@ -1600,6 +1617,10 @@ void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *bo, int idx);
 void brw_store_register_mem64(struct brw_context *brw,
                               drm_intel_bo *bo, uint32_t reg, int idx);
 
+/** brw_conditional_render.c */
+void brw_init_conditional_render_functions(struct dd_function_table *functions);
+bool brw_check_conditional_render(struct brw_context *brw);
+
 /** intel_batchbuffer.c */
 void brw_load_register_mem(struct brw_context *brw,
                            uint32_t reg,
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 83d7a35..11cb3fa 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -51,6 +51,7 @@
 # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
 # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 15)
 # define GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE      (1 << 10)
+# define GEN7_3DPRIM_PREDICATE_ENABLE               (1 << 8)
 /* DW1 */
 # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
 # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 8)
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 96e2369..a7164db 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -178,6 +178,7 @@ static void brw_emit_prim(struct brw_context *brw,
    int verts_per_instance;
    int vertex_access_type;
    int indirect_flag;
+   int predicate_enable;
 
    DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
        prim->start, prim->count);
@@ -258,10 +259,14 @@ static void brw_emit_prim(struct brw_context *brw,
       indirect_flag = 0;
    }
 
-
    if (brw->gen >= 7) {
+      if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
+         predicate_enable = GEN7_3DPRIM_PREDICATE_ENABLE;
+      else
+         predicate_enable = 0;
+
       BEGIN_BATCH(7);
-      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag);
+      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable);
       OUT_BATCH(hw_prim | vertex_access_type);
    } else {
       BEGIN_BATCH(6);
@@ -561,12 +566,7 @@ void brw_draw_prims( struct gl_context *ctx,
 
    assert(unused_tfb_object == NULL);
 
-   if (ctx->Query.CondRenderQuery) {
-      perf_debug("Conditional rendering is implemented in software and may "
-                 "stall.  This should be fixed in the driver.\n");
-   }
-
-   if (!_mesa_check_conditional_render(ctx))
+   if (!brw_check_conditional_render(brw))
       return;
 
    /* Handle primitive restart if needed */
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
index 667c900..f0a50fe 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -66,10 +66,19 @@ brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
 void
 brw_write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
 {
-   brw_emit_pipe_control_write(brw,
-                               PIPE_CONTROL_WRITE_DEPTH_COUNT
-                               | PIPE_CONTROL_DEPTH_STALL,
-                               query_bo, idx * sizeof(uint64_t), 0, 0);
+   uint32_t flags;
+
+   flags = (PIPE_CONTROL_WRITE_DEPTH_COUNT |
+            PIPE_CONTROL_DEPTH_STALL);
+
+   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
+    * command when loading the values into the predicate source registers for
+    * conditional rendering */
+   if (brw->predicate.supported)
+      flags |= PIPE_CONTROL_FLUSH_ENABLE;
+
+   brw_emit_pipe_control_write(brw, flags, query_bo,
+                               idx * sizeof(uint64_t), 0, 0);
 }
 
 /**
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index c3eee31..cafb774 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -320,6 +320,8 @@ intelInitExtensions(struct gl_context *ctx)
       }
    }
 
+   brw->predicate.supported = false;
+
    if (brw->gen >= 7) {
       ctx->Extensions.ARB_conservative_depth = true;
       ctx->Extensions.ARB_derivative_control = true;
@@ -333,6 +335,9 @@ intelInitExtensions(struct gl_context *ctx)
          ctx->Extensions.ARB_transform_feedback2 = true;
          ctx->Extensions.ARB_transform_feedback3 = true;
          ctx->Extensions.ARB_transform_feedback_instanced = true;
+
+         if (brw->intelScreen->cmd_parser_version >= 2)
+            brw->predicate.supported = true;
       }
 
       /* Only enable this in core profile because other parts of Mesa behave
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
index 488fb5b..bd14e18 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -48,6 +48,20 @@
 #define GEN7_MI_LOAD_REGISTER_MEM	(CMD_MI | (0x29 << 23))
 # define MI_LOAD_REGISTER_MEM_USE_GGTT		(1 << 22)
 
+/* Manipulate the predicate bit based on some register values. Only on Gen7+ */
+#define GEN7_MI_PREDICATE		(CMD_MI | (0xC << 23))
+# define MI_PREDICATE_LOADOP_KEEP		(0 << 6)
+# define MI_PREDICATE_LOADOP_LOAD		(2 << 6)
+# define MI_PREDICATE_LOADOP_LOADINV		(3 << 6)
+# define MI_PREDICATE_COMBINEOP_SET		(0 << 3)
+# define MI_PREDICATE_COMBINEOP_AND		(1 << 3)
+# define MI_PREDICATE_COMBINEOP_OR		(2 << 3)
+# define MI_PREDICATE_COMBINEOP_XOR		(3 << 3)
+# define MI_PREDICATE_COMPAREOP_TRUE		(0 << 0)
+# define MI_PREDICATE_COMPAREOP_FALSE		(1 << 0)
+# define MI_PREDICATE_COMPAREOP_SRCS_EQUAL	(2 << 0)
+# define MI_PREDICATE_COMPAREOP_DELTAS_EQUAL	(3 << 0)
+
 /** @{
  *
  * PIPE_CONTROL operation, a combination MI_FLUSH and register write with
@@ -69,6 +83,7 @@
 #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE	(1 << 10) /* GM45+ only */
 #define PIPE_CONTROL_ISP_DIS		(1 << 9)
 #define PIPE_CONTROL_INTERRUPT_ENABLE	(1 << 8)
+#define PIPE_CONTROL_FLUSH_ENABLE	(1 << 7) /* Gen7+ only */
 /* GT */
 #define PIPE_CONTROL_DATA_CACHE_INVALIDATE	(1 << 5)
 #define PIPE_CONTROL_VF_CACHE_INVALIDATE	(1 << 4)
@@ -147,3 +162,11 @@
 # define GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC (1 << 1)
 # define GEN8_HIZ_PMA_MASK_BITS \
    ((GEN8_HIZ_NP_PMA_FIX_ENABLE | GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE) << 16)
+
+/* Predicate registers */
+#define MI_PREDICATE_SRC0               0x2400
+#define MI_PREDICATE_SRC1               0x2408
+#define MI_PREDICATE_DATA               0x2410
+#define MI_PREDICATE_RESULT             0x2418
+#define MI_PREDICATE_RESULT_1           0x241C
+#define MI_PREDICATE_RESULT_2           0x2214
-- 
1.9.3



More information about the mesa-dev mailing list