[Mesa-dev] [PATCH] i965: Fix indirect parameters draw during conditional rendering on hsw+

Thu Nov 15 14:52:06 UTC 2018

Both GL_ARB_indirect_parameters and GL_NV_conditional_render rely on
MI_PREDICATE. As a result, when conditional rendering was enabled, the
GL_ARB_indirect_parameters path mishandled the predicate result that was
already present and did not restore it afterwards.

To fix this, a special code path was added for this case.

For each draw call, MI_MATH is used to compute MI_PREDICATE_RESULT as:
( (draw index < draw count) && stored_mi_predicate_result )

After the loop, MI_PREDICATE_RESULT is restored to its original value.

Also, the number of GPU register loads was reduced by hoisting the loads
that are invariant with respect to the draw loop out of it.

Signed-off-by: Illia Iorin <illia.iorin at globallogic.com>
Signed-off-by: Danylo Piliaiev <danylo.piliaiev at globallogic.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108759
---
I haven’t tested this patch on Intel CI.
I am going to test it once the piglit test
is merged: https://patchwork.freedesktop.org/patch/262127/

 src/mesa/drivers/dri/i965/brw_draw.c | 108 ++++++++++++++++++++++-----
 1 file changed, 88 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 8536c04010..98d9e9c553 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -1030,6 +1030,7 @@ brw_draw_prims(struct gl_context *ctx,
    unsigned i;
    struct brw_context *brw = brw_context(ctx);
    int predicate_state = brw->predicate.state;
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
    struct brw_transform_feedback_object *xfb_obj =
       (struct brw_transform_feedback_object *) gl_xfb_obj;
 
@@ -1072,11 +1073,9 @@ brw_draw_prims(struct gl_context *ctx,
     * to it.
     */
 
-   for (i = 0; i < nr_prims; i++) {
-      /* Implementation of ARB_indirect_parameters via predicates */
-      if (brw->draw.draw_params_count_bo) {
-         brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
-
+   if (brw->draw.draw_params_count_bo) {
+      /* Preparing registers for ARB_indirect_parameters  */
+      if (brw->predicate.state != BRW_PREDICATE_STATE_USE_BIT) {
          /* Upload the current draw count from the draw parameters buffer to
           * MI_PREDICATE_SRC0.
           */
@@ -1085,25 +1084,94 @@ brw_draw_prims(struct gl_context *ctx,
                                brw->draw.draw_params_count_offset);
          /* Zero the top 32-bits of MI_PREDICATE_SRC0 */
          brw_load_register_imm32(brw, MI_PREDICATE_SRC0 + 4, 0);
-         /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
-         brw_load_register_imm64(brw, MI_PREDICATE_SRC1, prims[i].draw_id);
-
-         BEGIN_BATCH(1);
-         if (i == 0 && brw->predicate.state != BRW_PREDICATE_STATE_USE_BIT) {
-            OUT_BATCH(GEN7_MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
-                      MI_PREDICATE_COMBINEOP_SET |
-                      MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
-         } else {
-            OUT_BATCH(GEN7_MI_PREDICATE |
-                      MI_PREDICATE_LOADOP_LOAD | MI_PREDICATE_COMBINEOP_XOR |
-                      MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
-         }
+      } else if (devinfo->gen >= 8 || devinfo->is_haswell) {
+         /* Upload the current MI_PREDICATE_RESULT buffer to GPR0. */
+         brw_load_register_reg64(brw, MI_PREDICATE_RESULT, HSW_CS_GPR(0));
+         /* Upload the current draw count from the draw parameters buffer to
+          * GPR1.
+          */
+         brw_load_register_mem(brw, HSW_CS_GPR(1),
+                               brw->draw.draw_params_count_bo,
+                               brw->draw.draw_params_count_offset);
+
+         /* Zero the top 32-bits of GPR1 */
+         brw_load_register_imm32(brw, HSW_CS_GPR(1) + 4, 0);
+      } else {
+         /* TODO
+          * Implement slow code path  on pre hsw:
+          * read value from count buffer.
+          */
+         _mesa_warning(ctx, "Usage of GL_ARB_indirect_parameters " 
+            "functions during GL_NV_conditional_render "
+            "unsupported on pre hsw platform");
+         brw_finish_drawing(ctx);
+         brw->predicate.state = predicate_state;
+         return;
+      }
+   }
+
+   if (brw->draw.draw_params_count_bo &&
+       (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) &&
+       (devinfo->gen >= 8 || devinfo->is_haswell)) {
+      for (i = 0; i < nr_prims; i++) {
+
+         static const uint32_t maths[] = {
+            /* Compute (draw index < draw count).
+             * We do this by subtracting and storing the carry bit.
+             */ 
+            MI_MATH_ALU2(LOAD, SRCA, R2),
+            MI_MATH_ALU2(LOAD, SRCB, R1),
+            MI_MATH_ALU0(SUB),
+            MI_MATH_ALU2(STORE, R3, CF),
+            /* Compute (subtracting result & MI_PREDICAT). */ 
+            MI_MATH_ALU2(LOAD, SRCA, R3),
+            MI_MATH_ALU2(LOAD, SRCB, R0),
+            MI_MATH_ALU0(AND),
+            MI_MATH_ALU2(STORE, R3, ACCU),
+         };
+         brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
+         /* Upload the id of the current primitive to GPR2. */
+         brw_load_register_imm64(brw, HSW_CS_GPR(2), prims[i].draw_id);
+
+         BEGIN_BATCH(1 + ARRAY_SIZE(maths));
+         OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2));
+
+         for (int m = 0; m < ARRAY_SIZE(maths); m++)
+            OUT_BATCH(maths[m]);
+
          ADVANCE_BATCH();
+         /* Store result of mi_math operation to MI_PREDICATE_RESULT */
+         brw_load_register_reg64(brw, HSW_CS_GPR(3), MI_PREDICATE_RESULT);
 
-         brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
+         brw_draw_single_prim(ctx, &prims[i], i, xfb_obj, stream, indirect);
       }
 
-      brw_draw_single_prim(ctx, &prims[i], i, xfb_obj, stream, indirect);
+      brw_load_register_reg64(brw, HSW_CS_GPR(0), MI_PREDICATE_RESULT);
+   } else {
+      for (i = 0; i < nr_prims; i++) {
+         /* Implementation of ARB_indirect_parameters via predicates */
+         if (brw->draw.draw_params_count_bo) {
+            brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
+            /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
+            brw_load_register_imm64(brw, MI_PREDICATE_SRC1, prims[i].draw_id);
+
+            BEGIN_BATCH(1);
+            if (i == 0) {
+               OUT_BATCH(GEN7_MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
+                         MI_PREDICATE_COMBINEOP_SET |
+                         MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+            } else {
+               OUT_BATCH(GEN7_MI_PREDICATE |
+                         MI_PREDICATE_LOADOP_LOAD | MI_PREDICATE_COMBINEOP_XOR
+                         | MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
+            }
+            ADVANCE_BATCH();
+
+            brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
+         }
+
+         brw_draw_single_prim(ctx, &prims[i], i, xfb_obj, stream, indirect);
+      }
    }
 
    brw_finish_drawing(ctx);
-- 
2.17.1