[Mesa-dev] [PATCH 4/5] i965/gs: Use new NIR intrinsics.

Kenneth Graunke kenneth at whitecape.org
Thu Sep 3 01:48:49 PDT 2015


By performing the vertex counting in NIR, we're able to elide a ton of
useless safety checks around every EmitVertex() call:

total instructions in shared programs: 3952 -> 3720 (-5.87%)
instructions in affected programs:     3491 -> 3259 (-6.65%)
helped:                                11
HURT:                                  0

Improves performance in Gl32GSCloth by 0.671742% +/- 0.142202% (n=621)
on Haswell GT3e at 1024x768.

This should also make it easier to implement Broadwell's "Static Vertex
Count" feature someday.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_nir.c               |  5 ++++
 src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp     | 13 +++++++++--
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 28 ++++++++++++-----------
 src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp     | 28 ++++++++++++++---------
 4 files changed, 48 insertions(+), 26 deletions(-)

There are a couple regressions left with this patch, which are actually bugs
in the NIR control flow graph modification code.  I wrote a fix, which IIRC
got this to zero regressions.  But I've found even more bugs in the NIR CFG
code, so I'm working on yet more fixes.  I'll sort that out before landing
this, but figured I may as well send it out for review in the meantime - I've
had these patches a long time.

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 8f3edc5..15e79ba 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -93,6 +93,11 @@ brw_create_nir(struct brw_context *brw,
    }
    nir_validate_shader(nir);
 
+   if (stage == MESA_SHADER_GEOMETRY) {
+      nir_lower_gs_intrinsics(nir);
+      nir_validate_shader(nir);
+   }
+
    nir_lower_global_vars_to_local(nir);
    nir_validate_shader(nir);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
index 8a8dd57..4f4e1e1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
@@ -92,16 +92,25 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    src_reg src;
 
    switch (instr->intrinsic) {
-   case nir_intrinsic_emit_vertex: {
+   case nir_intrinsic_emit_vertex_with_counter: {
+      this->vertex_count =
+         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
       int stream_id = instr->const_index[0];
       gs_emit_vertex(stream_id);
       break;
    }
 
-   case nir_intrinsic_end_primitive:
+   case nir_intrinsic_end_primitive_with_counter:
+      this->vertex_count =
+         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
       gs_end_primitive();
       break;
 
+   case nir_intrinsic_set_vertex_count:
+      this->vertex_count =
+         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
+      break;
+
    case nir_intrinsic_load_invocation_id: {
       src_reg invocation_id =
          src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 019efec..70c0f8e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -484,14 +484,6 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
    if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
       return;
 
-   /* To ensure that we don't output more vertices than the shader specified
-    * using max_vertices, do the logic inside a conditional of the form "if
-    * (vertex_count < MAX)"
-    */
-   unsigned num_output_vertices = c->gp->program.VerticesOut;
-   emit(CMP(dst_null_d(), this->vertex_count,
-            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
-   emit(IF(BRW_PREDICATE_NORMAL));
    {
       /* If we're outputting 32 control data bits or less, then we can wait
        * until the shader is over to output them all.  Otherwise we need to
@@ -562,12 +554,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
           this->current_annotation = "emit vertex: Stream control data bits";
           set_stream_control_data_bits(stream_id);
       }
-
-      this->current_annotation = "emit vertex: increment vertex count";
-      emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
-               src_reg(1u)));
    }
-   emit(BRW_OPCODE_ENDIF);
 
    this->current_annotation = NULL;
 }
@@ -575,7 +562,22 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
 void
 vec4_gs_visitor::visit(ir_emit_vertex *ir)
 {
+   /* To ensure that we don't output more vertices than the shader specified
+    * using max_vertices, do the logic inside a conditional of the form "if
+    * (vertex_count < MAX)"
+    */
+   unsigned num_output_vertices = c->gp->program.VerticesOut;
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+   emit(IF(BRW_PREDICATE_NORMAL));
+
    gs_emit_vertex(ir->stream_id());
+
+   this->current_annotation = "emit vertex: increment vertex count";
+   emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
+            src_reg(1u)));
+
+   emit(BRW_OPCODE_ENDIF);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index 68e443d..5cfff7b 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -149,19 +149,29 @@ gen6_gs_visitor::emit_prolog()
 void
 gen6_gs_visitor::visit(ir_emit_vertex *ir)
 {
+   /* To ensure that we don't output more vertices than the shader specified
+    * using max_vertices, do the logic inside a conditional of the form "if
+    * (vertex_count < MAX)"
+    */
+   unsigned num_output_vertices = c->gp->program.VerticesOut;
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+   emit(IF(BRW_PREDICATE_NORMAL));
+
    gs_emit_vertex(ir->stream_id());
+
+   this->current_annotation = "emit vertex: increment vertex count";
+   emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
+            src_reg(1u)));
+
+   emit(BRW_OPCODE_ENDIF);
 }
+
 void
 gen6_gs_visitor::gs_emit_vertex(int stream_id)
 {
    this->current_annotation = "gen6 emit vertex";
-   /* Honor max_vertex layout indication in geometry shader by ignoring any
-    * vertices coming after c->gp->program.VerticesOut.
-    */
-   unsigned num_output_vertices = c->gp->program.VerticesOut;
-   emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices),
-            BRW_CONDITIONAL_L));
-   emit(IF(BRW_PREDICATE_NORMAL));
+
    {
       /* Buffer all output slots for this vertex in vertex_output */
       for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
@@ -219,11 +229,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
       }
       emit(ADD(dst_reg(this->vertex_output_offset),
                this->vertex_output_offset, 1u));
-
-      /* Update vertex count */
-      emit(ADD(dst_reg(this->vertex_count), this->vertex_count, 1u));
    }
-   emit(BRW_OPCODE_ENDIF);
 }
 
 void
-- 
2.5.0



More information about the mesa-dev mailing list