[Mesa-dev] [PATCH 4/5] i965/gs: Use new NIR intrinsics.

Jason Ekstrand jason at jlekstrand.net
Mon Sep 7 11:07:17 PDT 2015


On Mon, Sep 7, 2015 at 11:06 AM, Jason Ekstrand <jason at jlekstrand.net> wrote:
> On Thu, Sep 3, 2015 at 1:48 AM, Kenneth Graunke <kenneth at whitecape.org> wrote:
>> By performing the vertex counting in NIR, we're able to elide a ton of
>> useless safety checks around every EmitVertex() call:
>>
>> total instructions in shared programs: 3952 -> 3720 (-5.87%)
>> instructions in affected programs:     3491 -> 3259 (-6.65%)
>> helped:                                11
>> HURT:                                  0
>>
>> Improves performance in Gl32GSCloth by 0.671742% +/- 0.142202% (n=621)
>> on Haswell GT3e at 1024x768.
>>
>> This should also make it easier to implement Broadwell's "Static Vertex
>> Count" feature someday.
>>
>> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
>> ---
>>  src/mesa/drivers/dri/i965/brw_nir.c               |  5 ++++
>>  src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp     | 13 +++++++++--
>>  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 28 ++++++++++++-----------
>>  src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp     | 28 ++++++++++++++---------
>>  4 files changed, 48 insertions(+), 26 deletions(-)
>>
>> There are a couple regressions left with this patch, which are actually bugs
>> in the NIR control flow graph modification code.  I wrote a fix, which IIRC
>> got this to zero regressions.  But I've found even more bugs in the NIR CFG
>> code, so I'm working on yet more fixes.  I'll sort that out before landing
>> this, but figured I may as well send it out for review in the meantime - I've
>> had these patches a long time.
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
>> index 8f3edc5..15e79ba 100644
>> --- a/src/mesa/drivers/dri/i965/brw_nir.c
>> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
>> @@ -93,6 +93,11 @@ brw_create_nir(struct brw_context *brw,
>>     }
>>     nir_validate_shader(nir);
>>
>> +   if (stage == MESA_SHADER_GEOMETRY) {
>> +      nir_lower_gs_intrinsics(nir);
>> +      nir_validate_shader(nir);
>> +   }
>> +
>>     nir_lower_global_vars_to_local(nir);
>>     nir_validate_shader(nir);
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
>> index 8a8dd57..4f4e1e1 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
>> @@ -92,16 +92,25 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
>>     src_reg src;
>>
>>     switch (instr->intrinsic) {
>> -   case nir_intrinsic_emit_vertex: {
>> +   case nir_intrinsic_emit_vertex_with_counter: {
>> +      this->vertex_count =
>> +         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
>>        int stream_id = instr->const_index[0];
>>        gs_emit_vertex(stream_id);
>>        break;
>>     }
>>
>> -   case nir_intrinsic_end_primitive:
>> +   case nir_intrinsic_end_primitive_with_counter:
>> +      this->vertex_count =
>> +         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
>
> It seems kind of odd to me that we are seetting a vertex_count state
> variable in this case and in the one above.  Wouldn't it be better to
> pass the vertex count in to end_primitive() and emit_vertex()?  That
> can be a refactor for another day though; after we delete the old
> vec4_visitor code.
>
> Modulo getting the CF bugs fixed, this series is

And the comments on patch 3 of course.

> Reviewed-by: Jason Ekstrand <jason.ekstrand at intel.com>
>
>>        gs_end_primitive();
>>        break;
>>
>> +   case nir_intrinsic_set_vertex_count:
>> +      this->vertex_count =
>> +         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
>> +      break;
>> +
>>     case nir_intrinsic_load_invocation_id: {
>>        src_reg invocation_id =
>>           src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
>> index 019efec..70c0f8e 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
>> @@ -484,14 +484,6 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
>>     if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
>>        return;
>>
>> -   /* To ensure that we don't output more vertices than the shader specified
>> -    * using max_vertices, do the logic inside a conditional of the form "if
>> -    * (vertex_count < MAX)"
>> -    */
>> -   unsigned num_output_vertices = c->gp->program.VerticesOut;
>> -   emit(CMP(dst_null_d(), this->vertex_count,
>> -            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
>> -   emit(IF(BRW_PREDICATE_NORMAL));
>>     {
>>        /* If we're outputting 32 control data bits or less, then we can wait
>>         * until the shader is over to output them all.  Otherwise we need to
>> @@ -562,12 +554,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
>>            this->current_annotation = "emit vertex: Stream control data bits";
>>            set_stream_control_data_bits(stream_id);
>>        }
>> -
>> -      this->current_annotation = "emit vertex: increment vertex count";
>> -      emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
>> -               src_reg(1u)));
>>     }
>> -   emit(BRW_OPCODE_ENDIF);
>>
>>     this->current_annotation = NULL;
>>  }
>> @@ -575,7 +562,22 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
>>  void
>>  vec4_gs_visitor::visit(ir_emit_vertex *ir)
>>  {
>> +   /* To ensure that we don't output more vertices than the shader specified
>> +    * using max_vertices, do the logic inside a conditional of the form "if
>> +    * (vertex_count < MAX)"
>> +    */
>> +   unsigned num_output_vertices = c->gp->program.VerticesOut;
>> +   emit(CMP(dst_null_d(), this->vertex_count,
>> +            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
>> +   emit(IF(BRW_PREDICATE_NORMAL));
>> +
>>     gs_emit_vertex(ir->stream_id());
>> +
>> +   this->current_annotation = "emit vertex: increment vertex count";
>> +   emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
>> +            src_reg(1u)));
>> +
>> +   emit(BRW_OPCODE_ENDIF);
>>  }
>>
>>  void
>> diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
>> index 68e443d..5cfff7b 100644
>> --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
>> @@ -149,19 +149,29 @@ gen6_gs_visitor::emit_prolog()
>>  void
>>  gen6_gs_visitor::visit(ir_emit_vertex *ir)
>>  {
>> +   /* To ensure that we don't output more vertices than the shader specified
>> +    * using max_vertices, do the logic inside a conditional of the form "if
>> +    * (vertex_count < MAX)"
>> +    */
>> +   unsigned num_output_vertices = c->gp->program.VerticesOut;
>> +   emit(CMP(dst_null_d(), this->vertex_count,
>> +            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
>> +   emit(IF(BRW_PREDICATE_NORMAL));
>> +
>>     gs_emit_vertex(ir->stream_id());
>> +
>> +   this->current_annotation = "emit vertex: increment vertex count";
>> +   emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
>> +            src_reg(1u)));
>> +
>> +   emit(BRW_OPCODE_ENDIF);
>>  }
>> +
>>  void
>>  gen6_gs_visitor::gs_emit_vertex(int stream_id)
>>  {
>>     this->current_annotation = "gen6 emit vertex";
>> -   /* Honor max_vertex layout indication in geometry shader by ignoring any
>> -    * vertices coming after c->gp->program.VerticesOut.
>> -    */
>> -   unsigned num_output_vertices = c->gp->program.VerticesOut;
>> -   emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices),
>> -            BRW_CONDITIONAL_L));
>> -   emit(IF(BRW_PREDICATE_NORMAL));
>> +
>>     {
>>        /* Buffer all output slots for this vertex in vertex_output */
>>        for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
>> @@ -219,11 +229,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
>>        }
>>        emit(ADD(dst_reg(this->vertex_output_offset),
>>                 this->vertex_output_offset, 1u));
>> -
>> -      /* Update vertex count */
>> -      emit(ADD(dst_reg(this->vertex_count), this->vertex_count, 1u));
>>     }
>> -   emit(BRW_OPCODE_ENDIF);
>>  }
>>
>>  void
>> --
>> 2.5.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list