[Mesa-dev] [PATCH 28/37] i965/gen6/gs: implement transform feedback support in gen6_gs_visitor

Jordan Justen jljusten at gmail.com
Thu Sep 18 16:05:48 PDT 2014


On Thu, Aug 14, 2014 at 4:12 AM, Iago Toral Quiroga <itoral at igalia.com> wrote:
> From: Samuel Iglesias Gonsalvez <siglesias at igalia.com>
>
> This takes care of generating code required to handle transform feedback.
> Notice that transform feedback isn't enabled yet, since that requires
> additional setups in other parts of the code that will come in later patches.
>
> Signed-off-by: Samuel Iglesias Gonsalvez <siglesias at igalia.com>
> ---
>  src/mesa/drivers/dri/i965/brw_context.h       | 113 ++++++----
>  src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 309 +++++++++++++++++++++++++-
>  src/mesa/drivers/dri/i965/gen6_gs_visitor.h   |  14 ++
>  3 files changed, 391 insertions(+), 45 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 7439da1..3418b76 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -553,48 +553,6 @@ struct brw_vs_prog_data {
>     bool uses_vertexid;
>  };
>
> -
> -/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
> - * this struct!
> - */
> -struct brw_gs_prog_data
> -{
> -   struct brw_vec4_prog_data base;
> -
> -   /**
> -    * Size of an output vertex, measured in HWORDS (32 bytes).
> -    */
> -   unsigned output_vertex_size_hwords;
> -
> -   unsigned output_topology;
> -
> -   /**
> -    * Size of the control data (cut bits or StreamID bits), in hwords (32
> -    * bytes).  0 if there is no control data.
> -    */
> -   unsigned control_data_header_size_hwords;
> -
> -   /**
> -    * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
> -    * if the control data is StreamID bits, or
> -    * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
> -    * Ignored if control_data_header_size is 0.
> -    */
> -   unsigned control_data_format;
> -
> -   bool include_primitive_id;
> -
> -   int invocations;
> -
> -   /**
> -    * Dispatch mode, can be any of:
> -    * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT
> -    * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE
> -    * GEN7_GS_DISPATCH_MODE_SINGLE
> -    */
> -   int dispatch_mode;
> -};
> -
>  /** Number of texture sampler units */
>  #define BRW_MAX_TEX_UNIT 32
>
> @@ -641,6 +599,77 @@ struct brw_gs_prog_data
>  #define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
>  #define BRW_MAX_GEN6_GS_SURFACES       SURF_INDEX_GEN6_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
>
> +/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
> + * this struct!
> + */
> +struct brw_gs_prog_data
> +{
> +   struct brw_vec4_prog_data base;
> +
> +   /**
> +    * Size of an output vertex, measured in HWORDS (32 bytes).
> +    */
> +   unsigned output_vertex_size_hwords;
> +
> +   unsigned output_topology;
> +
> +   /**
> +    * Size of the control data (cut bits or StreamID bits), in hwords (32
> +    * bytes).  0 if there is no control data.
> +    */
> +   unsigned control_data_header_size_hwords;
> +
> +   /**
> +    * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
> +    * if the control data is StreamID bits, or
> +    * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
> +    * Ignored if control_data_header_size is 0.
> +    */
> +   unsigned control_data_format;
> +
> +   bool include_primitive_id;
> +
> +   int invocations;
> +
> +   /**
> +    * Dispatch mode, can be any of:
> +    * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT
> +    * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE
> +    * GEN7_GS_DISPATCH_MODE_SINGLE
> +    */
> +   int dispatch_mode;
> +
> +   /**
> +    * Gen6 transform feedback enabled flag.
> +    */
> +   bool gen6_xfb_enabled;
> +
> +   /**
> +    * Gen6: Provoking vertex convention for odd-numbered triangles
> +    * in tristrips.
> +    */
> +   GLuint pv_first:1;
> +
> +   /**
> +    * Gen6: Number of varyings that are output to transform feedback.
> +    */
> +   GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
> +
> +   /**
> +    * Gen6: Map from the index of a transform feedback binding table entry to the
> +    * gl_varying_slot that should be streamed out through that binding table
> +    * entry.
> +    */
> +   unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS];
> +
> +   /**
> +    * Gen6: Map from the index of a transform feedback binding table entry to the
> +    * swizzles that should be used when streaming out data through that
> +    * binding table entry.
> +    */
> +   unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS];
> +};
> +
>  /**
>   * Stride in bytes between shader_time entries.
>   *
> diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
> index c1cfe75..b8eaa58 100644
> --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
> @@ -97,6 +97,45 @@ gen6_gs_visitor::emit_prolog()
>     this->prim_count = src_reg(this, glsl_type::uint_type);
>     emit(MOV(dst_reg(this->prim_count), 0u));
>
> +   if (c->prog_data.gen6_xfb_enabled) {
> +      const struct gl_transform_feedback_info *linked_xfb_info =
> +         &this->shader_prog->LinkedTransformFeedback;
> +
> +      /* Gen6 geometry shaders are required to ask for Streamed Vertex Buffer
> +       * Indices values via FF_SYNC message, when Transform Feedback is
> +       * enabled.
> +       *
> +       * To achieve this we buffer the Transform feedback outputs for each
> +       * emitted vertex in xfb_output during operation. Then, when we have
> +       * processed the last vertex (that is, at thread end time), we know all
> +       * the required data for the FF_SYNC message header in order to receive
> +       * the SVBI in the writeback.
> +       *
> +       * For each emitted vertex, xfb_output will hold
> +       * num_transform_feedback_bindings data items plus one, which will
> +       * indicate the end of the primitive. Next vertex's data comes right
> +       * after.
> +       */
> +      this->xfb_output = src_reg(this,
> +                                 glsl_type::uint_type,
> +                                 linked_xfb_info->NumOutputs *
> +                                 c->gp->program.VerticesOut);
> +      this->xfb_output_offset = src_reg(this, glsl_type::uint_type);
> +      emit(MOV(dst_reg(this->xfb_output_offset), src_reg(0u)));
> +      /* Create a virtual register to hold destination indices in SOL */
> +      this->destination_indices = src_reg(this, glsl_type::uvec4_type);
> +      /* Create a virtual register to hold temporal values in SOL */
> +      this->sol_temp = src_reg(this, glsl_type::uvec4_type);

What is the live range of sol_temp?

Would it be better to generate a new temp in each function that needs
one, to help out register allocation?

-Jordan

> +      /* Create a virtual register to hold number of written primitives */
> +      this->sol_prim_written = src_reg(this, glsl_type::uint_type);
> +      /* Create a virtual register to hold Streamed Vertex Buffer Indices */
> +      this->svbi = src_reg(this, glsl_type::uvec4_type);
> +      /* Create a virtual register to hold max values of SVBI */
> +      this->max_svbi = src_reg(this, glsl_type::uvec4_type);
> +      emit(MOV(dst_reg(this->max_svbi),
> +               src_reg(retype(brw_vec1_grf(1, 4), BRW_REGISTER_TYPE_UD))));
> +   }
> +
>     /* PrimitveID is delivered in r0.1 of the thread payload. If the program
>      * needs it we have to move it to a separate register where we can map
>      * the atttribute.
> @@ -134,6 +173,9 @@ gen6_gs_visitor::visit(ir_emit_vertex *)
>              BRW_CONDITIONAL_L));
>     emit(IF(BRW_PREDICATE_NORMAL));
>     {
> +      if (c->prog_data.gen6_xfb_enabled)
> +         xfb_buffer_output();
> +
>        /* Buffer all output slots for this vertex in vertex_output */
>        for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
>           /* We will handle PSIZ for each vertex at thread end time since it
> @@ -330,9 +372,21 @@ gen6_gs_visitor::emit_thread_end()
>     emit(IF(BRW_PREDICATE_NORMAL));
>     {
>        this->current_annotation = "gen6 thread end: ff_sync";
> -      emit(GS_OPCODE_FF_SYNC,
> -           dst_reg(MRF, base_mrf), this->temp, this->prim_count,
> -           brw_imm_ud(0u));
> +
> +      if (c->prog_data.gen6_xfb_enabled) {
> +         emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
> +              dst_reg(this->svbi),
> +              this->vertex_count,
> +              this->prim_count,
> +              this->sol_temp);
> +         emit(GS_OPCODE_FF_SYNC,
> +              dst_reg(MRF, base_mrf), this->temp, this->prim_count,
> +              this->svbi);
> +      } else {
> +         emit(GS_OPCODE_FF_SYNC,
> +              dst_reg(MRF, base_mrf), this->temp, this->prim_count,
> +              brw_imm_ud(0u));
> +      }
>
>        /* Loop over all buffered vertices and emit URB write messages */
>        this->current_annotation = "gen6 thread end: urb writes init";
> @@ -412,6 +466,9 @@ gen6_gs_visitor::emit_thread_end()
>           emit(ADD(dst_reg(vertex), vertex, 1u));
>        }
>        emit(BRW_OPCODE_WHILE);
> +
> +      if (c->prog_data.gen6_xfb_enabled)
> +         xfb_write();
>     }
>     emit(BRW_OPCODE_ENDIF);
>
> @@ -431,6 +488,15 @@ gen6_gs_visitor::emit_thread_end()
>      * the EOT message.
>      */
>     this->current_annotation = "gen6 thread end: EOT";
> +
> +   if (c->prog_data.gen6_xfb_enabled) {
> +      /* When emitting EOT, set SONumPrimsWritten Increment Value. */
> +      src_reg data(this, glsl_type::uint_type);
> +      emit(AND(dst_reg(data), this->sol_prim_written, brw_imm_ud(0xffffu)));
> +      emit(SHL(dst_reg(data), data, brw_imm_ud(16u)));
> +      emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
> +   }
> +
>     vec4_instruction *inst = emit(GS_OPCODE_THREAD_END);
>     inst->urb_write_flags = BRW_URB_WRITE_COMPLETE | BRW_URB_WRITE_UNUSED;
>     inst->base_mrf = base_mrf;
> @@ -478,4 +544,241 @@ gen6_gs_visitor::setup_payload()
>     this->first_non_payload_grf = reg;
>  }
>
> +void
> +gen6_gs_visitor::xfb_buffer_output()
> +{
> +   static const unsigned swizzle_for_offset[4] = {
> +      BRW_SWIZZLE4(0, 1, 2, 3),
> +      BRW_SWIZZLE4(1, 2, 3, 3),
> +      BRW_SWIZZLE4(2, 3, 3, 3),
> +      BRW_SWIZZLE4(3, 3, 3, 3)
> +   };
> +
> +   struct brw_gs_prog_data *prog_data =
> +      (struct brw_gs_prog_data *) &c->prog_data;
> +
> +   if (!prog_data->num_transform_feedback_bindings) {
> +      const struct gl_transform_feedback_info *linked_xfb_info =
> +         &this->shader_prog->LinkedTransformFeedback;
> +      int i;
> +
> +      /* Make sure that the VUE slots won't overflow the unsigned chars in
> +       * prog_data->transform_feedback_bindings[].
> +       */
> +      STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
> +
> +      /* Make sure that we don't need more binding table entries than we've
> +       * set aside for use in transform feedback.  (We shouldn't, since we
> +       * set aside enough binding table entries to have one per component).
> +       */
> +      assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
> +
> +      prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
> +      for (i = 0; i < prog_data->num_transform_feedback_bindings; i++) {
> +         prog_data->transform_feedback_bindings[i] =
> +            linked_xfb_info->Outputs[i].OutputRegister;
> +         prog_data->transform_feedback_swizzles[i] =
> +            swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
> +      }
> +   }
> +
> +   /* Buffer all TF outputs for this vertex in xfb_output */
> +   for (int binding = 0; binding < prog_data->num_transform_feedback_bindings;
> +        binding++) {
> +      /* We will handle PSIZ for each vertex at thread end time since it
> +       * is not computed by the GS algorithm and requires specific handling.
> +       */
> +      unsigned varying =
> +         prog_data->transform_feedback_bindings[binding];
> +      if (varying != VARYING_SLOT_PSIZ) {
> +         dst_reg dst(this->xfb_output);
> +         dst.reladdr = ralloc(mem_ctx, src_reg);
> +         memcpy(dst.reladdr, &this->xfb_output_offset, sizeof(src_reg));
> +         dst.type = output_reg[varying].type;
> +
> +         this->current_annotation = output_reg_annotation[varying];
> +         src_reg out_reg = src_reg(output_reg[varying]);
> +         out_reg.swizzle = prog_data->transform_feedback_swizzles[binding];
> +         emit(MOV(dst, out_reg));
> +      }
> +      emit(ADD(dst_reg(this->xfb_output_offset), this->xfb_output_offset, 1u));
> +   }
> +}
> +
> +void
> +gen6_gs_visitor::xfb_write()
> +{
> +   unsigned num_verts;
> +   struct brw_gs_prog_data *prog_data =
> +      (struct brw_gs_prog_data *) &c->prog_data;
> +
> +   if (!prog_data->num_transform_feedback_bindings)
> +      return;
> +
> +   switch (c->prog_data.output_topology) {
> +   case _3DPRIM_POINTLIST:
> +      num_verts = 1;
> +      break;
> +   case _3DPRIM_LINELIST:
> +   case _3DPRIM_LINESTRIP:
> +   case _3DPRIM_LINELOOP:
> +      num_verts = 2;
> +      break;
> +   case _3DPRIM_TRILIST:
> +   case _3DPRIM_TRIFAN:
> +   case _3DPRIM_TRISTRIP:
> +   case _3DPRIM_RECTLIST:
> +      num_verts = 3;
> +      break;
> +   case _3DPRIM_QUADLIST:
> +   case _3DPRIM_QUADSTRIP:
> +   case _3DPRIM_POLYGON:
> +      num_verts = 3;
> +      break;
> +   default:
> +      unreachable("Unexpected primitive type in Gen6 SOL program.");
> +   }
> +
> +   this->current_annotation = "gen6 thread end: svb writes init";
> +
> +   emit(MOV(dst_reg(this->xfb_output_offset), 0u));
> +   emit(MOV(dst_reg(this->sol_prim_written), 0u));
> +
> +   /* Check that at least one primitive can be written
> +    *
> +    * Note: since we use the binding table to keep track of buffer offsets
> +    * and stride, the GS doesn't need to keep track of a separate pointer
> +    * into each buffer; it uses a single pointer which increments by 1 for
> +    * each vertex.  So we use SVBI0 for this pointer, regardless of whether
> +    * transform feedback is in interleaved or separate attribs mode.
> +    */
> +   emit(ADD(dst_reg(this->sol_temp), this->svbi, brw_imm_ud(num_verts)));
> +
> +   /* Compare SVBI calculated number with the maximum value, which is
> +    * in R1.4 (previously saved in this->max_svbi) for gen6.
> +    */
> +   emit(CMP(dst_null_d(), this->sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
> +   emit(IF(BRW_PREDICATE_NORMAL));
> +   {
> +      struct src_reg destination_indices_uw =
> +         retype(destination_indices, BRW_REGISTER_TYPE_UW);
> +
> +      vec4_instruction *inst = emit(MOV(dst_reg(destination_indices_uw),
> +                                        brw_imm_v(0x00020100))); /* (0, 1, 2) */
> +      inst->force_writemask_all = true;
> +
> +      emit(ADD(dst_reg(this->destination_indices),
> +               this->destination_indices,
> +               this->svbi));
> +   }
> +   emit(BRW_OPCODE_ENDIF);
> +
> +   this->current_vertex = 0;
> +   /* Make sure we do not emit more transform feedback data than the amount
> +    * we have buffered.
> +    */
> +   for (int i = 0; i < c->gp->program.VerticesOut; i++) {
> +      emit(MOV(dst_reg(this->sol_temp), i));
> +      emit(CMP(dst_null_d(), this->sol_temp, this->vertex_count,
> +               BRW_CONDITIONAL_L));
> +      emit(IF(BRW_PREDICATE_NORMAL));
> +      {
> +         xfb_program(num_verts);
> +      }
> +      emit(BRW_OPCODE_ENDIF);
> +   }
> +}
> +
> +void
> +gen6_gs_visitor::xfb_program(unsigned num_verts)
> +{
> +   struct brw_gs_prog_data *prog_data =
> +      (struct brw_gs_prog_data *) &c->prog_data;
> +   unsigned binding;
> +   unsigned num_bindings = prog_data->num_transform_feedback_bindings;
> +
> +   /* Check if we can write one primitive more */
> +   emit(ADD(dst_reg(this->sol_temp), this->sol_prim_written, 1u));
> +   emit(MUL(dst_reg(this->sol_temp), this->sol_temp, brw_imm_ud(num_verts)));
> +   emit(ADD(dst_reg(this->sol_temp), this->sol_temp, this->svbi));
> +   emit(CMP(dst_null_d(), this->sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
> +   emit(IF(BRW_PREDICATE_NORMAL));
> +   {
> +      if (this->current_vertex >= num_verts)
> +         this->current_vertex = 0;
> +
> +      /* Avoid overwriting MRF 1 as it is used as URB write message header */
> +      dst_reg mrf_reg(MRF, 2);
> +
> +      this->current_annotation = "gen6: emit SOL vertex data";
> +      /* For each vertex, generate code to output each varying using the
> +       * appropriate binding table entry.
> +       */
> +      for (binding = 0; binding < num_bindings; ++binding) {
> +         /* Set up the correct destination index for this vertex */
> +         vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX,
> +                                       mrf_reg,
> +                                       this->destination_indices);
> +         inst->sol_vertex = this->current_vertex;
> +
> +         unsigned char varying =
> +            prog_data->transform_feedback_bindings[binding];
> +
> +         /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
> +          *
> +          *   "Prior to End of Thread with a URB_WRITE, the kernel must
> +          *   ensure that all writes are complete by sending the final
> +          *   write as a committed write."
> +          */
> +         bool final_write = binding == (unsigned) num_bindings - 1 &&
> +                            this->current_vertex == num_verts - 1;
> +
> +         /* Compute offset of this varying for the current vertex
> +          * in xfb_output
> +          */
> +         src_reg data(this->xfb_output);
> +         data.reladdr = ralloc(mem_ctx, src_reg);
> +         memcpy(data.reladdr, &this->xfb_output_offset, sizeof(src_reg));
> +         src_reg out_reg;
> +         this->current_annotation = output_reg_annotation[varying];
> +
> +         if (varying == VARYING_SLOT_PSIZ) {
> +            /* We did not buffer PSIZ, emit it directly here */
> +            out_reg = src_reg(output_reg[varying]);
> +            out_reg.swizzle = BRW_SWIZZLE_WWWW;
> +         } else {
> +            /* Copy this varying to the appropriate message register */
> +            out_reg = src_reg(this, glsl_type::uvec4_type);
> +            out_reg.type = output_reg[varying].type;
> +
> +            data.type = output_reg[varying].type;
> +            emit(MOV(dst_reg(out_reg), data));
> +         }
> +
> +         /* Write data and send SVB Write */
> +         inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, out_reg, this->sol_temp);
> +         inst->sol_binding = binding;
> +         inst->sol_final_write = final_write;
> +
> +         emit(ADD(dst_reg(this->xfb_output_offset),
> +                  this->xfb_output_offset, 1u));
> +
> +         if (final_write) {
> +            /* This is the last vertex of the primitive, then increment
> +             * SO num primitive counter and destination indices.
> +             */
> +            emit(ADD(dst_reg(this->destination_indices),
> +                     this->destination_indices,
> +                     brw_imm_ud(num_verts)));
> +            emit(ADD(dst_reg(this->sol_prim_written),
> +                     this->sol_prim_written, 1u));
> +         }
> +
> +      }
> +      this->current_vertex++;
> +      this->current_annotation = NULL;
> +   }
> +   emit(BRW_OPCODE_ENDIF);
> +}
> +
>  } /* namespace brw */
> diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
> index 8d2386c..595a46f 100644
> --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
> +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
> @@ -55,12 +55,26 @@ protected:
>     virtual void setup_payload();
>
>  private:
> +   void xfb_write();
> +   void xfb_buffer_output();
> +   void xfb_program(unsigned num_verts);
> +
>     src_reg vertex_output;
>     src_reg vertex_output_offset;
>     src_reg temp;
>     src_reg first_vertex;
>     src_reg prim_count;
>     src_reg primitive_id;
> +
> +   /* Transform Feedback members */
> +   src_reg xfb_output;
> +   src_reg xfb_output_offset;
> +   src_reg sol_temp;
> +   src_reg sol_prim_written;
> +   src_reg svbi;
> +   src_reg max_svbi;
> +   src_reg destination_indices;
> +   unsigned current_vertex;
>  };
>
>  } /* namespace brw */
> --
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list