[Mesa-dev] [PATCH 28/37] i965/gen6/gs: implement transform feedback support in gen6_gs_visitor
Jordan Justen
jljusten at gmail.com
Thu Sep 18 16:05:48 PDT 2014
On Thu, Aug 14, 2014 at 4:12 AM, Iago Toral Quiroga <itoral at igalia.com> wrote:
> From: Samuel Iglesias Gonsalvez <siglesias at igalia.com>
>
> This takes care of generating code required to handle transform feedback.
> Notice that transform feedback isn't enabled yet, since that requires
> additional setups in other parts of the code that will come in later patches.
>
> Signed-off-by: Samuel Iglesias Gonsalvez <siglesias at igalia.com>
> ---
> src/mesa/drivers/dri/i965/brw_context.h | 113 ++++++----
> src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 309 +++++++++++++++++++++++++-
> src/mesa/drivers/dri/i965/gen6_gs_visitor.h | 14 ++
> 3 files changed, 391 insertions(+), 45 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 7439da1..3418b76 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -553,48 +553,6 @@ struct brw_vs_prog_data {
> bool uses_vertexid;
> };
>
> -
> -/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
> - * this struct!
> - */
> -struct brw_gs_prog_data
> -{
> - struct brw_vec4_prog_data base;
> -
> - /**
> - * Size of an output vertex, measured in HWORDS (32 bytes).
> - */
> - unsigned output_vertex_size_hwords;
> -
> - unsigned output_topology;
> -
> - /**
> - * Size of the control data (cut bits or StreamID bits), in hwords (32
> - * bytes). 0 if there is no control data.
> - */
> - unsigned control_data_header_size_hwords;
> -
> - /**
> - * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
> - * if the control data is StreamID bits, or
> - * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
> - * Ignored if control_data_header_size is 0.
> - */
> - unsigned control_data_format;
> -
> - bool include_primitive_id;
> -
> - int invocations;
> -
> - /**
> - * Dispatch mode, can be any of:
> - * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT
> - * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE
> - * GEN7_GS_DISPATCH_MODE_SINGLE
> - */
> - int dispatch_mode;
> -};
> -
> /** Number of texture sampler units */
> #define BRW_MAX_TEX_UNIT 32
>
> @@ -641,6 +599,77 @@ struct brw_gs_prog_data
> #define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
> #define BRW_MAX_GEN6_GS_SURFACES SURF_INDEX_GEN6_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
>
> +/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
> + * this struct!
> + */
> +struct brw_gs_prog_data
> +{
> + struct brw_vec4_prog_data base;
> +
> + /**
> + * Size of an output vertex, measured in HWORDS (32 bytes).
> + */
> + unsigned output_vertex_size_hwords;
> +
> + unsigned output_topology;
> +
> + /**
> + * Size of the control data (cut bits or StreamID bits), in hwords (32
> + * bytes). 0 if there is no control data.
> + */
> + unsigned control_data_header_size_hwords;
> +
> + /**
> + * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
> + * if the control data is StreamID bits, or
> + * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
> + * Ignored if control_data_header_size is 0.
> + */
> + unsigned control_data_format;
> +
> + bool include_primitive_id;
> +
> + int invocations;
> +
> + /**
> + * Dispatch mode, can be any of:
> + * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT
> + * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE
> + * GEN7_GS_DISPATCH_MODE_SINGLE
> + */
> + int dispatch_mode;
> +
> + /**
> + * Gen6 transform feedback enabled flag.
> + */
> + bool gen6_xfb_enabled;
> +
> + /**
> + * Gen6: Provoking vertex convention for odd-numbered triangles
> + * in tristrips.
> + */
> + GLuint pv_first:1;
> +
> + /**
> + * Gen6: Number of varyings that are output to transform feedback.
> + */
> + GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
> +
> + /**
> + * Gen6: Map from the index of a transform feedback binding table entry to the
> + * gl_varying_slot that should be streamed out through that binding table
> + * entry.
> + */
> + unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS];
> +
> + /**
> + * Gen6: Map from the index of a transform feedback binding table entry to the
> + * swizzles that should be used when streaming out data through that
> + * binding table entry.
> + */
> + unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS];
> +};
> +
> /**
> * Stride in bytes between shader_time entries.
> *
> diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
> index c1cfe75..b8eaa58 100644
> --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
> @@ -97,6 +97,45 @@ gen6_gs_visitor::emit_prolog()
> this->prim_count = src_reg(this, glsl_type::uint_type);
> emit(MOV(dst_reg(this->prim_count), 0u));
>
> + if (c->prog_data.gen6_xfb_enabled) {
> + const struct gl_transform_feedback_info *linked_xfb_info =
> + &this->shader_prog->LinkedTransformFeedback;
> +
> + /* Gen6 geometry shaders are required to ask for Streamed Vertex Buffer
> + * Indices values via FF_SYNC message, when Transform Feedback is
> + * enabled.
> + *
> + * To achieve this we buffer the Transform feedback outputs for each
> + * emitted vertex in xfb_output during operation. Then, when we have
> + * processed the last vertex (that is, at thread end time), we know all
> + * the required data for the FF_SYNC message header in order to receive
> + * the SVBI in the writeback.
> + *
> + * For each emitted vertex, xfb_output will hold
> + * num_transform_feedback_bindings data items plus one, which will
> + * indicate the end of the primitive. Next vertex's data comes right
> + * after.
> + */
> + this->xfb_output = src_reg(this,
> + glsl_type::uint_type,
> + linked_xfb_info->NumOutputs *
> + c->gp->program.VerticesOut);
> + this->xfb_output_offset = src_reg(this, glsl_type::uint_type);
> + emit(MOV(dst_reg(this->xfb_output_offset), src_reg(0u)));
> + /* Create a virtual register to hold destination indices in SOL */
> + this->destination_indices = src_reg(this, glsl_type::uvec4_type);
> + /* Create a virtual register to hold temporary values in SOL */
> + this->sol_temp = src_reg(this, glsl_type::uvec4_type);
What is the live range of sol_temp?
Would it be better to allocate a new temporary in each function that
needs one, to help out register allocation?
-Jordan
> + /* Create a virtual register to hold number of written primitives */
> + this->sol_prim_written = src_reg(this, glsl_type::uint_type);
> + /* Create a virtual register to hold Streamed Vertex Buffer Indices */
> + this->svbi = src_reg(this, glsl_type::uvec4_type);
> + /* Create a virtual register to hold max values of SVBI */
> + this->max_svbi = src_reg(this, glsl_type::uvec4_type);
> + emit(MOV(dst_reg(this->max_svbi),
> + src_reg(retype(brw_vec1_grf(1, 4), BRW_REGISTER_TYPE_UD))));
> + }
> +
> /* PrimitveID is delivered in r0.1 of the thread payload. If the program
> * needs it we have to move it to a separate register where we can map
> * the atttribute.
> @@ -134,6 +173,9 @@ gen6_gs_visitor::visit(ir_emit_vertex *)
> BRW_CONDITIONAL_L));
> emit(IF(BRW_PREDICATE_NORMAL));
> {
> + if (c->prog_data.gen6_xfb_enabled)
> + xfb_buffer_output();
> +
> /* Buffer all output slots for this vertex in vertex_output */
> for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
> /* We will handle PSIZ for each vertex at thread end time since it
> @@ -330,9 +372,21 @@ gen6_gs_visitor::emit_thread_end()
> emit(IF(BRW_PREDICATE_NORMAL));
> {
> this->current_annotation = "gen6 thread end: ff_sync";
> - emit(GS_OPCODE_FF_SYNC,
> - dst_reg(MRF, base_mrf), this->temp, this->prim_count,
> - brw_imm_ud(0u));
> +
> + if (c->prog_data.gen6_xfb_enabled) {
> + emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
> + dst_reg(this->svbi),
> + this->vertex_count,
> + this->prim_count,
> + this->sol_temp);
> + emit(GS_OPCODE_FF_SYNC,
> + dst_reg(MRF, base_mrf), this->temp, this->prim_count,
> + this->svbi);
> + } else {
> + emit(GS_OPCODE_FF_SYNC,
> + dst_reg(MRF, base_mrf), this->temp, this->prim_count,
> + brw_imm_ud(0u));
> + }
>
> /* Loop over all buffered vertices and emit URB write messages */
> this->current_annotation = "gen6 thread end: urb writes init";
> @@ -412,6 +466,9 @@ gen6_gs_visitor::emit_thread_end()
> emit(ADD(dst_reg(vertex), vertex, 1u));
> }
> emit(BRW_OPCODE_WHILE);
> +
> + if (c->prog_data.gen6_xfb_enabled)
> + xfb_write();
> }
> emit(BRW_OPCODE_ENDIF);
>
> @@ -431,6 +488,15 @@ gen6_gs_visitor::emit_thread_end()
> * the EOT message.
> */
> this->current_annotation = "gen6 thread end: EOT";
> +
> + if (c->prog_data.gen6_xfb_enabled) {
> + /* When emitting EOT, set SONumPrimsWritten Increment Value. */
> + src_reg data(this, glsl_type::uint_type);
> + emit(AND(dst_reg(data), this->sol_prim_written, brw_imm_ud(0xffffu)));
> + emit(SHL(dst_reg(data), data, brw_imm_ud(16u)));
> + emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
> + }
> +
> vec4_instruction *inst = emit(GS_OPCODE_THREAD_END);
> inst->urb_write_flags = BRW_URB_WRITE_COMPLETE | BRW_URB_WRITE_UNUSED;
> inst->base_mrf = base_mrf;
> @@ -478,4 +544,241 @@ gen6_gs_visitor::setup_payload()
> this->first_non_payload_grf = reg;
> }
>
> +void
> +gen6_gs_visitor::xfb_buffer_output()
> +{
> + static const unsigned swizzle_for_offset[4] = {
> + BRW_SWIZZLE4(0, 1, 2, 3),
> + BRW_SWIZZLE4(1, 2, 3, 3),
> + BRW_SWIZZLE4(2, 3, 3, 3),
> + BRW_SWIZZLE4(3, 3, 3, 3)
> + };
> +
> + struct brw_gs_prog_data *prog_data =
> + (struct brw_gs_prog_data *) &c->prog_data;
> +
> + if (!prog_data->num_transform_feedback_bindings) {
> + const struct gl_transform_feedback_info *linked_xfb_info =
> + &this->shader_prog->LinkedTransformFeedback;
> + int i;
> +
> + /* Make sure that the VUE slots won't overflow the unsigned chars in
> + * prog_data->transform_feedback_bindings[].
> + */
> + STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
> +
> + /* Make sure that we don't need more binding table entries than we've
> + * set aside for use in transform feedback. (We shouldn't, since we
> + * set aside enough binding table entries to have one per component).
> + */
> + assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
> +
> + prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
> + for (i = 0; i < prog_data->num_transform_feedback_bindings; i++) {
> + prog_data->transform_feedback_bindings[i] =
> + linked_xfb_info->Outputs[i].OutputRegister;
> + prog_data->transform_feedback_swizzles[i] =
> + swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
> + }
> + }
> +
> + /* Buffer all TF outputs for this vertex in xfb_output */
> + for (int binding = 0; binding < prog_data->num_transform_feedback_bindings;
> + binding++) {
> + /* We will handle PSIZ for each vertex at thread end time since it
> + * is not computed by the GS algorithm and requires specific handling.
> + */
> + unsigned varying =
> + prog_data->transform_feedback_bindings[binding];
> + if (varying != VARYING_SLOT_PSIZ) {
> + dst_reg dst(this->xfb_output);
> + dst.reladdr = ralloc(mem_ctx, src_reg);
> + memcpy(dst.reladdr, &this->xfb_output_offset, sizeof(src_reg));
> + dst.type = output_reg[varying].type;
> +
> + this->current_annotation = output_reg_annotation[varying];
> + src_reg out_reg = src_reg(output_reg[varying]);
> + out_reg.swizzle = prog_data->transform_feedback_swizzles[binding];
> + emit(MOV(dst, out_reg));
> + }
> + emit(ADD(dst_reg(this->xfb_output_offset), this->xfb_output_offset, 1u));
> + }
> +}
> +
> +void
> +gen6_gs_visitor::xfb_write()
> +{
> + unsigned num_verts;
> + struct brw_gs_prog_data *prog_data =
> + (struct brw_gs_prog_data *) &c->prog_data;
> +
> + if (!prog_data->num_transform_feedback_bindings)
> + return;
> +
> + switch (c->prog_data.output_topology) {
> + case _3DPRIM_POINTLIST:
> + num_verts = 1;
> + break;
> + case _3DPRIM_LINELIST:
> + case _3DPRIM_LINESTRIP:
> + case _3DPRIM_LINELOOP:
> + num_verts = 2;
> + break;
> + case _3DPRIM_TRILIST:
> + case _3DPRIM_TRIFAN:
> + case _3DPRIM_TRISTRIP:
> + case _3DPRIM_RECTLIST:
> + num_verts = 3;
> + break;
> + case _3DPRIM_QUADLIST:
> + case _3DPRIM_QUADSTRIP:
> + case _3DPRIM_POLYGON:
> + num_verts = 3;
> + break;
> + default:
> + unreachable("Unexpected primitive type in Gen6 SOL program.");
> + }
> +
> + this->current_annotation = "gen6 thread end: svb writes init";
> +
> + emit(MOV(dst_reg(this->xfb_output_offset), 0u));
> + emit(MOV(dst_reg(this->sol_prim_written), 0u));
> +
> + /* Check that at least one primitive can be written
> + *
> + * Note: since we use the binding table to keep track of buffer offsets
> + * and stride, the GS doesn't need to keep track of a separate pointer
> + * into each buffer; it uses a single pointer which increments by 1 for
> + * each vertex. So we use SVBI0 for this pointer, regardless of whether
> + * transform feedback is in interleaved or separate attribs mode.
> + */
> + emit(ADD(dst_reg(this->sol_temp), this->svbi, brw_imm_ud(num_verts)));
> +
> + /* Compare SVBI calculated number with the maximum value, which is
> + * in R1.4 (previously saved in this->max_svbi) for gen6.
> + */
> + emit(CMP(dst_null_d(), this->sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
> + emit(IF(BRW_PREDICATE_NORMAL));
> + {
> + struct src_reg destination_indices_uw =
> + retype(destination_indices, BRW_REGISTER_TYPE_UW);
> +
> + vec4_instruction *inst = emit(MOV(dst_reg(destination_indices_uw),
> + brw_imm_v(0x00020100))); /* (0, 1, 2) */
> + inst->force_writemask_all = true;
> +
> + emit(ADD(dst_reg(this->destination_indices),
> + this->destination_indices,
> + this->svbi));
> + }
> + emit(BRW_OPCODE_ENDIF);
> +
> + this->current_vertex = 0;
> + /* Make sure we do not emit more transform feedback data than the amount
> + * we have buffered.
> + */
> + for (int i = 0; i < c->gp->program.VerticesOut; i++) {
> + emit(MOV(dst_reg(this->sol_temp), i));
> + emit(CMP(dst_null_d(), this->sol_temp, this->vertex_count,
> + BRW_CONDITIONAL_L));
> + emit(IF(BRW_PREDICATE_NORMAL));
> + {
> + xfb_program(num_verts);
> + }
> + emit(BRW_OPCODE_ENDIF);
> + }
> +}
> +
> +void
> +gen6_gs_visitor::xfb_program(unsigned num_verts)
> +{
> + struct brw_gs_prog_data *prog_data =
> + (struct brw_gs_prog_data *) &c->prog_data;
> + unsigned binding;
> + unsigned num_bindings = prog_data->num_transform_feedback_bindings;
> +
> + /* Check if we can write one primitive more */
> + emit(ADD(dst_reg(this->sol_temp), this->sol_prim_written, 1u));
> + emit(MUL(dst_reg(this->sol_temp), this->sol_temp, brw_imm_ud(num_verts)));
> + emit(ADD(dst_reg(this->sol_temp), this->sol_temp, this->svbi));
> + emit(CMP(dst_null_d(), this->sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
> + emit(IF(BRW_PREDICATE_NORMAL));
> + {
> + if (this->current_vertex >= num_verts)
> + this->current_vertex = 0;
> +
> + /* Avoid overwriting MRF 1 as it is used as URB write message header */
> + dst_reg mrf_reg(MRF, 2);
> +
> + this->current_annotation = "gen6: emit SOL vertex data";
> + /* For each vertex, generate code to output each varying using the
> + * appropriate binding table entry.
> + */
> + for (binding = 0; binding < num_bindings; ++binding) {
> + /* Set up the correct destination index for this vertex */
> + vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX,
> + mrf_reg,
> + this->destination_indices);
> + inst->sol_vertex = this->current_vertex;
> +
> + unsigned char varying =
> + prog_data->transform_feedback_bindings[binding];
> +
> + /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
> + *
> + * "Prior to End of Thread with a URB_WRITE, the kernel must
> + * ensure that all writes are complete by sending the final
> + * write as a committed write."
> + */
> + bool final_write = binding == (unsigned) num_bindings - 1 &&
> + this->current_vertex == num_verts - 1;
> +
> + /* Compute offset of this varying for the current vertex
> + * in xfb_output
> + */
> + src_reg data(this->xfb_output);
> + data.reladdr = ralloc(mem_ctx, src_reg);
> + memcpy(data.reladdr, &this->xfb_output_offset, sizeof(src_reg));
> + src_reg out_reg;
> + this->current_annotation = output_reg_annotation[varying];
> +
> + if (varying == VARYING_SLOT_PSIZ) {
> + /* We did not buffer PSIZ, emit it directly here */
> + out_reg = src_reg(output_reg[varying]);
> + out_reg.swizzle = BRW_SWIZZLE_WWWW;
> + } else {
> + /* Copy this varying to the appropriate message register */
> + out_reg = src_reg(this, glsl_type::uvec4_type);
> + out_reg.type = output_reg[varying].type;
> +
> + data.type = output_reg[varying].type;
> + emit(MOV(dst_reg(out_reg), data));
> + }
> +
> + /* Write data and send SVB Write */
> + inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, out_reg, this->sol_temp);
> + inst->sol_binding = binding;
> + inst->sol_final_write = final_write;
> +
> + emit(ADD(dst_reg(this->xfb_output_offset),
> + this->xfb_output_offset, 1u));
> +
> + if (final_write) {
> + /* This is the last vertex of the primitive, then increment
> + * SO num primitive counter and destination indices.
> + */
> + emit(ADD(dst_reg(this->destination_indices),
> + this->destination_indices,
> + brw_imm_ud(num_verts)));
> + emit(ADD(dst_reg(this->sol_prim_written),
> + this->sol_prim_written, 1u));
> + }
> +
> + }
> + this->current_vertex++;
> + this->current_annotation = NULL;
> + }
> + emit(BRW_OPCODE_ENDIF);
> +}
> +
> } /* namespace brw */
> diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
> index 8d2386c..595a46f 100644
> --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
> +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
> @@ -55,12 +55,26 @@ protected:
> virtual void setup_payload();
>
> private:
> + void xfb_write();
> + void xfb_buffer_output();
> + void xfb_program(unsigned num_verts);
> +
> src_reg vertex_output;
> src_reg vertex_output_offset;
> src_reg temp;
> src_reg first_vertex;
> src_reg prim_count;
> src_reg primitive_id;
> +
> + /* Transform Feedback members */
> + src_reg xfb_output;
> + src_reg xfb_output_offset;
> + src_reg sol_temp;
> + src_reg sol_prim_written;
> + src_reg svbi;
> + src_reg max_svbi;
> + src_reg destination_indices;
> + unsigned current_vertex;
> };
>
> } /* namespace brw */
> --
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list