[Mesa-dev] [PATCH 4/9] i965/gs: Set control data header size/format appropriately for EndPrimitive().
Kenneth Graunke
kenneth at whitecape.org
Mon Sep 9 17:56:46 PDT 2013
On 09/09/2013 08:20 AM, Paul Berry wrote:
> The gen7 geometry shader uses a "control data header" at the beginning
> of the output URB entry to store either
>
> (a) flag bits (1 bit/vertex) indicating whether EndPrimitive() was
> called after each vertex, or
>
> (b) stream ID bits (2 bits/vertex) indicating which stream each vertex
> should be sent to (when multiple transform feedback streams are in
> use).
>
> Fortunately, OpenGL only requires separate streams to be supported
> when the output type is points, and EndPrimitive() only has an effect
> when the input type is line_strip or triangle_strip, so it's not a
> problem that these two uses of the control data header are mutually
> exclusive.
>
> This patch modifies do_gs_prog() to determine the correct
> hardware settings for configuring the control data header, and
> modifies upload_gs_state() to propagate these settings to the
> hardware.
>
> In addition, it modifies do_gs_prog() to ensure that the output
> URB entry is large enough to contain both the output vertices *and*
> the control data header.
>
> Finally, it modifies vec4_gs_visitor so that it accounts for the size
> of the control data header when computing the offset within the URB
> where output vertex data should be stored.
> ---
> src/mesa/drivers/dri/i965/brw_context.h | 14 ++++++++++
> src/mesa/drivers/dri/i965/brw_defines.h | 4 +++
> src/mesa/drivers/dri/i965/brw_vec4_gs.c | 33 +++++++++++++++++++++++
> src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 1 +
> src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h | 3 +++
> src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +-
> src/mesa/drivers/dri/i965/gen7_gs_state.c | 4 +++
> 7 files changed, 60 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 57f086b..c566bba 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -548,6 +548,20 @@ struct brw_gs_prog_data
> unsigned output_vertex_size_hwords;
>
> unsigned output_topology;
> +
> + /**
> + * Size of the control data (cut bits or StreamID bits), in hwords (32
> + * bytes). 0 if there is no control data.
> + */
> + unsigned control_data_header_size_hwords;
> +
> + /**
> + * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
> + * if the control data is StreamID bits, or
> + * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
> + * Ignored if control_data_header_size_hwords is 0.
> + */
> + unsigned control_data_format;
> };
>
> /** Number of texture sampler units */
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 0406c4d..6db2570 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1337,6 +1337,10 @@ enum brw_message_target {
> /* DW5 */
> # define GEN6_GS_MAX_THREADS_SHIFT 25
> # define HSW_GS_MAX_THREADS_SHIFT 24
> +# define GEN7_GS_CONTROL_DATA_FORMAT_SHIFT 24
> +# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT 0
> +# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1
> +# define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT 20
This won't work for Haswell (note the overlap between GSCTL and MAX_THREADS: GEN7_GS_CONTROL_DATA_FORMAT_SHIFT and HSW_GS_MAX_THREADS_SHIFT are both 24).
Apparently GSCTL is stored in DW6 at bit 31 on Haswell.
I think it probably makes sense to address that in this patch.
Otherwise this looks fine.
> # define GEN7_GS_DISPATCH_MODE_SINGLE (0 << 11)
> # define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE (1 << 11)
> # define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2 << 11)
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> index 7ab03ac..f67ae2b 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> @@ -62,6 +62,38 @@ do_gs_prog(struct brw_context *brw,
> c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
> c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, param_count);
>
> + if (gp->program.OutputType == GL_POINTS) {
> + /* When the output type is points, the geometry shader may output data
> + * to multiple streams, and EndPrimitive() has no effect. So we
> + * configure the hardware to interpret the control data as stream ID.
> + */
> + c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
> +
> + /* However, StreamID is not yet supported, so we output zero bits of
> + * control data per vertex.
> + */
> + c.control_data_bits_per_vertex = 0;
> + } else {
> + /* When the output type is triangle_strip or line_strip, EndPrimitive()
> + * may be used to terminate the current strip and start a new one
> + * (similar to primitive restart), and outputting data to multiple
> + * streams is not supported. So we configure the hardware to interpret
> + * the control data as EndPrimitive information (a.k.a. "cut bits").
> + */
> + c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
> +
> + /* We only need to output control data if the shader actually calls
> + * EndPrimitive().
> + */
> + c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
> + }
> + c.control_data_header_size_bits =
> + gp->program.VerticesOut * c.control_data_bits_per_vertex;
> +
> + /* 1 HWORD = 32 bytes = 256 bits */
> + c.prog_data.control_data_header_size_hwords =
> + ALIGN(c.control_data_header_size_bits, 256) / 256;
> +
> brw_compute_vue_map(brw, &c.prog_data.base.vue_map,
> gp->program.Base.OutputsWritten,
> c.key.base.userclip_active);
> @@ -148,6 +180,7 @@ do_gs_prog(struct brw_context *brw,
> */
> unsigned output_size_bytes =
> c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
> + output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
>
> assert(output_size_bytes >= 1);
> if (output_size_bytes > GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES)
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> index d82a26e..37cde64 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> @@ -200,6 +200,7 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
> (void) complete;
>
> vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE);
> + inst->offset = c->prog_data.control_data_header_size_hwords;
> inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
> return inst;
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
> index fba0ac6..1193e28 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
> @@ -51,6 +51,9 @@ struct brw_gs_compile
> struct brw_gs_prog_data prog_data;
>
> struct brw_geometry_program *gp;
> +
> + unsigned control_data_bits_per_vertex;
> + unsigned control_data_header_size_bits;
> };
>
> #ifdef __cplusplus
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index ca52fd3..004a884 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -2722,7 +2722,7 @@ vec4_visitor::emit_vertex()
> * URB row increments, and each of our MRFs is half of one of
> * those, since we're doing interleaved writes.
> */
> - inst->offset = (max_usable_mrf - base_mrf) / 2;
> + inst->offset += (max_usable_mrf - base_mrf) / 2;
> }
> }
>
> diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
> index f21512e..40aaaff 100644
> --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
> @@ -106,6 +106,10 @@ upload_gs_state(struct brw_context *brw)
> GEN6_GS_DISPATCH_START_GRF_SHIFT));
>
> OUT_BATCH(((brw->max_gs_threads - 1) << max_threads_shift) |
> + (brw->gs.prog_data->control_data_format <<
> + GEN7_GS_CONTROL_DATA_FORMAT_SHIFT) |
> + (brw->gs.prog_data->control_data_header_size_hwords <<
> + GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
> GEN7_GS_DISPATCH_MODE_DUAL_OBJECT |
> GEN6_GS_STATISTICS_ENABLE |
> GEN7_GS_ENABLE);
>
More information about the mesa-dev
mailing list