Mesa (master): i965/gs: Set control data header size/format appropriately for EndPrimitive().

Paul Berry stereotype441 at kemper.freedesktop.org
Wed Sep 11 19:06:20 UTC 2013


Module: Mesa
Branch: master
Commit: 247f90c77e8f3894e963d796628246ba0bde27b5
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=247f90c77e8f3894e963d796628246ba0bde27b5

Author: Paul Berry <stereotype441 at gmail.com>
Date:   Sun Aug 18 21:18:19 2013 -0700

i965/gs: Set control data header size/format appropriately for EndPrimitive().

The gen7 geometry shader uses a "control data header" at the beginning
of the output URB entry to store either

(a) flag bits (1 bit/vertex) indicating whether EndPrimitive() was
    called after each vertex, or

(b) stream ID bits (2 bits/vertex) indicating which stream each vertex
    should be sent to (when multiple transform feedback streams are in
    use).

Fortunately, OpenGL only requires separate streams to be supported
when the output type is points, and EndPrimitive() only has an effect
when the output type is line_strip or triangle_strip, so it's not a
problem that these two uses of the control data header are mutually
exclusive.

This patch modifies do_gs_prog() to determine the correct
hardware settings for configuring the control data header, and
modifies upload_gs_state() to propagate these settings to the
hardware.

In addition, it modifies do_gs_prog() to ensure that the output
URB entry is large enough to contain both the output vertices *and*
the control data header.

Finally, it modifies vec4_gs_visitor so that it accounts for the size
of the control data header when computing the offset within the URB
where output vertex data should be stored.

Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>

v2: Fixed incorrect handling of IVB/HSW differences.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

---

 src/mesa/drivers/dri/i965/brw_context.h           |   14 +++++++
 src/mesa/drivers/dri/i965/brw_defines.h           |    5 +++
 src/mesa/drivers/dri/i965/brw_vec4_gs.c           |   33 +++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |    1 +
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |    3 ++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp    |    2 +-
 src/mesa/drivers/dri/i965/gen7_gs_state.c         |   41 ++++++++++++++-------
 7 files changed, 84 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 57f086b..c566bba 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -548,6 +548,20 @@ struct brw_gs_prog_data
    unsigned output_vertex_size_hwords;
 
    unsigned output_topology;
+
+   /**
+    * Size of the control data (cut bits or StreamID bits), in hwords (32
+    * bytes).  0 if there is no control data.
+    */
+   unsigned control_data_header_size_hwords;
+
+   /**
+    * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
+    * if the control data is StreamID bits, or
+    * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
+    * Ignored if control_data_header_size_hwords is 0.
+    */
+   unsigned control_data_format;
 };
 
 /** Number of texture sampler units */
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 0406c4d..85e414d 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1337,6 +1337,10 @@ enum brw_message_target {
 /* DW5 */
 # define GEN6_GS_MAX_THREADS_SHIFT			25
 # define HSW_GS_MAX_THREADS_SHIFT			24
+# define IVB_GS_CONTROL_DATA_FORMAT_SHIFT		24
+# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT		0
+# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID		1
+# define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT		20
 # define GEN7_GS_DISPATCH_MODE_SINGLE			(0 << 11)
 # define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE		(1 << 11)
 # define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT		(2 << 11)
@@ -1346,6 +1350,7 @@ enum brw_message_target {
 # define GEN7_GS_INCLUDE_PRIMITIVE_ID			(1 << 4)
 # define GEN7_GS_ENABLE					(1 << 0)
 /* DW6 */
+# define HSW_GS_CONTROL_DATA_FORMAT_SHIFT		31
 # define GEN6_GS_REORDER				(1 << 30)
 # define GEN6_GS_DISCARD_ADJACENCY			(1 << 29)
 # define GEN6_GS_SVBI_PAYLOAD_ENABLE			(1 << 28)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index 7ab03ac..f67ae2b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -62,6 +62,38 @@ do_gs_prog(struct brw_context *brw,
    c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
    c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, param_count);
 
+   if (gp->program.OutputType == GL_POINTS) {
+      /* When the output type is points, the geometry shader may output data
+       * to multiple streams, and EndPrimitive() has no effect.  So we
+       * configure the hardware to interpret the control data as stream ID.
+       */
+      c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
+
+      /* However, StreamID is not yet supported, so we output zero bits of
+       * control data per vertex.
+       */
+      c.control_data_bits_per_vertex = 0;
+   } else {
+      /* When the output type is triangle_strip or line_strip, EndPrimitive()
+       * may be used to terminate the current strip and start a new one
+       * (similar to primitive restart), and outputting data to multiple
+       * streams is not supported.  So we configure the hardware to interpret
+       * the control data as EndPrimitive information (a.k.a. "cut bits").
+       */
+      c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
+
+      /* We only need to output control data if the shader actually calls
+       * EndPrimitive().
+       */
+      c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
+   }
+   c.control_data_header_size_bits =
+      gp->program.VerticesOut * c.control_data_bits_per_vertex;
+
+   /* 1 HWORD = 32 bytes = 256 bits */
+   c.prog_data.control_data_header_size_hwords =
+      ALIGN(c.control_data_header_size_bits, 256) / 256;
+
    brw_compute_vue_map(brw, &c.prog_data.base.vue_map,
                        gp->program.Base.OutputsWritten,
                        c.key.base.userclip_active);
@@ -148,6 +180,7 @@ do_gs_prog(struct brw_context *brw,
     */
    unsigned output_size_bytes =
       c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
+   output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
 
    assert(output_size_bytes >= 1);
    if (output_size_bytes > GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index d82a26e..37cde64 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -200,6 +200,7 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
    (void) complete;
 
    vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE);
+   inst->offset = c->prog_data.control_data_header_size_hwords;
    inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
    return inst;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
index fba0ac6..1193e28 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
@@ -51,6 +51,9 @@ struct brw_gs_compile
    struct brw_gs_prog_data prog_data;
 
    struct brw_geometry_program *gp;
+
+   unsigned control_data_bits_per_vertex;
+   unsigned control_data_header_size_bits;
 };
 
 #ifdef __cplusplus
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 86ecd21..4760a53 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2727,7 +2727,7 @@ vec4_visitor::emit_vertex()
        * URB row increments, and each of our MRFs is half of one of
        * those, since we're doing interleaved writes.
        */
-      inst->offset = (max_usable_mrf - base_mrf) / 2;
+      inst->offset += (max_usable_mrf - base_mrf) / 2;
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index 3e3c331..231e3c9 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -95,21 +95,34 @@ upload_gs_state(struct brw_context *brw)
          OUT_BATCH(0);
       }
 
-      OUT_BATCH(((brw->gs.prog_data->output_vertex_size_hwords * 2 - 1) <<
-                 GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) |
-                (brw->gs.prog_data->output_topology <<
-                 GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) |
-                (prog_data->urb_read_length <<
-                 GEN6_GS_URB_READ_LENGTH_SHIFT) |
-                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
-                (prog_data->dispatch_grf_start_reg <<
-                 GEN6_GS_DISPATCH_START_GRF_SHIFT));
-
-      OUT_BATCH(((brw->max_gs_threads - 1) << max_threads_shift) |
-                GEN7_GS_DISPATCH_MODE_DUAL_OBJECT |
-                GEN6_GS_STATISTICS_ENABLE |
-                GEN7_GS_ENABLE);
+      uint32_t dw5 =
+         ((brw->gs.prog_data->output_vertex_size_hwords * 2 - 1) <<
+          GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) |
+         (brw->gs.prog_data->output_topology <<
+          GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) |
+         (prog_data->urb_read_length <<
+          GEN6_GS_URB_READ_LENGTH_SHIFT) |
+         (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
+         (prog_data->dispatch_grf_start_reg <<
+          GEN6_GS_DISPATCH_START_GRF_SHIFT);
+      uint32_t dw6 =
+         ((brw->max_gs_threads - 1) << max_threads_shift) |
+         (brw->gs.prog_data->control_data_header_size_hwords <<
+          GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
+         GEN7_GS_DISPATCH_MODE_DUAL_OBJECT |
+         GEN6_GS_STATISTICS_ENABLE |
+         GEN7_GS_ENABLE;
+
+      if (brw->is_haswell) {
+         dw6 |= brw->gs.prog_data->control_data_format <<
+            HSW_GS_CONTROL_DATA_FORMAT_SHIFT;
+      } else {
+         dw5 |= brw->gs.prog_data->control_data_format <<
+            IVB_GS_CONTROL_DATA_FORMAT_SHIFT;
+      }
 
+      OUT_BATCH(dw5);
+      OUT_BATCH(dw6);
       OUT_BATCH(0);
       ADVANCE_BATCH();
    } else {




More information about the mesa-commit mailing list