[Mesa-dev] [PATCH v2 8/8] i965 gen6: Implement pass-through GS for transform feedback.

Paul Berry stereotype441 at gmail.com
Wed Dec 7 11:09:16 PST 2011


In Gen6, transform feedback is accomplished by having the geometry
shader send vertex data to the data port using "Streamed Vertex Buffer
Write" messages, while simultaneously passing vertices through to the
rest of the graphics pipeline (if rendering is enabled).

This patch adds a geometry shader program that simply passes vertices
through to the rest of the graphics pipeline.  The rest of transform
feedback functionality will be added in future patches.

To make the new geometry shader easier to test, I've added an
environment variable "INTEL_FORCE_GS".  If this environment variable
is enabled, then the pass-through geometry shader will always be used,
regardless of whether transform feedback is in effect.

On my Sandy Bridge laptop, I'm able to enable INTEL_FORCE_GS with no
Piglit regressions.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_defines.h   |    3 +
 src/mesa/drivers/dri/i965/brw_eu.h        |    5 ++
 src/mesa/drivers/dri/i965/brw_gs.c        |  105 ++++++++++++++++++++--------
 src/mesa/drivers/dri/i965/brw_gs.h        |    2 +
 src/mesa/drivers/dri/i965/brw_gs_emit.c   |   92 +++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/gen6_gs_state.c |   46 ++++++++-----
 6 files changed, 207 insertions(+), 46 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index d949231..95039aa 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1075,6 +1075,9 @@ enum brw_message_target {
 # define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK		INTEL_MASK(25, 16)
 # define GEN6_GS_ENABLE					(1 << 15)
 
+# define BRW_GS_EDGE_INDICATOR_0			(1 << 8)
+# define BRW_GS_EDGE_INDICATOR_1			(1 << 9)
+
 #define _3DSTATE_HS                             0x781B /* GEN7+ */
 #define _3DSTATE_TE                             0x781C /* GEN7+ */
 #define _3DSTATE_DS                             0x781D /* GEN7+ */
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index dcb1fc9..596be02 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -650,6 +650,11 @@ static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
    return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
 }
 
+static INLINE struct brw_reg get_element_d( struct brw_reg reg, GLuint elt )
+{
+   return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt));
+}
+
 
 static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
 					    GLuint x,
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index e72ff5e..e3703ae 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -53,12 +53,6 @@ static void compile_gs_prog( struct brw_context *brw,
    void *mem_ctx;
    GLuint program_size;
 
-   /* Gen6: VF has already converted into polygon, and LINELOOP is
-    * converted to LINESTRIP at the beginning of the 3D pipeline.
-    */
-   if (intel->gen >= 6)
-      return;
-
    memset(&c, 0, sizeof(c));
    
    c.key = *key;
@@ -80,24 +74,60 @@ static void compile_gs_prog( struct brw_context *brw,
     */
    brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
 
-
-   /* Note that primitives which don't require a GS program have
-    * already been weeded out by this stage:
-    */
-
-   switch (key->primitive) {
-   case _3DPRIM_QUADLIST:
-      brw_gs_quads( &c, key );
-      break;
-   case _3DPRIM_QUADSTRIP:
-      brw_gs_quad_strip( &c, key );
-      break;
-   case _3DPRIM_LINELOOP:
-      brw_gs_lines( &c );
-      break;
-   default:
-      ralloc_free(mem_ctx);
-      return;
+   if (intel->gen >= 6) {
+      unsigned num_verts;
+      bool check_edge_flag;
+      /* On Sandybridge, we use the GS for implementing transform feedback
+       * (called "Stream Out" in the PRM).
+       */
+      switch (key->primitive) {
+      case _3DPRIM_POINTLIST:
+         num_verts = 1;
+         check_edge_flag = false;
+	 break;
+      case _3DPRIM_LINELIST:
+      case _3DPRIM_LINESTRIP:
+      case _3DPRIM_LINELOOP:
+         num_verts = 2;
+         check_edge_flag = false;
+	 break;
+      case _3DPRIM_TRILIST:
+      case _3DPRIM_TRIFAN:
+      case _3DPRIM_TRISTRIP:
+      case _3DPRIM_RECTLIST:
+	 num_verts = 3;
+         check_edge_flag = false;
+         break;
+      case _3DPRIM_QUADLIST:
+      case _3DPRIM_QUADSTRIP:
+      case _3DPRIM_POLYGON:
+         num_verts = 3;
+         check_edge_flag = true;
+         break;
+      default:
+	 assert(!"Unexpected primitive type in Gen6 SOL program.");
+	 return;
+      }
+      gen6_sol_program(&c, key, num_verts, check_edge_flag);
+   } else {
+      /* On Gen4-5, we use the GS to decompose certain types of primitives.
+       * Note that primitives which don't require a GS program have already
+       * been weeded out by now.
+       */
+      switch (key->primitive) {
+      case _3DPRIM_QUADLIST:
+	 brw_gs_quads( &c, key );
+	 break;
+      case _3DPRIM_QUADSTRIP:
+	 brw_gs_quad_strip( &c, key );
+	 break;
+      case _3DPRIM_LINELOOP:
+	 brw_gs_lines( &c );
+	 break;
+      default:
+	 ralloc_free(mem_ctx);
+	 return;
+      }
    }
 
    /* get the program
@@ -148,11 +178,25 @@ static void populate_key( struct brw_context *brw,
    /* _NEW_TRANSFORM */
    key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
 
-   key->need_gs_prog = (intel->gen >= 6)
-      ? 0
-      : (brw->primitive == _3DPRIM_QUADLIST ||
-	 brw->primitive == _3DPRIM_QUADSTRIP ||
-	 brw->primitive == _3DPRIM_LINELOOP);
+   if (intel->gen >= 7) {
+      /* On Gen7 and later, we don't use GS (yet). */
+      key->need_gs_prog = false;
+   } else if (intel->gen == 6) {
+      /* On Gen6, GS is used for transform feedback. */
+      key->need_gs_prog = ctx->TransformFeedback.CurrentObject->Active;
+   } else {
+      /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
+       * into simpler primitives.
+       */
+      key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST ||
+                           brw->primitive == _3DPRIM_QUADSTRIP ||
+                           brw->primitive == _3DPRIM_LINELOOP);
+   }
+   /* For testing, the environment variable INTEL_FORCE_GS can be used to
+    * force a GS program to be used, even if it's not necessary.
+    */
+   if (getenv("INTEL_FORCE_GS"))
+      key->need_gs_prog = true;
 }
 
 /* Calculate interpolants for triangle and line rasterization.
@@ -183,7 +227,8 @@ brw_upload_gs_prog(struct brw_context *brw)
 const struct brw_tracked_state brw_gs_prog = {
    .dirty = {
       .mesa  = (_NEW_LIGHT |
-                _NEW_TRANSFORM),
+                _NEW_TRANSFORM |
+                _NEW_TRANSFORM_FEEDBACK),
       .brw   = BRW_NEW_PRIMITIVE,
       .cache = CACHE_NEW_VS_PROG
    },
diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h
index 93448a7..abcb0b2 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.h
+++ b/src/mesa/drivers/dri/i965/brw_gs.h
@@ -73,5 +73,7 @@ struct brw_gs_compile {
 void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
 void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
 void brw_gs_lines( struct brw_gs_compile *c );
+void gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
+                      unsigned num_verts, bool check_edge_flag);
 
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index 9b1dfbf..322f9bd 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -101,6 +101,37 @@ static void brw_gs_overwrite_header_dw2(struct brw_gs_compile *c,
 }
 
 /**
+ * Overwrite DWORD 2 of c->reg.header with the primitive type from c->reg.R0.
+ *
+ * When the thread is spawned, GRF 0 contains the primitive type in bits 4:0
+ * of DWORD 2.  URB_WRITE messages need the primitive type in bits 6:2 of
+ * DWORD 2.  So this function extracts the primitive type field, bitshifts it
+ * appropriately, and stores it in c->reg.header.
+ */
+static void brw_gs_overwrite_header_dw2_from_r0(struct brw_gs_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2),
+           brw_imm_ud(0x1f));
+   brw_SHL(p, get_element_ud(c->reg.header, 2),
+           get_element_ud(c->reg.header, 2), brw_imm_ud(2));
+}
+
+/**
+ * Apply an additive offset to DWORD 2 of c->reg.header.
+ *
+ * This is used to set/unset the "PrimStart" and "PrimEnd" flags appropriately
+ * for each vertex.
+ */
+static void brw_gs_offset_header_dw2(struct brw_gs_compile *c, int offset)
+{
+   struct brw_compile *p = &c->func;
+   brw_ADD(p, get_element_d(c->reg.header, 2), get_element_d(c->reg.header, 2),
+           brw_imm_d(offset));
+}
+
+
+/**
  * Emit a vertex using the URB_WRITE message.  Use the contents of
  * c->reg.header for the message header, and the registers starting at \c vert
  * for the vertex data.
@@ -269,3 +300,64 @@ void brw_gs_lines( struct brw_gs_compile *c )
           | URB_WRITE_PRIM_END));
    brw_gs_emit_vue(c, c->reg.vertex[1], 1);
 }
+
+/**
+ * Generate the geometry shader program used on Gen6 to perform stream output
+ * (transform feedback).
+ */
+void
+gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
+	         unsigned num_verts, bool check_edge_flags)
+{
+   struct brw_compile *p = &c->func;
+
+   brw_gs_alloc_regs(c, num_verts);
+   brw_gs_initialize_header(c);
+
+   brw_gs_ff_sync(c, 1);
+
+   brw_gs_overwrite_header_dw2_from_r0(c);
+   switch (num_verts) {
+   case 1:
+      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
+      brw_gs_emit_vue(c, c->reg.vertex[0], true);
+      break;
+   case 2:
+      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
+      brw_gs_emit_vue(c, c->reg.vertex[0], false);
+      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
+      brw_gs_emit_vue(c, c->reg.vertex[1], true);
+      break;
+   case 3:
+      if (check_edge_flags) {
+         /* Only emit vertices 0 and 1 if this is the first triangle of the
+          * polygon.  Otherwise they are redundant.
+          */
+         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+                 get_element_ud(c->reg.R0, 2),
+                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
+         brw_IF(p, BRW_EXECUTE_1);
+      }
+      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
+      brw_gs_emit_vue(c, c->reg.vertex[0], false);
+      brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
+      brw_gs_emit_vue(c, c->reg.vertex[1], false);
+      if (check_edge_flags) {
+         brw_ENDIF(p);
+         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
+          * of the polygon.  Otherwise leave the primitive incomplete because
+          * there are more polygon vertices coming.
+          */
+         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+                 get_element_ud(c->reg.R0, 2),
+                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
+         brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+      }
+      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      brw_gs_emit_vue(c, c->reg.vertex[2], true);
+      break;
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
index d29f029..42962a6 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
@@ -44,22 +44,36 @@ upload_gs_state(struct brw_context *brw)
    OUT_BATCH(0);
    ADVANCE_BATCH();
 
-   // GS should never be used on Gen6.  Disable it.
-   assert(!brw->gs.prog_active);
-   BEGIN_BATCH(7);
-   OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
-   OUT_BATCH(0); /* prog_bo */
-   OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
-	     (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
-   OUT_BATCH(0); /* scratch space base offset */
-   OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
-	     (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
-	     (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
-   OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
-	     GEN6_GS_STATISTICS_ENABLE |
-	     GEN6_GS_RENDERING_ENABLE);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
+   if (brw->gs.prog_active) {
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+      OUT_BATCH(brw->gs.prog_offset);
+      OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE);
+      OUT_BATCH(0); /* no scratch space */
+      OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+	        (brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
+      OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
+	        GEN6_GS_STATISTICS_ENABLE |
+		GEN6_GS_SO_STATISTICS_ENABLE |
+		GEN6_GS_RENDERING_ENABLE);
+      OUT_BATCH(GEN6_GS_ENABLE);
+      ADVANCE_BATCH();
+   } else {
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+      OUT_BATCH(0); /* prog_bo */
+      OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
+		(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+      OUT_BATCH(0); /* scratch space base offset */
+      OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+		(0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
+		(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
+      OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
+		GEN6_GS_STATISTICS_ENABLE |
+		GEN6_GS_RENDERING_ENABLE);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
 }
 
 const struct brw_tracked_state gen6_gs_state = {
-- 
1.7.6.4



More information about the mesa-dev mailing list