[Mesa-dev] [PATCH 2/5] i965: Add support for URB transposed reads present on g45 through ILK.

Eric Anholt eric at anholt.net
Mon Feb 7 23:26:06 PST 2011


This saves SF URB space: the SF unit writes its output in the
coefficient-major layout it naturally produces, and the WM unit
transposes it into the attribute-major layout it needs while reading.
---
 src/mesa/drivers/dri/i965/brw_context.c  |    1 +
 src/mesa/drivers/dri/i965/brw_context.h  |    1 +
 src/mesa/drivers/dri/i965/brw_sf.c       |   29 ++++++++++-
 src/mesa/drivers/dri/i965/brw_sf_emit.c  |   87 ++++++++++++------------------
 src/mesa/drivers/dri/i965/brw_wm_state.c |    2 +
 5 files changed, 66 insertions(+), 54 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 9483ec6..8618781 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -175,6 +175,7 @@ GLboolean brwCreateContext( int api,
 	  brw->has_compr4 = GL_TRUE;
       brw->has_aa_line_parameters = GL_TRUE;
       brw->has_pln = GL_TRUE;
+      brw->has_transposed_read = GL_TRUE;
   } else {
       brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965;
       brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 8dfd152..a58ca9f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -452,6 +452,7 @@ struct brw_context
    GLuint primitive;
 
    GLboolean emit_state_always;
+   GLboolean has_transposed_read;
    GLboolean has_surface_tile_offset;
    GLboolean has_compr4;
    GLboolean has_negative_rhw_bug;
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index 6da155b..b6c1923 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -65,7 +65,34 @@ static void compile_sf_prog( struct brw_context *brw,
    c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
 
    c.prog_data.urb_read_length = c.nr_attr_regs;
-   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+   /* Number of 512-bit URB rows produced. */
+   if (brw->has_transposed_read) {
+      /* Transposed reads: The 3 coefficients we produce are packed
+       * in the URB entry:
+       *
+       * row0: a0.x_x a0.y_x a0.z_x a0.w_x a1.x_x a1.y_x a1.z_x a1.w_x
+       *       a0.x_y a0.y_y a0.z_y a0.w_y a1.x_y a1.y_y a1.z_y a1.w_y
+       * row1: a0.x_c a0.y_c a0.z_c a0.w_c a1.x_c a1.y_c a1.z_c a1.w_c
+       *       a2.x_x a2.y_x a2.z_x a2.w_x a3.x_x a3.y_x a3.z_x a3.w_x
+       * row2: a2.x_y a2.y_y a2.z_y a2.w_y a3.x_y a3.y_y a3.z_y a3.w_y
+       *       a2.x_c a2.y_c a2.z_c a2.w_c a3.x_c a3.y_c a3.z_c a3.w_c
+       *
+       * The WM gets programmed as if it were reading the layout
+       * described in the else block below.
+       */
+      c.prog_data.urb_entry_size = ((c.nr_setup_attrs + 3) / 4) * 3;
+   } else {
+      /* Transposed writes into URB.  The rows look like:
+       *
+       * row0: a0.x_x a0.x_y null a0.x_c a0.y_x a0.y_y null a0.y_c
+       *       a0.z_x a0.z_y null a0.z_c a0.w_x a0.w_y null a0.w_c
+       *
+       * So we use a whole row per attribute (and since we write
+       * two-attribute groups to the URB, align the size in case the
+       * disabled last attribute actually gets written).
+       */
+      c.prog_data.urb_entry_size = ALIGN(c.nr_setup_attrs, 2);
+   }
 
    /* Construct map from attribute number to position in the vertex.
     */
diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c
index d3c9756..b35e509 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@@ -42,6 +42,34 @@
 #include "brw_util.h"
 #include "brw_sf.h"
 
+static void
+do_urb_write(struct brw_sf_compile *c, int attr_pair, bool last)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
+   uint32_t offset, swizzle;
+
+   if (brw->has_transposed_read) {
+      offset = attr_pair * 3;
+      swizzle = BRW_URB_SWIZZLE_NONE;
+   } else {
+      offset = attr_pair * 4;
+      swizzle = BRW_URB_SWIZZLE_TRANSPOSE;
+   }
+
+   brw_urb_WRITE(p,
+		 brw_null_reg(),
+		 0,
+		 brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+		 0, 	/* allocate */
+		 1,	/* used */
+		 4, 	/* msg len */
+		 0,	/* response len */
+		 last,	/* eot */
+		 last, 	/* writes complete */
+		 offset,
+		 swizzle);
+}
 
 static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
 				    struct brw_reg vert,
@@ -445,6 +473,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
       }
 
       {
+
 	 brw_set_predicate_control_flag_value(p, pc); 
 	 /* start point for interpolation
 	  */
@@ -453,18 +482,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
 	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
 	  * the send instruction:
 	  */	 
-	 brw_urb_WRITE(p, 
-		       brw_null_reg(),
-		       0,
-		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
-		       0, 	/* allocate */
-		       1,	/* used */
-		       4, 	/* msg len */
-		       0,	/* response len */
-		       last,	/* eot */
-		       last, 	/* writes complete */
-		       i*4,	/* offset */
-		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
+	 do_urb_write(c, i, last);
       }
    }
 }
@@ -525,20 +543,7 @@ void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate)
 	  */
 	 brw_MOV(p, c->m3C0, a0);
 
-	 /* Copy m0..m3 to URB. 
-	  */
-	 brw_urb_WRITE(p, 
-		       brw_null_reg(),
-		       0,
-		       brw_vec8_grf(0, 0),
-		       0, 	/* allocate */
-		       1, 	/* used */
-		       4, 	/* msg len */
-		       0,	/* response len */
-		       last, 	/* eot */
-		       last, 	/* writes complete */
-		       i*4,	/* urb destination offset */
-		       BRW_URB_SWIZZLE_TRANSPOSE); 
+	 do_urb_write(c, i, last);
       }
    } 
 }
@@ -617,19 +622,8 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
 
 
       brw_set_predicate_control_flag_value(p, pc);
-      /* Copy m0..m3 to URB. */
-      brw_urb_WRITE(p,
-		    brw_null_reg(),
-		    0,
-		    brw_vec8_grf(0, 0),
-		    0, 	/* allocate */
-		    1,	/* used */
-		    4, 	/* msg len */
-		    0,	/* response len */
-		    last, 	/* eot */
-		    last, 	/* writes complete */
-		    i*4,	/* urb destination offset */
-		    BRW_URB_SWIZZLE_TRANSPOSE);
+
+      do_urb_write(c, i, last);
    }
 }
 
@@ -676,20 +670,7 @@ void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate)
 
 	 brw_MOV(p, c->m3C0, a0); /* constant value */
 
-	 /* Copy m0..m3 to URB. 
-	  */
-	 brw_urb_WRITE(p, 
-		       brw_null_reg(),
-		       0,
-		       brw_vec8_grf(0, 0),
-		       0, 	/* allocate */
-		       1,	/* used */
-		       4, 	/* msg len */
-		       0,	/* response len */
-		       last, 	/* eot */
-		       last, 	/* writes complete */
-		       i*4,	/* urb destination offset */
-		       BRW_URB_SWIZZLE_TRANSPOSE);
+	 do_urb_write(c, i, last);
       }
    }
 }
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index e9ef635..dc81fae 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -230,6 +230,8 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
    wm.wm5.line_endcap_aa_region_width = 1;
 
    wm.wm5.polygon_stipple = key->polygon_stipple;
+   if (brw->has_transposed_read)
+      wm.wm5.transposed_urb_read_enable = 1;
 
    if (key->offset_enable) {
       wm.wm5.depth_offset = 1;
-- 
1.7.2.3



More information about the mesa-dev mailing list