[Mesa-dev] [PATCH 2/5] i965: Add support for URB transposed reads present on g45 through ILK.
Eric Anholt
eric at anholt.net
Mon Feb 7 23:26:06 PST 2011
This saves SF URB size by having the WM unit do the transpose from the
SF-friendly coefficient-major layout to the WM-friendly
attribute-major layout.
---
src/mesa/drivers/dri/i965/brw_context.c | 1 +
src/mesa/drivers/dri/i965/brw_context.h | 1 +
src/mesa/drivers/dri/i965/brw_sf.c | 29 ++++++++++-
src/mesa/drivers/dri/i965/brw_sf_emit.c | 87 ++++++++++++------------------
src/mesa/drivers/dri/i965/brw_wm_state.c | 2 +
5 files changed, 66 insertions(+), 54 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 9483ec6..8618781 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -175,6 +175,7 @@ GLboolean brwCreateContext( int api,
brw->has_compr4 = GL_TRUE;
brw->has_aa_line_parameters = GL_TRUE;
brw->has_pln = GL_TRUE;
+ brw->has_transposed_read = GL_TRUE;
} else {
brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965;
brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 8dfd152..a58ca9f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -452,6 +452,7 @@ struct brw_context
GLuint primitive;
GLboolean emit_state_always;
+ GLboolean has_transposed_read;
GLboolean has_surface_tile_offset;
GLboolean has_compr4;
GLboolean has_negative_rhw_bug;
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index 6da155b..b6c1923 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -65,7 +65,34 @@ static void compile_sf_prog( struct brw_context *brw,
c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
c.prog_data.urb_read_length = c.nr_attr_regs;
- c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+ /* Number of 512-bit URB rows produced. */
+ if (brw->has_transposed_read) {
+ /* Transposed reads: The 3 coefficients we produce are packed
+ * in the URB entry:
+ *
+ * row0: a0.x_x a0.y_x a0.z_x a0.w_x a1.x_x a1.y_x a1.z_x a1.w_x
+ * a0.x_y a0.y_y a0.z_y a0.w_y a1.x_y a1.y_y a1.z_y a1.w_y
+ * row1: a0.x_c a0.y_c a0.z_c a0.w_c a1.x_c a1.y_c a1.z_c a1.w_c
+ * a2.x_x a2.y_x a2.z_x a2.w_x a3.x_x a3.y_x a3.z_x a3.w_x
+ * row2: a2.x_y a2.y_y a2.z_y a2.w_y a3.x_y a3.y_y a3.z_y a3.w_y
+ * a2.x_c a2.y_c a2.z_c a2.w_c a3.x_c a3.y_c a3.z_c a3.w_c
+ *
+ * The WM gets programmed as if it were reading from the else
+ * block below.
+ */
+ c.prog_data.urb_entry_size = ((c.nr_setup_attrs + 3) / 4) * 3;
+ } else {
+ /* Transposed writes into URB. The rows look like:
+ *
+ * row0: a0.x_x a0.x_y null a0.x_c a0.y_x a0.y_y null a0.y_c
+ * a0.z_x a0.z_y null a0.z_c a0.w_x a0.w_y null a0.w_c
+ *
+ * So we use a whole row per attribute (and since we write
+ * two-attribute groups to the URB, align the size in case the
+ * disabled last attribute actually gets written).
+ */
+ c.prog_data.urb_entry_size = ALIGN(c.nr_setup_attrs, 2);
+ }
/* Construct map from attribute number to position in the vertex.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c
index d3c9756..b35e509 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@@ -42,6 +42,34 @@
#include "brw_util.h"
#include "brw_sf.h"
+static void /* Emit the URB write for one pair of setup attributes. */
+do_urb_write(struct brw_sf_compile *c, int attr_pair, bool last)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
+ uint32_t offset, swizzle;
+
+ if (brw->has_transposed_read) { /* WM transposes on read, so write the data unswizzled */
+ offset = attr_pair * 3; /* 3 offset units per attribute pair in the packed layout */
+ swizzle = BRW_URB_SWIZZLE_NONE;
+ } else { /* no transposed read: the URB write itself performs the transpose */
+ offset = attr_pair * 4; /* 4 offset units per attribute pair */
+ swizzle = BRW_URB_SWIZZLE_TRANSPOSE;
+ }
+
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ offset, /* URB destination offset */
+ swizzle); /* swizzle control */
+}
static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
struct brw_reg vert,
@@ -445,6 +473,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
}
{
+
brw_set_predicate_control_flag_value(p, pc);
/* start point for interpolation
*/
@@ -453,18 +482,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
/* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
* the send instruction:
*/
- brw_urb_WRITE(p,
- brw_null_reg(),
- 0,
- brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
- 0, /* allocate */
- 1, /* used */
- 4, /* msg len */
- 0, /* response len */
- last, /* eot */
- last, /* writes complete */
- i*4, /* offset */
- BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
+ do_urb_write(c, i, last);
}
}
}
@@ -525,20 +543,7 @@ void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate)
*/
brw_MOV(p, c->m3C0, a0);
- /* Copy m0..m3 to URB.
- */
- brw_urb_WRITE(p,
- brw_null_reg(),
- 0,
- brw_vec8_grf(0, 0),
- 0, /* allocate */
- 1, /* used */
- 4, /* msg len */
- 0, /* response len */
- last, /* eot */
- last, /* writes complete */
- i*4, /* urb destination offset */
- BRW_URB_SWIZZLE_TRANSPOSE);
+ do_urb_write(c, i, last);
}
}
}
@@ -617,19 +622,8 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
brw_set_predicate_control_flag_value(p, pc);
- /* Copy m0..m3 to URB. */
- brw_urb_WRITE(p,
- brw_null_reg(),
- 0,
- brw_vec8_grf(0, 0),
- 0, /* allocate */
- 1, /* used */
- 4, /* msg len */
- 0, /* response len */
- last, /* eot */
- last, /* writes complete */
- i*4, /* urb destination offset */
- BRW_URB_SWIZZLE_TRANSPOSE);
+
+ do_urb_write(c, i, last);
}
}
@@ -676,20 +670,7 @@ void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate)
brw_MOV(p, c->m3C0, a0); /* constant value */
- /* Copy m0..m3 to URB.
- */
- brw_urb_WRITE(p,
- brw_null_reg(),
- 0,
- brw_vec8_grf(0, 0),
- 0, /* allocate */
- 1, /* used */
- 4, /* msg len */
- 0, /* response len */
- last, /* eot */
- last, /* writes complete */
- i*4, /* urb destination offset */
- BRW_URB_SWIZZLE_TRANSPOSE);
+ do_urb_write(c, i, last);
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index e9ef635..dc81fae 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -230,6 +230,8 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.wm5.line_endcap_aa_region_width = 1;
wm.wm5.polygon_stipple = key->polygon_stipple;
+ if (brw->has_transposed_read)
+ wm.wm5.transposed_urb_read_enable = 1;
if (key->offset_enable) {
wm.wm5.depth_offset = 1;
--
1.7.2.3
More information about the mesa-dev
mailing list