[Mesa-dev] [PATCH 10/36] i965: new VS: use the VUE map to write out vertex attributes.

Paul Berry stereotype441 at gmail.com
Fri Sep 2 09:06:49 PDT 2011


Previously, the new VS backend used two functions,
emit_vue_header_gen6() and emit_vue_header_gen4(), to emit the fixed
parts of the VUE, and then a pair of carefully constructed loops to
emit the rest of the VUE, leaving out the parts that were already
emitted as part of the header.

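This patch changes the new VS backend to use the VUE map to emit the
entire VUE: both header functions are replaced by a single helper,
emit_urb_slot(), which is called once for each slot in the VUE map.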
---
 src/mesa/drivers/dri/i965/brw_vec4.h           |    3 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |  165 ++++++++----------------
 2 files changed, 55 insertions(+), 113 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 01313ec..360aa60 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -466,8 +466,7 @@ public:
    void emit_ndc_computation();
    void emit_psiz_and_flags(struct brw_reg reg);
    void emit_clip_distances(struct brw_reg reg, int offset);
-   int emit_vue_header_gen6(int header_mrf);
-   int emit_vue_header_gen4(int header_mrf);
+   void emit_urb_slot(int mrf, int vert_result);
    void emit_urb_writes(void);
 
    src_reg get_scratch_offset(vec4_instruction *inst,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 374cf8a..b20f4f2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1804,83 +1804,53 @@ vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
    }
 }
 
-int
-vec4_visitor::emit_vue_header_gen4(int header_mrf)
+void
+vec4_visitor::emit_urb_slot(int mrf, int vert_result)
 {
-   emit_ndc_computation();
+   struct brw_reg reg = brw_message_reg(mrf);
 
-   emit_psiz_and_flags(brw_message_reg(header_mrf++));
-
-   if (intel->gen == 5) {
-      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
-       * dword 0-3 (m1) of the header is indices, point width, clip flags.
-       * dword 4-7 (m2) is the ndc position (set above)
-       * dword 8-11 (m3) of the vertex header is the 4D space position
-       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
-       * m6 is a pad so that the vertex element data is aligned
-       * m7 is the first vertex data we fill.
-       */
-      current_annotation = "NDC";
-      emit(MOV(brw_message_reg(header_mrf++),
-               src_reg(output_reg[BRW_VERT_RESULT_NDC])));
-
-      current_annotation = "gl_Position";
-      emit(MOV(brw_message_reg(header_mrf++),
-               src_reg(output_reg[VERT_RESULT_HPOS])));
-
-      /* user clip distance. */
-      emit_clip_distances(brw_message_reg(header_mrf++), 0);
-      emit_clip_distances(brw_message_reg(header_mrf++), 4);
-
-      /* Pad so that vertex element data is aligned. */
-      header_mrf++;
-   } else {
-      /* There are 8 dwords in VUE header pre-Ironlake:
-       * dword 0-3 (m1) is indices, point width, clip flags.
-       * dword 4-7 (m2) is ndc position (set above)
-       *
-       * dword 8-11 (m3) is the first vertex data.
-       */
+   switch (vert_result) {
+   case VERT_RESULT_PSIZ:
+      /* PSIZ is always in slot 0, and is coupled with other flags. */
+      current_annotation = "indices, point width, clip flags";
+      emit_psiz_and_flags(reg);
+      break;
+   case BRW_VERT_RESULT_NDC:
       current_annotation = "NDC";
-      emit(MOV(brw_message_reg(header_mrf++),
-               src_reg(output_reg[BRW_VERT_RESULT_NDC])));
-
+      emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
+      break;
+   case BRW_VERT_RESULT_HPOS_DUPLICATE:
+   case VERT_RESULT_HPOS:
       current_annotation = "gl_Position";
-      emit(MOV(brw_message_reg(header_mrf++),
-               src_reg(output_reg[VERT_RESULT_HPOS])));
+      emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
+      break;
+   case BRW_VERT_RESULT_CLIP0:
+      current_annotation = "user clip distances";
+      emit_clip_distances(reg, 0);
+      break;
+   case BRW_VERT_RESULT_CLIP1:
+      current_annotation = "user clip distances";
+      emit_clip_distances(reg, 4);
+      break;
+   case BRW_VERT_RESULT_PAD:
+      /* No need to write to this slot */
+      break;
+   default: {
+      assert (vert_result < VERT_RESULT_MAX);
+      current_annotation = NULL;
+      /* Copy the register, saturating if necessary */
+      vec4_instruction *inst = emit(MOV(reg,
+                                        src_reg(output_reg[vert_result])));
+      if ((vert_result == VERT_RESULT_COL0 ||
+	   vert_result == VERT_RESULT_COL1 ||
+	   vert_result == VERT_RESULT_BFC0 ||
+	   vert_result == VERT_RESULT_BFC1) &&
+	  c->key.clamp_vertex_color) {
+	 inst->saturate = true;
+      }
    }
-
-   return header_mrf;
-}
-
-int
-vec4_visitor::emit_vue_header_gen6(int header_mrf)
-{
-   /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
-    * dword 0-3 (m2) of the header is indices, point width, clip flags.
-    * dword 4-7 (m3) is the 4D space position
-    * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
-    * enabled.
-    *
-    * m4 or 6 is the first vertex element data we fill.
-    */
-
-   current_annotation = "indices, point width, clip flags";
-   emit_psiz_and_flags(brw_message_reg(header_mrf++));
-
-   current_annotation = "gl_Position";
-   emit(MOV(brw_message_reg(header_mrf++),
-	    src_reg(output_reg[VERT_RESULT_HPOS])));
-
-   current_annotation = "user clip distances";
-   if (c->key.nr_userclip) {
-      emit_clip_distances(brw_message_reg(header_mrf++), 0);
-      emit_clip_distances(brw_message_reg(header_mrf++), 4);
+      break;
    }
-
-   current_annotation = NULL;
-
-   return header_mrf;
 }
 
 static int
@@ -1919,7 +1889,6 @@ vec4_visitor::emit_urb_writes()
    int base_mrf = 1;
    int mrf = base_mrf;
    int urb_entry_size;
-   uint64_t outputs_remaining = c->prog_data.outputs_written;
    /* In the process of generating our URB write message contents, we
     * may need to unspill a register or load from an array.  Those
     * reads would use MRFs 14-15.
@@ -1928,45 +1897,22 @@ vec4_visitor::emit_urb_writes()
 
    /* FINISHME: edgeflag */
 
+   brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip,
+                       c->key.two_side_color, c->prog_data.outputs_written);
+
    /* First mrf is the g0-based message header containing URB handles and such,
     * which is implied in VS_OPCODE_URB_WRITE.
     */
    mrf++;
 
-   if (intel->gen >= 6) {
-      mrf = emit_vue_header_gen6(mrf);
-   } else {
-      mrf = emit_vue_header_gen4(mrf);
+   if (intel->gen < 6) {
+      emit_ndc_computation();
    }
 
    /* Set up the VUE data for the first URB write */
-   int attr;
-   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
-      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
-	 continue;
-
-      outputs_remaining &= ~BITFIELD64_BIT(attr);
-
-      /* This is set up in the VUE header. */
-      if (attr == VERT_RESULT_HPOS)
-	 continue;
-
-      /* This is loaded into the VUE header, and thus doesn't occupy
-       * an attribute slot.
-       */
-      if (attr == VERT_RESULT_PSIZ)
-	 continue;
-
-      vec4_instruction *inst = emit(MOV(brw_message_reg(mrf++),
-					src_reg(output_reg[attr])));
-
-      if ((attr == VERT_RESULT_COL0 ||
-	   attr == VERT_RESULT_COL1 ||
-	   attr == VERT_RESULT_BFC0 ||
-	   attr == VERT_RESULT_BFC1) &&
-	  c->key.clamp_vertex_color) {
-	 inst->saturate = true;
-      }
+   int slot;
+   for (slot = 0; slot < c->vue_map.num_slots; ++slot) {
+      emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
 
       /* If this was MRF 15, we can't fit anything more into this URB
        * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
@@ -1974,7 +1920,7 @@ vec4_visitor::emit_urb_writes()
        * gen6's requirements for length alignment.
        */
       if (mrf > max_usable_mrf) {
-	 attr++;
+	 slot++;
 	 break;
       }
    }
@@ -1982,21 +1928,18 @@ vec4_visitor::emit_urb_writes()
    vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
    inst->base_mrf = base_mrf;
    inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
-   inst->eot = !outputs_remaining;
+   inst->eot = (slot >= c->vue_map.num_slots);
 
    urb_entry_size = mrf - base_mrf;
 
    /* Optional second URB write */
-   if (outputs_remaining) {
+   if (!inst->eot) {
       mrf = base_mrf + 1;
 
-      for (; attr < VERT_RESULT_MAX; attr++) {
-	 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
-	    continue;
-
+      for (; slot < c->vue_map.num_slots; ++slot) {
 	 assert(mrf < max_usable_mrf);
 
-	 emit(MOV(brw_message_reg(mrf++), src_reg(output_reg[attr])));
+         emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
       }
 
       inst = emit(VS_OPCODE_URB_WRITE);
-- 
1.7.6



More information about the mesa-dev mailing list