Mesa (master): i965/vec4: Generate URB writes using a loop.

Mon Sep 16 20:07:16 UTC 2013

Module: Mesa
Branch: master
Commit: 784044c206efd774ce1f7a481311480f85446887
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=784044c206efd774ce1f7a481311480f85446887

Author: Paul Berry <stereotype441 at gmail.com>
Date:   Tue Sep  3 12:30:06 2013 -0700

i965/vec4: Generate URB writes using a loop.

Previously we only ever did 1 or 2 URB writes, since the maximum
number of varyings we support is small enough to fit in 2 URB writes.
But GL 3.2 requires the geometry shader to support 128 output varying
components, and this could require up to 3 URB writes.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

---

 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   52 ++++++++++--------------
 1 files changed, 21 insertions(+), 31 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 304636a..874e6e3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2694,47 +2694,37 @@ vec4_visitor::emit_vertex()
       emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
    }
 
-   /* Set up the VUE data for the first URB write */
-   int slot;
-   for (slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
-      emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
-
-      /* If this was max_usable_mrf, we can't fit anything more into this URB
-       * WRITE.
+   /* We may need to split this up into several URB writes, so do them in a
+    * loop.
+    */
+   int slot = 0;
+   bool complete = false;
+   do {
+      /* URB offset is in URB row increments, and each of our MRFs is half of
+       * one of those, since we're doing interleaved writes.
        */
-      if (mrf > max_usable_mrf) {
-	 slot++;
-	 break;
-      }
-   }
-
-   bool complete = slot >= prog_data->vue_map.num_slots;
-   current_annotation = "URB write";
-   vec4_instruction *inst = emit_urb_write_opcode(complete);
-   inst->base_mrf = base_mrf;
-   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+      int offset = slot / 2;
 
-   /* Optional second URB write */
-   if (!complete) {
       mrf = base_mrf + 1;
-
       for (; slot < prog_data->vue_map.num_slots; ++slot) {
-	 assert(mrf < max_usable_mrf);
-
          emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
+
+         /* If this was max_usable_mrf, we can't fit anything more into this
+          * URB WRITE.
+          */
+         if (mrf > max_usable_mrf) {
+            slot++;
+            break;
+         }
       }
 
+      complete = slot >= prog_data->vue_map.num_slots;
       current_annotation = "URB write";
-      inst = emit_urb_write_opcode(true /* complete */);
+      vec4_instruction *inst = emit_urb_write_opcode(complete);
       inst->base_mrf = base_mrf;
       inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
-      /* URB destination offset.  In the previous write, we got MRFs
-       * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
-       * URB row increments, and each of our MRFs is half of one of
-       * those, since we're doing interleaved writes.
-       */
-      inst->offset += (max_usable_mrf - base_mrf) / 2;
-   }
+      inst->offset += offset;
+   } while(!complete);
 }