Mesa (master): i965/vs: Pack live uniform vectors together in the push constant upload.

Eric Anholt anholt at kemper.freedesktop.org
Tue Aug 30 12:10:33 PDT 2011


Module: Mesa
Branch: master
Commit: 72cfc6f3778d8297e52c254a5861a88eb62e4d67
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=72cfc6f3778d8297e52c254a5861a88eb62e4d67

Author: Eric Anholt <eric at anholt.net>
Date:   Tue Aug 23 12:13:14 2011 -0700

i965/vs: Pack live uniform vectors together in the push constant upload.

At some point we need to also move uniform accesses out to pull
constants when there are just too many in use, but we lack tests for
that at the moment.

Fixes glsl-vs-large-uniform-array.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

---

 src/mesa/drivers/dri/i965/brw_vec4.cpp         |   89 ++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4.h           |    1 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    |    5 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |    6 ++
 4 files changed, 98 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index cdbbb55..9d64a40 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -188,4 +188,93 @@ vec4_visitor::split_uniform_registers()
    }
 }
 
+void
+vec4_visitor::pack_uniform_registers()
+{
+   bool uniform_used[this->uniforms]; /* indexed by old uniform vector */
+   int new_loc[this->uniforms];       /* old vector -> packed vector index */
+   int new_chan[this->uniforms];      /* old vector -> first packed channel */
+   /* Packs live uniform vectors together and rewrites sources to match. */
+   memset(uniform_used, 0, sizeof(uniform_used));
+   memset(new_loc, 0, sizeof(new_loc));
+   memset(new_chan, 0, sizeof(new_chan));
+
+   /* Pass 1: flag each uniform vector still read by an instruction
+    * source.  Unused vectors are expected once array access has moved
+    * to pull constants, and from some GLSL generators like wine.
+    */
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      for (int i = 0 ; i < 3; i++) {
+	 if (inst->src[i].file != UNIFORM)
+	    continue;
+
+	 uniform_used[inst->src[i].reg] = true;
+      }
+   }
+
+   int new_uniform_count = 0;
+
+   /* Pass 2: pack the live uniform vectors into our push constants,
+    * visiting them in increasing index order.
+    */
+   for (int src = 0; src < uniforms; src++) {
+      int size = this->uniform_vector_size[src];
+
+      if (!uniform_used[src]) {
+	 this->uniform_vector_size[src] = 0; /* dead: slot is free for reuse */
+	 continue;
+      }
+
+      int dst;
+      /* First fit: lowest earlier slot that still holds <= 4 with us added. */
+      for (dst = 0; dst < src; dst++) {
+	 if (this->uniform_vector_size[dst] + size <= 4)
+	    break;
+      }
+
+      if (src == dst) { /* no earlier slot had room: stays where it is */
+	 new_loc[src] = dst;
+	 new_chan[src] = 0;
+      } else {
+	 new_loc[src] = dst;
+	 new_chan[src] = this->uniform_vector_size[dst];
+
+	 /* Copy this vector's param[] entries to their new channels in dst */
+	 for (int j = 0; j < size; j++) {
+	    c->prog_data.param[dst * 4 + new_chan[src] + j] =
+	       c->prog_data.param[src * 4 + j];
+	 }
+
+	 this->uniform_vector_size[dst] += size;
+	 this->uniform_vector_size[src] = 0;
+      }
+
+      new_uniform_count = MAX2(new_uniform_count, dst + 1); /* top slot + 1 */
+   }
+
+   this->uniforms = new_uniform_count;
+
+   /* Pass 3: retarget every UNIFORM source at its packed location. */
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      for (int i = 0 ; i < 3; i++) {
+	 int src = inst->src[i].reg;
+
+	 if (inst->src[i].file != UNIFORM)
+	    continue;
+
+	 inst->src[i].reg = new_loc[src];
+	 /* Offset all four swizzle selects by the vector's new base channel. */
+	 int sx = BRW_GET_SWZ(inst->src[i].swizzle, 0) + new_chan[src];
+	 int sy = BRW_GET_SWZ(inst->src[i].swizzle, 1) + new_chan[src];
+	 int sz = BRW_GET_SWZ(inst->src[i].swizzle, 2) + new_chan[src];
+	 int sw = BRW_GET_SWZ(inst->src[i].swizzle, 3) + new_chan[src];
+	 inst->src[i].swizzle = BRW_SWIZZLE4(sx, sy, sz, sw);
+      }
+   }
+}
+
 } /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 945eea5..3279773 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -389,6 +389,7 @@ public:
    void move_grf_array_access_to_scratch();
    void move_uniform_array_access_to_pull_constants();
    void split_uniform_registers();
+   void pack_uniform_registers();
    void calculate_live_intervals();
    bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 4951407..f084a7f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -128,9 +128,6 @@ vec4_visitor::setup_uniforms(int reg)
       reg += ALIGN(uniforms, 2) / 2;
    }
 
-   /* for now, we are not doing any elimination of unused slots, nor
-    * are we packing our uniforms.
-    */
    c->prog_data.nr_params = this->uniforms * 4;
 
    c->prog_data.curb_read_length = reg - 1;
@@ -607,6 +604,8 @@ vec4_visitor::run()
       progress = dead_code_eliminate() || progress;
    } while (progress);
 
+   pack_uniform_registers();
+
    if (failed)
       return false;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index dc11d98..6939904 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -136,6 +136,12 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
    /* The gen6 math instruction ignores the source modifiers --
     * swizzle, abs, negate, and at least some parts of the register
     * region description.
+    *
+    * While it would seem that this MOV could be avoided at this point
+    * in the case that the swizzle is matched up with the destination
+    * writemask, note that uniform packing and register allocation
+    * could rearrange our swizzle, so let's leave this matter up to
+    * copy propagation later.
     */
    src_reg temp_src = src_reg(this, glsl_type::vec4_type);
    emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);



More information about the mesa-commit mailing list