Mesa (master): i965: Add support for pull constants to the new FS backend.

Eric Anholt anholt at kemper.freedesktop.org
Fri Oct 22 21:56:10 UTC 2010


Module: Mesa
Branch: master
Commit: 07cd8f46acc34b04308f81de2faf05ba33da264b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=07cd8f46acc34b04308f81de2faf05ba33da264b

Author: Eric Anholt <eric at anholt.net>
Date:   Fri Oct 22 12:57:00 2010 -0700

i965: Add support for pull constants to the new FS backend.

Fixes glsl-fs-uniform-array-5, but not glsl-fs-uniform-array-6, which fails in ir_to_mesa.

---

 src/mesa/drivers/dri/i965/brw_context.h           |    6 +-
 src/mesa/drivers/dri/i965/brw_eu.h                |   24 ++--
 src/mesa/drivers/dri/i965/brw_eu_emit.c           |  102 +++++++++----------
 src/mesa/drivers/dri/i965/brw_fs.cpp              |  116 +++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_fs.h                |    5 +
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |    1 +
 src/mesa/drivers/dri/i965/brw_wm_emit.c           |    2 +-
 src/mesa/drivers/dri/i965/brw_wm_glsl.c           |   42 ++++----
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c  |   18 +++-
 src/mesa/drivers/dri/i965/gen6_wm_state.c         |    2 +-
 10 files changed, 213 insertions(+), 105 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index f205c07..4a0709b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -173,8 +173,6 @@ struct brw_fragment_program {
    GLuint id;  /**< serial no. to identify frag progs, never re-used */
    GLboolean isGLSL;  /**< really, any IF/LOOP/CONT/BREAK instructions */
 
-   GLboolean use_const_buffer;
-
    /** for debugging, which texture units are referenced */
    GLbitfield tex_units_used;
 };
@@ -204,12 +202,14 @@ struct brw_wm_prog_data {
    GLuint total_scratch;
 
    GLuint nr_params;       /**< number of float params/constants */
+   GLuint nr_pull_params;
    GLboolean error;
 
    /* Pointer to tracked values (only valid once
     * _mesa_load_state_parameters has been called at runtime).
     */
-   const GLfloat *param[BRW_MAX_CURBE];
+   const GLfloat *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
+   const GLfloat *pull_param[MAX_UNIFORMS * 4];
 };
 
 struct brw_sf_prog_data {
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 0e3ccfa..15c2f23 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -900,14 +900,19 @@ void brw_math2(struct brw_compile *p,
 void brw_oword_block_read(struct brw_compile *p,
 			  struct brw_reg dest,
 			  struct brw_reg mrf,
-			  int num_regs,
-			  GLuint offset);
+			  uint32_t offset,
+			  uint32_t bind_table_index);
 
-void brw_dp_READ_4( struct brw_compile *p,
-                    struct brw_reg dest,
-                    GLboolean relAddr,
-                    GLuint location,
-                    GLuint bind_table_index );
+void brw_oword_block_read_scratch(struct brw_compile *p,
+				  struct brw_reg dest,
+				  struct brw_reg mrf,
+				  int num_regs,
+				  GLuint offset);
+
+void brw_oword_block_write_scratch(struct brw_compile *p,
+				   struct brw_reg mrf,
+				   int num_regs,
+				   GLuint offset);
 
 void brw_dp_READ_4_vs( struct brw_compile *p,
                        struct brw_reg dest,
@@ -920,11 +925,6 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
 			       GLuint offset,
 			       GLuint bind_table_index);
 
-void brw_oword_block_write(struct brw_compile *p,
-			   struct brw_reg mrf,
-			   int num_regs,
-			   GLuint offset);
-
 /* If/else/endif.  Works by manipulating the execution flags on each
  * channel.
  */
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 6fbc396..fe3a029 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1359,10 +1359,10 @@ void brw_math_16( struct brw_compile *p,
  * The offset must be aligned to oword size (16 bytes).  Used for
  * register spilling.
  */
-void brw_oword_block_write(struct brw_compile *p,
-			   struct brw_reg mrf,
-			   int num_regs,
-			   GLuint offset)
+void brw_oword_block_write_scratch(struct brw_compile *p,
+				   struct brw_reg mrf,
+				   int num_regs,
+				   GLuint offset)
 {
    struct intel_context *intel = &p->brw->intel;
    uint32_t msg_control;
@@ -1458,11 +1458,11 @@ void brw_oword_block_write(struct brw_compile *p,
  * spilling.
  */
 void
-brw_oword_block_read(struct brw_compile *p,
-		     struct brw_reg dest,
-		     struct brw_reg mrf,
-		     int num_regs,
-		     GLuint offset)
+brw_oword_block_read_scratch(struct brw_compile *p,
+			     struct brw_reg dest,
+			     struct brw_reg mrf,
+			     int num_regs,
+			     GLuint offset)
 {
    uint32_t msg_control;
    int rlen;
@@ -1517,65 +1517,57 @@ brw_oword_block_read(struct brw_compile *p,
    }
 }
 
-
 /**
  * Read a float[4] vector from the data port Data Cache (const buffer).
  * Location (in buffer) should be a multiple of 16.
  * Used for fetching shader constants.
- * If relAddr is true, we'll do an indirect fetch using the address register.
  */
-void brw_dp_READ_4( struct brw_compile *p,
-                    struct brw_reg dest,
-                    GLboolean relAddr,
-                    GLuint location,
-                    GLuint bind_table_index )
+void brw_oword_block_read(struct brw_compile *p,
+			  struct brw_reg dest,
+			  struct brw_reg mrf,
+			  uint32_t offset,
+			  uint32_t bind_table_index)
 {
-   /* XXX: relAddr not implemented */
-   GLuint msg_reg_nr = 1;
-   {
-      struct brw_reg b;
-      brw_push_insn_state(p);
-      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-      brw_set_mask_control(p, BRW_MASK_DISABLE);
+   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
 
-   /* Setup MRF[1] with location/offset into const buffer */
-      b = brw_message_reg(msg_reg_nr);
-      b = retype(b, BRW_REGISTER_TYPE_UD);
-      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
-       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
-       */
-      brw_MOV(p, b, brw_imm_ud(location));
-      brw_pop_insn_state(p);
-   }
+   brw_push_insn_state(p);
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
 
-   {
-      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
-   
-      insn->header.predicate_control = BRW_PREDICATE_NONE;
-      insn->header.compression_control = BRW_COMPRESSION_NONE; 
-      insn->header.destreg__conditionalmod = msg_reg_nr;
-      insn->header.mask_control = BRW_MASK_DISABLE;
-  
-      /* cast dest to a uword[8] vector */
-      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
-      brw_set_dest(insn, dest);
-      brw_set_src0(insn, brw_null_reg());
+   /* set message header global offset field (reg 0, element 2) */
+   brw_MOV(p,
+	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+			       mrf.nr,
+			       2), BRW_REGISTER_TYPE_UD),
+	   brw_imm_ud(offset));
 
-      brw_set_dp_read_message(p->brw,
-			      insn,
-			      bind_table_index,
-			      0,  /* msg_control (0 means 1 Oword) */
-			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
-			      0, /* source cache = data cache */
-			      1, /* msg_length */
-			      1, /* response_length (1 Oword) */
-			      0); /* eot */
-   }
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   insn->header.destreg__conditionalmod = mrf.nr;
+
+   /* cast dest to a uword[8] vector */
+   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+
+   brw_set_dest(insn, dest);
+   brw_set_src0(insn, brw_null_reg());
+
+   brw_set_dp_read_message(p->brw,
+			   insn,
+			   bind_table_index,
+			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
+			   0, /* source cache = data cache */
+			   1, /* msg_length */
+			   1, /* response_length (1 reg, 2 owords!) */
+			   0); /* eot */
+
+   brw_pop_insn_state(p);
 }
 
 
+
 /**
  * Read float[4] constant(s) from VS constant buffer.
  * For relative addressing, two float[4] constants will be read into 'dest'.
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index bade5e4..c687fde 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -286,6 +286,7 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
    case GLSL_TYPE_BOOL:
       vec_values = fp->Base.Parameters->ParameterValues[loc];
       for (unsigned int i = 0; i < type->vector_elements; i++) {
+	 assert(c->prog_data.nr_params < ARRAY_SIZE(c->prog_data.param));
 	 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
       }
       return 1;
@@ -2230,7 +2231,8 @@ fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
    brw_MOV(p,
 	   retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
 	   retype(src, BRW_REGISTER_TYPE_UD));
-   brw_oword_block_write(p, brw_message_reg(inst->base_mrf), 1, inst->offset);
+   brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
+				 inst->offset);
 }
 
 void
@@ -2251,8 +2253,39 @@ fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
    if (intel->gen == 4 && !intel->is_g4x)
       brw_MOV(p, brw_null_reg(), dst);
 
-   brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), 1,
-			inst->offset);
+   brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
+				inst->offset);
+
+   if (intel->gen == 4 && !intel->is_g4x) {
+      /* gen4 errata: destination from a send can't be used as a
+       * destination until it's been read.  Just read it so we don't
+       * have to worry.
+       */
+      brw_MOV(p, brw_null_reg(), dst);
+   }
+}
+
+
+void
+fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
+{
+   assert(inst->mlen != 0);
+
+   /* Clear any post destination dependencies that would be ignored by
+    * the block read.  See the B-Spec for pre-gen5 send instruction.
+    *
+    * This could use a better solution, since texture sampling and
+    * math reads could potentially run into it as well -- anywhere
+    * that we have a SEND with a destination that is a register that
+    * was written but not read within the last N instructions (what's
+    * N?  unsure).  This is rare because of dead code elimination, but
+    * not impossible.
+    */
+   if (intel->gen == 4 && !intel->is_g4x)
+      brw_MOV(p, brw_null_reg(), dst);
+
+   brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
+			inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);
 
    if (intel->gen == 4 && !intel->is_g4x) {
       /* gen4 errata: destination from a send can't be used as a
@@ -2433,6 +2466,66 @@ fs_visitor::split_virtual_grfs()
    }
 }
 
+/**
+ * Choose accesses from the UNIFORM file to demote to using the pull
+ * constant buffer.
+ *
+ * We allow a fragment shader to have more than the specified minimum
+ * maximum number of fragment shader uniform components (64).  If
+ * there are too many of these, they'd fill up all of register space.
+ * So, this will push some of them out to the pull constant buffer and
+ * update the program to load them.
+ */
+void
+fs_visitor::setup_pull_constants()
+{
+   /* Only allow 16 registers (128 uniform components) as push constants. */
+   unsigned int max_uniform_components = 16 * 8;
+   if (c->prog_data.nr_params <= max_uniform_components)
+      return;
+
+   /* Just demote the end of the list.  We could probably do better
+    * here, demoting things that are rarely used in the program first.
+    */
+   int pull_uniform_base = max_uniform_components;
+   int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      fs_inst *inst = (fs_inst *)iter.get();
+
+      for (int i = 0; i < 3; i++) {
+	 if (inst->src[i].file != UNIFORM)
+	    continue;
+
+	 int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+	 if (uniform_nr < pull_uniform_base)
+	    continue;
+
+	 fs_reg dst = fs_reg(this, glsl_type::float_type);
+	 fs_inst *pull = new(mem_ctx) fs_inst(FS_OPCODE_PULL_CONSTANT_LOAD,
+					      dst);
+	 pull->offset = ((uniform_nr - pull_uniform_base) * 4) & ~15;
+	 pull->ir = inst->ir;
+	 pull->annotation = inst->annotation;
+	 pull->base_mrf = 14;
+	 pull->mlen = 1;
+
+	 inst->insert_before(pull);
+
+	 inst->src[i].file = GRF;
+	 inst->src[i].reg = dst.reg;
+	 inst->src[i].reg_offset = 0;
+	 inst->src[i].smear = (uniform_nr - pull_uniform_base) & 3;
+      }
+   }
+
+   for (int i = 0; i < pull_uniform_count; i++) {
+      c->prog_data.pull_param[i] = c->prog_data.param[pull_uniform_base + i];
+   }
+   c->prog_data.nr_params -= pull_uniform_count;
+   c->prog_data.nr_pull_params = pull_uniform_count;
+}
+
 void
 fs_visitor::calculate_live_intervals()
 {
@@ -2721,6 +2814,7 @@ fs_visitor::register_coalesce()
 	       scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
 	       scan_inst->src[i].abs |= inst->src[0].abs;
 	       scan_inst->src[i].negate ^= inst->src[0].negate;
+	       scan_inst->src[i].smear = inst->src[0].smear;
 	    }
 	 }
       }
@@ -2749,7 +2843,7 @@ fs_visitor::compute_to_mrf()
 	  inst->predicated ||
 	  inst->dst.file != MRF || inst->src[0].file != GRF ||
 	  inst->dst.type != inst->src[0].type ||
-	  inst->src[0].abs || inst->src[0].negate)
+	  inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != -1)
 	 continue;
 
       /* Can't compute-to-MRF this GRF if someone else was going to
@@ -2897,8 +2991,13 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
    case GRF:
    case ARF:
    case MRF:
-      brw_reg = brw_vec8_reg(reg->file,
-			    reg->hw_reg, 0);
+      if (reg->smear == -1) {
+	 brw_reg = brw_vec8_reg(reg->file,
+				reg->hw_reg, 0);
+      } else {
+	 brw_reg = brw_vec1_reg(reg->file,
+				reg->hw_reg, reg->smear);
+      }
       brw_reg = retype(brw_reg, reg->type);
       break;
    case IMM:
@@ -3136,6 +3235,10 @@ fs_visitor::generate_code()
 	 generate_unspill(inst, dst);
 	 break;
 
+      case FS_OPCODE_PULL_CONSTANT_LOAD:
+	 generate_pull_constant_load(inst, dst);
+	 break;
+
       case FS_OPCODE_FB_WRITE:
 	 generate_fb_write(inst);
 	 break;
@@ -3221,6 +3324,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
       v.emit_fb_writes();
 
       v.split_virtual_grfs();
+      v.setup_pull_constants();
 
       v.assign_curb_setup();
       v.assign_urb_setup();
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index b571c23..9b7fcde 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -76,6 +76,7 @@ enum fs_opcodes {
    FS_OPCODE_DISCARD_AND,
    FS_OPCODE_SPILL,
    FS_OPCODE_UNSPILL,
+   FS_OPCODE_PULL_CONSTANT_LOAD,
 };
 
 
@@ -100,6 +101,7 @@ public:
       this->negate = 0;
       this->abs = 0;
       this->hw_reg = -1;
+      this->smear = -1;
    }
 
    /** Generic unset register constructor. */
@@ -162,6 +164,7 @@ public:
    bool negate;
    bool abs;
    struct brw_reg fixed_hw_reg;
+   int smear; /* -1, or a channel of the reg to smear to all channels. */
 
    /** Value for file == BRW_IMMMEDIATE_FILE */
    union {
@@ -366,6 +369,7 @@ public:
    int choose_spill_reg(struct ra_graph *g);
    void spill_reg(int spill_reg);
    void split_virtual_grfs();
+   void setup_pull_constants();
    void calculate_live_intervals();
    bool propagate_constants();
    bool register_coalesce();
@@ -384,6 +388,7 @@ public:
    void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
    void generate_spill(fs_inst *inst, struct brw_reg src);
    void generate_unspill(fs_inst *inst, struct brw_reg dst);
+   void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst);
 
    void emit_dummy_fs();
    fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index b5bfd00..d7acc30 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -401,6 +401,7 @@ fs_visitor::spill_reg(int spill_reg)
 	 spill_src.reg_offset = 0;
 	 spill_src.abs = false;
 	 spill_src.negate = false;
+	 spill_src.smear = -1;
 
 	 for (int chan = 0; chan < size; chan++) {
 	    fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 88bc64e..d06c49f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1576,7 +1576,7 @@ static void emit_spill( struct brw_wm_compile *c,
      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
    */
-   brw_oword_block_write(p, brw_message_reg(1), 2, slot);
+   brw_oword_block_write_scratch(p, brw_message_reg(1), 2, slot);
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 55aceea..d325f85 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -307,21 +307,20 @@ static void prealloc_reg(struct brw_wm_compile *c)
 
         /* use a real constant buffer, or just use a section of the GRF? */
         /* XXX this heuristic may need adjustment... */
-        if ((nr_params + nr_temps) * 4 + reg_index > 80)
-           c->fp->use_const_buffer = GL_TRUE;
-        else
-           c->fp->use_const_buffer = GL_FALSE;
-        /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
+        if ((nr_params + nr_temps) * 4 + reg_index > 80) {
+	   for (i = 0; i < nr_params; i++) {
+	      float *pv = c->fp->program.Base.Parameters->ParameterValues[i];
+	      for (j = 0; j < 4; j++) {
+		 c->prog_data.pull_param[c->prog_data.nr_pull_params] = &pv[j];
+		 c->prog_data.nr_pull_params++;
+	      }
+	   }
 
-        if (c->fp->use_const_buffer) {
-           /* We'll use a real constant buffer and fetch constants from
-            * it with a dataport read message.
-            */
+	   c->prog_data.nr_params = 0;
+	}
+        /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
 
-           /* number of float constants in CURBE */
-           c->prog_data.nr_params = 0;
-        }
-        else {
+        if (!c->prog_data.nr_pull_params) {
            const struct gl_program_parameter_list *plist = 
               c->fp->program.Base.Parameters;
            int index = 0;
@@ -463,7 +462,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
      * They'll be found in these registers.
      * XXX alloc these on demand!
      */
-    if (c->fp->use_const_buffer) {
+    if (c->prog_data.nr_pull_params) {
        for (i = 0; i < 3; i++) {
           c->current_const[i].index = -1;
           c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
@@ -501,12 +500,11 @@ static void fetch_constants(struct brw_wm_compile *c,
 #endif
 
 	 /* need to fetch the constant now */
-	 brw_dp_READ_4(p,
-		       c->current_const[i].reg,  /* writeback dest */
-		       src->RelAddr,             /* relative indexing? */
-		       16 * src->Index,          /* byte offset */
-		       SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */
-		       );
+	 brw_oword_block_read(p,
+			      c->current_const[i].reg,
+			      brw_message_reg(1),
+			      16 * src->Index,
+			      SURF_INDEX_FRAG_CONST_BUFFER);
       }
    }
 }
@@ -606,7 +604,7 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
        }
     }
 
-    if (c->fp->use_const_buffer &&
+    if (c->prog_data.nr_pull_params &&
         (src->File == PROGRAM_STATE_VAR ||
          src->File == PROGRAM_CONSTANT ||
          src->File == PROGRAM_UNIFORM)) {
@@ -729,7 +727,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 #endif
 
         /* fetch any constants that this instruction needs */
-        if (c->fp->use_const_buffer)
+        if (c->prog_data.nr_pull_params)
            fetch_constants(c, inst);
 
 	if (inst->Opcode != OPCODE_ARL) {
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 5588702..dd5ddea 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -319,13 +319,14 @@ prepare_wm_constants(struct brw_context *brw)
    struct intel_context *intel = &brw->intel;
    struct brw_fragment_program *fp =
       (struct brw_fragment_program *) brw->fragment_program;
-   const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
-   const int size = params->NumParameters * 4 * sizeof(GLfloat);
+   const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
+   float *constants;
+   unsigned int i;
 
    _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
 
    /* BRW_NEW_FRAGMENT_PROGRAM */
-   if (!fp->use_const_buffer) {
+   if (brw->wm.prog_data->nr_pull_params == 0) {
       if (brw->wm.const_bo) {
 	 drm_intel_bo_unreference(brw->wm.const_bo);
 	 brw->wm.const_bo = NULL;
@@ -335,11 +336,18 @@ prepare_wm_constants(struct brw_context *brw)
    }
 
    drm_intel_bo_unreference(brw->wm.const_bo);
-   brw->wm.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
+   brw->wm.const_bo = drm_intel_bo_alloc(intel->bufmgr, "WM const bo",
 					 size, 64);
 
    /* _NEW_PROGRAM_CONSTANTS */
-   drm_intel_bo_subdata(brw->wm.const_bo, 0, size, params->ParameterValues);
+   drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
+   constants = brw->wm.const_bo->virtual;
+   for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
+      constants[i] = *brw->wm.prog_data->pull_param[i];
+   }
+   drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
+
+   brw->state.dirty.brw |= BRW_NEW_WM_CONSTBUF;
 }
 
 const struct brw_tracked_state brw_wm_constants = {
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 21059be..ce489f0 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -87,7 +87,7 @@ upload_wm_state(struct brw_context *brw)
       brw_fragment_program_const(brw->fragment_program);
    uint32_t dw2, dw4, dw5, dw6;
 
-   if (fp->use_const_buffer || brw->wm.prog_data->nr_params == 0) {
+   if (brw->wm.prog_data->nr_params == 0) {
       /* Disable the push constant buffers. */
       BEGIN_BATCH(5);
       OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2));




More information about the mesa-commit mailing list