Mesa (master): i965: checkpoint commit: VS constant buffers

Brian Paul brianp at kemper.freedesktop.org
Tue Apr 14 17:11:49 UTC 2009


Module: Mesa
Branch: master
Commit: cafea7528052624c8d3e4cd1c5b26a61bf04d1d0
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=cafea7528052624c8d3e4cd1c5b26a61bf04d1d0

Author: Brian Paul <brianp at vmware.com>
Date:   Tue Apr 14 11:08:42 2009 -0600

i965: checkpoint commit: VS constant buffers

Hook up a constant buffer, binding table, etc. for the VS unit.
This will allow using large constant buffers with vertex shaders.
The new code is disabled at this time (use_const_buffer=FALSE).

---

 src/mesa/drivers/dri/i965/brw_context.h          |   30 +++-
 src/mesa/drivers/dri/i965/brw_curbe.c            |    2 +
 src/mesa/drivers/dri/i965/brw_eu.h               |   11 +-
 src/mesa/drivers/dri/i965/brw_eu_emit.c          |   66 ++++++-
 src/mesa/drivers/dri/i965/brw_misc_state.c       |    7 +-
 src/mesa/drivers/dri/i965/brw_vs.h               |    7 +
 src/mesa/drivers/dri/i965/brw_vs_emit.c          |  229 ++++++++++++++++++----
 src/mesa/drivers/dri/i965/brw_vs_state.c         |    8 +
 src/mesa/drivers/dri/i965/brw_vtbl.c             |    1 +
 src/mesa/drivers/dri/i965/brw_wm_state.c         |    2 +-
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  205 ++++++++++++++++---
 11 files changed, 477 insertions(+), 91 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 6a9252d..4c2d3af 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -141,7 +141,8 @@ struct brw_context;
 #define BRW_NEW_BATCH			0x10000
 /** brw->depth_region updated */
 #define BRW_NEW_DEPTH_BUFFER		0x20000
-#define BRW_NEW_NR_SURFACES		0x40000
+#define BRW_NEW_NR_WM_SURFACES		0x40000
+#define BRW_NEW_NR_VS_SURFACES		0x80000
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
@@ -245,20 +246,30 @@ struct brw_vs_ouput_sizes {
 #define BRW_MAX_TEX_UNIT 16
 
 /**
- * Size of our surface binding table.
+ * Size of our surface binding table for the WM.
  * This contains pointers to the drawing surfaces and current texture
  * objects and shader constant buffers (+2).
  */
-#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2)
+#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
 
 /**
  * Helpers to convert drawing buffers, textures and constant buffers
- * to surface binding table indexes.
+ * to surface binding table indexes, for WM.
  */
 #define SURF_INDEX_DRAW(d)           (d)
-#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS + 0) 
-#define SURF_INDEX_VERT_CONST_BUFFER (MAX_DRAW_BUFFERS + 1)
-#define SURF_INDEX_TEXTURE(t)        (MAX_DRAW_BUFFERS + 2 + t)
+#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) 
+#define SURF_INDEX_TEXTURE(t)        (MAX_DRAW_BUFFERS + 1 + (t))
+
+/**
+ * Size of surface binding table for the VS.
+ * Only one constant buffer for now.
+ */
+#define BRW_VS_MAX_SURF 1
+
+/**
+ * Only a VS constant buffer
+ */
+#define SURF_INDEX_VERT_CONST_BUFFER 0
 
 
 enum brw_cache_id {
@@ -566,6 +577,11 @@ struct brw_context
 
       dri_bo *prog_bo;
       dri_bo *state_bo;
+
+      /** Binding table of pointers to surf_bo entries */
+      dri_bo *bind_bo;
+      dri_bo *surf_bo[BRW_VS_MAX_SURF];
+      GLuint nr_surfaces;      
    } vs;
 
    struct {
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 94bf2c0..dfab14a 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -357,6 +357,7 @@ update_constant_buffer(struct brw_context *brw,
 }
 
 
+/** Copy current vertex program's parameters into the constant buffer */
 static void
 update_vertex_constant_buffer(struct brw_context *brw)
 {
@@ -366,6 +367,7 @@ update_vertex_constant_buffer(struct brw_context *brw)
 }
 
 
+/** Copy current fragment program's parameters into the constant buffer */
 static void
 update_fragment_constant_buffer(struct brw_context *brw)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index d05f2e6..e492ce1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -862,9 +862,18 @@ void brw_dp_READ_4( struct brw_compile *p,
                     struct brw_reg dest,
                     GLuint msg_reg_nr,
                     GLboolean relAddr,
-                    GLuint scratch_offset,
+                    GLuint location,
                     GLuint bind_table_index );
 
+/* XXX this function is temporary - merge with brw_dp_READ_4() above. */
+void brw_dp_READ_4_vs( struct brw_compile *p,
+                       struct brw_reg dest,
+                       struct brw_reg src,
+                       GLuint msg_reg_nr,
+                       GLboolean relAddr,
+                       GLuint location,
+                       GLuint bind_table_index );
+
 void brw_dp_WRITE_16( struct brw_compile *p,
 		      struct brw_reg src,
 		      GLuint msg_reg_nr,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index ec4d7fa..bb7ea5c 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -952,7 +952,7 @@ void brw_dp_READ_16( struct brw_compile *p,
 
 /**
  * Read a float[4] vector from the data port Data Cache (const buffer).
- * Scratch offset should be a multiple of 16.
+ * Location (in buffer) should be a multiple of 16.
  * Used for fetching shader constants.
  * If relAddr is true, we'll do an indirect fetch using the address register.
  */
@@ -960,7 +960,7 @@ void brw_dp_READ_4( struct brw_compile *p,
                     struct brw_reg dest,
                     GLuint msg_reg_nr,
                     GLboolean relAddr,
-                    GLuint scratch_offset,
+                    GLuint location,
                     GLuint bind_table_index )
 {
    {
@@ -971,7 +971,7 @@ void brw_dp_READ_4( struct brw_compile *p,
       /* set message header global offset field (reg 0, element 2) */
       brw_MOV(p,
 	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
-	      brw_imm_d(scratch_offset));
+	      brw_imm_d(location));
       brw_pop_insn_state(p);
    }
 
@@ -1001,6 +1001,66 @@ void brw_dp_READ_4( struct brw_compile *p,
 }
 
 
+/* XXX this function is temporary - merge with brw_dp_READ_4() above. */
+void brw_dp_READ_4_vs(struct brw_compile *p,
+                      struct brw_reg dest,
+                      struct brw_reg src,
+                      GLuint msg_reg_nr,
+                      GLboolean relAddr,
+                      GLuint location,
+                      GLuint bind_table_index)
+{
+   {
+      brw_push_insn_state(p);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+      /*src.nr = 0;*/
+
+      /* set message header global offset field (reg 0, element 2) */
+      brw_MOV(p,
+#if 1
+	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+#elif 0
+	      retype(brw_vec1_grf(src.nr, 2), BRW_REGISTER_TYPE_UD),
+#endif
+	      brw_imm_d(location));
+
+      brw_pop_insn_state(p);
+   }
+
+   {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   
+      insn->header.predicate_control = BRW_PREDICATE_NONE;
+      insn->header.compression_control = BRW_COMPRESSION_NONE; 
+      insn->header.destreg__conditonalmod = msg_reg_nr;
+      insn->header.mask_control = BRW_MASK_DISABLE;
+  
+      /* cast dest to a uword[8] vector */
+      //      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+
+      brw_set_dest(insn, dest);
+#if 1
+      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
+#elif 0
+      brw_set_src0(insn, retype(brw_vec8_grf(src.nr, 0), BRW_REGISTER_TYPE_UW));
+#endif
+
+      printf("vs const read msg, location %u, msg_reg_nr %d\n", location, msg_reg_nr);
+      brw_set_dp_read_message(insn,
+			      bind_table_index,
+			      0,  /* msg_control (0 means 1 Oword) */
+			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+			      0, /* source cache = data cache */
+			      1, /* msg_length */
+			      1, /* response_length (1 Oword) */
+			      0); /* eot */
+   }
+}
+
+
+
 void brw_fb_WRITE(struct brw_compile *p,
                   struct brw_reg dest,
                   GLuint msg_reg_nr,
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 5c94a49..9bc5c35 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -101,6 +101,7 @@ const struct brw_tracked_state brw_drawing_rect = {
 
 static void prepare_binding_table_pointers(struct brw_context *brw)
 {
+   brw_add_validated_bo(brw, brw->vs.bind_bo);
    brw_add_validated_bo(brw, brw->wm.bind_bo);
 }
 
@@ -117,13 +118,11 @@ static void upload_binding_table_pointers(struct brw_context *brw)
 
    BEGIN_BATCH(6, IGNORE_CLIPRECTS);
    OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
-   OUT_BATCH(0); /* vs */
+   OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
    OUT_BATCH(0); /* gs */
    OUT_BATCH(0); /* clip */
    OUT_BATCH(0); /* sf */
-   OUT_RELOC(brw->wm.bind_bo,
-	     I915_GEM_DOMAIN_SAMPLER, 0,
-	     0);
+   OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
    ADVANCE_BATCH();
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 99d0e93..d20cf78 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -75,6 +75,13 @@ struct brw_vs_compile {
 
    struct brw_reg userplane[6];
 
+   /** using a real constant buffer? */
+   GLboolean use_const_buffer;
+   /** we may need up to 3 constants per instruction (if use_const_buffer) */
+   struct {
+      GLint index;
+      struct brw_reg reg;
+   } current_const[3];
 };
 
 void brw_vs_emit( struct brw_vs_compile *c );
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 0d6c6ab..d21f279 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -38,8 +38,31 @@
 #include "brw_vs.h"
 
 
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+   struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
+
+   if (++c->last_tmp > c->prog_data.total_grf)
+      c->prog_data.total_grf = c->last_tmp;
 
-/* Do things as simply as possible.  Allocate and populate all regs
+   return tmp;
+}
+
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+   if (tmp.nr == c->last_tmp-1)
+      c->last_tmp--;
+}
+			       
+static void release_tmps( struct brw_vs_compile *c )
+{
+   c->last_tmp = c->first_tmp;
+}
+
+
+/**
+ * Preallocate GRF register before code emit.
+ * Do things as simply as possible.  Allocate and populate all regs
  * ahead of time.
  */
 static void brw_vs_alloc_regs( struct brw_vs_compile *c )
@@ -47,6 +70,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    GLuint i, reg = 0, mrf;
    GLuint nr_params;
 
+#if 0
+   if (c->vp->program.Base.Parameters->NumParameters >= 6)
+      c->use_const_buffer = 1;
+   else
+#endif
+      c->use_const_buffer = GL_FALSE;
+   /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/
+
    /* r0 -- reserved as usual
     */
    c->r0 = brw_vec8_grf(reg, 0);
@@ -66,13 +97,19 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 
    /* Vertex program parameters from curbe:
     */
-   nr_params = c->vp->program.Base.Parameters->NumParameters;
-   for (i = 0; i < nr_params; i++) {
-      c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
-   }     
-   reg += (nr_params + 1) / 2;
-
-   c->prog_data.curb_read_length = reg - 1;
+   if (c->use_const_buffer) {
+      /* get constants from a real constant buffer */
+      c->prog_data.curb_read_length = 0;
+   }
+   else {
+      /* use a section of the GRF for constants */
+      nr_params = c->vp->program.Base.Parameters->NumParameters;
+      for (i = 0; i < nr_params; i++) {
+         c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+      }
+      reg += (nr_params + 1) / 2;
+      c->prog_data.curb_read_length = reg - 1;
+   }
 
    /* Allocate input regs:  
     */
@@ -157,6 +194,13 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4;
    c->prog_data.total_grf = reg;
 
+   if (c->use_const_buffer) {
+       for (i = 0; i < 3; i++) {
+          c->current_const[i].index = -1;
+          c->current_const[i].reg = get_tmp(c);
+       }
+   }
+
    if (INTEL_DEBUG & DEBUG_VS) {
       _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
       _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
@@ -165,28 +209,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 }
 
 
-static struct brw_reg get_tmp( struct brw_vs_compile *c )
-{
-   struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
-
-   if (++c->last_tmp > c->prog_data.total_grf)
-      c->prog_data.total_grf = c->last_tmp;
-
-   return tmp;
-}
-
-static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
-{
-   if (tmp.nr == c->last_tmp-1)
-      c->last_tmp--;
-}
-			       
-static void release_tmps( struct brw_vs_compile *c )
-{
-   c->last_tmp = c->first_tmp;
-}
-
-
 /**
  * If an instruction uses a temp reg both as a src and the dest, we
  * sometimes need to allocate an intermediate temporary.
@@ -673,13 +695,59 @@ static void emit_nrm( struct brw_vs_compile *c,
 }
 
 
+static struct brw_reg
+get_constant(struct brw_vs_compile *c,
+             const struct prog_instruction *inst,
+             GLuint argIndex)
+{
+   const struct prog_src_register *src = &inst->SrcReg[argIndex];
+   struct brw_compile *p = &c->func;
+   struct brw_reg const_reg;
+
+   if (c->current_const[argIndex].index != src->Index) {
+      struct brw_reg src_reg = get_tmp(c);
+      struct brw_reg t = get_tmp(c);
+
+      c->current_const[argIndex].index = src->Index;
+
+      brw_MOV(p, t, brw_vec8_grf(0, 0));/*SAVE*/
+
+#if 0
+      printf("  fetch const[%d] for arg %d into reg %d\n",
+             src->Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+
+      /* need to fetch the constant now */
+      brw_dp_READ_4_vs(p,
+                       c->current_const[argIndex].reg, /* writeback dest */
+                       src_reg,                        /* src reg */
+                       1,                              /* msg_reg */
+                       src->RelAddr,                   /* relative indexing? */
+                       16 * src->Index,                /* byte offset */
+                       SURF_INDEX_VERT_CONST_BUFFER    /* binding table index */
+                       );
+
+      brw_MOV(p, brw_vec8_grf(0, 0), t);/*RESTORE*/
+      release_tmp(c, src_reg);
+      release_tmp(c, t);
+   }
+
+   /* replicate lower four floats into upper four floats (to get XYZWXYZW) */
+   const_reg = c->current_const[argIndex].reg;
+   const_reg = stride(const_reg, 0, 4, 0);
+   const_reg.subnr = 0;
+
+   return const_reg;
+}
+
+
+
 /* TODO: relative addressing!
  */
 static struct brw_reg get_reg( struct brw_vs_compile *c,
 			       gl_register_file file,
 			       GLuint index )
 {
-
    switch (file) {
    case PROGRAM_TEMPORARY:
    case PROGRAM_INPUT:
@@ -708,13 +776,63 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
 }
 
 
+/**
+ * Get brw reg corresponding to the instruction's [argIndex] src reg.
+ * TODO: relative addressing!
+ */
+static struct brw_reg
+get_src_reg( struct brw_vs_compile *c,
+             const struct prog_instruction *inst,
+             GLuint argIndex )
+{
+   const GLuint file = inst->SrcReg[argIndex].File;
+   const GLint index = inst->SrcReg[argIndex].Index;
+
+   switch (file) {
+   case PROGRAM_TEMPORARY:
+   case PROGRAM_INPUT:
+   case PROGRAM_OUTPUT:
+      assert(c->regs[file][index].nr != 0);
+      return c->regs[file][index];
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:
+   case PROGRAM_UNIFORM:
+      if (c->use_const_buffer) {
+         return get_constant(c, inst, argIndex);
+      }
+      else {
+         assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+         return c->regs[PROGRAM_STATE_VAR][index];
+      }
+   case PROGRAM_ADDRESS:
+      assert(index == 0);
+      return c->regs[file][index];
+
+   case PROGRAM_UNDEFINED:
+      /* this is a normal case since we loop over all three src args */
+      return brw_null_reg();
+
+   case PROGRAM_LOCAL_PARAM: 
+   case PROGRAM_ENV_PARAM: 
+   case PROGRAM_WRITE_ONLY:
+   default:
+      assert(0);
+      return brw_null_reg();
+   }
+}
+
+
+/**
+ * Indirect addressing:  get reg[[arg] + offset].
+ */
 static struct brw_reg deref( struct brw_vs_compile *c,
 			     struct brw_reg arg,
 			     GLint offset)
 {
    struct brw_compile *p = &c->func;
    struct brw_reg tmp = vec4(get_tmp(c));
-   struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
+   struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+   struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
    GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
    struct brw_reg indirect = brw_vec4_indirect(0,0);
 
@@ -758,22 +876,29 @@ static void emit_arl( struct brw_vs_compile *c,
 }
 
 
-/* Will return mangled results for SWZ op.  The emit_swz() function
+/**
+ * Return the brw reg for the given instruction's src argument.
+ * Will return mangled results for SWZ op.  The emit_swz() function
  * ignores this result and recalculates taking extended swizzles into
  * account.
  */
 static struct brw_reg get_arg( struct brw_vs_compile *c,
-			       struct prog_src_register *src )
+                               const struct prog_instruction *inst,
+                               GLuint argIndex )
 {
+   const struct prog_src_register *src = &inst->SrcReg[argIndex];
    struct brw_reg reg;
 
    if (src->File == PROGRAM_UNDEFINED)
       return brw_null_reg();
 
-   if (src->RelAddr) 
+   if (src->RelAddr) {
+      /* XXX fix */
       reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
-   else
-      reg = get_reg(c, src->File, src->Index);
+   }
+   else {
+      reg = get_src_reg(c, inst, argIndex);
+   }
 
    /* Convert 3-bit swizzle to 2-bit.  
     */
@@ -790,10 +915,28 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
 }
 
 
+/**
+ * Get brw register for the given program dest register.
+ */
 static struct brw_reg get_dst( struct brw_vs_compile *c,
 			       struct prog_dst_register dst )
 {
-   struct brw_reg reg = get_reg(c, dst.File, dst.Index);
+   struct brw_reg reg;
+
+   switch (dst.File) {
+   case PROGRAM_TEMPORARY:
+   case PROGRAM_OUTPUT:
+      assert(c->regs[dst.File][dst.Index].nr != 0);
+      reg = c->regs[dst.File][dst.Index];
+      break;
+   case PROGRAM_UNDEFINED:
+      /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
+      reg = brw_null_reg();
+      break;
+   default:
+      assert(0);
+      reg = brw_null_reg();
+   }
 
    reg.dw1.bits.writemask = dst.WriteMask;
 
@@ -803,8 +946,10 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
 
 static void emit_swz( struct brw_vs_compile *c, 
 		      struct brw_reg dst,
-		      struct prog_src_register src )
+                      const struct prog_instruction *inst)
 {
+   const GLuint argIndex = 0;
+   const struct prog_src_register src = inst->SrcReg[argIndex];
    struct brw_compile *p = &c->func;
    GLuint zeros_mask = 0;
    GLuint ones_mask = 0;
@@ -847,7 +992,7 @@ static void emit_swz( struct brw_vs_compile *c,
       if (src.RelAddr) 
 	 arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
       else
-	 arg0 = get_reg(c, src.File, src.Index);
+	 arg0 = get_src_reg(c, inst, argIndex);
 
       arg0 = brw_swizzle(arg0, 
 			 src_swz[0], src_swz[1], 
@@ -1053,7 +1198,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
 	      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
 		  args[i] = c->output_regs[index].reg;
 	      else
-		  args[i] = get_arg(c, src);
+                  args[i] = get_arg(c, inst, i);
 	  }
 
       /* Get dest regs.  Note that it is possible for a reg to be both
@@ -1181,7 +1326,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
 	 /* The args[0] value can't be used here as it won't have
 	  * correctly encoded the full swizzle:
 	  */
-	 emit_swz(c, dst, inst->SrcReg[0] );
+	 emit_swz(c, dst, inst);
 	 break;
       case OPCODE_TRUNC:
          /* round toward zero */
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index 1a63766..3d29538 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -44,6 +44,8 @@ struct brw_vs_unit_key {
    unsigned int curbe_offset;
 
    unsigned int nr_urb_entries, urb_size;
+
+   unsigned int nr_surfaces;
 };
 
 static void
@@ -62,6 +64,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
    key->nr_urb_entries = brw->urb.nr_vs_entries;
    key->urb_size = brw->urb.vsize;
 
+   /* BRW_NEW_NR_VS_SURFACES */
+   key->nr_surfaces = brw->vs.nr_surfaces;
+
    /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
    if (ctx->Transform.ClipPlanesEnabled) {
       /* Note that we read in the userclip planes as well, hence
@@ -92,6 +97,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
     * brw_urb_WRITE() results.
     */
    vs.thread1.single_program_flow = 0;
+   vs.thread1.binding_table_entry_count = key->nr_surfaces;
+
    vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
    vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
    vs.thread3.dispatch_grf_start_reg = 1;
@@ -158,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM,
       .brw   = (BRW_NEW_CURBE_OFFSETS |
+                BRW_NEW_NR_VS_SURFACES |
 		BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_VS_PROG
    },
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 960bbb3..ba03afd 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -79,6 +79,7 @@ static void brw_destroy_context( struct intel_context *intel )
    dri_bo_release(&brw->curbe.curbe_bo);
    dri_bo_release(&brw->vs.prog_bo);
    dri_bo_release(&brw->vs.state_bo);
+   dri_bo_release(&brw->vs.bind_bo);
    dri_bo_release(&brw->gs.prog_bo);
    dri_bo_release(&brw->gs.state_bo);
    dri_bo_release(&brw->clip.prog_bo);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 58fa6aa..67b4117 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -290,7 +290,7 @@ const struct brw_tracked_state brw_wm_unit = {
 
       .brw = (BRW_NEW_FRAGMENT_PROGRAM | 
 	      BRW_NEW_CURBE_OFFSETS |
-	      BRW_NEW_NR_SURFACES),
+	      BRW_NEW_NR_WM_SURFACES),
 
       .cache = (CACHE_NEW_WM_PROG |
 		CACHE_NEW_SAMPLER)
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 095759f..ce5dbb3 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -176,7 +176,11 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
    }
 }
 
-struct brw_wm_surface_key {
+
+/**
+ * Use same key for WM and VS surfaces.
+ */
+struct brw_surface_key {
    GLenum target, depthmode;
    dri_bo *bo;
    GLint format, internal_format;
@@ -187,6 +191,7 @@ struct brw_wm_surface_key {
    GLuint offset;
 };
 
+
 static void
 brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
 {
@@ -208,7 +213,7 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
 
 static dri_bo *
 brw_create_texture_surface( struct brw_context *brw,
-			    struct brw_wm_surface_key *key )
+			    struct brw_surface_key *key )
 {
    struct brw_surface_state surf;
    dri_bo *bo;
@@ -287,7 +292,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
    struct intel_texture_object *intelObj = intel_texture_object(tObj);
    struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
-   struct brw_wm_surface_key key;
+   struct brw_surface_key key;
    const GLuint surf = SURF_INDEX_TEXTURE(unit);
 
    memset(&key, 0, sizeof(key));
@@ -328,12 +333,12 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
 
 
 /**
- * Create the constant buffer surface.  Fragment shader constanst will be
+ * Create the constant buffer surface.  Vertex/fragment shader constants will be
  * read from this buffer with Data Port Read instructions/messages.
  */
 static dri_bo *
 brw_create_constant_surface( struct brw_context *brw,
-                             struct brw_wm_surface_key *key )
+                             struct brw_surface_key *key )
 {
    const GLint w = key->width - 1;
    struct brw_surface_state surf;
@@ -345,8 +350,6 @@ brw_create_constant_surface( struct brw_context *brw,
    surf.ss0.surface_type = BRW_SURFACE_BUFFER;
    surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 
-   /* This is ok for all textures with channel width 8bit or less:
-    */
    assert(key->bo);
    if (key->bo)
       surf.ss1.base_addr = key->bo->offset; /* reloc */
@@ -356,8 +359,8 @@ brw_create_constant_surface( struct brw_context *brw,
    surf.ss2.width = w & 0x7f;            /* bits 6:0 of size or width */
    surf.ss2.height = (w >> 7) & 0x1fff;  /* bits 19:7 of size or width */
    surf.ss3.depth = (w >> 20) & 0x7f;    /* bits 26:20 of size or width */
-   surf.ss3.pitch = (key->pitch * key->cpp) - 1;
-   brw_set_surface_tiling(&surf, key->tiling);
+   surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
+   brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
  
    bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
 			 key, sizeof(*key),
@@ -379,17 +382,17 @@ brw_create_constant_surface( struct brw_context *brw,
 
 
 /**
- * Update the surface state for a constant buffer.
+ * Update the surface state for a WM constant buffer.
  * The constant buffer will be (re)allocated here if needed.
  */
 static dri_bo *
-brw_update_constant_surface( GLcontext *ctx,
-                             GLuint surf,
-                             dri_bo *const_buffer,
-                             const struct gl_program_parameter_list *params)
+brw_update_wm_constant_surface( GLcontext *ctx,
+                                GLuint surf,
+                                dri_bo *const_buffer,
+                                const struct gl_program_parameter_list *params)
 {
    struct brw_context *brw = brw_context(ctx);
-   struct brw_wm_surface_key key;
+   struct brw_surface_key key;
    struct intel_context *intel = &brw->intel;
    const int size = params->NumParameters * 4 * sizeof(GLfloat);
 
@@ -402,7 +405,7 @@ brw_update_constant_surface( GLcontext *ctx,
    /* alloc new buffer if needed */
    if (!const_buffer) {
       const_buffer =
-         drm_intel_bo_alloc(intel->bufmgr, "vp/fp_const_buffer", size, 64);
+         drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64);
    }
 
    memset(&key, 0, sizeof(key));
@@ -437,6 +440,66 @@ brw_update_constant_surface( GLcontext *ctx,
 
 
 /**
+ * Update the surface state for a VS constant buffer.
+ * The constant buffer will be (re)allocated here if needed.
+ */
+static dri_bo *
+brw_update_vs_constant_surface( GLcontext *ctx,
+                                GLuint surf,
+                                dri_bo *const_buffer,
+                                const struct gl_program_parameter_list *params)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_surface_key key;
+   struct intel_context *intel = &brw->intel;
+   const int size = params->NumParameters * 4 * sizeof(GLfloat);
+
+   assert(surf == 0);
+
+   /* free old const buffer if too small */
+   if (const_buffer && const_buffer->size < size) {
+      dri_bo_unreference(const_buffer);
+      const_buffer = NULL;
+   }
+
+   /* alloc new buffer if needed */
+   if (!const_buffer) {
+      const_buffer =
+         drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64);
+   }
+
+   memset(&key, 0, sizeof(key));
+
+   key.format = MESA_FORMAT_RGBA_FLOAT32;
+   key.internal_format = GL_RGBA;
+   key.bo = const_buffer;
+   key.depthmode = GL_NONE;
+   key.pitch = params->NumParameters;
+   key.width = params->NumParameters;
+   key.height = 1;
+   key.depth = 1;
+   key.cpp = 16;
+
+   /*
+   printf("%s:\n", __FUNCTION__);
+   printf("  width %d  height %d  depth %d  cpp %d  pitch %d\n",
+          key.width, key.height, key.depth, key.cpp, key.pitch);
+   */
+
+   dri_bo_unreference(brw->vs.surf_bo[surf]);
+   brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
+                                            &key, sizeof(key),
+                                            &key.bo, key.bo ? 1 : 0,
+                                            NULL);
+   if (brw->vs.surf_bo[surf] == NULL) {
+      brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+   }
+
+   return const_buffer;
+}
+
+
+/**
  * Sets up a surface state structure to point at the given region.
  * While it is only used for the front/back buffer currently, it should be
  * usable for further buffers when doing ARB_draw_buffer support.
@@ -515,7 +578,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
 
       /* Key size will never match key size for textures, so we're safe. */
       brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
-					      &key, sizeof(key),
+                                               &key, sizeof(key),
 					       &region_bo, 1,
 					       &surf, sizeof(surf),
 					       NULL, NULL);
@@ -544,6 +607,8 @@ brw_wm_get_binding_table(struct brw_context *brw)
 {
    dri_bo *bind_bo;
 
+   assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
+
    bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
 			      NULL, 0,
 			      brw->wm.surf_bo, brw->wm.nr_surfaces,
@@ -603,25 +668,13 @@ static void prepare_wm_surfaces(struct brw_context *brw )
    old_nr_surfaces = brw->wm.nr_surfaces;
    brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
 
-   /* Update surface / buffer for vertex shader constant buffer */
-   {
-      const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER;
-      struct brw_vertex_program *vp =
-         (struct brw_vertex_program *) brw->vertex_program;
-      vp->const_buffer =
-         brw_update_constant_surface(ctx, surf, vp->const_buffer,
-                                     vp->program.Base.Parameters);
-
-      brw->wm.nr_surfaces = surf + 1;
-   }
-
    /* Update surface / buffer for fragment shader constant buffer */
    {
       const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
       struct brw_fragment_program *fp =
          (struct brw_fragment_program *) brw->fragment_program;
       fp->const_buffer =
-         brw_update_constant_surface(ctx, surf, fp->const_buffer,
+         brw_update_wm_constant_surface(ctx, surf, fp->const_buffer,
                                      fp->program.Base.Parameters);
 
       brw->wm.nr_surfaces = surf + 1;
@@ -655,17 +708,103 @@ static void prepare_wm_surfaces(struct brw_context *brw )
    brw->wm.bind_bo = brw_wm_get_binding_table(brw);
 
    if (brw->wm.nr_surfaces != old_nr_surfaces)
-      brw->state.dirty.brw |= BRW_NEW_NR_SURFACES;
+      brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
+}
+
+
+/**
+ * Constructs the binding table for the VS surface state.
+ */
+static dri_bo *
+brw_vs_get_binding_table(struct brw_context *brw)
+{
+   dri_bo *bind_bo;
+
+   assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF);
+
+   bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
+			      NULL, 0,
+			      brw->vs.surf_bo, brw->vs.nr_surfaces,
+			      NULL);
+
+   if (bind_bo == NULL) {
+      GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint);
+      uint32_t *data = malloc(data_size);
+      int i;
+
+      for (i = 0; i < brw->vs.nr_surfaces; i++)
+         if (brw->vs.surf_bo[i])
+            data[i] = brw->vs.surf_bo[i]->offset;
+         else
+            data[i] = 0;
+
+      bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
+				  NULL, 0,
+				  brw->vs.surf_bo, brw->vs.nr_surfaces,
+				  data, data_size,
+				  NULL, NULL);
+
+      /* Emit binding table relocations to surface state */
+      for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+	 if (brw->vs.surf_bo[i] != NULL) {
+	    dri_bo_emit_reloc(bind_bo,
+			      I915_GEM_DOMAIN_INSTRUCTION, 0,
+			      0,
+			      i * sizeof(GLuint),
+			      brw->vs.surf_bo[i]);
+	 }
+      }
+
+      free(data);
+   }
+
+   return bind_bo;
+}
+
+
+/**
+ * Vertex shader surfaces.  Just constant buffer for now.  Could add vertex 
+ * shader textures in the future.
+ */
+static void prepare_vs_surfaces(struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+
+   /* Update surface / buffer for vertex shader constant buffer */
+   {
+      const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER;
+      struct brw_vertex_program *vp =
+         (struct brw_vertex_program *) brw->vertex_program;
+      vp->const_buffer =
+         brw_update_vs_constant_surface(ctx, surf, vp->const_buffer,
+                                        vp->program.Base.Parameters);
+
+      brw->vs.nr_surfaces = 1;
+   }
+
+   dri_bo_unreference(brw->vs.bind_bo);
+   brw->vs.bind_bo = brw_vs_get_binding_table(brw);
+
+   if (1)
+      brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+}
+
+
+static void
+prepare_surfaces(struct brw_context *brw)
+{
+   prepare_wm_surfaces(brw);
+   prepare_vs_surfaces(brw);
 }
 
 
 const struct brw_tracked_state brw_wm_surfaces = {
    .dirty = {
-      .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS,
+      .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM,
       .brw = BRW_NEW_CONTEXT,
       .cache = 0
    },
-   .prepare = prepare_wm_surfaces,
+   .prepare = prepare_surfaces,
 };
 
 




More information about the mesa-commit mailing list