Mesa (i965g-restart): i965g: first pass at vs immediates in curbe

Keith Whitwell keithw at kemper.freedesktop.org
Fri Nov 20 13:44:45 UTC 2009


Module: Mesa
Branch: i965g-restart
Commit: 63b0af07755201e5ad630bf7f67a7997263734d6
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=63b0af07755201e5ad630bf7f67a7997263734d6

Author: Keith Whitwell <keithw at vmware.com>
Date:   Thu Nov 19 19:51:04 2009 -0800

i965g: first pass at vs immediates in curbe

---

 src/gallium/drivers/i965/brw_context.h     |    6 ++
 src/gallium/drivers/i965/brw_curbe.c       |   40 ++++++---
 src/gallium/drivers/i965/brw_pipe_shader.c |   43 ++++++++++
 src/gallium/drivers/i965/brw_vs_emit.c     |  120 +++++++++++----------------
 4 files changed, 124 insertions(+), 85 deletions(-)

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 64279c4..096c8cf 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -149,12 +149,17 @@ struct brw_blend_state {
 
 struct brw_rasterizer_state;
 
+struct brw_immediate_data {
+   unsigned nr;           /**< number of vec4 immediates stored in data */
+   float (*data)[4];      /**< nr rows of four floats, one row per immediate */
+};
 
 struct brw_vertex_shader {
    const struct tgsi_token *tokens;
    struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
 
    struct tgsi_shader_info info;
+   struct brw_immediate_data immediates;
 
    GLuint has_flow_control:1;
    GLuint use_const_buffer:1;
@@ -189,6 +194,7 @@ struct brw_fragment_shader {
    struct tgsi_shader_info info;
 
    struct brw_fs_signature signature;
+   struct brw_immediate_data immediates;
 
    unsigned iz_lookup;
    //unsigned wm_lookup;
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index 5fa1723..3e821d5 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -226,21 +226,34 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
    /* vertex shader constants */
    if (brw->curbe.vs_size) {
       GLuint offset = brw->curbe.vs_start * 16;
-      GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      struct brw_vertex_shader *vs = brw->curr.vertex_shader;
+      GLuint nr_immediate, nr_const;
 
-      /* XXX: note that constant buffers are currently *already* in
-       * buffer objects.  If we want to keep on putting them into the
-       * curbe, makes sense to treat constbuf's specially with malloc.
-       */
-      const GLfloat *value = screen->buffer_map( screen,
-						 brw->curr.vertex_constants,
-						 PIPE_BUFFER_USAGE_CPU_READ);
+      nr_immediate = vs->immediates.nr;
+      if (nr_immediate) {
+         memcpy(&buf[offset], 
+                vs->immediates.data,
+                nr_immediate * 4 * sizeof(float));
 
-      /* XXX: what if user's constant buffer is too small?
-       */
-      memcpy(&buf[offset], value, nr * 4 * sizeof(float));
+         offset += nr_immediate * 4;
+      }
 
-      screen->buffer_unmap( screen, brw->curr.vertex_constants );
+      nr_const = vs->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      if (nr_const) {
+         /* XXX: note that constant buffers are currently *already* in
+          * buffer objects.  If we want to keep on putting them into the
+          * curbe, makes sense to treat constbuf's specially with malloc.
+          */
+         const GLfloat *value = screen->buffer_map( screen,
+                                                    brw->curr.vertex_constants,
+                                                    PIPE_BUFFER_USAGE_CPU_READ);
+         
+         /* XXX: what if user's constant buffer is too small?
+          */
+         memcpy(&buf[offset], value, nr_const * 4 * sizeof(float));
+         
+         screen->buffer_unmap( screen, brw->curr.vertex_constants );
+      }
    }
 
    if (BRW_DEBUG & DEBUG_CURBE) {
@@ -263,8 +276,7 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
    } 
    else {
       /* constants have changed */
-      if (brw->curbe.last_buf)
-	 FREE(brw->curbe.last_buf);
+      FREE(brw->curbe.last_buf);
 
       brw->curbe.last_buf = buf;
       brw->curbe.last_bufsz = bufsz;
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 3222ee7..31a715a 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -55,6 +55,47 @@ static GLboolean has_flow_control(const struct tgsi_shader_info *info)
 }
 
 
+/* Copy the immediates declared ahead of the first instruction into imm. */
+static void scan_immediates(const struct tgsi_token *tokens,
+                            const struct tgsi_shader_info *info,
+                            struct brw_immediate_data *imm)
+{
+   struct tgsi_parse_context parse;
+   boolean done = FALSE;
+
+   imm->nr = 0;
+   imm->data = MALLOC(info->immediate_count * 4 * sizeof(float));
+   if (imm->data == NULL)
+      return;   /* allocation failed: leave the list empty, don't write to NULL */
+
+   tgsi_parse_init( &parse, tokens );
+   while (!done && !tgsi_parse_end_of_tokens( &parse )) {
+      tgsi_parse_token( &parse );
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_IMMEDIATE: {
+         /* Components the token does not supply default to (0,0,0,1). */
+         static const float id[4] = {0,0,0,1};
+         const float *value = &parse.FullToken.FullImmediate.u[0].Float;
+         unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+         unsigned i;
+         /* Never write past the immediate_count rows allocated above. */
+         if (imm->nr >= info->immediate_count) { done = TRUE; break; }
+         for (i = 0; i < 4; i++)   /* bounded by 4 even if size is larger */
+            imm->data[imm->nr][i] = (i < size) ? value[i] : id[i];
+         imm->nr++;
+         break;
+      }
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         done = TRUE;   /* stop scanning at the first instruction */
+         break;
+      default:   /* declarations and any other token types are skipped */
+         break;
+      }
+   }
+   tgsi_parse_free( &parse );
+}
+
 
 static void brw_bind_fs_state( struct pipe_context *pipe, void *prog )
 {
@@ -106,6 +147,7 @@ static void *brw_create_fs_state( struct pipe_context *pipe,
       goto fail;
 
    tgsi_scan_shader(fs->tokens, &fs->info);
+   scan_immediates(fs->tokens, &fs->info, &fs->immediates);
 
    fs->signature.nr_inputs = fs->info.num_inputs;
    for (i = 0; i < fs->info.num_inputs; i++) {
@@ -150,6 +192,7 @@ static void *brw_create_vs_state( struct pipe_context *pipe,
       goto fail;
 
    tgsi_scan_shader(vs->tokens, &vs->info);
+   scan_immediates(vs->tokens, &vs->info, &vs->immediates);
 
    vs->id = brw->program_id++;
    vs->has_flow_control = has_flow_control(&vs->info);
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 52d4731..00f0af2 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -42,6 +42,15 @@
 #include "brw_vs.h"
 #include "brw_debug.h"
 
+/* Build the register region for vec4 number 'slot', counting from GRF 'reg'.
+ */
+static INLINE struct brw_reg brw_vec4_grf_repeat( GLuint reg, GLuint slot )
+{
+   int nr = reg + slot/2;         /* two vec4 slots share each register number */
+   int subnr = (slot%2) * 4;      /* odd slots start at sub-register 4 */
+
+   return stride(brw_vec4_grf(nr, subnr), 0, 4, 1);
+}
 
 
 static struct brw_reg get_tmp( struct brw_vs_compile *c )
@@ -119,7 +128,7 @@ static boolean find_output_slot( struct brw_vs_compile *c,
  */
 static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 {
-   GLuint i, reg = 0, mrf;
+   GLuint i, reg = 0, subreg = 0, mrf;
    int attributes_in_vue;
 
    /* Determine whether to use a real constant buffer or use a block
@@ -150,33 +159,57 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    /* User clip planes from curbe: 
     */
    if (c->key.nr_userclip) {
-      for (i = 0; i < c->key.nr_userclip; i++) {
-	 c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
+      /* Skip over fixed planes:  Or never read them into vs unit?
+       */
+      subreg += 6;
+
+      for (i = 0; i < c->key.nr_userclip; i++, subreg++) {
+	 c->userplane[i] = 
+            stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
       }     
 
       /* Deal with curbe alignment:
        */
-      reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;
+      subreg = align(subreg, 2);
+      /*reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;*/
    }
 
-   /* Vertex program parameters from curbe:
+
+   /* Immediates: always in the curbe.
+    *
+    * XXX: Can try to encode some immediates as brw immediates
+    * XXX: Make sure ureg sets minimal immediate size and respect it
+    * here.
     */
-   if (c->vp->use_const_buffer) {
-      /* get constants from a real constant buffer */
-      c->prog_data.curb_read_length = 0;
-      c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+   for (i = 0; i < c->vp->info.immediate_count; i++, subreg++) {
+      c->regs[TGSI_FILE_IMMEDIATE][i] = 
+         stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
    }
-   else {
-      /* use a section of the GRF for constants */
+   c->prog_data.nr_params = c->vp->info.immediate_count * 4;
+
+
+   /* Vertex constant buffer.
+    *
+    * Constants from the buffer can be either cached in the curbe or
+    * loaded as needed from the actual constant buffer.
+    */
+   if (!c->vp->use_const_buffer) {
       GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1;
-      for (i = 0; i < nr_params; i++) {
-         c->regs[TGSI_FILE_CONSTANT][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+
+      for (i = 0; i < nr_params; i++, subreg++) {
+         c->regs[TGSI_FILE_CONSTANT][i] =
+            stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
       }
-      reg += (nr_params + 1) / 2;
-      c->prog_data.curb_read_length = reg - 1;
-      c->prog_data.nr_params = nr_params * 4;
+
+      c->prog_data.nr_params += nr_params * 4;
    }
 
+   /* All regs allocated
+    */
+   reg += (subreg + 1) / 2;
+   c->prog_data.curb_read_length = reg - 1;
+
+
    /* Allocate input regs:  
     */
    c->nr_inputs = c->vp->info.num_inputs;
@@ -191,28 +224,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    if (c->nr_inputs == 0)
       reg++;
 
-   /* Allocate a GRF and load immediate values by hand with 4 MOVs!!!
-    *
-    * XXX: Try to encode float immediates as brw immediates
-    * XXX: Put immediates into the CURBE.
-    * XXX: Make sure ureg sets minimal immediate size and respect it
-    * here.
-    */
-   for (i = 0; i < c->nr_immediates; i++) {
-      struct brw_reg r;
-      int j;
-
-      c->regs[TGSI_FILE_IMMEDIATE][i] = 
-         r = brw_vec8_grf(reg, 0);
-
-      for (j = 0; j < 4; j++) {
-	 brw_MOV(&c->func, 
-		 brw_writemask(r, (1<<j)), 
-		 brw_imm_f(c->immediate[i][j]));
-      }
-
-      reg++;
-   }
 
 
    /* Allocate outputs.  The non-position outputs go straight into message regs.
@@ -1605,8 +1616,6 @@ void brw_vs_emit(struct brw_vs_compile *c)
    struct brw_instruction *end_inst, *last_inst;
    struct tgsi_parse_context parse;
    struct tgsi_full_instruction *inst;
-   boolean done = FALSE;
-   int i;
 
    if (BRW_DEBUG & DEBUG_VS)
       tgsi_dump(c->vp->tokens, 0); 
@@ -1616,37 +1625,6 @@ void brw_vs_emit(struct brw_vs_compile *c)
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_access_mode(p, BRW_ALIGN_16);
    
-   /* Inputs */
-   tgsi_parse_init( &parse, tokens );
-   while( !tgsi_parse_end_of_tokens( &parse ) ) {
-      tgsi_parse_token( &parse );
-
-      switch( parse.FullToken.Token.Type ) {
-      case TGSI_TOKEN_TYPE_DECLARATION:
-	 /* Nothing to do -- using info from tgsi_scan().
-	  */
-         break;
-
-      case TGSI_TOKEN_TYPE_IMMEDIATE: {
-	 static const float id[4] = {0,0,0,1};
-	 const float *imm = &parse.FullToken.FullImmediate.u[0].Float;
-	 unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
-
-	 for (i = 0; i < size; i++)
-	    c->immediate[c->nr_immediates][i] = imm[i];
-
-	 for ( ; i < 4; i++)
-	    c->immediate[c->nr_immediates][i] = id[i];
-
-	 c->nr_immediates++;
-	 break;
-      }
-
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-	 done = 1;
-	 break;
-      }
-   }
 
    /* Static register allocation
     */




More information about the mesa-commit mailing list