Mesa (instanced-arrays): Add lame support for instanceID to draw module.

Michał Król michal at kemper.freedesktop.org
Wed Dec 30 13:19:54 PST 2009


Module: Mesa
Branch: instanced-arrays
Commit: 543b9566bdaa48fea2df1866fa1310c1cdbcde27
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=543b9566bdaa48fea2df1866fa1310c1cdbcde27

Author: Michal Krol <michal at vmware.com>
Date:   Wed Dec 30 22:18:53 2009 +0100

Add lame support for instanceID to draw module.

The instance ID really calls for integer support in the shader path -- until that exists, fake it by passing the value as a float.

---

 src/gallium/auxiliary/draw/draw_pipe_vbuf.c        |    3 +-
 src/gallium/auxiliary/draw/draw_pt.h               |    3 +-
 src/gallium/auxiliary/draw/draw_pt_emit.c          |    3 +-
 src/gallium/auxiliary/draw/draw_pt_fetch.c         |   32 +++++++---
 src/gallium/auxiliary/draw/draw_pt_fetch_emit.c    |    1 +
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |   14 ++++-
 src/gallium/auxiliary/draw/draw_vs_varient.c       |    3 +
 src/gallium/auxiliary/translate/translate.h        |    6 ++
 .../auxiliary/translate/translate_generic.c        |   26 +++++---
 src/gallium/auxiliary/translate/translate_sse.c    |   70 +++++++++++++------
 src/gallium/drivers/svga/svga_state_vs.c           |    1 +
 11 files changed, 118 insertions(+), 44 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index bb8a8ff..d40c035 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -271,7 +271,8 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
 	 emit_sz = 0;
 	 break;
       }
-      
+
+      hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
       hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
       hw_key.element[i].input_buffer = src_buffer;
       hw_key.element[i].input_offset = src_offset;
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 20edf7a..d5e0d92 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -183,7 +183,8 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
 struct pt_fetch;
 void draw_pt_fetch_prepare( struct pt_fetch *fetch,
                             unsigned vertex_input_count,
-			    unsigned vertex_size );
+                            unsigned vertex_size,
+                            unsigned instance_id_index );
 
 void draw_pt_fetch_run( struct pt_fetch *fetch,
 			const unsigned *elts,
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index d0abeb9..4fb5327 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -121,7 +121,8 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
 	 emit_sz = 0;
 	 break;
       }
-      
+
+      hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
       hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
       hw_key.element[i].input_buffer = src_buffer;
       hw_key.element[i].input_offset = src_offset;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index f88a839..36c27e2 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -58,12 +58,14 @@ struct pt_fetch {
  */
 void draw_pt_fetch_prepare( struct pt_fetch *fetch,
                             unsigned vs_input_count,
-			    unsigned vertex_size )
+                            unsigned vertex_size,
+                            unsigned instance_id_index )
 {
    struct draw_context *draw = fetch->draw;
    unsigned nr_inputs;
-   unsigned i, nr = 0;
+   unsigned i, nr = 0, ei = 0;
    unsigned dst_offset = 0;
+   unsigned num_extra_inputs = 0;
    struct translate_key key;
 
    fetch->vertex_size = vertex_size;
@@ -78,6 +80,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
    {
       /* Need to set header->vertex_id = 0xffff somehow.
        */
+      key.element[nr].type = TRANSLATE_ELEMENT_NORMAL;
       key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;
       key.element[nr].input_buffer = draw->pt.nr_vertex_buffers;
       key.element[nr].input_offset = 0;
@@ -92,16 +95,27 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
        */
       dst_offset += 4 * sizeof(float);
    }
-      
-   assert( draw->pt.nr_vertex_elements >= vs_input_count );
 
-   nr_inputs = MIN2( vs_input_count, draw->pt.nr_vertex_elements );
+   if (instance_id_index != ~0) {
+      num_extra_inputs++;
+   }
+
+   assert(draw->pt.nr_vertex_elements + num_extra_inputs >= vs_input_count);
+
+   nr_inputs = MIN2(vs_input_count, draw->pt.nr_vertex_elements + num_extra_inputs);
 
    for (i = 0; i < nr_inputs; i++) {
-      key.element[nr].input_format = draw->pt.vertex_element[i].src_format;
-      key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index;
-      key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset;
-      key.element[nr].instance_divisor = draw->pt.vertex_element[i].instance_divisor;
+      if (i == instance_id_index) {
+         key.element[nr].type = TRANSLATE_ELEMENT_INSTANCE_ID;
+         key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;    /* XXX: Make it UINT. */
+      } else {
+         key.element[nr].type = TRANSLATE_ELEMENT_NORMAL;
+         key.element[nr].input_format = draw->pt.vertex_element[ei].src_format;
+         key.element[nr].input_buffer = draw->pt.vertex_element[ei].vertex_buffer_index;
+         key.element[nr].input_offset = draw->pt.vertex_element[ei].src_offset;
+         key.element[nr].instance_divisor = draw->pt.vertex_element[ei].instance_divisor;
+         ei++;
+      }
       key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
       key.element[nr].output_offset = dst_offset;
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 771d94b..2a60447 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -166,6 +166,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
 	 continue;
       }
 
+      key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
       key.element[i].input_format = input_format;
       key.element[i].input_buffer = input_buffer;
       key.element[i].input_offset = input_offset;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 9321137..0238f2e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -58,6 +58,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
    struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
    struct draw_context *draw = fpme->draw;
    struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+   unsigned i;
+   boolean instance_id_index = ~0;
 
    /* Add one to num_outputs because the pipeline occasionally tags on
     * an additional texcoord, eg for AA lines.
@@ -65,6 +67,15 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
    unsigned nr = MAX2( vs->info.num_inputs,
 		       vs->info.num_outputs + 1 );
 
+   /* Scan for instanceID system value.
+    */
+   for (i = 0; i < vs->info.num_inputs; i++) {
+      if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) {
+         instance_id_index = i;
+         break;
+      }
+   }
+
    fpme->prim = prim;
    fpme->opt = opt;
 
@@ -78,7 +89,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
 
    draw_pt_fetch_prepare( fpme->fetch, 
                           vs->info.num_inputs,
-			  fpme->vertex_size );
+                          fpme->vertex_size,
+                          instance_id_index );
    /* XXX: it's not really gl rasterization rules we care about here,
     * but gl vs dx9 clip spaces.
     */
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 8e14bdd..60b7a3e 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -284,6 +284,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
    fetch.nr_elements = key->nr_inputs;
    fetch.output_stride = vsvg->temp_vertex_stride;
    for (i = 0; i < key->nr_inputs; i++) {
+      fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL;
       fetch.element[i].input_format = key->element[i].in.format;
       fetch.element[i].input_buffer = key->element[i].in.buffer;
       fetch.element[i].input_offset = key->element[i].in.offset;
@@ -299,6 +300,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
    for (i = 0; i < key->nr_outputs; i++) {
       if (key->element[i].out.format != EMIT_1F_PSIZE)
       {      
+         emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
          emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
          emit.element[i].input_buffer = 0;
          emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
@@ -308,6 +310,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
          assert(emit.element[i].input_offset <= fetch.output_stride);
       }
       else {
+         emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
          emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
          emit.element[i].input_buffer = 1;
          emit.element[i].input_offset = 0;
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index 9ae7a48..54ed2c1 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -44,8 +44,14 @@
 #include "pipe/p_format.h"
 #include "pipe/p_state.h"
 
+enum translate_element_type {
+   TRANSLATE_ELEMENT_NORMAL,
+   TRANSLATE_ELEMENT_INSTANCE_ID
+};
+
 struct translate_element 
 {
+   enum translate_element_type type;
    enum pipe_format input_format;
    enum pipe_format output_format;
    unsigned input_buffer:8;
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 742f03b..24727d4 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -46,6 +46,8 @@ struct translate_generic {
    struct translate translate;
 
    struct {
+      enum translate_element_type type;
+
       fetch_func fetch;
       unsigned buffer;
       unsigned input_offset;
@@ -632,22 +634,27 @@ static void PIPE_CDECL generic_run( struct translate *translate,
 
       for (attr = 0; attr < nr_attrs; attr++) {
 	 float data[4];
-         const char *src;
 
 	 char *dst = (vert + 
 		      tg->attrib[attr].output_offset);
 
-         if (tg->attrib[attr].instance_divisor) {
-            src = tg->attrib[attr].input_ptr +
-                  tg->attrib[attr].input_stride *
-                  (instance_id / tg->attrib[attr].instance_divisor);
+         if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+            const char *src;
+
+            if (tg->attrib[attr].instance_divisor) {
+               src = tg->attrib[attr].input_ptr +
+                     tg->attrib[attr].input_stride *
+                     (instance_id / tg->attrib[attr].instance_divisor);
+            } else {
+               src = tg->attrib[attr].input_ptr +
+                     tg->attrib[attr].input_stride * elt;
+            }
+
+            tg->attrib[attr].fetch( src, data );
          } else {
-            src = tg->attrib[attr].input_ptr +
-                  tg->attrib[attr].input_stride * elt;
+            data[0] = (float)instance_id;
          }
 
-	 tg->attrib[attr].fetch( src, data );
-
          if (0) debug_printf("vert %d attr %d: %f %f %f %f\n",
                              i, attr, data[0], data[1], data[2], data[3]);
 
@@ -700,6 +707,7 @@ struct translate *translate_generic_create( const struct translate_key *key )
    tg->translate.run = generic_run;
 
    for (i = 0; i < key->nr_elements; i++) {
+      tg->attrib[i].type = key->element[i].type;
 
       tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format);
       tg->attrib[i].buffer = key->element[i].input_buffer;
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index ba4a246..8e152a0 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -50,13 +50,15 @@ typedef void (PIPE_CDECL *run_func)( struct translate *translate,
                                      unsigned start,
                                      unsigned count,
                                      unsigned instance_id,
-                                     void *output_buffer );
+                                     void *output_buffer,
+                                     float instance_id_float );
 
 typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
                                           const unsigned *elts,
                                           unsigned count,
                                           unsigned instance_id,
-                                          void *output_buffer );
+                                          void *output_buffer,
+                                          float instance_id_float );
 
 struct translate_buffer {
    const void *base_ptr;
@@ -70,6 +72,9 @@ struct translate_buffer_varient {
 };
 
 
+#define ELEMENT_BUFFER_INSTANCE_ID  1001
+
+
 struct translate_sse {
    struct translate translate;
 
@@ -97,6 +102,7 @@ struct translate_sse {
 
    boolean use_instancing;
    unsigned instance_id;
+   float instance_id_float;   /* XXX: needed while no integer support in TGSI */
 
    run_func      gen_run;
    run_elts_func gen_run_elts;
@@ -443,6 +449,10 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
                                       unsigned var_idx,
                                       struct x86_reg elt )
 {
+   if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
+      return x86_make_disp(p->machine_EDX,
+                           get_offset(p, &p->instance_id_float));
+   }
    if (linear && p->nr_buffer_varients == 1) {
       return p->idx_EBX;
    }
@@ -577,6 +587,14 @@ static boolean build_vertex_emit( struct translate_sse *p,
       x86_mov(p->func,
               x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
               p->tmp_EAX);
+
+      /* XXX: temporary */
+      x86_mov(p->func,
+              p->tmp_EAX,
+              x86_fn_arg(p->func, 6));
+      x86_mov(p->func,
+              x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id_float)),
+              p->tmp_EAX);
    }
 
    /* Get vertex count, compare to zero
@@ -697,7 +715,8 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
 		    elts,
 		    count,
                     instance_id,
-		    output_buffer );
+                    output_buffer,
+                    (float)instance_id );
 }
 
 static void PIPE_CDECL translate_sse_run( struct translate *translate,
@@ -712,7 +731,8 @@ static void PIPE_CDECL translate_sse_run( struct translate *translate,
 	       start,
 	       count,
                instance_id,
-	       output_buffer );
+               output_buffer,
+               (float)instance_id);
 }
 
 
@@ -735,29 +755,35 @@ struct translate *translate_sse2_create( const struct translate_key *key )
    p->translate.run = translate_sse_run;
 
    for (i = 0; i < key->nr_elements; i++) {
-      unsigned j;
+      if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
+         unsigned j;
 
-      p->nr_buffers = MAX2( p->nr_buffers, key->element[i].input_buffer + 1 );
+         p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1);
 
-      if (key->element[i].instance_divisor) {
-         p->use_instancing = TRUE;
-      }
+         if (key->element[i].instance_divisor) {
+            p->use_instancing = TRUE;
+         }
 
-      /*
-       * Map vertex element to vertex buffer varient.
-       */
-      for (j = 0; j < p->nr_buffer_varients; j++) {
-         if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer &&
-             p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) {
-            break;
+         /*
+          * Map vertex element to vertex buffer varient.
+          */
+         for (j = 0; j < p->nr_buffer_varients; j++) {
+            if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer &&
+                p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) {
+               break;
+            }
          }
+         if (j == p->nr_buffer_varients) {
+            p->buffer_varient[j].buffer_index = key->element[i].input_buffer;
+            p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor;
+            p->nr_buffer_varients++;
+         }
+         p->element_to_buffer_varient[i] = j;
+      } else {
+         assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID);
+
+         p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID;
       }
-      if (j == p->nr_buffer_varients) {
-         p->buffer_varient[j].buffer_index = key->element[i].input_buffer;
-         p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor;
-         p->nr_buffer_varients++;
-      }
-      p->element_to_buffer_varient[i] = j;
    }
 
    if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index 114de1a..82e7874 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -194,6 +194,7 @@ static int update_zero_stride( struct svga_context *svga,
 
          key.output_stride = 4 * sizeof(float);
          key.nr_elements = 1;
+         key.element[0].type = TRANSLATE_ELEMENT_NORMAL;
          key.element[0].input_format = vel->src_format;
          key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
          key.element[0].input_buffer = vel->vertex_buffer_index;



More information about the mesa-commit mailing list