[Mesa-dev] [PATCH] i965: enable ARB_instanced_arrays extension

Jordan Justen jordan.l.justen at intel.com
Sun May 27 21:08:07 PDT 2012


Set each vertex buffer's step_rate from the array's InstanceDivisor
when drawing, to implement ARB_instanced_arrays for gen >= 4.

Signed-off-by: Jordan Justen <jordan.l.justen at intel.com>
---
 src/mesa/drivers/dri/i965/brw_context.h       |    4 +++
 src/mesa/drivers/dri/i965/brw_draw.c          |    1 +
 src/mesa/drivers/dri/i965/brw_draw_upload.c   |   44 +++++++++++++++++++++----
 src/mesa/drivers/dri/intel/intel_extensions.c |    1 +
 4 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 6e0e1ad..144f0f6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -676,6 +676,7 @@ struct brw_vertex_buffer {
    uint32_t offset;
    /** Byte stride between elements in the uploaded array */
    GLuint stride;
+   GLuint step_rate;
 };
 struct brw_vertex_element {
    const struct gl_client_array *glarray;
@@ -738,6 +739,7 @@ struct brw_context
 	      uint32_t handle;
 	      uint32_t offset;
 	      uint32_t stride;
+	      uint32_t step_rate;
       } current_buffers[VERT_ATTRIB_MAX];
 
       struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
@@ -1046,6 +1048,8 @@ struct brw_context
       bool in_progress;
       bool enable_cut_index;
    } prim_restart;
+
+   uint32_t num_instances;
 };
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 16ce994..1069a63 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -466,6 +466,7 @@ static bool brw_try_draw_prims( struct gl_context *ctx,
       intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);
       intel_batchbuffer_save_state(intel);
 
+      brw->num_instances = prim->num_instances;
       if (intel->gen < 6)
 	 brw_set_prim(brw, &prim[i]);
       else
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index cf77837..66c3b61 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -361,6 +361,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
    unsigned int min_index = brw->vb.min_index;
    unsigned int max_index = brw->vb.max_index;
    int delta, i, j;
+   GLboolean can_merge_uploads = GL_TRUE;
 
    struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
    GLuint nr_uploads = 0;
@@ -403,6 +404,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
 	    const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
 	    if (glarray->BufferObj == other->BufferObj &&
 		glarray->StrideB == other->StrideB &&
+		glarray->InstanceDivisor == other->InstanceDivisor &&
 		(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
 	    {
 	       input->buffer = brw->vb.enabled[k]->buffer;
@@ -420,6 +422,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
 	    drm_intel_bo_reference(buffer->bo);
 	    buffer->offset += (uintptr_t)glarray->Ptr;
 	    buffer->stride = glarray->StrideB;
+	    buffer->step_rate = glarray->InstanceDivisor;
 
 	    input->buffer = j++;
 	    input->offset = 0;
@@ -465,8 +468,14 @@ static void brw_prepare_vertices(struct brw_context *brw)
 	 }
 
 	 upload[nr_uploads++] = input;
+
 	 total_size = ALIGN(total_size, type_size);
 	 total_size += input->element_size;
+	 if (can_merge_uploads) {
+	    if ((total_size >= 2048) || (glarray->InstanceDivisor != 0)) {
+	       can_merge_uploads = GL_FALSE;
+	    }
+	 }
       }
    }
 
@@ -504,7 +513,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
 
 	 nr_uploads = 0;
       }
-      else if (total_size < 2048) {
+      else if (can_merge_uploads) {
 	 /* Upload non-interleaved arrays into a single interleaved array */
 	 struct brw_vertex_buffer *buffer;
 	 int count = MAX2(max_index - min_index + 1, 1);
@@ -539,6 +548,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
 	 intel_upload_unmap(&brw->intel, map, offset * count, offset,
 			    &buffer->bo, &buffer->offset);
 	 buffer->stride = offset;
+	 buffer->step_rate = 0;
 	 buffer->offset -= delta * offset;
 
 	 nr_uploads = 0;
@@ -547,9 +557,21 @@ static void brw_prepare_vertices(struct brw_context *brw)
    /* Upload non-interleaved arrays */
    for (i = 0; i < nr_uploads; i++) {
       struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
-      copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
-			      buffer, upload[i]->element_size);
+      if (upload[i]->glarray->InstanceDivisor == 0) {
+         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
+                                 buffer, upload[i]->element_size);
+      } else {
+         /* This is an instanced attribute, since its InstanceDivisor
+          * is not zero. Therefore, its data advances by one element
+          * each time InstanceDivisor instances have been drawn.
+          */
+         uint32_t instanced_attr_max_index =
+            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
+         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
+                                 buffer, upload[i]->element_size);
+      }
       buffer->offset -= delta * buffer->stride;
+      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
       upload[i]->buffer = j++;
       upload[i]->offset = 0;
    }
@@ -561,7 +583,8 @@ static void brw_prepare_vertices(struct brw_context *brw)
 	 int d;
 
 	 if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle ||
-	     brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride)
+	     brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride ||
+	     brw->vb.current_buffers[i].step_rate != brw->vb.buffers[i].step_rate)
 	    break;
 
 	 d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset;
@@ -643,9 +666,15 @@ static void brw_emit_vertices(struct brw_context *brw)
 	 uint32_t dw0;
 
 	 if (intel->gen >= 6) {
-	    dw0 = GEN6_VB0_ACCESS_VERTEXDATA | (i << GEN6_VB0_INDEX_SHIFT);
+	    dw0 = buffer->step_rate
+	             ? GEN6_VB0_ACCESS_INSTANCEDATA
+	             : GEN6_VB0_ACCESS_VERTEXDATA;
+	    dw0 |= i << GEN6_VB0_INDEX_SHIFT;
 	 } else {
-	    dw0 = BRW_VB0_ACCESS_VERTEXDATA | (i << BRW_VB0_INDEX_SHIFT);
+	    dw0 = buffer->step_rate
+	             ? BRW_VB0_ACCESS_INSTANCEDATA
+	             : BRW_VB0_ACCESS_VERTEXDATA;
+	    dw0 |= i << BRW_VB0_INDEX_SHIFT;
 	 }
 
 	 if (intel->gen >= 7)
@@ -657,11 +686,12 @@ static void brw_emit_vertices(struct brw_context *brw)
 	    OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
 	 } else
 	    OUT_BATCH(0);
-	 OUT_BATCH(0); /* Instance data step rate */
+	 OUT_BATCH(buffer->step_rate);
 
 	 brw->vb.current_buffers[i].handle = buffer->bo->handle;
 	 brw->vb.current_buffers[i].offset = buffer->offset;
 	 brw->vb.current_buffers[i].stride = buffer->stride;
+	 brw->vb.current_buffers[i].step_rate = buffer->step_rate;
       }
       brw->vb.nr_current_buffers = i;
       ADVANCE_BATCH();
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index 6c29aff..02b8e57 100755
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -115,6 +115,7 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.ARB_depth_buffer_float = true;
       ctx->Extensions.ARB_depth_clamp = true;
       ctx->Extensions.ARB_draw_instanced = true;
+      ctx->Extensions.ARB_instanced_arrays = true;
       ctx->Extensions.ARB_fragment_coord_conventions = true;
       ctx->Extensions.ARB_fragment_program_shadow = true;
       ctx->Extensions.ARB_fragment_shader = true;
-- 
1.7.9.5



More information about the mesa-dev mailing list