[Mesa-dev] [PATCH 4/4] i965/cs: Implement DispatchComputeIndirect support

Jordan Justen jordan.l.justen at intel.com
Sat Sep 19 15:50:49 PDT 2015


Signed-off-by: Jordan Justen <jordan.l.justen at intel.com>
---
 src/mesa/drivers/dri/i965/brw_compute.c | 57 ++++++++++++++++++++++++++++++---
 src/mesa/drivers/dri/i965/brw_defines.h |  2 ++
 src/mesa/drivers/dri/i965/intel_reg.h   |  5 +++
 3 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index 5693ab5..5641823 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -31,14 +31,46 @@
 #include "brw_draw.h"
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
 #include "brw_defines.h"
 
 
 static void
-brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
+brw_emit_gpgpu_walker(struct brw_context *brw,
+                      const void *compute_param,
+                      bool indirect)
 {
    const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
 
+   const GLuint *num_groups;
+   uint32_t indirect_flag;
+
+   if (!indirect) {
+      num_groups = (const GLuint *)compute_param;
+      indirect_flag = 0;
+   } else {
+      /* compute_param points at the caller's GLintptr buffer offset. */
+      GLintptr indirect_offset = *(const GLintptr *)compute_param;
+      static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
+      num_groups = indirect_group_counts;
+      struct gl_buffer_object *indirect_buffer = brw->ctx.DispatchIndirectBuffer;
+      drm_intel_bo *bo = intel_bufferobj_buffer(brw,
+            intel_buffer_object(indirect_buffer),
+            indirect_offset, 3 * sizeof(GLuint));
+
+      indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE;
+      /* Source the X/Y/Z dispatch registers from offsets +0/+4/+8 in bo. */
+      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo,
+                            I915_GEM_DOMAIN_VERTEX, 0,
+                            indirect_offset + 0);
+      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo,
+                            I915_GEM_DOMAIN_VERTEX, 0,
+                            indirect_offset + 4);
+      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo,
+                            I915_GEM_DOMAIN_VERTEX, 0,
+                            indirect_offset + 8);
+   }
+
    const unsigned simd_size = prog_data->simd_size;
    unsigned group_size = prog_data->local_size[0] *
       prog_data->local_size[1] * prog_data->local_size[2];
@@ -52,7 +84,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
 
    uint32_t dwords = brw->gen < 8 ? 11 : 15;
    BEGIN_BATCH(dwords);
-   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2));
+   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag);
    OUT_BATCH(0);
    if (brw->gen >= 8) {
       OUT_BATCH(0);                     /* Indirect Data Length */
@@ -83,7 +115,9 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
 
 
 static void
-brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
+brw_dispatch_compute_common(struct gl_context *ctx,
+                            const void *compute_param,
+                            bool indirect)
 {
    struct brw_context *brw = brw_context(ctx);
    int estimated_buffer_space_needed;
@@ -117,7 +151,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
    brw->no_batch_wrap = true;
    brw_upload_compute_state(brw);
 
-   brw_emit_gpgpu_walker(brw, num_groups);
+   brw_emit_gpgpu_walker(brw, compute_param, indirect);
 
    brw->no_batch_wrap = false;
 
@@ -155,9 +189,24 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
     */
 }
 
+/* DispatchCompute hook: forwards num_groups as the direct parameter. */
+static void
+brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
+{
+   brw_dispatch_compute_common(ctx, num_groups, false);
+}
+
+/* Driver hook for DispatchComputeIndirect (see brw_init_compute_functions). */
+static void
+brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
+{
+   /* Pass the offset by address; it is read back as a GLintptr downstream. */
+   brw_dispatch_compute_common(ctx, &indirect, true);
+}
 
 void
 brw_init_compute_functions(struct dd_function_table *functions)
 {
    functions->DispatchCompute = brw_dispatch_compute;
+   functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 8fc8ceb..2de51d0 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2698,6 +2698,8 @@ enum brw_wm_barycentric_interp_mode {
 # define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK     INTEL_MASK(9, 0)
 #define MEDIA_STATE_FLUSH                       0x7004
 #define GPGPU_WALKER                            0x7105
+/* GEN7 DW0: OR'd into the GPGPU_WALKER header dword for indirect dispatch */
+# define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE   (1 << 10)
 /* GEN8+ DW2 */
 # define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT     0
 # define GPGPU_WALKER_INDIRECT_LENGTH_MASK      INTEL_MASK(15, 0)
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
index 58007d3..a261c2b 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -173,6 +173,11 @@
 #define GEN7_3DPRIM_START_INSTANCE      0x243C
 #define GEN7_3DPRIM_BASE_VERTEX         0x2440
 
+/* Auto-Compute / Indirect Registers: X, Y and Z dispatch dimensions */
+#define GEN7_GPGPU_DISPATCHDIMX         0x2500
+#define GEN7_GPGPU_DISPATCHDIMY         0x2504
+#define GEN7_GPGPU_DISPATCHDIMZ         0x2508
+
 #define GEN7_CACHE_MODE_1               0x7004
 # define GEN8_HIZ_NP_PMA_FIX_ENABLE        (1 << 11)
 # define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13)
-- 
2.5.1



More information about the mesa-dev mailing list