[Mesa-dev] [PATCH 4/4] i965/cs: Implement DispatchComputeIndirect support

Kristian Høgsberg krh at bitplanet.net
Tue Sep 22 17:01:03 PDT 2015


On Sat, Sep 19, 2015 at 03:50:49PM -0700, Jordan Justen wrote:
> Signed-off-by: Jordan Justen <jordan.l.justen at intel.com>
> ---
>  src/mesa/drivers/dri/i965/brw_compute.c | 57 ++++++++++++++++++++++++++++++---
>  src/mesa/drivers/dri/i965/brw_defines.h |  2 ++
>  src/mesa/drivers/dri/i965/intel_reg.h   |  5 +++
>  3 files changed, 60 insertions(+), 4 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
> index 5693ab5..5641823 100644
> --- a/src/mesa/drivers/dri/i965/brw_compute.c
> +++ b/src/mesa/drivers/dri/i965/brw_compute.c
> @@ -31,14 +31,46 @@
>  #include "brw_draw.h"
>  #include "brw_state.h"
>  #include "intel_batchbuffer.h"
> +#include "intel_buffer_objects.h"
>  #include "brw_defines.h"
>  
>  
>  static void
> -brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
> +brw_emit_gpgpu_walker(struct brw_context *brw,
> +                      const void *compute_param,
> +                      bool indirect)
>  {
>     const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
>  
> +   const GLuint *num_groups;
> +   uint32_t indirect_flag;
> +
> +   if (!indirect) {
> +      num_groups = (const GLuint *)compute_param;
> +      indirect_flag = 0;
> +   } else {
> +      GLintptr indirect_offset = *(GLintptr*)compute_param;

I would make this call as

   brw_dispatch_compute_common(ctx, indirect, true);

from brw_dispatch_compute_indirect(), passing the offset by value instead
of passing the address of indirect, and then just say

      GLintptr indirect_offset = (GLintptr)compute_param;

here.  GLintptr is pointer-sized, so that conversion is guaranteed to work.

With that, series

Reviewed-by: Kristian Høgsberg <krh at bitplanet.net>

> +      static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
> +      num_groups = indirect_group_counts;
> +
> +      struct gl_buffer_object *indirect_buffer = brw->ctx.DispatchIndirectBuffer;
> +      drm_intel_bo *bo = intel_bufferobj_buffer(brw,
> +            intel_buffer_object(indirect_buffer),
> +            indirect_offset, 3 * sizeof(GLuint));
> +
> +      indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE;
> +
> +      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo,
> +                            I915_GEM_DOMAIN_VERTEX, 0,
> +                            indirect_offset + 0);
> +      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo,
> +                            I915_GEM_DOMAIN_VERTEX, 0,
> +                            indirect_offset + 4);
> +      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo,
> +                            I915_GEM_DOMAIN_VERTEX, 0,
> +                            indirect_offset + 8);
> +   }
> +
>     const unsigned simd_size = prog_data->simd_size;
>     unsigned group_size = prog_data->local_size[0] *
>        prog_data->local_size[1] * prog_data->local_size[2];
> @@ -52,7 +84,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
>  
>     uint32_t dwords = brw->gen < 8 ? 11 : 15;
>     BEGIN_BATCH(dwords);
> -   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2));
> +   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag);
>     OUT_BATCH(0);
>     if (brw->gen >= 8) {
>        OUT_BATCH(0);                     /* Indirect Data Length */
> @@ -83,7 +115,9 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
>  
>  
>  static void
> -brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
> +brw_dispatch_compute_common(struct gl_context *ctx,
> +                            const void *compute_param,
> +                            bool indirect)
>  {
>     struct brw_context *brw = brw_context(ctx);
>     int estimated_buffer_space_needed;
> @@ -117,7 +151,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
>     brw->no_batch_wrap = true;
>     brw_upload_compute_state(brw);
>  
> -   brw_emit_gpgpu_walker(brw, num_groups);
> +   brw_emit_gpgpu_walker(brw, compute_param, indirect);
>  
>     brw->no_batch_wrap = false;
>  
> @@ -155,9 +189,24 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
>      */
>  }
>  
> +static void
> +brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) {
> +   brw_dispatch_compute_common(ctx,
> +                               num_groups,
> +                               false);
> +}
> +
> +static void
> +brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
> +{
> +   brw_dispatch_compute_common(ctx,
> +                               &indirect,
> +                               true);
> +}
>  
>  void
>  brw_init_compute_functions(struct dd_function_table *functions)
>  {
>     functions->DispatchCompute = brw_dispatch_compute;
> +   functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
>  }
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 8fc8ceb..2de51d0 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -2698,6 +2698,8 @@ enum brw_wm_barycentric_interp_mode {
>  # define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK     INTEL_MASK(9, 0)
>  #define MEDIA_STATE_FLUSH                       0x7004
>  #define GPGPU_WALKER                            0x7105
> +/* GEN7 DW0 */
> +# define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE   (1 << 10)
>  /* GEN8+ DW2 */
>  # define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT     0
>  # define GPGPU_WALKER_INDIRECT_LENGTH_MASK      INTEL_MASK(15, 0)
> diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
> index 58007d3..a261c2b 100644
> --- a/src/mesa/drivers/dri/i965/intel_reg.h
> +++ b/src/mesa/drivers/dri/i965/intel_reg.h
> @@ -173,6 +173,11 @@
>  #define GEN7_3DPRIM_START_INSTANCE      0x243C
>  #define GEN7_3DPRIM_BASE_VERTEX         0x2440
>  
> +/* Auto-Compute / Indirect Registers */
> +#define GEN7_GPGPU_DISPATCHDIMX         0x2500
> +#define GEN7_GPGPU_DISPATCHDIMY         0x2504
> +#define GEN7_GPGPU_DISPATCHDIMZ         0x2508
> +
>  #define GEN7_CACHE_MODE_1               0x7004
>  # define GEN8_HIZ_NP_PMA_FIX_ENABLE        (1 << 11)
>  # define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13)
> -- 
> 2.5.1
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list