[Mesa-dev] [PATCH 4/4] i965/cs: Implement DispatchComputeIndirect support
Kristian Høgsberg
krh at bitplanet.net
Tue Sep 22 17:01:03 PDT 2015
On Sat, Sep 19, 2015 at 03:50:49PM -0700, Jordan Justen wrote:
> Signed-off-by: Jordan Justen <jordan.l.justen at intel.com>
> ---
> src/mesa/drivers/dri/i965/brw_compute.c | 57 ++++++++++++++++++++++++++++++---
> src/mesa/drivers/dri/i965/brw_defines.h | 2 ++
> src/mesa/drivers/dri/i965/intel_reg.h | 5 +++
> 3 files changed, 60 insertions(+), 4 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
> index 5693ab5..5641823 100644
> --- a/src/mesa/drivers/dri/i965/brw_compute.c
> +++ b/src/mesa/drivers/dri/i965/brw_compute.c
> @@ -31,14 +31,46 @@
> #include "brw_draw.h"
> #include "brw_state.h"
> #include "intel_batchbuffer.h"
> +#include "intel_buffer_objects.h"
> #include "brw_defines.h"
>
>
> static void
> -brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
> +brw_emit_gpgpu_walker(struct brw_context *brw,
> + const void *compute_param,
> + bool indirect)
> {
> const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
>
> + const GLuint *num_groups;
> + uint32_t indirect_flag;
> +
> + if (!indirect) {
> + num_groups = (const GLuint *)compute_param;
> + indirect_flag = 0;
> + } else {
> + GLintptr indirect_offset = *(GLintptr*)compute_param;
I would call this as
brw_dispatch_compute_common(ctx, indirect, true);
from brw_dispatch_compute_indirect(), passing the offset value itself in
the pointer argument instead of passing the address of indirect, and then just say
GLintptr indirect_offset = (GLintptr)compute_param;
here. GLintptr is pointer-sized, so that conversion is guaranteed to work.
With that change, the series is
Reviewed-by: Kristian Høgsberg <krh at bitplanet.net>
> + static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
> + num_groups = indirect_group_counts;
> +
> + struct gl_buffer_object *indirect_buffer = brw->ctx.DispatchIndirectBuffer;
> + drm_intel_bo *bo = intel_bufferobj_buffer(brw,
> + intel_buffer_object(indirect_buffer),
> + indirect_offset, 3 * sizeof(GLuint));
> +
> + indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE;
> +
> + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo,
> + I915_GEM_DOMAIN_VERTEX, 0,
> + indirect_offset + 0);
> + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo,
> + I915_GEM_DOMAIN_VERTEX, 0,
> + indirect_offset + 4);
> + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo,
> + I915_GEM_DOMAIN_VERTEX, 0,
> + indirect_offset + 8);
> + }
> +
> const unsigned simd_size = prog_data->simd_size;
> unsigned group_size = prog_data->local_size[0] *
> prog_data->local_size[1] * prog_data->local_size[2];
> @@ -52,7 +84,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
>
> uint32_t dwords = brw->gen < 8 ? 11 : 15;
> BEGIN_BATCH(dwords);
> - OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2));
> + OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag);
> OUT_BATCH(0);
> if (brw->gen >= 8) {
> OUT_BATCH(0); /* Indirect Data Length */
> @@ -83,7 +115,9 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
>
>
> static void
> -brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
> +brw_dispatch_compute_common(struct gl_context *ctx,
> + const void *compute_param,
> + bool indirect)
> {
> struct brw_context *brw = brw_context(ctx);
> int estimated_buffer_space_needed;
> @@ -117,7 +151,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
> brw->no_batch_wrap = true;
> brw_upload_compute_state(brw);
>
> - brw_emit_gpgpu_walker(brw, num_groups);
> + brw_emit_gpgpu_walker(brw, compute_param, indirect);
>
> brw->no_batch_wrap = false;
>
> @@ -155,9 +189,24 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
> */
> }
>
> +static void
> +brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) {
> + brw_dispatch_compute_common(ctx,
> + num_groups,
> + false);
> +}
> +
> +static void
> +brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
> +{
> + brw_dispatch_compute_common(ctx,
> + &indirect,
> + true);
> +}
>
> void
> brw_init_compute_functions(struct dd_function_table *functions)
> {
> functions->DispatchCompute = brw_dispatch_compute;
> + functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 8fc8ceb..2de51d0 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -2698,6 +2698,8 @@ enum brw_wm_barycentric_interp_mode {
> # define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0)
> #define MEDIA_STATE_FLUSH 0x7004
> #define GPGPU_WALKER 0x7105
> +/* GEN7 DW0 */
> +# define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10)
> /* GEN8+ DW2 */
> # define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0
> # define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0)
> diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
> index 58007d3..a261c2b 100644
> --- a/src/mesa/drivers/dri/i965/intel_reg.h
> +++ b/src/mesa/drivers/dri/i965/intel_reg.h
> @@ -173,6 +173,11 @@
> #define GEN7_3DPRIM_START_INSTANCE 0x243C
> #define GEN7_3DPRIM_BASE_VERTEX 0x2440
>
> +/* Auto-Compute / Indirect Registers */
> +#define GEN7_GPGPU_DISPATCHDIMX 0x2500
> +#define GEN7_GPGPU_DISPATCHDIMY 0x2504
> +#define GEN7_GPGPU_DISPATCHDIMZ 0x2508
> +
> #define GEN7_CACHE_MODE_1 0x7004
> # define GEN8_HIZ_NP_PMA_FIX_ENABLE (1 << 11)
> # define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13)
> --
> 2.5.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list