[Mesa-dev] [PATCH 8/9] freedreno: core compute support
Rob Clark
robdclark at gmail.com
Tue Apr 18 22:16:00 UTC 2017
Signed-off-by: Rob Clark <robdclark at gmail.com>
---
src/gallium/drivers/freedreno/freedreno_context.h | 14 +++-
src/gallium/drivers/freedreno/freedreno_draw.c | 47 ++++++++++++
src/gallium/drivers/freedreno/freedreno_gmem.c | 7 ++
src/gallium/drivers/freedreno/freedreno_gmem.h | 1 +
src/gallium/drivers/freedreno/freedreno_screen.c | 91 ++++++++++++++++++++++-
src/gallium/drivers/freedreno/freedreno_screen.h | 6 ++
src/gallium/drivers/freedreno/freedreno_state.c | 32 ++++++++
7 files changed, 195 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index d5d071d..a333c50 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -244,6 +244,7 @@ struct fd_context {
/* per shader-stage dirty status: */
enum fd_dirty_shader_state dirty_shader[PIPE_SHADER_TYPES];
+ void *compute;
struct pipe_blend_state *blend;
struct pipe_rasterizer_state *rasterizer;
struct pipe_depth_stencil_alpha_state *zsa;
@@ -288,6 +289,9 @@ struct fd_context {
void (*clear)(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil);
+ /* compute: */
+ void (*launch_grid)(struct fd_context *ctx, const struct pipe_grid_info *info);
+
/* constant emit: (note currently not used/needed for a2xx) */
void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
@@ -358,8 +362,16 @@ static inline void
fd_context_all_clean(struct fd_context *ctx)
{
ctx->dirty = 0;
- for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++)
+ for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
+ /* don't mark compute state as clean, since it is not emitted
+ * during a normal draw call. For the places that call _all_dirty(),
+ * it is safe to mark compute state dirty as well, but the
+ * inverse is not true.
+ */
+ if (i == PIPE_SHADER_COMPUTE)
+ continue;
ctx->dirty_shader[i] = 0;
+ }
}
static inline struct pipe_scissor_state *
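
For reference, the comment above is only about keeping compute state out of the "clean" path; the counterpart _all_dirty() helper in this same header does cover compute. A rough sketch of its shape (illustrative only, not part of this patch):

    static inline void
    fd_context_all_dirty(struct fd_context *ctx)
    {
            /* marking everything dirty, including compute, is always safe;
             * the reverse (marking compute clean from the draw path) is not
             */
            ctx->dirty = ~0;
            for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++)
                    ctx->dirty_shader[i] = ~0;
    }
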
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index 99bc840..6b145ba 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -396,6 +396,49 @@ fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
buffers, depth, stencil, x, y, w, h);
}
+static void
+fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd_batch *batch, *save_batch = NULL;
+ unsigned i;
+
+ /* TODO maybe we don't want to allocate and flush a batch each time?
+ * We could use a special bogus (ie. won't match any fb state) key
+ * in the batch-cache for compute shaders, and rely on the rest of
+ * the dependency tracking mechanism to tell us when the compute
+ * batch needs to be flushed?
+ */
+ batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx);
+ fd_batch_reference(&save_batch, ctx->batch);
+ fd_batch_reference(&ctx->batch, batch);
+
+ mtx_lock(&ctx->screen->lock);
+
+ /* Mark SSBOs as being written.. we don't actually know which ones are
+ * read vs written, so just assume the worst
+ */
+ foreach_bit(i, ctx->shaderbuf[PIPE_SHADER_COMPUTE].enabled_mask)
+ resource_written(batch, ctx->shaderbuf[PIPE_SHADER_COMPUTE].sb[i].buffer);
+
+ /* UBOs are read */
+ foreach_bit(i, ctx->constbuf[PIPE_SHADER_COMPUTE].enabled_mask)
+ resource_read(batch, ctx->constbuf[PIPE_SHADER_COMPUTE].cb[i].buffer);
+
+ /* Mark textures as being read */
+ foreach_bit(i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures)
+ resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture);
+
+ mtx_unlock(&ctx->screen->lock);
+
+ ctx->launch_grid(ctx, info);
+
+ fd_gmem_flush_compute(batch);
+
+ fd_batch_reference(&ctx->batch, save_batch);
+ fd_batch_reference(&save_batch, NULL);
+}
+
void
fd_draw_init(struct pipe_context *pctx)
{
@@ -403,4 +446,8 @@ fd_draw_init(struct pipe_context *pctx)
pctx->clear = fd_clear;
pctx->clear_render_target = fd_clear_render_target;
pctx->clear_depth_stencil = fd_clear_depth_stencil;
+
+ if (has_compute(fd_screen(pctx->screen))) {
+ pctx->launch_grid = fd_launch_grid;
+ }
}
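
To show how this new hook gets driven from the state-tracker side, here is a minimal sketch of a compute dispatch against the Gallium API. It assumes a context with compute support and a compute CSO 'cs' already created by the per-generation backend's create_compute_state; the helper name is illustrative only:

    #include "pipe/p_context.h"
    #include "pipe/p_state.h"

    /* dispatch an 8x1x1 grid of 64x1x1-thread workgroups */
    static void
    dispatch_example(struct pipe_context *pctx, void *cs)
    {
            struct pipe_grid_info info = {0};

            info.work_dim = 1;
            info.block[0] = 64;                  /* threads per workgroup */
            info.block[1] = info.block[2] = 1;
            info.grid[0] = 8;                    /* workgroups per dimension */
            info.grid[1] = info.grid[2] = 1;

            pctx->bind_compute_state(pctx, cs);  /* -> fd_bind_compute_state() */
            pctx->launch_grid(pctx, &info);      /* -> fd_launch_grid() above */
    }
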
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index d9f707d..e8b13de 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -435,6 +435,13 @@ fd_gmem_render_noop(struct fd_batch *batch)
flush_ring(batch);
}
+void
+fd_gmem_flush_compute(struct fd_batch *batch)
+{
+ render_sysmem(batch);
+ flush_ring(batch);
+}
+
/* tile needs restore if it isn't completely contained within the
* cleared scissor:
*/
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h
index 6598ea9..42a8dfa 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.h
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.h
@@ -63,6 +63,7 @@ struct fd_batch;
void fd_gmem_render_tiles(struct fd_batch *batch);
void fd_gmem_render_noop(struct fd_batch *batch);
+void fd_gmem_flush_compute(struct fd_batch *batch);
bool fd_gmem_needs_restore(struct fd_batch *batch, struct fd_tile *tile,
uint32_t buffers);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 6fd12cf..fdffb87 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -514,10 +514,24 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
- if ((fd_mesa_debug & FD_DBG_NIR) && is_ir3(screen))
+ switch (shader) {
+ case PIPE_SHADER_FRAGMENT:
+ case PIPE_SHADER_VERTEX:
+ if ((fd_mesa_debug & FD_DBG_NIR) && is_ir3(screen))
+ return PIPE_SHADER_IR_NIR;
+ return PIPE_SHADER_IR_TGSI;
+ default:
+ /* tgsi_to_nir doesn't really support much beyond FS/VS: */
+ debug_assert(is_ir3(screen));
return PIPE_SHADER_IR_NIR;
- return PIPE_SHADER_IR_TGSI;
+ }
+ break;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ if (is_ir3(screen)) {
+ return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI);
+ } else {
+ return (1 << PIPE_SHADER_IR_TGSI);
+ }
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -559,6 +573,78 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
return 0;
}
+/* TODO depending on how much the limits differ for a3xx/a4xx, maybe move this
+ * into per-generation backend?
+ */
+static int
+fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
+ enum pipe_compute_cap param, void *ret)
+{
+ struct fd_screen *screen = fd_screen(pscreen);
+
+ switch (param) {
+ case PIPE_COMPUTE_CAP_ADDRESS_BITS:
+ if (ret) {
+ uint32_t *address_bits = ret;
+ address_bits[0] = 32;
+
+ if (is_a5xx(screen))
+ address_bits[0] = 64;
+ }
+ return 1 * sizeof(uint32_t);
+
+ case PIPE_COMPUTE_CAP_IR_TARGET:
+ if (ret)
+ sprintf(ret, "ir3");
+ return strlen("ir3") * sizeof(char);
+
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ if (ret) {
+ uint64_t *grid_dimension = ret;
+ grid_dimension[0] = 3;
+ }
+ return 1 * sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ if (ret) {
+ uint64_t *grid_size = ret;
+ grid_size[0] = 65535;
+ grid_size[1] = 65535;
+ grid_size[2] = 65535;
+ }
+ return 3 * sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ if (ret) {
+ uint64_t *grid_size = ret;
+ grid_size[0] = 1024;
+ grid_size[1] = 1024;
+ grid_size[2] = 64;
+ }
+ return 3 * sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ if (ret) {
+ uint64_t *max_threads_per_block = ret;
+ *max_threads_per_block = 1024;
+ }
+ return sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
+ case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+ case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+ break;
+ }
+
+ return 0;
+}
+
static const void *
fd_get_compiler_options(struct pipe_screen *pscreen,
enum pipe_shader_ir ir, unsigned shader)
@@ -751,6 +837,7 @@ fd_screen_create(struct fd_device *dev)
pscreen->get_param = fd_screen_get_param;
pscreen->get_paramf = fd_screen_get_paramf;
pscreen->get_shader_param = fd_screen_get_shader_param;
+ pscreen->get_compute_param = fd_get_compute_param;
pscreen->get_compiler_options = fd_get_compiler_options;
fd_resource_screen_init(pscreen);
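
One note on the convention fd_get_compute_param() follows: the return value is the size of the answer in bytes, and the frontend may pass ret == NULL just to query that size before handing in a real buffer (which is why every case guards the write with 'if (ret)'). A rough sketch of a caller, roughly what clover does (illustrative only):

    #include <stdint.h>
    #include <stdlib.h>
    #include "pipe/p_defines.h"
    #include "pipe/p_screen.h"

    static void
    query_max_grid(struct pipe_screen *pscreen)
    {
            /* first call with ret == NULL to learn how many bytes we need */
            int sz = pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_NIR,
                            PIPE_COMPUTE_CAP_MAX_GRID_SIZE, NULL);
            uint64_t *grid = malloc(sz);         /* 3 * sizeof(uint64_t) here */

            pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_NIR,
                            PIPE_COMPUTE_CAP_MAX_GRID_SIZE, grid);
            /* with this patch: grid[0] == grid[1] == grid[2] == 65535 */
            free(grid);
    }
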
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index dac7224..83c0449 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -128,4 +128,10 @@ is_ir3(struct fd_screen *screen)
return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen);
}
+static inline bool
+has_compute(struct fd_screen *screen)
+{
+ return false;
+}
+
#endif /* FREEDRENO_SCREEN_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index 9955627..bb66a55 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -451,6 +451,32 @@ fd_set_stream_output_targets(struct pipe_context *pctx,
ctx->dirty |= FD_DIRTY_STREAMOUT;
}
+static void
+fd_bind_compute_state(struct pipe_context *pctx, void *state)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->compute = state;
+ ctx->dirty_shader[PIPE_SHADER_COMPUTE] |= FD_DIRTY_SHADER_PROG;
+}
+
+static void
+fd_set_compute_resources(struct pipe_context *pctx,
+ unsigned start, unsigned count, struct pipe_surface **prscs)
+{
+ // TODO
+}
+
+static void
+fd_set_global_binding(struct pipe_context *pctx,
+ unsigned first, unsigned count, struct pipe_resource **prscs,
+ uint32_t **handles)
+{
+ /* TODO only used by clover.. seems to need us to return the actual
+ * gpuaddr of the buffer.. which isn't really exposed to mesa atm.
+ * How is this used?
+ */
+}
+
void
fd_state_init(struct pipe_context *pctx)
{
@@ -484,4 +510,10 @@ fd_state_init(struct pipe_context *pctx)
pctx->create_stream_output_target = fd_create_stream_output_target;
pctx->stream_output_target_destroy = fd_stream_output_target_destroy;
pctx->set_stream_output_targets = fd_set_stream_output_targets;
+
+ if (has_compute(fd_screen(pctx->screen))) {
+ pctx->bind_compute_state = fd_bind_compute_state;
+ pctx->set_compute_resources = fd_set_compute_resources;
+ pctx->set_global_binding = fd_set_global_binding;
+ }
}
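
To sketch how the per-generation ctx->launch_grid() implementations are expected to consume the FD_DIRTY_SHADER_PROG bit set above (a hedged example, not the actual backend code):

    /* illustrative backend callback: check compute dirty state before
     * emitting the dispatch
     */
    static void
    fdN_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
    {
            if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG) {
                    /* ctx->compute was (re)bound via fd_bind_compute_state(),
                     * so (re)emit the compute program state here
                     */
            }

            /* ... emit compute constbufs/SSBOs/textures as needed, then emit
             * the actual grid launch for info->grid[] / info->block[] ...
             */
    }
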
--
2.9.3