[Freedreno] [PATCH 09/12] freedreno: batch re-ordering support
Rob Clark
robdclark at gmail.com
Sat Jul 2 16:52:12 UTC 2016
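Batch re-ordering is not enabled by default for now, but it can be enabled
(on a3xx/a4xx, and only with a kernel new enough to support growable
cmdstream buffers) by setting FD_MESA_DEBUG=reorder.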
Signed-off-by: Rob Clark <robdclark at gmail.com>
---
src/gallium/drivers/freedreno/freedreno_batch.c | 168 ++++++++++++++++++---
src/gallium/drivers/freedreno/freedreno_batch.h | 1 +
src/gallium/drivers/freedreno/freedreno_context.c | 38 ++---
src/gallium/drivers/freedreno/freedreno_context.h | 2 -
src/gallium/drivers/freedreno/freedreno_query_hw.c | 2 +-
src/gallium/drivers/freedreno/freedreno_resource.c | 6 +-
src/gallium/drivers/freedreno/freedreno_resource.h | 1 +
src/gallium/drivers/freedreno/freedreno_screen.c | 9 ++
src/gallium/drivers/freedreno/freedreno_screen.h | 2 +
src/gallium/drivers/freedreno/freedreno_state.c | 15 +-
src/gallium/drivers/freedreno/freedreno_util.h | 1 +
11 files changed, 188 insertions(+), 57 deletions(-)
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c
index 5c6ae76..9d5bcf8 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -25,26 +25,20 @@
*/
#include "util/list.h"
+#include "util/set.h"
+#include "util/hash_table.h"
#include "util/u_string.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"
-struct fd_batch *
-fd_batch_create(struct fd_context *ctx)
+static void
+batch_init(struct fd_batch *batch)
{
- struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
- static unsigned seqno = 0;
+ struct fd_context *ctx = batch->ctx;
unsigned size = 0;
- if (!batch)
- return NULL;
-
- pipe_reference_init(&batch->reference, 1);
- batch->seqno = ++seqno;
- batch->ctx = ctx;
-
/* if kernel is too old to support unlimited # of cmd buffers, we
* have no option but to allocate large worst-case sizes so that
* we don't need to grow the ringbuffer. Performance is likely to
@@ -62,7 +56,11 @@ fd_batch_create(struct fd_context *ctx)
fd_ringbuffer_set_parent(batch->draw, batch->gmem);
fd_ringbuffer_set_parent(batch->binning, batch->gmem);
- list_inithead(&batch->used_resources);
+ batch->cleared = batch->partial_cleared = 0;
+ batch->restore = batch->resolve = 0;
+ batch->needs_flush = false;
+ batch->gmem_reason = 0;
+ batch->num_draws = 0;
/* reset maximal bounds: */
batch->max_scissor.minx = batch->max_scissor.miny = ~0;
@@ -73,16 +71,37 @@ fd_batch_create(struct fd_context *ctx)
if (is_a3xx(ctx->screen))
util_dynarray_init(&batch->rbrc_patches);
- return batch;
+ assert(LIST_IS_EMPTY(&batch->used_resources));
}
-void
-__fd_batch_destroy(struct fd_batch *batch)
+struct fd_batch *
+fd_batch_create(struct fd_context *ctx)
{
- fd_bc_invalidate_batch(batch);
+ struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
+ static unsigned seqno = 0;
- util_copy_framebuffer_state(&batch->framebuffer, NULL);
+ if (!batch)
+ return NULL;
+
+ DBG("%p", batch);
+
+ pipe_reference_init(&batch->reference, 1);
+ batch->seqno = ++seqno;
+ batch->ctx = ctx;
+
+ list_inithead(&batch->used_resources);
+
+ batch_init(batch);
+
+ batch->dependencies = _mesa_set_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ return batch;
+}
+
+static void
+batch_fini(struct fd_batch *batch)
+{
fd_ringbuffer_del(batch->draw);
fd_ringbuffer_del(batch->binning);
fd_ringbuffer_del(batch->gmem);
@@ -91,6 +110,51 @@ __fd_batch_destroy(struct fd_batch *batch)
if (is_a3xx(batch->ctx->screen))
util_dynarray_fini(&batch->rbrc_patches);
+}
+
+static void
+batch_reset(struct fd_batch *batch)
+{
+ struct set_entry *entry;
+
+ DBG("%p", batch);
+
+ batch_fini(batch);
+ batch_init(batch);
+
+ set_foreach(batch->dependencies, entry) {
+ struct fd_batch *dep = (struct fd_batch *)entry->key;
+ _mesa_set_remove(batch->dependencies, entry);
+ fd_batch_reference(&dep, NULL);
+ }
+}
+
+void
+fd_batch_reset(struct fd_batch *batch)
+{
+ if (batch->needs_flush)
+ batch_reset(batch);
+}
+
+static void
+unref_batch(struct set_entry *entry)
+{
+ struct fd_batch *batch = (struct fd_batch *)entry->key;
+ fd_batch_reference(&batch, NULL);
+}
+
+void
+__fd_batch_destroy(struct fd_batch *batch)
+{
+ fd_bc_invalidate_batch(batch);
+
+ DBG("%p", batch);
+
+ util_copy_framebuffer_state(&batch->framebuffer, NULL);
+
+ batch_fini(batch);
+
+ _mesa_set_destroy(batch->dependencies, unref_batch);
free(batch);
}
@@ -101,16 +165,26 @@ __fd_batch_describe(char* buf, const struct fd_batch *batch)
util_sprintf(buf, "fd_batch<%u>", batch->seqno);
}
-void
-fd_batch_flush(struct fd_batch *batch)
+static void
+batch_flush(struct fd_batch *batch)
{
struct fd_resource *rsc, *rsc_tmp;
+ struct set_entry *entry;
DBG("%p: needs_flush=%d", batch, batch->needs_flush);
if (!batch->needs_flush)
return;
+ batch->needs_flush = false;
+
+ set_foreach(batch->dependencies, entry) {
+ struct fd_batch *dep = (struct fd_batch *)entry->key;
+ fd_batch_flush(dep);
+ _mesa_set_remove(batch->dependencies, entry);
+ fd_batch_reference(&dep, NULL);
+ }
+
fd_gmem_render_tiles(batch);
/* go through all the used resources and clear their reading flag */
@@ -119,18 +193,67 @@ fd_batch_flush(struct fd_batch *batch)
debug_assert(rsc->status != 0);
rsc->status = 0;
fd_batch_reference(&rsc->pending_batch, NULL);
+ fd_batch_reference(&rsc->write_batch, NULL);
list_delinit(&rsc->list);
}
assert(LIST_IS_EMPTY(&batch->used_resources));
- batch->needs_flush = false;
- fd_bc_invalidate_batch(batch);
+
+ if (batch == batch->ctx->batch) {
+ batch_reset(batch);
+ } else {
+ fd_bc_invalidate_batch(batch);
+ }
+}
+
+void
+fd_batch_flush(struct fd_batch *batch)
+{
+ /* NOTE: we need to hold an extra ref across the body of flush,
+ * since the last ref to this batch could be dropped when cleaning
+ * up used_resources
+ */
+ struct fd_batch *tmp = NULL;
+ fd_batch_reference(&tmp, batch);
+ batch_flush(tmp);
+ fd_batch_reference(&tmp, NULL);
+}
+
+static void
+batch_add_dep(struct fd_batch *batch, struct fd_batch *dep)
+{
+ if (!_mesa_set_search(batch->dependencies, dep)) {
+ struct fd_batch *other = NULL;
+ fd_batch_reference(&other, dep);
+ _mesa_set_add(batch->dependencies, other);
+ }
+}
+
+static void
+batch_update_dep(struct fd_batch *batch, struct fd_resource *rsc,
+ enum fd_resource_status status)
+{
+ switch (status) {
+ case FD_PENDING_WRITE:
+ DBG("%p: flush forced! (%p, %d)\n", rsc->pending_batch, rsc, rsc->status);
+ fd_batch_flush(rsc->pending_batch);
+ assert(rsc->pending_batch == NULL);
+ break;
+ case FD_PENDING_READ:
+ if (rsc->write_batch)
+ batch_add_dep(batch, rsc->write_batch);
+ batch_add_dep(batch, rsc->pending_batch);
+ break;
+ }
}
void
fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc,
enum fd_resource_status status)
{
+ if (unlikely(rsc->pending_batch && (rsc->pending_batch != batch)))
+ batch_update_dep(batch, rsc, status);
+
rsc->status |= status;
if (rsc->stencil)
@@ -139,7 +262,6 @@ fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc,
/* TODO resources can actually be shared across contexts,
* so I'm not sure a single list-head will do the trick?
*/
- debug_assert((rsc->pending_batch == batch) || !rsc->pending_batch);
list_delinit(&rsc->list);
list_addtail(&rsc->list, &batch->used_resources);
fd_batch_reference(&rsc->pending_batch, batch);
@@ -154,5 +276,5 @@ fd_batch_check_size(struct fd_batch *batch)
struct fd_ringbuffer *ring = batch->draw;
if (((ring->cur - ring->start) > (ring->size/4 - 0x1000)) ||
(fd_mesa_debug & FD_DBG_FLUSH))
- fd_context_render(&batch->ctx->base);
+ fd_batch_flush(batch);
}
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h
index d500f95..44da3c4 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -129,6 +129,7 @@ struct fd_batch {
struct fd_batch * fd_batch_create(struct fd_context *ctx);
+void fd_batch_reset(struct fd_batch *batch);
void fd_batch_flush(struct fd_batch *batch);
void fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc,
enum fd_resource_status status);
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 4359fb2..3a16a51 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -38,39 +38,27 @@
#include "freedreno_query_hw.h"
#include "freedreno_util.h"
-/* emit accumulated render cmds, needed for example if render target has
- * changed, or for flush()
- */
-void
-fd_context_render(struct pipe_context *pctx)
-{
- struct fd_context *ctx = fd_context(pctx);
- struct fd_batch *new_batch;
-
- fd_batch_flush(ctx->batch);
-
- new_batch = fd_batch_create(ctx);
- util_copy_framebuffer_state(&new_batch->framebuffer, &ctx->batch->framebuffer);
- fd_batch_reference(&ctx->batch, NULL);
- ctx->batch = new_batch;
-}
-
static void
fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
unsigned flags)
{
- struct fd_batch *batch = NULL;
-
- fd_batch_reference(&batch, fd_context(pctx)->batch);
-
- fd_context_render(pctx);
+ struct fd_context *ctx = fd_context(pctx);
+ uint32_t timestamp;
+
+ if (!ctx->screen->reorder) {
+ struct fd_batch *batch = NULL;
+ fd_batch_reference(&batch, ctx->batch);
+ fd_batch_flush(batch);
+ timestamp = fd_ringbuffer_timestamp(batch->gmem);
+ fd_batch_reference(&batch, NULL);
+ } else {
+ timestamp = fd_bc_flush(&ctx->batch_cache);
+ }
if (fence) {
fd_screen_fence_ref(pctx->screen, fence, NULL);
- *fence = fd_fence_create(pctx, fd_ringbuffer_timestamp(batch->gmem));
+ *fence = fd_fence_create(pctx, timestamp);
}
-
- fd_batch_reference(&batch, NULL);
}
/**
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 6be7437..012f452 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -378,8 +378,6 @@ struct pipe_context * fd_context_init(struct fd_context *ctx,
struct pipe_screen *pscreen, const uint8_t *primtypes,
void *priv);
-void fd_context_render(struct pipe_context *pctx);
-
void fd_context_destroy(struct pipe_context *pctx);
#endif /* FREEDRENO_CONTEXT_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c
index ec8bf20..a55aee2 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_hw.c
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c
@@ -210,7 +210,7 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
if (!ctx->batch->needs_flush)
return true;
DBG("reading query result forces flush!");
- fd_context_render(&ctx->base);
+ fd_batch_flush(ctx->batch);
}
util_query_clear_result(result, q->type);
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 4fd8559..d7603b2 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -111,6 +111,7 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
rsc->timestamp = 0;
rsc->status = 0;
fd_batch_reference(&rsc->pending_batch, NULL);
+ fd_batch_reference(&rsc->write_batch, NULL);
list_delinit(&rsc->list);
util_range_set_empty(&rsc->valid_buffer_range);
}
@@ -327,7 +328,7 @@ fd_resource_transfer_map(struct pipe_context *pctx,
if (((ptrans->usage & PIPE_TRANSFER_WRITE) &&
pending(rsc, FD_PENDING_READ | FD_PENDING_WRITE)) ||
pending(rsc, FD_PENDING_WRITE))
- fd_context_render(pctx);
+ fd_batch_flush(rsc->pending_batch);
/* The GPU keeps track of how the various bo's are being used, and
* will wait if necessary for the proper operation to have
@@ -456,6 +457,7 @@ fd_resource_destroy(struct pipe_screen *pscreen,
if (rsc->bo)
fd_bo_del(rsc->bo);
fd_batch_reference(&rsc->pending_batch, NULL);
+ fd_batch_reference(&rsc->write_batch, NULL);
list_delinit(&rsc->list);
util_range_destroy(&rsc->valid_buffer_range);
FREE(rsc);
@@ -849,7 +851,7 @@ fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
struct fd_resource *rsc = fd_resource(prsc);
if (pending(rsc, FD_PENDING_WRITE | FD_PENDING_READ))
- fd_context_render(pctx);
+ fd_batch_flush(rsc->pending_batch);
}
void
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index 3b990a9..2615527 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -96,6 +96,7 @@ struct fd_resource {
*/
struct list_head list;
struct fd_batch *pending_batch;
+ struct fd_batch *write_batch;
/* set of batches whose batch-cache key references this resource: */
struct set *batches;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 5255c10..a18df54 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -75,6 +75,7 @@ static const struct debug_named_value debug_options[] = {
{"flush", FD_DBG_FLUSH, "Force flush after every draw"},
{"deqp", FD_DBG_DEQP, "Enable dEQP hacks"},
{"nir", FD_DBG_NIR, "Prefer NIR as native IR"},
+ {"reorder", FD_DBG_REORDER,"Enable reordering for draws/blits"},
DEBUG_NAMED_VALUE_END
};
@@ -649,6 +650,14 @@ fd_screen_create(struct fd_device *dev)
goto fail;
}
+ /* NOTE: don't enable reordering on a2xx, since completely untested.
+ * Also, don't enable if we have too old of a kernel to support
+ * growable cmdstream buffers, since memory requirement for cmdstream
+ * buffers would be too much otherwise.
+ */
+ if ((screen->gpu_id >= 300) && (fd_device_version(dev) >= FD_VERSION_UNLIMITED_CMDS))
+ screen->reorder = !!(fd_mesa_debug & FD_DBG_REORDER);
+
pscreen->destroy = fd_screen_destroy;
pscreen->get_param = fd_screen_get_param;
pscreen->get_paramf = fd_screen_get_paramf;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index a81c778..67fa689 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -65,6 +65,8 @@ struct fd_screen {
struct fd_pipe *pipe;
int64_t cpu_gpu_time_delta;
+
+ bool reorder;
};
static inline struct fd_screen *
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index 98b56c7..63ffa0c 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -117,10 +117,17 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
struct fd_context *ctx = fd_context(pctx);
struct pipe_framebuffer_state *cso;
- DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush,
- framebuffer->cbufs[0], framebuffer->zsbuf);
-
- fd_context_render(pctx);
+ if (ctx->screen->reorder) {
+ struct fd_batch *batch =
+ fd_batch_from_fb(&ctx->batch_cache, ctx, framebuffer);
+ fd_batch_reference(&ctx->batch, NULL);
+ ctx->batch = batch;
+ ctx->dirty = ~0;
+ } else {
+ DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush,
+ framebuffer->cbufs[0], framebuffer->zsbuf);
+ fd_batch_flush(ctx->batch);
+ }
cso = &ctx->batch->framebuffer;
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index 8f125d9..5cb958e 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -75,6 +75,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
#define FD_DBG_FLUSH 0x1000
#define FD_DBG_DEQP 0x2000
#define FD_DBG_NIR 0x4000
+#define FD_DBG_REORDER 0x8000
extern int fd_mesa_debug;
extern bool fd_binning_enabled;
--
2.7.4
More information about the Freedreno mailing list