[Freedreno] [PATCH 09/12] freedreno: batch re-ordering support

Rob Clark robdclark at gmail.com
Sat Jul 2 16:52:12 UTC 2016


Batch re-ordering is not enabled by default for now, but it can be enabled
(on a3xx/a4xx) with FD_MESA_DEBUG=reorder.

Signed-off-by: Rob Clark <robdclark at gmail.com>
---
 src/gallium/drivers/freedreno/freedreno_batch.c    | 168 ++++++++++++++++++---
 src/gallium/drivers/freedreno/freedreno_batch.h    |   1 +
 src/gallium/drivers/freedreno/freedreno_context.c  |  38 ++---
 src/gallium/drivers/freedreno/freedreno_context.h  |   2 -
 src/gallium/drivers/freedreno/freedreno_query_hw.c |   2 +-
 src/gallium/drivers/freedreno/freedreno_resource.c |   6 +-
 src/gallium/drivers/freedreno/freedreno_resource.h |   1 +
 src/gallium/drivers/freedreno/freedreno_screen.c   |   9 ++
 src/gallium/drivers/freedreno/freedreno_screen.h   |   2 +
 src/gallium/drivers/freedreno/freedreno_state.c    |  15 +-
 src/gallium/drivers/freedreno/freedreno_util.h     |   1 +
 11 files changed, 188 insertions(+), 57 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c
index 5c6ae76..9d5bcf8 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -25,26 +25,20 @@
  */
 
 #include "util/list.h"
+#include "util/set.h"
+#include "util/hash_table.h"
 #include "util/u_string.h"
 
 #include "freedreno_batch.h"
 #include "freedreno_context.h"
 #include "freedreno_resource.h"
 
-struct fd_batch *
-fd_batch_create(struct fd_context *ctx)
+static void
+batch_init(struct fd_batch *batch)
 {
-	struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
-	static unsigned seqno = 0;
+	struct fd_context *ctx = batch->ctx;
 	unsigned size = 0;
 
-	if (!batch)
-		return NULL;
-
-	pipe_reference_init(&batch->reference, 1);
-	batch->seqno = ++seqno;
-	batch->ctx = ctx;
-
 	/* if kernel is too old to support unlimited # of cmd buffers, we
 	 * have no option but to allocate large worst-case sizes so that
 	 * we don't need to grow the ringbuffer.  Performance is likely to
@@ -62,7 +56,11 @@ fd_batch_create(struct fd_context *ctx)
 	fd_ringbuffer_set_parent(batch->draw, batch->gmem);
 	fd_ringbuffer_set_parent(batch->binning, batch->gmem);
 
-	list_inithead(&batch->used_resources);
+	batch->cleared = batch->partial_cleared = 0;
+	batch->restore = batch->resolve = 0;
+	batch->needs_flush = false;
+	batch->gmem_reason = 0;
+	batch->num_draws = 0;
 
 	/* reset maximal bounds: */
 	batch->max_scissor.minx = batch->max_scissor.miny = ~0;
@@ -73,16 +71,37 @@ fd_batch_create(struct fd_context *ctx)
 	if (is_a3xx(ctx->screen))
 		util_dynarray_init(&batch->rbrc_patches);
 
-	return batch;
+	assert(LIST_IS_EMPTY(&batch->used_resources));
 }
 
-void
-__fd_batch_destroy(struct fd_batch *batch)
+struct fd_batch *
+fd_batch_create(struct fd_context *ctx)
 {
-	fd_bc_invalidate_batch(batch);
+	struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
+	static unsigned seqno = 0;
 
-	util_copy_framebuffer_state(&batch->framebuffer, NULL);
+	if (!batch)
+		return NULL;
+
+	DBG("%p", batch);
+	/* hand out one initial reference and the next sequence number: */
+	pipe_reference_init(&batch->reference, 1);
+	batch->seqno = ++seqno;
+	batch->ctx = ctx;
+	/* batch_init() asserts that used_resources is an empty list: */
+	list_inithead(&batch->used_resources);
+
+	batch_init(batch);
+	/* NOTE(review): the _mesa_set_create() result is not checked: */
+	batch->dependencies = _mesa_set_create(NULL, _mesa_hash_pointer,
+			_mesa_key_pointer_equal);
 
+	return batch;
+}
+
+static void
+batch_fini(struct fd_batch *batch)
+{
 	fd_ringbuffer_del(batch->draw);
 	fd_ringbuffer_del(batch->binning);
 	fd_ringbuffer_del(batch->gmem);
@@ -91,6 +110,51 @@ __fd_batch_destroy(struct fd_batch *batch)
 
 	if (is_a3xx(batch->ctx->screen))
 		util_dynarray_fini(&batch->rbrc_patches);
+}
+
+static void
+batch_reset(struct fd_batch *batch)
+{
+	struct set_entry *entry;
+
+	DBG("%p", batch);
+	/* tear down and re-initialize the batch state, keeping the batch object: */
+	batch_fini(batch);
+	batch_init(batch);
+	/* empty the dependency set, dropping the reference held on each entry: */
+	set_foreach(batch->dependencies, entry) {
+		struct fd_batch *dep = (struct fd_batch *)entry->key;
+		_mesa_set_remove(batch->dependencies, entry);
+		fd_batch_reference(&dep, NULL);
+	}
+}
+
+void
+fd_batch_reset(struct fd_batch *batch)
+{
+	if (batch->needs_flush)	/* leave the batch untouched when needs_flush is clear */
+		batch_reset(batch);
+}
+
+static void
+unref_batch(struct set_entry *entry)
+{
+	struct fd_batch *batch = (struct fd_batch *)entry->key;	/* set_entry callback: the key is a batch pointer */
+	fd_batch_reference(&batch, NULL);	/* release one reference on that batch */
+}
+
+void
+__fd_batch_destroy(struct fd_batch *batch)
+{
+	fd_bc_invalidate_batch(batch);
+
+	DBG("%p", batch);
+	/* clear the framebuffer state stored in the batch: */
+	util_copy_framebuffer_state(&batch->framebuffer, NULL);
+
+	batch_fini(batch);
+	/* destroy the dependency set, calling unref_batch() on its entries: */
+	_mesa_set_destroy(batch->dependencies, unref_batch);
 
 	free(batch);
 }
@@ -101,16 +165,26 @@ __fd_batch_describe(char* buf, const struct fd_batch *batch)
 	util_sprintf(buf, "fd_batch<%u>", batch->seqno);
 }
 
-void
-fd_batch_flush(struct fd_batch *batch)
+static void
+batch_flush(struct fd_batch *batch)
 {
 	struct fd_resource *rsc, *rsc_tmp;
+	struct set_entry *entry;
 
 	DBG("%p: needs_flush=%d", batch, batch->needs_flush);
 
 	if (!batch->needs_flush)
 		return;
 
+	batch->needs_flush = false;
+
+	set_foreach(batch->dependencies, entry) {
+		struct fd_batch *dep = (struct fd_batch *)entry->key;
+		fd_batch_flush(dep);
+		_mesa_set_remove(batch->dependencies, entry);
+		fd_batch_reference(&dep, NULL);
+	}
+
 	fd_gmem_render_tiles(batch);
 
 	/* go through all the used resources and clear their reading flag */
@@ -119,18 +193,67 @@ fd_batch_flush(struct fd_batch *batch)
 		debug_assert(rsc->status != 0);
 		rsc->status = 0;
 		fd_batch_reference(&rsc->pending_batch, NULL);
+		fd_batch_reference(&rsc->write_batch, NULL);
 		list_delinit(&rsc->list);
 	}
 
 	assert(LIST_IS_EMPTY(&batch->used_resources));
-	batch->needs_flush = false;
-	fd_bc_invalidate_batch(batch);
+
+	if (batch == batch->ctx->batch) {
+		batch_reset(batch);
+	} else {
+		fd_bc_invalidate_batch(batch);
+	}
+}
+
+void
+fd_batch_flush(struct fd_batch *batch)
+{
+	/* Pin the batch with a temporary reference for the duration of
+	 * batch_flush(): releasing the used_resources entries may drop
+	 * what would otherwise be the final reference to it.
+	 */
+	struct fd_batch *pinned = NULL;
+	fd_batch_reference(&pinned, batch);
+	batch_flush(pinned);
+	fd_batch_reference(&pinned, NULL);
+}
+
+static void
+batch_add_dep(struct fd_batch *batch, struct fd_batch *dep)
+{
+	struct fd_batch *ref = NULL;	/* reference that the set will hold */
+	if (_mesa_set_search(batch->dependencies, dep))
+		return;	/* dep is already in the set, nothing to add */
+	fd_batch_reference(&ref, dep);
+	_mesa_set_add(batch->dependencies, ref);
+}
+
+static void
+batch_update_dep(struct fd_batch *batch, struct fd_resource *rsc,
+		enum fd_resource_status status)
+{	/* called when rsc is pending on a batch other than 'batch'; status is the new access */
+	switch (status) {
+	case FD_PENDING_WRITE:	/* new write: flush the batch rsc is pending on */
+		DBG("%p: flush forced! (%p, %d)", rsc->pending_batch, rsc, rsc->status);
+		fd_batch_flush(rsc->pending_batch);
+		assert(rsc->pending_batch == NULL);
+		break;
+	case FD_PENDING_READ:	/* new read: record the other batch(es) as dependencies */
+		if (rsc->write_batch)
+			batch_add_dep(batch, rsc->write_batch);
+		batch_add_dep(batch, rsc->pending_batch);
+		break;
+	}
 }
 
 void
 fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc,
 		enum fd_resource_status status)
 {
+	if (unlikely(rsc->pending_batch && (rsc->pending_batch != batch)))
+		batch_update_dep(batch, rsc, status);
+
 	rsc->status |= status;
 
 	if (rsc->stencil)
@@ -139,7 +262,6 @@ fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc,
 	/* TODO resources can actually be shared across contexts,
 	 * so I'm not sure a single list-head will do the trick?
 	 */
-	debug_assert((rsc->pending_batch == batch) || !rsc->pending_batch);
 	list_delinit(&rsc->list);
 	list_addtail(&rsc->list, &batch->used_resources);
 	fd_batch_reference(&rsc->pending_batch, batch);
@@ -154,5 +276,5 @@ fd_batch_check_size(struct fd_batch *batch)
 	struct fd_ringbuffer *ring = batch->draw;
 	if (((ring->cur - ring->start) > (ring->size/4 - 0x1000)) ||
 			(fd_mesa_debug & FD_DBG_FLUSH))
-		fd_context_render(&batch->ctx->base);
+		fd_batch_flush(batch);
 }
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h
index d500f95..44da3c4 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -129,6 +129,7 @@ struct fd_batch {
 
 struct fd_batch * fd_batch_create(struct fd_context *ctx);
 
+void fd_batch_reset(struct fd_batch *batch);
 void fd_batch_flush(struct fd_batch *batch);
 void fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc,
 		enum fd_resource_status status);
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 4359fb2..3a16a51 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -38,39 +38,27 @@
 #include "freedreno_query_hw.h"
 #include "freedreno_util.h"
 
-/* emit accumulated render cmds, needed for example if render target has
- * changed, or for flush()
- */
-void
-fd_context_render(struct pipe_context *pctx)
-{
-	struct fd_context *ctx = fd_context(pctx);
-	struct fd_batch *new_batch;
-
-	fd_batch_flush(ctx->batch);
-
-	new_batch = fd_batch_create(ctx);
-	util_copy_framebuffer_state(&new_batch->framebuffer, &ctx->batch->framebuffer);
-	fd_batch_reference(&ctx->batch, NULL);
-	ctx->batch = new_batch;
-}
-
 static void
 fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
 		unsigned flags)
 {
-	struct fd_batch *batch = NULL;
-
-	fd_batch_reference(&batch, fd_context(pctx)->batch);
-
-	fd_context_render(pctx);
+	struct fd_context *ctx = fd_context(pctx);
+	uint32_t timestamp;
+	/* flush, then take the fence timestamp from whichever path is active: */
+	if (!ctx->screen->reorder) {
+		struct fd_batch *batch = NULL;
+		fd_batch_reference(&batch, ctx->batch);
+		fd_batch_flush(batch);
+		timestamp = fd_ringbuffer_timestamp(batch->gmem);	/* NOTE(review): batch_flush() resets ctx->batch after flushing -- confirm gmem still carries the flushed timestamp */
+		fd_batch_reference(&batch, NULL);
+	} else {
+		timestamp = fd_bc_flush(&ctx->batch_cache);
+	}
 
 	if (fence) {
 		fd_screen_fence_ref(pctx->screen, fence, NULL);
-		*fence = fd_fence_create(pctx, fd_ringbuffer_timestamp(batch->gmem));
+		*fence = fd_fence_create(pctx, timestamp);
 	}
-
-	fd_batch_reference(&batch, NULL);
 }
 
 /**
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 6be7437..012f452 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -378,8 +378,6 @@ struct pipe_context * fd_context_init(struct fd_context *ctx,
 		struct pipe_screen *pscreen, const uint8_t *primtypes,
 		void *priv);
 
-void fd_context_render(struct pipe_context *pctx);
-
 void fd_context_destroy(struct pipe_context *pctx);
 
 #endif /* FREEDRENO_CONTEXT_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c
index ec8bf20..a55aee2 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_hw.c
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c
@@ -210,7 +210,7 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
 		if (!ctx->batch->needs_flush)
 			return true;
 		DBG("reading query result forces flush!");
-		fd_context_render(&ctx->base);
+		fd_batch_flush(ctx->batch);
 	}
 
 	util_query_clear_result(result, q->type);
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 4fd8559..d7603b2 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -111,6 +111,7 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
 	rsc->timestamp = 0;
 	rsc->status = 0;
 	fd_batch_reference(&rsc->pending_batch, NULL);
+	fd_batch_reference(&rsc->write_batch, NULL);
 	list_delinit(&rsc->list);
 	util_range_set_empty(&rsc->valid_buffer_range);
 }
@@ -327,7 +328,7 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 		if (((ptrans->usage & PIPE_TRANSFER_WRITE) &&
 					pending(rsc, FD_PENDING_READ | FD_PENDING_WRITE)) ||
 				pending(rsc, FD_PENDING_WRITE))
-			fd_context_render(pctx);
+			fd_batch_flush(rsc->pending_batch);
 
 		/* The GPU keeps track of how the various bo's are being used, and
 		 * will wait if necessary for the proper operation to have
@@ -456,6 +457,7 @@ fd_resource_destroy(struct pipe_screen *pscreen,
 	if (rsc->bo)
 		fd_bo_del(rsc->bo);
 	fd_batch_reference(&rsc->pending_batch, NULL);
+	fd_batch_reference(&rsc->write_batch, NULL);
 	list_delinit(&rsc->list);
 	util_range_destroy(&rsc->valid_buffer_range);
 	FREE(rsc);
@@ -849,7 +851,7 @@ fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
 	struct fd_resource *rsc = fd_resource(prsc);
 
 	if (pending(rsc, FD_PENDING_WRITE | FD_PENDING_READ))
-		fd_context_render(pctx);
+		fd_batch_flush(rsc->pending_batch);
 }
 
 void
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index 3b990a9..2615527 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -96,6 +96,7 @@ struct fd_resource {
 	 */
 	struct list_head list;
 	struct fd_batch *pending_batch;
+	struct fd_batch *write_batch;
 
 	/* set of batches whose batch-cache key references this resource: */
 	struct set *batches;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 5255c10..a18df54 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -75,6 +75,7 @@ static const struct debug_named_value debug_options[] = {
 		{"flush",     FD_DBG_FLUSH,  "Force flush after every draw"},
 		{"deqp",      FD_DBG_DEQP,   "Enable dEQP hacks"},
 		{"nir",       FD_DBG_NIR,    "Prefer NIR as native IR"},
+		{"reorder",   FD_DBG_REORDER,"Enable reordering for draws/blits"},
 		DEBUG_NAMED_VALUE_END
 };
 
@@ -649,6 +650,14 @@ fd_screen_create(struct fd_device *dev)
 		goto fail;
 	}
 
+	/* NOTE: don't enable reordering on a2xx, since completely untested.
+	 * Also, don't enable if we have too old of a kernel to support
+	 * growable cmdstream buffers, since memory requirement for cmdstream
+	 * buffers would be too much otherwise.
+	 */
+	if ((screen->gpu_id >= 300) && (fd_device_version(dev) >= FD_VERSION_UNLIMITED_CMDS))
+		screen->reorder = !!(fd_mesa_debug & FD_DBG_REORDER);
+
 	pscreen->destroy = fd_screen_destroy;
 	pscreen->get_param = fd_screen_get_param;
 	pscreen->get_paramf = fd_screen_get_paramf;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index a81c778..67fa689 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -65,6 +65,8 @@ struct fd_screen {
 	struct fd_pipe *pipe;
 
 	int64_t cpu_gpu_time_delta;
+
+	bool reorder;
 };
 
 static inline struct fd_screen *
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index 98b56c7..63ffa0c 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -117,10 +117,17 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
 	struct fd_context *ctx = fd_context(pctx);
 	struct pipe_framebuffer_state *cso;
 
-	DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush,
-			framebuffer->cbufs[0], framebuffer->zsbuf);
-
-	fd_context_render(pctx);
+	if (ctx->screen->reorder) {
+		struct fd_batch *batch =
+			fd_batch_from_fb(&ctx->batch_cache, ctx, framebuffer);
+		fd_batch_reference(&ctx->batch, NULL);
+		ctx->batch = batch;
+		ctx->dirty = ~0;
+	} else {
+		DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush,
+				framebuffer->cbufs[0], framebuffer->zsbuf);
+		fd_batch_flush(ctx->batch);
+	}
 
 	cso = &ctx->batch->framebuffer;
 
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index 8f125d9..5cb958e 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -75,6 +75,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
 #define FD_DBG_FLUSH    0x1000
 #define FD_DBG_DEQP     0x2000
 #define FD_DBG_NIR      0x4000
+#define FD_DBG_REORDER  0x8000
 
 extern int fd_mesa_debug;
 extern bool fd_binning_enabled;
-- 
2.7.4



More information about the Freedreno mailing list