[Mesa-dev] [PATCH v2 28/37] panfrost: Start tracking inter-batch dependencies
Boris Brezillon
boris.brezillon at collabora.com
Mon Sep 16 09:37:06 UTC 2019
The idea is to track which BO are being accessed and the type of access
to determine when a dependency exists. Thanks to that we can build a
dependency graph that will allow us to flush batches in the correct
order.
Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
---
src/gallium/drivers/panfrost/pan_context.h | 3 +
src/gallium/drivers/panfrost/pan_job.c | 324 ++++++++++++++++++++-
src/gallium/drivers/panfrost/pan_job.h | 3 +
3 files changed, 325 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index ce3e0c899a4f..3b09952345cf 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -114,6 +114,9 @@ struct panfrost_context {
struct panfrost_batch *batch;
struct hash_table *batches;
+ /* panfrost_bo -> panfrost_bo_access */
+ struct hash_table *accessed_bos;
+
/* Within a launch_grid call.. */
const struct pipe_grid_info *compute_grid;
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index ec397b855a69..04f4f22dab74 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -36,6 +36,29 @@
#include "pan_util.h"
#include "pandecode/decode.h"
+/* panfrost_bo_access is here to help us keep track of batch accesses to BOs
+ * and build a proper dependency graph such that batches can be pipelined for
+ * better GPU utilization.
+ *
+ * Each accessed BO has a corresponding entry in the ->accessed_bos hash table.
+ * A BO is either being written or read at any time, that's what the type field
+ * encodes.
+ * When the last access is a write, the batch writing the BO might have read
+ * dependencies (readers that have not been executed yet and want to read the
+ * previous BO content), and when the last access is a read, all readers might
+ * depend on another batch to push its results to memory. That's what the
+ * readers/writers keep track off.
+ * There can only be one writer at any given time, if a new batch wants to
+ * write to the same BO, a dependency will be added between the new writer and
+ * the old writer (at the batch level), and panfrost_bo_access->writer will be
+ * updated to point to the new writer.
+ */
+struct panfrost_bo_access {
+ uint32_t type;
+ struct util_dynarray readers;
+ struct panfrost_batch_fence *writer;
+};
+
static struct panfrost_batch_fence *
panfrost_create_batch_fence(struct panfrost_batch *batch)
{
@@ -94,6 +117,7 @@ panfrost_create_batch(struct panfrost_context *ctx,
util_dynarray_init(&batch->headers, batch);
util_dynarray_init(&batch->gpu_headers, batch);
+ util_dynarray_init(&batch->dependencies, batch);
batch->out_sync = panfrost_create_batch_fence(batch);
util_copy_framebuffer_state(&batch->key, key);
@@ -153,6 +177,10 @@ panfrost_free_batch(struct panfrost_batch *batch)
hash_table_foreach(batch->bos, entry)
panfrost_bo_unreference((struct panfrost_bo *)entry->key);
+ util_dynarray_foreach(&batch->dependencies,
+ struct panfrost_batch_fence *, dep)
+ panfrost_batch_fence_unreference(*dep);
+
/* The out_sync fence lifetime is different from the the batch one
* since other batches might want to wait on an fence of already
* submitted/signaled batch. All we need to do here is make sure the
@@ -166,6 +194,52 @@ panfrost_free_batch(struct panfrost_batch *batch)
ralloc_free(batch);
}
+#ifndef NDEBUG
+static bool
+panfrost_dep_graph_contains_batch(struct panfrost_batch *root,
+ struct panfrost_batch *batch)
+{
+ if (!root)
+ return false;
+
+ util_dynarray_foreach(&root->dependencies,
+ struct panfrost_batch_fence *, dep) {
+ if ((*dep)->batch == batch ||
+ panfrost_dep_graph_contains_batch((*dep)->batch, batch))
+ return true;
+ }
+
+ return false;
+}
+#endif
+
+static void
+panfrost_batch_add_dep(struct panfrost_batch *batch,
+ struct panfrost_batch_fence *newdep)
+{
+ if (batch == newdep->batch)
+ return;
+
+ util_dynarray_foreach(&batch->dependencies,
+ struct panfrost_batch_fence *, dep) {
+ if (*dep == newdep)
+ return;
+ }
+
+ /* Make sure the dependency graph is acyclic. */
+ assert(!panfrost_dep_graph_contains_batch(newdep->batch, batch));
+
+ panfrost_batch_fence_reference(newdep);
+ util_dynarray_append(&batch->dependencies,
+ struct panfrost_batch_fence *, newdep);
+
+ /* We now have a batch depending on us, let's make sure new draw/clear
+ * calls targeting the same FBO use a new batch object.
+ */
+ if (newdep->batch)
+ panfrost_freeze_batch(newdep->batch);
+}
+
static struct panfrost_batch *
panfrost_get_batch(struct panfrost_context *ctx,
const struct pipe_framebuffer_state *key)
@@ -216,6 +290,192 @@ panfrost_get_batch_for_fbo(struct panfrost_context *ctx)
return batch;
}
+static bool
+panfrost_batch_fence_is_signaled(struct panfrost_batch_fence *fence)
+{
+ if (fence->signaled)
+ return true;
+
+ /* Batch has not been submitted yet. */
+ if (fence->batch)
+ return false;
+
+ int ret = drmSyncobjWait(pan_screen(fence->ctx->base.screen)->fd,
+ &fence->syncobj, 1, 0, 0, NULL);
+
+ fence->signaled = ret >= 0;
+ return fence->signaled;
+}
+
+static void
+panfrost_bo_access_gc_fences(struct panfrost_context *ctx,
+ struct panfrost_bo_access *access,
+ const struct panfrost_bo *bo)
+{
+ if (access->writer && panfrost_batch_fence_is_signaled(access->writer)) {
+ panfrost_batch_fence_unreference(access->writer);
+ access->writer = NULL;
+ }
+
+ unsigned nreaders = 0;
+ util_dynarray_foreach(&access->readers, struct panfrost_batch_fence *,
+ reader) {
+ if (!*reader)
+ continue;
+
+ if (panfrost_batch_fence_is_signaled(*reader)) {
+ panfrost_batch_fence_unreference(*reader);
+ *reader = NULL;
+ } else {
+ nreaders++;
+ }
+ }
+
+ if (!nreaders)
+ util_dynarray_clear(&access->readers);
+}
+
+static void
+panfrost_gc_fences(struct panfrost_context *ctx)
+{
+ hash_table_foreach(ctx->accessed_bos, entry) {
+ struct panfrost_bo_access *access = entry->data;
+
+ assert(access);
+ panfrost_bo_access_gc_fences(ctx, access, entry->key);
+ if (!util_dynarray_num_elements(&access->readers,
+ struct panfrost_batch_fence *) &&
+ !access->writer)
+ _mesa_hash_table_remove(ctx->accessed_bos, entry);
+ }
+}
+
+static void
+panfrost_batch_update_bo_access(struct panfrost_batch *batch,
+ struct panfrost_bo *bo, uint32_t access_type)
+{
+ struct panfrost_context *ctx = batch->ctx;
+ struct panfrost_bo_access *access;
+ uint32_t old_access_type;
+ struct hash_entry *entry;
+
+ assert(access_type == PAN_BO_GPU_ACCESS_WRITE ||
+ access_type == PAN_BO_GPU_ACCESS_READ);
+
+ entry = _mesa_hash_table_search(ctx->accessed_bos, bo);
+ access = entry ? entry->data : NULL;
+ if (access) {
+ old_access_type = access->type;
+ } else {
+ access = rzalloc(ctx, struct panfrost_bo_access);
+ util_dynarray_init(&access->readers, access);
+ _mesa_hash_table_insert(ctx->accessed_bos, bo, access);
+ /* We are the first to access this BO, let's initialize
+ * old_access_type to our own access type in that case.
+ */
+ old_access_type = access_type;
+ access->type = access_type;
+ }
+
+ assert(access);
+
+ if (access_type == PAN_BO_GPU_ACCESS_WRITE &&
+ old_access_type == PAN_BO_GPU_ACCESS_READ) {
+ /* Previous access was a read and we want to write this BO.
+ * We first need to add explicit deps between our batch and
+ * the previous readers.
+ */
+ util_dynarray_foreach(&access->readers,
+ struct panfrost_batch_fence *, reader) {
+ /* We were already reading the BO, no need to add a dep
+ * on ourself (the acyclic check would complain about
+ * that).
+ */
+ if (!*reader || (*reader)->batch == batch)
+ continue;
+
+ panfrost_batch_add_dep(batch, *reader);
+ }
+ panfrost_batch_fence_reference(batch->out_sync);
+
+ /* We now are the new writer. */
+ access->writer = batch->out_sync;
+ access->type = access_type;
+
+ /* Release the previous readers and reset the readers array. */
+ util_dynarray_foreach(&access->readers,
+ struct panfrost_batch_fence *,
+ reader) {
+ if (!*reader)
+ continue;
+ panfrost_batch_fence_unreference(*reader);
+ }
+ util_dynarray_clear(&access->readers);
+ } else if (access_type == PAN_BO_GPU_ACCESS_WRITE &&
+ old_access_type == PAN_BO_GPU_ACCESS_WRITE) {
+ /* Previous access was a write and we want to write this BO.
+ * First check if we were the previous writer, in that case
+ * there's nothing to do. Otherwise we need to add a
+ * dependency between the new writer and the old one.
+ */
+ if (access->writer != batch->out_sync) {
+ if (access->writer) {
+ panfrost_batch_add_dep(batch, access->writer);
+ panfrost_batch_fence_unreference(access->writer);
+ }
+ panfrost_batch_fence_reference(batch->out_sync);
+ access->writer = batch->out_sync;
+ }
+ } else if (access_type == PAN_BO_GPU_ACCESS_READ &&
+ old_access_type == PAN_BO_GPU_ACCESS_WRITE) {
+ /* Previous access was a write and we want to read this BO.
+ * First check if we were the previous writer, in that case
+ * we want to keep the access type unchanged, as a write is
+ * more constraining than a read.
+ */
+ if (access->writer != batch->out_sync) {
+ /* Add a dependency on the previous writer. */
+ panfrost_batch_add_dep(batch, access->writer);
+
+ /* The previous access was a write, there's no reason
+ * to have entries in the readers array.
+ */
+ assert(!util_dynarray_num_elements(&access->readers,
+ struct panfrost_batch_fence *));
+
+ /* Add ourselves to the readers array. */
+ panfrost_batch_fence_reference(batch->out_sync);
+ util_dynarray_append(&access->readers,
+ struct panfrost_batch_fence *,
+ batch->out_sync);
+ access->type = access_type;
+ }
+ } else {
+ /* Previous access was a read and we want to read this BO.
+ * Add ourselves to the readers array if we're not already
+ * there and add a dependency on the previous writer (if any).
+ */
+ bool already_there = false;
+
+ util_dynarray_foreach(&access->readers,
+ struct panfrost_batch_fence *, reader) {
+ if (*reader && (*reader)->batch == batch) {
+ already_there = true;
+ break;
+ }
+ }
+
+ if (!already_there) {
+ panfrost_batch_fence_reference(batch->out_sync);
+ util_dynarray_append(&access->readers,
+ struct panfrost_batch_fence *,
+ batch->out_sync);
+ if (access->writer)
+ panfrost_batch_add_dep(batch, access->writer);
+ }
+ }
+}
+
void
panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
uint32_t flags)
@@ -233,6 +493,10 @@ panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
panfrost_bo_reference(bo);
} else {
old_flags = (uintptr_t)entry->data;
+
+ /* All batches have to agree on the shared flag. */
+ assert((old_flags & PAN_BO_GPU_ACCESS_SHARED) ==
+ (flags & PAN_BO_GPU_ACCESS_SHARED));
}
assert(entry);
@@ -242,6 +506,25 @@ panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
flags |= old_flags;
entry->data = (void *)(uintptr_t)flags;
+
+ /* If this is not a shared BO, we don't really care about dependency
+ * tracking.
+ */
+ if (!(flags & PAN_BO_GPU_ACCESS_SHARED))
+ return;
+
+ /* All dependencies should have been flushed before we execute the
+ * wallpaper draw, so it should be harmless to skip the
+ * update_bo_access() call.
+ */
+ if (batch == batch->ctx->wallpaper_batch)
+ return;
+
+ /* Only pass R/W flags to the dep tracking logic. */
+ assert(flags & PAN_BO_GPU_ACCESS_RW);
+ flags = (flags & PAN_BO_GPU_ACCESS_WRITE) ?
+ PAN_BO_GPU_ACCESS_WRITE : PAN_BO_GPU_ACCESS_READ;
+ panfrost_batch_update_bo_access(batch, bo, flags);
}
void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
@@ -461,15 +744,34 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
struct pipe_context *gallium = (struct pipe_context *) ctx;
struct panfrost_screen *screen = pan_screen(gallium->screen);
struct drm_panfrost_submit submit = {0,};
- uint32_t *bo_handles;
+ uint32_t *bo_handles, *in_syncs = NULL;
int ret;
-
- if (ctx->last_out_sync) {
+ if (reqs & PANFROST_JD_REQ_FS && batch->first_job.gpu)
submit.in_sync_count = 1;
- submit.in_syncs = (uintptr_t)&ctx->last_out_sync->syncobj;
+ else
+ submit.in_sync_count = util_dynarray_num_elements(&batch->dependencies,
+ struct panfrost_batch_fence *);
+
+ if (submit.in_sync_count) {
+ in_syncs = calloc(submit.in_sync_count, sizeof(*in_syncs));
+ assert(in_syncs);
}
+ /* The fragment job always depends on the vertex/tiler job if there's
+ * one
+ */
+ if (reqs & PANFROST_JD_REQ_FS && batch->first_job.gpu) {
+ in_syncs[0] = batch->out_sync->syncobj;
+ } else {
+ unsigned int i = 0;
+
+ util_dynarray_foreach(&batch->dependencies,
+ struct panfrost_batch_fence *, dep)
+ in_syncs[i++] = (*dep)->syncobj;
+ }
+
+ submit.in_syncs = (uintptr_t)in_syncs;
submit.out_sync = batch->out_sync->syncobj;
submit.jc = first_job_desc;
submit.requirements = reqs;
@@ -493,6 +795,7 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
submit.bo_handles = (u64) (uintptr_t) bo_handles;
ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
free(bo_handles);
+ free(in_syncs);
/* Release the last batch fence if any, and retain the new one */
if (ctx->last_out_sync)
@@ -543,6 +846,13 @@ panfrost_batch_submit(struct panfrost_batch *batch)
{
assert(batch);
+ /* Submit the dependencies first. */
+ util_dynarray_foreach(&batch->dependencies,
+ struct panfrost_batch_fence *, dep) {
+ if ((*dep)->batch)
+ panfrost_batch_submit((*dep)->batch);
+ }
+
struct panfrost_context *ctx = batch->ctx;
int ret;
@@ -562,7 +872,6 @@ panfrost_batch_submit(struct panfrost_batch *batch)
out:
panfrost_freeze_batch(batch);
- assert(!ctx->batch || batch == ctx->batch);
/* We always stall the pipeline for correct results since pipelined
* rendering is quite broken right now (to be fixed by the panfrost_job
@@ -571,6 +880,9 @@ out:
drmSyncobjWait(pan_screen(ctx->base.screen)->fd,
&batch->out_sync->syncobj, 1, INT64_MAX, 0, NULL);
panfrost_free_batch(batch);
+
+ /* Collect batch fences before returning */
+ panfrost_gc_fences(ctx);
}
void
@@ -777,4 +1089,6 @@ panfrost_batch_init(struct panfrost_context *ctx)
ctx->batches = _mesa_hash_table_create(ctx,
panfrost_batch_hash,
panfrost_batch_compare);
+ ctx->accessed_bos = _mesa_hash_table_create(ctx, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
}
diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
index 32bfc1fe3388..faf8cb06eebc 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -153,6 +153,9 @@ struct panfrost_batch {
/* Output sync object. Only valid when submitted is true. */
struct panfrost_batch_fence *out_sync;
+
+ /* Batch dependencies */
+ struct util_dynarray dependencies;
};
/* Functions for managing the above */
--
2.21.0
More information about the mesa-dev
mailing list