Mesa (master): panfrost: Fix fencing

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Dec 9 16:28:30 UTC 2020


Module: Mesa
Branch: master
Commit: 29f938a0ece889cd3236fca7e008bf0031de4be2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=29f938a0ece889cd3236fca7e008bf0031de4be2

Author: Boris Brezillon <boris.brezillon at collabora.com>
Date:   Mon Nov 30 11:07:33 2020 +0100

panfrost: Fix fencing

Commit 64d6f56ad26f ("panfrost: Allocate syncobjs in panfrost_flush")
aimed at optimizing the fencing logic but it looks it also broke the
fence-based synchronization in subtle ways.

Indeed, now that the fence only waits on a single syncobj, we're not
guaranteed that all jobs queued in panfrost_flush_all_batches() will
be done when the fence is signaled, because jobs at the top level
(those stored in the batches hashmap) have not inter-dependencies.

Commit 9e397956b092 ("panfrost: signal syncobj if nothing is going to
be flushed") made this even more apparent by signaling the fence right
away if nothing was left to be drawn in the current context, thus
ignoring any of the batches left to flushed in the ->batches map.

If we want to keep relying the existing kernel APIs there's clearly no
ideal solution here. We can either go back to the original fencing
mechanism where each fence contained an array of syncobjs to be tested
or serialize jobs that have no explicit dependencies so we know the last
submitted job will also be the last one to return. The orginal approach
has proven to add quite a significant overhead (caused by the amount of
ioctls and the time spent in kernel space to gather dma fences attached
to those syncobjs and test them). So let's go for the simple solution
where we have a single syncobj bound to the context which we update to
point to the last job out_sync every time we submit a top-level job.

This approach implies reworking the way we create fences since we
need to capture the syncobj state at the time the fence is created.
Unfortunately, there's not SYNCOBJ_CLONE ioctl, which forces us to
export/create/import a fence so we have a new object that's not
subject to changes done to the context syncobj.

If we want to further optimize the logic, we should probably explore
some of those options:

1/ Adding array based SYNCOBJ ioctls (SYNCOBJ_{CREATE,DESTROY,CLONE}_ARRAY)
   so we can mitigate the cost of ioctls when we need to manipulate
   arrays of syncobjs
2/ Support synchronization jobs. That is, jobs that have a NULL job chain
   but an array of sync_in and a sync_out to allow creating
   synchronization points
3/ Add syncobj aggregators so we only have to wait on one syncobj from
   userspace. The syncobj aggregator would wait for all sub syncobjs to
   be signaled before signaling the top-level one.

Fixes: 64d6f56ad26f ("panfrost: Allocate syncobjs in panfrost_flush")
Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7831>

---

 .gitlab-ci/deqp-panfrost-t860-fails.txt    |  1 -
 src/gallium/drivers/panfrost/pan_compute.c |  2 +-
 src/gallium/drivers/panfrost/pan_context.c | 19 +++++++-----
 src/gallium/drivers/panfrost/pan_context.h |  3 ++
 src/gallium/drivers/panfrost/pan_job.c     | 49 +++++++++++++-----------------
 src/gallium/drivers/panfrost/pan_job.h     |  2 +-
 src/gallium/drivers/panfrost/pan_screen.c  | 40 ++++++++++++++++++++++--
 src/gallium/drivers/panfrost/pan_screen.h  |  2 +-
 8 files changed, 76 insertions(+), 42 deletions(-)

diff --git a/.gitlab-ci/deqp-panfrost-t860-fails.txt b/.gitlab-ci/deqp-panfrost-t860-fails.txt
index 503fbc0860e..a0f551b299a 100644
--- a/.gitlab-ci/deqp-panfrost-t860-fails.txt
+++ b/.gitlab-ci/deqp-panfrost-t860-fails.txt
@@ -33,7 +33,6 @@ dEQP-GLES3.functional.fbo.msaa.4_samples.rg32f,Fail
 dEQP-GLES3.functional.fbo.msaa.4_samples.rgba16f,Fail
 dEQP-GLES3.functional.fbo.msaa.4_samples.rgba32f,Fail
 dEQP-GLES3.functional.fbo.msaa.4_samples.stencil_index8,Fail
-dEQP-GLES3.functional.fence_sync.client_wait_sync_finish,Fail
 dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_loop_read_fragment,Crash
 dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_loop_read_vertex,Crash
 dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_read_fragment,Crash
diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c
index 6387dc1f73c..103327eab46 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -158,7 +158,7 @@ panfrost_launch_grid(struct pipe_context *pipe,
 
         panfrost_add_job(&batch->pool, &batch->scoreboard,
                          MALI_JOB_TYPE_COMPUTE, true, 0, &t, true);
-        panfrost_flush_all_batches(ctx, 0);
+        panfrost_flush_all_batches(ctx);
 }
 
 static void
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index b9bfc5a99bf..0b55528ec6a 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -160,16 +160,13 @@ panfrost_flush(
 {
         struct panfrost_context *ctx = pan_context(pipe);
         struct panfrost_device *dev = pan_device(pipe->screen);
-        uint32_t syncobj = 0;
 
-        if (fence)
-                drmSyncobjCreate(dev->fd, 0, &syncobj);
 
         /* Submit all pending jobs */
-        panfrost_flush_all_batches(ctx, syncobj);
+        panfrost_flush_all_batches(ctx);
 
         if (fence) {
-                struct panfrost_fence *f = panfrost_fence_create(ctx, syncobj);
+                struct panfrost_fence *f = panfrost_fence_create(ctx);
                 pipe->screen->fence_reference(pipe->screen, fence, NULL);
                 *fence = (struct pipe_fence_handle *)f;
         }
@@ -182,7 +179,7 @@ static void
 panfrost_texture_barrier(struct pipe_context *pipe, unsigned flags)
 {
         struct panfrost_context *ctx = pan_context(pipe);
-        panfrost_flush_all_batches(ctx, 0);
+        panfrost_flush_all_batches(ctx);
 }
 
 #define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_DRAW_MODE_##c;
@@ -1432,7 +1429,7 @@ panfrost_get_query_result(struct pipe_context *pipe,
 
         case PIPE_QUERY_PRIMITIVES_GENERATED:
         case PIPE_QUERY_PRIMITIVES_EMITTED:
-                panfrost_flush_all_batches(ctx, 0);
+                panfrost_flush_all_batches(ctx);
                 vresult->u64 = query->end - query->start;
                 break;
 
@@ -1620,5 +1617,13 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
         ctx->sample_mask = ~0;
         ctx->active_queries = true;
 
+        int ASSERTED ret;
+
+        /* Create a syncobj in a signaled state. Will be updated to point to the
+         * last queued job out_sync every time we submit a new job.
+         */
+        ret = drmSyncobjCreate(dev->fd, DRM_SYNCOBJ_CREATE_SIGNALED, &ctx->syncobj);
+        assert(!ret && ctx->syncobj);
+
         return gallium;
 }
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index e607b82f7b3..2a4bfd9176c 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -109,6 +109,9 @@ struct panfrost_context {
          * size from the kernel is 4kb */
         struct u_upload_mgr *state_uploader;
 
+        /* Sync obj used to keep track of in-flight jobs. */
+        uint32_t syncobj;
+
         /* Bound job batch and map of panfrost_batch_key to job batches */
         struct panfrost_batch *batch;
         struct hash_table *batches;
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 9f215f2b743..eec81a92f73 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -938,6 +938,7 @@ static int
 panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                             mali_ptr first_job_desc,
                             uint32_t reqs,
+                            uint32_t in_sync,
                             uint32_t out_sync)
 {
         struct panfrost_context *ctx = batch->ctx;
@@ -952,16 +953,16 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
          * after we're done but preventing double-frees if we were given a
          * syncobj */
 
-        bool our_sync = false;
-
-        if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
-                drmSyncobjCreate(dev->fd, 0, &out_sync);
-                our_sync = true;
-        }
+        if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
+                out_sync = ctx->syncobj;
 
         submit.out_sync = out_sync;
         submit.jc = first_job_desc;
         submit.requirements = reqs;
+        if (in_sync) {
+                submit.in_syncs = (u64)(uintptr_t)(&in_sync);
+                submit.in_sync_count = 1;
+        }
 
         bo_handles = calloc(panfrost_pool_num_bos(&batch->pool) +
                             panfrost_pool_num_bos(&batch->invisible_pool) +
@@ -989,9 +990,6 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                 if (dev->debug & PAN_DBG_MSGS)
                         fprintf(stderr, "Error submitting: %m\n");
 
-                if (our_sync)
-                        drmSyncobjDestroy(dev->fd, out_sync);
-
                 return errno;
         }
 
@@ -1006,10 +1004,6 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                 pandecode_jc(submit.jc, dev->quirks & IS_BIFROST, dev->gpu_id, minimal);
         }
 
-        /* Cleanup if we created the syncobj */
-        if (our_sync)
-                drmSyncobjDestroy(dev->fd, out_sync);
-
         return 0;
 }
 
@@ -1018,7 +1012,7 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
  * implicit dep between them) */
 
 static int
-panfrost_batch_submit_jobs(struct panfrost_batch *batch, uint32_t out_sync)
+panfrost_batch_submit_jobs(struct panfrost_batch *batch, uint32_t in_sync, uint32_t out_sync)
 {
         bool has_draws = batch->scoreboard.first_job;
         bool has_frag = batch->scoreboard.tiler_dep || batch->clear;
@@ -1026,7 +1020,7 @@ panfrost_batch_submit_jobs(struct panfrost_batch *batch, uint32_t out_sync)
 
         if (has_draws) {
                 ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job,
-                                0, has_frag ? 0 : out_sync);
+                                                  0, in_sync, has_frag ? 0 : out_sync);
                 assert(!ret);
         }
 
@@ -1041,7 +1035,8 @@ panfrost_batch_submit_jobs(struct panfrost_batch *batch, uint32_t out_sync)
                 mali_ptr fragjob = panfrost_fragment_job(batch,
                                 batch->scoreboard.tiler_dep != 0);
                 ret = panfrost_batch_submit_ioctl(batch, fragjob,
-                                PANFROST_JD_REQ_FS, out_sync);
+                                                  PANFROST_JD_REQ_FS, 0,
+                                                  out_sync);
                 assert(!ret);
         }
 
@@ -1049,7 +1044,8 @@ panfrost_batch_submit_jobs(struct panfrost_batch *batch, uint32_t out_sync)
 }
 
 static void
-panfrost_batch_submit(struct panfrost_batch *batch, uint32_t out_sync)
+panfrost_batch_submit(struct panfrost_batch *batch,
+                      uint32_t in_sync, uint32_t out_sync)
 {
         assert(batch);
         struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
@@ -1059,17 +1055,14 @@ panfrost_batch_submit(struct panfrost_batch *batch, uint32_t out_sync)
         util_dynarray_foreach(&batch->dependencies,
                               struct panfrost_batch_fence *, dep) {
                 if ((*dep)->batch)
-                        panfrost_batch_submit((*dep)->batch, 0);
+                        panfrost_batch_submit((*dep)->batch, 0, 0);
         }
 
         int ret;
 
         /* Nothing to do! */
-        if (!batch->scoreboard.first_job && !batch->clear) {
-                if (out_sync)
-                        drmSyncobjSignal(dev->fd, &out_sync, 1);
+        if (!batch->scoreboard.first_job && !batch->clear)
                 goto out;
-	}
 
         panfrost_batch_draw_wallpaper(batch);
 
@@ -1092,7 +1085,7 @@ panfrost_batch_submit(struct panfrost_batch *batch, uint32_t out_sync)
 
         panfrost_scoreboard_initialize_tiler(&batch->pool, &batch->scoreboard, polygon_list);
 
-        ret = panfrost_batch_submit_jobs(batch, out_sync);
+        ret = panfrost_batch_submit_jobs(batch, in_sync, out_sync);
 
         if (ret && dev->debug & PAN_DBG_MSGS)
                 fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);
@@ -1122,16 +1115,16 @@ out:
 /* Submit all batches, applying the out_sync to the currently bound batch */
 
 void
-panfrost_flush_all_batches(struct panfrost_context *ctx, uint32_t out_sync)
+panfrost_flush_all_batches(struct panfrost_context *ctx)
 {
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        panfrost_batch_submit(batch, out_sync);
+        panfrost_batch_submit(batch, ctx->syncobj, ctx->syncobj);
 
         hash_table_foreach(ctx->batches, hentry) {
                 struct panfrost_batch *batch = hentry->data;
                 assert(batch);
 
-                panfrost_batch_submit(batch, 0);
+                panfrost_batch_submit(batch, ctx->syncobj, ctx->syncobj);
         }
 
         assert(!ctx->batches->entries);
@@ -1180,7 +1173,7 @@ panfrost_flush_batches_accessing_bo(struct panfrost_context *ctx,
                 return;
 
         if (access->writer && access->writer->batch)
-                panfrost_batch_submit(access->writer->batch, 0);
+                panfrost_batch_submit(access->writer->batch, ctx->syncobj, ctx->syncobj);
 
         if (!flush_readers)
                 return;
@@ -1188,7 +1181,7 @@ panfrost_flush_batches_accessing_bo(struct panfrost_context *ctx,
         util_dynarray_foreach(&access->readers, struct panfrost_batch_fence *,
                               reader) {
                 if (*reader && (*reader)->batch)
-                        panfrost_batch_submit((*reader)->batch, 0);
+                        panfrost_batch_submit((*reader)->batch, ctx->syncobj, ctx->syncobj);
         }
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
index 69c30a4c178..6a905ee062d 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -157,7 +157,7 @@ panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
                          uint32_t create_flags, uint32_t access_flags);
 
 void
-panfrost_flush_all_batches(struct panfrost_context *ctx, uint32_t out_sync);
+panfrost_flush_all_batches(struct panfrost_context *ctx);
 
 bool
 panfrost_pending_batches_access_bo(struct panfrost_context *ctx,
diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
index 8163aaef3fb..0132fd4c5fd 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -702,17 +702,51 @@ panfrost_fence_finish(struct pipe_screen *pscreen,
 }
 
 struct panfrost_fence *
-panfrost_fence_create(struct panfrost_context *ctx,
-                      uint32_t syncobj)
+panfrost_fence_create(struct panfrost_context *ctx)
 {
         struct panfrost_fence *f = calloc(1, sizeof(*f));
         if (!f)
                 return NULL;
 
+        struct panfrost_device *dev = pan_device(ctx->base.screen);
+        int fd = -1, ret;
+
+        /* Snapshot the last rendering out fence. We'd rather have another
+         * syncobj instead of a sync file, but this is all we get.
+         * (HandleToFD/FDToHandle just gives you another syncobj ID for the
+         * same syncobj).
+         */
+        ret = drmSyncobjExportSyncFile(dev->fd, ctx->syncobj, &fd);
+        if (ret || fd == -1) {
+                fprintf(stderr, "export failed\n");
+                goto err_free_fence;
+        }
+
+        ret = drmSyncobjCreate(dev->fd, 0, &f->syncobj);
+        if (ret) {
+                fprintf(stderr, "create syncobj failed\n");
+                goto err_close_fd;
+        }
+
+        ret = drmSyncobjImportSyncFile(dev->fd, f->syncobj, fd);
+        if (ret) {
+                fprintf(stderr, "create syncobj failed\n");
+                goto err_destroy_syncobj;
+        }
+
+        assert(f->syncobj != ctx->syncobj);
+        close(fd);
         pipe_reference_init(&f->reference, 1);
-        f->syncobj = syncobj;
 
         return f;
+
+err_destroy_syncobj:
+        drmSyncobjDestroy(dev->fd, f->syncobj);
+err_close_fd:
+        close(fd);
+err_free_fence:
+        free(f);
+        return NULL;
 }
 
 static const void *
diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h
index 6730bae7a2b..259773df44f 100644
--- a/src/gallium/drivers/panfrost/pan_screen.h
+++ b/src/gallium/drivers/panfrost/pan_screen.h
@@ -62,6 +62,6 @@ pan_device(struct pipe_screen *p)
 }
 
 struct panfrost_fence *
-panfrost_fence_create(struct panfrost_context *ctx, uint32_t syncobj);
+panfrost_fence_create(struct panfrost_context *ctx);
 
 #endif /* PAN_SCREEN_H */



More information about the mesa-commit mailing list