Mesa (master): gallium/u_threaded: fix staging and non-staging conflicts

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Dec 3 10:01:46 UTC 2020


Module: Mesa
Branch: master
Commit: 2900f82e1919dcb70d29f34e5ed10a09f7356b3e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2900f82e1919dcb70d29f34e5ed10a09f7356b3e

Author: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Date:   Mon Oct 12 13:28:37 2020 +0200

gallium/u_threaded: fix staging and non-staging conflicts

In the following sequence:
  - transfer_map(buffer, DISCARD) // use staging upload
  - memcpy(...)
  - transfer_unmap
  - draw
  - transfer_map(buffer, UNSYNCHRONIZED) // non-staging upload
  - memcpy(...)
  - draw

Currently the order of operations is:
  - map#1 - staging buffer
  - memcpy to staging buffer
  - map#2
  - memcpy to buffer
  - staging buffer copy to real buffer
  - draw#1
  - draw#2

When the 2nd map operation doesn't use UNSYNCHRONIZED, the tc_sync_msg() call
will make sure that the bo is unused before mapping it.

But if it does use UNSYNCHRONIZED and the mapped intervals overlap, this commit
clears the UNSYNCHRONIZED flag to make sure ordering is maintained.

This will affect performance, but correct output is better than fast output.

See https://gitlab.freedesktop.org/mesa/mesa/-/issues/3611.

Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7098>

---

 src/gallium/auxiliary/util/u_threaded_context.c | 61 ++++++++++++++++++++++---
 src/gallium/auxiliary/util/u_threaded_context.h |  8 ++++
 2 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index c6e462bcb00..0227be371ba 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -374,6 +374,8 @@ threaded_resource_init(struct pipe_resource *res)
    tres->base_valid_buffer_range = &tres->valid_buffer_range;
    tres->is_shared = false;
    tres->is_user_ptr = false;
+   tres->pending_staging_uploads = 0;
+   util_range_init(&tres->pending_staging_uploads_range);
 }
 
 void
@@ -384,6 +386,7 @@ threaded_resource_deinit(struct pipe_resource *res)
    if (tres->latest != &tres->b)
            pipe_resource_reference(&tres->latest, NULL);
    util_range_destroy(&tres->valid_buffer_range);
+   util_range_destroy(&tres->pending_staging_uploads_range);
 }
 
 struct pipe_context *
@@ -1618,15 +1621,33 @@ tc_transfer_map(struct pipe_context *_pipe,
          ttrans->b.stride = 0;
          ttrans->b.layer_stride = 0;
          *transfer = &ttrans->b;
+
+         p_atomic_inc(&tres->pending_staging_uploads);
+         util_range_add(resource, &tres->pending_staging_uploads_range,
+                        box->x, box->x + box->width);
+
          return map + (box->x % tc->map_buffer_alignment);
       }
+
+      if (usage & PIPE_MAP_UNSYNCHRONIZED &&
+          p_atomic_read(&tres->pending_staging_uploads) &&
+          util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
+         /* Write conflict detected between a staging transfer and the direct mapping we're
+          * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
+          * will have to wait for the staging transfer completion.
+          * Note: The conflict detection is only based on the mapped range, not on the actual
+          * written range(s).
+          */
+         usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
+      }
    }
 
    /* Unsychronized buffer mappings don't have to synchronize the thread. */
-   if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
+   if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
       tc_sync_msg(tc, resource->target != PIPE_BUFFER ? "  texture" :
                       usage & PIPE_MAP_DISCARD_RANGE ? "  discard_range" :
-                      usage & PIPE_MAP_READ ? "  read" : "  ??");
+                      usage & PIPE_MAP_READ ? "  read" : "  staging conflict");
+   }
 
    tc->bytes_mapped_estimate += box->width;
 
@@ -1719,10 +1740,27 @@ tc_transfer_flush_region(struct pipe_context *_pipe,
    p->box = *rel_box;
 }
 
+struct tc_transfer_unmap {
+   union {
+      struct pipe_transfer *transfer;
+      struct pipe_resource *resource;
+   };
+   bool was_staging_transfer;
+};
+
 static void
 tc_call_transfer_unmap(struct pipe_context *pipe, union tc_payload *payload)
 {
-   pipe->transfer_unmap(pipe, payload->transfer);
+   struct tc_transfer_unmap *p = (struct tc_transfer_unmap *) payload;
+   if (p->was_staging_transfer) {
+      struct threaded_resource *tres = threaded_resource(payload->resource);
+      /* Nothing to do except keeping track of staging uploads */
+      assert(tres->pending_staging_uploads > 0);
+      p_atomic_dec(&tres->pending_staging_uploads);
+      tc_set_resource_reference(&p->resource, NULL);
+      return;
+   }
+   pipe->transfer_unmap(pipe, p->transfer);
 }
 
 static void
@@ -1751,21 +1789,30 @@ tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
       return;
    }
 
+   bool was_staging_transfer = false;
+
    if (tres->b.target == PIPE_BUFFER) {
       if (transfer->usage & PIPE_MAP_WRITE &&
           !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
          tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
 
-      /* Staging transfers don't send the call to the driver. */
       if (ttrans->staging) {
+         was_staging_transfer = true;
+
          pipe_resource_reference(&ttrans->staging, NULL);
          pipe_resource_reference(&ttrans->b.resource, NULL);
          slab_free(&tc->pool_transfers, ttrans);
-         return;
       }
    }
-
-   tc_add_small_call(tc, TC_CALL_transfer_unmap)->transfer = transfer;
+   struct tc_transfer_unmap *p = tc_add_struct_typed_call(tc, TC_CALL_transfer_unmap,
+                                                          tc_transfer_unmap);
+   if (was_staging_transfer) {
+      tc_set_resource_reference(&p->resource, &tres->b);
+      p->was_staging_transfer = true;
+   } else {
+      p->transfer = transfer;
+      p->was_staging_transfer = false;
+   }
 
    /* tc_transfer_map directly maps the buffers, but tc_transfer_unmap
     * defers the unmap operation to the batch execution.
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
index 561e896e684..3adec8e61f7 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -282,6 +282,14 @@ struct threaded_resource {
     * are too large for the visible VRAM window.
     */
    int max_forced_staging_uploads;
+
+   /* If positive, then a staging transfer is in progress.
+    */
+   int pending_staging_uploads;
+   /* If staging uploads are pending, this will hold the union of the mapped
+    * ranges.
+    */
+   struct util_range pending_staging_uploads_range;
 };
 
 struct threaded_transfer {



More information about the mesa-commit mailing list