Mesa (main): gallium: simplify VRAM uploads by adding PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Aug 9 12:35:33 UTC 2021
Module: Mesa
Branch: main
Commit: 59fe704c45df49a6c1937e1b25c921b9342e624a
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=59fe704c45df49a6c1937e1b25c921b9342e624a
Author: Marek Olšák <marek.olsak at amd.com>
Date: Fri Aug 6 01:02:50 2021 -0400
gallium: simplify VRAM uploads by adding PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY
When this flag is set on a resource, u_threaded_context will try not to map
that resource directly, for better buffer placement. Drivers set the flag when
visible VRAM is too small.
Reviewed-by: Mike Blumenkrantz <michael.blumenkrantz at gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12257>
---
src/gallium/auxiliary/util/u_threaded_context.c | 7 ++---
src/gallium/auxiliary/util/u_threaded_context.h | 6 -----
src/gallium/drivers/radeonsi/si_buffer.c | 35 +++++++++----------------
src/gallium/drivers/radeonsi/si_pipe.h | 1 -
src/gallium/include/pipe/p_defines.h | 1 +
5 files changed, 15 insertions(+), 35 deletions(-)
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 8e9898310ab..5994a6a3097 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -1996,11 +1996,8 @@ tc_improve_map_buffer_flags(struct threaded_context *tc,
if (usage & (PIPE_MAP_DISCARD_RANGE |
PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
!(usage & PIPE_MAP_PERSISTENT) &&
- /* Try not to decrement the counter if it's not positive. Still racy,
- * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */
- tres->max_forced_staging_uploads > 0 &&
- tc->use_forced_staging_uploads &&
- p_atomic_dec_return(&tres->max_forced_staging_uploads) >= 0) {
+ tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
+ tc->use_forced_staging_uploads) {
usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
PIPE_MAP_UNSYNCHRONIZED);
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
index 04088166248..0e1189d7f70 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -333,12 +333,6 @@ struct threaded_resource {
*/
uint32_t buffer_id_unique;
- /* If positive, prefer DISCARD_RANGE with a staging buffer over any other
- * method of CPU access when map flags allow it. Useful for buffers that
- * are too large for the visible VRAM window.
- */
- int max_forced_staging_uploads;
-
/* If positive, then a staging transfer is in progress.
*/
int pending_staging_uploads;
diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
index 8ade9694b49..c958ab8acd5 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -147,27 +147,21 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
/* Set expected VRAM and GART usage for the buffer. */
res->vram_usage_kb = 0;
res->gart_usage_kb = 0;
- res->max_forced_staging_uploads = 0;
- res->b.max_forced_staging_uploads = 0;
if (res->domains & RADEON_DOMAIN_VRAM) {
res->vram_usage_kb = MAX2(1, size / 1024);
- if (!sscreen->info.smart_access_memory) {
- /* We don't want to evict buffers from VRAM by mapping them for CPU access,
- * because they might never be moved back again. If a buffer is large enough,
- * upload data by copying from a temporary GTT buffer. 8K might not seem much,
- * but there can be 100000 buffers.
- *
- * This tweak improves performance for viewperf.
- */
- const unsigned min_size = 8196; /* tuned to minimize mapped VRAM */
- /* Number of uploads before mapping directly. A very high number helps display lists (snx). */
- const unsigned max_staging_uploads = 1000000;
-
- res->max_forced_staging_uploads = res->b.max_forced_staging_uploads =
- sscreen->info.has_dedicated_vram && size >= min_size ? max_staging_uploads : 0;
- }
+ /* We don't want to evict buffers from VRAM by mapping them for CPU access,
+ * because they might never be moved back again. If a buffer is large enough,
+ * upload data by copying from a temporary GTT buffer. 8K might not seem much,
+ * but there can be 100000 buffers.
+ *
+ * This tweak improves performance for viewperf creo & snx.
+ */
+ if (!sscreen->info.smart_access_memory &&
+ sscreen->info.has_dedicated_vram &&
+ size >= 8196)
+ res->b.b.flags |= PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY;
} else if (res->domains & RADEON_DOMAIN_GTT) {
res->gart_usage_kb = MAX2(1, size / 1024);
}
@@ -296,8 +290,6 @@ void si_replace_buffer_storage(struct pipe_context *ctx, struct pipe_resource *d
radeon_bo_reference(sctx->screen->ws, &sdst->buf, ssrc->buf);
sdst->gpu_address = ssrc->gpu_address;
sdst->b.b.bind = ssrc->b.b.bind;
- sdst->b.max_forced_staging_uploads = ssrc->b.max_forced_staging_uploads;
- sdst->max_forced_staging_uploads = ssrc->max_forced_staging_uploads;
sdst->flags = ssrc->flags;
assert(sdst->vram_usage_kb == ssrc->vram_usage_kb);
@@ -395,10 +387,7 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx, struct pipe_resour
bool force_discard_range = false;
if (usage & (PIPE_MAP_DISCARD_WHOLE_RESOURCE | PIPE_MAP_DISCARD_RANGE) &&
!(usage & PIPE_MAP_PERSISTENT) &&
- /* Try not to decrement the counter if it's not positive. Still racy,
- * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */
- buf->max_forced_staging_uploads > 0 &&
- p_atomic_dec_return(&buf->max_forced_staging_uploads) >= 0) {
+ buf->b.b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY) {
usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE | PIPE_MAP_UNSYNCHRONIZED);
usage |= PIPE_MAP_DISCARD_RANGE;
force_discard_range = true;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e385c57b394..a8722b20fa3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -301,7 +301,6 @@ struct si_resource {
enum radeon_bo_domain domains:8;
enum radeon_bo_flag flags:16;
unsigned bind_history;
- int max_forced_staging_uploads;
/* The buffer range which is initialized (with a write transfer,
* streamout, DMA, or as a random access target). The rest of
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 09db0b8b334..488a3968082 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -526,6 +526,7 @@ enum pipe_flush_flags
#define PIPE_RESOURCE_FLAG_SINGLE_THREAD_USE (1 << 4)
#define PIPE_RESOURCE_FLAG_ENCRYPTED (1 << 5)
#define PIPE_RESOURCE_FLAG_DONT_OVER_ALLOCATE (1 << 6)
+#define PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY (1 << 7) /* for small visible VRAM */
#define PIPE_RESOURCE_FLAG_DRV_PRIV (1 << 8) /* driver/winsys private */
#define PIPE_RESOURCE_FLAG_FRONTEND_PRIV (1 << 24) /* gallium frontend private */
More information about the mesa-commit
mailing list