Mesa (main): radeonsi: use the new flag AMDGPU_GEM_CREATE_DISCARDABLE
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon May 16 11:32:14 UTC 2022
Module: Mesa
Branch: main
Commit: e9e9086b664f94662b539c21d21b0e6eee70a95b
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e9e9086b664f94662b539c21d21b0e6eee70a95b
Author: Marek Olšák <marek.olsak at amd.com>
Date: Tue May 10 16:57:08 2022 -0400
radeonsi: use the new flag AMDGPU_GEM_CREATE_DISCARDABLE
The flag forces the best placement (usually VRAM), and on eviction the buffer
contents are discarded instead of being copied.
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16466>
---
src/gallium/drivers/radeonsi/si_buffer.c | 4 ++++
src/gallium/drivers/radeonsi/si_compute.c | 3 ++-
src/gallium/drivers/radeonsi/si_cp_dma.c | 3 ++-
src/gallium/drivers/radeonsi/si_pipe.c | 3 ++-
src/gallium/drivers/radeonsi/si_pipe.h | 2 ++
src/gallium/drivers/radeonsi/si_state_shaders.cpp | 15 ++++++++++-----
src/gallium/include/winsys/radeon_winsys.h | 7 ++++++-
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 9 ++++++++-
src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 3 ++-
9 files changed, 38 insertions(+), 11 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
index 46ba6144782..0ead3433e27 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -147,6 +147,10 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
res->b.b.flags & SI_RESOURCE_FLAG_GL2_BYPASS)
res->flags |= RADEON_FLAG_GL2_BYPASS;
+ if (res->b.b.flags & SI_RESOURCE_FLAG_DISCARDABLE &&
+ sscreen->info.drm_major == 3 && sscreen->info.drm_minor >= 47)
+ res->flags |= RADEON_FLAG_DISCARDABLE;
+
/* Set expected VRAM and GART usage for the buffer. */
res->memory_usage_kb = MAX2(1, size / 1024);
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 7c17341dd76..ccefbc1e2c4 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -481,7 +481,8 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_s
sctx->compute_scratch_buffer =
si_aligned_buffer_create(&sctx->screen->b,
- PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+ PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+ SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
scratch_needed, sctx->screen->info.pte_fragment_size);
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 04f5a663397..f8a2a892ab2 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -260,7 +260,8 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size, uns
if (!sctx->scratch_buffer || sctx->scratch_buffer->b.b.width0 < scratch_size) {
si_resource_reference(&sctx->scratch_buffer, NULL);
sctx->scratch_buffer = si_aligned_buffer_create(&sctx->screen->b,
- PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+ PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+ SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT, scratch_size, 256);
if (!sctx->scratch_buffer)
return;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 94b622a2b3f..073c1300143 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1356,7 +1356,8 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
sscreen->attribute_ring = si_aligned_buffer_create(&sscreen->b,
PIPE_RESOURCE_FLAG_UNMAPPABLE |
SI_RESOURCE_FLAG_32BIT |
- SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+ SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+ SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
/* TODO: remove the overallocation */
attr_ring_size * 16, 2 * 1024 * 1024);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 645cbb57089..a492faec8f6 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -146,6 +146,8 @@ extern "C" {
#define SI_RESOURCE_FLAG_MICRO_TILE_MODE_GET(x) \
(((x) >> SI_RESOURCE_FLAG_MICRO_TILE_MODE_SHIFT) & 0x3)
#define SI_RESOURCE_FLAG_GL2_BYPASS (PIPE_RESOURCE_FLAG_DRV_PRIV << 12)
+/* Discard instead of evict. */
+#define SI_RESOURCE_FLAG_DISCARDABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 13)
enum si_has_gs {
GS_OFF,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 14152c07e49..23970d9a212 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -3754,7 +3754,8 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
pipe_resource_reference(&sctx->esgs_ring, NULL);
sctx->esgs_ring =
pipe_aligned_buffer_create(sctx->b.screen,
- PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+ PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+ SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
esgs_ring_size, sctx->screen->info.pte_fragment_size);
if (!sctx->esgs_ring)
@@ -3765,7 +3766,8 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
pipe_resource_reference(&sctx->gsvs_ring, NULL);
sctx->gsvs_ring =
pipe_aligned_buffer_create(sctx->b.screen,
- PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+ PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+ SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
gsvs_ring_size, sctx->screen->info.pte_fragment_size);
if (!sctx->gsvs_ring)
@@ -3987,7 +3989,8 @@ bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes)
sctx->scratch_buffer = si_aligned_buffer_create(
&sctx->screen->b,
- PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+ PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+ SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT, scratch_needed_size,
sctx->screen->info.pte_fragment_size);
if (!sctx->scratch_buffer)
@@ -4017,7 +4020,8 @@ void si_init_tess_factor_ring(struct si_context *sctx)
sctx->tess_rings = pipe_aligned_buffer_create(sctx->b.screen,
PIPE_RESOURCE_FLAG_UNMAPPABLE |
SI_RESOURCE_FLAG_32BIT |
- SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+ SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+ SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
sctx->screen->hs.tess_offchip_ring_size +
sctx->screen->hs.tess_factor_ring_size,
@@ -4030,7 +4034,8 @@ void si_init_tess_factor_ring(struct si_context *sctx)
PIPE_RESOURCE_FLAG_UNMAPPABLE |
PIPE_RESOURCE_FLAG_ENCRYPTED |
SI_RESOURCE_FLAG_32BIT |
- SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+ SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+ SI_RESOURCE_FLAG_DISCARDABLE,
PIPE_USAGE_DEFAULT,
sctx->screen->hs.tess_offchip_ring_size +
sctx->screen->hs.tess_factor_ring_size,
diff --git a/src/gallium/include/winsys/radeon_winsys.h b/src/gallium/include/winsys/radeon_winsys.h
index e94cdf964c0..61479d4eb44 100644
--- a/src/gallium/include/winsys/radeon_winsys.h
+++ b/src/gallium/include/winsys/radeon_winsys.h
@@ -76,6 +76,10 @@ enum radeon_bo_flag
RADEON_FLAG_ENCRYPTED = (1 << 7),
RADEON_FLAG_GL2_BYPASS = (1 << 8), /* only gfx9 and newer */
RADEON_FLAG_DRIVER_INTERNAL = (1 << 9),
+ /* Discard on eviction (instead of moving the buffer to GTT).
+ * This guarantees that this buffer will never be moved to GTT.
+ */
+ RADEON_FLAG_DISCARDABLE = (1 << 10),
};
enum radeon_map_flags
@@ -811,7 +815,8 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
/* These are unsupported flags. */
/* RADEON_FLAG_DRIVER_INTERNAL is ignored. It doesn't affect allocators. */
- if (flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE))
+ if (flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE |
+ RADEON_FLAG_DISCARDABLE))
return -1;
int heap = 0;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index b61396894c5..c73ba72e586 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -527,9 +527,15 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
if (flags & RADEON_FLAG_GTT_WC)
request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
+ if (flags & RADEON_FLAG_DISCARDABLE &&
+ ws->info.drm_minor >= 47)
+ request.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
+
if (ws->zero_all_vram_allocs &&
(request.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM))
request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
+
if ((flags & RADEON_FLAG_ENCRYPTED) &&
ws->info.has_tmz_support) {
request.flags |= AMDGPU_GEM_CREATE_ENCRYPTED;
@@ -1405,7 +1411,8 @@ no_slab:
alignment = align(alignment, ws->info.gart_page_size);
}
- bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
+ bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+ !(flags & RADEON_FLAG_DISCARDABLE);
if (use_reusable_pool) {
/* RADEON_FLAG_NO_SUBALLOC is irrelevant for the cache. */
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 38c317c0243..f4e49b80805 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -1053,7 +1053,8 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
size = align(size, ws->info.gart_page_size);
alignment = align(alignment, ws->info.gart_page_size);
- bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
+ bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+ !(flags & RADEON_FLAG_DISCARDABLE);
/* Shared resources don't use cached heaps. */
if (use_reusable_pool) {
More information about the mesa-commit
mailing list