Mesa (main): radeonsi: use the new flag AMDGPU_GEM_CREATE_DISCARDABLE

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon May 16 11:32:14 UTC 2022


Module: Mesa
Branch: main
Commit: e9e9086b664f94662b539c21d21b0e6eee70a95b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e9e9086b664f94662b539c21d21b0e6eee70a95b

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Tue May 10 16:57:08 2022 -0400

radeonsi: use the new flag AMDGPU_GEM_CREATE_DISCARDABLE

The flag forces the best placement (usually VRAM), and on eviction the buffer
contents are discarded instead of being copied.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16466>

---

 src/gallium/drivers/radeonsi/si_buffer.c          |  4 ++++
 src/gallium/drivers/radeonsi/si_compute.c         |  3 ++-
 src/gallium/drivers/radeonsi/si_cp_dma.c          |  3 ++-
 src/gallium/drivers/radeonsi/si_pipe.c            |  3 ++-
 src/gallium/drivers/radeonsi/si_pipe.h            |  2 ++
 src/gallium/drivers/radeonsi/si_state_shaders.cpp | 15 ++++++++++-----
 src/gallium/include/winsys/radeon_winsys.h        |  7 ++++++-
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c         |  9 ++++++++-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c     |  3 ++-
 9 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
index 46ba6144782..0ead3433e27 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -147,6 +147,10 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
        res->b.b.flags & SI_RESOURCE_FLAG_GL2_BYPASS)
       res->flags |= RADEON_FLAG_GL2_BYPASS;
 
+   if (res->b.b.flags & SI_RESOURCE_FLAG_DISCARDABLE &&
+       sscreen->info.drm_major == 3 && sscreen->info.drm_minor >= 47)
+      res->flags |= RADEON_FLAG_DISCARDABLE;
+
    /* Set expected VRAM and GART usage for the buffer. */
    res->memory_usage_kb = MAX2(1, size / 1024);
 
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 7c17341dd76..ccefbc1e2c4 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -481,7 +481,8 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_s
 
       sctx->compute_scratch_buffer =
          si_aligned_buffer_create(&sctx->screen->b,
-                                  PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+                                  PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+                                  SI_RESOURCE_FLAG_DISCARDABLE,
                                   PIPE_USAGE_DEFAULT,
                                   scratch_needed, sctx->screen->info.pte_fragment_size);
 
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 04f5a663397..f8a2a892ab2 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -260,7 +260,8 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size, uns
    if (!sctx->scratch_buffer || sctx->scratch_buffer->b.b.width0 < scratch_size) {
       si_resource_reference(&sctx->scratch_buffer, NULL);
       sctx->scratch_buffer = si_aligned_buffer_create(&sctx->screen->b,
-                                                      PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+                                                      PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+                                                      SI_RESOURCE_FLAG_DISCARDABLE,
                                                       PIPE_USAGE_DEFAULT, scratch_size, 256);
       if (!sctx->scratch_buffer)
          return;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 94b622a2b3f..073c1300143 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1356,7 +1356,8 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
       sscreen->attribute_ring = si_aligned_buffer_create(&sscreen->b,
                                                          PIPE_RESOURCE_FLAG_UNMAPPABLE |
                                                          SI_RESOURCE_FLAG_32BIT |
-                                                         SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+                                                         SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+                                                         SI_RESOURCE_FLAG_DISCARDABLE,
                                                          PIPE_USAGE_DEFAULT,
                                                          /* TODO: remove the overallocation */
                                                          attr_ring_size * 16, 2 * 1024 * 1024);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 645cbb57089..a492faec8f6 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -146,6 +146,8 @@ extern "C" {
 #define SI_RESOURCE_FLAG_MICRO_TILE_MODE_GET(x)                                                    \
    (((x) >> SI_RESOURCE_FLAG_MICRO_TILE_MODE_SHIFT) & 0x3)
 #define SI_RESOURCE_FLAG_GL2_BYPASS        (PIPE_RESOURCE_FLAG_DRV_PRIV << 12)
+/* Discard instead of evict. */
+#define SI_RESOURCE_FLAG_DISCARDABLE       (PIPE_RESOURCE_FLAG_DRV_PRIV << 13)
 
 enum si_has_gs {
    GS_OFF,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 14152c07e49..23970d9a212 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -3754,7 +3754,8 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
       pipe_resource_reference(&sctx->esgs_ring, NULL);
       sctx->esgs_ring =
          pipe_aligned_buffer_create(sctx->b.screen,
-                                    PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+                                    PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+                                    SI_RESOURCE_FLAG_DISCARDABLE,
                                     PIPE_USAGE_DEFAULT,
                                     esgs_ring_size, sctx->screen->info.pte_fragment_size);
       if (!sctx->esgs_ring)
@@ -3765,7 +3766,8 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
       pipe_resource_reference(&sctx->gsvs_ring, NULL);
       sctx->gsvs_ring =
          pipe_aligned_buffer_create(sctx->b.screen,
-                                    PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+                                    PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+                                    SI_RESOURCE_FLAG_DISCARDABLE,
                                     PIPE_USAGE_DEFAULT,
                                     gsvs_ring_size, sctx->screen->info.pte_fragment_size);
       if (!sctx->gsvs_ring)
@@ -3987,7 +3989,8 @@ bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes)
 
          sctx->scratch_buffer = si_aligned_buffer_create(
             &sctx->screen->b,
-            PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+            PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+            SI_RESOURCE_FLAG_DISCARDABLE,
             PIPE_USAGE_DEFAULT, scratch_needed_size,
             sctx->screen->info.pte_fragment_size);
          if (!sctx->scratch_buffer)
@@ -4017,7 +4020,8 @@ void si_init_tess_factor_ring(struct si_context *sctx)
    sctx->tess_rings = pipe_aligned_buffer_create(sctx->b.screen,
                                                  PIPE_RESOURCE_FLAG_UNMAPPABLE |
                                                  SI_RESOURCE_FLAG_32BIT |
-                                                 SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+                                                 SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+                                                 SI_RESOURCE_FLAG_DISCARDABLE,
                                                  PIPE_USAGE_DEFAULT,
                                                  sctx->screen->hs.tess_offchip_ring_size +
                                                  sctx->screen->hs.tess_factor_ring_size,
@@ -4030,7 +4034,8 @@ void si_init_tess_factor_ring(struct si_context *sctx)
                                                         PIPE_RESOURCE_FLAG_UNMAPPABLE |
                                                         PIPE_RESOURCE_FLAG_ENCRYPTED |
                                                         SI_RESOURCE_FLAG_32BIT |
-                                                        SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+                                                        SI_RESOURCE_FLAG_DRIVER_INTERNAL |
+                                                        SI_RESOURCE_FLAG_DISCARDABLE,
                                                         PIPE_USAGE_DEFAULT,
                                                         sctx->screen->hs.tess_offchip_ring_size +
                                                         sctx->screen->hs.tess_factor_ring_size,
diff --git a/src/gallium/include/winsys/radeon_winsys.h b/src/gallium/include/winsys/radeon_winsys.h
index e94cdf964c0..61479d4eb44 100644
--- a/src/gallium/include/winsys/radeon_winsys.h
+++ b/src/gallium/include/winsys/radeon_winsys.h
@@ -76,6 +76,10 @@ enum radeon_bo_flag
   RADEON_FLAG_ENCRYPTED = (1 << 7),
   RADEON_FLAG_GL2_BYPASS = (1 << 8), /* only gfx9 and newer */
   RADEON_FLAG_DRIVER_INTERNAL = (1 << 9),
+   /* Discard on eviction (instead of moving the buffer to GTT).
+    * This guarantees that this buffer will never be moved to GTT.
+    */
+  RADEON_FLAG_DISCARDABLE = (1 << 10),
 };
 
 enum radeon_map_flags
@@ -811,7 +815,8 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
 
    /* These are unsupported flags. */
    /* RADEON_FLAG_DRIVER_INTERNAL is ignored. It doesn't affect allocators. */
-   if (flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE))
+   if (flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE |
+                RADEON_FLAG_DISCARDABLE))
       return -1;
 
    int heap = 0;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index b61396894c5..c73ba72e586 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -527,9 +527,15 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
       request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
    if (flags & RADEON_FLAG_GTT_WC)
       request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
+   if (flags & RADEON_FLAG_DISCARDABLE &&
+       ws->info.drm_minor >= 47)
+      request.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
+
    if (ws->zero_all_vram_allocs &&
        (request.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM))
       request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
+
    if ((flags & RADEON_FLAG_ENCRYPTED) &&
        ws->info.has_tmz_support) {
       request.flags |= AMDGPU_GEM_CREATE_ENCRYPTED;
@@ -1405,7 +1411,8 @@ no_slab:
       alignment = align(alignment, ws->info.gart_page_size);
    }
 
-   bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
+   bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+                            !(flags & RADEON_FLAG_DISCARDABLE);
 
    if (use_reusable_pool) {
        /* RADEON_FLAG_NO_SUBALLOC is irrelevant for the cache. */
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 38c317c0243..f4e49b80805 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -1053,7 +1053,8 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
    size = align(size, ws->info.gart_page_size);
    alignment = align(alignment, ws->info.gart_page_size);
 
-   bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
+   bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+                            !(flags & RADEON_FLAG_DISCARDABLE);
 
    /* Shared resources don't use cached heaps. */
    if (use_reusable_pool) {



More information about the mesa-commit mailing list