Mesa (main): radeonsi: change max TBO/SSBO sizes again and rework max alloc size

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jun 8 10:54:35 UTC 2022


Module: Mesa
Branch: main
Commit: aee8ee17a5056bb8e1f3144d34b52612ac7d3b25
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=aee8ee17a5056bb8e1f3144d34b52612ac7d3b25

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Sun Jun  5 17:32:27 2022 -0400

radeonsi: change max TBO/SSBO sizes again and rework max alloc size

Allow 1/4 of the max heap size, but at most 512 MB on 32-bit
architectures.

Reviewed-by: Mihai Preda <mhpreda at gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16901>

---

 src/amd/common/ac_gpu_info.c                      |  6 +++---
 src/amd/common/ac_gpu_info.h                      |  2 +-
 src/gallium/drivers/r600/r600_pipe.c              |  2 +-
 src/gallium/drivers/r600/r600_pipe_common.c       |  9 ++++-----
 src/gallium/drivers/radeonsi/si_get.c             | 24 ++++++++++++++++++-----
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 16 ++++++++++-----
 6 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 212dc7ef359..e784b1e3f03 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -803,9 +803,9 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
     * allocations can fail or cause buffer movement failures in the kernel.
     */
    if (info->has_dedicated_vram)
-      info->max_alloc_size = info->vram_size * 0.8;
+      info->max_heap_size_kb = info->vram_size_kb;
    else
-      info->max_alloc_size = info->gart_size * 0.7;
+      info->max_heap_size_kb = info->gart_size_kb;
 
    info->vram_type = amdinfo->vram_type;
    info->vram_bit_width = amdinfo->vram_bit_width;
@@ -1367,7 +1367,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
    fprintf(f, "    vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024 * 1024));
    fprintf(f, "    vram_type = %i\n", info->vram_type);
    fprintf(f, "    vram_bit_width = %i\n", info->vram_bit_width);
-   fprintf(f, "    max_alloc_size = %i MB\n", (int)DIV_ROUND_UP(info->max_alloc_size, 1024 * 1024));
+   fprintf(f, "    max_heap_size_kb = %i MB\n", (int)DIV_ROUND_UP(info->max_heap_size_kb, 1024));
    fprintf(f, "    min_alloc_size = %u\n", info->min_alloc_size);
    fprintf(f, "    address32_hi = 0x%x\n", info->address32_hi);
    fprintf(f, "    has_dedicated_vram = %u\n", info->has_dedicated_vram);
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 9bcaf74d3a0..eadd64407b5 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -116,7 +116,7 @@ struct radeon_info {
    uint64_t vram_vis_size;
    uint32_t vram_bit_width;
    uint32_t vram_type;
-   uint64_t max_alloc_size;
+   uint32_t max_heap_size_kb;
    uint32_t min_alloc_size;
    uint32_t address32_hi;
    bool has_dedicated_vram;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 0210b292991..091573dd08f 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -348,7 +348,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 		return 0;
 
 	case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
-		return MIN2(rscreen->b.info.max_alloc_size, INT_MAX);
+		return MIN2(rscreen->b.info.max_heap_size_kb * 1024ull / 4, INT_MAX);
 
         case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
                 return R600_MAP_BUFFER_ALIGNMENT;
diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c
index f95bb9fd611..6283dc83f44 100644
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c
@@ -991,8 +991,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
 			 * 4 * MAX_MEM_ALLOC_SIZE.
 			 */
 			*max_global_size = MIN2(4 * max_mem_alloc_size,
-						MAX2(rscreen->info.gart_size,
-						     rscreen->info.vram_size));
+						rscreen->info.max_heap_size_kb * 1024ull);
 		}
 		return sizeof(uint64_t);
 
@@ -1016,7 +1015,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
 		if (ret) {
 			uint64_t *max_mem_alloc_size = ret;
 
-			*max_mem_alloc_size = rscreen->info.max_alloc_size;
+			*max_mem_alloc_size = (rscreen->info.max_heap_size_kb / 4) * 1024ull;
 		}
 		return sizeof(uint64_t);
 
@@ -1287,8 +1286,8 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
 		printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
 		printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
 		printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
-		printf("max_alloc_size = %i MB\n",
-		       (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
+		printf("max_heap_size = %i MB\n",
+		       (int)DIV_ROUND_UP(rscreen->info.max_heap_size_kb, 1024));
 		printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size);
 		printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram);
 		printf("r600_has_virtual_memory = %i\n", rscreen->info.r600_has_virtual_memory);
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 7ac187ed2ff..b737c80c1fb 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -234,11 +234,22 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
       return 4096 * 1024;
 
    case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
-   case PIPE_CAP_MAX_SHADER_BUFFER_SIZE_UINT:
+   case PIPE_CAP_MAX_SHADER_BUFFER_SIZE_UINT: {
+      /* Return 1/4th of the heap size as the maximum because the max size is not practically
+       * allocatable. Also, this can only return UINT32_MAX at most.
+       */
+      unsigned max_size = MIN2((sscreen->info.max_heap_size_kb * 1024ull) / 4, UINT32_MAX);
+
       /* Allow max 512 MB to pass CTS with a 32-bit build. */
-      return MIN2(sscreen->info.max_alloc_size, 512 * 1024 * 1024);
+      if (sizeof(void*) == 4)
+         max_size = MIN2(max_size, 512 * 1024 * 1024);
+
+      return max_size;
+   }
+
    case PIPE_CAP_MAX_TEXTURE_MB:
-      return sscreen->info.max_alloc_size / (1024 * 1024);
+      /* Allow 1/4th of the heap size. */
+      return sscreen->info.max_heap_size_kb / 1024 / 4;
 
    case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
    case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
@@ -861,7 +872,7 @@ static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir
           * 4 * MAX_MEM_ALLOC_SIZE.
           */
          *max_global_size =
-            MIN2(4 * max_mem_alloc_size, MAX2(sscreen->info.gart_size, sscreen->info.vram_size));
+            MIN2(4 * max_mem_alloc_size, sscreen->info.max_heap_size_kb * 1024ull);
       }
       return sizeof(uint64_t);
 
@@ -888,7 +899,10 @@ static int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir
       if (ret) {
          uint64_t *max_mem_alloc_size = ret;
 
-         *max_mem_alloc_size = sscreen->info.max_alloc_size;
+         /* Return 1/4 of the heap size as the maximum because the max size is not practically
+          * allocatable.
+          */
+         *max_mem_alloc_size = (sscreen->info.max_heap_size_kb / 4) * 1024ull;
       }
       return sizeof(uint64_t);
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index b40f9bed945..8217632e7a6 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -375,14 +375,20 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
    /* Radeon allocates all buffers contiguously, which makes large allocations
     * unlikely to succeed. */
    if (ws->info.has_dedicated_vram)
-      ws->info.max_alloc_size = ws->info.vram_size * 0.7;
+      ws->info.max_heap_size_kb = ws->info.vram_size_kb;
    else
-      ws->info.max_alloc_size = ws->info.gart_size * 0.7;
+      ws->info.max_heap_size_kb = ws->info.gart_size_kb;
 
+   /* Old kernel driver limitation for allocation sizes. We only use this to limit per-buffer
+    * allocation size.
+    */
    if (ws->info.drm_minor < 40)
-      ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024);
-   /* Both 32-bit and 64-bit address spaces only have 4GB. */
-   ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 3ull*1024*1024*1024);
+      ws->info.max_heap_size_kb = MIN2(ws->info.max_heap_size_kb, 256 * 1024);
+
+   /* Both 32-bit and 64-bit address spaces only have 4GB.
+    * This is a limitation of the VM allocator in the winsys.
+    */
+   ws->info.max_heap_size_kb = MIN2(ws->info.max_heap_size_kb, 4 * 1024 * 1024); /* 4 GB */
 
    /* Get max clock frequency info and convert it to MHz */
    radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,



More information about the mesa-commit mailing list