Mesa (main): radv/ac: introduce a new common function for hs calcs.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed May 11 02:50:36 UTC 2022


Module: Mesa
Branch: main
Commit: d4c7ffc550f6df9bb23ec704d0b05825ad3da1ce
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d4c7ffc550f6df9bb23ec704d0b05825ad3da1ce

Author: Dave Airlie <airlied at redhat.com>
Date:   Tue May 10 11:41:54 2022 +1000

radv/ac: introduce a new common function for hs calcs.

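This moves the HS (tessellation) off-chip buffer and ring-size calculations
from radv_device_init_hs_info() into a new shared helper, ac_get_hs_info(),
in src/amd/common, and ports radv to use it via the new struct ac_hs_info.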

Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16415>

---

 src/amd/common/ac_gpu_info.c   | 82 ++++++++++++++++++++++++++++++++++
 src/amd/common/ac_gpu_info.h   | 12 +++++
 src/amd/vulkan/radv_device.c   | 99 ++++--------------------------------------
 src/amd/vulkan/radv_pipeline.c |  2 +-
 src/amd/vulkan/radv_private.h  |  7 +--
 5 files changed, 105 insertions(+), 97 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index eb3562eb84e..e1f1022c52d 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -1756,3 +1756,85 @@ unsigned ac_get_compute_resource_limits(struct radeon_info *info, unsigned waves
    }
    return compute_resource_limits;
 }
+
+void ac_get_hs_info(struct radeon_info *info,
+                    struct ac_hs_info *hs)
+{
+   bool double_offchip_buffers = info->chip_class >= GFX7 &&
+                                 info->family != CHIP_CARRIZO &&
+                                 info->family != CHIP_STONEY;
+   unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
+   unsigned max_offchip_buffers;
+   unsigned offchip_granularity;
+   unsigned hs_offchip_param;
+
+   hs->tess_offchip_block_dw_size =
+      info->family == CHIP_HAWAII ? 4096 : 8192;
+
+   /*
+    * Per RadeonSI:
+    * This must be one less than the maximum number due to a hw limitation.
+    * Various hardware bugs need this.
+    *
+    * Per AMDVLK:
+    * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
+    * Gfx7 should limit max_offchip_buffers to 508
+    * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
+    *
+    * Follow AMDVLK here.
+    */
+   if (info->chip_class >= GFX10) {
+      max_offchip_buffers_per_se = 128;
+   } else if (info->family == CHIP_VEGA10 ||
+              info->chip_class == GFX7 ||
+              info->chip_class == GFX6)
+      --max_offchip_buffers_per_se;
+
+   max_offchip_buffers = max_offchip_buffers_per_se * info->max_se;
+
+   /* Hawaii has a bug with offchip buffers > 256 that can be worked
+    * around by setting 4K granularity.
+    */
+   if (hs->tess_offchip_block_dw_size == 4096) {
+      assert(info->family == CHIP_HAWAII);
+      offchip_granularity = V_03093C_X_4K_DWORDS;
+   } else {
+      assert(hs->tess_offchip_block_dw_size == 8192);
+      offchip_granularity = V_03093C_X_8K_DWORDS;
+   }
+
+   switch (info->chip_class) {
+   case GFX6:
+      max_offchip_buffers = MIN2(max_offchip_buffers, 126);
+      break;
+   case GFX7:
+   case GFX8:
+   case GFX9:
+      max_offchip_buffers = MIN2(max_offchip_buffers, 508);
+      break;
+   case GFX10:
+      break;
+   default:
+      break;
+   }
+
+   hs->max_offchip_buffers = max_offchip_buffers;
+
+   if (info->chip_class >= GFX10_3) {
+      hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
+                         S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
+   } else if (info->chip_class >= GFX7) {
+      if (info->chip_class >= GFX8)
+         --max_offchip_buffers;
+      hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
+                         S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
+   } else {
+      hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
+   }
+
+   hs->hs_offchip_param = hs_offchip_param;
+
+   hs->tess_factor_ring_size = 32768 * info->max_se;
+   hs->tess_offchip_ring_offset = align(hs->tess_factor_ring_size, 64 * 1024);
+   hs->tess_offchip_ring_size = hs->max_offchip_buffers * hs->tess_offchip_block_dw_size * 4;
+}
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index b85821c87f3..814e259bed1 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -261,6 +261,18 @@ void ac_get_harvested_configs(struct radeon_info *info, unsigned raster_config,
 unsigned ac_get_compute_resource_limits(struct radeon_info *info, unsigned waves_per_threadgroup,
                                         unsigned max_waves_per_sh, unsigned threadgroups_per_cu);
 
+struct ac_hs_info {
+   uint32_t tess_offchip_block_dw_size;
+   uint32_t max_offchip_buffers;
+   uint32_t hs_offchip_param;
+   uint32_t tess_factor_ring_size;
+   uint32_t tess_offchip_ring_offset;
+   uint32_t tess_offchip_ring_size;
+};
+
+void ac_get_hs_info(struct radeon_info *info,
+                    struct ac_hs_info *hs);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index b850485b45c..9822deb815d 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2735,88 +2735,6 @@ radv_device_init_gs_info(struct radv_device *device)
                                                   device->physical_device->rad_info.family);
 }
 
-static void
-radv_device_init_hs_info(struct radv_device *device)
-{
-   bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
-                                 device->physical_device->rad_info.family != CHIP_CARRIZO &&
-                                 device->physical_device->rad_info.family != CHIP_STONEY;
-   unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
-   unsigned max_offchip_buffers;
-   unsigned offchip_granularity;
-   unsigned hs_offchip_param;
-
-   device->tess_offchip_block_dw_size =
-      device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
-
-   /*
-    * Per RadeonSI:
-    * This must be one less than the maximum number due to a hw limitation.
-    * Various hardware bugs need this.
-    *
-    * Per AMDVLK:
-    * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
-    * Gfx7 should limit max_offchip_buffers to 508
-    * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
-    *
-    * Follow AMDVLK here.
-    */
-   if (device->physical_device->rad_info.chip_class >= GFX10) {
-      max_offchip_buffers_per_se = 128;
-   } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
-              device->physical_device->rad_info.chip_class == GFX7 ||
-              device->physical_device->rad_info.chip_class == GFX6)
-      --max_offchip_buffers_per_se;
-
-   max_offchip_buffers = max_offchip_buffers_per_se * device->physical_device->rad_info.max_se;
-
-   /* Hawaii has a bug with offchip buffers > 256 that can be worked
-    * around by setting 4K granularity.
-    */
-   if (device->tess_offchip_block_dw_size == 4096) {
-      assert(device->physical_device->rad_info.family == CHIP_HAWAII);
-      offchip_granularity = V_03093C_X_4K_DWORDS;
-   } else {
-      assert(device->tess_offchip_block_dw_size == 8192);
-      offchip_granularity = V_03093C_X_8K_DWORDS;
-   }
-
-   switch (device->physical_device->rad_info.chip_class) {
-   case GFX6:
-      max_offchip_buffers = MIN2(max_offchip_buffers, 126);
-      break;
-   case GFX7:
-   case GFX8:
-   case GFX9:
-      max_offchip_buffers = MIN2(max_offchip_buffers, 508);
-      break;
-   case GFX10:
-      break;
-   default:
-      break;
-   }
-
-   device->max_offchip_buffers = max_offchip_buffers;
-
-   if (device->physical_device->rad_info.chip_class >= GFX10_3) {
-      hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
-                         S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
-   } else if (device->physical_device->rad_info.chip_class >= GFX7) {
-      if (device->physical_device->rad_info.chip_class >= GFX8)
-         --max_offchip_buffers;
-      hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
-                         S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
-   } else {
-      hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
-   }
-
-   device->hs_offchip_param = hs_offchip_param;
-
-   device->tess_factor_ring_size = 32768 * device->physical_device->rad_info.max_se;
-   device->tess_offchip_ring_offset = align(device->tess_factor_ring_size, 64 * 1024);
-   device->tess_offchip_ring_size = device->max_offchip_buffers * device->tess_offchip_block_dw_size * 4;
-}
-
 static VkResult
 radv_device_init_border_color(struct radv_device *device)
 {
@@ -3410,7 +3328,8 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
 
    radv_device_init_gs_info(device);
 
-   radv_device_init_hs_info(device);
+   ac_get_hs_info(&device->physical_device->rad_info,
+                  &device->hs);
 
    if (device->instance->debug_flags & RADV_DEBUG_HANG) {
       /* Enable GPU hangs detection and dump logs if a GPU hang is
@@ -3755,11 +3674,11 @@ radv_fill_shader_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_
 
    if (tess_rings_bo) {
       uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
-      uint64_t tess_offchip_va = tess_va + queue->device->tess_offchip_ring_offset;
+      uint64_t tess_offchip_va = tess_va + queue->device->hs.tess_offchip_ring_offset;
 
       desc[0] = tess_va;
       desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
-      desc[2] = queue->device->tess_factor_ring_size;
+      desc[2] = queue->device->hs.tess_factor_ring_size;
       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
 
@@ -3773,7 +3692,7 @@ radv_fill_shader_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_
 
       desc[4] = tess_offchip_va;
       desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
-      desc[6] = queue->device->tess_offchip_ring_size;
+      desc[6] = queue->device->hs.tess_offchip_ring_size;
       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
 
@@ -3834,7 +3753,7 @@ radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
    if (!tess_rings_bo)
       return;
 
-   tf_ring_size = queue->device->tess_factor_ring_size / 4;
+   tf_ring_size = queue->device->hs.tess_factor_ring_size / 4;
    tf_va = radv_buffer_get_va(tess_rings_bo);
 
    radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
@@ -3849,11 +3768,11 @@ radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
       } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
          radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
       }
-      radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, queue->device->hs_offchip_param);
+      radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, queue->device->hs.hs_offchip_param);
    } else {
       radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size));
       radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
-      radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, queue->device->hs_offchip_param);
+      radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, queue->device->hs.hs_offchip_param);
    }
 }
 
@@ -4051,7 +3970,7 @@ radv_update_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave
 
    if (add_tess_rings) {
       result = queue->device->ws->buffer_create(
-         queue->device->ws, queue->device->tess_offchip_ring_offset + queue->device->tess_offchip_ring_size, 256,
+         queue->device->ws, queue->device->hs.tess_offchip_ring_offset + queue->device->hs.tess_offchip_ring_size, 256,
          RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo);
       if (result != VK_SUCCESS)
          goto fail;
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index b1636671fa4..63b5400836e 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3553,7 +3553,7 @@ gather_tess_info(struct radv_device *device, struct radv_pipeline_stage *stages,
       tess_in_patch_size, tess_out_patch_size,
       stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_inputs,
       stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_outputs,
-      stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs, device->tess_offchip_block_dw_size,
+      stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs, device->hs.tess_offchip_block_dw_size,
       device->physical_device->rad_info.chip_class, device->physical_device->rad_info.family);
 
    /* LDS size used by VS+TCS for storing TCS inputs and outputs. */
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 4cbfc84df88..2e5917f9456 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -774,16 +774,11 @@ struct radv_device {
    int queue_count[RADV_MAX_QUEUE_FAMILIES];
 
    bool pbb_allowed;
-   uint32_t tess_offchip_block_dw_size;
    uint32_t scratch_waves;
    uint32_t dispatch_initiator;
 
    uint32_t gs_table_depth;
-   uint32_t hs_offchip_param;
-   uint32_t max_offchip_buffers;
-   uint32_t tess_offchip_ring_size;
-   uint32_t tess_offchip_ring_offset;
-   uint32_t tess_factor_ring_size;
+   struct ac_hs_info hs;
 
    /* MSAA sample locations.
     * The first index is the sample index.



More information about the mesa-commit mailing list