Mesa (main): radv/ac: introduce a new common function for hs calcs.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed May 11 02:50:36 UTC 2022
Module: Mesa
Branch: main
Commit: d4c7ffc550f6df9bb23ec704d0b05825ad3da1ce
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d4c7ffc550f6df9bb23ec704d0b05825ad3da1ce
Author: Dave Airlie <airlied at redhat.com>
Date: Tue May 10 11:41:54 2022 +1000
radv/ac: introduce a new common function for hs calcs.
This ports the radv code to the new ac code.
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16415>
---
src/amd/common/ac_gpu_info.c | 82 ++++++++++++++++++++++++++++++++++
src/amd/common/ac_gpu_info.h | 12 +++++
src/amd/vulkan/radv_device.c | 99 ++++--------------------------------------
src/amd/vulkan/radv_pipeline.c | 2 +-
src/amd/vulkan/radv_private.h | 7 +--
5 files changed, 105 insertions(+), 97 deletions(-)
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index eb3562eb84e..e1f1022c52d 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -1756,3 +1756,85 @@ unsigned ac_get_compute_resource_limits(struct radeon_info *info, unsigned waves
}
return compute_resource_limits;
}
+
+void ac_get_hs_info(struct radeon_info *info,
+ struct ac_hs_info *hs)
+{
+ bool double_offchip_buffers = info->chip_class >= GFX7 &&
+ info->family != CHIP_CARRIZO &&
+ info->family != CHIP_STONEY;
+ unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
+ unsigned max_offchip_buffers;
+ unsigned offchip_granularity;
+ unsigned hs_offchip_param;
+
+ hs->tess_offchip_block_dw_size =
+ info->family == CHIP_HAWAII ? 4096 : 8192;
+
+ /*
+ * Per RadeonSI:
+ * This must be one less than the maximum number due to a hw limitation.
+ * Various hardware bugs need this.
+ *
+ * Per AMDVLK:
+ * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
+ * Gfx7 should limit max_offchip_buffers to 508
+ * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
+ *
+ * Follow AMDVLK here.
+ */
+ if (info->chip_class >= GFX10) {
+ max_offchip_buffers_per_se = 128;
+ } else if (info->family == CHIP_VEGA10 ||
+ info->chip_class == GFX7 ||
+ info->chip_class == GFX6)
+ --max_offchip_buffers_per_se;
+
+ max_offchip_buffers = max_offchip_buffers_per_se * info->max_se;
+
+ /* Hawaii has a bug with offchip buffers > 256 that can be worked
+ * around by setting 4K granularity.
+ */
+ if (hs->tess_offchip_block_dw_size == 4096) {
+ assert(info->family == CHIP_HAWAII);
+ offchip_granularity = V_03093C_X_4K_DWORDS;
+ } else {
+ assert(hs->tess_offchip_block_dw_size == 8192);
+ offchip_granularity = V_03093C_X_8K_DWORDS;
+ }
+
+ switch (info->chip_class) {
+ case GFX6:
+ max_offchip_buffers = MIN2(max_offchip_buffers, 126);
+ break;
+ case GFX7:
+ case GFX8:
+ case GFX9:
+ max_offchip_buffers = MIN2(max_offchip_buffers, 508);
+ break;
+ case GFX10:
+ break;
+ default:
+ break;
+ }
+
+ hs->max_offchip_buffers = max_offchip_buffers;
+
+ if (info->chip_class >= GFX10_3) {
+ hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
+ S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
+ } else if (info->chip_class >= GFX7) {
+ if (info->chip_class >= GFX8)
+ --max_offchip_buffers;
+ hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
+ S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
+ } else {
+ hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
+ }
+
+ hs->hs_offchip_param = hs_offchip_param;
+
+ hs->tess_factor_ring_size = 32768 * info->max_se;
+ hs->tess_offchip_ring_offset = align(hs->tess_factor_ring_size, 64 * 1024);
+ hs->tess_offchip_ring_size = hs->max_offchip_buffers * hs->tess_offchip_block_dw_size * 4;
+}
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index b85821c87f3..814e259bed1 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -261,6 +261,18 @@ void ac_get_harvested_configs(struct radeon_info *info, unsigned raster_config,
unsigned ac_get_compute_resource_limits(struct radeon_info *info, unsigned waves_per_threadgroup,
unsigned max_waves_per_sh, unsigned threadgroups_per_cu);
+struct ac_hs_info {
+ uint32_t tess_offchip_block_dw_size;
+ uint32_t max_offchip_buffers;
+ uint32_t hs_offchip_param;
+ uint32_t tess_factor_ring_size;
+ uint32_t tess_offchip_ring_offset;
+ uint32_t tess_offchip_ring_size;
+};
+
+void ac_get_hs_info(struct radeon_info *info,
+ struct ac_hs_info *hs);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index b850485b45c..9822deb815d 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2735,88 +2735,6 @@ radv_device_init_gs_info(struct radv_device *device)
device->physical_device->rad_info.family);
}
-static void
-radv_device_init_hs_info(struct radv_device *device)
-{
- bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
- device->physical_device->rad_info.family != CHIP_CARRIZO &&
- device->physical_device->rad_info.family != CHIP_STONEY;
- unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
- unsigned max_offchip_buffers;
- unsigned offchip_granularity;
- unsigned hs_offchip_param;
-
- device->tess_offchip_block_dw_size =
- device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
-
- /*
- * Per RadeonSI:
- * This must be one less than the maximum number due to a hw limitation.
- * Various hardware bugs need this.
- *
- * Per AMDVLK:
- * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
- * Gfx7 should limit max_offchip_buffers to 508
- * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
- *
- * Follow AMDVLK here.
- */
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- max_offchip_buffers_per_se = 128;
- } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
- device->physical_device->rad_info.chip_class == GFX7 ||
- device->physical_device->rad_info.chip_class == GFX6)
- --max_offchip_buffers_per_se;
-
- max_offchip_buffers = max_offchip_buffers_per_se * device->physical_device->rad_info.max_se;
-
- /* Hawaii has a bug with offchip buffers > 256 that can be worked
- * around by setting 4K granularity.
- */
- if (device->tess_offchip_block_dw_size == 4096) {
- assert(device->physical_device->rad_info.family == CHIP_HAWAII);
- offchip_granularity = V_03093C_X_4K_DWORDS;
- } else {
- assert(device->tess_offchip_block_dw_size == 8192);
- offchip_granularity = V_03093C_X_8K_DWORDS;
- }
-
- switch (device->physical_device->rad_info.chip_class) {
- case GFX6:
- max_offchip_buffers = MIN2(max_offchip_buffers, 126);
- break;
- case GFX7:
- case GFX8:
- case GFX9:
- max_offchip_buffers = MIN2(max_offchip_buffers, 508);
- break;
- case GFX10:
- break;
- default:
- break;
- }
-
- device->max_offchip_buffers = max_offchip_buffers;
-
- if (device->physical_device->rad_info.chip_class >= GFX10_3) {
- hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
- S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
- } else if (device->physical_device->rad_info.chip_class >= GFX7) {
- if (device->physical_device->rad_info.chip_class >= GFX8)
- --max_offchip_buffers;
- hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
- S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
- } else {
- hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
- }
-
- device->hs_offchip_param = hs_offchip_param;
-
- device->tess_factor_ring_size = 32768 * device->physical_device->rad_info.max_se;
- device->tess_offchip_ring_offset = align(device->tess_factor_ring_size, 64 * 1024);
- device->tess_offchip_ring_size = device->max_offchip_buffers * device->tess_offchip_block_dw_size * 4;
-}
-
static VkResult
radv_device_init_border_color(struct radv_device *device)
{
@@ -3410,7 +3328,8 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
radv_device_init_gs_info(device);
- radv_device_init_hs_info(device);
+ ac_get_hs_info(&device->physical_device->rad_info,
+ &device->hs);
if (device->instance->debug_flags & RADV_DEBUG_HANG) {
/* Enable GPU hangs detection and dump logs if a GPU hang is
@@ -3755,11 +3674,11 @@ radv_fill_shader_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_
if (tess_rings_bo) {
uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
- uint64_t tess_offchip_va = tess_va + queue->device->tess_offchip_ring_offset;
+ uint64_t tess_offchip_va = tess_va + queue->device->hs.tess_offchip_ring_offset;
desc[0] = tess_va;
desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
- desc[2] = queue->device->tess_factor_ring_size;
+ desc[2] = queue->device->hs.tess_factor_ring_size;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
@@ -3773,7 +3692,7 @@ radv_fill_shader_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_
desc[4] = tess_offchip_va;
desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
- desc[6] = queue->device->tess_offchip_ring_size;
+ desc[6] = queue->device->hs.tess_offchip_ring_size;
desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
@@ -3834,7 +3753,7 @@ radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
if (!tess_rings_bo)
return;
- tf_ring_size = queue->device->tess_factor_ring_size / 4;
+ tf_ring_size = queue->device->hs.tess_factor_ring_size / 4;
tf_va = radv_buffer_get_va(tess_rings_bo);
radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
@@ -3849,11 +3768,11 @@ radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
} else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
}
- radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, queue->device->hs_offchip_param);
+ radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, queue->device->hs.hs_offchip_param);
} else {
radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size));
radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
- radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, queue->device->hs_offchip_param);
+ radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, queue->device->hs.hs_offchip_param);
}
}
@@ -4051,7 +3970,7 @@ radv_update_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave
if (add_tess_rings) {
result = queue->device->ws->buffer_create(
- queue->device->ws, queue->device->tess_offchip_ring_offset + queue->device->tess_offchip_ring_size, 256,
+ queue->device->ws, queue->device->hs.tess_offchip_ring_offset + queue->device->hs.tess_offchip_ring_size, 256,
RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo);
if (result != VK_SUCCESS)
goto fail;
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index b1636671fa4..63b5400836e 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3553,7 +3553,7 @@ gather_tess_info(struct radv_device *device, struct radv_pipeline_stage *stages,
tess_in_patch_size, tess_out_patch_size,
stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_inputs,
stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_outputs,
- stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs, device->tess_offchip_block_dw_size,
+ stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs, device->hs.tess_offchip_block_dw_size,
device->physical_device->rad_info.chip_class, device->physical_device->rad_info.family);
/* LDS size used by VS+TCS for storing TCS inputs and outputs. */
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 4cbfc84df88..2e5917f9456 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -774,16 +774,11 @@ struct radv_device {
int queue_count[RADV_MAX_QUEUE_FAMILIES];
bool pbb_allowed;
- uint32_t tess_offchip_block_dw_size;
uint32_t scratch_waves;
uint32_t dispatch_initiator;
uint32_t gs_table_depth;
- uint32_t hs_offchip_param;
- uint32_t max_offchip_buffers;
- uint32_t tess_offchip_ring_size;
- uint32_t tess_offchip_ring_offset;
- uint32_t tess_factor_ring_size;
+ struct ac_hs_info hs;
/* MSAA sample locations.
* The first index is the sample index.
More information about the mesa-commit mailing list