Mesa (main): ac: Add task shader ring information.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Jun 8 09:12:36 UTC 2022
Module: Mesa
Branch: main
Commit: ac5ab8d227e48e7572a4fa3311ac7c900aae3082
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ac5ab8d227e48e7572a4fa3311ac7c900aae3082
Author: Timur Kristóf <timur.kristof at gmail.com>
Date: Tue May 31 13:20:23 2022 +0200
ac: Add task shader ring information.
Similarly to tessellation rings information, move the task
rings info to ac_gpu_info.
Signed-off-by: Timur Kristóf <timur.kristof at gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16737>
---
src/amd/common/ac_gpu_info.c | 39 ++++++++++++++++++++++++++++++++++++
src/amd/common/ac_gpu_info.h | 44 +++++++++++++++++++++++++++++++++++++++++
src/amd/vulkan/radv_constants.h | 5 -----
src/amd/vulkan/radv_device.c | 19 +-----------------
src/amd/vulkan/radv_private.h | 4 +---
src/amd/vulkan/radv_shader.c | 8 ++++----
6 files changed, 89 insertions(+), 30 deletions(-)
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index e867228dec0..212dc7ef359 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -1858,3 +1858,42 @@ void ac_get_hs_info(struct radeon_info *info,
hs->tess_offchip_ring_offset = align(hs->tess_factor_ring_size, 64 * 1024);
hs->tess_offchip_ring_size = hs->max_offchip_buffers * hs->tess_offchip_block_dw_size * 4;
}
+
+static uint16_t get_task_num_entries(enum radeon_family fam)
+{
+ /* Number of task shader ring entries. Needs to be a power of two.
+ * Use a low number on smaller chips so we don't waste space,
+ * but keep it high on bigger chips so it doesn't inhibit parallelism.
+ *
+ * This number is compiled into task/mesh shaders as a constant.
+ * In order to ensure this works fine with the shader cache, we must
+ * base this decision on the chip family, not the number of CUs in
+ * the current GPU. (So, the cache remains consistent for all
+ * chips in the same family.)
+ */
+ switch (fam) {
+ case CHIP_VANGOGH:
+ case CHIP_NAVI24:
+ case CHIP_REMBRANDT:
+ return 256;
+ case CHIP_NAVI21:
+ case CHIP_NAVI22:
+ case CHIP_NAVI23:
+ default:
+ return 1024;
+ }
+}
+
+void ac_get_task_info(struct radeon_info *info,
+ struct ac_task_info *task_info)
+{
+ const uint16_t num_entries = get_task_num_entries(info->family);
+ const uint32_t draw_ring_bytes = num_entries * AC_TASK_DRAW_ENTRY_BYTES;
+ const uint32_t payload_ring_bytes = num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES;
+
+ /* Ensure that the addresses of each ring are 256 byte aligned. */
+ task_info->num_entries = num_entries;
+ task_info->draw_ring_offset = ALIGN(AC_TASK_CTRLBUF_BYTES, 256);
+ task_info->payload_ring_offset = ALIGN(task_info->draw_ring_offset + draw_ring_bytes, 256);
+ task_info->bo_size_bytes = task_info->payload_ring_offset + payload_ring_bytes;
+}
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 07f1cbb556f..9bcaf74d3a0 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -274,6 +274,50 @@ struct ac_hs_info {
void ac_get_hs_info(struct radeon_info *info,
struct ac_hs_info *hs);
+/* Task rings BO layout information.
+ * This BO is shared between GFX and ACE queues so that the ACE and GFX
+ * firmware can cooperate on task->mesh dispatches and is also used to
+ * store the task payload which is passed to mesh shaders.
+ *
+ * The driver only needs to create this BO once,
+ * and it will always be able to accommodate the maximum needed
+ * task payload size.
+ *
+ * The following memory layout is used:
+ * 1. Control buffer: 9 DWORDs, 256 byte aligned
+ * Used by the firmware to maintain the current state.
+ * (padding)
+ * 2. Draw ring: 4 DWORDs per entry, 256 byte aligned
+ * Task shaders store the mesh dispatch size here.
+ * (padding)
+ * 3. Payload ring: 16K bytes per entry, 256 byte aligned.
+ * This is where task payload is stored by task shaders and
+ * read by mesh shaders.
+ *
+ */
+struct ac_task_info {
+ uint32_t draw_ring_offset;
+ uint32_t payload_ring_offset;
+ uint32_t bo_size_bytes;
+ uint16_t num_entries;
+};
+
+/* Size of each payload entry in the task payload ring.
+ * Spec requires minimum 16K bytes.
+ */
+#define AC_TASK_PAYLOAD_ENTRY_BYTES 16384
+
+/* Size of each draw entry in the task draw ring.
+ * 4 DWORDs per entry.
+ */
+#define AC_TASK_DRAW_ENTRY_BYTES 16
+
+/* Size of the task control buffer. 9 DWORDs. */
+#define AC_TASK_CTRLBUF_BYTES 36
+
+void ac_get_task_info(struct radeon_info *info,
+ struct ac_task_info *task_info);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/amd/vulkan/radv_constants.h b/src/amd/vulkan/radv_constants.h
index c40330f3bc3..4b6d3b9667b 100644
--- a/src/amd/vulkan/radv_constants.h
+++ b/src/amd/vulkan/radv_constants.h
@@ -91,11 +91,6 @@
*/
#define RADV_MAX_MEMORY_ALLOCATION_SIZE 0xFFFFFFFCull
-/* Size of each payload entry in the task payload ring.
- * Spec requires minimum 16K bytes.
- */
-#define RADV_TASK_PAYLOAD_ENTRY_BYTES 16384
-
/* Number of invocations in each subgroup. */
#define RADV_SUBGROUP_SIZE 64
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index d19a41ae345..4f3d7d730cc 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -834,24 +834,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
ac_get_gs_table_depth(device->rad_info.gfx_level, device->rad_info.family);
ac_get_hs_info(&device->rad_info, &device->hs);
-
- /* Number of task shader ring entries. Needs to be a power of two.
- * Use a low number on smaller chips so we don't waste space,
- * but keep it high on bigger chips so it doesn't inhibit parallelism.
- */
- switch (device->rad_info.family) {
- case CHIP_VANGOGH:
- case CHIP_NAVI24:
- case CHIP_REMBRANDT:
- device->task_num_entries = 256;
- break;
- case CHIP_NAVI21:
- case CHIP_NAVI22:
- case CHIP_NAVI23:
- default:
- device->task_num_entries = 1024;
- break;
- }
+ ac_get_task_info(&device->rad_info, &device->task_info);
*device_out = device;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index d2295187b20..2f01c6e573f 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -332,9 +332,7 @@ struct radv_physical_device {
uint32_t gs_table_depth;
struct ac_hs_info hs;
-
- /* Number of entries in the task shader ring buffers. */
- uint32_t task_num_entries;
+ struct ac_task_info task_info;
};
struct radv_instance {
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 21174c2ed10..20dd59c3c83 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1094,12 +1094,12 @@ radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *sta
return true;
} else if (nir->info.stage == MESA_SHADER_TASK) {
ac_nir_apply_first_task_to_task_shader(nir);
- ac_nir_lower_task_outputs_to_mem(nir, RADV_TASK_PAYLOAD_ENTRY_BYTES,
- device->physical_device->task_num_entries);
+ ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
+ device->physical_device->task_info.num_entries);
return true;
} else if (nir->info.stage == MESA_SHADER_MESH) {
- ac_nir_lower_mesh_inputs_to_mem(nir, RADV_TASK_PAYLOAD_ENTRY_BYTES,
- device->physical_device->task_num_entries);
+ ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
+ device->physical_device->task_info.num_entries);
return true;
}
More information about the mesa-commit
mailing list