[Mesa-dev] [PATCH] radv: optimise compute dispatch to avoid looking up the sgpr repeatedly.
Dave Airlie
airlied at gmail.com
Wed Jun 7 03:48:05 UTC 2017
From: Dave Airlie <airlied at redhat.com>
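Look up the compute grid size user SGPR once at pipeline creation and cache its register offset in the pipeline, instead of looking it up on every dispatch. This is the same as we did for draw dispatch and vertex sgprs.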
---
src/amd/vulkan/radv_cmd_buffer.c | 23 +++++++++--------------
src/amd/vulkan/radv_pipeline.c | 6 ++++++
src/amd/vulkan/radv_private.h | 4 ++++
3 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index a069945..a4ddd7e 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2872,13 +2872,10 @@ void radv_CmdDispatch(
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
- MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
- if (loc->sgpr_idx != -1) {
- assert(!loc->indirect);
+ if (cmd_buffer->state.compute_pipeline->compute.cs_grid_size_sgpr) {
uint8_t grid_used = cmd_buffer->state.compute_pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
- assert(loc->num_sgprs == grid_used);
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used);
+ radeon_set_sh_reg_seq(cmd_buffer->cs, cmd_buffer->state.compute_pipeline->compute.cs_grid_size_sgpr,
+ grid_used);
radeon_emit(cmd_buffer->cs, x);
if (grid_used > 1)
radeon_emit(cmd_buffer->cs, y);
@@ -2912,9 +2909,9 @@ void radv_CmdDispatchIndirect(
radv_flush_compute_state(cmd_buffer);
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 25);
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
- MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
- if (loc->sgpr_idx != -1) {
+
+
+ if (cmd_buffer->state.compute_pipeline->compute.cs_grid_size_sgpr) {
uint8_t grid_used = cmd_buffer->state.compute_pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
for (unsigned i = 0; i < grid_used; ++i) {
radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
@@ -2922,7 +2919,7 @@ void radv_CmdDispatchIndirect(
COPY_DATA_DST_SEL(COPY_DATA_REG));
radeon_emit(cmd_buffer->cs, (va + 4 * i));
radeon_emit(cmd_buffer->cs, (va + 4 * i) >> 32);
- radeon_emit(cmd_buffer->cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i);
+ radeon_emit(cmd_buffer->cs, (cmd_buffer->state.compute_pipeline->compute.cs_grid_size_sgpr >> 2) + i);
radeon_emit(cmd_buffer->cs, 0);
}
}
@@ -2984,11 +2981,9 @@ void radv_unaligned_dispatch(
S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]) |
S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
- MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
- if (loc->sgpr_idx != -1) {
+ if (cmd_buffer->state.compute_pipeline->compute.cs_grid_size_sgpr) {
uint8_t grid_used = cmd_buffer->state.compute_pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used);
+ radeon_set_sh_reg_seq(cmd_buffer->cs, cmd_buffer->state.compute_pipeline->compute.cs_grid_size_sgpr, grid_used);
radeon_emit(cmd_buffer->cs, blocks[0]);
if (grid_used > 1)
radeon_emit(cmd_buffer->cs, blocks[1]);
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index ccbe20d..bda4c74 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2375,6 +2375,12 @@ static VkResult radv_compute_pipeline_create(
pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
+
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline,
+ MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
+ if (loc->sgpr_idx != -1) {
+ pipeline->compute.cs_grid_size_sgpr = R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4;
+ }
result = radv_pipeline_scratch_init(device, pipeline);
if (result != VK_SUCCESS) {
radv_pipeline_destroy(device, pipeline, pAllocator);
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 8f60d9b..29db05c 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1090,6 +1090,10 @@ struct radv_pipeline {
bool tess_partial_vs_wave;
bool partial_es_wave;
} graphics;
+
+ struct {
+ uint32_t cs_grid_size_sgpr;
+ } compute;
};
unsigned max_waves;
--
2.9.4
More information about the mesa-dev
mailing list