[Mesa-dev] [PATCH 3/9] radv: optimise compute shader grid size emission.
Dave Airlie
airlied at gmail.com
Tue Apr 18 03:57:06 UTC 2017
From: Dave Airlie <airlied at redhat.com>
Signed-off-by: Dave Airlie <airlied at redhat.com>
---
src/amd/common/ac_nir_to_llvm.c | 13 ++++++++-----
src/amd/common/ac_shader_info.c | 3 +++
src/amd/common/ac_shader_info.h | 3 +++
src/amd/vulkan/radv_cmd_buffer.c | 23 +++++++++++++++--------
4 files changed, 29 insertions(+), 13 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7161caf..7d19a78 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -604,7 +604,8 @@ static void create_function(struct nir_to_llvm_context *ctx)
switch (ctx->stage) {
case MESA_SHADER_COMPUTE:
- arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */
+ if (ctx->shader_info->info.cs.grid_components_used)
+ arg_types[arg_idx++] = LLVMVectorType(ctx->i32, ctx->shader_info->info.cs.grid_components_used); /* grid size */
user_sgpr_count = arg_idx;
arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
arg_types[arg_idx++] = ctx->i32;
@@ -765,10 +766,12 @@ static void create_function(struct nir_to_llvm_context *ctx)
switch (ctx->stage) {
case MESA_SHADER_COMPUTE:
- set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3);
- user_sgpr_idx += 3;
- ctx->num_work_groups =
- LLVMGetParam(ctx->main_function, arg_idx++);
+ if (ctx->shader_info->info.cs.grid_components_used) {
+ set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, ctx->shader_info->info.cs.grid_components_used);
+ user_sgpr_idx += ctx->shader_info->info.cs.grid_components_used;
+ ctx->num_work_groups =
+ LLVMGetParam(ctx->main_function, arg_idx++);
+ }
ctx->workgroup_ids =
LLVMGetParam(ctx->main_function, arg_idx++);
ctx->tg_size =
diff --git a/src/amd/common/ac_shader_info.c b/src/amd/common/ac_shader_info.c
index 5061860..6be66a9 100644
--- a/src/amd/common/ac_shader_info.c
+++ b/src/amd/common/ac_shader_info.c
@@ -37,6 +37,9 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, struct ac_shader_info *info)
case nir_intrinsic_load_draw_id:
info->vs.needs_draw_id = true;
break;
+ case nir_intrinsic_load_num_work_groups:
+ info->cs.grid_components_used = instr->num_components;
+ break;
default:
break;
}
diff --git a/src/amd/common/ac_shader_info.h b/src/amd/common/ac_shader_info.h
index 9d43637..5acfb21 100644
--- a/src/amd/common/ac_shader_info.h
+++ b/src/amd/common/ac_shader_info.h
@@ -36,6 +36,9 @@ struct ac_shader_info {
struct {
bool needs_sample_positions;
} ps;
+ struct {
+ uint8_t grid_components_used;
+ } cs;
};
void
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 22ae0c4..343e043 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2791,11 +2791,14 @@ void radv_CmdDispatch(
MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) {
assert(!loc->indirect);
- assert(loc->num_sgprs == 3);
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
+ uint8_t grid_used = cmd_buffer->state.pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
+ assert(loc->num_sgprs == grid_used);
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used);
radeon_emit(cmd_buffer->cs, x);
- radeon_emit(cmd_buffer->cs, y);
- radeon_emit(cmd_buffer->cs, z);
+ if (grid_used > 1)
+ radeon_emit(cmd_buffer->cs, y);
+ if (grid_used > 2)
+ radeon_emit(cmd_buffer->cs, z);
}
radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
@@ -2827,7 +2830,8 @@ void radv_CmdDispatchIndirect(
struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) {
- for (unsigned i = 0; i < 3; ++i) {
+ uint8_t grid_used = cmd_buffer->state.pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
+ for (unsigned i = 0; i < grid_used; ++i) {
radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
COPY_DATA_DST_SEL(COPY_DATA_REG));
@@ -2898,10 +2902,13 @@ void radv_unaligned_dispatch(
struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) {
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
+ uint8_t grid_used = cmd_buffer->state.pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used);
radeon_emit(cmd_buffer->cs, blocks[0]);
- radeon_emit(cmd_buffer->cs, blocks[1]);
- radeon_emit(cmd_buffer->cs, blocks[2]);
+ if (grid_used > 1)
+ radeon_emit(cmd_buffer->cs, blocks[1]);
+ if (grid_used > 2)
+ radeon_emit(cmd_buffer->cs, blocks[2]);
}
radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
PKT3_SHADER_TYPE_S(1));
--
2.9.3
More information about the mesa-dev
mailing list