Mesa (master): radeonsi: support ARB_compute_variable_group_size
Nicolai Hähnle
nh at kemper.freedesktop.org
Mon Oct 10 08:39:56 UTC 2016
Module: Mesa
Branch: master
Commit: 77c81164bc1cd9ec98b32c40753f590791450434
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=77c81164bc1cd9ec98b32c40753f590791450434
Author: Nicolai Hähnle <nicolai.haehnle at amd.com>
Date: Fri Sep 9 10:08:11 2016 +0200
radeonsi: support ARB_compute_variable_group_size
Not sure if it's possible to avoid programming the block size twice (once for
the userdata and once for the dispatch).
Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net>
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
---
docs/features.txt | 2 +-
docs/relnotes/12.1.0.html | 2 +-
src/gallium/drivers/radeon/r600_pipe_common.c | 10 +++++-
src/gallium/drivers/radeon/r600_pipe_common.h | 2 ++
src/gallium/drivers/radeonsi/si_compute.c | 10 +++++-
src/gallium/drivers/radeonsi/si_shader.c | 44 ++++++++++++++++++---------
src/gallium/drivers/radeonsi/si_shader.h | 4 ++-
7 files changed, 55 insertions(+), 19 deletions(-)
diff --git a/docs/features.txt b/docs/features.txt
index e91ef6c..533971f 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -279,7 +279,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
GL_ARB_bindless_texture started (airlied)
GL_ARB_cl_event not started
- GL_ARB_compute_variable_group_size DONE (nvc0)
+ GL_ARB_compute_variable_group_size DONE (nvc0, radeonsi)
GL_ARB_ES3_2_compatibility DONE (i965/gen8+)
GL_ARB_fragment_shader_interlock not started
GL_ARB_gl_spirv not started
diff --git a/docs/relnotes/12.1.0.html b/docs/relnotes/12.1.0.html
index 9ddd99c..2e4b669 100644
--- a/docs/relnotes/12.1.0.html
+++ b/docs/relnotes/12.1.0.html
@@ -49,7 +49,7 @@ Note: some of the new features are only available with certain drivers.
<li>GL_ARB_ES3_1_compatibility on i965</li>
<li>GL_ARB_ES3_2_compatibility on i965/gen8+</li>
<li>GL_ARB_clear_texture on r600, radeonsi</li>
-<li>GL_ARB_compute_variable_group_size on nvc0</li>
+<li>GL_ARB_compute_variable_group_size on nvc0, radeonsi</li>
<li>GL_ARB_cull_distance on radeonsi</li>
<li>GL_ARB_enhanced_layouts on i965</li>
<li>GL_ARB_indirect_parameters on radeonsi</li>
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 44863ee..3dbcbc6 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -1037,7 +1037,15 @@ static int r600_get_compute_param(struct pipe_screen *screen,
}
return sizeof(uint32_t);
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
- return 0;
+ if (ret) {
+ uint64_t *max_variable_threads_per_block = ret;
+ if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
+ ir_type == PIPE_SHADER_IR_TGSI)
+ *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
+ else
+ *max_variable_threads_per_block = 0;
+ }
+ return sizeof(uint64_t);
}
fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 54991e8..290b228 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -106,6 +106,8 @@
#define R600_MAP_BUFFER_ALIGNMENT 64
#define R600_MAX_VIEWPORTS 16
+#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
+
enum r600_coherency {
R600_COHERENCY_NONE, /* no cache flushes needed */
R600_COHERENCY_SHADER,
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 1d1df2f..e59bafe 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -601,11 +601,19 @@ static void si_setup_tgsi_grid(struct si_context *sctx,
radeon_emit(cs, 0);
}
} else {
+ struct si_compute *program = sctx->cs_shader_state.program;
+ bool variable_group_size =
+ program->shader.selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
- radeon_set_sh_reg_seq(cs, grid_size_reg, 3);
+ radeon_set_sh_reg_seq(cs, grid_size_reg, variable_group_size ? 6 : 3);
radeon_emit(cs, info->grid[0]);
radeon_emit(cs, info->grid[1]);
radeon_emit(cs, info->grid[2]);
+ if (variable_group_size) {
+ radeon_emit(cs, info->block[0]);
+ radeon_emit(cs, info->block[1]);
+ radeon_emit(cs, info->block[2]);
+ }
}
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index ff51c8b..49d4121 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1770,16 +1770,21 @@ static void declare_system_value(
LLVMValueRef values[3];
unsigned i;
unsigned *properties = ctx->shader->selector->info.properties;
- unsigned sizes[3] = {
- properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
- properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
- properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]
- };
- for (i = 0; i < 3; ++i)
- values[i] = lp_build_const_int32(gallivm, sizes[i]);
+ if (properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] != 0) {
+ unsigned sizes[3] = {
+ properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
+ properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
+ properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]
+ };
+
+ for (i = 0; i < 3; ++i)
+ values[i] = lp_build_const_int32(gallivm, sizes[i]);
- value = lp_build_gather_values(gallivm, values, 3);
+ value = lp_build_gather_values(gallivm, values, 3);
+ } else {
+ value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_BLOCK_SIZE);
+ }
break;
}
@@ -5680,6 +5685,7 @@ static void create_function(struct si_shader_context *ctx)
case PIPE_SHADER_COMPUTE:
params[SI_PARAM_GRID_SIZE] = v3i32;
+ params[SI_PARAM_BLOCK_SIZE] = v3i32;
params[SI_PARAM_BLOCK_ID] = v3i32;
last_sgpr = SI_PARAM_BLOCK_ID;
@@ -5716,7 +5722,12 @@ static void create_function(struct si_shader_context *ctx)
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
- assert(max_work_group_size);
+ if (!max_work_group_size) {
+ /* This is a variable group size compute shader,
+ * compile it for the maximum possible group size.
+ */
+ max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
+ }
radeon_llvm_add_attribute(ctx->radeon_bld.main_fn,
"amdgpu-max-work-group-size",
@@ -6653,11 +6664,16 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
unsigned max_vgprs = 256;
unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
unsigned max_sgprs_per_wave = 128;
- unsigned min_waves_per_cu =
- DIV_ROUND_UP(props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
- props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
- props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH],
- wave_size);
+ unsigned max_block_threads;
+
+ if (props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH])
+ max_block_threads = props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
+ props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
+ props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
+ else
+ max_block_threads = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
+
+ unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size);
unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4);
max_vgprs = max_vgprs / min_waves_per_simd;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 67cb67d2..f2618ac 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -129,7 +129,8 @@ enum {
/* CS only */
SI_SGPR_GRID_SIZE = SI_NUM_RESOURCE_SGPRS,
- SI_CS_NUM_USER_SGPR = SI_SGPR_GRID_SIZE + 3
+ SI_SGPR_BLOCK_SIZE = SI_SGPR_GRID_SIZE + 3,
+ SI_CS_NUM_USER_SGPR = SI_SGPR_BLOCK_SIZE + 3
};
/* LLVM function parameter indices */
@@ -219,6 +220,7 @@ enum {
/* CS only parameters */
SI_PARAM_GRID_SIZE = SI_NUM_RESOURCE_PARAMS,
+ SI_PARAM_BLOCK_SIZE,
SI_PARAM_BLOCK_ID,
SI_PARAM_THREAD_ID,
More information about the mesa-commit
mailing list