[Mesa-dev] [PATCH] radeonsi: support TGSI compute shaders with variable block size
Ian Romanick
idr at freedesktop.org
Fri Sep 9 18:29:29 UTC 2016
On 09/09/2016 05:29 AM, Marek Olšák wrote:
> On Fri, Sep 9, 2016 at 10:12 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
>> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>>
>> Not sure if it's possible to avoid programming the block size twice (once for
>> the userdata and once for the dispatch).
>>
>> Since the shaders are compiled with a pessimistic upper limit on the number of
>> registers, asynchronously compiling variants may be worth considering in the
>> future if we observe the shaders to be dispatched with small block sizes.
>> ---
>> I think this is sufficient to support variable group sizes on radeonsi, but
>> it's completely untested. Do you keep the latest version of your series in a
>> public repository somewhere?
>>
>> src/gallium/drivers/radeonsi/si_compute.c | 10 +++++++++-
>> src/gallium/drivers/radeonsi/si_shader.c | 29 ++++++++++++++++++++---------
>> src/gallium/drivers/radeonsi/si_shader.h | 4 +++-
>> 3 files changed, 32 insertions(+), 11 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
>> index 5041761..26e096c 100644
>> --- a/src/gallium/drivers/radeonsi/si_compute.c
>> +++ b/src/gallium/drivers/radeonsi/si_compute.c
>> @@ -379,25 +379,33 @@ static void si_setup_tgsi_grid(struct si_context *sctx,
>> for (i = 0; i < 3; ++i) {
>> radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
>> radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
>> COPY_DATA_DST_SEL(COPY_DATA_REG));
>> radeon_emit(cs, (va + 4 * i));
>> radeon_emit(cs, (va + 4 * i) >> 32);
>> radeon_emit(cs, (grid_size_reg >> 2) + i);
>> radeon_emit(cs, 0);
>> }
>> } else {
>> + struct si_compute *program = sctx->cs_shader_state.program;
>> + bool variable_group_size =
>> + program->shader.selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
>>
>> - radeon_set_sh_reg_seq(cs, grid_size_reg, 3);
>> + radeon_set_sh_reg_seq(cs, grid_size_reg, variable_group_size ? 6 : 3);
>> radeon_emit(cs, info->grid[0]);
>> radeon_emit(cs, info->grid[1]);
>> radeon_emit(cs, info->grid[2]);
>> + if (variable_group_size) {
>> + radeon_emit(cs, info->block[0]);
>> + radeon_emit(cs, info->block[1]);
>> + radeon_emit(cs, info->block[2]);
>> + }
>> }
>> }
>>
>> static void si_emit_dispatch_packets(struct si_context *sctx,
>> const struct pipe_grid_info *info)
>> {
>> struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
>> bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off;
>> unsigned waves_per_threadgroup =
>> DIV_ROUND_UP(info->block[0] * info->block[1] * info->block[2], 64);
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
>> index 0b7de18..730ee21 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>> @@ -1783,30 +1783,35 @@ static void declare_system_value(
>>
>> case TGSI_SEMANTIC_GRID_SIZE:
>> value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_GRID_SIZE);
>> break;
>>
>> case TGSI_SEMANTIC_BLOCK_SIZE:
>> {
>> LLVMValueRef values[3];
>> unsigned i;
>> unsigned *properties = ctx->shader->selector->info.properties;
>> - unsigned sizes[3] = {
>> - properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
>> - properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
>> - properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]
>> - };
>>
>> - for (i = 0; i < 3; ++i)
>> - values[i] = lp_build_const_int32(gallivm, sizes[i]);
>> + if (properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] != 0) {
>> + unsigned sizes[3] = {
>> + properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
>> + properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
>> + properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]
>> + };
>> +
>> + for (i = 0; i < 3; ++i)
>> + values[i] = lp_build_const_int32(gallivm, sizes[i]);
>>
>> - value = lp_build_gather_values(gallivm, values, 3);
>> + value = lp_build_gather_values(gallivm, values, 3);
>> + } else {
>> + value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_BLOCK_SIZE);
>> + }
>> break;
>> }
>>
>> case TGSI_SEMANTIC_BLOCK_ID:
>> value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_BLOCK_ID);
>> break;
>>
>> case TGSI_SEMANTIC_THREAD_ID:
>> value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_THREAD_ID);
>> break;
>> @@ -5705,20 +5710,21 @@ static void create_function(struct si_shader_context *ctx)
>>
>> for (i = 0; i < num_return_sgprs; i++)
>> returns[i] = ctx->i32;
>> for (; i < num_returns; i++)
>> returns[i] = ctx->f32;
>> }
>> break;
>>
>> case PIPE_SHADER_COMPUTE:
>> params[SI_PARAM_GRID_SIZE] = v3i32;
>> + params[SI_PARAM_BLOCK_SIZE] = v3i32;
>> params[SI_PARAM_BLOCK_ID] = v3i32;
>> last_sgpr = SI_PARAM_BLOCK_ID;
>>
>> params[SI_PARAM_THREAD_ID] = v3i32;
>> num_params = SI_PARAM_THREAD_ID + 1;
>> break;
>> default:
>> assert(0 && "unimplemented shader");
>> return;
>> }
>> @@ -5741,21 +5747,26 @@ static void create_function(struct si_shader_context *ctx)
>> S_0286D0_LINEAR_CENTROID_ENA(1) |
>> S_0286D0_FRONT_FACE_ENA(1) |
>> S_0286D0_POS_FIXED_PT_ENA(1));
>> } else if (ctx->type == PIPE_SHADER_COMPUTE) {
>> const unsigned *properties = shader->selector->info.properties;
>> unsigned max_work_group_size =
>> properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
>> properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
>> properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
>>
>> - assert(max_work_group_size);
>> + if (!max_work_group_size) {
>> + /* This is a variable group size compute shader,
>> + * compile it for the maximum possible group size.
>> + */
>> + max_work_group_size = 2048;
>> + }
>
> The VGPR limit is 32. That's too limiting. I'd prefer it if
> GL_MAX_COMPUTE_VARIABLE_WORK_GROUP_INVOCATIONS_ARB was set to 512 or
> 1024. That should move the limit to 128 or 64, respectively.
The minimum allowed by the spec is 512, so it seems likely that most
applications will only expect that limit.
> Marek
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list