[Mesa-dev] [PATCH 14/20] radeonsi: implement TGSI compute dispatch
Marek Olšák
maraeo at gmail.com
Mon Apr 4 17:41:08 UTC 2016
This should use radeon_set_sh_reg... like other patches.
Marek
On Sat, Apr 2, 2016 at 3:10 PM, Bas Nieuwenhuizen
<bas at basnieuwenhuizen.nl> wrote:
> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> ---
> src/gallium/drivers/radeonsi/si_compute.c | 104 ++++++++++++++++++++++--------
> 1 file changed, 77 insertions(+), 27 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
> index 74db8d4..64ad2f3 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -346,13 +346,85 @@ static void si_upload_compute_input(struct si_context *sctx,
> pipe_resource_reference((struct pipe_resource**)&input_buffer, NULL);
> }
>
> +static void si_setup_tgsi_grid(struct si_context *sctx,
> + const struct pipe_grid_info *info)
> +{
> + struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> + unsigned grid_size_reg = R_00B900_COMPUTE_USER_DATA_0 +
> + 4 * SI_SGPR_GRID_SIZE;
> +
> + if (info->indirect) {
> + uint64_t base_va = r600_resource(info->indirect)->gpu_address;
> + uint64_t va = base_va + info->indirect_offset;
> + int i;
> +
> + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> + (struct r600_resource *)info->indirect,
> + RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
> +
> + for (i = 0; i < 3; ++i) {
> + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
> + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
> + COPY_DATA_DST_SEL(COPY_DATA_REG));
> + radeon_emit(cs, (va + 4 * i));
> + radeon_emit(cs, (va + 4 * i) >> 32);
> + radeon_emit(cs, (grid_size_reg >> 2) + i);
> + radeon_emit(cs, 0);
> + }
> + } else {
> +
> + radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 3, 0));
> + radeon_emit(cs, (grid_size_reg - SI_SH_REG_OFFSET) >> 2);
> + radeon_emit(cs, info->grid[0]);
> + radeon_emit(cs, info->grid[1]);
> + radeon_emit(cs, info->grid[2]);
> + }
> +}
> +
> +static void si_emit_dispatch_packets(struct si_context *sctx,
> + const struct pipe_grid_info *info)
> +{
> + struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> +
> + radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 3, 0));
> + radeon_emit(cs, (R_00B81C_COMPUTE_NUM_THREAD_X - SI_SH_REG_OFFSET) >> 2);
> + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]));
> + radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]));
> + radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]));
> +
> + if (info->indirect) {
> + uint64_t base_va = r600_resource(info->indirect)->gpu_address;
> +
> + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> + (struct r600_resource *)info->indirect,
> + RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
> +
> + radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) |
> + PKT3_SHADER_TYPE_S(1));
> + radeon_emit(cs, 1);
> + radeon_emit(cs, base_va);
> + radeon_emit(cs, base_va >> 32);
> +
> + radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) |
> + PKT3_SHADER_TYPE_S(1));
> + radeon_emit(cs, info->indirect_offset);
> + radeon_emit(cs, 1);
> + } else {
> + radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
> + PKT3_SHADER_TYPE_S(1));
> + radeon_emit(cs, info->grid[0]);
> + radeon_emit(cs, info->grid[1]);
> + radeon_emit(cs, info->grid[2]);
> + radeon_emit(cs, 1);
> + }
> +}
> +
> static void si_launch_grid(
> struct pipe_context *ctx, const struct pipe_grid_info *info)
> {
> struct si_context *sctx = (struct si_context*)ctx;
> struct si_compute *program = sctx->cs_shader_state.program;
> - struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
> - unsigned i;
> + int i;
>
> si_need_cs_space(sctx);
>
> @@ -367,21 +439,12 @@ static void si_launch_grid(
> SI_CONTEXT_FLAG_COMPUTE;
> si_emit_cache_flush(sctx, NULL);
>
> - pm4->compute_pkt = true;
> -
> if (!si_switch_compute_shader(sctx, program, &program->shader, info->pc))
> return;
>
> if (program->input_size)
> si_upload_compute_input(sctx, info);
>
> - si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X,
> - S_00B81C_NUM_THREAD_FULL(info->block[0]));
> - si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y,
> - S_00B820_NUM_THREAD_FULL(info->block[1]));
> - si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z,
> - S_00B824_NUM_THREAD_FULL(info->block[2]));
> -
> /* Global buffers */
> for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) {
> struct r600_resource *buffer =
> @@ -394,23 +457,10 @@ static void si_launch_grid(
> RADEON_PRIO_COMPUTE_GLOBAL);
> }
>
> - si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT);
> - si_pm4_cmd_add(pm4, info->grid[0]); /* Thread groups DIM_X */
> - si_pm4_cmd_add(pm4, info->grid[1]); /* Thread groups DIM_Y */
> - si_pm4_cmd_add(pm4, info->grid[2]); /* Thread gropus DIM_Z */
> - si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */
> - si_pm4_cmd_end(pm4, false);
> -
> - si_pm4_emit(sctx, pm4);
> -
> -#if 0
> - fprintf(stderr, "cdw: %i\n", sctx->cs->cdw);
> - for (i = 0; i < sctx->cs->cdw; i++) {
> - fprintf(stderr, "%4i : 0x%08X\n", i, sctx->cs->buf[i]);
> - }
> -#endif
> + if (program->ir_type == PIPE_SHADER_IR_TGSI)
> + si_setup_tgsi_grid(sctx, info);
>
> - si_pm4_free_state(sctx, pm4, ~0);
> + si_emit_dispatch_packets(sctx, info);
>
> sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
> SI_CONTEXT_INV_VMEM_L1 |
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list