[Mesa-dev] [PATCH 09/10] radeonsi/compute: Enable PIPE_SHADER_IR_NATIVE for compute shaders

Wed Oct 8 02:22:42 PDT 2014

There is already a function called si_shader_create, so I don't think it
would be nice to add si_create_shader as well. Can we choose better names
here? (For both functions, if needed.)

Marek

On Mon, Oct 6, 2014 at 9:44 PM, Tom Stellard <thomas.stellard at amd.com> wrote:
> ---
>  src/gallium/drivers/radeonsi/si_compute.c |  51 +++++----------
>  src/gallium/drivers/radeonsi/si_pipe.c    |   2 +-
>  src/gallium/drivers/radeonsi/si_shader.c  | 104 ++++++++++++++++++------------
>  src/gallium/drivers/radeonsi/si_shader.h  |   7 ++
>  4 files changed, 88 insertions(+), 76 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
> index 490845b..a133380 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -23,14 +23,14 @@
>   */
>
>  #include "util/u_memory.h"
> +#include "radeon/r600_pipe_common.h"
> +#include "radeon/radeon_elf_util.h"
>
>  #include "radeon/r600_cs.h"
>  #include "si_pipe.h"
>  #include "si_shader.h"
>  #include "sid.h"
>
> -#include "radeon/radeon_llvm_util.h"
> -
>  #define MAX_GLOBAL_BUFFERS 20
>  #define NUM_USER_SGPRS 4
>
> @@ -40,14 +40,12 @@ struct si_compute {
>         unsigned local_size;
>         unsigned private_size;
>         unsigned input_size;
> -       unsigned num_kernels;
> -       struct si_shader *kernels;
> +       struct radeon_shader_binary binary;
> +       struct si_shader program;
>         unsigned num_user_sgprs;
>
>         struct r600_resource *input_buffer;
>         struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
> -
> -       LLVMContextRef llvm_ctx;
>  };
>
>  static void *si_create_compute_state(
> @@ -57,10 +55,7 @@ static void *si_create_compute_state(
>         struct si_context *sctx = (struct si_context *)ctx;
>         struct si_compute *program = CALLOC_STRUCT(si_compute);
>         const struct pipe_llvm_program_header *header;
> -       const unsigned char *code;
> -       unsigned i;
> -
> -       program->llvm_ctx = LLVMContextCreate();
> +       const char *code;
>
>         header = cso->prog;
>         code = cso->prog + sizeof(struct pipe_llvm_program_header);
> @@ -70,16 +65,9 @@ static void *si_create_compute_state(
>         program->private_size = cso->req_private_mem;
>         program->input_size = cso->req_input_mem;
>
> -       program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, code,
> -                                                       header->num_bytes);
> -       program->kernels = CALLOC(sizeof(struct si_shader),
> -                                                       program->num_kernels);
> -       for (i = 0; i < program->num_kernels; i++) {
> -               LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
> -                                                       code, header->num_bytes);
> -               si_compile_llvm(sctx->screen, &program->kernels[i], mod);
> -               LLVMDisposeModule(mod);
> -       }
> +       memset(&program->binary, 0, sizeof(program->binary));
> +       radeon_elf_read(code, header->num_bytes, &program->binary, true);
> +       si_create_shader(sctx->screen, &program->program, &program->binary);
>
>         program->input_buffer = si_resource_create_custom(sctx->b.b.screen,
>                 PIPE_USAGE_IMMUTABLE, program->input_size);
> @@ -177,7 +165,7 @@ static void si_launch_grid(
>         uint64_t shader_va;
>         unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
>         unsigned i;
> -       struct si_shader *shader = &program->kernels[pc];
> +       struct si_shader *shader = &program->program;
>         unsigned lds_blocks;
>         unsigned num_waves_for_scratch;
>
> @@ -194,6 +182,9 @@ static void si_launch_grid(
>
>         pm4->compute_pkt = true;
>
> +       /* Read the config information */
> +       si_shader_binary_read_config(&program->binary, &program->program, pc);
> +
>         /* Upload the kernel arguments */
>
>         /* The extra num_work_size_bytes are for work group / work item size information */
> @@ -285,7 +276,7 @@ static void si_launch_grid(
>                                                 0x190 /* Default value */);
>         }
>
> -       shader_va = shader->bo->gpu_address;
> +       shader_va = shader->bo->gpu_address + pc;
>         si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
>         si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
>         si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
> @@ -384,22 +375,12 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
>                 return;
>         }
>
> -       if (program->kernels) {
> -               for (int i = 0; i < program->num_kernels; i++){
> -                       if (program->kernels[i].bo){
> -                               si_shader_destroy(ctx, &program->kernels[i]);
> -                       }
> -               }
> -               FREE(program->kernels);
> -       }
> -
> -       if (program->llvm_ctx){
> -               LLVMContextDispose(program->llvm_ctx);
> -       }
>         pipe_resource_reference(
>                 (struct pipe_resource **)&program->input_buffer, NULL);
>
> -       //And then free the program itself.
> +       FREE(program->binary.code);
> +       FREE(program->binary.config);
> +       FREE(program->binary.rodata);
>         FREE(program);
>  }
>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index 2cce5cc..ad6f518 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -334,7 +334,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
>         case PIPE_SHADER_COMPUTE:
>                 switch (param) {
>                 case PIPE_SHADER_CAP_PREFERRED_IR:
> -                       return PIPE_SHADER_IR_LLVM;
> +                       return PIPE_SHADER_IR_NATIVE;
>                 case PIPE_SHADER_CAP_DOUBLES:
>                         return 0; /* XXX: Enable doubles once the compiler can
>                                      handle them. */
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 9d2cc80..401da1b 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -33,6 +33,7 @@
>  #include "gallivm/lp_bld_arit.h"
>  #include "gallivm/lp_bld_flow.h"
>  #include "radeon/radeon_llvm.h"
> +#include "radeon/radeon_elf_util.h"
>  #include "radeon/radeon_llvm_emit.h"
>  #include "util/u_memory.h"
>  #include "tgsi/tgsi_parse.h"
> @@ -2625,52 +2626,34 @@ static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
>         }
>  }
>
> -int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
> -                   LLVMModuleRef mod)
> +void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
> +                               struct si_shader *shader,
> +                               unsigned symbol_offset)
>  {
> -       unsigned r; /* llvm_compile result */
>         unsigned i;
> -       unsigned char *ptr;
> -       struct radeon_shader_binary binary;
> -       bool dump = r600_can_dump_shader(&sscreen->b,
> -                       shader->selector ? shader->selector->tokens : NULL);
> -       const char * gpu_family = r600_get_llvm_processor_name(sscreen->b.family);
> -       unsigned code_size;
> -
> -       /* Use LLVM to compile shader */
> -       memset(&binary, 0, sizeof(binary));
> -       r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
> -
> -       /* Output binary dump if rscreen->debug_flags are set */
> -       if (dump && ! binary.disassembled) {
> -               fprintf(stderr, "SI CODE:\n");
> -               for (i = 0; i < binary.code_size; i+=4 ) {
> -                       fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3],
> -                               binary.code[i + 2], binary.code[i + 1],
> -                               binary.code[i]);
> -               }
> -       }
> +       const unsigned char *config =
> +               radeon_shader_binary_config_start(binary, symbol_offset);
>
>         /* XXX: We may be able to emit some of these values directly rather than
>          * extracting fields to be emitted later.
>          */
> -       /* Parse config data in compiled binary */
> -       for (i = 0; i < binary.config_size; i+= 8) {
> -               unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + i));
> -               unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
> +
> +       for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
> +               unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
> +               unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
>                 switch (reg) {
>                 case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
>                 case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
>                 case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
>                 case R_00B848_COMPUTE_PGM_RSRC1:
> -                       shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8;
> -                       shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4;
> +                       shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
> +                       shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
>                         break;
>                 case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
> -                       shader->lds_size = G_00B02C_EXTRA_LDS_SIZE(value);
> +                       shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
>                         break;
>                 case R_00B84C_COMPUTE_PGM_RSRC2:
> -                       shader->lds_size = G_00B84C_LDS_SIZE(value);
> +                       shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
>                         break;
>                 case R_0286CC_SPI_PS_INPUT_ENA:
>                         shader->spi_ps_input_ena = value;
> @@ -2686,9 +2669,32 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
>                         break;
>                 }
>         }
> +}
> +
> +int si_create_shader(struct si_screen *sscreen,
> +               struct si_shader *shader,
> +               const struct radeon_shader_binary *binary)
> +{
> +
> +       unsigned i;
> +       unsigned code_size;
> +       unsigned char *ptr;
> +       bool dump  = r600_can_dump_shader(&sscreen->b,
> +               shader->selector ? shader->selector->tokens : NULL);
> +
> +       if (dump && !binary->disassembled) {
> +               fprintf(stderr, "SI CODE:\n");
> +               for (i = 0; i < binary->code_size; i+=4 ) {
> +                       fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
> +                               binary->code[i + 2], binary->code[i + 1],
> +                               binary->code[i]);
> +               }
> +       }
> +
> +       si_shader_binary_read_config(binary, shader, 0);
>
>         /* copy new shader */
> -       code_size = binary.code_size + binary.rodata_size;
> +       code_size = binary->code_size + binary->rodata_size;
>         r600_resource_reference(&shader->bo, NULL);
>         shader->bo = si_resource_create_custom(&sscreen->b.b, PIPE_USAGE_IMMUTABLE,
>                                                code_size);
> @@ -2696,19 +2702,37 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
>                 return -ENOMEM;
>         }
>
> -       ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_WRITE);
> -       util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size);
> -       if (binary.rodata_size > 0) {
> -               ptr += binary.code_size;
> -               util_memcpy_cpu_to_le32(ptr, binary.rodata, binary.rodata_size);
> +
> +       ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_READ_WRITE);
> +       util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
> +       if (binary->rodata_size > 0) {
> +               ptr += binary->code_size;
> +               util_memcpy_cpu_to_le32(ptr, binary->rodata, binary->rodata_size);
>         }
>
>         sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
>
> -       free(binary.code);
> -       free(binary.config);
> -       free(binary.rodata);
> +       return 0;
> +}
> +
> +int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
> +                                                       LLVMModuleRef mod)
> +{
> +       int r = 0;
> +       struct radeon_shader_binary binary;
> +       bool dump = r600_can_dump_shader(&sscreen->b,
> +                       shader->selector ? shader->selector->tokens : NULL);
> +       memset(&binary, 0, sizeof(binary));
> +       r = radeon_llvm_compile(mod, &binary,
> +               r600_get_llvm_processor_name(sscreen->b.family), dump);
>
> +       if (r) {
> +               return r;
> +       }
> +       r = si_create_shader(sscreen, shader, &binary);
> +       FREE(binary.code);
> +       FREE(binary.config);
> +       FREE(binary.rodata);
>         return r;
>  }
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index d8a63df..c616bc4 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -31,6 +31,8 @@
>
>  #include <llvm-c/Core.h> /* LLVMModuleRef */
>
> +struct radeon_shader_binary;
> +
>  #define SI_SGPR_CONST          0
>  #define SI_SGPR_SAMPLER                2
>  #define SI_SGPR_RESOURCE       4
> @@ -204,5 +206,10 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader);
>  int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
>                     LLVMModuleRef mod);
>  void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
> +int si_create_shader(struct si_screen *sscreen, struct si_shader *shader,
> +               const struct radeon_shader_binary *binary);
> +void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
> +                               struct si_shader *shader,
> +                               unsigned symbol_offset);
>
>  #endif
> --
> 1.8.5.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev