[Mesa-dev] [PATCH 09/10] radeonsi/compute: Enable PIPE_SHADER_IR_NATIVE for compute shaders
Marek Olšák
maraeo at gmail.com
Wed Oct 8 02:22:42 PDT 2014
There is already a function called si_shader_create, so I don't think it
would be nice to add si_create_shader as well. Can we choose better names
here? (for both functions if needed)
Marek
On Mon, Oct 6, 2014 at 9:44 PM, Tom Stellard <thomas.stellard at amd.com> wrote:
> ---
> src/gallium/drivers/radeonsi/si_compute.c | 51 +++++----------
> src/gallium/drivers/radeonsi/si_pipe.c | 2 +-
> src/gallium/drivers/radeonsi/si_shader.c | 104 ++++++++++++++++++------------
> src/gallium/drivers/radeonsi/si_shader.h | 7 ++
> 4 files changed, 88 insertions(+), 76 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
> index 490845b..a133380 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -23,14 +23,14 @@
> */
>
> #include "util/u_memory.h"
> +#include "radeon/r600_pipe_common.h"
> +#include "radeon/radeon_elf_util.h"
>
> #include "radeon/r600_cs.h"
> #include "si_pipe.h"
> #include "si_shader.h"
> #include "sid.h"
>
> -#include "radeon/radeon_llvm_util.h"
> -
> #define MAX_GLOBAL_BUFFERS 20
> #define NUM_USER_SGPRS 4
>
> @@ -40,14 +40,12 @@ struct si_compute {
> unsigned local_size;
> unsigned private_size;
> unsigned input_size;
> - unsigned num_kernels;
> - struct si_shader *kernels;
> + struct radeon_shader_binary binary;
> + struct si_shader program;
> unsigned num_user_sgprs;
>
> struct r600_resource *input_buffer;
> struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
> -
> - LLVMContextRef llvm_ctx;
> };
>
> static void *si_create_compute_state(
> @@ -57,10 +55,7 @@ static void *si_create_compute_state(
> struct si_context *sctx = (struct si_context *)ctx;
> struct si_compute *program = CALLOC_STRUCT(si_compute);
> const struct pipe_llvm_program_header *header;
> - const unsigned char *code;
> - unsigned i;
> -
> - program->llvm_ctx = LLVMContextCreate();
> + const char *code;
>
> header = cso->prog;
> code = cso->prog + sizeof(struct pipe_llvm_program_header);
> @@ -70,16 +65,9 @@ static void *si_create_compute_state(
> program->private_size = cso->req_private_mem;
> program->input_size = cso->req_input_mem;
>
> - program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, code,
> - header->num_bytes);
> - program->kernels = CALLOC(sizeof(struct si_shader),
> - program->num_kernels);
> - for (i = 0; i < program->num_kernels; i++) {
> - LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
> - code, header->num_bytes);
> - si_compile_llvm(sctx->screen, &program->kernels[i], mod);
> - LLVMDisposeModule(mod);
> - }
> + memset(&program->binary, 0, sizeof(program->binary));
> + radeon_elf_read(code, header->num_bytes, &program->binary, true);
> + si_create_shader(sctx->screen, &program->program, &program->binary);
>
> program->input_buffer = si_resource_create_custom(sctx->b.b.screen,
> PIPE_USAGE_IMMUTABLE, program->input_size);
> @@ -177,7 +165,7 @@ static void si_launch_grid(
> uint64_t shader_va;
> unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
> unsigned i;
> - struct si_shader *shader = &program->kernels[pc];
> + struct si_shader *shader = &program->program;
> unsigned lds_blocks;
> unsigned num_waves_for_scratch;
>
> @@ -194,6 +182,9 @@ static void si_launch_grid(
>
> pm4->compute_pkt = true;
>
> + /* Read the config information */
> + si_shader_binary_read_config(&program->binary, &program->program, pc);
> +
> /* Upload the kernel arguments */
>
> /* The extra num_work_size_bytes are for work group / work item size information */
> @@ -285,7 +276,7 @@ static void si_launch_grid(
> 0x190 /* Default value */);
> }
>
> - shader_va = shader->bo->gpu_address;
> + shader_va = shader->bo->gpu_address + pc;
> si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
> si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
> si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
> @@ -384,22 +375,12 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
> return;
> }
>
> - if (program->kernels) {
> - for (int i = 0; i < program->num_kernels; i++){
> - if (program->kernels[i].bo){
> - si_shader_destroy(ctx, &program->kernels[i]);
> - }
> - }
> - FREE(program->kernels);
> - }
> -
> - if (program->llvm_ctx){
> - LLVMContextDispose(program->llvm_ctx);
> - }
> pipe_resource_reference(
> (struct pipe_resource **)&program->input_buffer, NULL);
>
> - //And then free the program itself.
> + FREE(program->binary.code);
> + FREE(program->binary.config);
> + FREE(program->binary.rodata);
> FREE(program);
> }
>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index 2cce5cc..ad6f518 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -334,7 +334,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
> case PIPE_SHADER_COMPUTE:
> switch (param) {
> case PIPE_SHADER_CAP_PREFERRED_IR:
> - return PIPE_SHADER_IR_LLVM;
> + return PIPE_SHADER_IR_NATIVE;
> case PIPE_SHADER_CAP_DOUBLES:
> return 0; /* XXX: Enable doubles once the compiler can
> handle them. */
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 9d2cc80..401da1b 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -33,6 +33,7 @@
> #include "gallivm/lp_bld_arit.h"
> #include "gallivm/lp_bld_flow.h"
> #include "radeon/radeon_llvm.h"
> +#include "radeon/radeon_elf_util.h"
> #include "radeon/radeon_llvm_emit.h"
> #include "util/u_memory.h"
> #include "tgsi/tgsi_parse.h"
> @@ -2625,52 +2626,34 @@ static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
> }
> }
>
> -int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
> - LLVMModuleRef mod)
> +void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
> + struct si_shader *shader,
> + unsigned symbol_offset)
> {
> - unsigned r; /* llvm_compile result */
> unsigned i;
> - unsigned char *ptr;
> - struct radeon_shader_binary binary;
> - bool dump = r600_can_dump_shader(&sscreen->b,
> - shader->selector ? shader->selector->tokens : NULL);
> - const char * gpu_family = r600_get_llvm_processor_name(sscreen->b.family);
> - unsigned code_size;
> -
> - /* Use LLVM to compile shader */
> - memset(&binary, 0, sizeof(binary));
> - r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
> -
> - /* Output binary dump if rscreen->debug_flags are set */
> - if (dump && ! binary.disassembled) {
> - fprintf(stderr, "SI CODE:\n");
> - for (i = 0; i < binary.code_size; i+=4 ) {
> - fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3],
> - binary.code[i + 2], binary.code[i + 1],
> - binary.code[i]);
> - }
> - }
> + const unsigned char *config =
> + radeon_shader_binary_config_start(binary, symbol_offset);
>
> /* XXX: We may be able to emit some of these values directly rather than
> * extracting fields to be emitted later.
> */
> - /* Parse config data in compiled binary */
> - for (i = 0; i < binary.config_size; i+= 8) {
> - unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + i));
> - unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
> +
> + for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
> + unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
> + unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
> switch (reg) {
> case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
> case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
> case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
> case R_00B848_COMPUTE_PGM_RSRC1:
> - shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8;
> - shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4;
> + shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
> + shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
> break;
> case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
> - shader->lds_size = G_00B02C_EXTRA_LDS_SIZE(value);
> + shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
> break;
> case R_00B84C_COMPUTE_PGM_RSRC2:
> - shader->lds_size = G_00B84C_LDS_SIZE(value);
> + shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
> break;
> case R_0286CC_SPI_PS_INPUT_ENA:
> shader->spi_ps_input_ena = value;
> @@ -2686,9 +2669,32 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
> break;
> }
> }
> +}
> +
> +int si_create_shader(struct si_screen *sscreen,
> + struct si_shader *shader,
> + const struct radeon_shader_binary *binary)
> +{
> +
> + unsigned i;
> + unsigned code_size;
> + unsigned char *ptr;
> + bool dump = r600_can_dump_shader(&sscreen->b,
> + shader->selector ? shader->selector->tokens : NULL);
> +
> + if (dump && !binary->disassembled) {
> + fprintf(stderr, "SI CODE:\n");
> + for (i = 0; i < binary->code_size; i+=4 ) {
> + fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
> + binary->code[i + 2], binary->code[i + 1],
> + binary->code[i]);
> + }
> + }
> +
> + si_shader_binary_read_config(binary, shader, 0);
>
> /* copy new shader */
> - code_size = binary.code_size + binary.rodata_size;
> + code_size = binary->code_size + binary->rodata_size;
> r600_resource_reference(&shader->bo, NULL);
> shader->bo = si_resource_create_custom(&sscreen->b.b, PIPE_USAGE_IMMUTABLE,
> code_size);
> @@ -2696,19 +2702,37 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
> return -ENOMEM;
> }
>
> - ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_WRITE);
> - util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size);
> - if (binary.rodata_size > 0) {
> - ptr += binary.code_size;
> - util_memcpy_cpu_to_le32(ptr, binary.rodata, binary.rodata_size);
> +
> + ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_READ_WRITE);
> + util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
> + if (binary->rodata_size > 0) {
> + ptr += binary->code_size;
> + util_memcpy_cpu_to_le32(ptr, binary->rodata, binary->rodata_size);
> }
>
> sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
>
> - free(binary.code);
> - free(binary.config);
> - free(binary.rodata);
> + return 0;
> +}
> +
> +int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
> + LLVMModuleRef mod)
> +{
> + int r = 0;
> + struct radeon_shader_binary binary;
> + bool dump = r600_can_dump_shader(&sscreen->b,
> + shader->selector ? shader->selector->tokens : NULL);
> + memset(&binary, 0, sizeof(binary));
> + r = radeon_llvm_compile(mod, &binary,
> + r600_get_llvm_processor_name(sscreen->b.family), dump);
>
> + if (r) {
> + return r;
> + }
> + r = si_create_shader(sscreen, shader, &binary);
> + FREE(binary.code);
> + FREE(binary.config);
> + FREE(binary.rodata);
> return r;
> }
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index d8a63df..c616bc4 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -31,6 +31,8 @@
>
> #include <llvm-c/Core.h> /* LLVMModuleRef */
>
> +struct radeon_shader_binary;
> +
> #define SI_SGPR_CONST 0
> #define SI_SGPR_SAMPLER 2
> #define SI_SGPR_RESOURCE 4
> @@ -204,5 +206,10 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader);
> int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
> LLVMModuleRef mod);
> void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
> +int si_create_shader(struct si_screen *sscreen, struct si_shader *shader,
> + const struct radeon_shader_binary *binary);
> +void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
> + struct si_shader *shader,
> + unsigned symbol_offset);
>
> #endif
> --
> 1.8.5.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list