[Mesa-dev] [PATCH 2/3] r600g/compute: Enable PIPE_SHADER_IR_NATIVE for compute shaders v2
Tom Stellard
thomas.stellard at amd.com
Thu Oct 16 16:41:08 PDT 2014
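v2:
- Drop dependency on LLVM >= 3.5.1; the PIPE_SHADER_IR_NATIVE path is
  guarded by HAVE_LLVM >= 0x0306 (see the "#if HAVE_LLVM < 0x0306" checks
  in the diff below), and older LLVM keeps the PIPE_SHADER_IR_LLVM path.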
---
src/gallium/drivers/r600/evergreen_compute.c | 167 +++++++++++++--------
.../drivers/r600/evergreen_compute_internal.h | 12 +-
src/gallium/drivers/r600/r600_llvm.c | 71 ++++++---
src/gallium/drivers/r600/r600_llvm.h | 10 ++
src/gallium/drivers/r600/r600_pipe.c | 4 +
src/gallium/drivers/r600/r600_pipe.h | 1 +
src/gallium/drivers/radeon/radeon_llvm_util.c | 6 +-
src/gallium/drivers/radeon/radeon_llvm_util.h | 6 +-
8 files changed, 180 insertions(+), 97 deletions(-)
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 38b78c7..7a17d1e 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -49,6 +49,7 @@
#ifdef HAVE_OPENCL
#include "radeon_llvm_util.h"
#endif
+#include "radeon_elf_util.h"
#include <inttypes.h>
/**
@@ -198,18 +199,42 @@ void *evergreen_create_compute_state(
{
struct r600_context *ctx = (struct r600_context *)ctx_;
struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute);
-
-#ifdef HAVE_OPENCL
const struct pipe_llvm_program_header * header;
- const unsigned char * code;
- unsigned i;
-
- shader->llvm_ctx = LLVMContextCreate();
+ const char *code;
+ void *p;
+ boolean use_kill;
COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n");
-
header = cso->prog;
code = cso->prog + sizeof(struct pipe_llvm_program_header);
+#if HAVE_LLVM < 0x0306
+#ifdef HAVE_OPENCL
+ (void)use_kill;
+ (void)p;
+ shader->llvm_ctx = LLVMContextCreate();
+ shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx,
+ code, header->num_bytes);
+ shader->kernels = CALLOC(sizeof(struct r600_kernel),
+ shader->num_kernels);
+ {
+ unsigned i;
+ for (i = 0; i < shader->num_kernels; i++) {
+ struct r600_kernel *kernel = &shader->kernels[i];
+ kernel->llvm_module = radeon_llvm_get_kernel_module(
+ shader->llvm_ctx, i, code, header->num_bytes);
+ }
+ }
+#endif
+#else
+ memset(&shader->binary, 0, sizeof(shader->binary));
+ radeon_elf_read(code, header->num_bytes, &shader->binary, true);
+ r600_create_shader(&shader->bc, &shader->binary, &use_kill);
+
+ shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
+ shader->bc.ndw * 4);
+ p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE);
+ memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
+ ctx->b.ws->buffer_unmap(shader->code_bo->cs_buf);
#endif
shader->ctx = (struct r600_context*)ctx;
@@ -217,17 +242,6 @@ void *evergreen_create_compute_state(
shader->private_size = cso->req_private_mem;
shader->input_size = cso->req_input_mem;
-#ifdef HAVE_OPENCL
- shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx, code,
- header->num_bytes);
- shader->kernels = CALLOC(sizeof(struct r600_kernel), shader->num_kernels);
-
- for (i = 0; i < shader->num_kernels; i++) {
- struct r600_kernel *kernel = &shader->kernels[i];
- kernel->llvm_module = radeon_llvm_get_kernel_module(shader->llvm_ctx, i,
- code, header->num_bytes);
- }
-#endif
return shader;
}
@@ -238,14 +252,6 @@ void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
if (!shader)
return;
- FREE(shader->kernels);
-
-#ifdef HAVE_OPENCL
- if (shader->llvm_ctx){
- LLVMContextDispose(shader->llvm_ctx);
- }
-#endif
-
FREE(shader);
}
@@ -347,7 +353,13 @@ static void evergreen_emit_direct_dispatch(
unsigned wave_divisor = (16 * num_pipes);
int group_size = 1;
int grid_size = 1;
- unsigned lds_size = shader->local_size / 4 + shader->active_kernel->bc.nlds_dw;
+ unsigned lds_size = shader->local_size / 4 +
+#if HAVE_LLVM < 0x0306
+ shader->active_kernel->bc.nlds_dw;
+#else
+ shader->bc.nlds_dw;
+#endif
+
/* Calculate group_size/grid_size */
for (i = 0; i < 3; i++) {
@@ -520,19 +532,34 @@ void evergreen_emit_cs_shader(
struct r600_cs_shader_state *state =
(struct r600_cs_shader_state*)atom;
struct r600_pipe_compute *shader = state->shader;
- struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+ uint64_t va;
+ struct r600_resource *code_bo;
+ unsigned ngpr, nstack;
+
+#if HAVE_LLVM < 0x0306
+ struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
+ code_bo = kernel->code_bo;
+ va = kernel->code_bo->gpu_address;
+ ngpr = kernel->bc.ngpr;
+ nstack = kernel->bc.nstack;
+#else
+ code_bo = shader->code_bo;
+ va = shader->code_bo->gpu_address + state->pc;
+ ngpr = shader->bc.ngpr;
+ nstack = shader->bc.nstack;
+#endif
r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
- radeon_emit(cs, kernel->code_bo->gpu_address >> 8); /* R_0288D0_SQ_PGM_START_LS */
+ radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
radeon_emit(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */
- S_0288D4_NUM_GPRS(kernel->bc.ngpr)
- | S_0288D4_STACK_SIZE(kernel->bc.nstack));
+ S_0288D4_NUM_GPRS(ngpr)
+ | S_0288D4_STACK_SIZE(nstack));
radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
- kernel->code_bo, RADEON_USAGE_READ,
+ code_bo, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA));
}
@@ -542,46 +569,54 @@ static void evergreen_launch_grid(
uint32_t pc, const void *input)
{
struct r600_context *ctx = (struct r600_context *)ctx_;
-
struct r600_pipe_compute *shader = ctx->cs_shader_state.shader;
- struct r600_kernel *kernel = &shader->kernels[pc];
-
- COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
+ boolean use_kill;
+#if HAVE_LLVM < 0x0306
#ifdef HAVE_OPENCL
-
- if (!kernel->code_bo) {
- void *p;
- struct r600_bytecode *bc = &kernel->bc;
- LLVMModuleRef mod = kernel->llvm_module;
- boolean use_kill = false;
- bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0;
- unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS;
- unsigned sb_disasm = use_sb ||
- (ctx->screen->b.debug_flags & DBG_SB_DISASM);
-
- r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family,
- ctx->screen->has_compressed_msaa_texturing);
- bc->type = TGSI_PROCESSOR_COMPUTE;
- bc->isa = ctx->isa;
- r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump);
-
- if (dump && !sb_disasm) {
- r600_bytecode_disasm(bc);
- } else if ((dump && sb_disasm) || use_sb) {
- if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb))
- R600_ERR("r600_sb_bytecode_process failed!\n");
- }
-
- kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
- kernel->bc.ndw * 4);
- p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE);
- memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
- ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf);
- }
+ struct r600_kernel *kernel = &shader->kernels[pc];
+ (void)use_kill;
+ if (!kernel->code_bo) {
+ void *p;
+ struct r600_bytecode *bc = &kernel->bc;
+ LLVMModuleRef mod = kernel->llvm_module;
+ boolean use_kill = false;
+ bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0;
+ unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS;
+ unsigned sb_disasm = use_sb ||
+ (ctx->screen->b.debug_flags & DBG_SB_DISASM);
+
+ r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family,
+ ctx->screen->has_compressed_msaa_texturing);
+ bc->type = TGSI_PROCESSOR_COMPUTE;
+ bc->isa = ctx->isa;
+ r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump);
+
+ if (dump && !sb_disasm) {
+ r600_bytecode_disasm(bc);
+ } else if ((dump && sb_disasm) || use_sb) {
+ if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb))
+ R600_ERR("r600_sb_bytecode_process failed!\n");
+ }
+
+ kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
+ kernel->bc.ndw * 4);
+ p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE);
+ memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
+ ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf);
+ }
#endif
shader->active_kernel = kernel;
ctx->cs_shader_state.kernel_index = pc;
+#else
+ ctx->cs_shader_state.pc = pc;
+ /* Get the config information for this kernel. */
+ r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill);
+#endif
+
+ COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
+
+
evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
compute_emit_cs(ctx, block_layout, grid_layout);
}
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h
index 0929d8d..95593dd 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.h
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.h
@@ -27,6 +27,8 @@
#include "r600_asm.h"
+#if HAVE_LLVM < 0x0306
+
struct r600_kernel {
unsigned count;
#ifdef HAVE_OPENCL
@@ -36,13 +38,21 @@ struct r600_kernel {
struct r600_bytecode bc;
};
+#endif
+
struct r600_pipe_compute {
struct r600_context *ctx;
+#if HAVE_LLVM < 0x0306
unsigned num_kernels;
struct r600_kernel *kernels;
-
struct r600_kernel *active_kernel;
+#endif
+
+ struct radeon_shader_binary binary;
+ struct r600_resource *code_bo;
+ struct r600_bytecode bc;
+
unsigned local_size;
unsigned private_size;
unsigned input_size;
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 7661419..c19693a 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -13,8 +13,9 @@
#include "r600_opcodes.h"
#include "r600_shader.h"
#include "r600_pipe.h"
-#include "radeon/radeon_llvm.h"
-#include "radeon/radeon_llvm_emit.h"
+#include "radeon_llvm.h"
+#include "radeon_llvm_emit.h"
+#include "radeon_elf_util.h"
#include <stdio.h>
@@ -818,31 +819,20 @@ LLVMModuleRef r600_tgsi_llvm(
#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
#define R_028850_SQ_PGM_RESOURCES_PS 0x028850
-unsigned r600_llvm_compile(
- LLVMModuleRef mod,
- enum radeon_family family,
- struct r600_bytecode *bc,
- boolean *use_kill,
- unsigned dump)
+void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
+ struct r600_bytecode *bc,
+ uint64_t symbol_offset,
+ boolean *use_kill)
{
- unsigned r;
- struct radeon_shader_binary binary;
- const char * gpu_family = r600_get_llvm_processor_name(family);
unsigned i;
+ const unsigned char *config =
+ radeon_shader_binary_config_start(binary, symbol_offset);
- memset(&binary, 0, sizeof(struct radeon_shader_binary));
- r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
-
- assert(binary.code_size % 4 == 0);
- bc->bytecode = CALLOC(1, binary.code_size);
- memcpy(bc->bytecode, binary.code, binary.code_size);
- bc->ndw = binary.code_size / 4;
-
- for (i = 0; i < binary.config_size; i+= 8) {
+ for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
unsigned reg =
- util_le32_to_cpu(*(uint32_t*)(binary.config + i));
+ util_le32_to_cpu(*(uint32_t*)(config + i));
unsigned value =
- util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
+ util_le32_to_cpu(*(uint32_t*)(config + i + 4));
switch (reg) {
/* R600 / R700 */
case R_028850_SQ_PGM_RESOURCES_PS:
@@ -851,8 +841,8 @@ unsigned r600_llvm_compile(
case R_028844_SQ_PGM_RESOURCES_PS:
case R_028860_SQ_PGM_RESOURCES_VS:
case R_0288D4_SQ_PGM_RESOURCES_LS:
- bc->ngpr = G_028844_NUM_GPRS(value);
- bc->nstack = G_028844_STACK_SIZE(value);
+ bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value));
+ bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value));
break;
case R_02880C_DB_SHADER_CONTROL:
*use_kill = G_02880C_KILL_ENABLE(value);
@@ -863,6 +853,39 @@ unsigned r600_llvm_compile(
}
}
+}
+
+unsigned r600_create_shader(struct r600_bytecode *bc,
+ const struct radeon_shader_binary *binary,
+ boolean *use_kill)
+
+{
+ assert(binary->code_size % 4 == 0);
+ bc->bytecode = CALLOC(1, binary->code_size);
+ memcpy(bc->bytecode, binary->code, binary->code_size);
+ bc->ndw = binary->code_size / 4;
+
+ r600_shader_binary_read_config(binary, bc, 0, use_kill);
+
+ return 0;
+}
+
+unsigned r600_llvm_compile(
+ LLVMModuleRef mod,
+ enum radeon_family family,
+ struct r600_bytecode *bc,
+ boolean *use_kill,
+ unsigned dump)
+{
+ unsigned r;
+ struct radeon_shader_binary binary;
+ const char * gpu_family = r600_get_llvm_processor_name(family);
+
+ memset(&binary, 0, sizeof(struct radeon_shader_binary));
+ r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
+
+ r = r600_create_shader(bc, &binary, use_kill);
+
FREE(binary.code);
FREE(binary.config);
diff --git a/src/gallium/drivers/r600/r600_llvm.h b/src/gallium/drivers/r600/r600_llvm.h
index 3840a5a..9b5304d 100644
--- a/src/gallium/drivers/r600/r600_llvm.h
+++ b/src/gallium/drivers/r600/r600_llvm.h
@@ -10,6 +10,7 @@
struct r600_bytecode;
struct r600_shader_ctx;
struct radeon_llvm_context;
+struct radeon_shader_binary;
enum radeon_family;
LLVMModuleRef r600_tgsi_llvm(
@@ -23,6 +24,15 @@ unsigned r600_llvm_compile(
boolean *use_kill,
unsigned dump);
+unsigned r600_create_shader(struct r600_bytecode *bc,
+ const struct radeon_shader_binary *binary,
+ boolean *use_kill);
+
+void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
+ struct r600_bytecode *bc,
+ uint64_t symbol_offset,
+ boolean *use_kill);
+
#endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */
#endif /* R600_LLVM_H */
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 3962fee..197fa42 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -469,7 +469,11 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
if (shader == PIPE_SHADER_COMPUTE) {
+#if HAVE_LLVM < 0x0306
return PIPE_SHADER_IR_LLVM;
+#else
+ return PIPE_SHADER_IR_NATIVE;
+#endif
} else {
return PIPE_SHADER_IR_TGSI;
}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index fa9d34b..40b0328 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -146,6 +146,7 @@ struct r600_clip_state {
struct r600_cs_shader_state {
struct r600_atom atom;
unsigned kernel_index;
+ unsigned pc;
struct r600_pipe_compute *shader;
};
diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c
index ec11559..0dfd9ad 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_util.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_util.c
@@ -34,7 +34,7 @@
#include <llvm-c/Transforms/PassManagerBuilder.h>
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
- const unsigned char * bitcode, unsigned bitcode_len)
+ const char * bitcode, unsigned bitcode_len)
{
LLVMMemoryBufferRef buf;
LLVMModuleRef module;
@@ -47,7 +47,7 @@ LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
}
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
- const unsigned char *bitcode, unsigned bitcode_len)
+ const char *bitcode, unsigned bitcode_len)
{
LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
@@ -88,7 +88,7 @@ static void radeon_llvm_optimize(LLVMModuleRef mod)
}
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
- const unsigned char *bitcode, unsigned bitcode_len)
+ const char *bitcode, unsigned bitcode_len)
{
LLVMModuleRef mod;
unsigned num_kernels;
diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.h b/src/gallium/drivers/radeon/radeon_llvm_util.h
index 733c329..cc1932a 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_util.h
+++ b/src/gallium/drivers/radeon/radeon_llvm_util.h
@@ -30,10 +30,10 @@
#include <llvm-c/Core.h>
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
- const unsigned char * bitcode, unsigned bitcode_len);
+ const char * bitcode, unsigned bitcode_len);
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
- const unsigned char *bitcode, unsigned bitcode_len);
+ const char *bitcode, unsigned bitcode_len);
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
- const unsigned char *bitcode, unsigned bitcode_len);
+ const char *bitcode, unsigned bitcode_len);
#endif
--
1.8.5.5
More information about the mesa-dev mailing list