[Mesa-dev] [PATCH 2/2] r600g: make tgsi-to-llvm generate store.pixel* intrinsics for fs
Vincent Lejeune
vljn at ovi.com
Sun Oct 7 12:11:16 PDT 2012
---
src/gallium/drivers/r600/eg_asm.c | 17 ++++++++
src/gallium/drivers/r600/r600_asm.c | 17 ++++++++
src/gallium/drivers/r600/r600_asm.h | 2 +
src/gallium/drivers/r600/r600_llvm.c | 66 ++++++++++++++++++++++++++++----
src/gallium/drivers/r600/r600_shader.c | 53 +++++++++++++++++++++++--
src/gallium/drivers/radeon/radeon_llvm.h | 4 ++
6 files changed, 147 insertions(+), 12 deletions(-)
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 00ac4a8..69617d9 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -144,3 +144,20 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
}
return 0;
}
+
+void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
+{
+	/* Fill *output from the two 32-bit words of an export instruction: word1 fields first, then word0 fields. */
+	output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1));
+	output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
+	output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
+	output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
+	output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
+	output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
+	output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
+	output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
+	output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);
+	output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
+	output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
+	output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
+}
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 63bd8e9..3a6bce0 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2939,3 +2939,20 @@ void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint3
G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1);
}
}
+
+void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
+{
+	/* Unpack an export instruction given as two 32-bit words into the fields of *output. */
+	output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
+	output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1)); /* NOTE(review): EG_-prefixed setter in the r600 reader - confirm this is intended */
+	output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
+	output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
+	output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
+	output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
+	output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
+	output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
+	output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
+	output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);
+	output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
+	output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
+}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 403365b..6d57778 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -243,5 +243,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_context *rctx, struct r6
void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf);
int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id);
void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
+void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
+void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
#endif
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 71ea578..dddc867 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -115,6 +115,8 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct lp_build_context * base = &bld_base->base;
unsigned i;
+
+ unsigned color_count = 0;
/* Add the necessary export instructions */
for (i = 0; i < ctx->output_reg_count; i++) {
@@ -123,18 +125,66 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
LLVMValueRef output;
unsigned adjusted_reg_idx = i +
ctx->reserved_reg_count;
- LLVMValueRef reg_index = lp_build_const_int32(
- base->gallivm,
- radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
output = LLVMBuildLoad(base->gallivm->builder,
ctx->soa.outputs[i][chan], "");
- lp_build_intrinsic_binary(
- base->gallivm->builder,
- "llvm.AMDGPU.store.output",
- LLVMVoidTypeInContext(base->gallivm->context),
- output, reg_index);
+ if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+ LLVMValueRef reg_index = lp_build_const_int32(
+ base->gallivm,
+ radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
+ lp_build_intrinsic_binary(
+ base->gallivm->builder,
+ "llvm.AMDGPU.store.output",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ output, reg_index);
+ } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+ switch (ctx->r600_outputs[i].name) {
+ case TGSI_SEMANTIC_COLOR:
+ if ( color_count/4 < ctx->color_buffer_count + ctx->extra_buffer) {
+ if (ctx->fs_color_all) {
+ for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
+ LLVMValueRef reg_index = lp_build_const_int32(
+ base->gallivm,
+ (j * 4) + chan);
+ lp_build_intrinsic_binary(
+ base->gallivm->builder,
+ "llvm.R600.store.pixel.color",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ output, reg_index);
+ }
+ } else {
+ LLVMValueRef reg_index = lp_build_const_int32(
+ base->gallivm,
+ (color_count++/4) * 4 + chan);
+ lp_build_intrinsic_binary(
+ base->gallivm->builder,
+ "llvm.R600.store.pixel.color",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ output, reg_index);
+ }
+ }
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ if (chan != 2)
+ continue;
+ lp_build_intrinsic_unary(
+ base->gallivm->builder,
+ "llvm.R600.store.pixel.depth",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ output);
+ break;
+ case TGSI_SEMANTIC_STENCIL:
+ if (chan != 1)
+ continue;
+ lp_build_intrinsic_unary(
+ base->gallivm->builder,
+ "llvm.R600.store.pixel.stencil",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ output);
+ break;
+ }
+ }
}
}
}
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index bf4877a..56e25b5 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -522,6 +522,21 @@ static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
return bytes_read;
}
+static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx, unsigned char *bytes, unsigned bytes_read)
+{
+	/* Read two 32-bit words, decode them with the chip-class reader and add the output to ctx->bc; the add status is not checked. */
+	struct r600_bytecode_output output;
+	uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
+	uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
+	memset(&output, 0, sizeof(output));
+	if (ctx->bc->chip_class < EVERGREEN)
+		r600_bytecode_export_read(&output, word0, word1);
+	else
+		eg_bytecode_export_read(&output, word0, word1);
+	r600_bytecode_add_output(ctx->bc, &output);
+	return bytes_read;
+}
+
static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
unsigned char * bytes, unsigned num_bytes)
{
@@ -556,6 +571,10 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
bytes_read);
break;
+ case 5:
+ bytes_read = r600_export_from_byte_stream(ctx, bytes,
+ bytes_read);
+ break;
default:
/* XXX: Error here */
break;
@@ -1336,7 +1355,11 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
radeon_llvm_ctx.two_side = shader->two_side;
radeon_llvm_ctx.face_input = ctx.face_gpr;
radeon_llvm_ctx.r600_inputs = ctx.shader->input;
+ radeon_llvm_ctx.r600_outputs = ctx.shader->output;
+ radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1);
radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
+ radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN);
+ radeon_llvm_ctx.extra_buffer = key.dual_src_blend;
mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
dump = 1;
@@ -1598,6 +1621,24 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
}
break;
case TGSI_PROCESSOR_FRAGMENT:
+ if (use_llvm) {
+ if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
+ /* never export more colors than the number of CBs */
+ if (next_pixel_base && next_pixel_base >= key.nr_cbufs + key.dual_src_blend) {
+ /* skip export */
+ j--;
+ continue;
+ }
+ next_pixel_base++;
+ shader->nr_ps_color_exports++;
+ if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN) && key.nr_cbufs) {
+ shader->nr_ps_color_exports += key.nr_cbufs - 1;
+ next_pixel_base += key.nr_cbufs - 1;
+ }
+ } else {
+ continue;
+ }
+ } else {
if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
/* never export more colors than the number of CBs */
if (next_pixel_base && next_pixel_base >= key.nr_cbufs + key.dual_src_blend) {
@@ -1644,6 +1685,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
r = -EINVAL;
goto out_err;
}
+ }
break;
default:
R600_ERR("unsupported processor type %d\n", ctx.type);
@@ -1706,10 +1748,13 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
}
}
/* add output to bytecode */
- for (i = 0; i < noutput; i++) {
- r = r600_bytecode_add_output(ctx.bc, &output[i]);
- if (r)
- goto out_err;
+ if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT ||
+ (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0)) {
+ for (i = 0; i < noutput; i++) {
+ r = r600_bytecode_add_output(ctx.bc, &output[i]);
+ if (r)
+ goto out_err;
+ }
}
/* add program end */
if (ctx.bc->chip_class == CAYMAN)
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index 6118b11..948e8cf 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -59,6 +59,10 @@ struct radeon_llvm_context {
unsigned face_input;
unsigned two_side;
struct r600_shader_io * r600_inputs;
+ struct r600_shader_io * r600_outputs;
+ unsigned color_buffer_count;
+ unsigned extra_buffer;
+ unsigned fs_color_all;
/*=== Front end configuration ===*/
--
1.7.11.4
More information about the mesa-dev
mailing list