[Mesa-dev] [PATCH] r600g: make tgsi-to-llvm generate store.pixel* intrinsic for fs
Tom Stellard
tom at stellard.net
Wed Oct 10 14:04:00 PDT 2012
On Wed, Oct 10, 2012 at 08:20:57PM +0200, Vincent Lejeune wrote:
> ---
> src/gallium/drivers/r600/eg_asm.c | 17 +++++++
> src/gallium/drivers/r600/r600_asm.c | 17 +++++++
> src/gallium/drivers/r600/r600_asm.h | 2 +
> src/gallium/drivers/r600/r600_llvm.c | 66 ++++++++++++++++++++++---
> src/gallium/drivers/r600/r600_shader.c | 83 +++++++++++++++++++++++---------
> src/gallium/drivers/radeon/radeon_llvm.h | 3 ++
> 6 files changed, 157 insertions(+), 31 deletions(-)
>
One small coding style issue below; with that fixed:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
> index 00ac4a8..69617d9 100644
> --- a/src/gallium/drivers/r600/eg_asm.c
> +++ b/src/gallium/drivers/r600/eg_asm.c
> @@ -144,3 +144,20 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
> }
> return 0;
> }
> +
> +void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
> +{
> + output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
> + output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
> + output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
> + output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);
> +
> + output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
> + output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
> + output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
> + output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
> + output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
> + output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
> + output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1));
> + output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
> +}
> diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
> index 37e8bf0..7d4aa69 100644
> --- a/src/gallium/drivers/r600/r600_asm.c
> +++ b/src/gallium/drivers/r600/r600_asm.c
> @@ -2938,3 +2938,20 @@ void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint3
> G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1);
> }
> }
> +
> +void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
> +{
> + output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
> + output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
> + output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
> + output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);
> +
> + output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
> + output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
> + output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
> + output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
> + output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
> + output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
> + output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1));
> + output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
> +}
> diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
> index d8f258f..8009d97 100644
> --- a/src/gallium/drivers/r600/r600_asm.h
> +++ b/src/gallium/drivers/r600/r600_asm.h
> @@ -244,5 +244,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
> void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf);
> int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id);
> void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
> +void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
> +void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
>
> #endif
> diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
> index c6e60af..85289d5 100644
> --- a/src/gallium/drivers/r600/r600_llvm.c
> +++ b/src/gallium/drivers/r600/r600_llvm.c
> @@ -151,6 +151,8 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
> struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
> struct lp_build_context * base = &bld_base->base;
> unsigned i;
> +
> + unsigned color_count = 0;
>
> /* Add the necessary export instructions */
> for (i = 0; i < ctx->output_reg_count; i++) {
> @@ -159,18 +161,66 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
> LLVMValueRef output;
> unsigned adjusted_reg_idx = i +
> ctx->reserved_reg_count;
> - LLVMValueRef reg_index = lp_build_const_int32(
> - base->gallivm,
> - radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
>
> output = LLVMBuildLoad(base->gallivm->builder,
> ctx->soa.outputs[i][chan], "");
>
> - lp_build_intrinsic_binary(
> - base->gallivm->builder,
> - "llvm.AMDGPU.store.output",
> - LLVMVoidTypeInContext(base->gallivm->context),
> - output, reg_index);
> + if (ctx->type == TGSI_PROCESSOR_VERTEX) {
> + LLVMValueRef reg_index = lp_build_const_int32(
> + base->gallivm,
> + radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
> + lp_build_intrinsic_binary(
> + base->gallivm->builder,
> + "llvm.AMDGPU.store.output",
> + LLVMVoidTypeInContext(base->gallivm->context),
> + output, reg_index);
> + } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
> + switch (ctx->r600_outputs[i].name) {
> + case TGSI_SEMANTIC_COLOR:
> + if ( color_count/4 < ctx->color_buffer_count) {
> + if (ctx->fs_color_all) {
> + for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
> + LLVMValueRef reg_index = lp_build_const_int32(
> + base->gallivm,
> + (j * 4) + chan);
> + lp_build_intrinsic_binary(
> + base->gallivm->builder,
> + "llvm.R600.store.pixel.color",
> + LLVMVoidTypeInContext(base->gallivm->context),
> + output, reg_index);
> + }
> + } else {
> + LLVMValueRef reg_index = lp_build_const_int32(
> + base->gallivm,
> + (color_count++/4) * 4 + chan);
> + lp_build_intrinsic_binary(
> + base->gallivm->builder,
> + "llvm.R600.store.pixel.color",
> + LLVMVoidTypeInContext(base->gallivm->context),
> + output, reg_index);
> + }
> + }
> + break;
> + case TGSI_SEMANTIC_POSITION:
> + if (chan != 2)
> + continue;
> + lp_build_intrinsic_unary(
> + base->gallivm->builder,
> + "llvm.R600.store.pixel.depth",
> + LLVMVoidTypeInContext(base->gallivm->context),
> + output);
> + break;
> + case TGSI_SEMANTIC_STENCIL:
> + if (chan != 1)
> + continue;
> + lp_build_intrinsic_unary(
> + base->gallivm->builder,
> + "llvm.R600.store.pixel.stencil",
> + LLVMVoidTypeInContext(base->gallivm->context),
> + output);
> + break;
> + }
> + }
> }
> }
> }
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index 0b45d4f..81745af 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -522,6 +522,21 @@ static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
> return bytes_read;
> }
>
> +static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx,
> + unsigned char * bytes, unsigned bytes_read)
> +{
> + struct r600_bytecode_output output;
> + memset(&output, 0, sizeof(struct r600_bytecode_output));
> + uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
> + uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
> + if (ctx->bc->chip_class >= EVERGREEN)
> + eg_bytecode_export_read(&output, word0,word1);
> + else
> + r600_bytecode_export_read(&output, word0,word1);
> + r600_bytecode_add_output(ctx->bc, &output);
> + return bytes_read;
> +}
> +
> static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
> unsigned char * bytes, unsigned num_bytes)
> {
> @@ -556,6 +571,10 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
> bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
> bytes_read);
> break;
> + case 5:
> + bytes_read = r600_export_from_byte_stream(ctx, bytes,
> + bytes_read);
> + break;
> default:
> /* XXX: Error here */
> break;
> @@ -1348,7 +1367,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
> radeon_llvm_ctx.two_side = shader->two_side;
> radeon_llvm_ctx.face_input = ctx.face_gpr;
> radeon_llvm_ctx.r600_inputs = ctx.shader->input;
> + radeon_llvm_ctx.r600_outputs = ctx.shader->output;
> + radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1);
> radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
> + radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN);
> mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
> if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
> dump = 1;
> @@ -1617,28 +1639,40 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
> j--;
> continue;
> }
> - output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
> - output[j].array_base = next_pixel_base++;
> - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
> + next_pixel_base++;
> shader->nr_ps_color_exports++;
> - if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN)) {
> - for (k = 1; k < key.nr_cbufs; k++) {
> - j++;
> - memset(&output[j], 0, sizeof(struct r600_bytecode_output));
> - output[j].gpr = shader->output[i].gpr;
> - output[j].elem_size = 3;
> - output[j].swizzle_x = 0;
> - output[j].swizzle_y = 1;
> - output[j].swizzle_z = 2;
> - output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
> - output[j].burst_count = 1;
> - output[j].barrier = 1;
> - output[j].array_base = next_pixel_base++;
> - output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
> - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
> - shader->nr_ps_color_exports++;
> + if (use_llvm) {
> + if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN) && key.nr_cbufs) {
> + shader->nr_ps_color_exports += key.nr_cbufs - 1;
> + next_pixel_base += key.nr_cbufs - 1;
> + } else {
> + continue;
> }
> + } else {
> + output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
> + output[j].array_base = next_pixel_base - 1;
> + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
> + if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN)) {
> + for (k = 1; k < key.nr_cbufs; k++) {
> + j++;
> + memset(&output[j], 0, sizeof(struct r600_bytecode_output));
> + output[j].gpr = shader->output[i].gpr;
> + output[j].elem_size = 3;
> + output[j].swizzle_x = 0;
> + output[j].swizzle_y = 1;
> + output[j].swizzle_z = 2;
> + output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
> + output[j].burst_count = 1;
> + output[j].barrier = 1;
> + output[j].array_base = next_pixel_base++;
> + output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
> + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
> + shader->nr_ps_color_exports++;
> + }
> + }
> }
> + } else if (use_llvm) {
> + continue;
> } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
> output[j].array_base = 61;
> output[j].swizzle_x = 2;
> @@ -1718,10 +1752,13 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
> }
> }
> /* add output to bytecode */
> - for (i = 0; i < noutput; i++) {
> - r = r600_bytecode_add_output(ctx.bc, &output[i]);
> - if (r)
> - goto out_err;
> + if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT ||
> + (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0)) {
> + for (i = 0; i < noutput; i++) {
> + r = r600_bytecode_add_output(ctx.bc, &output[i]);
> + if (r)
> + goto out_err;
> + }
The indentation looks off here.
> }
> /* add program end */
> if (ctx.bc->chip_class == CAYMAN)
> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
> index 6118b11..61975c4 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm.h
> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
> @@ -59,6 +59,9 @@ struct radeon_llvm_context {
> unsigned face_input;
> unsigned two_side;
> struct r600_shader_io * r600_inputs;
> + struct r600_shader_io * r600_outputs;
> + unsigned color_buffer_count;
> + unsigned fs_color_all;
>
> /*=== Front end configuration ===*/
>
> --
> 1.7.11.7
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list