[Mesa-dev] [PATCH 2/2] r600g: make tgsi-to-llvm generates store.pixel* intrinsic for fs

Tom Stellard tom at stellard.net
Tue Oct 9 07:11:57 PDT 2012


On Sun, Oct 07, 2012 at 09:11:16PM +0200, Vincent Lejeune wrote:
> ---
>  src/gallium/drivers/r600/eg_asm.c        | 17 ++++++++
>  src/gallium/drivers/r600/r600_asm.c      | 17 ++++++++
>  src/gallium/drivers/r600/r600_asm.h      |  2 +
>  src/gallium/drivers/r600/r600_llvm.c     | 66 ++++++++++++++++++++++++++++----
>  src/gallium/drivers/r600/r600_shader.c   | 53 +++++++++++++++++++++++--
>  src/gallium/drivers/radeon/radeon_llvm.h |  4 ++
>  6 files changed, 147 insertions(+), 12 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
> index 00ac4a8..69617d9 100644
> --- a/src/gallium/drivers/r600/eg_asm.c
> +++ b/src/gallium/drivers/r600/eg_asm.c
> @@ -144,3 +144,20 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
>  	}
>  	return 0;
>  }
> +
> +void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
> +{
> +	output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
> +	output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
> +	output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
> +	output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);
> +
> +	output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
> +	output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
> +	output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
> +	output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
> +	output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
> +	output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
> +	output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1));
> +	output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
> +}
> diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
> index 63bd8e9..3a6bce0 100644
> --- a/src/gallium/drivers/r600/r600_asm.c
> +++ b/src/gallium/drivers/r600/r600_asm.c
> @@ -2939,3 +2939,20 @@ void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint3
>  			G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1);
>  	}
>  }
> +
> +void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
> +{
> +	output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
> +	output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
> +	output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
> +	output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);
> +
> +	output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
> +	output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
> +	output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
> +	output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
> +	output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
> +	output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
> +	output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1));
> +	output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
> +}
> diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
> index 403365b..6d57778 100644
> --- a/src/gallium/drivers/r600/r600_asm.h
> +++ b/src/gallium/drivers/r600/r600_asm.h
> @@ -243,5 +243,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_context *rctx, struct r6
>  void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf);
>  int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id);
>  void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
> +void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
> +void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
>  
>  #endif
> diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
> index 71ea578..dddc867 100644
> --- a/src/gallium/drivers/r600/r600_llvm.c
> +++ b/src/gallium/drivers/r600/r600_llvm.c
> @@ -115,6 +115,8 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
>  	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
>  	struct lp_build_context * base = &bld_base->base;
>  	unsigned i;
> +	
> +	unsigned color_count = 0;
>  
>  	/* Add the necessary export instructions */
>  	for (i = 0; i < ctx->output_reg_count; i++) {
> @@ -123,18 +125,66 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
>  			LLVMValueRef output;
>  			unsigned adjusted_reg_idx = i +
>  					ctx->reserved_reg_count;
> -			LLVMValueRef reg_index = lp_build_const_int32(
> -				base->gallivm,
> -				radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
>  
>  			output = LLVMBuildLoad(base->gallivm->builder,
>  				ctx->soa.outputs[i][chan], "");
>  
> -			lp_build_intrinsic_binary(
> -				base->gallivm->builder,
> -				"llvm.AMDGPU.store.output",
> -				LLVMVoidTypeInContext(base->gallivm->context),
> -				output, reg_index);
> +			if (ctx->type == TGSI_PROCESSOR_VERTEX) {
> +				LLVMValueRef reg_index = lp_build_const_int32(
> +					base->gallivm,
> +					radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
> +				lp_build_intrinsic_binary(
> +					base->gallivm->builder,
> +					"llvm.AMDGPU.store.output",
> +					LLVMVoidTypeInContext(base->gallivm->context),
> +					output, reg_index);
> +			} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
> +				switch (ctx->r600_outputs[i].name) {
> +				case TGSI_SEMANTIC_COLOR:
> +					if ( color_count/4 < ctx->color_buffer_count + ctx->extra_buffer) {
> +						if (ctx->fs_color_all) {
> +							for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
> +								LLVMValueRef reg_index = lp_build_const_int32(
> +									base->gallivm,
> +									(j * 4) + chan);
> +								lp_build_intrinsic_binary(
> +									base->gallivm->builder,
> +									"llvm.R600.store.pixel.color",
> +									LLVMVoidTypeInContext(base->gallivm->context),
> +									output, reg_index);
> +							}
> +						} else {
> +							LLVMValueRef reg_index = lp_build_const_int32(
> +								base->gallivm,
> +								(color_count++/4) * 4 + chan);
> +							lp_build_intrinsic_binary(
> +								base->gallivm->builder,
> +								"llvm.R600.store.pixel.color",
> +								LLVMVoidTypeInContext(base->gallivm->context),
> +								output, reg_index);
> +						}
> +					}
> +					break;
> +				case TGSI_SEMANTIC_POSITION:
> +					if (chan != 2)
> +						continue;
> +					lp_build_intrinsic_unary(
> +						base->gallivm->builder,
> +						"llvm.R600.store.pixel.depth",
> +						LLVMVoidTypeInContext(base->gallivm->context),
> +						output);
> +					break;
> +				case TGSI_SEMANTIC_STENCIL:
> +					if (chan != 1)
> +						continue;
> +					lp_build_intrinsic_unary(
> +						base->gallivm->builder,
> +						"llvm.R600.store.pixel.stencil",
> +						LLVMVoidTypeInContext(base->gallivm->context),
> +						output);
> +					break;
> +				}
> +			}
>  		}
>  	}
>  }
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index bf4877a..56e25b5 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -522,6 +522,21 @@ static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
>  	return bytes_read;
>  }
>  
> +static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx,
> +	unsigned char * bytes, unsigned bytes_read)
> +{
> +	struct r600_bytecode_output output;
> +	memset(&output, 0, sizeof(struct r600_bytecode_output));
> +	uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
> +	uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
> +	if (ctx->bc->chip_class >= EVERGREEN)
> +		eg_bytecode_export_read(&output, word0,word1);
> +	else
> +		r600_bytecode_export_read(&output, word0,word1);
> +	r600_bytecode_add_output(ctx->bc, &output);
> +	return bytes_read;
> +}
> +
>  static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
>  				unsigned char * bytes,	unsigned num_bytes)
>  {
> @@ -556,6 +571,10 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
>  			bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
>  								bytes_read);
>  			break;
> +		case 5:
> +            bytes_read = r600_export_from_byte_stream(ctx, bytes,
> +                                bytes_read);
> +            break;
>  		default:
>  			/* XXX: Error here */
>  			break;
> @@ -1336,7 +1355,11 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
>  		radeon_llvm_ctx.two_side = shader->two_side;
>  		radeon_llvm_ctx.face_input = ctx.face_gpr;
>  		radeon_llvm_ctx.r600_inputs = ctx.shader->input;
> +		radeon_llvm_ctx.r600_outputs = ctx.shader->output;
> +		radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1);
>  		radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
> +		radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN);
> +		radeon_llvm_ctx.extra_buffer = key.dual_src_blend;
>  		mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
>  		if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
>  			dump = 1;
> @@ -1598,6 +1621,24 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
>  			}
>  			break;
>  		case TGSI_PROCESSOR_FRAGMENT:

I think we can remove some duplicated code in the hunk below:

> +		if (use_llvm) {
> +			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
> +				/* never export more colors than the number of CBs */
> +				if (next_pixel_base && next_pixel_base >= key.nr_cbufs + key.dual_src_blend) {
> +					/* skip export */
> +					j--;
> +					continue;
> +				}
The if statement above looks the same for both the llvm and non-llvm
paths.
> +				next_pixel_base++;
> +				shader->nr_ps_color_exports++;

The shader->nr_ps_color_exports++ statement is also the same for the
llvm and non-llvm paths.

> +				if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN) && key.nr_cbufs) {
> +					shader->nr_ps_color_exports += key.nr_cbufs - 1;
> +					next_pixel_base += key.nr_cbufs - 1;
> +				}
> +			} else {
> +				continue;
> +			}
> +		} else {
>  			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
>  				/* never export more colors than the number of CBs */
>  				if (next_pixel_base && next_pixel_base >= key.nr_cbufs + key.dual_src_blend) {

You can move the if(use_llvm) check down a little more to reduce this
duplicated code, for example:

+		if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
+			/* never export more colors than the number of CBs */
+			if (next_pixel_base && next_pixel_base >= key.nr_cbufs + key.dual_src_blend) {
+				/* skip export */
+				j--;
+				continue;
+			}
+			shader->nr_ps_color_exports++;
+			if (use_llvm) {
+				next_pixel_base++;
+				if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN) && key.nr_cbufs) {
+					shader->nr_ps_color_exports += key.nr_cbufs - 1;
+					next_pixel_base += key.nr_cbufs - 1;
+				}
+			} else {
+				/* non-llvm code goes here */
+			}
+		} else {
+			continue;
+		}

> @@ -1644,6 +1685,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
>  				r = -EINVAL;
>  				goto out_err;
>  			}
> +		}
>  			break;
>  		default:
>  			R600_ERR("unsupported processor type %d\n", ctx.type);
> @@ -1706,10 +1748,13 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
>  		}
>  	}
>  	/* add output to bytecode */
> -	for (i = 0; i < noutput; i++) {
> -		r = r600_bytecode_add_output(ctx.bc, &output[i]);
> -		if (r)
> -			goto out_err;
> +	if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT ||
> +		(ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0)) {
> +		for (i = 0; i < noutput; i++) {
> +			r = r600_bytecode_add_output(ctx.bc, &output[i]);
> +			if (r)
> +				goto out_err;
> +		}
>  	}
>  	/* add program end */
>  	if (ctx.bc->chip_class == CAYMAN)
> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
> index 6118b11..948e8cf 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm.h
> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
> @@ -59,6 +59,10 @@ struct radeon_llvm_context {
>  	unsigned face_input;
>  	unsigned two_side;
>  	struct r600_shader_io * r600_inputs;
> +	struct r600_shader_io * r600_outputs;
> +	unsigned color_buffer_count;
> +	unsigned extra_buffer;
> +	unsigned fs_color_all;
>  
>  	/*=== Front end configuration ===*/
>  
> -- 
> 1.7.11.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list