[Mesa-dev] [PATCH 3/3] r600g: tgsi-to-llvm generates interpolation intrinsics

Tom Stellard tom at stellard.net
Tue Sep 18 07:50:36 PDT 2012


On Tue, Sep 18, 2012 at 03:59:28PM +0200, Vincent Lejeune wrote:

I took a quick look at this and it seems OK to me, but I'm not really
familiar with the interpolation code, so it might be helpful to have
someone else look at it too.

You should make sure to test for regressions with and without the LLVM
compiler, because you modify some non-LLVM shader code with this patch.

-Tom

> ---
>  src/gallium/drivers/r600/r600_llvm.c     | 102 +++++++++++++++--
>  src/gallium/drivers/r600/r600_shader.c   | 191 +++++++++++++++----------------
>  src/gallium/drivers/r600/r600_shader.h   |  38 ++++++
>  src/gallium/drivers/radeon/radeon_llvm.h |   7 ++
>  4 files changed, 232 insertions(+), 106 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
> index e77758b..ea23ec8 100644
> --- a/src/gallium/drivers/r600/r600_llvm.c
> +++ b/src/gallium/drivers/r600/r600_llvm.c
> @@ -72,20 +72,106 @@ static void llvm_load_input(
>  	const struct tgsi_full_declaration *decl)
>  {
>  	unsigned chan;
> +	const char * intr_name;
>  
>  	for (chan = 0; chan < 4; chan++) {
>  		unsigned soa_index = radeon_llvm_reg_index_soa(input_index,
> -								chan);
> -
> +	                                               chan);
> +
> +		int reg_index = 0;
> +	
> +		if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
> +			if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
> +				intr_name = "llvm.AMDGPU.load.input.position";
> +				reg_index = ctx->reserved_reg_count * 4 + chan;
> +			} else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
> +				LLVMValueRef reg = lp_build_const_int32(
> +				ctx->soa.bld_base.base.gallivm,
> +				ctx->tgsi_inputs[input_index].gpr * 4);
> +				LLVMValueRef face = build_intrinsic(
> +				   ctx->soa.bld_base.base.gallivm->builder,
> +				   "llvm.AMDGPU.load.input.face",
> +				   LLVMInt1TypeInContext(ctx->soa.bld_base.base.gallivm->context),
> +				   &reg, 1,
> +				   LLVMReadNoneAttribute);
> +				LLVMValueRef one = lp_build_const_float(
> +				   ctx->soa.bld_base.base.gallivm,
> +				   1.0f);
> +				LLVMValueRef minus_one = lp_build_const_float(
> +				   ctx->soa.bld_base.base.gallivm,
> +				   -1.0f);
> +				ctx->inputs[soa_index] = LLVMBuildSelect(
> +				   ctx->soa.bld_base.base.gallivm->builder,
> +				   face,
> +				   one, minus_one, "");
> +				continue;
> +			} else {
> +				reg_index = ctx->first_lds ++;
> +				switch (decl->Interp.Interpolate) {
> +				case TGSI_INTERPOLATE_COLOR:
> +					intr_name = "llvm.AMDGPU.load.input.perspective";
> +					break;
> +				case TGSI_INTERPOLATE_CONSTANT:
> +					intr_name = "llvm.AMDGPU.load.input.constant";
> +					break;
> +				case TGSI_INTERPOLATE_LINEAR:
> +					intr_name = "llvm.AMDGPU.load.input.linear";
> +					break;
> +				case TGSI_INTERPOLATE_PERSPECTIVE:
> +					intr_name = "llvm.AMDGPU.load.input.perspective";
> +					break;
> +				default:
> +					fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
> +					return;
> +				}
> +			}
> +		} else {
> +			reg_index = ctx->reserved_reg_count * 4 + soa_index;
> +			intr_name = "llvm.R600.load.input";
> +		}
> +	
> +	
>  		/* The * 4 is assuming that we are in soa mode. */
>  		LLVMValueRef reg = lp_build_const_int32(
> -				ctx->soa.bld_base.base.gallivm,
> -				soa_index + (ctx->reserved_reg_count * 4));
> +		   ctx->soa.bld_base.base.gallivm,
> +		   reg_index);
> +	
>  		ctx->inputs[soa_index] = build_intrinsic(
> -				ctx->soa.bld_base.base.gallivm->builder,
> -				"llvm.R600.load.input",
> -				ctx->soa.bld_base.base.elem_type, &reg, 1,
> -				LLVMReadNoneAttribute);
> +		   ctx->soa.bld_base.base.gallivm->builder,
> +		   intr_name,
> +		   ctx->soa.bld_base.base.elem_type, &reg, 1,
> +		   LLVMReadNoneAttribute);
> +	}
> +   
> +	if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR && ctx->two_side) {
> +		for (chan = 0; chan < 4; chan++) {
> +			unsigned soa_index = radeon_llvm_reg_index_soa(input_index,
> +			   chan);
> +			LLVMValueRef backcolor_reg = lp_build_const_int32(
> +			   ctx->soa.bld_base.base.gallivm,
> +			   ctx->last_lds * 4 + chan);
> +			LLVMValueRef backcolor = build_intrinsic(
> +			   ctx->soa.bld_base.base.gallivm->builder,
> +			   intr_name,
> +			   ctx->soa.bld_base.base.elem_type, &backcolor_reg, 1,
> +			   LLVMReadNoneAttribute);
> +			LLVMValueRef face_reg = lp_build_const_int32(
> +			   ctx->soa.bld_base.base.gallivm,
> +			   ctx->face_input * 4);
> +			LLVMValueRef face = build_intrinsic(
> +			   ctx->soa.bld_base.base.gallivm->builder,
> +			   "llvm.AMDGPU.load.input.face",
> +			   LLVMInt1TypeInContext(ctx->soa.bld_base.base.gallivm->context),
> +			   &face_reg, 1,
> +			   LLVMReadNoneAttribute);
> +			ctx->inputs[soa_index] = LLVMBuildSelect(
> +			   ctx->soa.bld_base.base.gallivm->builder,
> +			   face,
> +			   ctx->inputs[soa_index],
> +			   backcolor,
> +			   "");
> +		}
> +		ctx->last_lds++;
>  	}
>  }
>  
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index 3e79764..125c124 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -28,8 +28,6 @@
>  
>  #include "pipe/p_shader_tokens.h"
>  #include "tgsi/tgsi_info.h"
> -#include "tgsi/tgsi_parse.h"
> -#include "tgsi/tgsi_scan.h"
>  #include "tgsi/tgsi_dump.h"
>  #include "util/u_memory.h"
>  #include <stdio.h>
> @@ -164,42 +162,6 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader
>   */
>  struct r600_shader_tgsi_instruction;
>  
> -struct r600_shader_src {
> -	unsigned				sel;
> -	unsigned				swizzle[4];
> -	unsigned				neg;
> -	unsigned				abs;
> -	unsigned				rel;
> -	uint32_t				value[4];
> -};
> -
> -struct r600_shader_ctx {
> -	struct tgsi_shader_info			info;
> -	struct tgsi_parse_context		parse;
> -	const struct tgsi_token			*tokens;
> -	unsigned				type;
> -	unsigned				file_offset[TGSI_FILE_COUNT];
> -	unsigned				temp_reg;
> -	struct r600_shader_tgsi_instruction	*inst_info;
> -	struct r600_bytecode			*bc;
> -	struct r600_shader			*shader;
> -	struct r600_shader_src			src[4];
> -	uint32_t				*literals;
> -	uint32_t				nliterals;
> -	uint32_t				max_driver_temp_used;
> -	/* needed for evergreen interpolation */
> -	boolean                                 input_centroid;
> -	boolean                                 input_linear;
> -	boolean                                 input_perspective;
> -	int					num_interp_gpr;
> -	int					face_gpr;
> -	int					colors_used;
> -	boolean                 clip_vertex_write;
> -	unsigned                cv_output;
> -	int					fragcoord_input;
> -	int					native_integers;
> -};
> -
>  struct r600_shader_tgsi_instruction {
>  	unsigned	tgsi_opcode;
>  	unsigned	is_op3;
> @@ -317,6 +279,21 @@ static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
>  	alu.bank_swizzle_force = bytes[bytes_read++];
>  	alu.omod = bytes[bytes_read++];
>  	alu.index_mode = bytes[bytes_read++];
> +	
> +	if (alu.inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY)
> +	{
> +	  alu.src[1].sel += 448;
> +	  alu.bank_swizzle_force = SQ_ALU_VEC_210;
> +	}
> +	
> +	if( alu.inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW) {
> +  	   alu.src[1].sel += 448;
> +	   alu.bank_swizzle_force = SQ_ALU_VEC_210;
> +	}
> +	
> +	if (alu.inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0) {
> +	   alu.src[0].sel += 448;
> +	}
>  
>  
>  	if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE) ||
> @@ -741,16 +718,18 @@ static int r600_spi_sid(struct r600_shader_io * io)
>  };
>  
>  /* turn input into interpolate on EG */
> -static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
> +static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index, unsigned use_llvm)
>  {
>  	int r = 0;
>  
>  	if (ctx->shader->input[index].spi_sid) {
>  		ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
> -		if (ctx->shader->input[index].interpolate > 0) {
> -			r = evergreen_interp_alu(ctx, index);
> -		} else {
> -			r = evergreen_interp_flat(ctx, index);
> +		if (!use_llvm) {
> +			if (ctx->shader->input[index].interpolate > 0) {
> +				r = evergreen_interp_alu(ctx, index);
> +			} else {
> +				r = evergreen_interp_flat(ctx, index);
> +			}
>  		}
>  	}
>  	return r;
> @@ -785,7 +764,7 @@ static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back
>  	return 0;
>  }
>  
> -static int tgsi_declaration(struct r600_shader_ctx *ctx)
> +static int tgsi_declaration(struct r600_shader_ctx *ctx, unsigned use_llvm)
>  {
>  	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
>  	unsigned i;
> @@ -813,7 +792,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
>  				break;
>  			}
>  			if (ctx->bc->chip_class >= EVERGREEN) {
> -				if ((r = evergreen_interp_input(ctx, i)))
> +				if ((r = evergreen_interp_input(ctx, i, use_llvm)))
>  					return r;
>  			}
>  		}
> @@ -1119,7 +1098,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
>  	return 0;
>  }
>  
> -static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
> +static int process_twoside_color_inputs(struct r600_shader_ctx *ctx, unsigned use_llvm)
>  {
>  	int i, r, count = ctx->shader->ninput;
>  
> @@ -1146,15 +1125,17 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
>  			ctx->shader->input[ni].spi_sid = r600_spi_sid(&ctx->shader->input[ni]);
>  			ctx->shader->input[ni].gpr = gpr++;
>  
> -			if (ctx->bc->chip_class >= EVERGREEN) {
> -				r = evergreen_interp_input(ctx, ni);
> +			if (!use_llvm) {
> +				if (ctx->bc->chip_class >= EVERGREEN) {
> +					r = evergreen_interp_input(ctx, ni, use_llvm);
> +					if (r)
> +						return r;
> +				}
> +
> +				r = select_twoside_color(ctx, i, ni);
>  				if (r)
>  					return r;
>  			}
> -
> -			r = select_twoside_color(ctx, i, ni);
> -			if (r)
> -				return r;
>  		}
>  	}
>  	return 0;
> @@ -1239,7 +1220,6 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
>  		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
>  	}
>  
> -	/* LLVM backend setup */
>  #ifdef R600_USE_LLVM
>  	if (use_llvm && ctx.info.indirect_files) {
>  		fprintf(stderr, "Warning: R600 LLVM backend does not support "
> @@ -1247,34 +1227,13 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
>  				"backend.\n");
>  		use_llvm = 0;
>  	}
> -	if (use_llvm) {
> -		struct radeon_llvm_context radeon_llvm_ctx;
> -		LLVMModuleRef mod;
> -		unsigned dump = 0;
> -		memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
> -		radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
> -		mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
> -		if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
> -			dump = 1;
> -		}
> -		if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
> -							rctx->family, dump)) {
> -			FREE(inst_bytes);
> -			radeon_llvm_dispose(&radeon_llvm_ctx);
> -			use_llvm = 0;
> -			fprintf(stderr, "R600 LLVM backend failed to compile "
> -				"shader.  Falling back to TGSI\n");
> -		} else {
> -			ctx.file_offset[TGSI_FILE_OUTPUT] =
> -					ctx.file_offset[TGSI_FILE_INPUT];
> -		}
> -		radeon_llvm_dispose(&radeon_llvm_ctx);
> -	}
>  #endif
> -	/* End of LLVM backend setup */
>  
> -	if (!use_llvm) {
> +	if (use_llvm) {
>  		ctx.file_offset[TGSI_FILE_OUTPUT] =
> +			ctx.file_offset[TGSI_FILE_INPUT];
> +	} else {
> +	   ctx.file_offset[TGSI_FILE_OUTPUT] =
>  			ctx.file_offset[TGSI_FILE_INPUT] +
>  			ctx.info.file_max[TGSI_FILE_INPUT] + 1;
>  	}
> @@ -1310,7 +1269,7 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
>  			ctx.nliterals++;
>  			break;
>  		case TGSI_TOKEN_TYPE_DECLARATION:
> -			r = tgsi_declaration(&ctx);
> +			r = tgsi_declaration(&ctx, use_llvm);
>  			if (r)
>  				goto out_err;
>  			break;
> @@ -1335,13 +1294,62 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
>  			goto out_err;
>  		}
>  	}
> +	
> +/* LLVM backend setup */
> +#ifdef R600_USE_LLVM
> +	if (use_llvm) {
> +		struct radeon_llvm_context radeon_llvm_ctx;
> +		LLVMModuleRef mod;
> +		unsigned dump = 0;
> +		memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
> +		radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
> +		radeon_llvm_ctx.type = ctx.type;
> +		radeon_llvm_ctx.two_side = shader->two_side;
> +		radeon_llvm_ctx.face_input = (ctx.face_gpr>=0)?ctx.face_gpr:(ctx.file_offset[TGSI_FILE_INPUT] +
> +		                                                              ctx.info.file_max[TGSI_FILE_INPUT] + 1);
> +		radeon_llvm_ctx.tgsi_inputs = ctx.shader->input;
> +		radeon_llvm_ctx.last_lds = ctx.shader->nlds;
> +		mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
> +		if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
> +			dump = 1;
> +		}
> +		if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
> +							rctx->family, dump)) {
> +			FREE(inst_bytes);
> +			radeon_llvm_dispose(&radeon_llvm_ctx);
> +			use_llvm = 0;
> +			fprintf(stderr, "R600 LLVM backend failed to compile "
> +				"shader.  Falling back to TGSI\n");
> +		} else {
> +			ctx.file_offset[TGSI_FILE_OUTPUT] =
> +					ctx.file_offset[TGSI_FILE_INPUT];
> +		}
> +		radeon_llvm_dispose(&radeon_llvm_ctx);
> +	}
> +#endif
> +/* End of LLVM backend setup */
>  
>  	if (shader->fs_write_all && rctx->chip_class >= EVERGREEN)
>  		shader->nr_ps_max_color_exports = 8;
>  
> -	if (ctx.fragcoord_input >= 0) {
> -		if (ctx.bc->chip_class == CAYMAN) {
> -			for (j = 0 ; j < 4; j++) {
> +	if (!use_llvm) {
> +		if (ctx.fragcoord_input >= 0) {
> +			if (ctx.bc->chip_class == CAYMAN) {
> +				for (j = 0 ; j < 4; j++) {
> +					struct r600_bytecode_alu alu;
> +					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +					alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
> +					alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
> +					alu.src[0].chan = 3;
> +
> +					alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
> +					alu.dst.chan = j;
> +					alu.dst.write = (j == 3);
> +					alu.last = 1;
> +					if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
> +						return r;
> +				}
> +			} else {
>  				struct r600_bytecode_alu alu;
>  				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>  				alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
> @@ -1349,30 +1357,17 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
>  				alu.src[0].chan = 3;
>  
>  				alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
> -				alu.dst.chan = j;
> -				alu.dst.write = (j == 3);
> +				alu.dst.chan = 3;
> +				alu.dst.write = 1;
>  				alu.last = 1;
>  				if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
>  					return r;
>  			}
> -		} else {
> -			struct r600_bytecode_alu alu;
> -			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> -			alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
> -			alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
> -			alu.src[0].chan = 3;
> -
> -			alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
> -			alu.dst.chan = 3;
> -			alu.dst.write = 1;
> -			alu.last = 1;
> -			if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
> -				return r;
>  		}
>  	}
>  
>  	if (shader->two_side && ctx.colors_used) {
> -		if ((r = process_twoside_color_inputs(&ctx)))
> +		if ((r = process_twoside_color_inputs(&ctx, use_llvm)))
>  			return r;
>  	}
>  
> diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
> index eb0bbf6..8f70241 100644
> --- a/src/gallium/drivers/r600/r600_shader.h
> +++ b/src/gallium/drivers/r600/r600_shader.h
> @@ -24,6 +24,44 @@
>  #define R600_SHADER_H
>  
>  #include "r600_asm.h"
> +#include "tgsi/tgsi_parse.h"
> +#include "tgsi/tgsi_scan.h"
> +
> +struct r600_shader_src {
> +	unsigned				sel;
> +	unsigned				swizzle[4];
> +	unsigned				neg;
> +	unsigned				abs;
> +	unsigned				rel;
> +	uint32_t				value[4];
> +};
> +
> +struct r600_shader_ctx {
> +	struct tgsi_shader_info			info;
> +	struct tgsi_parse_context		parse;
> +	const struct tgsi_token			*tokens;
> +	unsigned				type;
> +	unsigned				file_offset[TGSI_FILE_COUNT];
> +	unsigned				temp_reg;
> +	struct r600_shader_tgsi_instruction	*inst_info;
> +	struct r600_bytecode			*bc;
> +	struct r600_shader			*shader;
> +	struct r600_shader_src			src[4];
> +	uint32_t				*literals;
> +	uint32_t				nliterals;
> +	uint32_t				max_driver_temp_used;
> +	/* needed for evergreen interpolation */
> +	boolean                                 input_centroid;
> +	boolean                                 input_linear;
> +	boolean                                 input_perspective;
> +	int					num_interp_gpr;
> +	int					face_gpr;
> +	int					colors_used;
> +	boolean                 clip_vertex_write;
> +	unsigned                cv_output;
> +	int					fragcoord_input;
> +	int					native_integers;
> +};
>  
>  struct r600_shader_io {
>  	unsigned		name;
> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
> index 7a32bb0..d8ed85e 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm.h
> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
> @@ -53,6 +53,13 @@ struct radeon_llvm_loop {
>  struct radeon_llvm_context {
>  
>  	struct lp_build_tgsi_soa_context soa;
> +	unsigned type;
> +	unsigned fragcoord_input;
> +	unsigned face_input;
> +	unsigned two_side;
> +	unsigned last_lds;
> +	unsigned first_lds;
> +	struct r600_shader_io * tgsi_inputs;
>  
>  	/*=== Front end configuration ===*/
>  
> -- 
> 1.7.11.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list