[Mesa-dev] [PATCH] r600g: do all CUBE ALU operations before gradient texture operations (v2)

Glenn Kennard glenn.kennard at gmail.com
Sun Nov 23 18:18:41 PST 2014


On Mon, 24 Nov 2014 01:32:36 +0100, Dave Airlie <airlied at gmail.com> wrote:

> From: Dave Airlie <airlied at redhat.com>
>
> This moves all the CUBE section above the gradients section,
> it also fixes a bug with gradient handling in the bytecode handling
> where the gradient dst register should be ignored but isn't
> causing the subsequent sample_g to fail into a second tex clause.
>

Bytecode handling is not affected by v2 of this patch, might want to  
update the description

> v2: avoid changes to bytecode by using spare temps
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/gallium/drivers/r600/r600_shader.c | 136  
> +++++++++++++++++----------------
>  1 file changed, 72 insertions(+), 64 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_shader.c  
> b/src/gallium/drivers/r600/r600_shader.c
> index 41caac3..e7b39be 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -5095,6 +5095,14 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
>  	    inst->Instruction.Opcode == TGSI_OPCODE_TG4)
>  		sampler_src_reg = 2;
> +	/* TGSI moves the sampler to src reg 3 for TXD */
> +	if (inst->Instruction.Opcode == TGSI_OPCODE_TXD)
> +		sampler_src_reg = 3;
> +
> +	sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ?  
> 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
> +	if (sampler_index_mode)
> +		ctx->shader->uses_index_registers = true;
> +
>  	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
> 	if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
> @@ -5109,67 +5117,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
>  		}
>  	}
> -	if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
> -		int temp_h, temp_v;
> -		/* TGSI moves the sampler to src reg 3 for TXD */
> -		sampler_src_reg = 3;
> -
> -		sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ?  
> 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
> -
> -		src_loaded = TRUE;
> -		for (i = 0; i < 3; i++) {
> -			int treg = r600_get_temp(ctx);
> -
> -			if (i == 0)
> -				src_gpr = treg;
> -			else if (i == 1)
> -				temp_h = treg;
> -			else
> -				temp_v = treg;
> -
> -			for (j = 0; j < 4; j++) {
> -				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> -				alu.op = ALU_OP1_MOV;
> -                                r600_bytecode_src(&alu.src[0],  
> &ctx->src[i], j);
> -                                alu.dst.sel = treg;
> -                                alu.dst.chan = j;
> -                                if (j == 3)
> -                                   alu.last = 1;
> -                                alu.dst.write = 1;
> -                                r = r600_bytecode_add_alu(ctx->bc,  
> &alu);
> -                                if (r)
> -                                    return r;
> -			}
> -		}
> -		for (i = 1; i < 3; i++) {
> -			/* set gradients h/v */
> -			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
> -			tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H :
> -				FETCH_OP_SET_GRADIENTS_V;
> -			tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
> -			tex.sampler_index_mode = sampler_index_mode;
> -			tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
> -			tex.resource_index_mode = sampler_index_mode;
> -
> -			tex.src_gpr = (i == 1) ? temp_h : temp_v;
> -			tex.src_sel_x = 0;
> -			tex.src_sel_y = 1;
> -			tex.src_sel_z = 2;
> -			tex.src_sel_w = 3;
> -
> -			tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm  
> scheduler */
> -			tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
> -			if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
> -				tex.coord_type_x = 1;
> -				tex.coord_type_y = 1;
> -				tex.coord_type_z = 1;
> -				tex.coord_type_w = 1;
> -			}
> -			r = r600_bytecode_add_tex(ctx->bc, &tex);
> -			if (r)
> -				return r;
> -		}
> -	} else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
> +	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
>  		int out_chan;
>  		/* Add perspective divide */
>  		if (ctx->bc->chip_class == CAYMAN) {
> @@ -5233,9 +5181,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
>  		src_gpr = ctx->temp_reg;
>  	}
> -	sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ?  
> 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
> -	if (sampler_index_mode)
> -		ctx->shader->uses_index_registers = true;
> 	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
>  	     inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
> @@ -5454,6 +5399,69 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
>  		src_gpr = ctx->temp_reg;
>  	}
> +	if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
> +		int temp_h, temp_v;
> +		int start_val = 0;
> +
> +		/* if we've already loaded the src (i.e. CUBE don't reload it). */
> +		if (src_loaded == TRUE)
> +			start_val = 1;
> +		else
> +			src_loaded = TRUE;
> +		for (i = start_val; i < 3; i++) {
> +			int treg = r600_get_temp(ctx);
> +
> +			if (i == 0)
> +				src_gpr = treg;
> +			else if (i == 1)
> +				temp_h = treg;
> +			else
> +				temp_v = treg;
> +
> +			for (j = 0; j < 4; j++) {
> +				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +				alu.op = ALU_OP1_MOV;
> +                                r600_bytecode_src(&alu.src[0],  
> &ctx->src[i], j);
> +                                alu.dst.sel = treg;
> +                                alu.dst.chan = j;
> +                                if (j == 3)
> +                                   alu.last = 1;
> +                                alu.dst.write = 1;
> +                                r = r600_bytecode_add_alu(ctx->bc,  
> &alu);
> +                                if (r)
> +                                    return r;
> +			}
> +		}
> +		for (i = 1; i < 3; i++) {
> +			/* set gradients h/v */
> +			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
> +			tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H :
> +				FETCH_OP_SET_GRADIENTS_V;
> +			tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
> +			tex.sampler_index_mode = sampler_index_mode;
> +			tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
> +			tex.resource_index_mode = sampler_index_mode;
> +
> +			tex.src_gpr = (i == 1) ? temp_h : temp_v;
> +			tex.src_sel_x = 0;
> +			tex.src_sel_y = 1;
> +			tex.src_sel_z = 2;
> +			tex.src_sel_w = 3;
> +
> +			tex.dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm  
> scheduler */
> +			tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
> +			if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
> +				tex.coord_type_x = 1;
> +				tex.coord_type_y = 1;
> +				tex.coord_type_z = 1;
> +				tex.coord_type_w = 1;
> +			}
> +			r = r600_bytecode_add_tex(ctx->bc, &tex);
> +			if (r)
> +				return r;
> +		}
> +	}
> +
>  	if (src_requires_loading && !src_loaded) {
>  		for (i = 0; i < 4; i++) {
>  			memset(&alu, 0, sizeof(struct r600_bytecode_alu));


ARB_shader_texture_lod piglits go from 76/90 to 88/90, and fixes a number  
of tex-miplevel-selection tests.

Some remaining Cube/1DArrayShadow failures.

Worthwhile improvement as is, so
Reviewed-by: Glenn Kennard <glenn.kennard at gmail.com>


More information about the mesa-dev mailing list