[Mesa-dev] [PATCH] r600g: fixup AR handling (v3)

Vadim Girlin vadimgirlin at gmail.com
Wed Jan 18 16:30:13 PST 2012


On Wed, 2012-01-18 at 21:49 +0000, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
> 
> So it appears R600s (except rv670) do AR handling differently, using a different
> opcode. This patch fixes up r600g to work properly on r600.
> 
> This fixes ~100 piglit tests here (in GLSL1.30 mode) on rv610.
> 
> v3: add index_mode as per the docs.
> 
> This still fails any dst relative tests for some reason I can't quite see yet,
> but it passes a lot more tests than without.

I guess it's the problem described in the r6xx_r7xx_3d.pdf:

"6.1.4 Shader GPR Indexing may return incorrect result
This affects R600, RV630 and RV610, but not RV670 or RS780.
...
	MOV R[A0.x +2], R33
	ADD R20, R20, R2 // H/w thinks R2 is the same as the prev dest 
				and will substitute PV"

Vadim

> 
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/gallium/drivers/r600/r600_asm.c    |   48 ++++++++++++++++++++++++++++---
>  src/gallium/drivers/r600/r600_asm.h    |    8 ++++-
>  src/gallium/drivers/r600/r600_shader.c |    6 +++-
>  src/gallium/drivers/r600/r600_sq.h     |    7 ++++
>  4 files changed, 62 insertions(+), 7 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
> index 8234744..aad286b 100644
> --- a/src/gallium/drivers/r600/r600_asm.c
> +++ b/src/gallium/drivers/r600/r600_asm.c
> @@ -94,6 +94,7 @@ static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode *
>  		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
>  		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA:
>  		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
> +		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT:
>  		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
>  		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
>  		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
> @@ -249,10 +250,11 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void)
>  	return tex;
>  }
>  
> -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class)
> +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, unsigned ar_handling)
>  {
>  	LIST_INITHEAD(&bc->cf);
>  	bc->chip_class = chip_class;
> +	bc->ar_handling = ar_handling;
>  }
>  
>  static int r600_bytecode_add_cf(struct r600_bytecode *bc)
> @@ -441,7 +443,8 @@ static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *
>  		return !alu->is_op3 && (
>  			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA ||
>  			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR ||
> -			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
> +			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT ||
> +			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT);
>  	case EVERGREEN:
>  	case CAYMAN:
>  	default:
> @@ -457,7 +460,8 @@ static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_a
>  	case R600:
>  	case R700:
>  		return is_alu_reduction_inst(bc, alu) ||
> -			is_alu_mova_inst(bc, alu);
> +			(is_alu_mova_inst(bc, alu) && 
> +			 (alu->inst != V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT));
>  	case EVERGREEN:
>  	case CAYMAN:
>  	default:
> @@ -478,6 +482,7 @@ static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode
>  	case R700:
>  		if (!alu->is_op3)
>  			return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
> +				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT ||
>  				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
>  			        alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT ||
>  				alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
> @@ -1236,12 +1241,43 @@ static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, struct r60
>  	return 0;
>  }
>  
> +
> +/* load AR register from gpr (bc->ar_reg) with MOVA_GPR_INT */
> +static int load_ar_r6xx(struct r600_bytecode *bc)
> +{
> +	struct r600_bytecode_alu alu;
> +	int r;
> +
> +	if (bc->ar_loaded)
> +		return 0;
> +
> +	/* hack to avoid making MOVA the last instruction in the clause */
> +	if ((bc->cf_last->ndw>>1) >= 110)
> +		bc->force_add_cf = 1;
> +
> +	memset(&alu, 0, sizeof(alu));
> +	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT;
> +	alu.src[0].sel = bc->ar_reg;
> +	alu.last = 1;
> +	alu.index_mode = INDEX_MODE_LOOP;
> +	r = r600_bytecode_add_alu(bc, &alu);
> +	if (r)
> +		return r;
> +
> +	bc->cf_last->r6xx_uses_waterfall = 1;
> +	bc->ar_loaded = 1;
> +	return 0;
> +}
> +
>  /* load AR register from gpr (bc->ar_reg) with MOVA_INT */
>  static int load_ar(struct r600_bytecode *bc)
>  {
>  	struct r600_bytecode_alu alu;
>  	int r;
>  
> +	if (bc->ar_handling)
> +		return load_ar_r6xx(bc);
> +
>  	if (bc->ar_loaded)
>  		return 0;
>  
> @@ -1599,6 +1635,7 @@ static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecod
>  				S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
>  				S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
>  				S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
> +				S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) |
>  				S_SQ_ALU_WORD0_LAST(alu->last);
>  
>  	if (alu->is_op3) {
> @@ -2286,7 +2323,8 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
>  			fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel);
>  			fprintf(stderr, "REL:%d ", alu->src[1].rel);
>  			fprintf(stderr, "CHAN:%d ", alu->src[1].chan);
> -			fprintf(stderr, "NEG:%d) ", alu->src[1].neg);
> +			fprintf(stderr, "NEG:%d ", alu->src[1].neg);
> +			fprintf(stderr, "IM:%d) ", alu->index_mode);
>  			fprintf(stderr, "LAST:%d)\n", alu->last);
>  			id++;
>  			fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], alu->last ? '*' : ' ');
> @@ -2565,7 +2603,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
>  	}
>  
>  	memset(&bc, 0, sizeof(bc));
> -	r600_bytecode_init(&bc, rctx->chip_class);
> +	r600_bytecode_init(&bc, rctx->chip_class, 0);
>  
>  	for (i = 0; i < ve->count; i++) {
>  		if (elements[i].instance_divisor > 1) {
> diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
> index d0ff75d..40abb25 100644
> --- a/src/gallium/drivers/r600/r600_asm.h
> +++ b/src/gallium/drivers/r600/r600_asm.h
> @@ -54,6 +54,7 @@ struct r600_bytecode_alu {
>  	unsigned			bank_swizzle;
>  	unsigned			bank_swizzle_force;
>  	unsigned			omod;
> +	unsigned                        index_mode;
>  };
>  
>  struct r600_bytecode_tex {
> @@ -176,6 +177,10 @@ struct r600_cf_callstack {
>  	int				max;
>  };
>  
> +#define AR_HANDLE_NORMAL 0
> +#define AR_HANDLE_RV6XX 1 /* except RV670 */
> +
> +
>  struct r600_bytecode {
>  	enum chip_class			chip_class;
>  	int				type;
> @@ -194,13 +199,14 @@ struct r600_bytecode {
>  	struct r600_cf_callstack	callstack[SQ_MAX_CALL_DEPTH];
>  	unsigned	ar_loaded;
>  	unsigned	ar_reg;
> +	unsigned        ar_handling;
>  };
>  
>  /* eg_asm.c */
>  int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
>  
>  /* r600_asm.c */
> -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class);
> +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, unsigned ar_handling);
>  void r600_bytecode_clear(struct r600_bytecode *bc);
>  int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu);
>  int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx);
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index 59d41cf..1f19190 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -804,10 +804,14 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
>  	unsigned output_done, noutput;
>  	unsigned opcode;
>  	int i, j, r = 0, pos0;
> +	unsigned ar_handling = AR_HANDLE_NORMAL;
> +
> +	if ((rctx->chip_class == R600) && (rctx->family != CHIP_RV670))
> +		ar_handling = AR_HANDLE_RV6XX;
>  
>  	ctx.bc = &shader->bc;
>  	ctx.shader = shader;
> -	r600_bytecode_init(ctx.bc, rctx->chip_class);
> +	r600_bytecode_init(ctx.bc, rctx->chip_class, ar_handling);
>  	ctx.tokens = tokens;
>  	tgsi_scan_shader(tokens, &ctx.info);
>  	tgsi_parse_init(&ctx.parse, tokens);
> diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
> index b9c4126..4b2a19a 100644
> --- a/src/gallium/drivers/r600/r600_sq.h
> +++ b/src/gallium/drivers/r600/r600_sq.h
> @@ -471,4 +471,11 @@
>  #define SQ_ALU_SCL_122                           0x00000001
>  #define SQ_ALU_SCL_212                           0x00000002
>  #define SQ_ALU_SCL_221                           0x00000003
> +
> +#define   INDEX_MODE_AR_X 0
> +#define   INDEX_MODE_AR_Y 1
> +#define   INDEX_MODE_AR_Z 2
> +#define   INDEX_MODE_AR_W 3
> +#define   INDEX_MODE_LOOP 4
> +
>  #endif





More information about the mesa-dev mailing list