[Mesa-dev] [PATCH] r600g: fixup AR handling (v3)
Vadim Girlin
vadimgirlin at gmail.com
Wed Jan 18 16:30:13 PST 2012
On Wed, 2012-01-18 at 21:49 +0000, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> So it appears R600s (except rv670) do AR handling differently, using a different
> opcode. This patch fixes up r600g to work properly on r600.
>
> This fixes ~100 piglit tests here (in GLSL1.30 mode) on rv610.
>
> v3: add index_mode as per the docs.
>
> This still fails any dst relative tests for some reason I can't quite see yet,
> but it passes a lot more tests than without.
I guess it's the problem described in the r6xx_r7xx_3d.pdf:
"6.1.4 Shader GPR Indexing may return incorrect result
This affects R600, RV630 and RV610, but not RV670 or RS780.
...
MOV R[A0.x +2], R33
ADD R20, R20, R2 // H/w thinks R2 is the same as the prev dest
and will substitute PV"
Vadim
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/gallium/drivers/r600/r600_asm.c | 48 ++++++++++++++++++++++++++++---
> src/gallium/drivers/r600/r600_asm.h | 8 ++++-
> src/gallium/drivers/r600/r600_shader.c | 6 +++-
> src/gallium/drivers/r600/r600_sq.h | 7 ++++
> 4 files changed, 62 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
> index 8234744..aad286b 100644
> --- a/src/gallium/drivers/r600/r600_asm.c
> +++ b/src/gallium/drivers/r600/r600_asm.c
> @@ -94,6 +94,7 @@ static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode *
> case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
> case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA:
> case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
> + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT:
> case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
> case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
> case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
> @@ -249,10 +250,11 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void)
> return tex;
> }
>
> -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class)
> +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, unsigned ar_handling)
> {
> LIST_INITHEAD(&bc->cf);
> bc->chip_class = chip_class;
> + bc->ar_handling = ar_handling;
> }
>
> static int r600_bytecode_add_cf(struct r600_bytecode *bc)
> @@ -441,7 +443,8 @@ static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *
> return !alu->is_op3 && (
> alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA ||
> alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR ||
> - alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
> + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT ||
> + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT);
> case EVERGREEN:
> case CAYMAN:
> default:
> @@ -457,7 +460,8 @@ static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_a
> case R600:
> case R700:
> return is_alu_reduction_inst(bc, alu) ||
> - is_alu_mova_inst(bc, alu);
> + (is_alu_mova_inst(bc, alu) &&
> + (alu->inst != V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT));
> case EVERGREEN:
> case CAYMAN:
> default:
> @@ -478,6 +482,7 @@ static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode
> case R700:
> if (!alu->is_op3)
> return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
> + alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT ||
> alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
> alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT ||
> alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
> @@ -1236,12 +1241,43 @@ static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, struct r60
> return 0;
> }
>
> +
> +/* load AR register from gpr (bc->ar_reg) with MOVA_GPR_INT */
> +static int load_ar_r6xx(struct r600_bytecode *bc)
> +{
> + struct r600_bytecode_alu alu;
> + int r;
> +
> + if (bc->ar_loaded)
> + return 0;
> +
> + /* hack to avoid making MOVA the last instruction in the clause */
> + if ((bc->cf_last->ndw>>1) >= 110)
> + bc->force_add_cf = 1;
> +
> + memset(&alu, 0, sizeof(alu));
> + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT;
> + alu.src[0].sel = bc->ar_reg;
> + alu.last = 1;
> + alu.index_mode = INDEX_MODE_LOOP;
> + r = r600_bytecode_add_alu(bc, &alu);
> + if (r)
> + return r;
> +
> + bc->cf_last->r6xx_uses_waterfall = 1;
> + bc->ar_loaded = 1;
> + return 0;
> +}
> +
> /* load AR register from gpr (bc->ar_reg) with MOVA_INT */
> static int load_ar(struct r600_bytecode *bc)
> {
> struct r600_bytecode_alu alu;
> int r;
>
> + if (bc->ar_handling)
> + return load_ar_r6xx(bc);
> +
> if (bc->ar_loaded)
> return 0;
>
> @@ -1599,6 +1635,7 @@ static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecod
> S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
> S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
> S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
> + S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) |
> S_SQ_ALU_WORD0_LAST(alu->last);
>
> if (alu->is_op3) {
> @@ -2286,7 +2323,8 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
> fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel);
> fprintf(stderr, "REL:%d ", alu->src[1].rel);
> fprintf(stderr, "CHAN:%d ", alu->src[1].chan);
> - fprintf(stderr, "NEG:%d) ", alu->src[1].neg);
> + fprintf(stderr, "NEG:%d ", alu->src[1].neg);
> + fprintf(stderr, "IM:%d) ", alu->index_mode);
> fprintf(stderr, "LAST:%d)\n", alu->last);
> id++;
> fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], alu->last ? '*' : ' ');
> @@ -2565,7 +2603,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
> }
>
> memset(&bc, 0, sizeof(bc));
> - r600_bytecode_init(&bc, rctx->chip_class);
> + r600_bytecode_init(&bc, rctx->chip_class, 0);
>
> for (i = 0; i < ve->count; i++) {
> if (elements[i].instance_divisor > 1) {
> diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
> index d0ff75d..40abb25 100644
> --- a/src/gallium/drivers/r600/r600_asm.h
> +++ b/src/gallium/drivers/r600/r600_asm.h
> @@ -54,6 +54,7 @@ struct r600_bytecode_alu {
> unsigned bank_swizzle;
> unsigned bank_swizzle_force;
> unsigned omod;
> + unsigned index_mode;
> };
>
> struct r600_bytecode_tex {
> @@ -176,6 +177,10 @@ struct r600_cf_callstack {
> int max;
> };
>
> +#define AR_HANDLE_NORMAL 0
> +#define AR_HANDLE_RV6XX 1 /* except RV670 */
> +
> +
> struct r600_bytecode {
> enum chip_class chip_class;
> int type;
> @@ -194,13 +199,14 @@ struct r600_bytecode {
> struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH];
> unsigned ar_loaded;
> unsigned ar_reg;
> + unsigned ar_handling;
> };
>
> /* eg_asm.c */
> int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
>
> /* r600_asm.c */
> -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class);
> +void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, unsigned ar_handling);
> void r600_bytecode_clear(struct r600_bytecode *bc);
> int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu);
> int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx);
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index 59d41cf..1f19190 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -804,10 +804,14 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
> unsigned output_done, noutput;
> unsigned opcode;
> int i, j, r = 0, pos0;
> + unsigned ar_handling = AR_HANDLE_NORMAL;
> +
> + if ((rctx->chip_class == R600) && (rctx->family != CHIP_RV670))
> + ar_handling = AR_HANDLE_RV6XX;
>
> ctx.bc = &shader->bc;
> ctx.shader = shader;
> - r600_bytecode_init(ctx.bc, rctx->chip_class);
> + r600_bytecode_init(ctx.bc, rctx->chip_class, ar_handling);
> ctx.tokens = tokens;
> tgsi_scan_shader(tokens, &ctx.info);
> tgsi_parse_init(&ctx.parse, tokens);
> diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
> index b9c4126..4b2a19a 100644
> --- a/src/gallium/drivers/r600/r600_sq.h
> +++ b/src/gallium/drivers/r600/r600_sq.h
> @@ -471,4 +471,11 @@
> #define SQ_ALU_SCL_122 0x00000001
> #define SQ_ALU_SCL_212 0x00000002
> #define SQ_ALU_SCL_221 0x00000003
> +
> +#define INDEX_MODE_AR_X 0
> +#define INDEX_MODE_AR_Y 1
> +#define INDEX_MODE_AR_Z 2
> +#define INDEX_MODE_AR_W 3
> +#define INDEX_MODE_LOOP 4
> +
> #endif
More information about the mesa-dev
mailing list