commit 3f66c5d0daf7ef15ddc9f7bc22967d95d52ab2af Author: chr Date: Sat May 16 16:43:09 2009 +0200 - Introduce emit_cvt and use it where applicable (flr, abs, sat, and in set). - Restructure LIT again; now the src == dst case is completely taken care of there. - Change emit_kil and add negation support (the generated insn was different before, but this is how the blob does it here, should look into this). - Remove unnecessary MALLOC and FREE in program dump ifdef block. - In alloc_immd(), also put -f and 0.5 * f in the immd buffer; as it is now, this might save some space. Well, that's a lot of rather unrelated changes; maybe I should break things up more. diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 3116735..4dc5676 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -335,7 +335,7 @@ alloc_immd(struct nv50_pc *pc, float f) break; if (hw == pc->immd_nr * 4) - hw = ctor_immd(pc, f, 0, 0, 0) * 4; + hw = ctor_immd(pc, f, -f, 0.5f * f, 0) * 4; r->type = P_IMMD; r->hw = hw; @@ -790,6 +790,48 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, e); } +#define CVTOP_RN 0x01 +#define CVTOP_FLOOR 0x03 +#define CVTOP_CEIL 0x05 +#define CVTOP_TRUNC 0x07 +#define CVTOP_SAT 0x08 +#define CVTOP_ABS 0x10 + +#define CVT_F32_F32 0xc4 +#define CVT_F32_S32 0x44 +#define CVT_F32_U32 0x64 +#define CVT_S32_F32 0x8c +#define CVT_S32_S32 0x0c +#define CVT_R32_F32 0xcc + +static void +emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, + int wp, unsigned cop, unsigned fmt) +{ + struct nv50_program_exec *e; + + e = exec(pc); + set_long(pc, e); + + e->inst[0] |= 0xa0000000; + e->inst[1] |= 0x00004000; + e->inst[1] |= (cop << 16); + e->inst[1] |= (fmt << 24); + set_src_0(pc, src, e); + + if (wp >= 0) + set_pred_wr(pc, 1, wp, e); + + if (dst) + set_dst(pc, dst, e); + else { + e->inst[0] |= 0x000001fc; + e->inst[1] |= 0x00000008; + } + + 
emit(pc, e); +} + static void emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) @@ -821,34 +863,16 @@ emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, set_src_1(pc, src1, e); emit(pc, e); - /* cvt.f32.u32 */ - e = exec(pc); - e->inst[0] = 0xa0000001; - e->inst[1] = 0x64014780; - set_dst(pc, rdst, e); - set_src_0(pc, dst, e); - emit(pc, e); - + emit_cvt(pc, rdst, dst, -1, CVTOP_RN, CVT_F32_U32); + if (dst != rdst) free_temp(pc, dst); } -static void +static INLINE void emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0xa0000000; /* cvt */ - set_long(pc, e); - e->inst[1] |= (6 << 29); /* cvt */ - e->inst[1] |= 0x08000000; /* integer mode */ - e->inst[1] |= 0x04000000; /* 32 bit */ - e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */ - e->inst[1] |= (1 << 14); /* src .f32 */ - set_dst(pc, dst, e); - set_src_0(pc, src, e); - - emit(pc, e); + emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_R32_F32); } static void @@ -865,21 +889,10 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, free_temp(pc, temp); } -static void +static INLINE void emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0xa0000000; /* cvt */ - set_long(pc, e); - e->inst[1] |= (6 << 29); /* cvt */ - e->inst[1] |= 0x04000000; /* 32 bit */ - e->inst[1] |= (1 << 14); /* src .f32 */ - e->inst[1] |= ((1 << 6) << 14); /* .abs */ - set_dst(pc, dst, e); - set_src_0(pc, src, e); - - emit(pc, e); + emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32); } static void @@ -894,10 +907,7 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, boolean allow32 = pc->allow32; if (mask & (3 << 1)) { - if (mask & (1 << 1)) - tmp[0] = dst[1]; - else - tmp[0] = temp_temp(pc); + tmp[0] = alloc_temp(pc, NULL); emit_minmax(pc, 4, tmp[0], src[0], zero); } @@ -920,6 +930,12 @@ 
emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, pc->allow32 = allow32; + if (mask & (1 << 1)) + assimilate_temp(pc, dst[1], tmp[0]); + else + if (mask & (1 << 2)) + free_temp(pc, tmp[0]); + /* do this last, in case src[i,j] == dst[0,3] */ if (mask & (1 << 0)) emit_mov(pc, dst[0], one); @@ -953,21 +969,16 @@ static void emit_kil(struct nv50_pc *pc, struct nv50_reg *src) { struct nv50_program_exec *e; - const int r_pred = 1; - /* Sets predicate reg ? */ - e = exec(pc); - e->inst[0] = 0xa00001fd; - e->inst[1] = 0xc4014788; - set_src_0(pc, src, e); - set_pred_wr(pc, 1, r_pred, e); - emit(pc, e); + emit_cvt(pc, NULL, src, 0, CVTOP_RN, CVT_F32_F32); + if (pc->negate) + pc->p->exec_tail->inst[1] |= 0x20000000; - /* This is probably KILP */ + /* @p0.lt kil */ e = exec(pc); - e->inst[0] = 0x000001fe; set_long(pc, e); - set_pred(pc, 1 /* LT? */, r_pred, e); + e->inst[0] |= 0x00000002; + set_pred(pc, 1, 0, e); emit(pc, e); } @@ -1100,6 +1111,7 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: case TGSI_OPCODE_MUL: + case TGSI_OPCODE_KIL: return 0; case TGSI_OPCODE_ADD: case TGSI_OPCODE_SUB: @@ -1281,14 +1293,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) rdst[c] = dst[c]; dst[c] = alloc_preferred_temp(pc, rdst[c]->rhw); } - } else if (inst->Instruction.Opcode == TGSI_OPCODE_LIT) { - /* XXX: shouldn't give LIT an extra case here */ - if (src[0][1] == dst[1] || - src[0][3] == dst[1]) { - assimilate = TRUE; - rdst[1] = dst[1]; - dst[1] = alloc_temp(pc, NULL); - } } i = -1; @@ -1585,21 +1589,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (sat) { for (c = 0; c < 4; c++) { - struct nv50_program_exec *e; - if (!(mask & (1 << c))) continue; - e = exec(pc); - - e->inst[0] = 0xa0000000; /* cvt */ - set_long(pc, e); - e->inst[1] |= (6 << 29); /* cvt */ - e->inst[1] |= 0x04000000; /* 32 bit */ - e->inst[1] |= (1 << 14); /* src .f32 */ - 
e->inst[1] |= ((1 << 5) << 14); /* .sat */ - set_dst(pc, rdst[c], e); - set_src_0(pc, dst[c], e); - emit(pc, e); + emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT, CVT_F32_F32); } } else if (assimilate) { for (c = 0; c < 4; c++) @@ -2272,13 +2264,11 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) #ifdef NV50_PROGRAM_DUMP NOUVEAU_ERR("-------\n"); - up = ptr = MALLOC(p->exec_size * 4); for (e = p->exec_head; e; e = e->next) { NOUVEAU_ERR("0x%08x\n", e->inst[0]); if (is_long(e)) NOUVEAU_ERR("0x%08x\n", e->inst[1]); } - FREE(up); #endif up = ptr = MALLOC(p->exec_size * 4);