commit e611bc623c1526d5c806964287edf31b4b346d0d Author: chr Date: Sat May 16 16:50:23 2009 +0200 - Unify moving result from temporary to destination registers. - Don't do mov and cvt.sat for MOV_SAT, just cvt.sat suffices. diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index ed9e20b..4a03cf5 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -221,7 +221,7 @@ alloc_preferred_temp(struct nv50_pc *pc, int hw) static void assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - assert(dst->index != -1 && src->index == -1 && src->hw != -1); + assert(src->index == -1 && src->hw != -1); if (dst->hw != -1) pc->r_temp[dst->hw] = NULL; @@ -1311,22 +1311,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) temp = alloc_temp(pc, NULL); emit_precossin(pc, temp, src[0][0]); emit_flop(pc, 5, temp, temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } break; case TGSI_OPCODE_DP3: temp = alloc_temp(pc, NULL); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } break; case TGSI_OPCODE_DP4: temp = alloc_temp(pc, NULL); @@ -1334,11 +1324,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); emit_mad(pc, temp, src[0][3], src[1][3], temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } break; case TGSI_OPCODE_DPH: temp = alloc_temp(pc, NULL); @@ -1346,11 +1331,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); emit_add(pc, temp, src[1][3], temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } break; case TGSI_OPCODE_DST: { @@ -1370,11 +1350,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) temp = alloc_temp(pc, NULL); emit_preex2(pc, temp, src[0][0]); emit_flop(pc, 6, temp, temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { @@ -1405,11 +1380,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) case TGSI_OPCODE_LG2: temp = alloc_temp(pc, NULL); emit_flop(pc, 3, temp, src[0][0]); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } break; case TGSI_OPCODE_LRP: temp = alloc_temp(pc, NULL); @@ -1419,6 +1389,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_sub(pc, temp, src[1][c], src[2][c]); emit_mad(pc, dst[c], temp, src[0][c], src[2][c]); } + free_temp(pc, temp); + temp = NULL; break; case TGSI_OPCODE_MAD: for (c = 0; c < 4; c++) { @@ -1442,6 +1414,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_MOV: + if (sat) { + dst[0] = src[0][0]; + dst[1] = src[0][1]; + dst[2] = src[0][2]; + dst[3] = src[0][3]; + break; + } for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -1458,11 +1437,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) case TGSI_OPCODE_POW: temp = alloc_temp(pc, NULL); emit_pow(pc, temp, src[0][0], src[1][0]); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } break; case TGSI_OPCODE_RCP: for (c = 0; c < 4; c++) { @@ -1493,6 +1467,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_flop(pc, 5, dst[0], temp); if (mask & (1 << 1)) emit_flop(pc, 4, dst[1], temp); + free_temp(pc, temp); + temp = NULL; break; case TGSI_OPCODE_SGE: for (c = 0; c < 4; c++) { @@ -1505,11 +1481,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) temp = alloc_temp(pc, NULL); emit_precossin(pc, temp, src[0][0]); emit_flop(pc, 4, temp, temp); - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_mov(pc, dst[c], temp); - } break; case TGSI_OPCODE_SLT: for (c = 0; c < 4; c++) { @@ -1573,8 +1544,24 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) return FALSE; } - if (temp) - free_temp(pc, temp); + i = -1; + if (temp) { + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + if (i >= 0) + emit_mov(pc, dst[c], dst[i]); + else if (dst[c]->type == P_TEMP) { + assimilate_temp(pc, dst[c], temp); + i = c; + temp = NULL; + } else + emit_mov(pc, dst[c], temp); + } + + if (temp) + free_temp(pc, temp); + } if (sat) { for (c = 0; c < 4; c++) {