[PATCH] nv50: better insn generation
Christoph Bumiller
e0425955 at student.tuwien.ac.at
Sun Jun 21 10:07:09 PDT 2009
Don't use extra TEMPs unnecessarily in some cases.
---
src/gallium/drivers/nv50/nv50_program.c | 120 +++++++++++++++---------------
1 files changed, 60 insertions(+), 60 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index d7ab28a..5594560 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1294,18 +1294,20 @@ static boolean
nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
{
const struct tgsi_full_instruction *inst = &tok->FullInstruction;
- struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
- unsigned mask, sat, unit;
+ struct nv50_reg *rdst[4], *dst[4], *src[3][4];
+ struct nv50_reg **pp_rtmp, *rtmp = NULL, *temp = NULL;
+ unsigned mask, sat, unit = 0;
boolean assimilate = FALSE;
- int i, c;
+ int i, c, nr_dst = 0;
mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
for (c = 0; c < 4; c++) {
- if (mask & (1 << c))
+ if (mask & (1 << c)) {
dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
- else
+ ++nr_dst;
+ } else
dst[c] = NULL;
rdst[c] = NULL;
src[0][c] = NULL;
@@ -1313,8 +1315,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
src[2][c] = NULL;
}
+ pp_rtmp = &dst[ffs(mask) - 1];
+ if (*pp_rtmp && (*pp_rtmp)->type != P_TEMP && (nr_dst > 1 || sat))
+ pp_rtmp = &temp;
+
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i];
+ const struct tgsi_full_src_register *fs =
+ &inst->FullSrcRegisters[i];
if (fs->SrcRegister.File == TGSI_FILE_SAMPLER)
unit = fs->SrcRegister.Index;
@@ -1327,10 +1334,15 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
if (sat) {
for (c = 0; c < 4; c++) {
rdst[c] = dst[c];
- dst[c] = temp_temp(pc);
+ if (dst[c] && dst[c]->type != P_TEMP)
+ dst[c] = temp_temp(pc);
}
- } else
- if (direct2dest_op(inst)) {
+ }
+
+ if (direct2dest_op(inst) && (*pp_rtmp)) {
+ /* We really don't lose the real dst as we do not
+ * get here if sat overwrites dst with temp.
+ */
for (c = 0; c < 4; c++) {
if (!dst[c] || dst[c]->type != P_TEMP)
continue;
@@ -1341,7 +1353,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
dst[c] == src[2][i])
break;
}
- if (i == 4)
+ if (i == 4 || !dst[i])
continue;
assimilate = TRUE;
@@ -1367,48 +1379,32 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
break;
case TGSI_OPCODE_COS:
temp = temp_temp(pc);
+ rtmp = *pp_rtmp;
emit_precossin(pc, temp, src[0][0]);
- emit_flop(pc, 5, temp, temp);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
- }
+ emit_flop(pc, 5, rtmp, temp);
break;
case TGSI_OPCODE_DP3:
temp = temp_temp(pc);
+ rtmp = *pp_rtmp;
emit_mul(pc, temp, src[0][0], src[1][0]);
emit_mad(pc, temp, src[0][1], src[1][1], temp);
- emit_mad(pc, temp, src[0][2], src[1][2], temp);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
- }
+ emit_mad(pc, rtmp, src[0][2], src[1][2], temp);
break;
case TGSI_OPCODE_DP4:
temp = temp_temp(pc);
+ rtmp = *pp_rtmp;
emit_mul(pc, temp, src[0][0], src[1][0]);
emit_mad(pc, temp, src[0][1], src[1][1], temp);
emit_mad(pc, temp, src[0][2], src[1][2], temp);
- emit_mad(pc, temp, src[0][3], src[1][3], temp);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
- }
+ emit_mad(pc, rtmp, src[0][3], src[1][3], temp);
break;
case TGSI_OPCODE_DPH:
temp = temp_temp(pc);
+ rtmp = *pp_rtmp;
emit_mul(pc, temp, src[0][0], src[1][0]);
emit_mad(pc, temp, src[0][1], src[1][1], temp);
emit_mad(pc, temp, src[0][2], src[1][2], temp);
- emit_add(pc, temp, src[1][3], temp);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
- }
+ emit_add(pc, rtmp, src[1][3], temp);
break;
case TGSI_OPCODE_DST:
{
@@ -1426,13 +1422,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
break;
case TGSI_OPCODE_EX2:
temp = temp_temp(pc);
+ rtmp = *pp_rtmp;
emit_preex2(pc, temp, src[0][0]);
- emit_flop(pc, 6, temp, temp);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
- }
+ emit_flop(pc, 6, rtmp, temp);
break;
case TGSI_OPCODE_FLR:
for (c = 0; c < 4; c++) {
@@ -1461,13 +1453,10 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
emit_lit(pc, &dst[0], mask, &src[0][0]);
break;
case TGSI_OPCODE_LG2:
- temp = temp_temp(pc);
- emit_flop(pc, 3, temp, src[0][0]);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
- }
+ rtmp = *pp_rtmp;
+ if (!rtmp)
+ rtmp = temp_temp(pc);
+ emit_flop(pc, 3, rtmp, src[0][0]);
break;
case TGSI_OPCODE_LRP:
temp = temp_temp(pc);
@@ -1523,18 +1512,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
break;
case TGSI_OPCODE_RCP:
- for (c = 3; c >= 0; c--) {
- if (!(mask & (1 << c)))
- continue;
- emit_flop(pc, 0, dst[c], src[0][0]);
- }
+ rtmp = *pp_rtmp;
+ if (!rtmp)
+ rtmp = temp_temp(pc);
+ emit_flop(pc, 0, rtmp, src[0][0]);
break;
case TGSI_OPCODE_RSQ:
- for (c = 3; c >= 0; c--) {
- if (!(mask & (1 << c)))
- continue;
- emit_flop(pc, 2, dst[c], src[0][0]);
- }
+ rtmp = *pp_rtmp;
+ if (!rtmp)
+ rtmp = temp_temp(pc);
+ emit_flop(pc, 2, rtmp, src[0][0]);
break;
case TGSI_OPCODE_SCS:
temp = temp_temp(pc);
@@ -1557,6 +1544,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
break;
case TGSI_OPCODE_SIN:
temp = temp_temp(pc);
+ rtmp = *pp_rtmp;
emit_precossin(pc, temp, src[0][0]);
emit_flop(pc, 4, temp, temp);
for (c = 0; c < 4; c++) {
@@ -1611,14 +1599,26 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
return FALSE;
}
+ if (rtmp) {
+ if (sat)
+ dst[0] = dst[1] = dst[2] = dst[3] = rtmp;
+ else {
+ for (c = 0; c < 4; c++) {
+ if (mask & (1 << c))
+ emit_mov(pc, dst[c], rtmp);
+ }
+ }
+ }
+
if (sat) {
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT,
- CVT_F32_F32);
+ emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT, 0xc4);
}
- } else if (assimilate) {
+ }
+
+ if (assimilate) {
for (c = 0; c < 4; c++)
if (rdst[c])
assimilate_temp(pc, rdst[c], dst[c]);
--
1.6.0.6
--------------090503050107050804030002
Content-Type: text/plain;
name="0012-nv50-initial-support-for-IF-ELSE-ENDIF-insns.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename*0="0012-nv50-initial-support-for-IF-ELSE-ENDIF-insns.patch"
More information about the Nouveau mailing list