[PATCH 05/13] nv50: SIN and COS use src0.w for dst.w
Christoph Bumiller
e0425955 at student.tuwien.ac.at
Mon Sep 7 09:12:29 PDT 2009
Try to save some MOVs: drop the per-component copy from a temporary, and compute dst.w from src0.w directly.
---
src/gallium/drivers/nv50/nv50_program.c | 36 +++++++++++++++++-------------
1 files changed, 20 insertions(+), 16 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index d198032..e7beb26 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1337,6 +1337,7 @@ static boolean
is_scalar_op(unsigned op)
{
switch (op) {
+ case TGSI_OPCODE_COS:
case TGSI_OPCODE_DP2:
case TGSI_OPCODE_DP3:
case TGSI_OPCODE_DP4:
@@ -1346,12 +1347,11 @@ is_scalar_op(unsigned op)
case TGSI_OPCODE_POW:
case TGSI_OPCODE_RCP:
case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SIN:
/*
- case TGSI_OPCODE_COS:
case TGSI_OPCODE_KIL:
case TGSI_OPCODE_LIT:
case TGSI_OPCODE_SCS:
- case TGSI_OPCODE_SIN:
*/
return TRUE;
default:
@@ -1468,14 +1468,16 @@ nv50_program_tx_insn(struct nv50_pc *pc,
}
break;
case TGSI_OPCODE_COS:
- temp = temp_temp(pc);
- emit_precossin(pc, temp, src[0][0]);
- emit_flop(pc, 5, temp, temp);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
+ if (mask & 8) {
+ emit_precossin(pc, temp, src[0][3]);
+ emit_flop(pc, 5, dst[3], temp);
+ if (!(mask &= 7))
+ break;
+ if (temp == dst[3])
+ temp = brdc = temp_temp(pc);
}
+ emit_precossin(pc, temp, src[0][0]);
+ emit_flop(pc, 5, brdc, temp);
break;
case TGSI_OPCODE_DP3:
emit_mul(pc, temp, src[0][0], src[1][0]);
@@ -1612,14 +1614,16 @@ nv50_program_tx_insn(struct nv50_pc *pc,
}
break;
case TGSI_OPCODE_SIN:
- temp = temp_temp(pc);
- emit_precossin(pc, temp, src[0][0]);
- emit_flop(pc, 4, temp, temp);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], temp);
+ if (mask & 8) {
+ emit_precossin(pc, temp, src[0][3]);
+ emit_flop(pc, 4, dst[3], temp);
+ if (!(mask &= 7))
+ break;
+ if (temp == dst[3])
+ temp = brdc = temp_temp(pc);
}
+ emit_precossin(pc, temp, src[0][0]);
+ emit_flop(pc, 4, brdc, temp);
break;
case TGSI_OPCODE_SLT:
for (c = 0; c < 4; c++) {
--
1.6.3.3
--------------010208090904000401070505
Content-Type: text/plain;
name="0006-nv50-handle-SEQ-SGT-SLE-SNE-opcodes.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="0006-nv50-handle-SEQ-SGT-SLE-SNE-opcodes.patch"
More information about the Nouveau mailing list