[PATCH 05/13] nv50: SIN and COS use src0.w for dst.w

Christoph Bumiller e0425955 at student.tuwien.ac.at
Mon Sep 7 09:12:29 PDT 2009


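Try to save some MOVs: add SIN and COS to is_scalar_op() and emit the
result into brdc, instead of computing it into a temporary and then
issuing one MOV per enabled destination component. When dst.w is
enabled, it is computed separately from src0.w.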
---
 src/gallium/drivers/nv50/nv50_program.c |   36 +++++++++++++++++-------------
 1 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index d198032..e7beb26 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1337,6 +1337,7 @@ static boolean
 is_scalar_op(unsigned op)
 {
 	switch (op) {
+	case TGSI_OPCODE_COS:
 	case TGSI_OPCODE_DP2:
 	case TGSI_OPCODE_DP3:
 	case TGSI_OPCODE_DP4:
@@ -1346,12 +1347,11 @@ is_scalar_op(unsigned op)
 	case TGSI_OPCODE_POW:
 	case TGSI_OPCODE_RCP:
 	case TGSI_OPCODE_RSQ:
+	case TGSI_OPCODE_SIN:
 		/*
-	case TGSI_OPCODE_COS:
 	case TGSI_OPCODE_KIL:
 	case TGSI_OPCODE_LIT:
 	case TGSI_OPCODE_SCS:
-	case TGSI_OPCODE_SIN:
 		*/
 		return TRUE;
 	default:
@@ -1468,14 +1468,16 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		}
 		break;
 	case TGSI_OPCODE_COS:
-		temp = temp_temp(pc);
-		emit_precossin(pc, temp, src[0][0]);
-		emit_flop(pc, 5, temp, temp);
-		for (c = 0; c < 4; c++) {
-			if (!(mask & (1 << c)))
-				continue;
-			emit_mov(pc, dst[c], temp);
+		if (mask & 8) {
+			emit_precossin(pc, temp, src[0][3]);
+			emit_flop(pc, 5, dst[3], temp);
+			if (!(mask &= 7))
+				break;
+			if (temp == dst[3])
+				temp = brdc = temp_temp(pc);
 		}
+		emit_precossin(pc, temp, src[0][0]);
+		emit_flop(pc, 5, brdc, temp);
 		break;
 	case TGSI_OPCODE_DP3:
 		emit_mul(pc, temp, src[0][0], src[1][0]);
@@ -1612,14 +1614,16 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		}
 		break;
 	case TGSI_OPCODE_SIN:
-		temp = temp_temp(pc);
-		emit_precossin(pc, temp, src[0][0]);
-		emit_flop(pc, 4, temp, temp);
-		for (c = 0; c < 4; c++) {
-			if (!(mask & (1 << c)))
-				continue;
-			emit_mov(pc, dst[c], temp);
+		if (mask & 8) {
+			emit_precossin(pc, temp, src[0][3]);
+			emit_flop(pc, 4, dst[3], temp);
+			if (!(mask &= 7))
+				break;
+			if (temp == dst[3])
+				temp = brdc = temp_temp(pc);
 		}
+		emit_precossin(pc, temp, src[0][0]);
+		emit_flop(pc, 4, brdc, temp);
 		break;
 	case TGSI_OPCODE_SLT:
 		for (c = 0; c < 4; c++) {
-- 
1.6.3.3


--------------010208090904000401070505
Content-Type: text/plain;
 name="0006-nv50-handle-SEQ-SGT-SLE-SNE-opcodes.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="0006-nv50-handle-SEQ-SGT-SLE-SNE-opcodes.patch"



More information about the Nouveau mailing list